From a38e9d3e98ca3459667bb4fed56d5d537a2ce0c6 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Thu, 19 Aug 2021 11:19:53 -0700 Subject: [PATCH 0002/1016] msm: kgsl: Initial KGSL commit Snapshot of the Adreno GPU (KGSL) driver from msm-5.10 commit b809380cd817 (Merge "defconfig: enable new touch NT36XXX_SPI"). Add in the Makefile and other changes required to compile kgsl code outside the kernel tree. Change-Id: I63801a9c9ebcd6e2dbd20f023b664caf7255818c Signed-off-by: Lynus Vaz --- Android.bp | 35 + Android.mk | 29 + Kbuild | 95 + Kconfig | 89 + Makefile | 16 + a3xx_reg.h | 564 ++++ a5xx_reg.h | 902 +++++++ a6xx_reg.h | 1197 ++++++++ adreno-gpulist.h | 1915 +++++++++++++ adreno.c | 3214 ++++++++++++++++++++++ adreno.h | 1781 ++++++++++++ adreno_a3xx.c | 1569 +++++++++++ adreno_a3xx.h | 69 + adreno_a3xx_perfcounter.c | 411 +++ adreno_a3xx_ringbuffer.c | 454 ++++ adreno_a3xx_snapshot.c | 448 +++ adreno_a5xx.c | 2726 +++++++++++++++++++ adreno_a5xx.h | 307 +++ adreno_a5xx_packets.h | 1406 ++++++++++ adreno_a5xx_perfcounter.c | 695 +++++ adreno_a5xx_preempt.c | 552 ++++ adreno_a5xx_ringbuffer.c | 521 ++++ adreno_a5xx_snapshot.c | 1220 +++++++++ adreno_a6xx.c | 2774 +++++++++++++++++++ adreno_a6xx.h | 458 ++++ adreno_a6xx_gmu.c | 3459 ++++++++++++++++++++++++ adreno_a6xx_gmu.h | 425 +++ adreno_a6xx_gmu_snapshot.c | 431 +++ adreno_a6xx_hfi.c | 860 ++++++ adreno_a6xx_hfi.h | 184 ++ adreno_a6xx_hwsched.c | 1178 ++++++++ adreno_a6xx_hwsched.h | 84 + adreno_a6xx_hwsched_hfi.c | 1569 +++++++++++ adreno_a6xx_hwsched_hfi.h | 141 + adreno_a6xx_perfcounter.c | 952 +++++++ adreno_a6xx_preempt.c | 768 ++++++ adreno_a6xx_rgmu.c | 1443 ++++++++++ adreno_a6xx_rgmu.h | 99 + adreno_a6xx_ringbuffer.c | 523 ++++ adreno_a6xx_rpmh.c | 478 ++++ adreno_a6xx_snapshot.c | 2262 ++++++++++++++++ adreno_compat.c | 185 ++ adreno_compat.h | 46 + adreno_coresight.c | 407 +++ adreno_coresight.h | 132 + adreno_cp_parser.c | 1047 +++++++ adreno_cp_parser.h | 175 ++ adreno_debugfs.c | 416 +++ 
adreno_dispatch.c | 2976 ++++++++++++++++++++ adreno_dispatch.h | 112 + adreno_drawctxt.c | 586 ++++ adreno_drawctxt.h | 178 ++ adreno_gen7.c | 1318 +++++++++ adreno_gen7.h | 436 +++ adreno_gen7_gmu.c | 2708 +++++++++++++++++++ adreno_gen7_gmu.h | 422 +++ adreno_gen7_gmu_snapshot.c | 326 +++ adreno_gen7_hfi.c | 636 +++++ adreno_gen7_hfi.h | 185 ++ adreno_gen7_hwsched.c | 1161 ++++++++ adreno_gen7_hwsched.h | 84 + adreno_gen7_hwsched_hfi.c | 1606 +++++++++++ adreno_gen7_hwsched_hfi.h | 155 ++ adreno_gen7_perfcounter.c | 896 ++++++ adreno_gen7_preempt.c | 746 +++++ adreno_gen7_ringbuffer.c | 556 ++++ adreno_gen7_rpmh.c | 469 ++++ adreno_gen7_snapshot.c | 1254 +++++++++ adreno_gen7_snapshot.h | 1311 +++++++++ adreno_hfi.h | 869 ++++++ adreno_hwsched.c | 1714 ++++++++++++ adreno_hwsched.h | 129 + adreno_ioctl.c | 227 ++ adreno_perfcounter.c | 580 ++++ adreno_perfcounter.h | 137 + adreno_pm4types.h | 404 +++ adreno_profile.c | 1130 ++++++++ adreno_profile.h | 107 + adreno_ringbuffer.c | 435 +++ adreno_ringbuffer.h | 247 ++ adreno_snapshot.c | 1134 ++++++++ adreno_snapshot.h | 85 + adreno_sysfs.c | 337 +++ adreno_sysfs.h | 78 + adreno_trace.c | 36 + adreno_trace.h | 786 ++++++ build.config.msm_kgsl | 1 + config/gki_waipiodisp.conf | 15 + gen7_reg.h | 1158 ++++++++ gfx_driver_product.mk | 4 + gfx_kernel_board.mk | 10 + gfx_kernel_headers.py | 96 + governor_gpubw_mon.c | 318 +++ governor_msm_adreno_tz.c | 563 ++++ include/linux/msm_kgsl.h | 52 + include/uapi/linux/msm_kgsl.h | 2001 ++++++++++++++ kgsl.c | 4809 +++++++++++++++++++++++++++++++++ kgsl.h | 606 +++++ kgsl_bus.c | 179 ++ kgsl_bus.h | 25 + kgsl_compat.c | 392 +++ kgsl_compat.h | 243 ++ kgsl_debugfs.c | 406 +++ kgsl_debugfs.h | 36 + kgsl_device.h | 962 +++++++ kgsl_drawobj.c | 1489 ++++++++++ kgsl_drawobj.h | 332 +++ kgsl_eventlog.c | 232 ++ kgsl_eventlog.h | 20 + kgsl_events.c | 434 +++ kgsl_gmu_core.c | 213 ++ kgsl_gmu_core.h | 321 +++ kgsl_ioctl.c | 189 ++ kgsl_iommu.c | 2419 +++++++++++++++++ kgsl_iommu.h | 
180 ++ kgsl_mmu.c | 618 +++++ kgsl_mmu.h | 393 +++ kgsl_pool.c | 641 +++++ kgsl_pool.h | 68 + kgsl_pwrctrl.c | 2329 ++++++++++++++++ kgsl_pwrctrl.h | 265 ++ kgsl_pwrscale.c | 805 ++++++ kgsl_pwrscale.h | 110 + kgsl_reclaim.c | 422 +++ kgsl_reclaim.h | 52 + kgsl_regmap.c | 328 +++ kgsl_regmap.h | 265 ++ kgsl_sharedmem.c | 1605 +++++++++++ kgsl_sharedmem.h | 463 ++++ kgsl_snapshot.c | 1273 +++++++++ kgsl_snapshot.h | 314 +++ kgsl_sync.c | 884 ++++++ kgsl_sync.h | 184 ++ kgsl_sysfs.h | 31 + kgsl_timeline.c | 551 ++++ kgsl_timeline.h | 115 + kgsl_trace.c | 12 + kgsl_trace.h | 1522 +++++++++++ kgsl_util.c | 350 +++ kgsl_util.h | 150 + kgsl_vbo.c | 627 +++++ msm_adreno_devfreq.h | 77 + 142 files changed, 99426 insertions(+) create mode 100644 Android.bp create mode 100644 Android.mk create mode 100644 Kbuild create mode 100644 Kconfig create mode 100644 Makefile create mode 100644 a3xx_reg.h create mode 100644 a5xx_reg.h create mode 100644 a6xx_reg.h create mode 100644 adreno-gpulist.h create mode 100644 adreno.c create mode 100644 adreno.h create mode 100644 adreno_a3xx.c create mode 100644 adreno_a3xx.h create mode 100644 adreno_a3xx_perfcounter.c create mode 100644 adreno_a3xx_ringbuffer.c create mode 100644 adreno_a3xx_snapshot.c create mode 100644 adreno_a5xx.c create mode 100644 adreno_a5xx.h create mode 100644 adreno_a5xx_packets.h create mode 100644 adreno_a5xx_perfcounter.c create mode 100644 adreno_a5xx_preempt.c create mode 100644 adreno_a5xx_ringbuffer.c create mode 100644 adreno_a5xx_snapshot.c create mode 100644 adreno_a6xx.c create mode 100644 adreno_a6xx.h create mode 100644 adreno_a6xx_gmu.c create mode 100644 adreno_a6xx_gmu.h create mode 100644 adreno_a6xx_gmu_snapshot.c create mode 100644 adreno_a6xx_hfi.c create mode 100644 adreno_a6xx_hfi.h create mode 100644 adreno_a6xx_hwsched.c create mode 100644 adreno_a6xx_hwsched.h create mode 100644 adreno_a6xx_hwsched_hfi.c create mode 100644 adreno_a6xx_hwsched_hfi.h create mode 100644 
adreno_a6xx_perfcounter.c create mode 100644 adreno_a6xx_preempt.c create mode 100644 adreno_a6xx_rgmu.c create mode 100644 adreno_a6xx_rgmu.h create mode 100644 adreno_a6xx_ringbuffer.c create mode 100644 adreno_a6xx_rpmh.c create mode 100644 adreno_a6xx_snapshot.c create mode 100644 adreno_compat.c create mode 100644 adreno_compat.h create mode 100644 adreno_coresight.c create mode 100644 adreno_coresight.h create mode 100644 adreno_cp_parser.c create mode 100644 adreno_cp_parser.h create mode 100644 adreno_debugfs.c create mode 100644 adreno_dispatch.c create mode 100644 adreno_dispatch.h create mode 100644 adreno_drawctxt.c create mode 100644 adreno_drawctxt.h create mode 100644 adreno_gen7.c create mode 100644 adreno_gen7.h create mode 100644 adreno_gen7_gmu.c create mode 100644 adreno_gen7_gmu.h create mode 100644 adreno_gen7_gmu_snapshot.c create mode 100644 adreno_gen7_hfi.c create mode 100644 adreno_gen7_hfi.h create mode 100644 adreno_gen7_hwsched.c create mode 100644 adreno_gen7_hwsched.h create mode 100644 adreno_gen7_hwsched_hfi.c create mode 100644 adreno_gen7_hwsched_hfi.h create mode 100644 adreno_gen7_perfcounter.c create mode 100644 adreno_gen7_preempt.c create mode 100644 adreno_gen7_ringbuffer.c create mode 100644 adreno_gen7_rpmh.c create mode 100644 adreno_gen7_snapshot.c create mode 100644 adreno_gen7_snapshot.h create mode 100644 adreno_hfi.h create mode 100644 adreno_hwsched.c create mode 100644 adreno_hwsched.h create mode 100644 adreno_ioctl.c create mode 100644 adreno_perfcounter.c create mode 100644 adreno_perfcounter.h create mode 100644 adreno_pm4types.h create mode 100644 adreno_profile.c create mode 100644 adreno_profile.h create mode 100644 adreno_ringbuffer.c create mode 100644 adreno_ringbuffer.h create mode 100644 adreno_snapshot.c create mode 100644 adreno_snapshot.h create mode 100644 adreno_sysfs.c create mode 100644 adreno_sysfs.h create mode 100644 adreno_trace.c create mode 100644 adreno_trace.h create mode 100644 
build.config.msm_kgsl create mode 100644 config/gki_waipiodisp.conf create mode 100644 gen7_reg.h create mode 100644 gfx_driver_product.mk create mode 100644 gfx_kernel_board.mk create mode 100644 gfx_kernel_headers.py create mode 100644 governor_gpubw_mon.c create mode 100644 governor_msm_adreno_tz.c create mode 100644 include/linux/msm_kgsl.h create mode 100644 include/uapi/linux/msm_kgsl.h create mode 100644 kgsl.c create mode 100644 kgsl.h create mode 100644 kgsl_bus.c create mode 100644 kgsl_bus.h create mode 100644 kgsl_compat.c create mode 100644 kgsl_compat.h create mode 100644 kgsl_debugfs.c create mode 100644 kgsl_debugfs.h create mode 100644 kgsl_device.h create mode 100644 kgsl_drawobj.c create mode 100644 kgsl_drawobj.h create mode 100644 kgsl_eventlog.c create mode 100644 kgsl_eventlog.h create mode 100644 kgsl_events.c create mode 100644 kgsl_gmu_core.c create mode 100644 kgsl_gmu_core.h create mode 100644 kgsl_ioctl.c create mode 100644 kgsl_iommu.c create mode 100644 kgsl_iommu.h create mode 100644 kgsl_mmu.c create mode 100644 kgsl_mmu.h create mode 100644 kgsl_pool.c create mode 100644 kgsl_pool.h create mode 100644 kgsl_pwrctrl.c create mode 100644 kgsl_pwrctrl.h create mode 100644 kgsl_pwrscale.c create mode 100644 kgsl_pwrscale.h create mode 100644 kgsl_reclaim.c create mode 100644 kgsl_reclaim.h create mode 100644 kgsl_regmap.c create mode 100644 kgsl_regmap.h create mode 100644 kgsl_sharedmem.c create mode 100644 kgsl_sharedmem.h create mode 100644 kgsl_snapshot.c create mode 100644 kgsl_snapshot.h create mode 100644 kgsl_sync.c create mode 100644 kgsl_sync.h create mode 100644 kgsl_sysfs.h create mode 100644 kgsl_timeline.c create mode 100644 kgsl_timeline.h create mode 100644 kgsl_trace.c create mode 100644 kgsl_trace.h create mode 100644 kgsl_util.c create mode 100644 kgsl_util.h create mode 100644 kgsl_vbo.c create mode 100644 msm_adreno_devfreq.h diff --git a/Android.bp b/Android.bp new file mode 100644 index 0000000000..44160ae331 --- 
/dev/null +++ b/Android.bp @@ -0,0 +1,35 @@ +headers_src = [ + "include/uapi/linux/*.h", +] + +gfx_headers_out = [ + "linux/msm_kgsl.h", +] + +gfx_kernel_headers_verbose = "--verbose " +genrule { + name: "qti_generate_gfx_kernel_headers", + tools: ["headers_install.sh", + "unifdef" + ], + tool_files: [ + "gfx_kernel_headers.py", + ], + srcs: headers_src, + cmd: "python3 -u $(location gfx_kernel_headers.py) " + + gfx_kernel_headers_verbose + + "--header_arch arm64 " + + "--gen_dir $(genDir) " + + "--gfx_include_uapi $(locations include/uapi/linux/*.h) " + + "--unifdef $(location unifdef) " + + "--headers_install $(location headers_install.sh)", + out: gfx_headers_out, +} + +cc_library_headers { + name: "qti_gfx_kernel_uapi", + generated_headers: ["qti_generate_gfx_kernel_headers"], + export_generated_headers: ["qti_generate_gfx_kernel_headers"], + vendor: true, + recovery_available: true +} diff --git a/Android.mk b/Android.mk new file mode 100644 index 0000000000..cad91bfab6 --- /dev/null +++ b/Android.mk @@ -0,0 +1,29 @@ +# Test dlkm +DLKM_DIR := device/qcom/common/dlkm +KGSL_SELECT := CONFIG_QCOM_KGSL=m +KERN_SRC := $(ANDROID_TOP)/kernel_platform/msm-kernel + +LOCAL_PATH := $(call my-dir) + +KBUILD_OPTIONS += BOARD_PLATFORM=$(TARGET_BOARD_PLATFORM) +KBUILD_OPTIONS += $(KGSL_SELECT) +KBUILD_OPTIONS += MODNAME=msm_kgsl +KBUILD_OPTIONS += KERN_SRC=$(KERN_SRC) + +KBUILD_OPTIONS += KBUILD_EXTRA_SYMBOLS=$(PWD)/$(call intermediates-dir-for,DLKM,mmrm-module-symvers)/Module.symvers + +include $(CLEAR_VARS) +# For incremental compilation +LOCAL_SRC_FILES := $(wildcard $(LOCAL_PATH)/**/*) $(wildcard $(LOCAL_PATH)/*) +LOCAL_MODULE := msm_kgsl.ko +LOCAL_MODULE_KBUILD_NAME := msm_kgsl.ko +LOCAL_MODULE_TAGS := optional +LOCAL_MODULE_DEBUG_ENABLE := true +LOCAL_MODULE_PATH := $(KERNEL_MODULES_OUT) +#LOCAL_REQUIRED_MODULES := mmrm-module-symvers +#LOCAL_ADDITIONAL_DEPENDENCIES := $(call intermediates-dir-for,DLKM,mmrm-module-symvers)/Module.symvers + +# Include msm_kgsl.ko in the 
/vendor/lib/modules (vendor.img) +BOARD_VENDOR_KERNEL_MODULES += $(LOCAL_MODULE_PATH)/$(LOCAL_MODULE) +include $(DLKM_DIR)/Build_external_kernelmodule.mk + diff --git a/Kbuild b/Kbuild new file mode 100644 index 0000000000..b1212a31a7 --- /dev/null +++ b/Kbuild @@ -0,0 +1,95 @@ +# SPDX-License-Identifier: GPL-2.0-only + +KDIR := $(TOP)/kernel_platform/common + +ifeq ($(KGSL_PATH),) +KGSL_PATH=$(src) +endif + +ifeq ($(CONFIG_ARCH_WAIPIO), y) + include $(KGSL_PATH)/config/gki_waipiodisp.conf +endif + +ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERN_SRC)/drivers/devfreq + +obj-$(CONFIG_QCOM_KGSL) += msm_kgsl.o + +msm_kgsl-y = \ + kgsl.o \ + kgsl_bus.o \ + kgsl_drawobj.o \ + kgsl_events.o \ + kgsl_eventlog.o \ + kgsl_gmu_core.o \ + kgsl_ioctl.o \ + kgsl_mmu.o \ + kgsl_pwrctrl.o \ + kgsl_pwrscale.o \ + kgsl_regmap.o \ + kgsl_sharedmem.o \ + kgsl_snapshot.o \ + kgsl_timeline.o \ + kgsl_trace.o \ + kgsl_util.o \ + kgsl_vbo.o + +msm_kgsl-$(CONFIG_COMPAT) += kgsl_compat.o +msm_kgsl-$(CONFIG_DEBUG_FS) += kgsl_debugfs.o +msm_kgsl-$(CONFIG_ARM_SMMU) += kgsl_iommu.o +msm_kgsl-$(CONFIG_SYNC_FILE) += kgsl_sync.o +msm_kgsl-$(CONFIG_QCOM_KGSL_PROCESS_RECLAIM) += kgsl_reclaim.o + +ifndef CONFIG_QCOM_KGSL_USE_SHMEM + msm_kgsl-y += kgsl_pool.o +endif + +msm_kgsl-y += \ + adreno.o \ + adreno_a3xx.o \ + adreno_a3xx_perfcounter.o \ + adreno_a3xx_ringbuffer.o \ + adreno_a3xx_snapshot.o \ + adreno_a5xx.o \ + adreno_a5xx_perfcounter.o \ + adreno_a5xx_preempt.o \ + adreno_a5xx_ringbuffer.o \ + adreno_a5xx_snapshot.o \ + adreno_a6xx.o \ + adreno_a6xx_gmu.o \ + adreno_a6xx_gmu_snapshot.o \ + adreno_a6xx_hfi.o \ + adreno_a6xx_hwsched.o \ + adreno_a6xx_hwsched_hfi.o \ + adreno_a6xx_perfcounter.o \ + adreno_a6xx_preempt.o \ + adreno_a6xx_rgmu.o \ + adreno_a6xx_ringbuffer.o \ + adreno_a6xx_rpmh.o \ + adreno_a6xx_snapshot.o \ + adreno_cp_parser.o \ + adreno_dispatch.o \ + adreno_drawctxt.o \ + adreno_gen7.o \ + adreno_gen7_gmu.o \ + 
adreno_gen7_gmu_snapshot.o \ + adreno_gen7_hfi.o \ + adreno_gen7_hwsched.o \ + adreno_gen7_hwsched_hfi.o \ + adreno_gen7_perfcounter.o \ + adreno_gen7_preempt.o \ + adreno_gen7_ringbuffer.o \ + adreno_gen7_rpmh.o \ + adreno_gen7_snapshot.o \ + adreno_hwsched.o \ + adreno_ioctl.o \ + adreno_perfcounter.o \ + adreno_ringbuffer.o \ + adreno_snapshot.o \ + adreno_sysfs.o \ + adreno_trace.o \ + governor_msm_adreno_tz.o \ + governor_gpubw_mon.o + +msm_kgsl-$(CONFIG_COMPAT) += adreno_compat.o +msm_kgsl-$(CONFIG_QCOM_KGSL_CORESIGHT) += adreno_coresight.o +msm_kgsl-$(CONFIG_DEBUG_FS) += adreno_debugfs.o adreno_profile.o diff --git a/Kconfig b/Kconfig new file mode 100644 index 0000000000..0c04a88e74 --- /dev/null +++ b/Kconfig @@ -0,0 +1,89 @@ +# SPDX-License-Identifier: GPL-2.0-only +config QCOM_KGSL + tristate "Qualcomm Technologies, Inc. 3D Graphics driver" + depends on ARCH_QCOM + depends on QCOM_QFPROM + select QCOM_MDT_LOADER + select INTERVAL_TREE + select TRACE_GPU_MEM + help + 3D graphics driver for the Adreno family of GPUs from QTI. + Required to use hardware accelerated OpenGL, compute and Vulkan + on QTI targets. This includes power management, memory management, + and scheduling for the Adreno GPUs. + +config DEVFREQ_GOV_QCOM_ADRENO_TZ + tristate "Qualcomm Technologies, Inc. GPU frequency governor" + depends on PM_DEVFREQ && QCOM_KGSL + help + GPU frequency governor for the Adreno GPU. Sets the frequency + using an "on demand" algorithm in conjunction with other + components on Adreno platforms. This is not useful for non-Adreno + devices. + +config DEVFREQ_GOV_QCOM_GPUBW_MON + tristate "Qualcomm Technologies, Inc. GPU bandwidth governor" + depends on DEVFREQ_GOV_QCOM_ADRENO_TZ + help + This governor works together with the Adreno GPU governor to + select bus frequency votes using an "on-demand" algorithm. + This governor will not be useful for non-Adreno based + targets. 
+ +config QCOM_ADRENO_DEFAULT_GOVERNOR + string "devfreq governor for the adreno core" + default "msm-adreno-tz" + depends on QCOM_KGSL + +config QCOM_KGSL_CORESIGHT + bool "Enable coresight support for the Adreno GPU" + depends on QCOM_KGSL && CORESIGHT + help + When enabled, the Adreno GPU is available as a source for Coresight + data. On a6xx targets there are two sources available for the GX and + CX domains respectively. Debug kernels should say 'Y' here. + +config QCOM_KGSL_IOCOHERENCY_DEFAULT + bool "Enable I/O coherency on cached GPU memory by default" + depends on QCOM_KGSL + default y if ARCH_LAHAINA + help + Say 'Y' here to enable I/O cache coherency by default on targets that + support hardware I/O coherency. If enabled all cached GPU memory + will use I/O coherency regardless of the user flags. If not enabled + the user can still selectively enable I/O coherency with a flag. + +config QCOM_KGSL_IDLE_TIMEOUT + int + depends on QCOM_KGSL + default 80 + help + GPU idle timeout for Adreno GPU. This value decides after how + long the GPU will go into slumber. A higher value will mean that + the GPU is powered ON for a longer duration which will have + power costs. + +config QCOM_KGSL_CONTEXT_DEBUG + bool "Log kgsl context information for all processes" + depends on QCOM_KGSL + help + When enabled, total number of KGSL contexts, number of attached and + detached contexts are dumped into kernel log for all the processes. + This gives insight about the number of contexts held by each process. + +config QCOM_KGSL_SORT_POOL + bool "Sort pool page list based on physical address" + depends on QCOM_KGSL + default y + help + When enabled, the pool page list is sorted based on physical + addresses. This can be turned on for targets where better DDR + efficiency is attained on accesses for adjacent memory. 
+ +config QCOM_KGSL_QDSS_STM + bool "Enable support for QDSS STM for Adreno GPU" + depends on QCOM_KGSL && CORESIGHT + help + When enabled, the Adreno GPU QDSS STM support is enabled. GPU QDSS STM + memory will be mapped to GPU and QDSS clock needed to access this memory + is voted. Debug kernels should say 'Y' here. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000..9743341b60 --- /dev/null +++ b/Makefile @@ -0,0 +1,16 @@ +ifeq ($(KGSL_MODULE_ROOT),) +KGSL_MODULE_ROOT=$(KERNEL_SRC)/$(M) +endif + +KBUILD_OPTIONS+=KGSL_PATH=$(KGSL_MODULE_ROOT) + +all: modules + +modules_install: + $(MAKE) INSTALL_MOD_STRIP=1 -C $(KERNEL_SRC) M=$(M) modules_install + +clean: + $(MAKE) -C $(KERNEL_SRC) M=$(M) clean + +%: + $(MAKE) -C $(KERNEL_SRC) M=$(M) $@ $(KBUILD_OPTIONS) diff --git a/a3xx_reg.h b/a3xx_reg.h new file mode 100644 index 0000000000..ab5079aa45 --- /dev/null +++ b/a3xx_reg.h @@ -0,0 +1,564 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2012-2017,2019-2020, The Linux Foundation. All rights reserved. 
+ */ + +#ifndef _A300_REG_H +#define _A300_REG_H + +/* Interrupt bit positions within RBBM_INT_0 */ + +#define A3XX_INT_RBBM_GPU_IDLE 0 +#define A3XX_INT_RBBM_AHB_ERROR 1 +#define A3XX_INT_RBBM_REG_TIMEOUT 2 +#define A3XX_INT_RBBM_ME_MS_TIMEOUT 3 +#define A3XX_INT_RBBM_PFP_MS_TIMEOUT 4 +#define A3XX_INT_RBBM_ATB_BUS_OVERFLOW 5 +#define A3XX_INT_VFD_ERROR 6 +#define A3XX_INT_CP_SW_INT 7 +#define A3XX_INT_CP_T0_PACKET_IN_IB 8 +#define A3XX_INT_CP_OPCODE_ERROR 9 +#define A3XX_INT_CP_RESERVED_BIT_ERROR 10 +#define A3XX_INT_CP_HW_FAULT 11 +#define A3XX_INT_CP_DMA 12 +#define A3XX_INT_CP_IB2_INT 13 +#define A3XX_INT_CP_IB1_INT 14 +#define A3XX_INT_CP_RB_INT 15 +#define A3XX_INT_CP_REG_PROTECT_FAULT 16 +#define A3XX_INT_CP_RB_DONE_TS 17 +#define A3XX_INT_CP_VS_DONE_TS 18 +#define A3XX_INT_CP_PS_DONE_TS 19 +#define A3XX_INT_CACHE_FLUSH_TS 20 +#define A3XX_INT_CP_AHB_ERROR_HALT 21 +#define A3XX_INT_MISC_HANG_DETECT 24 +#define A3XX_INT_UCHE_OOB_ACCESS 25 + +/* Register definitions */ + +#define A3XX_RBBM_CLOCK_CTL 0x010 +#define A3XX_RBBM_SP_HYST_CNT 0x012 +#define A3XX_RBBM_SW_RESET_CMD 0x018 +#define A3XX_RBBM_AHB_CTL0 0x020 +#define A3XX_RBBM_AHB_CTL1 0x021 +#define A3XX_RBBM_AHB_CMD 0x022 +#define A3XX_RBBM_AHB_ERROR_STATUS 0x027 +#define A3XX_RBBM_GPR0_CTL 0x02E +/* This the same register as on A2XX, just in a different place */ +#define A3XX_RBBM_STATUS 0x030 +#define A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x33 +#define A3XX_RBBM_INTERFACE_HANG_INT_CTL 0x50 +#define A3XX_RBBM_INT_CLEAR_CMD 0x061 +#define A3XX_RBBM_INT_0_MASK 0x063 +#define A3XX_RBBM_INT_0_STATUS 0x064 +#define A3XX_RBBM_PERFCTR_CTL 0x80 +#define A3XX_RBBM_PERFCTR_LOAD_CMD0 0x81 +#define A3XX_RBBM_PERFCTR_LOAD_CMD1 0x82 +#define A3XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x84 +#define A3XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x85 +#define A3XX_RBBM_PERFCOUNTER0_SELECT 0x86 +#define A3XX_RBBM_PERFCOUNTER1_SELECT 0x87 +#define A3XX_RBBM_GPU_BUSY_MASKED 0x88 +#define A3XX_RBBM_PERFCTR_CP_0_LO 0x90 +#define A3XX_RBBM_PERFCTR_CP_0_HI 
0x91 +#define A3XX_RBBM_PERFCTR_RBBM_0_LO 0x92 +#define A3XX_RBBM_PERFCTR_RBBM_0_HI 0x93 +#define A3XX_RBBM_PERFCTR_RBBM_1_LO 0x94 +#define A3XX_RBBM_PERFCTR_RBBM_1_HI 0x95 +#define A3XX_RBBM_PERFCTR_PC_0_LO 0x96 +#define A3XX_RBBM_PERFCTR_PC_0_HI 0x97 +#define A3XX_RBBM_PERFCTR_PC_1_LO 0x98 +#define A3XX_RBBM_PERFCTR_PC_1_HI 0x99 +#define A3XX_RBBM_PERFCTR_PC_2_LO 0x9A +#define A3XX_RBBM_PERFCTR_PC_2_HI 0x9B +#define A3XX_RBBM_PERFCTR_PC_3_LO 0x9C +#define A3XX_RBBM_PERFCTR_PC_3_HI 0x9D +#define A3XX_RBBM_PERFCTR_VFD_0_LO 0x9E +#define A3XX_RBBM_PERFCTR_VFD_0_HI 0x9F +#define A3XX_RBBM_PERFCTR_VFD_1_LO 0xA0 +#define A3XX_RBBM_PERFCTR_VFD_1_HI 0xA1 +#define A3XX_RBBM_PERFCTR_HLSQ_0_LO 0xA2 +#define A3XX_RBBM_PERFCTR_HLSQ_0_HI 0xA3 +#define A3XX_RBBM_PERFCTR_HLSQ_1_LO 0xA4 +#define A3XX_RBBM_PERFCTR_HLSQ_1_HI 0xA5 +#define A3XX_RBBM_PERFCTR_HLSQ_2_LO 0xA6 +#define A3XX_RBBM_PERFCTR_HLSQ_2_HI 0xA7 +#define A3XX_RBBM_PERFCTR_HLSQ_3_LO 0xA8 +#define A3XX_RBBM_PERFCTR_HLSQ_3_HI 0xA9 +#define A3XX_RBBM_PERFCTR_HLSQ_4_LO 0xAA +#define A3XX_RBBM_PERFCTR_HLSQ_4_HI 0xAB +#define A3XX_RBBM_PERFCTR_HLSQ_5_LO 0xAC +#define A3XX_RBBM_PERFCTR_HLSQ_5_HI 0xAD +#define A3XX_RBBM_PERFCTR_VPC_0_LO 0xAE +#define A3XX_RBBM_PERFCTR_VPC_0_HI 0xAF +#define A3XX_RBBM_PERFCTR_VPC_1_LO 0xB0 +#define A3XX_RBBM_PERFCTR_VPC_1_HI 0xB1 +#define A3XX_RBBM_PERFCTR_TSE_0_LO 0xB2 +#define A3XX_RBBM_PERFCTR_TSE_0_HI 0xB3 +#define A3XX_RBBM_PERFCTR_TSE_1_LO 0xB4 +#define A3XX_RBBM_PERFCTR_TSE_1_HI 0xB5 +#define A3XX_RBBM_PERFCTR_RAS_0_LO 0xB6 +#define A3XX_RBBM_PERFCTR_RAS_0_HI 0xB7 +#define A3XX_RBBM_PERFCTR_RAS_1_LO 0xB8 +#define A3XX_RBBM_PERFCTR_RAS_1_HI 0xB9 +#define A3XX_RBBM_PERFCTR_UCHE_0_LO 0xBA +#define A3XX_RBBM_PERFCTR_UCHE_0_HI 0xBB +#define A3XX_RBBM_PERFCTR_UCHE_1_LO 0xBC +#define A3XX_RBBM_PERFCTR_UCHE_1_HI 0xBD +#define A3XX_RBBM_PERFCTR_UCHE_2_LO 0xBE +#define A3XX_RBBM_PERFCTR_UCHE_2_HI 0xBF +#define A3XX_RBBM_PERFCTR_UCHE_3_LO 0xC0 +#define A3XX_RBBM_PERFCTR_UCHE_3_HI 0xC1 +#define 
A3XX_RBBM_PERFCTR_UCHE_4_LO 0xC2 +#define A3XX_RBBM_PERFCTR_UCHE_4_HI 0xC3 +#define A3XX_RBBM_PERFCTR_UCHE_5_LO 0xC4 +#define A3XX_RBBM_PERFCTR_UCHE_5_HI 0xC5 +#define A3XX_RBBM_PERFCTR_TP_0_LO 0xC6 +#define A3XX_RBBM_PERFCTR_TP_0_HI 0xC7 +#define A3XX_RBBM_PERFCTR_TP_1_LO 0xC8 +#define A3XX_RBBM_PERFCTR_TP_1_HI 0xC9 +#define A3XX_RBBM_PERFCTR_TP_2_LO 0xCA +#define A3XX_RBBM_PERFCTR_TP_2_HI 0xCB +#define A3XX_RBBM_PERFCTR_TP_3_LO 0xCC +#define A3XX_RBBM_PERFCTR_TP_3_HI 0xCD +#define A3XX_RBBM_PERFCTR_TP_4_LO 0xCE +#define A3XX_RBBM_PERFCTR_TP_4_HI 0xCF +#define A3XX_RBBM_PERFCTR_TP_5_LO 0xD0 +#define A3XX_RBBM_PERFCTR_TP_5_HI 0xD1 +#define A3XX_RBBM_PERFCTR_SP_0_LO 0xD2 +#define A3XX_RBBM_PERFCTR_SP_0_HI 0xD3 +#define A3XX_RBBM_PERFCTR_SP_1_LO 0xD4 +#define A3XX_RBBM_PERFCTR_SP_1_HI 0xD5 +#define A3XX_RBBM_PERFCTR_SP_2_LO 0xD6 +#define A3XX_RBBM_PERFCTR_SP_2_HI 0xD7 +#define A3XX_RBBM_PERFCTR_SP_3_LO 0xD8 +#define A3XX_RBBM_PERFCTR_SP_3_HI 0xD9 +#define A3XX_RBBM_PERFCTR_SP_4_LO 0xDA +#define A3XX_RBBM_PERFCTR_SP_4_HI 0xDB +#define A3XX_RBBM_PERFCTR_SP_5_LO 0xDC +#define A3XX_RBBM_PERFCTR_SP_5_HI 0xDD +#define A3XX_RBBM_PERFCTR_SP_6_LO 0xDE +#define A3XX_RBBM_PERFCTR_SP_6_HI 0xDF +#define A3XX_RBBM_PERFCTR_SP_7_LO 0xE0 +#define A3XX_RBBM_PERFCTR_SP_7_HI 0xE1 +#define A3XX_RBBM_PERFCTR_RB_0_LO 0xE2 +#define A3XX_RBBM_PERFCTR_RB_0_HI 0xE3 +#define A3XX_RBBM_PERFCTR_RB_1_LO 0xE4 +#define A3XX_RBBM_PERFCTR_RB_1_HI 0xE5 + +#define A3XX_RBBM_RBBM_CTL 0x100 +#define A3XX_RBBM_PERFCTR_PWR_0_LO 0x0EA +#define A3XX_RBBM_PERFCTR_PWR_0_HI 0x0EB +#define A3XX_RBBM_PERFCTR_PWR_1_LO 0x0EC +#define A3XX_RBBM_PERFCTR_PWR_1_HI 0x0ED +#define A3XX_RBBM_DEBUG_BUS_CTL 0x111 +#define A3XX_RBBM_DEBUG_BUS_DATA_STATUS 0x112 +#define A3XX_RBBM_DEBUG_BUS_STB_CTL0 0x11B +#define A3XX_RBBM_DEBUG_BUS_STB_CTL1 0x11C +#define A3XX_RBBM_INT_TRACE_BUS_CTL 0x11D +#define A3XX_RBBM_EXT_TRACE_BUS_CTL 0x11E +#define A3XX_RBBM_EXT_TRACE_STOP_CNT 0x11F +#define A3XX_RBBM_EXT_TRACE_START_CNT 0x120 
+#define A3XX_RBBM_EXT_TRACE_PERIOD_CNT 0x121 +#define A3XX_RBBM_EXT_TRACE_CMD 0x122 +#define A3XX_CP_RB_BASE 0x01C0 +#define A3XX_CP_RB_CNTL 0x01C1 +#define A3XX_CP_RB_RPTR 0x01C4 +#define A3XX_CP_RB_WPTR 0x01C5 +/* Following two are same as on A2XX, just in a different place */ +#define A3XX_CP_PFP_UCODE_ADDR 0x1C9 +#define A3XX_CP_PFP_UCODE_DATA 0x1CA +#define A3XX_CP_ROQ_ADDR 0x1CC +#define A3XX_CP_ROQ_DATA 0x1CD +#define A3XX_CP_MERCIU_ADDR 0x1D1 +#define A3XX_CP_MERCIU_DATA 0x1D2 +#define A3XX_CP_MERCIU_DATA2 0x1D3 +#define A3XX_CP_QUEUE_THRESHOLDS 0x01D5 +#define A3XX_CP_MEQ_ADDR 0x1DA +#define A3XX_CP_MEQ_DATA 0x1DB +#define A3XX_CP_STATE_DEBUG_INDEX 0x01EC +#define A3XX_CP_STATE_DEBUG_DATA 0x01ED +#define A3XX_CP_CNTL 0x01F4 +#define A3XX_CP_WFI_PEND_CTR 0x01F5 +#define A3XX_CP_ME_CNTL 0x01F6 +#define A3XX_CP_ME_STATUS 0x01F7 +#define A3XX_CP_ME_RAM_WADDR 0x01F8 +#define A3XX_CP_ME_RAM_RADDR 0x01F9 +#define A3XX_CP_ME_RAM_DATA 0x01FA +#define A3XX_CP_DEBUG 0x01FC + +#define A3XX_RBBM_PM_OVERRIDE2 0x039D + +#define A3XX_CP_PERFCOUNTER_SELECT 0x445 +#define A3XX_CP_IB1_BASE 0x0458 +#define A3XX_CP_IB1_BUFSZ 0x0459 +#define A3XX_CP_IB2_BASE 0x045A +#define A3XX_CP_IB2_BUFSZ 0x045B + +#define A3XX_CP_HW_FAULT 0x45C +#define A3XX_CP_PROTECT_CTRL 0x45E +#define A3XX_CP_PROTECT_STATUS 0x45F +#define A3XX_CP_PROTECT_REG_0 0x460 +#define A3XX_CP_STAT 0x047F +#define A3XX_CP_SCRATCH_REG0 0x578 +#define A3XX_CP_SCRATCH_REG6 0x57E +#define A3XX_CP_SCRATCH_REG7 0x57F +#define A3XX_VSC_SIZE_ADDRESS 0xC02 +#define A3XX_VSC_PIPE_DATA_ADDRESS_0 0xC07 +#define A3XX_VSC_PIPE_DATA_LENGTH_0 0xC08 +#define A3XX_VSC_PIPE_DATA_ADDRESS_1 0xC0A +#define A3XX_VSC_PIPE_DATA_LENGTH_1 0xC0B +#define A3XX_VSC_PIPE_DATA_ADDRESS_2 0xC0D +#define A3XX_VSC_PIPE_DATA_LENGTH_2 0xC0E +#define A3XX_VSC_PIPE_DATA_ADDRESS_3 0xC10 +#define A3XX_VSC_PIPE_DATA_LENGTH_3 0xC11 +#define A3XX_VSC_PIPE_DATA_ADDRESS_4 0xC13 +#define A3XX_VSC_PIPE_DATA_LENGTH_4 0xC14 +#define A3XX_VSC_PIPE_DATA_ADDRESS_5 
0xC16 +#define A3XX_VSC_PIPE_DATA_LENGTH_5 0xC17 +#define A3XX_VSC_PIPE_DATA_ADDRESS_6 0xC19 +#define A3XX_VSC_PIPE_DATA_LENGTH_6 0xC1A +#define A3XX_VSC_PIPE_DATA_ADDRESS_7 0xC1C +#define A3XX_VSC_PIPE_DATA_LENGTH_7 0xC1D +#define A3XX_PC_PERFCOUNTER0_SELECT 0xC48 +#define A3XX_PC_PERFCOUNTER1_SELECT 0xC49 +#define A3XX_PC_PERFCOUNTER2_SELECT 0xC4A +#define A3XX_PC_PERFCOUNTER3_SELECT 0xC4B +#define A3XX_GRAS_TSE_DEBUG_ECO 0xC81 +#define A3XX_GRAS_PERFCOUNTER0_SELECT 0xC88 +#define A3XX_GRAS_PERFCOUNTER1_SELECT 0xC89 +#define A3XX_GRAS_PERFCOUNTER2_SELECT 0xC8A +#define A3XX_GRAS_PERFCOUNTER3_SELECT 0xC8B +#define A3XX_GRAS_CL_USER_PLANE_X0 0xCA0 +#define A3XX_GRAS_CL_USER_PLANE_Y0 0xCA1 +#define A3XX_GRAS_CL_USER_PLANE_Z0 0xCA2 +#define A3XX_GRAS_CL_USER_PLANE_W0 0xCA3 +#define A3XX_GRAS_CL_USER_PLANE_X1 0xCA4 +#define A3XX_GRAS_CL_USER_PLANE_Y1 0xCA5 +#define A3XX_GRAS_CL_USER_PLANE_Z1 0xCA6 +#define A3XX_GRAS_CL_USER_PLANE_W1 0xCA7 +#define A3XX_GRAS_CL_USER_PLANE_X2 0xCA8 +#define A3XX_GRAS_CL_USER_PLANE_Y2 0xCA9 +#define A3XX_GRAS_CL_USER_PLANE_Z2 0xCAA +#define A3XX_GRAS_CL_USER_PLANE_W2 0xCAB +#define A3XX_GRAS_CL_USER_PLANE_X3 0xCAC +#define A3XX_GRAS_CL_USER_PLANE_Y3 0xCAD +#define A3XX_GRAS_CL_USER_PLANE_Z3 0xCAE +#define A3XX_GRAS_CL_USER_PLANE_W3 0xCAF +#define A3XX_GRAS_CL_USER_PLANE_X4 0xCB0 +#define A3XX_GRAS_CL_USER_PLANE_Y4 0xCB1 +#define A3XX_GRAS_CL_USER_PLANE_Z4 0xCB2 +#define A3XX_GRAS_CL_USER_PLANE_W4 0xCB3 +#define A3XX_GRAS_CL_USER_PLANE_X5 0xCB4 +#define A3XX_GRAS_CL_USER_PLANE_Y5 0xCB5 +#define A3XX_GRAS_CL_USER_PLANE_Z5 0xCB6 +#define A3XX_GRAS_CL_USER_PLANE_W5 0xCB7 +#define A3XX_RB_GMEM_BASE_ADDR 0xCC0 +#define A3XX_RB_DEBUG_ECO_CONTROLS_ADDR 0xCC1 +#define A3XX_RB_PERFCOUNTER0_SELECT 0xCC6 +#define A3XX_RB_PERFCOUNTER1_SELECT 0xCC7 +#define A3XX_RB_FRAME_BUFFER_DIMENSION 0xCE0 +#define A3XX_SQ_GPR_MANAGEMENT 0x0D00 +#define A3XX_SQ_INST_STORE_MANAGEMENT 0x0D02 +#define A3XX_HLSQ_PERFCOUNTER0_SELECT 0xE00 +#define 
A3XX_HLSQ_PERFCOUNTER1_SELECT 0xE01 +#define A3XX_HLSQ_PERFCOUNTER2_SELECT 0xE02 +#define A3XX_HLSQ_PERFCOUNTER3_SELECT 0xE03 +#define A3XX_HLSQ_PERFCOUNTER4_SELECT 0xE04 +#define A3XX_HLSQ_PERFCOUNTER5_SELECT 0xE05 +#define A3XX_TP0_CHICKEN 0x0E1E +#define A3XX_VFD_PERFCOUNTER0_SELECT 0xE44 +#define A3XX_VFD_PERFCOUNTER1_SELECT 0xE45 +#define A3XX_VPC_VPC_DEBUG_RAM_SEL 0xE61 +#define A3XX_VPC_VPC_DEBUG_RAM_READ 0xE62 +#define A3XX_VPC_PERFCOUNTER0_SELECT 0xE64 +#define A3XX_VPC_PERFCOUNTER1_SELECT 0xE65 +#define A3XX_UCHE_CACHE_MODE_CONTROL_REG 0xE82 +#define A3XX_UCHE_PERFCOUNTER0_SELECT 0xE84 +#define A3XX_UCHE_PERFCOUNTER1_SELECT 0xE85 +#define A3XX_UCHE_PERFCOUNTER2_SELECT 0xE86 +#define A3XX_UCHE_PERFCOUNTER3_SELECT 0xE87 +#define A3XX_UCHE_PERFCOUNTER4_SELECT 0xE88 +#define A3XX_UCHE_PERFCOUNTER5_SELECT 0xE89 +#define A3XX_UCHE_CACHE_INVALIDATE0_REG 0xEA0 +#define A3XX_UCHE_CACHE_INVALIDATE1_REG 0xEA1 +#define A3XX_UCHE_CACHE_WAYS_VFD 0xEA6 +#define A3XX_SP_PERFCOUNTER0_SELECT 0xEC4 +#define A3XX_SP_PERFCOUNTER1_SELECT 0xEC5 +#define A3XX_SP_PERFCOUNTER2_SELECT 0xEC6 +#define A3XX_SP_PERFCOUNTER3_SELECT 0xEC7 +#define A3XX_SP_PERFCOUNTER4_SELECT 0xEC8 +#define A3XX_SP_PERFCOUNTER5_SELECT 0xEC9 +#define A3XX_SP_PERFCOUNTER6_SELECT 0xECA +#define A3XX_SP_PERFCOUNTER7_SELECT 0xECB +#define A3XX_TP_PERFCOUNTER0_SELECT 0xF04 +#define A3XX_TP_PERFCOUNTER1_SELECT 0xF05 +#define A3XX_TP_PERFCOUNTER2_SELECT 0xF06 +#define A3XX_TP_PERFCOUNTER3_SELECT 0xF07 +#define A3XX_TP_PERFCOUNTER4_SELECT 0xF08 +#define A3XX_TP_PERFCOUNTER5_SELECT 0xF09 +#define A3XX_GRAS_CL_CLIP_CNTL 0x2040 +#define A3XX_GRAS_CL_GB_CLIP_ADJ 0x2044 +#define A3XX_GRAS_CL_VPORT_XOFFSET 0x2048 +#define A3XX_GRAS_CL_VPORT_XSCALE 0x2049 +#define A3XX_GRAS_CL_VPORT_YOFFSET 0x204A +#define A3XX_GRAS_CL_VPORT_YSCALE 0x204B +#define A3XX_GRAS_CL_VPORT_ZOFFSET 0x204C +#define A3XX_GRAS_CL_VPORT_ZSCALE 0x204D +#define A3XX_GRAS_SU_POINT_MINMAX 0x2068 +#define A3XX_GRAS_SU_POINT_SIZE 0x2069 +#define 
A3XX_GRAS_SU_POLY_OFFSET_SCALE 0x206C +#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET 0x206D +#define A3XX_GRAS_SU_MODE_CONTROL 0x2070 +#define A3XX_GRAS_SC_CONTROL 0x2072 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL 0x2074 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR 0x2075 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL 0x2079 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR 0x207A +#define A3XX_RB_MODE_CONTROL 0x20C0 +#define A3XX_RB_RENDER_CONTROL 0x20C1 +#define A3XX_RB_MSAA_CONTROL 0x20C2 +#define A3XX_RB_ALPHA_REFERENCE 0x20C3 +#define A3XX_RB_MRT_CONTROL0 0x20C4 +#define A3XX_RB_MRT_BUF_INFO0 0x20C5 +#define A3XX_RB_MRT_BUF_BASE0 0x20C6 +#define A3XX_RB_MRT_BLEND_CONTROL0 0x20C7 +#define A3XX_RB_MRT_CONTROL1 0x20C8 +#define A3XX_RB_MRT_BUF_INFO1 0x20C9 +#define A3XX_RB_MRT_BUF_BASE1 0x20CA +#define A3XX_RB_MRT_BLEND_CONTROL1 0x20CB +#define A3XX_RB_MRT_CONTROL2 0x20CC +#define A3XX_RB_MRT_BUF_INFO2 0x20CD +#define A3XX_RB_MRT_BUF_BASE2 0x20CE +#define A3XX_RB_MRT_BLEND_CONTROL2 0x20CF +#define A3XX_RB_MRT_CONTROL3 0x20D0 +#define A3XX_RB_MRT_BUF_INFO3 0x20D1 +#define A3XX_RB_MRT_BUF_BASE3 0x20D2 +#define A3XX_RB_MRT_BLEND_CONTROL3 0x20D3 +#define A3XX_RB_BLEND_RED 0x20E4 +#define A3XX_RB_BLEND_GREEN 0x20E5 +#define A3XX_RB_BLEND_BLUE 0x20E6 +#define A3XX_RB_BLEND_ALPHA 0x20E7 +#define A3XX_RB_CLEAR_COLOR_DW0 0x20E8 +#define A3XX_RB_CLEAR_COLOR_DW1 0x20E9 +#define A3XX_RB_CLEAR_COLOR_DW2 0x20EA +#define A3XX_RB_CLEAR_COLOR_DW3 0x20EB +#define A3XX_RB_COPY_CONTROL 0x20EC +#define A3XX_RB_COPY_DEST_BASE 0x20ED +#define A3XX_RB_COPY_DEST_PITCH 0x20EE +#define A3XX_RB_COPY_DEST_INFO 0x20EF +#define A3XX_RB_DEPTH_CONTROL 0x2100 +#define A3XX_RB_DEPTH_CLEAR 0x2101 +#define A3XX_RB_DEPTH_BUF_INFO 0x2102 +#define A3XX_RB_DEPTH_BUF_PITCH 0x2103 +#define A3XX_RB_STENCIL_CONTROL 0x2104 +#define A3XX_RB_STENCIL_CLEAR 0x2105 +#define A3XX_RB_STENCIL_BUF_INFO 0x2106 +#define A3XX_RB_STENCIL_BUF_PITCH 0x2107 +#define A3XX_RB_STENCIL_REF_MASK 0x2108 +#define A3XX_RB_STENCIL_REF_MASK_BF 0x2109 +#define 
A3XX_RB_LRZ_VSC_CONTROL 0x210C +#define A3XX_RB_WINDOW_OFFSET 0x210E +#define A3XX_RB_SAMPLE_COUNT_CONTROL 0x2110 +#define A3XX_RB_SAMPLE_COUNT_ADDR 0x2111 +#define A3XX_RB_Z_CLAMP_MIN 0x2114 +#define A3XX_RB_Z_CLAMP_MAX 0x2115 +#define A3XX_HLSQ_CONTROL_0_REG 0x2200 +#define A3XX_HLSQ_CONTROL_1_REG 0x2201 +#define A3XX_HLSQ_CONTROL_2_REG 0x2202 +#define A3XX_HLSQ_CONTROL_3_REG 0x2203 +#define A3XX_HLSQ_VS_CONTROL_REG 0x2204 +#define A3XX_HLSQ_FS_CONTROL_REG 0x2205 +#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG 0x2206 +#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG 0x2207 +#define A3XX_HLSQ_CL_NDRANGE_0_REG 0x220A +#define A3XX_HLSQ_CL_NDRANGE_1_REG 0x220B +#define A3XX_HLSQ_CL_NDRANGE_2_REG 0x220C +#define A3XX_HLSQ_CL_NDRANGE_3_REG 0x220D +#define A3XX_HLSQ_CL_NDRANGE_4_REG 0x220E +#define A3XX_HLSQ_CL_NDRANGE_5_REG 0x220F +#define A3XX_HLSQ_CL_NDRANGE_6_REG 0x2210 +#define A3XX_HLSQ_CL_CONTROL_0_REG 0x2211 +#define A3XX_HLSQ_CL_CONTROL_1_REG 0x2212 +#define A3XX_HLSQ_CL_KERNEL_CONST_REG 0x2214 +#define A3XX_HLSQ_CL_KERNEL_GROUP_X_REG 0x2215 +#define A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG 0x2216 +#define A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG 0x2217 +#define A3XX_HLSQ_CL_WG_OFFSET_REG 0x221A +#define A3XX_VFD_FETCH_INSTR_1_0 0x2247 +#define A3XX_VFD_FETCH_INSTR_1_1 0x2249 +#define A3XX_VFD_FETCH_INSTR_1_2 0x224B +#define A3XX_VFD_FETCH_INSTR_1_3 0x224D +#define A3XX_VFD_FETCH_INSTR_1_4 0x224F +#define A3XX_VFD_FETCH_INSTR_1_5 0x2251 +#define A3XX_VFD_FETCH_INSTR_1_6 0x2253 +#define A3XX_VFD_FETCH_INSTR_1_7 0x2255 +#define A3XX_VFD_FETCH_INSTR_1_8 0x2257 +#define A3XX_VFD_FETCH_INSTR_1_9 0x2259 +#define A3XX_VFD_FETCH_INSTR_1_A 0x225B +#define A3XX_VFD_FETCH_INSTR_1_B 0x225D +#define A3XX_VFD_FETCH_INSTR_1_C 0x225F +#define A3XX_VFD_FETCH_INSTR_1_D 0x2261 +#define A3XX_VFD_FETCH_INSTR_1_E 0x2263 +#define A3XX_VFD_FETCH_INSTR_1_F 0x2265 +#define A3XX_SP_SP_CTRL_REG 0x22C0 +#define A3XX_SP_VS_CTRL_REG0 0x22C4 +#define A3XX_SP_VS_CTRL_REG1 0x22C5 +#define A3XX_SP_VS_PARAM_REG 0x22C6 
+#define A3XX_SP_VS_OUT_REG_0 0x22C7 +#define A3XX_SP_VS_OUT_REG_1 0x22C8 +#define A3XX_SP_VS_OUT_REG_2 0x22C9 +#define A3XX_SP_VS_OUT_REG_3 0x22CA +#define A3XX_SP_VS_OUT_REG_4 0x22CB +#define A3XX_SP_VS_OUT_REG_5 0x22CC +#define A3XX_SP_VS_OUT_REG_6 0x22CD +#define A3XX_SP_VS_OUT_REG_7 0x22CE +#define A3XX_SP_VS_VPC_DST_REG_0 0x22D0 +#define A3XX_SP_VS_VPC_DST_REG_1 0x22D1 +#define A3XX_SP_VS_VPC_DST_REG_2 0x22D2 +#define A3XX_SP_VS_VPC_DST_REG_3 0x22D3 +#define A3XX_SP_VS_OBJ_OFFSET_REG 0x22D4 +#define A3XX_SP_VS_OBJ_START_REG 0x22D5 +#define A3XX_SP_VS_PVT_MEM_PARAM_REG 0x22D6 +#define A3XX_SP_VS_PVT_MEM_ADDR_REG 0x22D7 +#define A3XX_SP_VS_PVT_MEM_SIZE_REG 0x22D8 +#define A3XX_SP_VS_LENGTH_REG 0x22DF +#define A3XX_SP_FS_CTRL_REG0 0x22E0 +#define A3XX_SP_FS_CTRL_REG1 0x22E1 +#define A3XX_SP_FS_OBJ_OFFSET_REG 0x22E2 +#define A3XX_SP_FS_OBJ_START_REG 0x22E3 +#define A3XX_SP_FS_PVT_MEM_PARAM_REG 0x22E4 +#define A3XX_SP_FS_PVT_MEM_ADDR_REG 0x22E5 +#define A3XX_SP_FS_PVT_MEM_SIZE_REG 0x22E6 +#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x22E8 +#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_1 0x22E9 +#define A3XX_SP_FS_OUTPUT_REG 0x22EC +#define A3XX_SP_FS_MRT_REG_0 0x22F0 +#define A3XX_SP_FS_MRT_REG_1 0x22F1 +#define A3XX_SP_FS_MRT_REG_2 0x22F2 +#define A3XX_SP_FS_MRT_REG_3 0x22F3 +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_0 0x22F4 +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_1 0x22F5 +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_2 0x22F6 +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_3 0x22F7 +#define A3XX_SP_FS_LENGTH_REG 0x22FF +#define A3XX_PA_SC_AA_CONFIG 0x2301 +#define A3XX_VBIF_CLKON 0x3001 +#define A3XX_VBIF_ABIT_SORT 0x301C +#define A3XX_VBIF_ABIT_SORT_CONF 0x301D +#define A3XX_VBIF_GATE_OFF_WRREQ_EN 0x302A +#define A3XX_VBIF_IN_RD_LIM_CONF0 0x302C +#define A3XX_VBIF_IN_RD_LIM_CONF1 0x302D +#define A3XX_VBIF_IN_WR_LIM_CONF0 0x3030 +#define A3XX_VBIF_IN_WR_LIM_CONF1 0x3031 +#define A3XX_VBIF_OUT_RD_LIM_CONF0 0x3034 +#define A3XX_VBIF_OUT_WR_LIM_CONF0 0x3035 +#define A3XX_VBIF_DDR_OUT_MAX_BURST 0x3036 
+#define A3XX_VBIF_ARB_CTL 0x303C +#define A3XX_VBIF_ROUND_ROBIN_QOS_ARB 0x3049 +#define A3XX_VBIF_OUT_AXI_AOOO_EN 0x305E +#define A3XX_VBIF_OUT_AXI_AOOO 0x305F +#define A3XX_VBIF_PERF_CNT0_LO 0x3073 +#define A3XX_VBIF_PERF_CNT0_HI 0x3074 +#define A3XX_VBIF_PERF_CNT1_LO 0x3075 +#define A3XX_VBIF_PERF_CNT1_HI 0x3076 +#define A3XX_VBIF_PERF_PWR_CNT0_LO 0x3077 +#define A3XX_VBIF_PERF_PWR_CNT0_HI 0x3078 +#define A3XX_VBIF_PERF_PWR_CNT1_LO 0x3079 +#define A3XX_VBIF_PERF_PWR_CNT1_HI 0x307a +#define A3XX_VBIF_PERF_PWR_CNT2_LO 0x307b +#define A3XX_VBIF_PERF_PWR_CNT2_HI 0x307c + +#define A3XX_VBIF_XIN_HALT_CTRL0 0x3080 +#define A3XX_VBIF_XIN_HALT_CTRL0_MASK 0x3F +#define A30X_VBIF_XIN_HALT_CTRL0_MASK 0x7 + +#define A3XX_VBIF_XIN_HALT_CTRL1 0x3081 + +/* VBIF register offsets for A306 */ +#define A3XX_VBIF2_PERF_CNT_SEL0 0x30d0 +#define A3XX_VBIF2_PERF_CNT_SEL1 0x30d1 +#define A3XX_VBIF2_PERF_CNT_SEL2 0x30d2 +#define A3XX_VBIF2_PERF_CNT_SEL3 0x30d3 +#define A3XX_VBIF2_PERF_CNT_LOW0 0x30d8 +#define A3XX_VBIF2_PERF_CNT_LOW1 0x30d9 +#define A3XX_VBIF2_PERF_CNT_LOW2 0x30da +#define A3XX_VBIF2_PERF_CNT_LOW3 0x30db +#define A3XX_VBIF2_PERF_CNT_HIGH0 0x30e0 +#define A3XX_VBIF2_PERF_CNT_HIGH1 0x30e1 +#define A3XX_VBIF2_PERF_CNT_HIGH2 0x30e2 +#define A3XX_VBIF2_PERF_CNT_HIGH3 0x30e3 + +#define A3XX_VBIF2_PERF_PWR_CNT_EN0 0x3100 +#define A3XX_VBIF2_PERF_PWR_CNT_EN1 0x3101 +#define A3XX_VBIF2_PERF_PWR_CNT_EN2 0x3102 +#define A3XX_VBIF2_PERF_PWR_CNT_LOW0 0x3110 +#define A3XX_VBIF2_PERF_PWR_CNT_LOW1 0x3111 +#define A3XX_VBIF2_PERF_PWR_CNT_LOW2 0x3112 +#define A3XX_VBIF2_PERF_PWR_CNT_HIGH0 0x3118 +#define A3XX_VBIF2_PERF_PWR_CNT_HIGH1 0x3119 +#define A3XX_VBIF2_PERF_PWR_CNT_HIGH2 0x311a + +#define A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0 0x3800 +#define A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL1 0x3801 + +/* RBBM Debug bus block IDs */ +#define RBBM_BLOCK_ID_CP 0x1 +#define RBBM_BLOCK_ID_RBBM 0x2 +#define RBBM_BLOCK_ID_VBIF 0x3 +#define RBBM_BLOCK_ID_HLSQ 0x4 +#define 
RBBM_BLOCK_ID_UCHE 0x5 +#define RBBM_BLOCK_ID_PC 0x8 +#define RBBM_BLOCK_ID_VFD 0x9 +#define RBBM_BLOCK_ID_VPC 0xa +#define RBBM_BLOCK_ID_TSE 0xb +#define RBBM_BLOCK_ID_RAS 0xc +#define RBBM_BLOCK_ID_VSC 0xd +#define RBBM_BLOCK_ID_SP_0 0x10 +#define RBBM_BLOCK_ID_SP_1 0x11 +#define RBBM_BLOCK_ID_SP_2 0x12 +#define RBBM_BLOCK_ID_SP_3 0x13 +#define RBBM_BLOCK_ID_TPL1_0 0x18 +#define RBBM_BLOCK_ID_TPL1_1 0x19 +#define RBBM_BLOCK_ID_TPL1_2 0x1a +#define RBBM_BLOCK_ID_TPL1_3 0x1b +#define RBBM_BLOCK_ID_RB_0 0x20 +#define RBBM_BLOCK_ID_RB_1 0x21 +#define RBBM_BLOCK_ID_RB_2 0x22 +#define RBBM_BLOCK_ID_RB_3 0x23 +#define RBBM_BLOCK_ID_MARB_0 0x28 +#define RBBM_BLOCK_ID_MARB_1 0x29 +#define RBBM_BLOCK_ID_MARB_2 0x2a +#define RBBM_BLOCK_ID_MARB_3 0x2b + +/* RBBM_CLOCK_CTL default value */ +#define A3XX_RBBM_CLOCK_CTL_DEFAULT 0xAAAAAAAA +#define A320_RBBM_CLOCK_CTL_DEFAULT 0xBFFFFFFF +#define A330_RBBM_CLOCK_CTL_DEFAULT 0xBFFCFFFF + +#define A330_RBBM_GPR0_CTL_DEFAULT 0x00000000 +#define A330v2_RBBM_GPR0_CTL_DEFAULT 0x05515455 +#define A310_RBBM_GPR0_CTL_DEFAULT 0x000000AA + +/* COUNTABLE FOR SP PERFCOUNTER */ +#define SP_ALU_ACTIVE_CYCLES 0x1D +#define SP0_ICL1_MISSES 0x1A +#define SP_FS_CFLOW_INSTRUCTIONS 0x0C + +/* COUNTABLE FOR TSE PERFCOUNTER */ +#define TSE_INPUT_PRIM_NUM 0x0 + +/* VBIF countables */ +#define VBIF_AXI_TOTAL_BEATS 85 + +/* VBIF Recoverable HALT bit value */ +#define VBIF_RECOVERABLE_HALT_CTRL 0x1 + +/* + * CP DEBUG settings for A3XX core: + * DYNAMIC_CLK_DISABLE [27] - turn off the dynamic clock control + * MIU_128BIT_WRITE_ENABLE [25] - Allow 128 bit writes to the VBIF + */ +#define A3XX_CP_DEBUG_DEFAULT ((1 << 27) | (1 << 25)) + + +#endif diff --git a/a5xx_reg.h b/a5xx_reg.h new file mode 100644 index 0000000000..137a11c3d9 --- /dev/null +++ b/a5xx_reg.h @@ -0,0 +1,902 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2014-2016,2019, The Linux Foundation. All rights reserved. 
+ */ + +#ifndef _A5XX_REG_H +#define _A5XX_REG_H + +/* A5XX interrupt bits */ +#define A5XX_INT_RBBM_GPU_IDLE 0 +#define A5XX_INT_RBBM_AHB_ERROR 1 +#define A5XX_INT_RBBM_TRANSFER_TIMEOUT 2 +#define A5XX_INT_RBBM_ME_MS_TIMEOUT 3 +#define A5XX_INT_RBBM_PFP_MS_TIMEOUT 4 +#define A5XX_INT_RBBM_ETS_MS_TIMEOUT 5 +#define A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW 6 +#define A5XX_INT_RBBM_GPC_ERROR 7 +#define A5XX_INT_CP_SW 8 +#define A5XX_INT_CP_HW_ERROR 9 +#define A5XX_INT_CP_CCU_FLUSH_DEPTH_TS 10 +#define A5XX_INT_CP_CCU_FLUSH_COLOR_TS 11 +#define A5XX_INT_CP_CCU_RESOLVE_TS 12 +#define A5XX_INT_CP_IB2 13 +#define A5XX_INT_CP_IB1 14 +#define A5XX_INT_CP_RB 15 +#define A5XX_INT_CP_UNUSED_1 16 +#define A5XX_INT_CP_RB_DONE_TS 17 +#define A5XX_INT_CP_WT_DONE_TS 18 +#define A5XX_INT_UNKNOWN_1 19 +#define A5XX_INT_CP_CACHE_FLUSH_TS 20 +#define A5XX_INT_UNUSED_2 21 +#define A5XX_INT_RBBM_ATB_BUS_OVERFLOW 22 +#define A5XX_INT_MISC_HANG_DETECT 23 +#define A5XX_INT_UCHE_OOB_ACCESS 24 +#define A5XX_INT_UCHE_TRAP_INTR 25 +#define A5XX_INT_DEBBUS_INTR_0 26 +#define A5XX_INT_DEBBUS_INTR_1 27 +#define A5XX_INT_GPMU_VOLTAGE_DROOP 28 +#define A5XX_INT_GPMU_FIRMWARE 29 +#define A5XX_INT_ISDB_CPU_IRQ 30 +#define A5XX_INT_ISDB_UNDER_DEBUG 31 + +/* CP Interrupt bits */ +#define A5XX_CP_OPCODE_ERROR 0 +#define A5XX_CP_RESERVED_BIT_ERROR 1 +#define A5XX_CP_HW_FAULT_ERROR 2 +#define A5XX_CP_DMA_ERROR 3 +#define A5XX_CP_REGISTER_PROTECTION_ERROR 4 +#define A5XX_CP_AHB_ERROR 5 + +/* CP registers */ +#define A5XX_CP_RB_BASE 0x800 +#define A5XX_CP_RB_BASE_HI 0x801 +#define A5XX_CP_RB_CNTL 0x802 +#define A5XX_CP_RB_RPTR_ADDR_LO 0x804 +#define A5XX_CP_RB_RPTR_ADDR_HI 0x805 +#define A5XX_CP_RB_RPTR 0x806 +#define A5XX_CP_RB_WPTR 0x807 +#define A5XX_CP_PFP_STAT_ADDR 0x808 +#define A5XX_CP_PFP_STAT_DATA 0x809 +#define A5XX_CP_DRAW_STATE_ADDR 0x80B +#define A5XX_CP_DRAW_STATE_DATA 0x80C +#define A5XX_CP_CRASH_SCRIPT_BASE_LO 0x817 +#define A5XX_CP_CRASH_SCRIPT_BASE_HI 0x818 +#define A5XX_CP_CRASH_DUMP_CNTL 0x819 
+#define A5XX_CP_ME_STAT_ADDR 0x81A +#define A5XX_CP_ROQ_THRESHOLDS_1 0x81F +#define A5XX_CP_ROQ_THRESHOLDS_2 0x820 +#define A5XX_CP_ROQ_DBG_ADDR 0x821 +#define A5XX_CP_ROQ_DBG_DATA 0x822 +#define A5XX_CP_MEQ_DBG_ADDR 0x823 +#define A5XX_CP_MEQ_DBG_DATA 0x824 +#define A5XX_CP_MEQ_THRESHOLDS 0x825 +#define A5XX_CP_MERCIU_SIZE 0x826 +#define A5XX_CP_MERCIU_DBG_ADDR 0x827 +#define A5XX_CP_MERCIU_DBG_DATA_1 0x828 +#define A5XX_CP_MERCIU_DBG_DATA_2 0x829 +#define A5XX_CP_PFP_UCODE_DBG_ADDR 0x82A +#define A5XX_CP_PFP_UCODE_DBG_DATA 0x82B +#define A5XX_CP_ME_UCODE_DBG_ADDR 0x82F +#define A5XX_CP_ME_UCODE_DBG_DATA 0x830 +#define A5XX_CP_CNTL 0x831 +#define A5XX_CP_ME_CNTL 0x832 +#define A5XX_CP_CHICKEN_DBG 0x833 +#define A5XX_CP_PFP_INSTR_BASE_LO 0x835 +#define A5XX_CP_PFP_INSTR_BASE_HI 0x836 +#define A5XX_CP_PM4_INSTR_BASE_LO 0x838 +#define A5XX_CP_PM4_INSTR_BASE_HI 0x839 +#define A5XX_CP_CONTEXT_SWITCH_CNTL 0x83B +#define A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO 0x83C +#define A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI 0x83D +#define A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO 0x83E +#define A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI 0x83F +#define A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO 0x840 +#define A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI 0x841 +#define A5XX_CP_ADDR_MODE_CNTL 0x860 +#define A5XX_CP_ME_STAT_DATA 0xB14 +#define A5XX_CP_WFI_PEND_CTR 0xB15 +#define A5XX_CP_INTERRUPT_STATUS 0xB18 +#define A5XX_CP_HW_FAULT 0xB1A +#define A5XX_CP_PROTECT_STATUS 0xB1C +#define A5XX_CP_IB1_BASE 0xB1F +#define A5XX_CP_IB1_BASE_HI 0xB20 +#define A5XX_CP_IB1_BUFSZ 0xB21 +#define A5XX_CP_IB2_BASE 0xB22 +#define A5XX_CP_IB2_BASE_HI 0xB23 +#define A5XX_CP_IB2_BUFSZ 0xB24 +#define A5XX_CP_PROTECT_REG_0 0x880 +#define A5XX_CP_PROTECT_CNTL 0x8A0 +#define A5XX_CP_AHB_FAULT 0xB1B +#define A5XX_CP_PERFCTR_CP_SEL_0 0xBB0 +#define A5XX_CP_PERFCTR_CP_SEL_1 0xBB1 +#define A5XX_CP_PERFCTR_CP_SEL_2 0xBB2 +#define A5XX_CP_PERFCTR_CP_SEL_3 0xBB3 +#define A5XX_CP_PERFCTR_CP_SEL_4 0xBB4 +#define A5XX_CP_PERFCTR_CP_SEL_5 0xBB5 
+#define A5XX_CP_PERFCTR_CP_SEL_6 0xBB6 +#define A5XX_CP_PERFCTR_CP_SEL_7 0xBB7 + +#define A5XX_VSC_ADDR_MODE_CNTL 0xBC1 + +/* CP Power Counter Registers Select */ +#define A5XX_CP_POWERCTR_CP_SEL_0 0xBBA +#define A5XX_CP_POWERCTR_CP_SEL_1 0xBBB +#define A5XX_CP_POWERCTR_CP_SEL_2 0xBBC +#define A5XX_CP_POWERCTR_CP_SEL_3 0xBBD + +/* RBBM registers */ +#define A5XX_RBBM_CFG_DBGBUS_SEL_A 0x4 +#define A5XX_RBBM_CFG_DBGBUS_SEL_B 0x5 +#define A5XX_RBBM_CFG_DBGBUS_SEL_C 0x6 +#define A5XX_RBBM_CFG_DBGBUS_SEL_D 0x7 +#define A5XX_RBBM_CFG_DBGBUS_SEL_PING_INDEX_SHIFT 0x0 +#define A5XX_RBBM_CFG_DBGBUS_SEL_PING_BLK_SEL_SHIFT 0x8 + +#define A5XX_RBBM_CFG_DBGBUS_CNTLT 0x8 +#define A5XX_RBBM_CFG_DBGBUS_CNTLM 0x9 +#define A5XX_RBBM_CFG_DEBBUS_CTLTM_ENABLE_SHIFT 0x18 +#define A5XX_RBBM_CFG_DBGBUS_OPL 0xA +#define A5XX_RBBM_CFG_DBGBUS_OPE 0xB +#define A5XX_RBBM_CFG_DBGBUS_IVTL_0 0xC +#define A5XX_RBBM_CFG_DBGBUS_IVTL_1 0xD +#define A5XX_RBBM_CFG_DBGBUS_IVTL_2 0xE +#define A5XX_RBBM_CFG_DBGBUS_IVTL_3 0xF +#define A5XX_RBBM_CFG_DBGBUS_MASKL_0 0x10 +#define A5XX_RBBM_CFG_DBGBUS_MASKL_1 0x11 +#define A5XX_RBBM_CFG_DBGBUS_MASKL_2 0x12 +#define A5XX_RBBM_CFG_DBGBUS_MASKL_3 0x13 +#define A5XX_RBBM_CFG_DBGBUS_BYTEL_0 0x14 +#define A5XX_RBBM_CFG_DBGBUS_BYTEL_1 0x15 +#define A5XX_RBBM_CFG_DBGBUS_IVTE_0 0x16 +#define A5XX_RBBM_CFG_DBGBUS_IVTE_1 0x17 +#define A5XX_RBBM_CFG_DBGBUS_IVTE_2 0x18 +#define A5XX_RBBM_CFG_DBGBUS_IVTE_3 0x19 +#define A5XX_RBBM_CFG_DBGBUS_MASKE_0 0x1A +#define A5XX_RBBM_CFG_DBGBUS_MASKE_1 0x1B +#define A5XX_RBBM_CFG_DBGBUS_MASKE_2 0x1C +#define A5XX_RBBM_CFG_DBGBUS_MASKE_3 0x1D +#define A5XX_RBBM_CFG_DBGBUS_NIBBLEE 0x1E +#define A5XX_RBBM_CFG_DBGBUS_PTRC0 0x1F +#define A5XX_RBBM_CFG_DBGBUS_PTRC1 0x20 +#define A5XX_RBBM_CFG_DBGBUS_LOADREG 0x21 +#define A5XX_RBBM_CFG_DBGBUS_IDX 0x22 +#define A5XX_RBBM_CFG_DBGBUS_CLRC 0x23 +#define A5XX_RBBM_CFG_DBGBUS_LOADIVT 0x24 +#define A5XX_RBBM_INTERFACE_HANG_INT_CNTL 0x2F +#define A5XX_RBBM_INT_CLEAR_CMD 0x37 +#define 
A5XX_RBBM_INT_0_MASK 0x38 +#define A5XX_RBBM_AHB_DBG_CNTL 0x3F +#define A5XX_RBBM_EXT_VBIF_DBG_CNTL 0x41 +#define A5XX_RBBM_SW_RESET_CMD 0x43 +#define A5XX_RBBM_BLOCK_SW_RESET_CMD 0x45 +#define A5XX_RBBM_BLOCK_SW_RESET_CMD2 0x46 +#define A5XX_RBBM_DBG_LO_HI_GPIO 0x48 +#define A5XX_RBBM_EXT_TRACE_BUS_CNTL 0x49 +#define A5XX_RBBM_CLOCK_CNTL_TP0 0x4A +#define A5XX_RBBM_CLOCK_CNTL_TP1 0x4B +#define A5XX_RBBM_CLOCK_CNTL_TP2 0x4C +#define A5XX_RBBM_CLOCK_CNTL_TP3 0x4D +#define A5XX_RBBM_CLOCK_CNTL2_TP0 0x4E +#define A5XX_RBBM_CLOCK_CNTL2_TP1 0x4F +#define A5XX_RBBM_CLOCK_CNTL2_TP2 0x50 +#define A5XX_RBBM_CLOCK_CNTL2_TP3 0x51 +#define A5XX_RBBM_CLOCK_CNTL3_TP0 0x52 +#define A5XX_RBBM_CLOCK_CNTL3_TP1 0x53 +#define A5XX_RBBM_CLOCK_CNTL3_TP2 0x54 +#define A5XX_RBBM_CLOCK_CNTL3_TP3 0x55 +#define A5XX_RBBM_READ_AHB_THROUGH_DBG 0x59 +#define A5XX_RBBM_CLOCK_CNTL_UCHE 0x5A +#define A5XX_RBBM_CLOCK_CNTL2_UCHE 0x5B +#define A5XX_RBBM_CLOCK_CNTL3_UCHE 0x5C +#define A5XX_RBBM_CLOCK_CNTL4_UCHE 0x5D +#define A5XX_RBBM_CLOCK_HYST_UCHE 0x5E +#define A5XX_RBBM_CLOCK_DELAY_UCHE 0x5F +#define A5XX_RBBM_CLOCK_MODE_GPC 0x60 +#define A5XX_RBBM_CLOCK_DELAY_GPC 0x61 +#define A5XX_RBBM_CLOCK_HYST_GPC 0x62 +#define A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM 0x63 +#define A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x64 +#define A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x65 +#define A5XX_RBBM_CLOCK_DELAY_HLSQ 0x66 +#define A5XX_RBBM_CLOCK_CNTL 0x67 +#define A5XX_RBBM_CLOCK_CNTL_SP0 0x68 +#define A5XX_RBBM_CLOCK_CNTL_SP1 0x69 +#define A5XX_RBBM_CLOCK_CNTL_SP2 0x6A +#define A5XX_RBBM_CLOCK_CNTL_SP3 0x6B +#define A5XX_RBBM_CLOCK_CNTL2_SP0 0x6C +#define A5XX_RBBM_CLOCK_CNTL2_SP1 0x6D +#define A5XX_RBBM_CLOCK_CNTL2_SP2 0x6E +#define A5XX_RBBM_CLOCK_CNTL2_SP3 0x6F +#define A5XX_RBBM_CLOCK_HYST_SP0 0x70 +#define A5XX_RBBM_CLOCK_HYST_SP1 0x71 +#define A5XX_RBBM_CLOCK_HYST_SP2 0x72 +#define A5XX_RBBM_CLOCK_HYST_SP3 0x73 +#define A5XX_RBBM_CLOCK_DELAY_SP0 0x74 +#define A5XX_RBBM_CLOCK_DELAY_SP1 0x75 +#define 
A5XX_RBBM_CLOCK_DELAY_SP2 0x76 +#define A5XX_RBBM_CLOCK_DELAY_SP3 0x77 +#define A5XX_RBBM_CLOCK_CNTL_RB0 0x78 +#define A5XX_RBBM_CLOCK_CNTL_RB1 0x79 +#define A5XX_RBBM_CLOCK_CNTL_RB2 0x7a +#define A5XX_RBBM_CLOCK_CNTL_RB3 0x7B +#define A5XX_RBBM_CLOCK_CNTL2_RB0 0x7C +#define A5XX_RBBM_CLOCK_CNTL2_RB1 0x7D +#define A5XX_RBBM_CLOCK_CNTL2_RB2 0x7E +#define A5XX_RBBM_CLOCK_CNTL2_RB3 0x7F +#define A5XX_RBBM_CLOCK_HYST_RAC 0x80 +#define A5XX_RBBM_CLOCK_DELAY_RAC 0x81 +#define A5XX_RBBM_CLOCK_CNTL_CCU0 0x82 +#define A5XX_RBBM_CLOCK_CNTL_CCU1 0x83 +#define A5XX_RBBM_CLOCK_CNTL_CCU2 0x84 +#define A5XX_RBBM_CLOCK_CNTL_CCU3 0x85 +#define A5XX_RBBM_CLOCK_HYST_RB_CCU0 0x86 +#define A5XX_RBBM_CLOCK_HYST_RB_CCU1 0x87 +#define A5XX_RBBM_CLOCK_HYST_RB_CCU2 0x88 +#define A5XX_RBBM_CLOCK_HYST_RB_CCU3 0x89 +#define A5XX_RBBM_CLOCK_CNTL_RAC 0x8A +#define A5XX_RBBM_CLOCK_CNTL2_RAC 0x8B +#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0 0x8C +#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1 0x8D +#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2 0x8E +#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3 0x8F +#define A5XX_RBBM_CLOCK_HYST_VFD 0x90 +#define A5XX_RBBM_CLOCK_MODE_VFD 0x91 +#define A5XX_RBBM_CLOCK_DELAY_VFD 0x92 +#define A5XX_RBBM_AHB_CNTL0 0x93 +#define A5XX_RBBM_AHB_CNTL1 0x94 +#define A5XX_RBBM_AHB_CNTL2 0x95 +#define A5XX_RBBM_AHB_CMD 0x96 +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11 0x9C +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12 0x9D +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13 0x9E +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14 0x9F +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15 0xA0 +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16 0xA1 +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17 0xA2 +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18 0xA3 +#define A5XX_RBBM_CLOCK_DELAY_TP0 0xA4 +#define A5XX_RBBM_CLOCK_DELAY_TP1 0xA5 +#define A5XX_RBBM_CLOCK_DELAY_TP2 0xA6 +#define A5XX_RBBM_CLOCK_DELAY_TP3 0xA7 +#define A5XX_RBBM_CLOCK_DELAY2_TP0 0xA8 +#define A5XX_RBBM_CLOCK_DELAY2_TP1 0xA9 +#define 
A5XX_RBBM_CLOCK_DELAY2_TP2 0xAA +#define A5XX_RBBM_CLOCK_DELAY2_TP3 0xAB +#define A5XX_RBBM_CLOCK_DELAY3_TP0 0xAC +#define A5XX_RBBM_CLOCK_DELAY3_TP1 0xAD +#define A5XX_RBBM_CLOCK_DELAY3_TP2 0xAE +#define A5XX_RBBM_CLOCK_DELAY3_TP3 0xAF +#define A5XX_RBBM_CLOCK_HYST_TP0 0xB0 +#define A5XX_RBBM_CLOCK_HYST_TP1 0xB1 +#define A5XX_RBBM_CLOCK_HYST_TP2 0xB2 +#define A5XX_RBBM_CLOCK_HYST_TP3 0xB3 +#define A5XX_RBBM_CLOCK_HYST2_TP0 0xB4 +#define A5XX_RBBM_CLOCK_HYST2_TP1 0xB5 +#define A5XX_RBBM_CLOCK_HYST2_TP2 0xB6 +#define A5XX_RBBM_CLOCK_HYST2_TP3 0xB7 +#define A5XX_RBBM_CLOCK_HYST3_TP0 0xB8 +#define A5XX_RBBM_CLOCK_HYST3_TP1 0xB9 +#define A5XX_RBBM_CLOCK_HYST3_TP2 0xBA +#define A5XX_RBBM_CLOCK_HYST3_TP3 0xBB +#define A5XX_RBBM_CLOCK_CNTL_GPMU 0xC8 +#define A5XX_RBBM_CLOCK_DELAY_GPMU 0xC9 +#define A5XX_RBBM_CLOCK_HYST_GPMU 0xCA +#define A5XX_RBBM_PERFCTR_CP_0_LO 0x3A0 +#define A5XX_RBBM_PERFCTR_CP_0_HI 0x3A1 +#define A5XX_RBBM_PERFCTR_CP_1_LO 0x3A2 +#define A5XX_RBBM_PERFCTR_CP_1_HI 0x3A3 +#define A5XX_RBBM_PERFCTR_CP_2_LO 0x3A4 +#define A5XX_RBBM_PERFCTR_CP_2_HI 0x3A5 +#define A5XX_RBBM_PERFCTR_CP_3_LO 0x3A6 +#define A5XX_RBBM_PERFCTR_CP_3_HI 0x3A7 +#define A5XX_RBBM_PERFCTR_CP_4_LO 0x3A8 +#define A5XX_RBBM_PERFCTR_CP_4_HI 0x3A9 +#define A5XX_RBBM_PERFCTR_CP_5_LO 0x3AA +#define A5XX_RBBM_PERFCTR_CP_5_HI 0x3AB +#define A5XX_RBBM_PERFCTR_CP_6_LO 0x3AC +#define A5XX_RBBM_PERFCTR_CP_6_HI 0x3AD +#define A5XX_RBBM_PERFCTR_CP_7_LO 0x3AE +#define A5XX_RBBM_PERFCTR_CP_7_HI 0x3AF +#define A5XX_RBBM_PERFCTR_RBBM_0_LO 0x3B0 +#define A5XX_RBBM_PERFCTR_RBBM_0_HI 0x3B1 +#define A5XX_RBBM_PERFCTR_RBBM_1_LO 0x3B2 +#define A5XX_RBBM_PERFCTR_RBBM_1_HI 0x3B3 +#define A5XX_RBBM_PERFCTR_RBBM_2_LO 0x3B4 +#define A5XX_RBBM_PERFCTR_RBBM_2_HI 0x3B5 +#define A5XX_RBBM_PERFCTR_RBBM_3_LO 0x3B6 +#define A5XX_RBBM_PERFCTR_RBBM_3_HI 0x3B7 +#define A5XX_RBBM_PERFCTR_PC_0_LO 0x3B8 +#define A5XX_RBBM_PERFCTR_PC_0_HI 0x3B9 +#define A5XX_RBBM_PERFCTR_PC_1_LO 0x3BA +#define A5XX_RBBM_PERFCTR_PC_1_HI 0x3BB 
+#define A5XX_RBBM_PERFCTR_PC_2_LO 0x3BC +#define A5XX_RBBM_PERFCTR_PC_2_HI 0x3BD +#define A5XX_RBBM_PERFCTR_PC_3_LO 0x3BE +#define A5XX_RBBM_PERFCTR_PC_3_HI 0x3BF +#define A5XX_RBBM_PERFCTR_PC_4_LO 0x3C0 +#define A5XX_RBBM_PERFCTR_PC_4_HI 0x3C1 +#define A5XX_RBBM_PERFCTR_PC_5_LO 0x3C2 +#define A5XX_RBBM_PERFCTR_PC_5_HI 0x3C3 +#define A5XX_RBBM_PERFCTR_PC_6_LO 0x3C4 +#define A5XX_RBBM_PERFCTR_PC_6_HI 0x3C5 +#define A5XX_RBBM_PERFCTR_PC_7_LO 0x3C6 +#define A5XX_RBBM_PERFCTR_PC_7_HI 0x3C7 +#define A5XX_RBBM_PERFCTR_VFD_0_LO 0x3C8 +#define A5XX_RBBM_PERFCTR_VFD_0_HI 0x3C9 +#define A5XX_RBBM_PERFCTR_VFD_1_LO 0x3CA +#define A5XX_RBBM_PERFCTR_VFD_1_HI 0x3CB +#define A5XX_RBBM_PERFCTR_VFD_2_LO 0x3CC +#define A5XX_RBBM_PERFCTR_VFD_2_HI 0x3CD +#define A5XX_RBBM_PERFCTR_VFD_3_LO 0x3CE +#define A5XX_RBBM_PERFCTR_VFD_3_HI 0x3CF +#define A5XX_RBBM_PERFCTR_VFD_4_LO 0x3D0 +#define A5XX_RBBM_PERFCTR_VFD_4_HI 0x3D1 +#define A5XX_RBBM_PERFCTR_VFD_5_LO 0x3D2 +#define A5XX_RBBM_PERFCTR_VFD_5_HI 0x3D3 +#define A5XX_RBBM_PERFCTR_VFD_6_LO 0x3D4 +#define A5XX_RBBM_PERFCTR_VFD_6_HI 0x3D5 +#define A5XX_RBBM_PERFCTR_VFD_7_LO 0x3D6 +#define A5XX_RBBM_PERFCTR_VFD_7_HI 0x3D7 +#define A5XX_RBBM_PERFCTR_HLSQ_0_LO 0x3D8 +#define A5XX_RBBM_PERFCTR_HLSQ_0_HI 0x3D9 +#define A5XX_RBBM_PERFCTR_HLSQ_1_LO 0x3DA +#define A5XX_RBBM_PERFCTR_HLSQ_1_HI 0x3DB +#define A5XX_RBBM_PERFCTR_HLSQ_2_LO 0x3DC +#define A5XX_RBBM_PERFCTR_HLSQ_2_HI 0x3DD +#define A5XX_RBBM_PERFCTR_HLSQ_3_LO 0x3DE +#define A5XX_RBBM_PERFCTR_HLSQ_3_HI 0x3DF +#define A5XX_RBBM_PERFCTR_HLSQ_4_LO 0x3E0 +#define A5XX_RBBM_PERFCTR_HLSQ_4_HI 0x3E1 +#define A5XX_RBBM_PERFCTR_HLSQ_5_LO 0x3E2 +#define A5XX_RBBM_PERFCTR_HLSQ_5_HI 0x3E3 +#define A5XX_RBBM_PERFCTR_HLSQ_6_LO 0x3E4 +#define A5XX_RBBM_PERFCTR_HLSQ_6_HI 0x3E5 +#define A5XX_RBBM_PERFCTR_HLSQ_7_LO 0x3E6 +#define A5XX_RBBM_PERFCTR_HLSQ_7_HI 0x3E7 +#define A5XX_RBBM_PERFCTR_VPC_0_LO 0x3E8 +#define A5XX_RBBM_PERFCTR_VPC_0_HI 0x3E9 +#define A5XX_RBBM_PERFCTR_VPC_1_LO 0x3EA +#define 
A5XX_RBBM_PERFCTR_VPC_1_HI 0x3EB +#define A5XX_RBBM_PERFCTR_VPC_2_LO 0x3EC +#define A5XX_RBBM_PERFCTR_VPC_2_HI 0x3ED +#define A5XX_RBBM_PERFCTR_VPC_3_LO 0x3EE +#define A5XX_RBBM_PERFCTR_VPC_3_HI 0x3EF +#define A5XX_RBBM_PERFCTR_CCU_0_LO 0x3F0 +#define A5XX_RBBM_PERFCTR_CCU_0_HI 0x3F1 +#define A5XX_RBBM_PERFCTR_CCU_1_LO 0x3F2 +#define A5XX_RBBM_PERFCTR_CCU_1_HI 0x3F3 +#define A5XX_RBBM_PERFCTR_CCU_2_LO 0x3F4 +#define A5XX_RBBM_PERFCTR_CCU_2_HI 0x3F5 +#define A5XX_RBBM_PERFCTR_CCU_3_LO 0x3F6 +#define A5XX_RBBM_PERFCTR_CCU_3_HI 0x3F7 +#define A5XX_RBBM_PERFCTR_TSE_0_LO 0x3F8 +#define A5XX_RBBM_PERFCTR_TSE_0_HI 0x3F9 +#define A5XX_RBBM_PERFCTR_TSE_1_LO 0x3FA +#define A5XX_RBBM_PERFCTR_TSE_1_HI 0x3FB +#define A5XX_RBBM_PERFCTR_TSE_2_LO 0x3FC +#define A5XX_RBBM_PERFCTR_TSE_2_HI 0x3FD +#define A5XX_RBBM_PERFCTR_TSE_3_LO 0x3FE +#define A5XX_RBBM_PERFCTR_TSE_3_HI 0x3FF +#define A5XX_RBBM_PERFCTR_RAS_0_LO 0x400 +#define A5XX_RBBM_PERFCTR_RAS_0_HI 0x401 +#define A5XX_RBBM_PERFCTR_RAS_1_LO 0x402 +#define A5XX_RBBM_PERFCTR_RAS_1_HI 0x403 +#define A5XX_RBBM_PERFCTR_RAS_2_LO 0x404 +#define A5XX_RBBM_PERFCTR_RAS_2_HI 0x405 +#define A5XX_RBBM_PERFCTR_RAS_3_LO 0x406 +#define A5XX_RBBM_PERFCTR_RAS_3_HI 0x407 +#define A5XX_RBBM_PERFCTR_UCHE_0_LO 0x408 +#define A5XX_RBBM_PERFCTR_UCHE_0_HI 0x409 +#define A5XX_RBBM_PERFCTR_UCHE_1_LO 0x40A +#define A5XX_RBBM_PERFCTR_UCHE_1_HI 0x40B +#define A5XX_RBBM_PERFCTR_UCHE_2_LO 0x40C +#define A5XX_RBBM_PERFCTR_UCHE_2_HI 0x40D +#define A5XX_RBBM_PERFCTR_UCHE_3_LO 0x40E +#define A5XX_RBBM_PERFCTR_UCHE_3_HI 0x40F +#define A5XX_RBBM_PERFCTR_UCHE_4_LO 0x410 +#define A5XX_RBBM_PERFCTR_UCHE_4_HI 0x411 +#define A5XX_RBBM_PERFCTR_UCHE_5_LO 0x412 +#define A5XX_RBBM_PERFCTR_UCHE_5_HI 0x413 +#define A5XX_RBBM_PERFCTR_UCHE_6_LO 0x414 +#define A5XX_RBBM_PERFCTR_UCHE_6_HI 0x415 +#define A5XX_RBBM_PERFCTR_UCHE_7_LO 0x416 +#define A5XX_RBBM_PERFCTR_UCHE_7_HI 0x417 +#define A5XX_RBBM_PERFCTR_TP_0_LO 0x418 +#define A5XX_RBBM_PERFCTR_TP_0_HI 0x419 +#define 
A5XX_RBBM_PERFCTR_TP_1_LO 0x41A +#define A5XX_RBBM_PERFCTR_TP_1_HI 0x41B +#define A5XX_RBBM_PERFCTR_TP_2_LO 0x41C +#define A5XX_RBBM_PERFCTR_TP_2_HI 0x41D +#define A5XX_RBBM_PERFCTR_TP_3_LO 0x41E +#define A5XX_RBBM_PERFCTR_TP_3_HI 0x41F +#define A5XX_RBBM_PERFCTR_TP_4_LO 0x420 +#define A5XX_RBBM_PERFCTR_TP_4_HI 0x421 +#define A5XX_RBBM_PERFCTR_TP_5_LO 0x422 +#define A5XX_RBBM_PERFCTR_TP_5_HI 0x423 +#define A5XX_RBBM_PERFCTR_TP_6_LO 0x424 +#define A5XX_RBBM_PERFCTR_TP_6_HI 0x425 +#define A5XX_RBBM_PERFCTR_TP_7_LO 0x426 +#define A5XX_RBBM_PERFCTR_TP_7_HI 0x427 +#define A5XX_RBBM_PERFCTR_SP_0_LO 0x428 +#define A5XX_RBBM_PERFCTR_SP_0_HI 0x429 +#define A5XX_RBBM_PERFCTR_SP_1_LO 0x42A +#define A5XX_RBBM_PERFCTR_SP_1_HI 0x42B +#define A5XX_RBBM_PERFCTR_SP_2_LO 0x42C +#define A5XX_RBBM_PERFCTR_SP_2_HI 0x42D +#define A5XX_RBBM_PERFCTR_SP_3_LO 0x42E +#define A5XX_RBBM_PERFCTR_SP_3_HI 0x42F +#define A5XX_RBBM_PERFCTR_SP_4_LO 0x430 +#define A5XX_RBBM_PERFCTR_SP_4_HI 0x431 +#define A5XX_RBBM_PERFCTR_SP_5_LO 0x432 +#define A5XX_RBBM_PERFCTR_SP_5_HI 0x433 +#define A5XX_RBBM_PERFCTR_SP_6_LO 0x434 +#define A5XX_RBBM_PERFCTR_SP_6_HI 0x435 +#define A5XX_RBBM_PERFCTR_SP_7_LO 0x436 +#define A5XX_RBBM_PERFCTR_SP_7_HI 0x437 +#define A5XX_RBBM_PERFCTR_SP_8_LO 0x438 +#define A5XX_RBBM_PERFCTR_SP_8_HI 0x439 +#define A5XX_RBBM_PERFCTR_SP_9_LO 0x43A +#define A5XX_RBBM_PERFCTR_SP_9_HI 0x43B +#define A5XX_RBBM_PERFCTR_SP_10_LO 0x43C +#define A5XX_RBBM_PERFCTR_SP_10_HI 0x43D +#define A5XX_RBBM_PERFCTR_SP_11_LO 0x43E +#define A5XX_RBBM_PERFCTR_SP_11_HI 0x43F +#define A5XX_RBBM_PERFCTR_RB_0_LO 0x440 +#define A5XX_RBBM_PERFCTR_RB_0_HI 0x441 +#define A5XX_RBBM_PERFCTR_RB_1_LO 0x442 +#define A5XX_RBBM_PERFCTR_RB_1_HI 0x443 +#define A5XX_RBBM_PERFCTR_RB_2_LO 0x444 +#define A5XX_RBBM_PERFCTR_RB_2_HI 0x445 +#define A5XX_RBBM_PERFCTR_RB_3_LO 0x446 +#define A5XX_RBBM_PERFCTR_RB_3_HI 0x447 +#define A5XX_RBBM_PERFCTR_RB_4_LO 0x448 +#define A5XX_RBBM_PERFCTR_RB_4_HI 0x449 +#define A5XX_RBBM_PERFCTR_RB_5_LO 
0x44A +#define A5XX_RBBM_PERFCTR_RB_5_HI 0x44B +#define A5XX_RBBM_PERFCTR_RB_6_LO 0x44C +#define A5XX_RBBM_PERFCTR_RB_6_HI 0x44D +#define A5XX_RBBM_PERFCTR_RB_7_LO 0x44E +#define A5XX_RBBM_PERFCTR_RB_7_HI 0x44F +#define A5XX_RBBM_PERFCTR_VSC_0_LO 0x450 +#define A5XX_RBBM_PERFCTR_VSC_0_HI 0x451 +#define A5XX_RBBM_PERFCTR_VSC_1_LO 0x452 +#define A5XX_RBBM_PERFCTR_VSC_1_HI 0x453 +#define A5XX_RBBM_PERFCTR_LRZ_0_LO 0x454 +#define A5XX_RBBM_PERFCTR_LRZ_0_HI 0x455 +#define A5XX_RBBM_PERFCTR_LRZ_1_LO 0x456 +#define A5XX_RBBM_PERFCTR_LRZ_1_HI 0x457 +#define A5XX_RBBM_PERFCTR_LRZ_2_LO 0x458 +#define A5XX_RBBM_PERFCTR_LRZ_2_HI 0x459 +#define A5XX_RBBM_PERFCTR_LRZ_3_LO 0x45A +#define A5XX_RBBM_PERFCTR_LRZ_3_HI 0x45B +#define A5XX_RBBM_PERFCTR_CMP_0_LO 0x45C +#define A5XX_RBBM_PERFCTR_CMP_0_HI 0x45D +#define A5XX_RBBM_PERFCTR_CMP_1_LO 0x45E +#define A5XX_RBBM_PERFCTR_CMP_1_HI 0x45F +#define A5XX_RBBM_PERFCTR_CMP_2_LO 0x460 +#define A5XX_RBBM_PERFCTR_CMP_2_HI 0x461 +#define A5XX_RBBM_PERFCTR_CMP_3_LO 0x462 +#define A5XX_RBBM_PERFCTR_CMP_3_HI 0x463 +#define A5XX_RBBM_PERFCTR_RBBM_SEL_0 0x46B +#define A5XX_RBBM_PERFCTR_RBBM_SEL_1 0x46C +#define A5XX_RBBM_PERFCTR_RBBM_SEL_2 0x46D +#define A5XX_RBBM_PERFCTR_RBBM_SEL_3 0x46E +#define A5XX_RBBM_ALWAYSON_COUNTER_LO 0x4D2 +#define A5XX_RBBM_ALWAYSON_COUNTER_HI 0x4D3 +#define A5XX_RBBM_STATUS 0x4F5 +#define A5XX_RBBM_STATUS3 0x530 +#define A5XX_RBBM_INT_0_STATUS 0x4E1 +#define A5XX_RBBM_AHB_ME_SPLIT_STATUS 0x4F0 +#define A5XX_RBBM_AHB_PFP_SPLIT_STATUS 0x4F1 +#define A5XX_RBBM_AHB_ERROR_STATUS 0x4F4 +#define A5XX_RBBM_PERFCTR_CNTL 0x464 +#define A5XX_RBBM_PERFCTR_LOAD_CMD0 0x465 +#define A5XX_RBBM_PERFCTR_LOAD_CMD1 0x466 +#define A5XX_RBBM_PERFCTR_LOAD_CMD2 0x467 +#define A5XX_RBBM_PERFCTR_LOAD_CMD3 0x468 +#define A5XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x469 +#define A5XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x46A +#define A5XX_RBBM_PERFCTR_RBBM_SEL_0 0x46B +#define A5XX_RBBM_PERFCTR_RBBM_SEL_1 0x46C +#define A5XX_RBBM_PERFCTR_RBBM_SEL_2 0x46D +#define 
A5XX_RBBM_PERFCTR_RBBM_SEL_3 0x46E +#define A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED 0x46F +#define A5XX_RBBM_CFG_DBGBUS_EVENT_LOGIC 0x504 +#define A5XX_RBBM_CFG_DBGBUS_OVER 0x505 +#define A5XX_RBBM_CFG_DBGBUS_COUNT0 0x506 +#define A5XX_RBBM_CFG_DBGBUS_COUNT1 0x507 +#define A5XX_RBBM_CFG_DBGBUS_COUNT2 0x508 +#define A5XX_RBBM_CFG_DBGBUS_COUNT3 0x509 +#define A5XX_RBBM_CFG_DBGBUS_COUNT4 0x50A +#define A5XX_RBBM_CFG_DBGBUS_COUNT5 0x50B +#define A5XX_RBBM_CFG_DBGBUS_TRACE_ADDR 0x50C +#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF0 0x50D +#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1 0x50E +#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2 0x50F +#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF3 0x510 +#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF4 0x511 +#define A5XX_RBBM_CFG_DBGBUS_MISR0 0x512 +#define A5XX_RBBM_CFG_DBGBUS_MISR1 0x513 +#define A5XX_RBBM_ISDB_CNT 0x533 +#define A5XX_RBBM_SECVID_TRUST_CONFIG 0xF000 +#define A5XX_RBBM_SECVID_TRUST_CNTL 0xF400 +#define A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO 0xF800 +#define A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI 0xF801 +#define A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE 0xF802 +#define A5XX_RBBM_SECVID_TSB_CNTL 0xF803 +#define A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL 0xF810 + +/* VSC registers */ +#define A5XX_VSC_PERFCTR_VSC_SEL_0 0xC60 +#define A5XX_VSC_PERFCTR_VSC_SEL_1 0xC61 + +#define A5XX_GRAS_ADDR_MODE_CNTL 0xC81 + +/* TSE registers */ +#define A5XX_GRAS_PERFCTR_TSE_SEL_0 0xC90 +#define A5XX_GRAS_PERFCTR_TSE_SEL_1 0xC91 +#define A5XX_GRAS_PERFCTR_TSE_SEL_2 0xC92 +#define A5XX_GRAS_PERFCTR_TSE_SEL_3 0xC93 + +/* RAS registers */ +#define A5XX_GRAS_PERFCTR_RAS_SEL_0 0xC94 +#define A5XX_GRAS_PERFCTR_RAS_SEL_1 0xC95 +#define A5XX_GRAS_PERFCTR_RAS_SEL_2 0xC96 +#define A5XX_GRAS_PERFCTR_RAS_SEL_3 0xC97 + +/* LRZ registers */ +#define A5XX_GRAS_PERFCTR_LRZ_SEL_0 0xC98 +#define A5XX_GRAS_PERFCTR_LRZ_SEL_1 0xC99 +#define A5XX_GRAS_PERFCTR_LRZ_SEL_2 0xC9A +#define A5XX_GRAS_PERFCTR_LRZ_SEL_3 0xC9B + + +/* RB registers */ +#define A5XX_RB_DBG_ECO_CNT 0xCC4 +#define A5XX_RB_ADDR_MODE_CNTL 
0xCC5 +#define A5XX_RB_MODE_CNTL 0xCC6 +#define A5XX_RB_PERFCTR_RB_SEL_0 0xCD0 +#define A5XX_RB_PERFCTR_RB_SEL_1 0xCD1 +#define A5XX_RB_PERFCTR_RB_SEL_2 0xCD2 +#define A5XX_RB_PERFCTR_RB_SEL_3 0xCD3 +#define A5XX_RB_PERFCTR_RB_SEL_4 0xCD4 +#define A5XX_RB_PERFCTR_RB_SEL_5 0xCD5 +#define A5XX_RB_PERFCTR_RB_SEL_6 0xCD6 +#define A5XX_RB_PERFCTR_RB_SEL_7 0xCD7 + +/* CCU registers */ +#define A5XX_RB_PERFCTR_CCU_SEL_0 0xCD8 +#define A5XX_RB_PERFCTR_CCU_SEL_1 0xCD9 +#define A5XX_RB_PERFCTR_CCU_SEL_2 0xCDA +#define A5XX_RB_PERFCTR_CCU_SEL_3 0xCDB + +/* RB Power Counter RB Registers Select */ +#define A5XX_RB_POWERCTR_RB_SEL_0 0xCE0 +#define A5XX_RB_POWERCTR_RB_SEL_1 0xCE1 +#define A5XX_RB_POWERCTR_RB_SEL_2 0xCE2 +#define A5XX_RB_POWERCTR_RB_SEL_3 0xCE3 + +/* RB Power Counter CCU Registers Select */ +#define A5XX_RB_POWERCTR_CCU_SEL_0 0xCE4 +#define A5XX_RB_POWERCTR_CCU_SEL_1 0xCE5 + +/* CMP registers */ +#define A5XX_RB_PERFCTR_CMP_SEL_0 0xCEC +#define A5XX_RB_PERFCTR_CMP_SEL_1 0xCED +#define A5XX_RB_PERFCTR_CMP_SEL_2 0xCEE +#define A5XX_RB_PERFCTR_CMP_SEL_3 0xCEF + +/* PC registers */ +#define A5XX_PC_DBG_ECO_CNTL 0xD00 +#define A5XX_PC_ADDR_MODE_CNTL 0xD01 +#define A5XX_PC_PERFCTR_PC_SEL_0 0xD10 +#define A5XX_PC_PERFCTR_PC_SEL_1 0xD11 +#define A5XX_PC_PERFCTR_PC_SEL_2 0xD12 +#define A5XX_PC_PERFCTR_PC_SEL_3 0xD13 +#define A5XX_PC_PERFCTR_PC_SEL_4 0xD14 +#define A5XX_PC_PERFCTR_PC_SEL_5 0xD15 +#define A5XX_PC_PERFCTR_PC_SEL_6 0xD16 +#define A5XX_PC_PERFCTR_PC_SEL_7 0xD17 + +/* HLSQ registers */ +#define A5XX_HLSQ_DBG_ECO_CNTL 0xE04 +#define A5XX_HLSQ_ADDR_MODE_CNTL 0xE05 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_0 0xE10 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_1 0xE11 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_2 0xE12 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_3 0xE13 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_4 0xE14 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_5 0xE15 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_6 0xE16 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_7 0xE17 +#define A5XX_HLSQ_DBG_READ_SEL 0xBC00 +#define 
A5XX_HLSQ_DBG_AHB_READ_APERTURE 0xA000 + +/* VFD registers */ +#define A5XX_VFD_ADDR_MODE_CNTL 0xE41 +#define A5XX_VFD_PERFCTR_VFD_SEL_0 0xE50 +#define A5XX_VFD_PERFCTR_VFD_SEL_1 0xE51 +#define A5XX_VFD_PERFCTR_VFD_SEL_2 0xE52 +#define A5XX_VFD_PERFCTR_VFD_SEL_3 0xE53 +#define A5XX_VFD_PERFCTR_VFD_SEL_4 0xE54 +#define A5XX_VFD_PERFCTR_VFD_SEL_5 0xE55 +#define A5XX_VFD_PERFCTR_VFD_SEL_6 0xE56 +#define A5XX_VFD_PERFCTR_VFD_SEL_7 0xE57 + +/* VPC registers */ +#define A5XX_VPC_DBG_ECO_CNTL 0xE60 +#define A5XX_VPC_ADDR_MODE_CNTL 0xE61 +#define A5XX_VPC_PERFCTR_VPC_SEL_0 0xE64 +#define A5XX_VPC_PERFCTR_VPC_SEL_1 0xE65 +#define A5XX_VPC_PERFCTR_VPC_SEL_2 0xE66 +#define A5XX_VPC_PERFCTR_VPC_SEL_3 0xE67 + +/* UCHE registers */ +#define A5XX_UCHE_ADDR_MODE_CNTL 0xE80 +#define A5XX_UCHE_MODE_CNTL 0xE81 +#define A5XX_UCHE_WRITE_THRU_BASE_LO 0xE87 +#define A5XX_UCHE_WRITE_THRU_BASE_HI 0xE88 +#define A5XX_UCHE_TRAP_BASE_LO 0xE89 +#define A5XX_UCHE_TRAP_BASE_HI 0xE8A +#define A5XX_UCHE_GMEM_RANGE_MIN_LO 0xE8B +#define A5XX_UCHE_GMEM_RANGE_MIN_HI 0xE8C +#define A5XX_UCHE_GMEM_RANGE_MAX_LO 0xE8D +#define A5XX_UCHE_GMEM_RANGE_MAX_HI 0xE8E +#define A5XX_UCHE_DBG_ECO_CNTL_2 0xE8F +#define A5XX_UCHE_INVALIDATE0 0xE95 +#define A5XX_UCHE_CACHE_WAYS 0xE96 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_0 0xEA0 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_1 0xEA1 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_2 0xEA2 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_3 0xEA3 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_4 0xEA4 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_5 0xEA5 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_6 0xEA6 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_7 0xEA7 + +/* UCHE Power Counter UCHE Registers Select */ +#define A5XX_UCHE_POWERCTR_UCHE_SEL_0 0xEA8 +#define A5XX_UCHE_POWERCTR_UCHE_SEL_1 0xEA9 +#define A5XX_UCHE_POWERCTR_UCHE_SEL_2 0xEAA +#define A5XX_UCHE_POWERCTR_UCHE_SEL_3 0xEAB + +/* SP registers */ +#define A5XX_SP_DBG_ECO_CNTL 0xEC0 +#define A5XX_SP_ADDR_MODE_CNTL 0xEC1 +#define A5XX_SP_PERFCTR_SP_SEL_0 0xED0 +#define 
A5XX_SP_PERFCTR_SP_SEL_1 0xED1 +#define A5XX_SP_PERFCTR_SP_SEL_2 0xED2 +#define A5XX_SP_PERFCTR_SP_SEL_3 0xED3 +#define A5XX_SP_PERFCTR_SP_SEL_4 0xED4 +#define A5XX_SP_PERFCTR_SP_SEL_5 0xED5 +#define A5XX_SP_PERFCTR_SP_SEL_6 0xED6 +#define A5XX_SP_PERFCTR_SP_SEL_7 0xED7 +#define A5XX_SP_PERFCTR_SP_SEL_8 0xED8 +#define A5XX_SP_PERFCTR_SP_SEL_9 0xED9 +#define A5XX_SP_PERFCTR_SP_SEL_10 0xEDA +#define A5XX_SP_PERFCTR_SP_SEL_11 0xEDB + +/* SP Power Counter SP Registers Select */ +#define A5XX_SP_POWERCTR_SP_SEL_0 0xEDC +#define A5XX_SP_POWERCTR_SP_SEL_1 0xEDD +#define A5XX_SP_POWERCTR_SP_SEL_2 0xEDE +#define A5XX_SP_POWERCTR_SP_SEL_3 0xEDF + +/* TP registers */ +#define A5XX_TPL1_ADDR_MODE_CNTL 0xF01 +#define A5XX_TPL1_MODE_CNTL 0xF02 +#define A5XX_TPL1_PERFCTR_TP_SEL_0 0xF10 +#define A5XX_TPL1_PERFCTR_TP_SEL_1 0xF11 +#define A5XX_TPL1_PERFCTR_TP_SEL_2 0xF12 +#define A5XX_TPL1_PERFCTR_TP_SEL_3 0xF13 +#define A5XX_TPL1_PERFCTR_TP_SEL_4 0xF14 +#define A5XX_TPL1_PERFCTR_TP_SEL_5 0xF15 +#define A5XX_TPL1_PERFCTR_TP_SEL_6 0xF16 +#define A5XX_TPL1_PERFCTR_TP_SEL_7 0xF17 + +/* TP Power Counter TP Registers Select */ +#define A5XX_TPL1_POWERCTR_TP_SEL_0 0xF18 +#define A5XX_TPL1_POWERCTR_TP_SEL_1 0xF19 +#define A5XX_TPL1_POWERCTR_TP_SEL_2 0xF1A +#define A5XX_TPL1_POWERCTR_TP_SEL_3 0xF1B + +/* VBIF registers */ +#define A5XX_VBIF_VERSION 0x3000 +#define A5XX_VBIF_CLKON 0x3001 +#define A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_MASK 0x1 +#define A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_SHIFT 0x1 + +#define A5XX_VBIF_ROUND_ROBIN_QOS_ARB 0x3049 +#define A5XX_VBIF_GATE_OFF_WRREQ_EN 0x302A + +#define A5XX_VBIF_XIN_HALT_CTRL0 0x3080 +#define A5XX_VBIF_XIN_HALT_CTRL0_MASK 0xF +#define A510_VBIF_XIN_HALT_CTRL0_MASK 0x7 +#define A5XX_VBIF_XIN_HALT_CTRL1 0x3081 + +#define A5XX_VBIF_TEST_BUS_OUT_CTRL 0x3084 +#define A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_MASK 0x1 +#define A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_SHIFT 0x0 + +#define A5XX_VBIF_TEST_BUS1_CTRL0 0x3085 +#define A5XX_VBIF_TEST_BUS1_CTRL1 0x3086 +#define 
A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_MASK 0xF +#define A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_SHIFT 0x0 + +#define A5XX_VBIF_TEST_BUS2_CTRL0 0x3087 +#define A5XX_VBIF_TEST_BUS2_CTRL1 0x3088 +#define A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK 0x1FF +#define A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT 0x0 + +#define A5XX_VBIF_TEST_BUS_OUT 0x308c + +#define A5XX_VBIF_PERF_CNT_SEL0 0x30D0 +#define A5XX_VBIF_PERF_CNT_SEL1 0x30D1 +#define A5XX_VBIF_PERF_CNT_SEL2 0x30D2 +#define A5XX_VBIF_PERF_CNT_SEL3 0x30D3 +#define A5XX_VBIF_PERF_CNT_LOW0 0x30D8 +#define A5XX_VBIF_PERF_CNT_LOW1 0x30D9 +#define A5XX_VBIF_PERF_CNT_LOW2 0x30DA +#define A5XX_VBIF_PERF_CNT_LOW3 0x30DB +#define A5XX_VBIF_PERF_CNT_HIGH0 0x30E0 +#define A5XX_VBIF_PERF_CNT_HIGH1 0x30E1 +#define A5XX_VBIF_PERF_CNT_HIGH2 0x30E2 +#define A5XX_VBIF_PERF_CNT_HIGH3 0x30E3 + +#define A5XX_VBIF_PERF_PWR_CNT_EN0 0x3100 +#define A5XX_VBIF_PERF_PWR_CNT_EN1 0x3101 +#define A5XX_VBIF_PERF_PWR_CNT_EN2 0x3102 + +#define A5XX_VBIF_PERF_PWR_CNT_LOW0 0x3110 +#define A5XX_VBIF_PERF_PWR_CNT_LOW1 0x3111 +#define A5XX_VBIF_PERF_PWR_CNT_LOW2 0x3112 + +#define A5XX_VBIF_PERF_PWR_CNT_HIGH0 0x3118 +#define A5XX_VBIF_PERF_PWR_CNT_HIGH1 0x3119 +#define A5XX_VBIF_PERF_PWR_CNT_HIGH2 0x311A + +/* GPMU registers */ +#define A5XX_GPMU_INST_RAM_BASE 0x8800 +#define A5XX_GPMU_DATA_RAM_BASE 0x9800 +#define A5XX_GPMU_SP_POWER_CNTL 0xA881 +#define A5XX_GPMU_RBCCU_CLOCK_CNTL 0xA886 +#define A5XX_GPMU_RBCCU_POWER_CNTL 0xA887 +#define A5XX_GPMU_SP_PWR_CLK_STATUS 0xA88B +#define A5XX_GPMU_RBCCU_PWR_CLK_STATUS 0xA88D +#define A5XX_GPMU_PWR_COL_STAGGER_DELAY 0xA891 +#define A5XX_GPMU_PWR_COL_INTER_FRAME_CTRL 0xA892 +#define A5XX_GPMU_PWR_COL_INTER_FRAME_HYST 0xA893 +#define A5XX_GPMU_PWR_COL_BINNING_CTRL 0xA894 +#define A5XX_GPMU_CLOCK_THROTTLE_CTRL 0xA8A3 +#define A5XX_GPMU_WFI_CONFIG 0xA8C1 +#define A5XX_GPMU_RBBM_INTR_INFO 0xA8D6 +#define A5XX_GPMU_CM3_SYSRESET 0xA8D8 +#define A5XX_GPMU_GENERAL_0 0xA8E0 +#define A5XX_GPMU_GENERAL_1 0xA8E1 + +/* COUNTABLE FOR SP 
PERFCOUNTER */ +#define A5XX_SP_ALU_ACTIVE_CYCLES 0x1 +#define A5XX_SP0_ICL1_MISSES 0x35 +#define A5XX_SP_FS_CFLOW_INSTRUCTIONS 0x27 + +/* COUNTABLE FOR TSE PERFCOUNTER */ +#define A5XX_TSE_INPUT_PRIM_NUM 0x6 + +/* COUNTABLE FOR RBBM PERFCOUNTER */ +#define A5XX_RBBM_ALWAYS_COUNT 0x0 + +/* GPMU POWER COUNTERS */ +#define A5XX_SP_POWER_COUNTER_0_LO 0xA840 +#define A5XX_SP_POWER_COUNTER_0_HI 0xA841 +#define A5XX_SP_POWER_COUNTER_1_LO 0xA842 +#define A5XX_SP_POWER_COUNTER_1_HI 0xA843 +#define A5XX_SP_POWER_COUNTER_2_LO 0xA844 +#define A5XX_SP_POWER_COUNTER_2_HI 0xA845 +#define A5XX_SP_POWER_COUNTER_3_LO 0xA846 +#define A5XX_SP_POWER_COUNTER_3_HI 0xA847 + +#define A5XX_TP_POWER_COUNTER_0_LO 0xA848 +#define A5XX_TP_POWER_COUNTER_0_HI 0xA849 +#define A5XX_TP_POWER_COUNTER_1_LO 0xA84A +#define A5XX_TP_POWER_COUNTER_1_HI 0xA84B +#define A5XX_TP_POWER_COUNTER_2_LO 0xA84C +#define A5XX_TP_POWER_COUNTER_2_HI 0xA84D +#define A5XX_TP_POWER_COUNTER_3_LO 0xA84E +#define A5XX_TP_POWER_COUNTER_3_HI 0xA84F + +#define A5XX_RB_POWER_COUNTER_0_LO 0xA850 +#define A5XX_RB_POWER_COUNTER_0_HI 0xA851 +#define A5XX_RB_POWER_COUNTER_1_LO 0xA852 +#define A5XX_RB_POWER_COUNTER_1_HI 0xA853 +#define A5XX_RB_POWER_COUNTER_2_LO 0xA854 +#define A5XX_RB_POWER_COUNTER_2_HI 0xA855 +#define A5XX_RB_POWER_COUNTER_3_LO 0xA856 +#define A5XX_RB_POWER_COUNTER_3_HI 0xA857 + +#define A5XX_CCU_POWER_COUNTER_0_LO 0xA858 +#define A5XX_CCU_POWER_COUNTER_0_HI 0xA859 +#define A5XX_CCU_POWER_COUNTER_1_LO 0xA85A +#define A5XX_CCU_POWER_COUNTER_1_HI 0xA85B + +#define A5XX_UCHE_POWER_COUNTER_0_LO 0xA85C +#define A5XX_UCHE_POWER_COUNTER_0_HI 0xA85D +#define A5XX_UCHE_POWER_COUNTER_1_LO 0xA85E +#define A5XX_UCHE_POWER_COUNTER_1_HI 0xA85F +#define A5XX_UCHE_POWER_COUNTER_2_LO 0xA860 +#define A5XX_UCHE_POWER_COUNTER_2_HI 0xA861 +#define A5XX_UCHE_POWER_COUNTER_3_LO 0xA862 +#define A5XX_UCHE_POWER_COUNTER_3_HI 0xA863 + +#define A5XX_CP_POWER_COUNTER_0_LO 0xA864 +#define A5XX_CP_POWER_COUNTER_0_HI 0xA865 +#define 
A5XX_CP_POWER_COUNTER_1_LO 0xA866 +#define A5XX_CP_POWER_COUNTER_1_HI 0xA867 +#define A5XX_CP_POWER_COUNTER_2_LO 0xA868 +#define A5XX_CP_POWER_COUNTER_2_HI 0xA869 +#define A5XX_CP_POWER_COUNTER_3_LO 0xA86A +#define A5XX_CP_POWER_COUNTER_3_HI 0xA86B + +#define A5XX_GPMU_POWER_COUNTER_0_LO 0xA86C +#define A5XX_GPMU_POWER_COUNTER_0_HI 0xA86D +#define A5XX_GPMU_POWER_COUNTER_1_LO 0xA86E +#define A5XX_GPMU_POWER_COUNTER_1_HI 0xA86F +#define A5XX_GPMU_POWER_COUNTER_2_LO 0xA870 +#define A5XX_GPMU_POWER_COUNTER_2_HI 0xA871 +#define A5XX_GPMU_POWER_COUNTER_3_LO 0xA872 +#define A5XX_GPMU_POWER_COUNTER_3_HI 0xA873 +#define A5XX_GPMU_POWER_COUNTER_4_LO 0xA874 +#define A5XX_GPMU_POWER_COUNTER_4_HI 0xA875 +#define A5XX_GPMU_POWER_COUNTER_5_LO 0xA876 +#define A5XX_GPMU_POWER_COUNTER_5_HI 0xA877 + +#define A5XX_GPMU_POWER_COUNTER_ENABLE 0xA878 +#define A5XX_GPMU_ALWAYS_ON_COUNTER_LO 0xA879 +#define A5XX_GPMU_ALWAYS_ON_COUNTER_HI 0xA87A +#define A5XX_GPMU_ALWAYS_ON_COUNTER_RESET 0xA87B +#define A5XX_GPMU_POWER_COUNTER_SELECT_0 0xA87C +#define A5XX_GPMU_POWER_COUNTER_SELECT_1 0xA87D +#define A5XX_GPMU_GPMU_SP_CLOCK_CONTROL 0xA880 + +#define A5XX_GPMU_CLOCK_THROTTLE_CTRL 0xA8A3 +#define A5XX_GPMU_THROTTLE_UNMASK_FORCE_CTRL 0xA8A8 + +#define A5XX_GPMU_TEMP_SENSOR_ID 0xAC00 +#define A5XX_GPMU_TEMP_SENSOR_CONFIG 0xAC01 +#define A5XX_GPMU_DELTA_TEMP_THRESHOLD 0xAC03 +#define A5XX_GPMU_TEMP_THRESHOLD_INTR_EN_MASK 0xAC06 + +#define A5XX_GPMU_LEAKAGE_TEMP_COEFF_0_1 0xAC40 +#define A5XX_GPMU_LEAKAGE_TEMP_COEFF_2_3 0xAC41 +#define A5XX_GPMU_LEAKAGE_VTG_COEFF_0_1 0xAC42 +#define A5XX_GPMU_LEAKAGE_VTG_COEFF_2_3 0xAC43 +#define A5XX_GPMU_BASE_LEAKAGE 0xAC46 + +#define A5XX_GPMU_GPMU_VOLTAGE 0xAC60 +#define A5XX_GPMU_GPMU_VOLTAGE_INTR_STATUS 0xAC61 +#define A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK 0xAC62 +#define A5XX_GPMU_GPMU_PWR_THRESHOLD 0xAC80 +#define A5XX_GPMU_GPMU_LLM_GLM_SLEEP_CTRL 0xACC4 +#define A5XX_GPMU_GPMU_LLM_GLM_SLEEP_STATUS 0xACC5 +#define A5XX_GPMU_GPMU_ISENSE_CTRL 0xACD0 + +#define 
A5XX_GDPM_CONFIG1 0xB80C +#define A5XX_GDPM_INT_EN 0xB80F +#define A5XX_GDPM_INT_MASK 0xB811 +#define A5XX_GPMU_BEC_ENABLE 0xB9A0 + +/* ISENSE registers */ +#define A5XX_GPU_CS_DECIMAL_ALIGN 0xC16A +#define A5XX_GPU_CS_SENSOR_PARAM_CORE_1 0xC126 +#define A5XX_GPU_CS_SENSOR_PARAM_CORE_2 0xC127 +#define A5XX_GPU_CS_SW_OV_FUSE_EN 0xC168 +#define A5XX_GPU_CS_SENSOR_GENERAL_STATUS 0xC41A +#define A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_0 0xC41D +#define A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_2 0xC41F +#define A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_4 0xC421 +#define A5XX_GPU_CS_ENABLE_REG 0xC520 +#define A5XX_GPU_CS_AMP_CALIBRATION_CONTROL1 0xC557 +#define A5XX_GPU_CS_AMP_CALIBRATION_DONE 0xC565 +#define A5XX_GPU_CS_ENDPOINT_CALIBRATION_DONE 0xC556 +#endif /* _A5XX_REG_H */ + diff --git a/a6xx_reg.h b/a6xx_reg.h new file mode 100644 index 0000000000..f6b7dcde8a --- /dev/null +++ b/a6xx_reg.h @@ -0,0 +1,1197 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2017-2020, The Linux Foundation. All rights reserved. 
+ */ + +#ifndef _A6XX_REG_H +#define _A6XX_REG_H + +/* A6XX interrupt bits */ +#define A6XX_INT_RBBM_GPU_IDLE 0 +#define A6XX_INT_CP_AHB_ERROR 1 +#define A6XX_INT_ATB_ASYNCFIFO_OVERFLOW 6 +#define A6XX_INT_RBBM_GPC_ERROR 7 +#define A6XX_INT_CP_SW 8 +#define A6XX_INT_CP_HW_ERROR 9 +#define A6XX_INT_CP_CCU_FLUSH_DEPTH_TS 10 +#define A6XX_INT_CP_CCU_FLUSH_COLOR_TS 11 +#define A6XX_INT_CP_CCU_RESOLVE_TS 12 +#define A6XX_INT_CP_IB2 13 +#define A6XX_INT_CP_IB1 14 +#define A6XX_INT_CP_RB 15 +#define A6XX_INT_CP_RB_DONE_TS 17 +#define A6XX_INT_CP_WT_DONE_TS 18 +#define A6XX_INT_CP_CACHE_FLUSH_TS 20 +#define A6XX_INT_RBBM_ATB_BUS_OVERFLOW 22 +#define A6XX_INT_RBBM_HANG_DETECT 23 +#define A6XX_INT_UCHE_OOB_ACCESS 24 +#define A6XX_INT_UCHE_TRAP_INTR 25 +#define A6XX_INT_DEBBUS_INTR_0 26 +#define A6XX_INT_DEBBUS_INTR_1 27 +#define A6XX_INT_TSB_WRITE_ERROR 28 +#define A6XX_INT_ISDB_CPU_IRQ 30 +#define A6XX_INT_ISDB_UNDER_DEBUG 31 + +/* CP Interrupt bits */ +#define A6XX_CP_OPCODE_ERROR 0 +#define A6XX_CP_UCODE_ERROR 1 +#define A6XX_CP_HW_FAULT_ERROR 2 +#define A6XX_CP_REGISTER_PROTECTION_ERROR 4 +#define A6XX_CP_AHB_ERROR 5 +#define A6XX_CP_VSD_PARITY_ERROR 6 +#define A6XX_CP_ILLEGAL_INSTR_ERROR 7 + +/* CP registers */ +#define A6XX_CP_RB_BASE 0x800 +#define A6XX_CP_RB_BASE_HI 0x801 +#define A6XX_CP_RB_CNTL 0x802 +#define A6XX_CP_RB_RPTR_ADDR_LO 0x804 +#define A6XX_CP_RB_RPTR_ADDR_HI 0x805 +#define A6XX_CP_RB_RPTR 0x806 +#define A6XX_CP_RB_WPTR 0x807 +#define A6XX_CP_SQE_CNTL 0x808 +#define A6XX_CP_CP2GMU_STATUS 0x812 +#define A6XX_CP_HW_FAULT 0x821 +#define A6XX_CP_INTERRUPT_STATUS 0x823 +#define A6XX_CP_PROTECT_STATUS 0x824 +#define A6XX_CP_STATUS_1 0x825 +#define A6XX_CP_SQE_INSTR_BASE_LO 0x830 +#define A6XX_CP_SQE_INSTR_BASE_HI 0x831 +#define A6XX_CP_MISC_CNTL 0x840 +#define A6XX_CP_APRIV_CNTL 0X844 +#define A6XX_CP_ROQ_THRESHOLDS_1 0x8C1 +#define A6XX_CP_ROQ_THRESHOLDS_2 0x8C2 +#define A6XX_CP_MEM_POOL_SIZE 0x8C3 +#define A6XX_CP_CHICKEN_DBG 0x841 +#define 
A6XX_CP_ADDR_MODE_CNTL 0x842 +#define A6XX_CP_DBG_ECO_CNTL 0x843 +#define A6XX_CP_PROTECT_CNTL 0x84F +#define A6XX_CP_PROTECT_REG 0x850 +#define A6XX_CP_CONTEXT_SWITCH_CNTL 0x8A0 +#define A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO 0x8A1 +#define A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI 0x8A2 +#define A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO 0x8A3 +#define A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI 0x8A4 +#define A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO 0x8A5 +#define A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI 0x8A6 +#define A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO 0x8A7 +#define A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI 0x8A8 +#define A6XX_CP_CONTEXT_SWITCH_LEVEL_STATUS 0x8AB +#define A6XX_CP_PERFCTR_CP_SEL_0 0x8D0 +#define A6XX_CP_PERFCTR_CP_SEL_1 0x8D1 +#define A6XX_CP_PERFCTR_CP_SEL_2 0x8D2 +#define A6XX_CP_PERFCTR_CP_SEL_3 0x8D3 +#define A6XX_CP_PERFCTR_CP_SEL_4 0x8D4 +#define A6XX_CP_PERFCTR_CP_SEL_5 0x8D5 +#define A6XX_CP_PERFCTR_CP_SEL_6 0x8D6 +#define A6XX_CP_PERFCTR_CP_SEL_7 0x8D7 +#define A6XX_CP_PERFCTR_CP_SEL_8 0x8D8 +#define A6XX_CP_PERFCTR_CP_SEL_9 0x8D9 +#define A6XX_CP_PERFCTR_CP_SEL_10 0x8DA +#define A6XX_CP_PERFCTR_CP_SEL_11 0x8DB +#define A6XX_CP_PERFCTR_CP_SEL_12 0x8DC +#define A6XX_CP_PERFCTR_CP_SEL_13 0x8DD +#define A6XX_CP_CRASH_SCRIPT_BASE_LO 0x900 +#define A6XX_CP_CRASH_SCRIPT_BASE_HI 0x901 +#define A6XX_CP_CRASH_DUMP_CNTL 0x902 +#define A6XX_CP_CRASH_DUMP_STATUS 0x903 +#define A6XX_CP_SQE_STAT_ADDR 0x908 +#define A6XX_CP_SQE_STAT_DATA 0x909 +#define A6XX_CP_DRAW_STATE_ADDR 0x90A +#define A6XX_CP_DRAW_STATE_DATA 0x90B +#define A6XX_CP_ROQ_DBG_ADDR 0x90C +#define A6XX_CP_ROQ_DBG_DATA 0x90D +#define A6XX_CP_MEM_POOL_DBG_ADDR 0x90E +#define A6XX_CP_MEM_POOL_DBG_DATA 0x90F +#define A6XX_CP_SQE_UCODE_DBG_ADDR 0x910 +#define A6XX_CP_SQE_UCODE_DBG_DATA 0x911 +#define A6XX_CP_IB1_BASE 0x928 +#define A6XX_CP_IB1_BASE_HI 0x929 +#define A6XX_CP_IB1_REM_SIZE 0x92A +#define A6XX_CP_IB2_BASE 0x92B +#define 
A6XX_CP_IB2_BASE_HI 0x92C +#define A6XX_CP_IB2_REM_SIZE 0x92D +#define A6XX_CP_ALWAYS_ON_COUNTER_LO 0x980 +#define A6XX_CP_ALWAYS_ON_COUNTER_HI 0x981 +#define A6XX_CP_AHB_CNTL 0x98D +#define A6XX_CP_APERTURE_CNTL_HOST 0xA00 +#define A6XX_CP_APERTURE_CNTL_CD 0xA03 +#define A6XX_VSC_ADDR_MODE_CNTL 0xC01 + +/* LPAC registers */ +#define A6XX_CP_LPAC_DRAW_STATE_ADDR 0xB0A +#define A6XX_CP_LPAC_DRAW_STATE_DATA 0xB0B +#define A6XX_CP_LPAC_ROQ_DBG_ADDR 0xB0C +#define A6XX_CP_SQE_AC_UCODE_DBG_ADDR 0xB27 +#define A6XX_CP_SQE_AC_UCODE_DBG_DATA 0xB28 +#define A6XX_CP_SQE_AC_STAT_ADDR 0xB29 +#define A6XX_CP_SQE_AC_STAT_DATA 0xB2A +#define A6XX_CP_LPAC_ROQ_THRESHOLDS_1 0xB32 +#define A6XX_CP_LPAC_ROQ_THRESHOLDS_2 0xB33 +#define A6XX_CP_LPAC_PROG_FIFO_SIZE 0xB34 +#define A6XX_CP_LPAC_ROQ_DBG_DATA 0xB35 +#define A6XX_CP_LPAC_FIFO_DBG_DATA 0xB36 +#define A6XX_CP_LPAC_FIFO_DBG_ADDR 0xB40 + +/* RBBM registers */ +#define A6XX_RBBM_INT_0_STATUS 0x201 +#define A6XX_RBBM_STATUS 0x210 +#define A6XX_RBBM_STATUS3 0x213 +#define A6XX_RBBM_VBIF_GX_RESET_STATUS 0x215 +#define A6XX_RBBM_PERFCTR_CP_0_LO 0x400 +#define A6XX_RBBM_PERFCTR_CP_0_HI 0x401 +#define A6XX_RBBM_PERFCTR_CP_1_LO 0x402 +#define A6XX_RBBM_PERFCTR_CP_1_HI 0x403 +#define A6XX_RBBM_PERFCTR_CP_2_LO 0x404 +#define A6XX_RBBM_PERFCTR_CP_2_HI 0x405 +#define A6XX_RBBM_PERFCTR_CP_3_LO 0x406 +#define A6XX_RBBM_PERFCTR_CP_3_HI 0x407 +#define A6XX_RBBM_PERFCTR_CP_4_LO 0x408 +#define A6XX_RBBM_PERFCTR_CP_4_HI 0x409 +#define A6XX_RBBM_PERFCTR_CP_5_LO 0x40a +#define A6XX_RBBM_PERFCTR_CP_5_HI 0x40b +#define A6XX_RBBM_PERFCTR_CP_6_LO 0x40c +#define A6XX_RBBM_PERFCTR_CP_6_HI 0x40d +#define A6XX_RBBM_PERFCTR_CP_7_LO 0x40e +#define A6XX_RBBM_PERFCTR_CP_7_HI 0x40f +#define A6XX_RBBM_PERFCTR_CP_8_LO 0x410 +#define A6XX_RBBM_PERFCTR_CP_8_HI 0x411 +#define A6XX_RBBM_PERFCTR_CP_9_LO 0x412 +#define A6XX_RBBM_PERFCTR_CP_9_HI 0x413 +#define A6XX_RBBM_PERFCTR_CP_10_LO 0x414 +#define A6XX_RBBM_PERFCTR_CP_10_HI 0x415 +#define A6XX_RBBM_PERFCTR_CP_11_LO 
0x416 +#define A6XX_RBBM_PERFCTR_CP_11_HI 0x417 +#define A6XX_RBBM_PERFCTR_CP_12_LO 0x418 +#define A6XX_RBBM_PERFCTR_CP_12_HI 0x419 +#define A6XX_RBBM_PERFCTR_CP_13_LO 0x41a +#define A6XX_RBBM_PERFCTR_CP_13_HI 0x41b +#define A6XX_RBBM_PERFCTR_RBBM_0_LO 0x41c +#define A6XX_RBBM_PERFCTR_RBBM_0_HI 0x41d +#define A6XX_RBBM_PERFCTR_RBBM_1_LO 0x41e +#define A6XX_RBBM_PERFCTR_RBBM_1_HI 0x41f +#define A6XX_RBBM_PERFCTR_RBBM_2_LO 0x420 +#define A6XX_RBBM_PERFCTR_RBBM_2_HI 0x421 +#define A6XX_RBBM_PERFCTR_RBBM_3_LO 0x422 +#define A6XX_RBBM_PERFCTR_RBBM_3_HI 0x423 +#define A6XX_RBBM_PERFCTR_PC_0_LO 0x424 +#define A6XX_RBBM_PERFCTR_PC_0_HI 0x425 +#define A6XX_RBBM_PERFCTR_PC_1_LO 0x426 +#define A6XX_RBBM_PERFCTR_PC_1_HI 0x427 +#define A6XX_RBBM_PERFCTR_PC_2_LO 0x428 +#define A6XX_RBBM_PERFCTR_PC_2_HI 0x429 +#define A6XX_RBBM_PERFCTR_PC_3_LO 0x42a +#define A6XX_RBBM_PERFCTR_PC_3_HI 0x42b +#define A6XX_RBBM_PERFCTR_PC_4_LO 0x42c +#define A6XX_RBBM_PERFCTR_PC_4_HI 0x42d +#define A6XX_RBBM_PERFCTR_PC_5_LO 0x42e +#define A6XX_RBBM_PERFCTR_PC_5_HI 0x42f +#define A6XX_RBBM_PERFCTR_PC_6_LO 0x430 +#define A6XX_RBBM_PERFCTR_PC_6_HI 0x431 +#define A6XX_RBBM_PERFCTR_PC_7_LO 0x432 +#define A6XX_RBBM_PERFCTR_PC_7_HI 0x433 +#define A6XX_RBBM_PERFCTR_VFD_0_LO 0x434 +#define A6XX_RBBM_PERFCTR_VFD_0_HI 0x435 +#define A6XX_RBBM_PERFCTR_VFD_1_LO 0x436 +#define A6XX_RBBM_PERFCTR_VFD_1_HI 0x437 +#define A6XX_RBBM_PERFCTR_VFD_2_LO 0x438 +#define A6XX_RBBM_PERFCTR_VFD_2_HI 0x439 +#define A6XX_RBBM_PERFCTR_VFD_3_LO 0x43a +#define A6XX_RBBM_PERFCTR_VFD_3_HI 0x43b +#define A6XX_RBBM_PERFCTR_VFD_4_LO 0x43c +#define A6XX_RBBM_PERFCTR_VFD_4_HI 0x43d +#define A6XX_RBBM_PERFCTR_VFD_5_LO 0x43e +#define A6XX_RBBM_PERFCTR_VFD_5_HI 0x43f +#define A6XX_RBBM_PERFCTR_VFD_6_LO 0x440 +#define A6XX_RBBM_PERFCTR_VFD_6_HI 0x441 +#define A6XX_RBBM_PERFCTR_VFD_7_LO 0x442 +#define A6XX_RBBM_PERFCTR_VFD_7_HI 0x443 +#define A6XX_RBBM_PERFCTR_HLSQ_0_LO 0x444 +#define A6XX_RBBM_PERFCTR_HLSQ_0_HI 0x445 +#define 
A6XX_RBBM_PERFCTR_HLSQ_1_LO 0x446 +#define A6XX_RBBM_PERFCTR_HLSQ_1_HI 0x447 +#define A6XX_RBBM_PERFCTR_HLSQ_2_LO 0x448 +#define A6XX_RBBM_PERFCTR_HLSQ_2_HI 0x449 +#define A6XX_RBBM_PERFCTR_HLSQ_3_LO 0x44a +#define A6XX_RBBM_PERFCTR_HLSQ_3_HI 0x44b +#define A6XX_RBBM_PERFCTR_HLSQ_4_LO 0x44c +#define A6XX_RBBM_PERFCTR_HLSQ_4_HI 0x44d +#define A6XX_RBBM_PERFCTR_HLSQ_5_LO 0x44e +#define A6XX_RBBM_PERFCTR_HLSQ_5_HI 0x44f +#define A6XX_RBBM_PERFCTR_VPC_0_LO 0x450 +#define A6XX_RBBM_PERFCTR_VPC_0_HI 0x451 +#define A6XX_RBBM_PERFCTR_VPC_1_LO 0x452 +#define A6XX_RBBM_PERFCTR_VPC_1_HI 0x453 +#define A6XX_RBBM_PERFCTR_VPC_2_LO 0x454 +#define A6XX_RBBM_PERFCTR_VPC_2_HI 0x455 +#define A6XX_RBBM_PERFCTR_VPC_3_LO 0x456 +#define A6XX_RBBM_PERFCTR_VPC_3_HI 0x457 +#define A6XX_RBBM_PERFCTR_VPC_4_LO 0x458 +#define A6XX_RBBM_PERFCTR_VPC_4_HI 0x459 +#define A6XX_RBBM_PERFCTR_VPC_5_LO 0x45a +#define A6XX_RBBM_PERFCTR_VPC_5_HI 0x45b +#define A6XX_RBBM_PERFCTR_CCU_0_LO 0x45c +#define A6XX_RBBM_PERFCTR_CCU_0_HI 0x45d +#define A6XX_RBBM_PERFCTR_CCU_1_LO 0x45e +#define A6XX_RBBM_PERFCTR_CCU_1_HI 0x45f +#define A6XX_RBBM_PERFCTR_CCU_2_LO 0x460 +#define A6XX_RBBM_PERFCTR_CCU_2_HI 0x461 +#define A6XX_RBBM_PERFCTR_CCU_3_LO 0x462 +#define A6XX_RBBM_PERFCTR_CCU_3_HI 0x463 +#define A6XX_RBBM_PERFCTR_CCU_4_LO 0x464 +#define A6XX_RBBM_PERFCTR_CCU_4_HI 0x465 +#define A6XX_RBBM_PERFCTR_TSE_0_LO 0x466 +#define A6XX_RBBM_PERFCTR_TSE_0_HI 0x467 +#define A6XX_RBBM_PERFCTR_TSE_1_LO 0x468 +#define A6XX_RBBM_PERFCTR_TSE_1_HI 0x469 +#define A6XX_RBBM_PERFCTR_TSE_2_LO 0x46a +#define A6XX_RBBM_PERFCTR_TSE_2_HI 0x46b +#define A6XX_RBBM_PERFCTR_TSE_3_LO 0x46c +#define A6XX_RBBM_PERFCTR_TSE_3_HI 0x46d +#define A6XX_RBBM_PERFCTR_RAS_0_LO 0x46e +#define
A6XX_RBBM_PERFCTR_RAS_0_HI 0x46f +#define A6XX_RBBM_PERFCTR_RAS_1_LO 0x470 +#define A6XX_RBBM_PERFCTR_RAS_1_HI 0x471 +#define A6XX_RBBM_PERFCTR_RAS_2_LO 0x472 +#define A6XX_RBBM_PERFCTR_RAS_2_HI 0x473 +#define A6XX_RBBM_PERFCTR_RAS_3_LO 0x474 +#define A6XX_RBBM_PERFCTR_RAS_3_HI 0x475 +#define A6XX_RBBM_PERFCTR_UCHE_0_LO 0x476 +#define A6XX_RBBM_PERFCTR_UCHE_0_HI 0x477 +#define A6XX_RBBM_PERFCTR_UCHE_1_LO 0x478 +#define A6XX_RBBM_PERFCTR_UCHE_1_HI 0x479 +#define A6XX_RBBM_PERFCTR_UCHE_2_LO 0x47a +#define A6XX_RBBM_PERFCTR_UCHE_2_HI 0x47b +#define A6XX_RBBM_PERFCTR_UCHE_3_LO 0x47c +#define A6XX_RBBM_PERFCTR_UCHE_3_HI 0x47d +#define A6XX_RBBM_PERFCTR_UCHE_4_LO 0x47e +#define A6XX_RBBM_PERFCTR_UCHE_4_HI 0x47f +#define A6XX_RBBM_PERFCTR_UCHE_5_LO 0x480 +#define A6XX_RBBM_PERFCTR_UCHE_5_HI 0x481 +#define A6XX_RBBM_PERFCTR_UCHE_6_LO 0x482 +#define A6XX_RBBM_PERFCTR_UCHE_6_HI 0x483 +#define A6XX_RBBM_PERFCTR_UCHE_7_LO 0x484 +#define A6XX_RBBM_PERFCTR_UCHE_7_HI 0x485 +#define A6XX_RBBM_PERFCTR_UCHE_8_LO 0x486 +#define A6XX_RBBM_PERFCTR_UCHE_8_HI 0x487 +#define A6XX_RBBM_PERFCTR_UCHE_9_LO 0x488 +#define A6XX_RBBM_PERFCTR_UCHE_9_HI 0x489 +#define A6XX_RBBM_PERFCTR_UCHE_10_LO 0x48a +#define A6XX_RBBM_PERFCTR_UCHE_10_HI 0x48b +#define A6XX_RBBM_PERFCTR_UCHE_11_LO 0x48c +#define A6XX_RBBM_PERFCTR_UCHE_11_HI 0x48d +#define A6XX_RBBM_PERFCTR_TP_0_LO 0x48e +#define A6XX_RBBM_PERFCTR_TP_0_HI 0x48f +#define A6XX_RBBM_PERFCTR_TP_1_LO 0x490 +#define A6XX_RBBM_PERFCTR_TP_1_HI 0x491 +#define A6XX_RBBM_PERFCTR_TP_2_LO 0x492 +#define A6XX_RBBM_PERFCTR_TP_2_HI 0x493 +#define A6XX_RBBM_PERFCTR_TP_3_LO 0x494 +#define A6XX_RBBM_PERFCTR_TP_3_HI 0x495 +#define A6XX_RBBM_PERFCTR_TP_4_LO 0x496 +#define A6XX_RBBM_PERFCTR_TP_4_HI 0x497 +#define A6XX_RBBM_PERFCTR_TP_5_LO 0x498 +#define A6XX_RBBM_PERFCTR_TP_5_HI 0x499 +#define A6XX_RBBM_PERFCTR_TP_6_LO 0x49a +#define A6XX_RBBM_PERFCTR_TP_6_HI 0x49b +#define A6XX_RBBM_PERFCTR_TP_7_LO 0x49c +#define A6XX_RBBM_PERFCTR_TP_7_HI 0x49d +#define 
A6XX_RBBM_PERFCTR_TP_8_LO 0x49e +#define A6XX_RBBM_PERFCTR_TP_8_HI 0x49f +#define A6XX_RBBM_PERFCTR_TP_9_LO 0x4a0 +#define A6XX_RBBM_PERFCTR_TP_9_HI 0x4a1 +#define A6XX_RBBM_PERFCTR_TP_10_LO 0x4a2 +#define A6XX_RBBM_PERFCTR_TP_10_HI 0x4a3 +#define A6XX_RBBM_PERFCTR_TP_11_LO 0x4a4 +#define A6XX_RBBM_PERFCTR_TP_11_HI 0x4a5 +#define A6XX_RBBM_PERFCTR_SP_0_LO 0x4a6 +#define A6XX_RBBM_PERFCTR_SP_0_HI 0x4a7 +#define A6XX_RBBM_PERFCTR_SP_1_LO 0x4a8 +#define A6XX_RBBM_PERFCTR_SP_1_HI 0x4a9 +#define A6XX_RBBM_PERFCTR_SP_2_LO 0x4aa +#define A6XX_RBBM_PERFCTR_SP_2_HI 0x4ab +#define A6XX_RBBM_PERFCTR_SP_3_LO 0x4ac +#define A6XX_RBBM_PERFCTR_SP_3_HI 0x4ad +#define A6XX_RBBM_PERFCTR_SP_4_LO 0x4ae +#define A6XX_RBBM_PERFCTR_SP_4_HI 0x4af +#define A6XX_RBBM_PERFCTR_SP_5_LO 0x4b0 +#define A6XX_RBBM_PERFCTR_SP_5_HI 0x4b1 +#define A6XX_RBBM_PERFCTR_SP_6_LO 0x4b2 +#define A6XX_RBBM_PERFCTR_SP_6_HI 0x4b3 +#define A6XX_RBBM_PERFCTR_SP_7_LO 0x4b4 +#define A6XX_RBBM_PERFCTR_SP_7_HI 0x4b5 +#define A6XX_RBBM_PERFCTR_SP_8_LO 0x4b6 +#define A6XX_RBBM_PERFCTR_SP_8_HI 0x4b7 +#define A6XX_RBBM_PERFCTR_SP_9_LO 0x4b8 +#define A6XX_RBBM_PERFCTR_SP_9_HI 0x4b9 +#define A6XX_RBBM_PERFCTR_SP_10_LO 0x4ba +#define A6XX_RBBM_PERFCTR_SP_10_HI 0x4bb +#define A6XX_RBBM_PERFCTR_SP_11_LO 0x4bc +#define A6XX_RBBM_PERFCTR_SP_11_HI 0x4bd +#define A6XX_RBBM_PERFCTR_SP_12_LO 0x4be +#define A6XX_RBBM_PERFCTR_SP_12_HI 0x4bf +#define A6XX_RBBM_PERFCTR_SP_13_LO 0x4c0 +#define A6XX_RBBM_PERFCTR_SP_13_HI 0x4c1 +#define A6XX_RBBM_PERFCTR_SP_14_LO 0x4c2 +#define A6XX_RBBM_PERFCTR_SP_14_HI 0x4c3 +#define A6XX_RBBM_PERFCTR_SP_15_LO 0x4c4 +#define A6XX_RBBM_PERFCTR_SP_15_HI 0x4c5 +#define A6XX_RBBM_PERFCTR_SP_16_LO 0x4c6 +#define A6XX_RBBM_PERFCTR_SP_16_HI 0x4c7 +#define A6XX_RBBM_PERFCTR_SP_17_LO 0x4c8 +#define A6XX_RBBM_PERFCTR_SP_17_HI 0x4c9 +#define A6XX_RBBM_PERFCTR_SP_18_LO 0x4ca +#define A6XX_RBBM_PERFCTR_SP_18_HI 0x4cb +#define A6XX_RBBM_PERFCTR_SP_19_LO 0x4cc +#define A6XX_RBBM_PERFCTR_SP_19_HI 0x4cd +#define 
A6XX_RBBM_PERFCTR_SP_20_LO 0x4ce +#define A6XX_RBBM_PERFCTR_SP_20_HI 0x4cf +#define A6XX_RBBM_PERFCTR_SP_21_LO 0x4d0 +#define A6XX_RBBM_PERFCTR_SP_21_HI 0x4d1 +#define A6XX_RBBM_PERFCTR_SP_22_LO 0x4d2 +#define A6XX_RBBM_PERFCTR_SP_22_HI 0x4d3 +#define A6XX_RBBM_PERFCTR_SP_23_LO 0x4d4 +#define A6XX_RBBM_PERFCTR_SP_23_HI 0x4d5 +#define A6XX_RBBM_PERFCTR_RB_0_LO 0x4d6 +#define A6XX_RBBM_PERFCTR_RB_0_HI 0x4d7 +#define A6XX_RBBM_PERFCTR_RB_1_LO 0x4d8 +#define A6XX_RBBM_PERFCTR_RB_1_HI 0x4d9 +#define A6XX_RBBM_PERFCTR_RB_2_LO 0x4da +#define A6XX_RBBM_PERFCTR_RB_2_HI 0x4db +#define A6XX_RBBM_PERFCTR_RB_3_LO 0x4dc +#define A6XX_RBBM_PERFCTR_RB_3_HI 0x4dd +#define A6XX_RBBM_PERFCTR_RB_4_LO 0x4de +#define A6XX_RBBM_PERFCTR_RB_4_HI 0x4df +#define A6XX_RBBM_PERFCTR_RB_5_LO 0x4e0 +#define A6XX_RBBM_PERFCTR_RB_5_HI 0x4e1 +#define A6XX_RBBM_PERFCTR_RB_6_LO 0x4e2 +#define A6XX_RBBM_PERFCTR_RB_6_HI 0x4e3 +#define A6XX_RBBM_PERFCTR_RB_7_LO 0x4e4 +#define A6XX_RBBM_PERFCTR_RB_7_HI 0x4e5 +#define A6XX_RBBM_PERFCTR_VSC_0_LO 0x4e6 +#define A6XX_RBBM_PERFCTR_VSC_0_HI 0x4e7 +#define A6XX_RBBM_PERFCTR_VSC_1_LO 0x4e8 +#define A6XX_RBBM_PERFCTR_VSC_1_HI 0x4e9 +#define A6XX_RBBM_PERFCTR_LRZ_0_LO 0x4ea +#define A6XX_RBBM_PERFCTR_LRZ_0_HI 0x4eb +#define A6XX_RBBM_PERFCTR_LRZ_1_LO 0x4ec +#define A6XX_RBBM_PERFCTR_LRZ_1_HI 0x4ed +#define A6XX_RBBM_PERFCTR_LRZ_2_LO 0x4ee +#define A6XX_RBBM_PERFCTR_LRZ_2_HI 0x4ef +#define A6XX_RBBM_PERFCTR_LRZ_3_LO 0x4f0 +#define A6XX_RBBM_PERFCTR_LRZ_3_HI 0x4f1 +#define A6XX_RBBM_PERFCTR_CMP_0_LO 0x4f2 +#define A6XX_RBBM_PERFCTR_CMP_0_HI 0x4f3 +#define A6XX_RBBM_PERFCTR_CMP_1_LO 0x4f4 +#define A6XX_RBBM_PERFCTR_CMP_1_HI 0x4f5 +#define A6XX_RBBM_PERFCTR_CMP_2_LO 0x4f6 +#define A6XX_RBBM_PERFCTR_CMP_2_HI 0x4f7 +#define A6XX_RBBM_PERFCTR_CMP_3_LO 0x4f8 +#define A6XX_RBBM_PERFCTR_CMP_3_HI 0x4f9 +#define A6XX_RBBM_PERFCTR_CNTL 0x500 +#define A6XX_RBBM_PERFCTR_LOAD_CMD0 0x501 +#define A6XX_RBBM_PERFCTR_LOAD_CMD1 0x502 +#define A6XX_RBBM_PERFCTR_LOAD_CMD2 0x503 +#define 
A6XX_RBBM_PERFCTR_LOAD_CMD3 0x504 +#define A6XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x505 +#define A6XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x506 +#define A6XX_RBBM_PERFCTR_RBBM_SEL_0 0x507 +#define A6XX_RBBM_PERFCTR_RBBM_SEL_1 0x508 +#define A6XX_RBBM_PERFCTR_RBBM_SEL_2 0x509 +#define A6XX_RBBM_PERFCTR_RBBM_SEL_3 0x50A +#define A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED 0x50B + +#define A6XX_RBBM_ISDB_CNT 0x533 +#define A6XX_RBBM_NC_MODE_CNTL 0X534 +#define A6XX_RBBM_SNAPSHOT_STATUS 0x535 + +#define A6XX_RBBM_SECVID_TRUST_CNTL 0xF400 +#define A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO 0xF800 +#define A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI 0xF801 +#define A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE 0xF802 +#define A6XX_RBBM_SECVID_TSB_CNTL 0xF803 +#define A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL 0xF810 + +#define A6XX_RBBM_VBIF_CLIENT_QOS_CNTL 0x00010 +#define A6XX_RBBM_GBIF_CLIENT_QOS_CNTL 0x00011 +#define A6XX_RBBM_GBIF_HALT 0x00016 +#define A6XX_RBBM_GBIF_HALT_ACK 0x00017 +#define A6XX_RBBM_GPR0_CNTL 0x00018 +#define A6XX_RBBM_INTERFACE_HANG_INT_CNTL 0x0001f +#define A6XX_RBBM_INT_CLEAR_CMD 0x00037 +#define A6XX_RBBM_INT_0_MASK 0x00038 +#define A6XX_RBBM_INT_2_MASK 0x0003A +#define A6XX_RBBM_SP_HYST_CNT 0x00042 +#define A6XX_RBBM_SW_RESET_CMD 0x00043 +#define A6XX_RBBM_RAC_THRESHOLD_CNT 0x00044 +#define A6XX_RBBM_BLOCK_SW_RESET_CMD 0x00045 +#define A6XX_RBBM_BLOCK_SW_RESET_CMD2 0x00046 +#define A6XX_RBBM_BLOCK_GX_RETENTION_CNTL 0x00050 +#define A6XX_RBBM_CLOCK_CNTL 0x000ae +#define A6XX_RBBM_CLOCK_CNTL_SP0 0x000b0 +#define A6XX_RBBM_CLOCK_CNTL_SP1 0x000b1 +#define A6XX_RBBM_CLOCK_CNTL_SP2 0x000b2 +#define A6XX_RBBM_CLOCK_CNTL_SP3 0x000b3 +#define A6XX_RBBM_CLOCK_CNTL2_SP0 0x000b4 +#define A6XX_RBBM_CLOCK_CNTL2_SP1 0x000b5 +#define A6XX_RBBM_CLOCK_CNTL2_SP2 0x000b6 +#define A6XX_RBBM_CLOCK_CNTL2_SP3 0x000b7 +#define A6XX_RBBM_CLOCK_DELAY_SP0 0x000b8 +#define A6XX_RBBM_CLOCK_DELAY_SP1 0x000b9 +#define A6XX_RBBM_CLOCK_DELAY_SP2 0x000ba +#define A6XX_RBBM_CLOCK_DELAY_SP3 0x000bb +#define A6XX_RBBM_CLOCK_HYST_SP0 
0x000bc +#define A6XX_RBBM_CLOCK_HYST_SP1 0x000bd +#define A6XX_RBBM_CLOCK_HYST_SP2 0x000be +#define A6XX_RBBM_CLOCK_HYST_SP3 0x000bf +#define A6XX_RBBM_CLOCK_CNTL_TP0 0x000c0 +#define A6XX_RBBM_CLOCK_CNTL_TP1 0x000c1 +#define A6XX_RBBM_CLOCK_CNTL_TP2 0x000c2 +#define A6XX_RBBM_CLOCK_CNTL_TP3 0x000c3 +#define A6XX_RBBM_CLOCK_CNTL2_TP0 0x000c4 +#define A6XX_RBBM_CLOCK_CNTL2_TP1 0x000c5 +#define A6XX_RBBM_CLOCK_CNTL2_TP2 0x000c6 +#define A6XX_RBBM_CLOCK_CNTL2_TP3 0x000c7 +#define A6XX_RBBM_CLOCK_CNTL3_TP0 0x000c8 +#define A6XX_RBBM_CLOCK_CNTL3_TP1 0x000c9 +#define A6XX_RBBM_CLOCK_CNTL3_TP2 0x000ca +#define A6XX_RBBM_CLOCK_CNTL3_TP3 0x000cb +#define A6XX_RBBM_CLOCK_CNTL4_TP0 0x000cc +#define A6XX_RBBM_CLOCK_CNTL4_TP1 0x000cd +#define A6XX_RBBM_CLOCK_CNTL4_TP2 0x000ce +#define A6XX_RBBM_CLOCK_CNTL4_TP3 0x000cf +#define A6XX_RBBM_CLOCK_DELAY_TP0 0x000d0 +#define A6XX_RBBM_CLOCK_DELAY_TP1 0x000d1 +#define A6XX_RBBM_CLOCK_DELAY_TP2 0x000d2 +#define A6XX_RBBM_CLOCK_DELAY_TP3 0x000d3 +#define A6XX_RBBM_CLOCK_DELAY2_TP0 0x000d4 +#define A6XX_RBBM_CLOCK_DELAY2_TP1 0x000d5 +#define A6XX_RBBM_CLOCK_DELAY2_TP2 0x000d6 +#define A6XX_RBBM_CLOCK_DELAY2_TP3 0x000d7 +#define A6XX_RBBM_CLOCK_DELAY3_TP0 0x000d8 +#define A6XX_RBBM_CLOCK_DELAY3_TP1 0x000d9 +#define A6XX_RBBM_CLOCK_DELAY3_TP2 0x000da +#define A6XX_RBBM_CLOCK_DELAY3_TP3 0x000db +#define A6XX_RBBM_CLOCK_DELAY4_TP0 0x000dc +#define A6XX_RBBM_CLOCK_DELAY4_TP1 0x000dd +#define A6XX_RBBM_CLOCK_DELAY4_TP2 0x000de +#define A6XX_RBBM_CLOCK_DELAY4_TP3 0x000df +#define A6XX_RBBM_CLOCK_HYST_TP0 0x000e0 +#define A6XX_RBBM_CLOCK_HYST_TP1 0x000e1 +#define A6XX_RBBM_CLOCK_HYST_TP2 0x000e2 +#define A6XX_RBBM_CLOCK_HYST_TP3 0x000e3 +#define A6XX_RBBM_CLOCK_HYST2_TP0 0x000e4 +#define A6XX_RBBM_CLOCK_HYST2_TP1 0x000e5 +#define A6XX_RBBM_CLOCK_HYST2_TP2 0x000e6 +#define A6XX_RBBM_CLOCK_HYST2_TP3 0x000e7 +#define A6XX_RBBM_CLOCK_HYST3_TP0 0x000e8 +#define A6XX_RBBM_CLOCK_HYST3_TP1 0x000e9 +#define A6XX_RBBM_CLOCK_HYST3_TP2 0x000ea +#define 
A6XX_RBBM_CLOCK_HYST3_TP3 0x000eb +#define A6XX_RBBM_CLOCK_HYST4_TP0 0x000ec +#define A6XX_RBBM_CLOCK_HYST4_TP1 0x000ed +#define A6XX_RBBM_CLOCK_HYST4_TP2 0x000ee +#define A6XX_RBBM_CLOCK_HYST4_TP3 0x000ef +#define A6XX_RBBM_CLOCK_CNTL_RB0 0x000f0 +#define A6XX_RBBM_CLOCK_CNTL_RB1 0x000f1 +#define A6XX_RBBM_CLOCK_CNTL_RB2 0x000f2 +#define A6XX_RBBM_CLOCK_CNTL_RB3 0x000f3 +#define A6XX_RBBM_CLOCK_CNTL2_RB0 0x000f4 +#define A6XX_RBBM_CLOCK_CNTL2_RB1 0x000f5 +#define A6XX_RBBM_CLOCK_CNTL2_RB2 0x000f6 +#define A6XX_RBBM_CLOCK_CNTL2_RB3 0x000f7 +#define A6XX_RBBM_CLOCK_CNTL_CCU0 0x000f8 +#define A6XX_RBBM_CLOCK_CNTL_CCU1 0x000f9 +#define A6XX_RBBM_CLOCK_CNTL_CCU2 0x000fa +#define A6XX_RBBM_CLOCK_CNTL_CCU3 0x000fb +#define A6XX_RBBM_CLOCK_HYST_RB_CCU0 0x00100 +#define A6XX_RBBM_CLOCK_HYST_RB_CCU1 0x00101 +#define A6XX_RBBM_CLOCK_HYST_RB_CCU2 0x00102 +#define A6XX_RBBM_CLOCK_HYST_RB_CCU3 0x00103 +#define A6XX_RBBM_CLOCK_CNTL_RAC 0x00104 +#define A6XX_RBBM_CLOCK_CNTL2_RAC 0x00105 +#define A6XX_RBBM_CLOCK_DELAY_RAC 0x00106 +#define A6XX_RBBM_CLOCK_HYST_RAC 0x00107 +#define A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM 0x00108 +#define A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x00109 +#define A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x0010a +#define A6XX_RBBM_CLOCK_CNTL_UCHE 0x0010b +#define A6XX_RBBM_CLOCK_CNTL2_UCHE 0x0010c +#define A6XX_RBBM_CLOCK_CNTL3_UCHE 0x0010d +#define A6XX_RBBM_CLOCK_CNTL4_UCHE 0x0010e +#define A6XX_RBBM_CLOCK_DELAY_UCHE 0x0010f +#define A6XX_RBBM_CLOCK_HYST_UCHE 0x00110 +#define A6XX_RBBM_CLOCK_MODE_VFD 0x00111 +#define A6XX_RBBM_CLOCK_DELAY_VFD 0x00112 +#define A6XX_RBBM_CLOCK_HYST_VFD 0x00113 +#define A6XX_RBBM_CLOCK_MODE_GPC 0x00114 +#define A6XX_RBBM_CLOCK_DELAY_GPC 0x00115 +#define A6XX_RBBM_CLOCK_HYST_GPC 0x00116 +#define A6XX_RBBM_CLOCK_DELAY_HLSQ_2 0x00117 +#define A6XX_RBBM_CLOCK_CNTL_GMU_GX 0x00118 +#define A6XX_RBBM_CLOCK_DELAY_GMU_GX 0x00119 +#define A6XX_RBBM_CLOCK_CNTL_TEX_FCHE 0x00120 +#define A6XX_RBBM_CLOCK_DELAY_TEX_FCHE 0x00121 +#define 
A6XX_RBBM_CLOCK_HYST_TEX_FCHE 0x00122 +#define A6XX_RBBM_CLOCK_HYST_GMU_GX 0x0011a +#define A6XX_RBBM_CLOCK_MODE_HLSQ 0x0011b +#define A6XX_RBBM_CLOCK_DELAY_HLSQ 0x0011c +#define A6XX_RBBM_CLOCK_HYST_HLSQ 0x0011d + +/* DBGC_CFG registers */ +#define A6XX_DBGC_CFG_DBGBUS_SEL_A 0x600 +#define A6XX_DBGC_CFG_DBGBUS_SEL_B 0x601 +#define A6XX_DBGC_CFG_DBGBUS_SEL_C 0x602 +#define A6XX_DBGC_CFG_DBGBUS_SEL_D 0x603 +#define A6XX_DBGC_CFG_DBGBUS_SEL_PING_INDEX_SHIFT 0x0 +#define A6XX_DBGC_CFG_DBGBUS_SEL_PING_BLK_SEL_SHIFT 0x8 +#define A6XX_DBGC_CFG_DBGBUS_CNTLT 0x604 +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN_SHIFT 0x0 +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU_SHIFT 0xC +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT_SHIFT 0x1C +#define A6XX_DBGC_CFG_DBGBUS_CNTLM 0x605 +#define A6XX_DBGC_CFG_DBGBUS_CTLTM_ENABLE_SHIFT 0x18 +#define A6XX_DBGC_CFG_DBGBUS_OPL 0x606 +#define A6XX_DBGC_CFG_DBGBUS_OPE 0x607 +#define A6XX_DBGC_CFG_DBGBUS_IVTL_0 0x608 +#define A6XX_DBGC_CFG_DBGBUS_IVTL_1 0x609 +#define A6XX_DBGC_CFG_DBGBUS_IVTL_2 0x60a +#define A6XX_DBGC_CFG_DBGBUS_IVTL_3 0x60b +#define A6XX_DBGC_CFG_DBGBUS_MASKL_0 0x60c +#define A6XX_DBGC_CFG_DBGBUS_MASKL_1 0x60d +#define A6XX_DBGC_CFG_DBGBUS_MASKL_2 0x60e +#define A6XX_DBGC_CFG_DBGBUS_MASKL_3 0x60f +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0 0x610 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1 0x611 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL0_SHIFT 0x0 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL1_SHIFT 0x4 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL2_SHIFT 0x8 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL3_SHIFT 0xC +#define A6XX_DBGC_CFG_DBGBUS_BYTEL4_SHIFT 0x10 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL5_SHIFT 0x14 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL6_SHIFT 0x18 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL7_SHIFT 0x1C +#define A6XX_DBGC_CFG_DBGBUS_BYTEL8_SHIFT 0x0 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL9_SHIFT 0x4 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL10_SHIFT 0x8 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL11_SHIFT 0xC +#define A6XX_DBGC_CFG_DBGBUS_BYTEL12_SHIFT 0x10 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL13_SHIFT 
0x14 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL14_SHIFT 0x18 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL15_SHIFT 0x1C +#define A6XX_DBGC_CFG_DBGBUS_IVTE_0 0x612 +#define A6XX_DBGC_CFG_DBGBUS_IVTE_1 0x613 +#define A6XX_DBGC_CFG_DBGBUS_IVTE_2 0x614 +#define A6XX_DBGC_CFG_DBGBUS_IVTE_3 0x615 +#define A6XX_DBGC_CFG_DBGBUS_MASKE_0 0x616 +#define A6XX_DBGC_CFG_DBGBUS_MASKE_1 0x617 +#define A6XX_DBGC_CFG_DBGBUS_MASKE_2 0x618 +#define A6XX_DBGC_CFG_DBGBUS_MASKE_3 0x619 +#define A6XX_DBGC_CFG_DBGBUS_NIBBLEE 0x61a +#define A6XX_DBGC_CFG_DBGBUS_PTRC0 0x61b +#define A6XX_DBGC_CFG_DBGBUS_PTRC1 0x61c +#define A6XX_DBGC_CFG_DBGBUS_LOADREG 0x61d +#define A6XX_DBGC_CFG_DBGBUS_IDX 0x61e +#define A6XX_DBGC_CFG_DBGBUS_CLRC 0x61f +#define A6XX_DBGC_CFG_DBGBUS_LOADIVT 0x620 +#define A6XX_DBGC_VBIF_DBG_CNTL 0x621 +#define A6XX_DBGC_DBG_LO_HI_GPIO 0x622 +#define A6XX_DBGC_EXT_TRACE_BUS_CNTL 0x623 +#define A6XX_DBGC_READ_AHB_THROUGH_DBG 0x624 +#define A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1 0x62f +#define A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2 0x630 +#define A6XX_DBGC_EVT_CFG 0x640 +#define A6XX_DBGC_EVT_INTF_SEL_0 0x641 +#define A6XX_DBGC_EVT_INTF_SEL_1 0x642 +#define A6XX_DBGC_PERF_ATB_CFG 0x643 +#define A6XX_DBGC_PERF_ATB_COUNTER_SEL_0 0x644 +#define A6XX_DBGC_PERF_ATB_COUNTER_SEL_1 0x645 +#define A6XX_DBGC_PERF_ATB_COUNTER_SEL_2 0x646 +#define A6XX_DBGC_PERF_ATB_COUNTER_SEL_3 0x647 +#define A6XX_DBGC_PERF_ATB_TRIG_INTF_SEL_0 0x648 +#define A6XX_DBGC_PERF_ATB_TRIG_INTF_SEL_1 0x649 +#define A6XX_DBGC_PERF_ATB_DRAIN_CMD 0x64a +#define A6XX_DBGC_ECO_CNTL 0x650 +#define A6XX_DBGC_AHB_DBG_CNTL 0x651 + +/* VSC registers */ +#define A6XX_VSC_PERFCTR_VSC_SEL_0 0xCD8 +#define A6XX_VSC_PERFCTR_VSC_SEL_1 0xCD9 + +/* GRAS registers */ +#define A6XX_GRAS_ADDR_MODE_CNTL 0x8601 +#define A6XX_GRAS_PERFCTR_TSE_SEL_0 0x8610 +#define A6XX_GRAS_PERFCTR_TSE_SEL_1 0x8611 +#define A6XX_GRAS_PERFCTR_TSE_SEL_2 0x8612 +#define A6XX_GRAS_PERFCTR_TSE_SEL_3 0x8613 +#define A6XX_GRAS_PERFCTR_RAS_SEL_0 0x8614 +#define A6XX_GRAS_PERFCTR_RAS_SEL_1 
0x8615 +#define A6XX_GRAS_PERFCTR_RAS_SEL_2 0x8616 +#define A6XX_GRAS_PERFCTR_RAS_SEL_3 0x8617 +#define A6XX_GRAS_PERFCTR_LRZ_SEL_0 0x8618 +#define A6XX_GRAS_PERFCTR_LRZ_SEL_1 0x8619 +#define A6XX_GRAS_PERFCTR_LRZ_SEL_2 0x861A +#define A6XX_GRAS_PERFCTR_LRZ_SEL_3 0x861B + +/* RB registers */ +#define A6XX_RB_ADDR_MODE_CNTL 0x8E05 +#define A6XX_RB_NC_MODE_CNTL 0x8E08 +#define A6XX_RB_PERFCTR_RB_SEL_0 0x8E10 +#define A6XX_RB_PERFCTR_RB_SEL_1 0x8E11 +#define A6XX_RB_PERFCTR_RB_SEL_2 0x8E12 +#define A6XX_RB_PERFCTR_RB_SEL_3 0x8E13 +#define A6XX_RB_PERFCTR_RB_SEL_4 0x8E14 +#define A6XX_RB_PERFCTR_RB_SEL_5 0x8E15 +#define A6XX_RB_PERFCTR_RB_SEL_6 0x8E16 +#define A6XX_RB_PERFCTR_RB_SEL_7 0x8E17 +#define A6XX_RB_PERFCTR_CCU_SEL_0 0x8E18 +#define A6XX_RB_PERFCTR_CCU_SEL_1 0x8E19 +#define A6XX_RB_PERFCTR_CCU_SEL_2 0x8E1A +#define A6XX_RB_PERFCTR_CCU_SEL_3 0x8E1B +#define A6XX_RB_PERFCTR_CCU_SEL_4 0x8E1C +#define A6XX_RB_PERFCTR_CMP_SEL_0 0x8E2C +#define A6XX_RB_PERFCTR_CMP_SEL_1 0x8E2D +#define A6XX_RB_PERFCTR_CMP_SEL_2 0x8E2E +#define A6XX_RB_PERFCTR_CMP_SEL_3 0x8E2F +#define A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_HOST 0x8E3B +#define A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD 0x8E3D +#define A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE 0x8E50 + +/* PC registers */ +#define A6XX_PC_DBG_ECO_CNTL 0x9E00 +#define A6XX_PC_ADDR_MODE_CNTL 0x9E01 +#define A6XX_PC_PERFCTR_PC_SEL_0 0x9E34 +#define A6XX_PC_PERFCTR_PC_SEL_1 0x9E35 +#define A6XX_PC_PERFCTR_PC_SEL_2 0x9E36 +#define A6XX_PC_PERFCTR_PC_SEL_3 0x9E37 +#define A6XX_PC_PERFCTR_PC_SEL_4 0x9E38 +#define A6XX_PC_PERFCTR_PC_SEL_5 0x9E39 +#define A6XX_PC_PERFCTR_PC_SEL_6 0x9E3A +#define A6XX_PC_PERFCTR_PC_SEL_7 0x9E3B + +/* HLSQ registers */ +#define A6XX_HLSQ_ADDR_MODE_CNTL 0xBE05 +#define A6XX_HLSQ_PERFCTR_HLSQ_SEL_0 0xBE10 +#define A6XX_HLSQ_PERFCTR_HLSQ_SEL_1 0xBE11 +#define A6XX_HLSQ_PERFCTR_HLSQ_SEL_2 0xBE12 +#define A6XX_HLSQ_PERFCTR_HLSQ_SEL_3 0xBE13 +#define A6XX_HLSQ_PERFCTR_HLSQ_SEL_4 0xBE14 +#define A6XX_HLSQ_PERFCTR_HLSQ_SEL_5 0xBE15 
+#define A6XX_HLSQ_DBG_AHB_READ_APERTURE 0xC800 +#define A6XX_HLSQ_DBG_READ_SEL 0xD000 + +/* VFD registers */ +#define A6XX_VFD_ADDR_MODE_CNTL 0xA601 +#define A6XX_VFD_PERFCTR_VFD_SEL_0 0xA610 +#define A6XX_VFD_PERFCTR_VFD_SEL_1 0xA611 +#define A6XX_VFD_PERFCTR_VFD_SEL_2 0xA612 +#define A6XX_VFD_PERFCTR_VFD_SEL_3 0xA613 +#define A6XX_VFD_PERFCTR_VFD_SEL_4 0xA614 +#define A6XX_VFD_PERFCTR_VFD_SEL_5 0xA615 +#define A6XX_VFD_PERFCTR_VFD_SEL_6 0xA616 +#define A6XX_VFD_PERFCTR_VFD_SEL_7 0xA617 + +/* VPC registers */ +#define A6XX_VPC_ADDR_MODE_CNTL 0x9601 +#define A6XX_VPC_PERFCTR_VPC_SEL_0 0x9604 +#define A6XX_VPC_PERFCTR_VPC_SEL_1 0x9605 +#define A6XX_VPC_PERFCTR_VPC_SEL_2 0x9606 +#define A6XX_VPC_PERFCTR_VPC_SEL_3 0x9607 +#define A6XX_VPC_PERFCTR_VPC_SEL_4 0x9608 +#define A6XX_VPC_PERFCTR_VPC_SEL_5 0x9609 + +/* UCHE registers */ +#define A6XX_UCHE_ADDR_MODE_CNTL 0xE00 +#define A6XX_UCHE_MODE_CNTL 0xE01 +#define A6XX_UCHE_WRITE_RANGE_MAX_LO 0xE05 +#define A6XX_UCHE_WRITE_RANGE_MAX_HI 0xE06 +#define A6XX_UCHE_WRITE_THRU_BASE_LO 0xE07 +#define A6XX_UCHE_WRITE_THRU_BASE_HI 0xE08 +#define A6XX_UCHE_TRAP_BASE_LO 0xE09 +#define A6XX_UCHE_TRAP_BASE_HI 0xE0A +#define A6XX_UCHE_GMEM_RANGE_MIN_LO 0xE0B +#define A6XX_UCHE_GMEM_RANGE_MIN_HI 0xE0C +#define A6XX_UCHE_GMEM_RANGE_MAX_LO 0xE0D +#define A6XX_UCHE_GMEM_RANGE_MAX_HI 0xE0E +#define A6XX_UCHE_CACHE_WAYS 0xE17 +#define A6XX_UCHE_FILTER_CNTL 0xE18 +#define A6XX_UCHE_CLIENT_PF 0xE19 +#define A6XX_UCHE_CLIENT_PF_CLIENT_ID_MASK 0x7 +#define A6XX_UCHE_PERFCTR_UCHE_SEL_0 0xE1C +#define A6XX_UCHE_PERFCTR_UCHE_SEL_1 0xE1D +#define A6XX_UCHE_PERFCTR_UCHE_SEL_2 0xE1E +#define A6XX_UCHE_PERFCTR_UCHE_SEL_3 0xE1F +#define A6XX_UCHE_PERFCTR_UCHE_SEL_4 0xE20 +#define A6XX_UCHE_PERFCTR_UCHE_SEL_5 0xE21 +#define A6XX_UCHE_PERFCTR_UCHE_SEL_6 0xE22 +#define A6XX_UCHE_PERFCTR_UCHE_SEL_7 0xE23 +#define A6XX_UCHE_PERFCTR_UCHE_SEL_8 0xE24 +#define A6XX_UCHE_PERFCTR_UCHE_SEL_9 0xE25 +#define A6XX_UCHE_PERFCTR_UCHE_SEL_10 0xE26 +#define 
A6XX_UCHE_PERFCTR_UCHE_SEL_11 0xE27 +#define A6XX_UCHE_GBIF_GX_CONFIG 0xE3A +#define A6XX_UCHE_CMDQ_CONFIG 0xE3C + +/* SP registers */ +#define A6XX_SP_ADDR_MODE_CNTL 0xAE01 +#define A6XX_SP_NC_MODE_CNTL 0xAE02 +#define A6XX_SP_PERFCTR_SP_SEL_0 0xAE10 +#define A6XX_SP_PERFCTR_SP_SEL_1 0xAE11 +#define A6XX_SP_PERFCTR_SP_SEL_2 0xAE12 +#define A6XX_SP_PERFCTR_SP_SEL_3 0xAE13 +#define A6XX_SP_PERFCTR_SP_SEL_4 0xAE14 +#define A6XX_SP_PERFCTR_SP_SEL_5 0xAE15 +#define A6XX_SP_PERFCTR_SP_SEL_6 0xAE16 +#define A6XX_SP_PERFCTR_SP_SEL_7 0xAE17 +#define A6XX_SP_PERFCTR_SP_SEL_8 0xAE18 +#define A6XX_SP_PERFCTR_SP_SEL_9 0xAE19 +#define A6XX_SP_PERFCTR_SP_SEL_10 0xAE1A +#define A6XX_SP_PERFCTR_SP_SEL_11 0xAE1B +#define A6XX_SP_PERFCTR_SP_SEL_12 0xAE1C +#define A6XX_SP_PERFCTR_SP_SEL_13 0xAE1D +#define A6XX_SP_PERFCTR_SP_SEL_14 0xAE1E +#define A6XX_SP_PERFCTR_SP_SEL_15 0xAE1F +#define A6XX_SP_PERFCTR_SP_SEL_16 0xAE20 +#define A6XX_SP_PERFCTR_SP_SEL_17 0xAE21 +#define A6XX_SP_PERFCTR_SP_SEL_18 0xAE22 +#define A6XX_SP_PERFCTR_SP_SEL_19 0xAE23 +#define A6XX_SP_PERFCTR_SP_SEL_20 0xAE24 +#define A6XX_SP_PERFCTR_SP_SEL_21 0xAE25 +#define A6XX_SP_PERFCTR_SP_SEL_22 0xAE26 +#define A6XX_SP_PERFCTR_SP_SEL_23 0xAE27 + +/* TP registers */ +#define A6XX_TPL1_ADDR_MODE_CNTL 0xB601 +#define A6XX_TPL1_NC_MODE_CNTL 0xB604 +#define A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0 0xB608 +#define A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1 0xB609 +#define A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2 0xB60A +#define A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3 0xB60B +#define A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4 0xB60C +#define A6XX_TPL1_PERFCTR_TP_SEL_0 0xB610 +#define A6XX_TPL1_PERFCTR_TP_SEL_1 0xB611 +#define A6XX_TPL1_PERFCTR_TP_SEL_2 0xB612 +#define A6XX_TPL1_PERFCTR_TP_SEL_3 0xB613 +#define A6XX_TPL1_PERFCTR_TP_SEL_4 0xB614 +#define A6XX_TPL1_PERFCTR_TP_SEL_5 0xB615 +#define A6XX_TPL1_PERFCTR_TP_SEL_6 0xB616 +#define A6XX_TPL1_PERFCTR_TP_SEL_7 0xB617 +#define A6XX_TPL1_PERFCTR_TP_SEL_8 0xB618 +#define A6XX_TPL1_PERFCTR_TP_SEL_9 0xB619 +#define 
A6XX_TPL1_PERFCTR_TP_SEL_10 0xB61A +#define A6XX_TPL1_PERFCTR_TP_SEL_11 0xB61B + +/* VBIF registers */ +#define A6XX_VBIF_VERSION 0x3000 +#define A6XX_VBIF_CLKON 0x3001 +#define A6XX_VBIF_CLKON_FORCE_ON_TESTBUS_MASK 0x1 +#define A6XX_VBIF_CLKON_FORCE_ON_TESTBUS_SHIFT 0x1 +#define A6XX_VBIF_GATE_OFF_WRREQ_EN 0x302A +#define A6XX_VBIF_XIN_HALT_CTRL0 0x3080 +#define A6XX_VBIF_XIN_HALT_CTRL0_MASK 0xF +#define A6XX_VBIF_XIN_HALT_CTRL1 0x3081 +#define A6XX_VBIF_TEST_BUS_OUT_CTRL 0x3084 +#define A6XX_VBIF_TEST_BUS_OUT_CTRL_EN_MASK 0x1 +#define A6XX_VBIF_TEST_BUS_OUT_CTRL_EN_SHIFT 0x0 +#define A6XX_VBIF_TEST_BUS1_CTRL0 0x3085 +#define A6XX_VBIF_TEST_BUS1_CTRL1 0x3086 +#define A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_MASK 0xF +#define A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_SHIFT 0x0 +#define A6XX_VBIF_TEST_BUS2_CTRL0 0x3087 +#define A6XX_VBIF_TEST_BUS2_CTRL1 0x3088 +#define A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK 0x1FF +#define A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT 0x0 +#define A6XX_VBIF_TEST_BUS_OUT 0x308C +#define A6XX_VBIF_PERF_CNT_SEL0 0x30d0 +#define A6XX_VBIF_PERF_CNT_SEL1 0x30d1 +#define A6XX_VBIF_PERF_CNT_SEL2 0x30d2 +#define A6XX_VBIF_PERF_CNT_SEL3 0x30d3 +#define A6XX_VBIF_PERF_CNT_LOW0 0x30d8 +#define A6XX_VBIF_PERF_CNT_LOW1 0x30d9 +#define A6XX_VBIF_PERF_CNT_LOW2 0x30da +#define A6XX_VBIF_PERF_CNT_LOW3 0x30db +#define A6XX_VBIF_PERF_CNT_HIGH0 0x30e0 +#define A6XX_VBIF_PERF_CNT_HIGH1 0x30e1 +#define A6XX_VBIF_PERF_CNT_HIGH2 0x30e2 +#define A6XX_VBIF_PERF_CNT_HIGH3 0x30e3 +#define A6XX_VBIF_PERF_PWR_CNT_EN0 0x3100 +#define A6XX_VBIF_PERF_PWR_CNT_EN1 0x3101 +#define A6XX_VBIF_PERF_PWR_CNT_EN2 0x3102 +#define A6XX_VBIF_PERF_PWR_CNT_LOW0 0x3110 +#define A6XX_VBIF_PERF_PWR_CNT_LOW1 0x3111 +#define A6XX_VBIF_PERF_PWR_CNT_LOW2 0x3112 +#define A6XX_VBIF_PERF_PWR_CNT_HIGH0 0x3118 +#define A6XX_VBIF_PERF_PWR_CNT_HIGH1 0x3119 +#define A6XX_VBIF_PERF_PWR_CNT_HIGH2 0x311a + +/* GBIF countables */ +#define GBIF_AXI0_READ_DATA_TOTAL_BEATS 34 +#define GBIF_AXI1_READ_DATA_TOTAL_BEATS 35 
+#define GBIF_AXI0_WRITE_DATA_TOTAL_BEATS 46 +#define GBIF_AXI1_WRITE_DATA_TOTAL_BEATS 47 + +/* GBIF registers */ +#define A6XX_GBIF_SCACHE_CNTL0 0x3c01 +#define A6XX_GBIF_SCACHE_CNTL1 0x3c02 +#define A6XX_GBIF_QSB_SIDE0 0x3c03 +#define A6XX_GBIF_QSB_SIDE1 0x3c04 +#define A6XX_GBIF_QSB_SIDE2 0x3c05 +#define A6XX_GBIF_QSB_SIDE3 0x3c06 +#define A6XX_GBIF_HALT 0x3c45 +#define A6XX_GBIF_HALT_ACK 0x3c46 + +#define A6XX_GBIF_CLIENT_HALT_MASK BIT(0) +#define A6XX_GBIF_ARB_HALT_MASK BIT(1) +#define A6XX_GBIF_GX_HALT_MASK BIT(0) + +#define A6XX_GBIF_PERF_PWR_CNT_EN 0x3cc0 +#define A6XX_GBIF_PERF_CNT_SEL 0x3cc2 +#define A6XX_GBIF_PERF_PWR_CNT_SEL 0x3cc3 +#define A6XX_GBIF_PERF_CNT_LOW0 0x3cc4 +#define A6XX_GBIF_PERF_CNT_LOW1 0x3cc5 +#define A6XX_GBIF_PERF_CNT_LOW2 0x3cc6 +#define A6XX_GBIF_PERF_CNT_LOW3 0x3cc7 +#define A6XX_GBIF_PERF_CNT_HIGH0 0x3cc8 +#define A6XX_GBIF_PERF_CNT_HIGH1 0x3cc9 +#define A6XX_GBIF_PERF_CNT_HIGH2 0x3cca +#define A6XX_GBIF_PERF_CNT_HIGH3 0x3ccb +#define A6XX_GBIF_PWR_CNT_LOW0 0x3ccc +#define A6XX_GBIF_PWR_CNT_LOW1 0x3ccd +#define A6XX_GBIF_PWR_CNT_LOW2 0x3cce +#define A6XX_GBIF_PWR_CNT_HIGH0 0x3ccf +#define A6XX_GBIF_PWR_CNT_HIGH1 0x3cd0 +#define A6XX_GBIF_PWR_CNT_HIGH2 0x3cd1 + + +/* CX_DBGC_CFG registers */ +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_A 0x18400 +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_B 0x18401 +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_C 0x18402 +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_D 0x18403 +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_PING_INDEX_SHIFT 0x0 +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_PING_BLK_SEL_SHIFT 0x8 +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT 0x18404 +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN_SHIFT 0x0 +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU_SHIFT 0xC +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT_SHIFT 0x1C +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLM 0x18405 +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE_SHIFT 0x18 +#define A6XX_CX_DBGC_CFG_DBGBUS_OPL 0x18406 +#define A6XX_CX_DBGC_CFG_DBGBUS_OPE 0x18407 +#define A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0 
0x18408 +#define A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1 0x18409 +#define A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2 0x1840A +#define A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3 0x1840B +#define A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0 0x1840C +#define A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1 0x1840D +#define A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2 0x1840E +#define A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3 0x1840F +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0 0x18410 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1 0x18411 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL0_SHIFT 0x0 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL1_SHIFT 0x4 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL2_SHIFT 0x8 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL3_SHIFT 0xC +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL4_SHIFT 0x10 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL5_SHIFT 0x14 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL6_SHIFT 0x18 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL7_SHIFT 0x1C +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL8_SHIFT 0x0 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL9_SHIFT 0x4 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL10_SHIFT 0x8 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL11_SHIFT 0xC +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL12_SHIFT 0x10 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL13_SHIFT 0x14 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL14_SHIFT 0x18 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL15_SHIFT 0x1C +#define A6XX_CX_DBGC_CFG_DBGBUS_IVTE_0 0x18412 +#define A6XX_CX_DBGC_CFG_DBGBUS_IVTE_1 0x18413 +#define A6XX_CX_DBGC_CFG_DBGBUS_IVTE_2 0x18414 +#define A6XX_CX_DBGC_CFG_DBGBUS_IVTE_3 0x18415 +#define A6XX_CX_DBGC_CFG_DBGBUS_MASKE_0 0x18416 +#define A6XX_CX_DBGC_CFG_DBGBUS_MASKE_1 0x18417 +#define A6XX_CX_DBGC_CFG_DBGBUS_MASKE_2 0x18418 +#define A6XX_CX_DBGC_CFG_DBGBUS_MASKE_3 0x18419 +#define A6XX_CX_DBGC_CFG_DBGBUS_NIBBLEE 0x1841A +#define A6XX_CX_DBGC_CFG_DBGBUS_PTRC0 0x1841B +#define A6XX_CX_DBGC_CFG_DBGBUS_PTRC1 0x1841C +#define A6XX_CX_DBGC_CFG_DBGBUS_LOADREG 0x1841D +#define A6XX_CX_DBGC_CFG_DBGBUS_IDX 0x1841E +#define A6XX_CX_DBGC_CFG_DBGBUS_CLRC 0x1841F +#define A6XX_CX_DBGC_CFG_DBGBUS_LOADIVT 0x18420 +#define 
A6XX_CX_DBGC_VBIF_DBG_CNTL 0x18421 +#define A6XX_CX_DBGC_DBG_LO_HI_GPIO 0x18422 +#define A6XX_CX_DBGC_EXT_TRACE_BUS_CNTL 0x18423 +#define A6XX_CX_DBGC_READ_AHB_THROUGH_DBG 0x18424 +#define A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1 0x1842F +#define A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2 0x18430 +#define A6XX_CX_DBGC_EVT_CFG 0x18440 +#define A6XX_CX_DBGC_EVT_INTF_SEL_0 0x18441 +#define A6XX_CX_DBGC_EVT_INTF_SEL_1 0x18442 +#define A6XX_CX_DBGC_PERF_ATB_CFG 0x18443 +#define A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_0 0x18444 +#define A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_1 0x18445 +#define A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_2 0x18446 +#define A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_3 0x18447 +#define A6XX_CX_DBGC_PERF_ATB_TRIG_INTF_SEL_0 0x18448 +#define A6XX_CX_DBGC_PERF_ATB_TRIG_INTF_SEL_1 0x18449 +#define A6XX_CX_DBGC_PERF_ATB_DRAIN_CMD 0x1844A +#define A6XX_CX_DBGC_ECO_CNTL 0x18450 +#define A6XX_CX_DBGC_AHB_DBG_CNTL 0x18451 + +/* GMU control registers */ +#define A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL 0x1A880 +#define A6XX_GMU_GX_SPTPRAC_POWER_CONTROL 0x1A881 +#define A6XX_GMU_CM3_ITCM_START 0x1B400 +#define A6XX_GMU_CM3_DTCM_START 0x1C400 +#define A6XX_GMU_NMI_CONTROL_STATUS 0x1CBF0 +#define A6XX_GMU_BOOT_SLUMBER_OPTION 0x1CBF8 +#define A6XX_GMU_GX_VOTE_IDX 0x1CBF9 +#define A6XX_GMU_MX_VOTE_IDX 0x1CBFA +#define A6XX_GMU_DCVS_ACK_OPTION 0x1CBFC +#define A6XX_GMU_DCVS_PERF_SETTING 0x1CBFD +#define A6XX_GMU_DCVS_BW_SETTING 0x1CBFE +#define A6XX_GMU_DCVS_RETURN 0x1CBFF +#define A6XX_GMU_ICACHE_CONFIG 0x1F400 +#define A6XX_GMU_DCACHE_CONFIG 0x1F401 +#define A6XX_GMU_SYS_BUS_CONFIG 0x1F40F +#define A6XX_GMU_CM3_SYSRESET 0x1F800 +#define A6XX_GMU_CM3_BOOT_CONFIG 0x1F801 +#define A6XX_GMU_CX_GMU_WFI_CONFIG 0x1F802 +#define A6XX_GMU_CX_GMU_WDOG_CTRL 0x1F813 +#define A6XX_GMU_CM3_FW_BUSY 0x1F81A +#define A6XX_GMU_CM3_FW_INIT_RESULT 0x1F81C +#define A6XX_GMU_CM3_CFG 0x1F82D +#define A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE 0x1F840 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0 0x1F841 +#define 
A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1 0x1F842 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L 0x1F844 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H 0x1F845 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_L 0x1F846 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_H 0x1F847 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_L 0x1F848 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_H 0x1F849 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_L 0x1F84A +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_H 0x1F84B +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_L 0x1F84C +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_H 0x1F84D +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_5_L 0x1F84E +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_5_H 0x1F84F +#define A6XX_GMU_CX_GMU_POWER_COUNTER_GMUCLK_0_L 0x1F850 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_GMUCLK_0_H 0x1F851 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_GMUCLK_1_L 0x1F852 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_GMUCLK_1_H 0x1F853 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_2 0x1F860 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_6_L 0x1F870 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_6_H 0x1F871 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_L 0x1F872 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_H 0x1F873 /* was 0x1F843: every XOCLK pair has _H = _L + 1 (7_L is 0x1F872, 8_L is 0x1F874), and 0x1F843 falls inside the POWER_COUNTER_SELECT range above — confirm against the GMU register spec */ +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_8_L 0x1F874 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_8_H 0x1F875 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_9_L 0x1F876 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_9_H 0x1F877 +#define A6XX_GMU_CX_GMU_ALWAYS_ON_COUNTER_L 0x1F888 +#define A6XX_GMU_CX_GMU_ALWAYS_ON_COUNTER_H 0x1F889 +#define A6XX_GMU_CX_GMU_PERF_COUNTER_ENABLE 0x1F8A0 +#define A6XX_GMU_CX_GMU_PERF_COUNTER_SELECT_0 0x1F8A1 +#define A6XX_GMU_CX_GMU_PERF_COUNTER_SELECT_1 0x1F8A2 +#define A6XX_GMU_CX_GMU_PERF_COUNTER_0_L 0x1F8A4 +#define A6XX_GMU_CX_GMU_PERF_COUNTER_0_H 0x1F8A5 +#define A6XX_GMU_CX_GMU_PERF_COUNTER_1_L 0x1F8A6 +#define A6XX_GMU_CX_GMU_PERF_COUNTER_1_H 0x1F8A7 +#define A6XX_GMU_CX_GMU_PERF_COUNTER_2_L 0x1F8A8
+#define A6XX_GMU_CX_GMU_PERF_COUNTER_2_H 0x1F8A9 +#define A6XX_GMU_CX_GMU_PERF_COUNTER_3_L 0x1F8AA +#define A6XX_GMU_CX_GMU_PERF_COUNTER_3_H 0x1F8AB +#define A6XX_GMU_CX_GMU_PERF_COUNTER_4_L 0x1F8AC +#define A6XX_GMU_CX_GMU_PERF_COUNTER_4_H 0x1F8AD +#define A6XX_GMU_CX_GMU_PERF_COUNTER_5_L 0x1F8AE +#define A6XX_GMU_CX_GMU_PERF_COUNTER_5_H 0x1F8AF +#define A6XX_GMU_PWR_COL_INTER_FRAME_CTRL 0x1F8C0 +#define A6XX_GMU_PWR_COL_INTER_FRAME_HYST 0x1F8C1 +#define A6XX_GMU_PWR_COL_SPTPRAC_HYST 0x1F8C2 +#define A6XX_GMU_SPTPRAC_PWR_CLK_STATUS 0x1F8D0 +#define A6XX_GMU_GPU_NAP_CTRL 0x1F8E4 +#define A6XX_GMU_RPMH_CTRL 0x1F8E8 +#define A6XX_GMU_RPMH_HYST_CTRL 0x1F8E9 +#define A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE 0x1F8EC +#define A6XX_GPU_GMU_CX_GMU_PWR_COL_CP_MSG 0x1F900 +#define A6XX_GPU_GMU_CX_GMU_PWR_COL_CP_RESP 0x1F901 +#define A6XX_GMU_BOOT_KMD_LM_HANDSHAKE 0x1F9F0 +#define A6XX_GMU_LLM_GLM_SLEEP_CTRL 0x1F957 +#define A6XX_GMU_LLM_GLM_SLEEP_STATUS 0x1F958 + +/* HFI registers*/ +#define A6XX_GMU_ALWAYS_ON_COUNTER_L 0x1F888 +#define A6XX_GMU_ALWAYS_ON_COUNTER_H 0x1F889 +#define A6XX_GMU_GMU_PWR_COL_KEEPALIVE 0x1F8C3 +#define A6XX_GMU_PWR_COL_PREEMPT_KEEPALIVE 0x1F8C4 +#define A6XX_GMU_HFI_CTRL_STATUS 0x1F980 +#define A6XX_GMU_HFI_VERSION_INFO 0x1F981 +#define A6XX_GMU_HFI_SFR_ADDR 0x1F982 +#define A6XX_GMU_HFI_MMAP_ADDR 0x1F983 +#define A6XX_GMU_HFI_QTBL_INFO 0x1F984 +#define A6XX_GMU_HFI_QTBL_ADDR 0x1F985 +#define A6XX_GMU_HFI_CTRL_INIT 0x1F986 +#define A6XX_GMU_GMU2HOST_INTR_SET 0x1F990 +#define A6XX_GMU_GMU2HOST_INTR_CLR 0x1F991 +#define A6XX_GMU_GMU2HOST_INTR_INFO 0x1F992 +#define A6XX_GMU_GMU2HOST_INTR_MASK 0x1F993 +#define A6XX_GMU_HOST2GMU_INTR_SET 0x1F994 +#define A6XX_GMU_HOST2GMU_INTR_CLR 0x1F995 +#define A6XX_GMU_HOST2GMU_INTR_RAW_INFO 0x1F996 +#define A6XX_GMU_HOST2GMU_INTR_EN_0 0x1F997 +#define A6XX_GMU_HOST2GMU_INTR_EN_1 0x1F998 +#define A6XX_GMU_HOST2GMU_INTR_EN_2 0x1F999 +#define A6XX_GMU_HOST2GMU_INTR_EN_3 0x1F99A +#define A6XX_GMU_HOST2GMU_INTR_INFO_0 
0x1F99B +#define A6XX_GMU_HOST2GMU_INTR_INFO_1 0x1F99C +#define A6XX_GMU_HOST2GMU_INTR_INFO_2 0x1F99D +#define A6XX_GMU_HOST2GMU_INTR_INFO_3 0x1F99E +#define A6XX_GMU_GENERAL_0 0x1F9C5 +#define A6XX_GMU_GENERAL_1 0x1F9C6 +#define A6XX_GMU_GENERAL_6 0x1F9CB +#define A6XX_GMU_GENERAL_7 0x1F9CC + +/* ISENSE registers */ +#define A6XX_GMU_ISENSE_CTRL 0x1F95D +#define A6XX_GPU_GMU_CX_GMU_ISENSE_CTRL 0x1f95d +#define A6XX_GPU_CS_ENABLE_REG 0x23120 + +/* LM registers */ +#define A6XX_GPU_GMU_CX_GMU_PWR_THRESHOLD 0x1F94D + +/* FAL10 veto register */ +#define A6XX_GPU_GMU_CX_GMU_CX_FAL_INTF 0x1F8F0 +#define A6XX_GPU_GMU_CX_GMU_CX_FALNEXT_INTF 0x1F8F1 + +#define A6XX_GMU_AO_INTERRUPT_EN 0x23B03 +#define A6XX_GMU_AO_HOST_INTERRUPT_CLR 0x23B04 +#define A6XX_GMU_AO_HOST_INTERRUPT_STATUS 0x23B05 +#define A6XX_GMU_AO_HOST_INTERRUPT_MASK 0x23B06 +#define A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL 0x23B09 +#define A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL 0x23B0A +#define A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL 0x23B0B +#define A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS 0x23B0C +#define A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS2 0x23B0D +#define A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK 0x23B0E +#define A6XX_GMU_AO_AHB_FENCE_CTRL 0x23B10 +#define A6XX_GMU_AHB_FENCE_STATUS 0x23B13 +#define A6XX_GMU_AHB_FENCE_STATUS_CLR 0x23B14 +#define A6XX_GMU_RBBM_INT_UNMASKED_STATUS 0x23B15 +#define A6XX_GMU_AO_SPARE_CNTL 0x23B16 + +/* RGMU GLM registers */ +#define A6XX_GMU_AO_RGMU_GLM_SLEEP_CTRL 0x23B80 +#define A6XX_GMU_AO_RGMU_GLM_SLEEP_STATUS 0x23B81 +#define A6XX_GMU_AO_RGMU_GLM_HW_CRC_DISABLE 0x23B82 + +/* GMU RSC control registers */ +#define A6XX_GMU_RSCC_CONTROL_REQ 0x23B07 +#define A6XX_GMU_RSCC_CONTROL_ACK 0x23B08 + +/* FENCE control registers */ +#define A6XX_GMU_AHB_FENCE_RANGE_0 0x23B11 +#define A6XX_GMU_AHB_FENCE_RANGE_1 0x23B12 + +/* GPUCC registers */ +#define A6XX_GPU_CC_GX_GDSCR 0x24403 +#define A6XX_GPU_CC_GX_DOMAIN_MISC 0x24542 +#define A6XX_GPU_CC_GX_DOMAIN_MISC3 0x24563 +#define A6XX_GPU_CC_CX_GDSCR 0x2441B + +/* GPU 
CPR registers */ +#define A6XX_GPU_CPR_FSM_CTL 0x26801 + +/* GPU RSC sequencer registers */ +#define A6XX_GPU_RSCC_RSC_STATUS0_DRV0 0x00004 +#define A6XX_RSCC_PDC_SEQ_START_ADDR 0x00008 +#define A6XX_RSCC_PDC_MATCH_VALUE_LO 0x00009 +#define A6XX_RSCC_PDC_MATCH_VALUE_HI 0x0000A +#define A6XX_RSCC_PDC_SLAVE_ID_DRV0 0x0000B +#define A6XX_RSCC_HIDDEN_TCS_CMD0_ADDR 0x0000D +#define A6XX_RSCC_HIDDEN_TCS_CMD0_DATA 0x0000E +#define A6XX_RSCC_TIMESTAMP_UNIT0_TIMESTAMP_L_DRV0 0x00082 +#define A6XX_RSCC_TIMESTAMP_UNIT0_TIMESTAMP_H_DRV0 0x00083 +#define A6XX_RSCC_TIMESTAMP_UNIT1_EN_DRV0 0x00089 +#define A6XX_RSCC_TIMESTAMP_UNIT1_OUTPUT_DRV0 0x0008C +#define A6XX_RSCC_OVERRIDE_START_ADDR 0x00100 +#define A6XX_RSCC_SEQ_BUSY_DRV0 0x00101 +#define A6XX_RSCC_SEQ_MEM_0_DRV0 0x00180 +#define A6XX_RSCC_TCS0_DRV0_STATUS 0x00346 +#define A6XX_RSCC_TCS1_DRV0_STATUS 0x003EE +#define A6XX_RSCC_TCS2_DRV0_STATUS 0x00496 +#define A6XX_RSCC_TCS3_DRV0_STATUS 0x0053E + +/* GPU PDC sequencer registers in AOSS.RPMh domain */ +#define PDC_GPU_ENABLE_PDC 0x1140 +#define PDC_GPU_SEQ_START_ADDR 0x1148 +#define PDC_GPU_TCS0_CONTROL 0x1540 +#define PDC_GPU_TCS0_CMD_ENABLE_BANK 0x1541 +#define PDC_GPU_TCS0_CMD_WAIT_FOR_CMPL_BANK 0x1542 +#define PDC_GPU_TCS0_CMD0_MSGID 0x1543 +#define PDC_GPU_TCS0_CMD0_ADDR 0x1544 +#define PDC_GPU_TCS0_CMD0_DATA 0x1545 +#define PDC_GPU_TCS1_CONTROL 0x1572 +#define PDC_GPU_TCS1_CMD_ENABLE_BANK 0x1573 +#define PDC_GPU_TCS1_CMD_WAIT_FOR_CMPL_BANK 0x1574 +#define PDC_GPU_TCS1_CMD0_MSGID 0x1575 +#define PDC_GPU_TCS1_CMD0_ADDR 0x1576 +#define PDC_GPU_TCS1_CMD0_DATA 0x1577 +#define PDC_GPU_TCS2_CONTROL 0x15A4 +#define PDC_GPU_TCS2_CMD_ENABLE_BANK 0x15A5 +#define PDC_GPU_TCS2_CMD_WAIT_FOR_CMPL_BANK 0x15A6 +#define PDC_GPU_TCS2_CMD0_MSGID 0x15A7 +#define PDC_GPU_TCS2_CMD0_ADDR 0x15A8 +#define PDC_GPU_TCS2_CMD0_DATA 0x15A9 +#define PDC_GPU_TCS3_CONTROL 0x15D6 +#define PDC_GPU_TCS3_CMD_ENABLE_BANK 0x15D7 +#define PDC_GPU_TCS3_CMD_WAIT_FOR_CMPL_BANK 0x15D8 +#define 
PDC_GPU_TCS3_CMD0_MSGID 0x15D9 +#define PDC_GPU_TCS3_CMD0_ADDR 0x15DA +#define PDC_GPU_TCS3_CMD0_DATA 0x15DB + +/* + * Legacy DTSI used an offset from the start of the PDC resource + * for PDC SEQ programming. We are now using PDC subsections so + * start the PDC SEQ offset at zero. + */ +#define PDC_GPU_SEQ_MEM_0 0x0 + +/* + * Legacy RSCC register range was a part of the GMU register space; + * now we are using a separate section for RSCC registers. Add the + * offset for backward compatibility. + */ +#define RSCC_OFFSET_LEGACY 0x23400 + +/* RGMU(PCC) registers in A6X_GMU_CX_0_NON_CONTEXT_DEC domain */ +#define A6XX_RGMU_CX_INTR_GEN_EN 0x1F80F +#define A6XX_RGMU_CX_RGMU_TIMER0 0x1F834 +#define A6XX_RGMU_CX_RGMU_TIMER1 0x1F835 +#define A6XX_RGMU_CX_PCC_CTRL 0x1F838 +#define A6XX_RGMU_CX_PCC_INIT_RESULT 0x1F839 +#define A6XX_RGMU_CX_PCC_BKPT_CFG 0x1F83A +#define A6XX_RGMU_CX_PCC_BKPT_ADDR 0x1F83B +#define A6XX_RGMU_CX_PCC_STATUS 0x1F83C +#define A6XX_RGMU_CX_PCC_DEBUG 0x1F83D + +/* GPU CX_MISC registers */ +#define A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_0 0x1 +#define A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1 0x2 +#define A6XX_LLC_NUM_GPU_SCIDS 5 +#define A6XX_GPU_LLC_SCID_NUM_BITS 5 +#define A6XX_GPU_LLC_SCID_MASK \ + ((1 << (A6XX_LLC_NUM_GPU_SCIDS * A6XX_GPU_LLC_SCID_NUM_BITS)) - 1) +#define A6XX_GPUHTW_LLC_SCID_SHIFT 25 +#define A6XX_GPUHTW_LLC_SCID_MASK \ + (((1 << A6XX_GPU_LLC_SCID_NUM_BITS) - 1) << A6XX_GPUHTW_LLC_SCID_SHIFT) + +#endif /* _A6XX_REG_H */ + diff --git a/adreno-gpulist.h b/adreno-gpulist.h new file mode 100644 index 0000000000..e7e061f76f --- /dev/null +++ b/adreno-gpulist.h @@ -0,0 +1,1915 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved.
+ */ + +#define ANY_ID (~0) + +#define DEFINE_ADRENO_REV(_rev, _core, _major, _minor, _patchid) \ + .gpurev = _rev, .core = _core, .major = _major, .minor = _minor, \ + .patchid = _patchid + +#define DEFINE_DEPRECATED_CORE(_name, _rev, _core, _major, _minor, _patchid) \ +static const struct adreno_gpu_core adreno_gpu_core_##_name = { \ + DEFINE_ADRENO_REV(_rev, _core, _major, _minor, _patchid), \ + .features = ADRENO_DEPRECATED, \ +} + +static const struct kgsl_regmap_list a306_vbif_regs[] = { + { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 }, + { A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000A }, + { A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000A }, +}; + +static const struct adreno_a3xx_core adreno_gpu_core_a306 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A306, 3, 0, 6, 0), + .features = ADRENO_SOFT_FAULT_DETECT, + .gpudev = &adreno_a3xx_gpudev, + .perfcounters = &adreno_a3xx_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_128K, + .bus_width = 0, + .snapshot_size = 600 * SZ_1K, + }, + .pm4fw_name = "a300_pm4.fw", + .pfpfw_name = "a300_pfp.fw", + .vbif = a306_vbif_regs, + .vbif_count = ARRAY_SIZE(a306_vbif_regs), +}; + +static const struct kgsl_regmap_list a306a_vbif_regs[] = { + { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 }, + { A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00000010 }, + { A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00000010 }, +}; + +static const struct adreno_a3xx_core adreno_gpu_core_a306a = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A306A, 3, 0, 6, 0x20), + .features = ADRENO_SOFT_FAULT_DETECT, + .gpudev = &adreno_a3xx_gpudev, + .perfcounters = &adreno_a3xx_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_128K, + .bus_width = 16, + .snapshot_size = 600 * SZ_1K, + }, + .pm4fw_name = "a300_pm4.fw", + .pfpfw_name = "a300_pfp.fw", + .vbif = a306a_vbif_regs, + .vbif_count = ARRAY_SIZE(a306a_vbif_regs), +}; + +static const struct kgsl_regmap_list a304_vbif_regs[] = { + { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 }, +}; + +static const struct adreno_a3xx_core adreno_gpu_core_a304 = { + .base = { + 
DEFINE_ADRENO_REV(ADRENO_REV_A304, 3, 0, 4, 0), + .features = ADRENO_SOFT_FAULT_DETECT, + .gpudev = &adreno_a3xx_gpudev, + .perfcounters = &adreno_a3xx_perfcounters, + .gmem_base = 0, + .gmem_size = (SZ_64K + SZ_32K), + .bus_width = 0, + .snapshot_size = 600 * SZ_1K, + }, + .pm4fw_name = "a300_pm4.fw", + .pfpfw_name = "a300_pfp.fw", + .vbif = a304_vbif_regs, + .vbif_count = ARRAY_SIZE(a304_vbif_regs), +}; + +DEFINE_DEPRECATED_CORE(a405, ADRENO_REV_A405, 4, 0, 5, ANY_ID); +DEFINE_DEPRECATED_CORE(a418, ADRENO_REV_A418, 4, 1, 8, ANY_ID); +DEFINE_DEPRECATED_CORE(a420, ADRENO_REV_A420, 4, 2, 0, ANY_ID); +DEFINE_DEPRECATED_CORE(a430, ADRENO_REV_A430, 4, 3, 0, ANY_ID); +DEFINE_DEPRECATED_CORE(a530v1, ADRENO_REV_A530, 5, 3, 0, 0); + +static const struct kgsl_regmap_list a530_hwcg_regs[] = { + {A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, + {A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220}, + {A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220}, + {A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220}, + {A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, + {A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080}, + {A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080}, + {A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080}, + {A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222}, + {A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222}, + {A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222}, + {A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222}, + 
{A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777}, + {A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777}, + {A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777}, + {A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777}, + {A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111}, + {A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111}, + {A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111}, + {A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111}, + {A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222}, + {A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444}, + {A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222}, + {A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404}, + 
	/* NOTE(review): continuation of the a530 HWCG table whose start is above this span */
	{A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
	{A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
	{A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
	{A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
	{A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
	{A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
};

/* VBIF control registers for a530, a510, a508, a505 and a506 */
static const struct kgsl_regmap_list a530_vbif_regs[] = {
	{A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003},
};

/* a530 v2 (exact patch level 1): uses the v2-specific GPMU/regulator sequence firmware */
static const struct adreno_a5xx_core adreno_gpu_core_a530v2 = {
	.base = {
		DEFINE_ADRENO_REV(ADRENO_REV_A530, 5, 3, 0, 1),
		.features = ADRENO_SPTP_PC | ADRENO_LM |
			ADRENO_PREEMPTION |
			ADRENO_CONTENT_PROTECTION,
		.gpudev = &adreno_a5xx_gpudev,
		.perfcounters = &adreno_a5xx_perfcounters,
		.gmem_base = 0x100000,
		.gmem_size = SZ_1M,
		.bus_width = 32,
		.snapshot_size = SZ_1M,
	},
	.gpmu_tsens = 0x00060007,
	.max_power = 5448,
	.pm4fw_name = "a530_pm4.fw",
	.pfpfw_name = "a530_pfp.fw",
	.gpmufw_name = "a530_gpmu.fw2",
	.regfw_name = "a530v2_seq.fw2",
	.zap_name = "a530_zap",
	.hwcg = a530_hwcg_regs,
	.hwcg_count = ARRAY_SIZE(a530_hwcg_regs),
	.vbif = a530_vbif_regs,
	.vbif_count = ARRAY_SIZE(a530_vbif_regs),
	.highest_bank_bit = 15,
};

/* a530 v3 and any later patch level (ANY_ID): v3 GPMU/sequence firmware, otherwise same as v2 */
static const struct adreno_a5xx_core adreno_gpu_core_a530v3 = {
	.base = {
		DEFINE_ADRENO_REV(ADRENO_REV_A530, 5, 3, 0, ANY_ID),
		.features = ADRENO_SPTP_PC | ADRENO_LM |
			ADRENO_PREEMPTION |
			ADRENO_CONTENT_PROTECTION,
		.gpudev = &adreno_a5xx_gpudev,
		.perfcounters = &adreno_a5xx_perfcounters,
		.gmem_base = 0x100000,
		.gmem_size = SZ_1M,
		.bus_width = 32,
		.snapshot_size = SZ_1M,
	},
	.gpmu_tsens = 0x00060007,
	.max_power = 5448,
	.pm4fw_name = "a530_pm4.fw",
	.pfpfw_name = "a530_pfp.fw",
	.gpmufw_name = "a530v3_gpmu.fw2",
	.regfw_name = "a530v3_seq.fw2",
	.zap_name = "a530_zap",
	.hwcg = a530_hwcg_regs,
	.hwcg_count = ARRAY_SIZE(a530_hwcg_regs),
	.vbif = a530_vbif_regs,
	.vbif_count = ARRAY_SIZE(a530_vbif_regs),
	.highest_bank_bit = 15,
};

/* For a505, a506 and a508 */
static const struct kgsl_regmap_list a50x_hwcg_regs[] = {
	{A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
	{A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
	{A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
	{A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
	{A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{A5XX_RBBM_CLOCK_HYST_UCHE, 0x00FFFFF4},
	{A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
	{A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
	{A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
	{A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
	{A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
	{A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
	{A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
	{A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
	{A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
};

/* a505: smallest a5xx part here; 16-bit bus, 136K GMEM, no zap/content protection */
static const struct adreno_a5xx_core adreno_gpu_core_a505 = {
	.base = {
		DEFINE_ADRENO_REV(ADRENO_REV_A505, 5, 0, 5, ANY_ID),
		.features = ADRENO_PREEMPTION,
		.gpudev = &adreno_a5xx_gpudev,
		.perfcounters = &adreno_a5xx_perfcounters,
		.gmem_base = 0x100000,
		.gmem_size = (SZ_128K + SZ_8K),
		.bus_width = 16,
		.snapshot_size = SZ_1M,
	},
	.pm4fw_name = "a530_pm4.fw",
	.pfpfw_name = "a530_pfp.fw",
	.hwcg = a50x_hwcg_regs,
	.hwcg_count = ARRAY_SIZE(a50x_hwcg_regs),
	.vbif = a530_vbif_regs,
	.vbif_count = ARRAY_SIZE(a530_vbif_regs),
};

/* a506: a505-class core plus content protection (zap shader) and CPZ retention */
static const struct adreno_a5xx_core adreno_gpu_core_a506 = {
	.base = {
		DEFINE_ADRENO_REV(ADRENO_REV_A506, 5, 0, 6, ANY_ID),
		.features = ADRENO_PREEMPTION |
			ADRENO_CONTENT_PROTECTION | ADRENO_CPZ_RETENTION,
		.gpudev = &adreno_a5xx_gpudev,
		.perfcounters = &adreno_a5xx_perfcounters,
		.gmem_base = 0x100000,
		.gmem_size = (SZ_128K + SZ_8K),
		.bus_width = 16,
		.snapshot_size = SZ_1M,
	},
	.pm4fw_name = "a530_pm4.fw",
	.pfpfw_name = "a530_pfp.fw",
	.zap_name = "a506_zap",
	.hwcg = a50x_hwcg_regs,
	.hwcg_count = ARRAY_SIZE(a50x_hwcg_regs),
	.vbif = a530_vbif_regs,
	.vbif_count = ARRAY_SIZE(a530_vbif_regs),
	.highest_bank_bit = 14,
};

/* Hardware clock-gating setup for a510 (two SP/TP/RB clusters) */
static const struct kgsl_regmap_list a510_hwcg_regs[] = {
	{A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
	{A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
	{A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
	{A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
	{A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
	{A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
	{A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
	{A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
	{A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
	{A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
	{A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
	{A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
	{A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
	{A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
	{A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
	{A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
	{A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
	{A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
	{A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
	{A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
	{A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
	{A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
	{A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
	{A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
	{A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
	{A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
	{A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
	{A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
};

/* a510: no preemption/content-protection features advertised */
static const struct adreno_a5xx_core adreno_gpu_core_a510 = {
	.base = {
		DEFINE_ADRENO_REV(ADRENO_REV_A510, 5, 1, 0, ANY_ID),
		.gpudev = &adreno_a5xx_gpudev,
		.perfcounters = &adreno_a5xx_perfcounters,
		.gmem_base = 0x100000,
		.gmem_size = SZ_256K,
		.bus_width = 16,
		.snapshot_size = SZ_1M,
	},
	.pm4fw_name = "a530_pm4.fw",
	.pfpfw_name = "a530_pfp.fw",
	.hwcg = a510_hwcg_regs,
	.hwcg_count = ARRAY_SIZE(a510_hwcg_regs),
	.vbif = a530_vbif_regs,
	.vbif_count = ARRAY_SIZE(a530_vbif_regs),
};

/* a540 v1 (patch level 0) is no longer supported */
DEFINE_DEPRECATED_CORE(a540v1, ADRENO_REV_A540, 5, 4, 0, 0);

static const struct kgsl_regmap_list a540_hwcg_regs[] = {
	{A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
	{A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
	{A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
	{A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
	{A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
	{A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
	{A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
	{A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
	{A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
	{A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
	{A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
	{A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
	{A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
	{A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
	{A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
	{A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
	{A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
	{A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
	{A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
	{A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
	{A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
	{A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
	{A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
	{A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
	{A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
	{A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
	{A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
	{A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
	{A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
	{A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
	{A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
	{A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
	{A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
	{A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
	{A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
	{A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
	{A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
	{A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
	{A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
	{A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
	{A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
	{A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
	{A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
	{A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
	{A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
	{A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
	{A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
	{A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
	{A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
	{A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
	{A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
	{A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
	{A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
	{A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
	{A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
	{A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
	{A5XX_RBBM_CLOCK_HYST_GPMU, 0x00000222},
	{A5XX_RBBM_CLOCK_DELAY_GPMU, 0x00000770},
	/*
	 * NOTE(review): A5XX_RBBM_CLOCK_HYST_GPMU appears twice in this table
	 * (0x00000222 above, 0x00000004 here); if entries are written in order
	 * the second value wins. Possibly A5XX_RBBM_CLOCK_CNTL_GPMU was
	 * intended here -- confirm against the a540 register documentation.
	 */
	{A5XX_RBBM_CLOCK_HYST_GPMU, 0x00000004},
};

static const struct kgsl_regmap_list a540_vbif_regs[] = {
	{A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003},
	{A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009},
};

/* a540 v2 and later (ANY_ID); v1 is deprecated above */
static const struct adreno_a5xx_core adreno_gpu_core_a540v2 = {
	.base = {
		DEFINE_ADRENO_REV(ADRENO_REV_A540, 5, 4, 0, ANY_ID),
		.features = ADRENO_PREEMPTION |
			ADRENO_CONTENT_PROTECTION |
			ADRENO_SPTP_PC,
		.gpudev = &adreno_a5xx_gpudev,
		.perfcounters = &adreno_a5xx_perfcounters,
		.gmem_base = 0x100000,
		.gmem_size = SZ_1M,
		.bus_width = 32,
		.snapshot_size = SZ_1M,
	},
	.gpmu_tsens = 0x000c000d,
	.max_power = 5448,
	.pm4fw_name = "a530_pm4.fw",
	.pfpfw_name = "a530_pfp.fw",
	.gpmufw_name = "a540_gpmu.fw2",
	.zap_name = "a540_zap",
	.hwcg = a540_hwcg_regs,
	.hwcg_count = ARRAY_SIZE(a540_hwcg_regs),
	.vbif = a540_vbif_regs,
	.vbif_count = ARRAY_SIZE(a540_vbif_regs),
	.highest_bank_bit = 15,
};

/* Hardware clock-gating setup for a512 (two SP/TP/RB clusters) */
static const struct kgsl_regmap_list a512_hwcg_regs[] = {
	{A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
	{A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
	{A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
	{A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
	{A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
	{A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
	{A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
	{A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
	{A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
	{A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
	{A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
	{A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
	{A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
	{A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
	{A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
	{A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
	{A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
	{A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
	{A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
	{A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
	{A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
	{A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
	{A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
	{A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
	{A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
	{A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
	{A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
	{A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
	{A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
};

/*
 * a512 core descriptor.
 * NOTE(review): unlike the sibling a5xx cores in this file, no .vbif list is
 * set here -- confirm that skipping the VBIF QOS setup is intentional on a512.
 */
static const struct adreno_a5xx_core adreno_gpu_core_a512 = {
	.base = {
		DEFINE_ADRENO_REV(ADRENO_REV_A512, 5, 1, 2, ANY_ID),
		.features = ADRENO_PREEMPTION |
			ADRENO_CONTENT_PROTECTION | ADRENO_CPZ_RETENTION,
		.gpudev = &adreno_a5xx_gpudev,
		.perfcounters = &adreno_a5xx_perfcounters,
		.gmem_base = 0x100000,
		.gmem_size = (SZ_256K + SZ_16K),
		.bus_width = 32,
		.snapshot_size = SZ_1M,
	},
	.pm4fw_name = "a530_pm4.fw",
	.pfpfw_name = "a530_pfp.fw",
	.zap_name = "a512_zap",
	.hwcg = a512_hwcg_regs,
	.hwcg_count = ARRAY_SIZE(a512_hwcg_regs),
	.highest_bank_bit = 14,
};

/* a508: a50x-class core on a 32-bit bus */
static const struct adreno_a5xx_core adreno_gpu_core_a508 = {
	.base = {
		DEFINE_ADRENO_REV(ADRENO_REV_A508, 5, 0, 8, ANY_ID),
		.features = ADRENO_PREEMPTION |
			ADRENO_CONTENT_PROTECTION | ADRENO_CPZ_RETENTION,
		.gpudev = &adreno_a5xx_gpudev,
		.perfcounters = &adreno_a5xx_perfcounters,
		.gmem_base = 0x100000,
		.gmem_size = (SZ_128K + SZ_8K),
		.bus_width = 32,
		.snapshot_size = SZ_1M,
	},
	.pm4fw_name = "a530_pm4.fw",
	.pfpfw_name = "a530_pfp.fw",
	.zap_name = "a508_zap",
	.hwcg = a50x_hwcg_regs,
	.hwcg_count = ARRAY_SIZE(a50x_hwcg_regs),
	.vbif = a530_vbif_regs,
	.vbif_count = ARRAY_SIZE(a530_vbif_regs),
	.highest_bank_bit = 14,
};

/* a630 v1 (patch level 0) is no longer supported */
DEFINE_DEPRECATED_CORE(a630v1, ADRENO_REV_A630, 6, 3, 0, 0);

static const struct kgsl_regmap_list a630_hwcg_regs[] = {
	{A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{A6XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
	{A6XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
	{A6XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
	{A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02022220},
	{A6XX_RBBM_CLOCK_CNTL2_SP1, 0x02022220},
	{A6XX_RBBM_CLOCK_CNTL2_SP2, 0x02022220},
	{A6XX_RBBM_CLOCK_CNTL2_SP3, 0x02022220},
	{A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{A6XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
	{A6XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
	{A6XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
	{A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{A6XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
	{A6XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
	{A6XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
	{A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
	{A6XX_RBBM_CLOCK_CNTL_TP1, 0x02222222},
	{A6XX_RBBM_CLOCK_CNTL_TP2, 0x02222222},
	{A6XX_RBBM_CLOCK_CNTL_TP3, 0x02222222},
	{A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL3_TP1, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL3_TP2, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL3_TP3, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
	{A6XX_RBBM_CLOCK_CNTL4_TP1, 0x00022222},
	{A6XX_RBBM_CLOCK_CNTL4_TP2, 0x00022222},
	{A6XX_RBBM_CLOCK_CNTL4_TP3, 0x00022222},
	{A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST3_TP1, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST3_TP2, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST3_TP3, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
	{A6XX_RBBM_CLOCK_HYST4_TP1, 0x00077777},
	{A6XX_RBBM_CLOCK_HYST4_TP2, 0x00077777},
	{A6XX_RBBM_CLOCK_HYST4_TP3, 0x00077777},
	{A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY3_TP1, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY3_TP2, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY3_TP3, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
	{A6XX_RBBM_CLOCK_DELAY4_TP1, 0x00011111},
	{A6XX_RBBM_CLOCK_DELAY4_TP2, 0x00011111},
	{A6XX_RBBM_CLOCK_DELAY4_TP3, 0x00011111},
	{A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
	{A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
	{A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL2_RB0, 0x00002222},
	{A6XX_RBBM_CLOCK_CNTL2_RB1, 0x00002222},
	{A6XX_RBBM_CLOCK_CNTL2_RB2, 0x00002222},
	{A6XX_RBBM_CLOCK_CNTL2_RB3, 0x00002222},
	{A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
	{A6XX_RBBM_CLOCK_CNTL_CCU1, 0x00002220},
	{A6XX_RBBM_CLOCK_CNTL_CCU2, 0x00002220},
	{A6XX_RBBM_CLOCK_CNTL_CCU3, 0x00002220},
	{A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
	{A6XX_RBBM_CLOCK_HYST_RB_CCU1, 0x00040F00},
	{A6XX_RBBM_CLOCK_HYST_RB_CCU2, 0x00040F00},
	{A6XX_RBBM_CLOCK_HYST_RB_CCU3, 0x00040F00},
	{A6XX_RBBM_CLOCK_CNTL_RAC, 0x05022022},
	{A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
	{A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
	{A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
	{A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
	{A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
	{A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
	{A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
	{A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
	{A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
	{A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
};

static const struct kgsl_regmap_list a630_vbif_regs[] = {
	{A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009},
	{A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3},
};

/* For a615, a616, a618, A619, a630, a640 and a680 */
/*
 * CP_PROTECT entries: { register, start, end, protect-flag }.
 * NOTE(review): some ranges overlap (e.g. +9 and +10); presumably
 * later/overlapping entries refine earlier ones -- confirm against the CP
 * protection documentation before reordering anything here.
 */
static const struct adreno_protected_regs a630_protected_regs[] = {
	{ A6XX_CP_PROTECT_REG + 0, 0x00000, 0x004ff, 0 },
	{ A6XX_CP_PROTECT_REG + 1, 0x00501, 0x00506, 0 },
	{ A6XX_CP_PROTECT_REG + 2, 0x0050b, 0x007ff, 0 },
	{ A6XX_CP_PROTECT_REG + 3, 0x0050e, 0x0050e, 1 },
	{ A6XX_CP_PROTECT_REG + 4, 0x00510, 0x00510, 1 },
	{ A6XX_CP_PROTECT_REG + 5, 0x00534, 0x00534, 1 },
	{ A6XX_CP_PROTECT_REG + 6, 0x00800, 0x00882, 1 },
	{ A6XX_CP_PROTECT_REG + 7, 0x008a0, 0x008a8, 1 },
	{ A6XX_CP_PROTECT_REG + 8, 0x008ab, 0x008cf, 1 },
	{ A6XX_CP_PROTECT_REG + 9, 0x008d0, 0x0098c, 0 },
	{ A6XX_CP_PROTECT_REG + 10, 0x00900, 0x0094d, 1 },
	{ A6XX_CP_PROTECT_REG + 11, 0x0098d, 0x00bff, 1 },
	{ A6XX_CP_PROTECT_REG + 12, 0x00e00, 0x00e01, 1 },
	{ A6XX_CP_PROTECT_REG + 13, 0x00e03, 0x00e0f, 1 },
	{ A6XX_CP_PROTECT_REG + 14, 0x03c00, 0x03cc3, 1 },
	{ A6XX_CP_PROTECT_REG + 15, 0x03cc4, 0x05cc3, 0 },
	{ A6XX_CP_PROTECT_REG + 16, 0x08630, 0x087ff, 1 },
	{ A6XX_CP_PROTECT_REG + 17, 0x08e00, 0x08e00, 1 },
	{ A6XX_CP_PROTECT_REG + 18, 0x08e08, 0x08e08, 1 },
	{ A6XX_CP_PROTECT_REG + 19, 0x08e50, 0x08e6f, 1 },
	{ A6XX_CP_PROTECT_REG + 20, 0x09624, 0x097ff, 1 },
	{ A6XX_CP_PROTECT_REG + 21, 0x09e70, 0x09e71, 1 },
	{ A6XX_CP_PROTECT_REG + 22, 0x09e78, 0x09fff, 1 },
	{ A6XX_CP_PROTECT_REG + 23, 0x0a630, 0x0a7ff, 1 },
	{ A6XX_CP_PROTECT_REG + 24, 0x0ae02, 0x0ae02, 1 },
	{ A6XX_CP_PROTECT_REG + 25, 0x0ae50, 0x0b17f, 1 },
	{ A6XX_CP_PROTECT_REG + 26, 0x0b604, 0x0b604, 1 },
	{ A6XX_CP_PROTECT_REG + 27, 0x0be02, 0x0be03, 1 },
	{ A6XX_CP_PROTECT_REG + 28, 0x0be20, 0x0d5ff, 1 },
	{ A6XX_CP_PROTECT_REG + 29, 0x0f000, 0x0fbff, 1 },
	{ A6XX_CP_PROTECT_REG + 30, 0x0fc00, 0x11bff, 0 },
	{ A6XX_CP_PROTECT_REG + 31, 0x11c00, 0x11c00, 1 },
	{ 0 },
};

/* a630 v2 and later (ANY_ID); v1 is deprecated above */
static const struct adreno_a6xx_core adreno_gpu_core_a630v2 = {
	.base = {
		DEFINE_ADRENO_REV(ADRENO_REV_A630, 6, 3, 0, ANY_ID),
		.features = ADRENO_IFPC | ADRENO_CONTENT_PROTECTION |
			ADRENO_IOCOHERENT | ADRENO_PREEMPTION,
		.gpudev = &adreno_a630_gpudev.base,
		.perfcounters = &adreno_a630_perfcounters,
		.gmem_base = 0x100000,
		.gmem_size = SZ_1M,
		.bus_width = 32,
		.snapshot_size = SZ_1M,
	},
	.prim_fifo_threshold = 0x0018000,
	.gmu_major = 1,
	.gmu_minor = 3,
	.sqefw_name = "a630_sqe.fw",
	.gmufw_name = "a630_gmu.bin",
	.zap_name = "a630_zap",
	.hwcg = a630_hwcg_regs,
	.hwcg_count = ARRAY_SIZE(a630_hwcg_regs),
	.vbif = a630_vbif_regs,
	.vbif_count = ARRAY_SIZE(a630_vbif_regs),
	.hang_detect_cycles = 0xcfffff,
	.protected_regs = a630_protected_regs,
	.highest_bank_bit = 15,
};

/* For a615, a616, a618 and a619 */
static const struct kgsl_regmap_list a615_hwcg_regs[] = {
	{A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
	{A6XX_RBBM_CLOCK_CNTL_TP1, 0x02222222},
	{A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL3_TP1, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
	{A6XX_RBBM_CLOCK_CNTL4_TP1, 0x00022222},
	{A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST3_TP1, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
	{A6XX_RBBM_CLOCK_HYST4_TP1, 0x00077777},
	{A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY3_TP1, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
	{A6XX_RBBM_CLOCK_DELAY4_TP1, 0x00011111},
	{A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
	{A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
	{A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL2_RB0, 0x00002222},
	{A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
	{A6XX_RBBM_CLOCK_CNTL_CCU1, 0x00002220},
	{A6XX_RBBM_CLOCK_CNTL_CCU2, 0x00002220},
	{A6XX_RBBM_CLOCK_CNTL_CCU3, 0x00002220},
	{A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
	{A6XX_RBBM_CLOCK_HYST_RB_CCU1, 0x00040F00},
	{A6XX_RBBM_CLOCK_HYST_RB_CCU2, 0x00040F00},
	{A6XX_RBBM_CLOCK_HYST_RB_CCU3, 0x00040F00},
	{A6XX_RBBM_CLOCK_CNTL_RAC, 0x05022022},
	{A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
	{A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
	{A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
	{A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
	{A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
	{A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
	{A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
	{A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
	{A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
	{A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555}
};

/* For a615, a616, a618 and a619 */
static const struct kgsl_regmap_list a615_gbif_regs[] = {
	{A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3},
};

/* a615: shares a630 gpudev/firmware, smaller GMEM and legacy perfcounters */
static const struct adreno_a6xx_core adreno_gpu_core_a615 = {
	.base = {
		DEFINE_ADRENO_REV(ADRENO_REV_A615, 6, 1, 5, ANY_ID),
		.features = ADRENO_PREEMPTION |
			ADRENO_CONTENT_PROTECTION | ADRENO_IFPC |
			ADRENO_IOCOHERENT,
		.gpudev = &adreno_a630_gpudev.base,
		.perfcounters = &adreno_a6xx_legacy_perfcounters,
		.gmem_base = 0x100000,
		.gmem_size = SZ_512K,
		.bus_width = 32,
		.snapshot_size = 600 * SZ_1K,
	},
	.prim_fifo_threshold = 0x0018000,
	.gmu_major = 1,
	.gmu_minor = 3,
	.sqefw_name = "a630_sqe.fw",
	.gmufw_name = "a630_gmu.bin",
	.zap_name = "a615_zap",
	.hwcg = a615_hwcg_regs,
	.hwcg_count = ARRAY_SIZE(a615_hwcg_regs),
	.vbif = a615_gbif_regs,
	.vbif_count = ARRAY_SIZE(a615_gbif_regs),
	.hang_detect_cycles = 0xcfffff,
	.protected_regs = a630_protected_regs,
	.highest_bank_bit = 14,
};

/* a618: a615-class core with GMU 1.7 and a shorter hang-detect window */
static const struct adreno_a6xx_core adreno_gpu_core_a618 = {
	.base = {
		DEFINE_ADRENO_REV(ADRENO_REV_A618, 6, 1, 8, ANY_ID),
		.features = ADRENO_PREEMPTION |
			ADRENO_CONTENT_PROTECTION | ADRENO_IFPC |
			ADRENO_IOCOHERENT,
		.gpudev = &adreno_a630_gpudev.base,
		.perfcounters = &adreno_a6xx_legacy_perfcounters,
		.gmem_base = 0x100000,
		.gmem_size = SZ_512K,
		.bus_width = 32,
		.snapshot_size = SZ_1M,
	},
	.prim_fifo_threshold = 0x0018000,
	.gmu_major = 1,
	.gmu_minor = 7,
	.sqefw_name = "a630_sqe.fw",
	.gmufw_name = "a630_gmu.bin",
	.zap_name = "a615_zap",
	.hwcg = a615_hwcg_regs,
	.hwcg_count = ARRAY_SIZE(a615_hwcg_regs),
	.vbif = a615_gbif_regs,
	.vbif_count = ARRAY_SIZE(a615_gbif_regs),
	.hang_detect_cycles = 0x3fffff,
	.protected_regs = a630_protected_regs,
	.highest_bank_bit = 14,
};

/* a619: a615-class core with its own GMU firmware (GMU 1.9) */
static const struct adreno_a6xx_core adreno_gpu_core_a619 = {
	.base = {
		DEFINE_ADRENO_REV(ADRENO_REV_A619, 6, 1, 9, ANY_ID),
		.features = ADRENO_PREEMPTION |
			ADRENO_CONTENT_PROTECTION | ADRENO_IFPC |
			ADRENO_IOCOHERENT,
		.gpudev = &adreno_a630_gpudev.base,
		.perfcounters = &adreno_a6xx_legacy_perfcounters,
		.gmem_base = 0x100000,
		.gmem_size = SZ_512K,
		.bus_width = 32,
		.snapshot_size = SZ_2M,
	},
	.prim_fifo_threshold = 0x0018000,
	.gmu_major = 1,
	.gmu_minor = 9,
	.sqefw_name = "a630_sqe.fw",
	.gmufw_name = "a619_gmu.bin",
	.zap_name = "a615_zap",
	.hwcg = a615_hwcg_regs,
	.hwcg_count = ARRAY_SIZE(a615_hwcg_regs),
	.vbif = a615_gbif_regs,
	.vbif_count = ARRAY_SIZE(a615_gbif_regs),
	.hang_detect_cycles = 0x3fffff,
	.protected_regs = a630_protected_regs,
	.highest_bank_bit = 14,
};

/*
 * a619 "holi" variant: selected by the DT compatible string; runs without a
 * GMU (no gmu_major/minor or GMU firmware) via the a619-holi gpudev.
 */
static const struct adreno_a6xx_core adreno_gpu_core_a619_variant = {
	.base = {
		DEFINE_ADRENO_REV(ADRENO_REV_A619, 6, 1, 9, ANY_ID),
		.compatible = "qcom,adreno-gpu-a619-holi",
		.features = ADRENO_PREEMPTION | ADRENO_CONTENT_PROTECTION,
		.gpudev = &adreno_a619_holi_gpudev,
		.perfcounters = &adreno_a6xx_legacy_perfcounters,
		.gmem_base = 0x100000,
		.gmem_size = SZ_512K,
		.bus_width = 32,
		.snapshot_size = SZ_2M,
	},
	.prim_fifo_threshold = 0x0018000,
	.sqefw_name = "a630_sqe.fw",
	.zap_name = "a615_zap",
	.hwcg = a615_hwcg_regs,
	.hwcg_count = ARRAY_SIZE(a615_hwcg_regs),
	.vbif = a615_gbif_regs,
	.vbif_count = ARRAY_SIZE(a615_gbif_regs),
	.hang_detect_cycles = 0x3fffff,
	.protected_regs = a630_protected_regs,
	.gx_cpr_toggle = true,
	.highest_bank_bit = 14,
};

static const struct kgsl_regmap_list a620_hwcg_regs[] = {
	{A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
	{A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
	{A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
	{A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
	{A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
	{A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
	{A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
	{A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022},
	{A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
	{A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
	{A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
	{A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
	{A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
	{A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
	{A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
	{A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
	{A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
	{A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
	{A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000777},
	{A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
	{A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{A6XX_RBBM_ISDB_CNT, 0x00000182},
	{A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
	{A6XX_RBBM_SP_HYST_CNT, 0x00000000},
	{A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
	{A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
	{A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
};

/* a620 and a650 */
static const struct kgsl_regmap_list a650_gbif_regs[] = {
	{A6XX_GBIF_QSB_SIDE0, 0x00071620},
	{A6XX_GBIF_QSB_SIDE1, 0x00071620},
	{A6XX_GBIF_QSB_SIDE2, 0x00071620},
	{A6XX_GBIF_QSB_SIDE3, 0x00071620},
	{A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x3},
};

/* These are for a620 and a650 */
/*
 * CP_PROTECT entries for a620/a650: { register, start, end, protect-flag }.
 * NOTE(review): the index jumps from +37 to +47 for the final entry --
 * presumably protect registers 38..46 are intentionally unused on these
 * parts; confirm before renumbering. Some ranges also overlap (e.g. +36
 * opens a window that +37 re-protects part of).
 */
static const struct adreno_protected_regs a620_protected_regs[] = {
	{ A6XX_CP_PROTECT_REG + 0, 0x00000, 0x004ff, 0 },
	{ A6XX_CP_PROTECT_REG + 1, 0x00501, 0x00506, 0 },
	{ A6XX_CP_PROTECT_REG + 2, 0x0050b, 0x007ff, 0 },
	{ A6XX_CP_PROTECT_REG + 3, 0x0050e, 0x0050e, 1 },
	{ A6XX_CP_PROTECT_REG + 4, 0x00510, 0x00510, 1 },
	{ A6XX_CP_PROTECT_REG + 5, 0x00534, 0x00534, 1 },
	{ A6XX_CP_PROTECT_REG + 6, 0x00800, 0x00882, 1 },
	{ A6XX_CP_PROTECT_REG + 7, 0x008a0, 0x008a8, 1 },
	{ A6XX_CP_PROTECT_REG + 8, 0x008ab, 0x008cf, 1 },
	{ A6XX_CP_PROTECT_REG + 9, 0x008d0, 0x0098c, 0 },
	{ A6XX_CP_PROTECT_REG + 10, 0x00900, 0x0094d, 1 },
	{ A6XX_CP_PROTECT_REG + 11, 0x0098d, 0x00bff, 1 },
	{ A6XX_CP_PROTECT_REG + 12, 0x00e00, 0x00e01, 1 },
	{ A6XX_CP_PROTECT_REG + 13, 0x00e03, 0x00e0f, 1 },
	{ A6XX_CP_PROTECT_REG + 14, 0x03c00, 0x03cc3, 1 },
	{ A6XX_CP_PROTECT_REG + 15, 0x03cc4, 0x05cc3, 0 },
	{ A6XX_CP_PROTECT_REG + 16, 0x08630, 0x087ff, 1 },
	{ A6XX_CP_PROTECT_REG + 17, 0x08e00, 0x08e00, 1 },
	{ A6XX_CP_PROTECT_REG + 18, 0x08e08, 0x08e08, 1 },
	{ A6XX_CP_PROTECT_REG + 19, 0x08e50, 0x08e6f, 1 },
	{ A6XX_CP_PROTECT_REG + 20, 0x08e80, 0x090ff, 1 },
	{ A6XX_CP_PROTECT_REG + 21, 0x09624, 0x097ff, 1 },
	{ A6XX_CP_PROTECT_REG + 22, 0x09e60, 0x09e71, 1 },
	{ A6XX_CP_PROTECT_REG + 23, 0x09e78, 0x09fff, 1 },
	{ A6XX_CP_PROTECT_REG + 24, 0x0a630, 0x0a7ff, 1 },
	{ A6XX_CP_PROTECT_REG + 25, 0x0ae02, 0x0ae02, 1 },
	{ A6XX_CP_PROTECT_REG + 26, 0x0ae50, 0x0b17f, 1 },
	{ A6XX_CP_PROTECT_REG + 27, 0x0b604, 0x0b604, 1 },
	{ A6XX_CP_PROTECT_REG + 28, 0x0b608, 0x0b60f, 1 },
	{ A6XX_CP_PROTECT_REG + 29, 0x0be02, 0x0be03, 1 },
	{ A6XX_CP_PROTECT_REG + 30, 0x0be20, 0x0d5ff, 1 },
	{ A6XX_CP_PROTECT_REG + 31, 0x0f000, 0x0fbff, 1 },
	{ A6XX_CP_PROTECT_REG + 32, 0x0fc00, 0x11bff, 0 },
	{ A6XX_CP_PROTECT_REG + 33, 0x18400, 0x1a3ff, 1 },
	{ A6XX_CP_PROTECT_REG + 34, 0x1a800, 0x1c7ff, 1 },
	{ A6XX_CP_PROTECT_REG + 35, 0x1f400, 0x1f843, 1 },
	{ A6XX_CP_PROTECT_REG + 36, 0x1f844, 0x1f8bf, 0 },
	{ A6XX_CP_PROTECT_REG + 37, 0x1f887, 0x1f8a2, 1 },
	{ A6XX_CP_PROTECT_REG + 47, 0x1f8c0, 0x1f8c0, 1 },
	{ 0 },
};

/*
 * a620: GMU 2.x part using a650 SQE/GMU firmware; gmem_base is 0 (the GMEM
 * base is not fixed at 0x100000 as on the older cores above).
 */
static const struct adreno_a6xx_core adreno_gpu_core_a620 = {
	.base = {
		DEFINE_ADRENO_REV(ADRENO_REV_A620, 6, 2, 0, ANY_ID),
		.features = ADRENO_CONTENT_PROTECTION | ADRENO_IOCOHERENT |
			ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_ACD |
			ADRENO_APRIV,
		.gpudev = &adreno_a630_gpudev.base,
		.perfcounters = &adreno_a6xx_perfcounters,
		.gmem_base = 0,
		.gmem_size = SZ_512K,
		.bus_width = 32,
		.snapshot_size = 2 * SZ_1M,
	},
	.prim_fifo_threshold = 0x0010000,
	.gmu_major = 2,
	.gmu_minor = 0,
	.sqefw_name = "a650_sqe.fw",
	.gmufw_name = "a650_gmu.bin",
	.zap_name = "a620_zap",
	.hwcg = a620_hwcg_regs,
	.hwcg_count = ARRAY_SIZE(a620_hwcg_regs),
	.vbif = a650_gbif_regs,
	.vbif_count = ARRAY_SIZE(a650_gbif_regs),
	.veto_fal10 = true,
	.hang_detect_cycles = 0x3ffff,
	.protected_regs = a620_protected_regs,
	.disable_tseskip = true,
	.highest_bank_bit = 14,
};

/* NOTE(review): this table continues past the end of this span */
static const struct kgsl_regmap_list a640_hwcg_regs[] = {
	{A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
	{A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
	{A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
	{A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
	{A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
	{A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
	{A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
	{A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
	{A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
	{A6XX_RBBM_CLOCK_CNTL_RAC, 0x05222022},
{A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555}, + {A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011}, + {A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044}, + {A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222}, + {A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002}, + {A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222}, + {A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, + {A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000}, + {A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222}, + {A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111}, + {A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000}, + {A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004}, + {A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {A6XX_RBBM_ISDB_CNT, 0x00000182}, + {A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000}, + {A6XX_RBBM_SP_HYST_CNT, 0x00000000}, + {A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222}, + {A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111}, + {A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555}, +}; + +/* These apply to a640, a680, a612 and a610 */ +static const struct kgsl_regmap_list a640_vbif_regs[] = { + {A6XX_GBIF_QSB_SIDE0, 0x00071620}, + {A6XX_GBIF_QSB_SIDE1, 0x00071620}, + {A6XX_GBIF_QSB_SIDE2, 0x00071620}, + {A6XX_GBIF_QSB_SIDE3, 0x00071620}, + {A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x3}, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a640 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A640, 6, 4, 0, ANY_ID), + .features = ADRENO_CONTENT_PROTECTION | ADRENO_IOCOHERENT | + ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_L3_VOTE, + .gpudev = &adreno_a6xx_gmu_gpudev.base, + .perfcounters = &adreno_a6xx_legacy_perfcounters, + .gmem_base = 0x100000, + .gmem_size = SZ_1M, //Verified 1MB + .bus_width = 32, + .snapshot_size = 2 * SZ_1M, + }, 
+ .prim_fifo_threshold = 0x00200000, + .gmu_major = 2, + .gmu_minor = 0, + .sqefw_name = "a630_sqe.fw", + .gmufw_name = "a640_gmu.bin", + .zap_name = "a640_zap", + .hwcg = a640_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a640_hwcg_regs), + .vbif = a640_vbif_regs, + .vbif_count = ARRAY_SIZE(a640_vbif_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = a630_protected_regs, + .disable_tseskip = true, + .highest_bank_bit = 15, +}; + +static const struct kgsl_regmap_list a650_hwcg_regs[] = { + {A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, + {A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, + {A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, + {A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, + {A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222}, + {A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222}, + {A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111}, + {A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777}, + {A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222}, + {A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220}, + {A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00}, + {A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022}, + {A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555}, + {A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011}, + {A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044}, + {A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222}, + {A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002}, + {A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222}, + {A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, + {A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + 
{A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000}, + {A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222}, + {A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111}, + {A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000777}, + {A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004}, + {A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {A6XX_RBBM_ISDB_CNT, 0x00000182}, + {A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000}, + {A6XX_RBBM_SP_HYST_CNT, 0x00000000}, + {A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222}, + {A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111}, + {A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555}, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a650 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A650, 6, 5, 0, 0), + .features = ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | + ADRENO_IFPC | ADRENO_APRIV | ADRENO_L3_VOTE, + .gpudev = &adreno_a6xx_gmu_gpudev.base, + .perfcounters = &adreno_a6xx_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_1M + SZ_128K, /* verified 1152kB */ + .bus_width = 32, + .snapshot_size = 2 * SZ_1M, + }, + .prim_fifo_threshold = 0x00300000, + .gmu_major = 2, + .gmu_minor = 0, + .sqefw_name = "a650_sqe.fw", + .gmufw_name = "a650_gmu.bin", + .zap_name = "a650_zap", + .hwcg = a650_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a650_hwcg_regs), + .vbif = a650_gbif_regs, + .vbif_count = ARRAY_SIZE(a650_gbif_regs), + .veto_fal10 = true, + .pdc_in_aop = true, + .hang_detect_cycles = 0xcfffff, + .protected_regs = a620_protected_regs, + .disable_tseskip = true, + .highest_bank_bit = 16, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a650v2 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A650, 6, 5, 0, ANY_ID), + .features = ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | + ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_ACD | + ADRENO_LM | ADRENO_APRIV | ADRENO_L3_VOTE, + .gpudev = &adreno_a6xx_gmu_gpudev.base, + .perfcounters = &adreno_a6xx_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_1M 
+ SZ_128K, /* verified 1152kB */ + .bus_width = 32, + .snapshot_size = 2 * SZ_1M, + }, + .prim_fifo_threshold = 0x00300000, + .gmu_major = 2, + .gmu_minor = 0, + .sqefw_name = "a650_sqe.fw", + .gmufw_name = "a650_gmu.bin", + .zap_name = "a650_zap", + .hwcg = a650_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a650_hwcg_regs), + .vbif = a650_gbif_regs, + .vbif_count = ARRAY_SIZE(a650_gbif_regs), + .veto_fal10 = true, + .pdc_in_aop = true, + .hang_detect_cycles = 0x3ffff, + .protected_regs = a620_protected_regs, + .disable_tseskip = true, + .highest_bank_bit = 16, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a680 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A680, 6, 8, 0, ANY_ID), + .gpudev = &adreno_a6xx_gmu_gpudev.base, + .perfcounters = &adreno_a6xx_legacy_perfcounters, + .gmem_base = 0x100000, + .gmem_size = SZ_2M, + .bus_width = 32, + .snapshot_size = SZ_1M, + }, + .prim_fifo_threshold = 0x00400000, + .gmu_major = 2, + .gmu_minor = 0, + .sqefw_name = "a630_sqe.fw", + .gmufw_name = "a640_gmu.bin", + .zap_name = "a640_zap", + .hwcg = a640_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a640_hwcg_regs), + .vbif = a640_vbif_regs, + .vbif_count = ARRAY_SIZE(a640_vbif_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = a630_protected_regs, + .disable_tseskip = true, + .highest_bank_bit = 16, +}; + +static const struct kgsl_regmap_list a612_hwcg_regs[] = { + {A6XX_RBBM_CLOCK_CNTL_SP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, + {A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000081}, + {A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, + {A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222}, + {A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111}, + {A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + 
{A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777}, + {A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01202222}, + {A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220}, + {A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00}, + {A6XX_RBBM_CLOCK_CNTL_RAC, 0x05522022}, + {A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555}, + {A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011}, + {A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044}, + {A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {A6XX_RBBM_CLOCK_MODE_GPC, 0x02222222}, + {A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002}, + {A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222}, + {A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, + {A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000}, + {A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004}, + {A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {A6XX_RBBM_ISDB_CNT, 0x00000182}, + {A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000}, + {A6XX_RBBM_SP_HYST_CNT, 0x00000000}, + {A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222}, + {A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111}, + {A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555}, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a612 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A612, 6, 1, 2, ANY_ID), + .features = ADRENO_CONTENT_PROTECTION | + ADRENO_IOCOHERENT | ADRENO_PREEMPTION | ADRENO_IFPC, + .gpudev = &adreno_a6xx_rgmu_gpudev, + .perfcounters = &adreno_a6xx_legacy_perfcounters, + .gmem_base = 0x100000, + .gmem_size = (SZ_128K + SZ_4K), + .bus_width = 32, + .snapshot_size = SZ_1M, + }, + .prim_fifo_threshold = 0x00080000, + .sqefw_name = "a630_sqe.fw", + .gmufw_name = "a612_rgmu.bin", + .zap_name = "a612_zap", + .hwcg = a612_hwcg_regs, + .hwcg_count = 
ARRAY_SIZE(a612_hwcg_regs), + .vbif = a640_vbif_regs, + .vbif_count = ARRAY_SIZE(a640_vbif_regs), + .hang_detect_cycles = 0x3fffff, + .protected_regs = a630_protected_regs, + .highest_bank_bit = 14, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a616 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A616, 6, 1, 6, ANY_ID), + .features = ADRENO_PREEMPTION | + ADRENO_CONTENT_PROTECTION | ADRENO_IFPC | + ADRENO_IOCOHERENT, + .gpudev = &adreno_a630_gpudev.base, + .perfcounters = &adreno_a6xx_legacy_perfcounters, + .gmem_base = 0x100000, + .gmem_size = SZ_512K, + .bus_width = 32, + .snapshot_size = SZ_1M, + }, + .prim_fifo_threshold = 0x0018000, + .gmu_major = 1, + .gmu_minor = 3, + .sqefw_name = "a630_sqe.fw", + .gmufw_name = "a630_gmu.bin", + .zap_name = "a615_zap", + .hwcg = a615_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a615_hwcg_regs), + .vbif = a615_gbif_regs, + .vbif_count = ARRAY_SIZE(a615_gbif_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = a630_protected_regs, + .highest_bank_bit = 14, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a610 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A610, 6, 1, 0, ANY_ID), + .features = ADRENO_CONTENT_PROTECTION | + ADRENO_PREEMPTION, + .gpudev = &adreno_a6xx_gpudev, + .perfcounters = &adreno_a6xx_legacy_perfcounters, + .gmem_base = 0x100000, + .gmem_size = (SZ_128K + SZ_4K), + .bus_width = 32, + }, + .prim_fifo_threshold = 0x00080000, + .sqefw_name = "a630_sqe.fw", + .zap_name = "a610_zap", + .hwcg = a612_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a612_hwcg_regs), + .vbif = a640_vbif_regs, + .vbif_count = ARRAY_SIZE(a640_vbif_regs), + .hang_detect_cycles = 0x3ffff, + .protected_regs = a630_protected_regs, + .highest_bank_bit = 14, +}; + +static const struct kgsl_regmap_list a660_hwcg_regs[] = { + {A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, + {A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, + {A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, + {A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, + {A6XX_RBBM_CLOCK_CNTL_TP0, 
0x22222222}, + {A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222}, + {A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111}, + {A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777}, + {A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222}, + {A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220}, + {A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00}, + {A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022}, + {A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555}, + {A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011}, + {A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044}, + {A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222}, + {A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002}, + {A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222}, + {A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, + {A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000}, + {A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222}, + {A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111}, + {A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000}, + {A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004}, + {A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {A6XX_RBBM_ISDB_CNT, 0x00000182}, + {A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000}, + {A6XX_RBBM_SP_HYST_CNT, 0x00000000}, + {A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222}, + {A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111}, + {A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555}, +}; + +/* A660 protected register list */ +static const 
struct adreno_protected_regs a660_protected_regs[] = { + { A6XX_CP_PROTECT_REG + 0, 0x00000, 0x004ff, 0 }, + { A6XX_CP_PROTECT_REG + 1, 0x00501, 0x00506, 0 }, + { A6XX_CP_PROTECT_REG + 2, 0x0050b, 0x007ff, 0 }, + { A6XX_CP_PROTECT_REG + 3, 0x0050e, 0x0050e, 1 }, + { A6XX_CP_PROTECT_REG + 4, 0x00510, 0x00510, 1 }, + { A6XX_CP_PROTECT_REG + 5, 0x00534, 0x00534, 1 }, + { A6XX_CP_PROTECT_REG + 6, 0x00800, 0x00882, 1 }, + { A6XX_CP_PROTECT_REG + 7, 0x008a0, 0x008a8, 1 }, + { A6XX_CP_PROTECT_REG + 8, 0x008ab, 0x008cf, 1 }, + { A6XX_CP_PROTECT_REG + 9, 0x008d0, 0x0098c, 0 }, + { A6XX_CP_PROTECT_REG + 10, 0x00900, 0x0094d, 1 }, + { A6XX_CP_PROTECT_REG + 11, 0x0098d, 0x00bff, 1 }, + { A6XX_CP_PROTECT_REG + 12, 0x00e00, 0x00e01, 1 }, + { A6XX_CP_PROTECT_REG + 13, 0x00e03, 0x00e0f, 1 }, + { A6XX_CP_PROTECT_REG + 14, 0x03c00, 0x03cc3, 1 }, + { A6XX_CP_PROTECT_REG + 15, 0x03cc4, 0x05cc3, 0 }, + { A6XX_CP_PROTECT_REG + 16, 0x08630, 0x087ff, 1 }, + { A6XX_CP_PROTECT_REG + 17, 0x08e00, 0x08e00, 1 }, + { A6XX_CP_PROTECT_REG + 18, 0x08e08, 0x08e08, 1 }, + { A6XX_CP_PROTECT_REG + 19, 0x08e50, 0x08e6f, 1 }, + { A6XX_CP_PROTECT_REG + 20, 0x08e80, 0x090ff, 1 }, + { A6XX_CP_PROTECT_REG + 21, 0x09624, 0x097ff, 1 }, + { A6XX_CP_PROTECT_REG + 22, 0x09e60, 0x09e71, 1 }, + { A6XX_CP_PROTECT_REG + 23, 0x09e78, 0x09fff, 1 }, + { A6XX_CP_PROTECT_REG + 24, 0x0a630, 0x0a7ff, 1 }, + { A6XX_CP_PROTECT_REG + 25, 0x0ae02, 0x0ae02, 1 }, + { A6XX_CP_PROTECT_REG + 26, 0x0ae50, 0x0af7f, 1 }, + { A6XX_CP_PROTECT_REG + 27, 0x0b604, 0x0b604, 1 }, + { A6XX_CP_PROTECT_REG + 28, 0x0b608, 0x0b60e, 1 }, + { A6XX_CP_PROTECT_REG + 29, 0x0be02, 0x0be03, 1 }, + { A6XX_CP_PROTECT_REG + 30, 0x0be20, 0x0bf7f, 1 }, + { A6XX_CP_PROTECT_REG + 31, 0x0d000, 0x0d5ff, 1 }, + { A6XX_CP_PROTECT_REG + 32, 0x0f000, 0x0fbff, 1 }, + { A6XX_CP_PROTECT_REG + 33, 0x0fc00, 0x11bff, 0 }, + { A6XX_CP_PROTECT_REG + 34, 0x18400, 0x1a3ff, 1 }, + { A6XX_CP_PROTECT_REG + 35, 0x1a400, 0x1c3ff, 1 }, + { A6XX_CP_PROTECT_REG + 36, 0x1f400, 
0x1f843, 1 }, + { A6XX_CP_PROTECT_REG + 37, 0x1f844, 0x1f8bf, 0 }, + { A6XX_CP_PROTECT_REG + 38, 0x1f860, 0x1f860, 1 }, + { A6XX_CP_PROTECT_REG + 39, 0x1f887, 0x1f8a2, 1 }, + { A6XX_CP_PROTECT_REG + 47, 0x1f8c0, 0x1f8c0, 1 }, + { 0 }, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a660 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A660, 6, 6, 0, 0), + .features = ADRENO_APRIV | + ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | + ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_L3_VOTE, + .gpudev = &adreno_a6xx_gmu_gpudev.base, + .perfcounters = &adreno_a6xx_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_1M + SZ_512K, + .bus_width = 32, + .snapshot_size = SZ_2M, + }, + .prim_fifo_threshold = 0x00300000, + .gmu_major = 2, + .gmu_minor = 0, + .sqefw_name = "a660_sqe.fw", + .gmufw_name = "a660_gmu.bin", + .zap_name = "a660_zap", + .hwcg = a660_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a660_hwcg_regs), + .vbif = a650_gbif_regs, + .vbif_count = ARRAY_SIZE(a650_gbif_regs), + .hang_detect_cycles = 0xcfffff, + .veto_fal10 = true, + .protected_regs = a660_protected_regs, + .disable_tseskip = true, + .highest_bank_bit = 16, + .pdc_in_aop = true, + .ctxt_record_size = 2496 * 1024, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a660v2 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A660, 6, 6, 0, ANY_ID), + .features = ADRENO_APRIV | + ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | + ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_ACD | + ADRENO_L3_VOTE, + .gpudev = &adreno_a6xx_gmu_gpudev.base, + .perfcounters = &adreno_a6xx_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_1M + SZ_512K, + .bus_width = 32, + .snapshot_size = SZ_2M, + }, + .prim_fifo_threshold = 0x00300000, + .gmu_major = 2, + .gmu_minor = 0, + .sqefw_name = "a660_sqe.fw", + .gmufw_name = "a660_gmu.bin", + .zap_name = "a660_zap", + .hwcg = a660_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a660_hwcg_regs), + .vbif = a650_gbif_regs, + .vbif_count = ARRAY_SIZE(a650_gbif_regs), + .hang_detect_cycles = 0xcfffff, 
+ .veto_fal10 = true, + .protected_regs = a660_protected_regs, + .disable_tseskip = true, + .highest_bank_bit = 16, + .pdc_in_aop = true, + .ctxt_record_size = 2496 * 1024, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a660_shima = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A660, 6, 6, 0, ANY_ID), + .compatible = "qcom,adreno-gpu-a660-shima", + .features = ADRENO_APRIV | + ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | + ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_ACD, + .gpudev = &adreno_a6xx_gmu_gpudev.base, + .perfcounters = &adreno_a6xx_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_1M + SZ_512K, + .bus_width = 32, + .snapshot_size = SZ_2M, + }, + .prim_fifo_threshold = 0x00300000, + .gmu_major = 2, + .gmu_minor = 0, + .sqefw_name = "a660_sqe.fw", + .gmufw_name = "a660_gmu.bin", + .zap_name = "a660_zap", + .hwcg = a660_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a660_hwcg_regs), + .vbif = a650_gbif_regs, + .vbif_count = ARRAY_SIZE(a650_gbif_regs), + .hang_detect_cycles = 0x3ffff, + .veto_fal10 = true, + .protected_regs = a660_protected_regs, + .disable_tseskip = true, + .highest_bank_bit = 15, + .pdc_in_aop = true, + .ctxt_record_size = 2496 * 1024, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a635 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A635, 6, 3, 5, ANY_ID), + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | + ADRENO_CONTENT_PROTECTION, + .gpudev = &adreno_a6xx_gmu_gpudev.base, + .perfcounters = &adreno_a6xx_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_512K, + .bus_width = 32, + .snapshot_size = SZ_2M, + }, + .prim_fifo_threshold = 0x00200000, + .gmu_major = 2, + .gmu_minor = 0, + .sqefw_name = "a660_sqe.fw", + .gmufw_name = "a660_gmu.bin", + .zap_name = "a660_zap", + .hwcg = a660_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a660_hwcg_regs), + .vbif = a650_gbif_regs, + .vbif_count = ARRAY_SIZE(a650_gbif_regs), + .hang_detect_cycles = 0x3ffff, + .veto_fal10 = true, + .protected_regs = a660_protected_regs, + .disable_tseskip = 
true, + .highest_bank_bit = 15, + .pdc_in_aop = true, + .ctxt_record_size = 2496 * 1024, +}; + +static const struct kgsl_regmap_list gen7_0_0_gbif_regs[] = { + { GEN7_GBIF_QSB_SIDE0, 0x00071620 }, + { GEN7_GBIF_QSB_SIDE1, 0x00071620 }, + { GEN7_GBIF_QSB_SIDE2, 0x00071620 }, + { GEN7_GBIF_QSB_SIDE3, 0x00071620 }, + { GEN7_RBBM_GBIF_CLIENT_QOS_CNTL, 0x2120212 }, +}; + +static const struct kgsl_regmap_list gen7_0_0_hwcg_regs[] = { + { GEN7_RBBM_CLOCK_CNTL_SP0, 0x02222222 }, + { GEN7_RBBM_CLOCK_CNTL2_SP0, 0x02222222 }, + { GEN7_RBBM_CLOCK_HYST_SP0, 0x0000f3cf }, + { GEN7_RBBM_CLOCK_DELAY_SP0, 0x00000080 }, + { GEN7_RBBM_CLOCK_CNTL_TP0, 0x22222222 }, + { GEN7_RBBM_CLOCK_CNTL2_TP0, 0x22222222 }, + { GEN7_RBBM_CLOCK_CNTL3_TP0, 0x22222222 }, + { GEN7_RBBM_CLOCK_CNTL4_TP0, 0x00222222 }, + { GEN7_RBBM_CLOCK_HYST_TP0, 0x77777777 }, + { GEN7_RBBM_CLOCK_HYST2_TP0, 0x77777777 }, + { GEN7_RBBM_CLOCK_HYST3_TP0, 0x77777777 }, + { GEN7_RBBM_CLOCK_HYST4_TP0, 0x00077777 }, + { GEN7_RBBM_CLOCK_DELAY_TP0, 0x11111111 }, + { GEN7_RBBM_CLOCK_DELAY2_TP0, 0x11111111 }, + { GEN7_RBBM_CLOCK_DELAY3_TP0, 0x11111111 }, + { GEN7_RBBM_CLOCK_DELAY4_TP0, 0x00011111 }, + { GEN7_RBBM_CLOCK_CNTL_UCHE, 0x22222222 }, + { GEN7_RBBM_CLOCK_HYST_UCHE, 0x00000004 }, + { GEN7_RBBM_CLOCK_DELAY_UCHE, 0x00000002 }, + { GEN7_RBBM_CLOCK_CNTL_RB0, 0x22222222 }, + { GEN7_RBBM_CLOCK_CNTL2_RB0, 0x01002222 }, + { GEN7_RBBM_CLOCK_CNTL_CCU0, 0x00002220 }, + { GEN7_RBBM_CLOCK_HYST_RB_CCU0, 0x44000f00 }, + { GEN7_RBBM_CLOCK_CNTL_RAC, 0x25222022 }, + { GEN7_RBBM_CLOCK_CNTL2_RAC, 0x00555555 }, + { GEN7_RBBM_CLOCK_DELAY_RAC, 0x00000011 }, + { GEN7_RBBM_CLOCK_HYST_RAC, 0x00440044 }, + { GEN7_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222 }, + { GEN7_RBBM_CLOCK_MODE2_GRAS, 0x00000222 }, + { GEN7_RBBM_CLOCK_MODE_BV_GRAS, 0x00222222 }, + { GEN7_RBBM_CLOCK_MODE_GPC, 0x02222223 }, + { GEN7_RBBM_CLOCK_MODE_VFD, 0x00002222 }, + { GEN7_RBBM_CLOCK_MODE_BV_GPC, 0x00222223 }, + { GEN7_RBBM_CLOCK_MODE_BV_VFD, 0x00002222 }, + { 
GEN7_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000 }, + { GEN7_RBBM_CLOCK_HYST_GPC, 0x04104004 }, + { GEN7_RBBM_CLOCK_HYST_VFD, 0x00000000 }, + { GEN7_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000 }, + { GEN7_RBBM_CLOCK_DELAY_GPC, 0x00000200 }, + { GEN7_RBBM_CLOCK_DELAY_VFD, 0x00002222 }, + { GEN7_RBBM_CLOCK_MODE_HLSQ, 0x00002222 }, + { GEN7_RBBM_CLOCK_DELAY_HLSQ, 0x00000000 }, + { GEN7_RBBM_CLOCK_HYST_HLSQ, 0x00000000 }, + { GEN7_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002 }, + { GEN7_RBBM_CLOCK_MODE_BV_LRZ, 0x55555552 }, + { GEN7_RBBM_CLOCK_MODE_CP, 0x00000223 }, + { GEN7_RBBM_CLOCK_CNTL, 0x8aa8aa82 }, + { GEN7_RBBM_ISDB_CNT, 0x00000182 }, + { GEN7_RBBM_RAC_THRESHOLD_CNT, 0x00000000 }, + { GEN7_RBBM_SP_HYST_CNT, 0x00000000 }, + { GEN7_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222 }, + { GEN7_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111 }, + { GEN7_RBBM_CLOCK_HYST_GMU_GX, 0x00000555 }, +}; + +/* GEN7_0_0 protected register list */ +static const struct gen7_protected_regs gen7_0_0_protected_regs[] = { + { GEN7_CP_PROTECT_REG + 0, 0x00000, 0x004ff, 0 }, + { GEN7_CP_PROTECT_REG + 1, 0x0050b, 0x00698, 0 }, + { GEN7_CP_PROTECT_REG + 2, 0x0050e, 0x0050e, 1 }, + { GEN7_CP_PROTECT_REG + 3, 0x00510, 0x00510, 1 }, + { GEN7_CP_PROTECT_REG + 4, 0x00534, 0x00534, 1 }, + { GEN7_CP_PROTECT_REG + 5, 0x00699, 0x00882, 1 }, + { GEN7_CP_PROTECT_REG + 6, 0x008a0, 0x008a8, 1 }, + { GEN7_CP_PROTECT_REG + 7, 0x008ab, 0x008cf, 1 }, + { GEN7_CP_PROTECT_REG + 8, 0x008d0, 0x00a40, 0 }, + { GEN7_CP_PROTECT_REG + 9, 0x00900, 0x0094d, 1 }, + { GEN7_CP_PROTECT_REG + 10, 0x0098d, 0x00a3f, 1 }, + { GEN7_CP_PROTECT_REG + 11, 0x00a41, 0x00bff, 1 }, + { GEN7_CP_PROTECT_REG + 12, 0x00df0, 0x00df1, 1 }, + { GEN7_CP_PROTECT_REG + 13, 0x00e01, 0x00e01, 1 }, + { GEN7_CP_PROTECT_REG + 14, 0x00e07, 0x00e0f, 1 }, + { GEN7_CP_PROTECT_REG + 15, 0x03c00, 0x03cc3, 1 }, + { GEN7_CP_PROTECT_REG + 16, 0x03cc4, 0x05cc3, 0 }, + { GEN7_CP_PROTECT_REG + 17, 0x08630, 0x087ff, 1 }, + { GEN7_CP_PROTECT_REG + 18, 0x08e00, 0x08e00, 1 }, + { 
GEN7_CP_PROTECT_REG + 19, 0x08e08, 0x08e08, 1 }, + { GEN7_CP_PROTECT_REG + 20, 0x08e50, 0x08e6f, 1 }, + { GEN7_CP_PROTECT_REG + 21, 0x08e80, 0x09100, 1 }, + { GEN7_CP_PROTECT_REG + 22, 0x09624, 0x097ff, 1 }, + { GEN7_CP_PROTECT_REG + 23, 0x09e40, 0x09e40, 1 }, + { GEN7_CP_PROTECT_REG + 24, 0x09e64, 0x09e71, 1 }, + { GEN7_CP_PROTECT_REG + 25, 0x09e78, 0x09fff, 1 }, + { GEN7_CP_PROTECT_REG + 26, 0x0a630, 0x0a7ff, 1 }, + { GEN7_CP_PROTECT_REG + 27, 0x0ae02, 0x0ae02, 1 }, + { GEN7_CP_PROTECT_REG + 28, 0x0ae50, 0x0ae5f, 1 }, + { GEN7_CP_PROTECT_REG + 29, 0x0ae66, 0x0ae69, 1 }, + { GEN7_CP_PROTECT_REG + 30, 0x0ae6f, 0x0ae72, 1 }, + { GEN7_CP_PROTECT_REG + 31, 0x0b604, 0x0b607, 1 }, + { GEN7_CP_PROTECT_REG + 32, 0x0ec00, 0x0fbff, 1 }, + { GEN7_CP_PROTECT_REG + 33, 0x0fc00, 0x11bff, 0 }, + { GEN7_CP_PROTECT_REG + 34, 0x18400, 0x18453, 1 }, + { GEN7_CP_PROTECT_REG + 35, 0x18454, 0x18458, 0 }, + { GEN7_CP_PROTECT_REG + 47, 0x18459, 0x18459, 1 }, + { 0 }, +}; + +static const struct adreno_gen7_core adreno_gpu_core_gen7_0_0 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_GEN7_0_0, + UINT_MAX, UINT_MAX, UINT_MAX, 0), + .compatible = "qcom,adreno-gpu-gen7-0-0", + .chipid = 0x07030000, + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | + ADRENO_CONTENT_PROTECTION | ADRENO_IFPC | + ADRENO_ACD | ADRENO_L3_VOTE | ADRENO_BCL, + .gpudev = &adreno_gen7_gmu_gpudev.base, + .perfcounters = &adreno_gen7_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_2M, + .bus_width = 32, + .snapshot_size = SZ_4M, + }, + .sqefw_name = "a730_sqe.fw", + .gmufw_name = "gmu_gen70000.bin", + .gmufw_bak_name = "c500_gmu.bin", + .zap_name = "a730_zap", + .hwcg = gen7_0_0_hwcg_regs, + .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), + .gbif = gen7_0_0_gbif_regs, + .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = gen7_0_0_protected_regs, + .highest_bank_bit = 16, +}; + +static const struct adreno_gen7_core adreno_gpu_core_gen7_0_1 = { + .base = { + 
DEFINE_ADRENO_REV(ADRENO_REV_GEN7_0_1, + UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), + .compatible = "qcom,adreno-gpu-gen7-0-1", + .chipid = 0x07030001, + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | + ADRENO_CONTENT_PROTECTION | ADRENO_IFPC | + ADRENO_ACD | ADRENO_L3_VOTE | ADRENO_BCL, + .gpudev = &adreno_gen7_gmu_gpudev.base, + .perfcounters = &adreno_gen7_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_2M, + .bus_width = 32, + .snapshot_size = SZ_4M, + }, + .sqefw_name = "a730_sqe.fw", + .gmufw_name = "gmu_gen70000.bin", + .gmufw_bak_name = "c500_gmu.bin", + .zap_name = "a730_zap", + .hwcg = gen7_0_0_hwcg_regs, + .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), + .gbif = gen7_0_0_gbif_regs, + .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = gen7_0_0_protected_regs, + .highest_bank_bit = 16, +}; + +static const struct adreno_gpu_core *adreno_gpulist[] = { + &adreno_gpu_core_a306.base, + &adreno_gpu_core_a306a.base, + &adreno_gpu_core_a304.base, + &adreno_gpu_core_a405, /* Deprecated */ + &adreno_gpu_core_a418, /* Deprecated */ + &adreno_gpu_core_a420, /* Deprecated */ + &adreno_gpu_core_a430, /* Deprecated */ + &adreno_gpu_core_a530v1, /* Deprecated */ + &adreno_gpu_core_a530v2.base, + &adreno_gpu_core_a530v3.base, + &adreno_gpu_core_a505.base, + &adreno_gpu_core_a506.base, + &adreno_gpu_core_a510.base, + &adreno_gpu_core_a540v1, /* Deprecated */ + &adreno_gpu_core_a540v2.base, + &adreno_gpu_core_a512.base, + &adreno_gpu_core_a508.base, + &adreno_gpu_core_a630v1, /* Deprecated */ + &adreno_gpu_core_a630v2.base, + &adreno_gpu_core_a615.base, + &adreno_gpu_core_a618.base, + &adreno_gpu_core_a619.base, + &adreno_gpu_core_a619_variant.base, + &adreno_gpu_core_a620.base, + &adreno_gpu_core_a635.base, + &adreno_gpu_core_a640.base, + &adreno_gpu_core_a650.base, + &adreno_gpu_core_a650v2.base, + &adreno_gpu_core_a660.base, + &adreno_gpu_core_a660v2.base, + &adreno_gpu_core_a680.base, + &adreno_gpu_core_a612.base, + 
&adreno_gpu_core_a616.base, + &adreno_gpu_core_a610.base, + &adreno_gpu_core_a660_shima.base, + &adreno_gpu_core_gen7_0_0.base, + &adreno_gpu_core_gen7_0_1.base, +}; diff --git a/adreno.c b/adreno.c new file mode 100644 index 0000000000..ed9e778e9f --- /dev/null +++ b/adreno.c @@ -0,0 +1,3214 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "adreno.h" +#include "adreno_a3xx.h" +#include "adreno_a5xx.h" +#include "adreno_a6xx.h" +#include "adreno_compat.h" +#include "adreno_pm4types.h" +#include "adreno_trace.h" +#include "kgsl_bus.h" +#include "kgsl_trace.h" +#include "kgsl_util.h" + +/* Include the master list of GPU cores that are supported */ +#include "adreno-gpulist.h" + +static void adreno_input_work(struct work_struct *work); +static int adreno_soft_reset(struct kgsl_device *device); +static unsigned int counter_delta(struct kgsl_device *device, + unsigned int reg, unsigned int *counter); +static struct device_node * + adreno_get_gpu_model_node(struct platform_device *pdev); + +static struct adreno_device device_3d0; + +/* Nice level for the higher priority GPU start thread */ +int adreno_wake_nice = -7; + +/* Number of milliseconds to stay active active after a wake on touch */ +unsigned int adreno_wake_timeout = 100; + +bool adreno_regulator_disable_poll(struct kgsl_device *device, + struct regulator *reg, u32 offset, u32 timeout) +{ + u32 val; + int ret; + + if (IS_ERR_OR_NULL(reg)) + return true; + + regulator_disable(reg); + + ret = kgsl_regmap_read_poll_timeout(&device->regmap, offset, + val, !(val & BIT(31)), 100, timeout * 1000); + + return ret ? 
false : true; +} + +static u32 get_ucode_version(const u32 *data) +{ + u32 version; + + version = data[1]; + + if ((version & 0xf) != 0xa) + return version; + + version &= ~0xfff; + return version | ((data[3] & 0xfff000) >> 12); +} + +int adreno_get_firmware(struct adreno_device *adreno_dev, + const char *fwfile, struct adreno_firmware *firmware) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct firmware *fw = NULL; + int ret; + + if (!IS_ERR_OR_NULL(firmware->memdesc)) + return 0; + + ret = request_firmware(&fw, fwfile, &device->pdev->dev); + + if (ret) { + dev_err(device->dev, "request_firmware(%s) failed: %d\n", + fwfile, ret); + return ret; + } + + firmware->memdesc = kgsl_allocate_global(device, fw->size - 4, 0, + KGSL_MEMFLAGS_GPUREADONLY, KGSL_MEMDESC_UCODE, + "ucode"); + + ret = PTR_ERR_OR_ZERO(firmware->memdesc); + if (!ret) { + memcpy(firmware->memdesc->hostptr, &fw->data[4], fw->size - 4); + firmware->size = (fw->size - 4) / sizeof(u32); + firmware->version = get_ucode_version((u32 *)fw->data); + } + + release_firmware(fw); + return ret; +} + + +int adreno_zap_shader_load(struct adreno_device *adreno_dev, + const char *name) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + if (!name || adreno_dev->zap_loaded) + return 0; + + ret = kgsl_zap_shader_load(&device->pdev->dev, name); + if (!ret) + adreno_dev->zap_loaded = true; + + return ret; +} + +/** + * adreno_readreg64() - Read a 64bit register by getting its offset from the + * offset array defined in gpudev node + * @adreno_dev: Pointer to the the adreno device + * @lo: lower 32bit register enum that is to be read + * @hi: higher 32bit register enum that is to be read + * @val: 64 bit Register value read is placed here + */ +void adreno_readreg64(struct adreno_device *adreno_dev, + enum adreno_regs lo, enum adreno_regs hi, uint64_t *val) +{ + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned int val_lo = 0, val_hi = 0; + 
struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (adreno_checkreg_off(adreno_dev, lo)) + kgsl_regread(device, gpudev->reg_offsets[lo], &val_lo); + if (adreno_checkreg_off(adreno_dev, hi)) + kgsl_regread(device, gpudev->reg_offsets[hi], &val_hi); + + *val = (val_lo | ((uint64_t)val_hi << 32)); +} + +/** + * adreno_get_rptr() - Get the current ringbuffer read pointer + * @rb: Pointer to the ringbuffer to query + * + * Get the latest rptr + */ +unsigned int adreno_get_rptr(struct adreno_ringbuffer *rb) +{ + struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 rptr = 0; + + if (adreno_is_a3xx(adreno_dev)) + kgsl_regread(device, A3XX_CP_RB_RPTR, &rptr); + else + kgsl_sharedmem_readl(device->scratch, &rptr, + SCRATCH_RPTR_OFFSET(rb->id)); + + return rptr; +} + +static void adreno_touch_wakeup(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* + * Don't schedule adreno_start in a high priority workqueue, we are + * already in a workqueue which should be sufficient + */ + kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + + /* + * When waking up from a touch event we want to stay active long enough + * for the user to send a draw command. The default idle timer timeout + * is shorter than we want so go ahead and push the idle timer out + * further for this special case + */ + mod_timer(&device->idle_timer, + jiffies + msecs_to_jiffies(adreno_wake_timeout)); + +} + +/* + * A workqueue callback responsible for actually turning on the GPU after a + * touch event. kgsl_pwrctrl_change_state(ACTIVE) is used without any + * active_count protection to avoid the need to maintain state. Either + * somebody will start using the GPU or the idle timer will fire and put the + * GPU back into slumber. 
+ */ +static void adreno_input_work(struct work_struct *work) +{ + struct adreno_device *adreno_dev = container_of(work, + struct adreno_device, input_work); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev); + + mutex_lock(&device->mutex); + + adreno_dev->wake_on_touch = true; + + ops->touch_wakeup(adreno_dev); + + mutex_unlock(&device->mutex); +} + +/* + * Process input events and schedule work if needed. At this point we are only + * interested in grokking EV_ABS touchscreen events + */ +static void adreno_input_event(struct input_handle *handle, unsigned int type, + unsigned int code, int value) +{ + struct kgsl_device *device = handle->handler->private; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + /* Only consider EV_ABS (touch) events */ + if (type != EV_ABS) + return; + + /* + * Don't do anything if anything hasn't been rendered since we've been + * here before + */ + + if (adreno_dev->wake_on_touch) + return; + + if (gmu_core_isenabled(device)) { + schedule_work(&adreno_dev->input_work); + return; + } + + /* + * If the device is in nap, kick the idle timer to make sure that we + * don't go into slumber before the first render. If the device is + * already in slumber schedule the wake. 
+ */ + + if (device->state == KGSL_STATE_NAP) { + /* + * Set the wake on touch bit to keep from coming back here and + * keeping the device in nap without rendering + */ + adreno_dev->wake_on_touch = true; + kgsl_start_idle_timer(device); + + } else if (device->state == KGSL_STATE_SLUMBER) { + schedule_work(&adreno_dev->input_work); + } +} + +#ifdef CONFIG_INPUT +static int adreno_input_connect(struct input_handler *handler, + struct input_dev *dev, const struct input_device_id *id) +{ + struct input_handle *handle; + int ret; + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (handle == NULL) + return -ENOMEM; + + handle->dev = dev; + handle->handler = handler; + handle->name = handler->name; + + ret = input_register_handle(handle); + if (ret) { + kfree(handle); + return ret; + } + + ret = input_open_device(handle); + if (ret) { + input_unregister_handle(handle); + kfree(handle); + } + + return ret; +} + +static void adreno_input_disconnect(struct input_handle *handle) +{ + input_close_device(handle); + input_unregister_handle(handle); + kfree(handle); +} +#else +static int adreno_input_connect(struct input_handler *handler, + struct input_dev *dev, const struct input_device_id *id) +{ + return 0; +} +static void adreno_input_disconnect(struct input_handle *handle) {} +#endif + +/* + * We are only interested in EV_ABS events so only register handlers for those + * input devices that have EV_ABS events + */ +static const struct input_device_id adreno_input_ids[] = { + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT, + .evbit = { BIT_MASK(EV_ABS) }, + /* assumption: MT_.._X & MT_.._Y are in the same long */ + .absbit = { [BIT_WORD(ABS_MT_POSITION_X)] = + BIT_MASK(ABS_MT_POSITION_X) | + BIT_MASK(ABS_MT_POSITION_Y) }, + }, + { }, +}; + +static struct input_handler adreno_input_handler = { + .event = adreno_input_event, + .connect = adreno_input_connect, + .disconnect = adreno_input_disconnect, + .name = "kgsl", + .id_table = adreno_input_ids, +}; + +/* + * _soft_reset() 
- Soft reset GPU + * @adreno_dev: Pointer to adreno device + * + * Soft reset the GPU by doing an AHB write of value 1 to RBBM_SW_RESET + * register. This is used when we want to reset the GPU without + * turning off GFX power rail. The reset when asserted resets + * all the HW logic, restores GPU registers to default state and + * flushes out pending VBIF transactions. + */ +static void _soft_reset(struct adreno_device *adreno_dev) +{ + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned int reg; + + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_SW_RESET_CMD, 1); + /* + * Do a dummy read to get a brief read cycle delay for the + * reset to take effect + */ + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_SW_RESET_CMD, &reg); + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_SW_RESET_CMD, 0); + + /* The SP/TP regulator gets turned off after a soft reset */ + + clear_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, &adreno_dev->priv); + if (gpudev->regulator_enable) + gpudev->regulator_enable(adreno_dev); +} + +/** + * adreno_irqctrl() - Enables/disables the RBBM interrupt mask + * @adreno_dev: Pointer to an adreno_device + * @state: 1 to unmask (enable) interrupts or 0 to mask them all off + * Power: The caller of this function must make sure to use OOBs + * so that we know that the GPU is powered on + */ +void adreno_irqctrl(struct adreno_device *adreno_dev, int state) +{ + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_INT_0_MASK, + state ?
adreno_dev->irq_mask : 0); +} + +/* + * adreno_hang_int_callback() - Isr for fatal interrupts that hang GPU + * @adreno_dev: Pointer to device + * @bit: Interrupt bit + */ +void adreno_hang_int_callback(struct adreno_device *adreno_dev, int bit) +{ + dev_crit_ratelimited(KGSL_DEVICE(adreno_dev)->dev, + "MISC: GPU hang detected\n"); + adreno_irqctrl(adreno_dev, 0); + + /* Trigger a fault in the dispatcher - this will effect a restart */ + adreno_dispatcher_fault(adreno_dev, ADRENO_HARD_FAULT); +} + +/* + * adreno_cp_callback() - CP interrupt handler + * @adreno_dev: Adreno device pointer + * @irq: irq number + * + * Handle the cp interrupt generated by GPU. + */ +void adreno_cp_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + adreno_dispatcher_schedule(device); +} + +static irqreturn_t adreno_irq_handler(int irq, void *data) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + irqreturn_t ret; + + atomic_inc(&adreno_dev->pending_irq_refcnt); + /* Ensure this increment is done before the IRQ status is updated */ + smp_mb__after_atomic(); + + ret = gpudev->irq_handler(adreno_dev); + + /* Make sure the regwrites are done before the decrement */ + smp_mb__before_atomic(); + atomic_dec(&adreno_dev->pending_irq_refcnt); + /* Ensure other CPUs see the decrement */ + smp_mb__after_atomic(); + + return ret; +} + +irqreturn_t adreno_irq_callbacks(struct adreno_device *adreno_dev, + const struct adreno_irq_funcs *funcs, u32 status) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + irqreturn_t ret = IRQ_NONE; + + /* Loop through all set interrupts and call respective handlers */ + while (status) { + int i = fls(status) - 1; + + if (funcs[i].func) { + if (adreno_dev->irq_mask & BIT(i)) + funcs[i].func(adreno_dev, i); + } else + dev_crit_ratelimited(device->dev, + "Unhandled interrupt 
bit %x\n", i); + + ret = IRQ_HANDLED; + + status &= ~BIT(i); + } + + return ret; +} + +static int adreno_get_chipid(struct platform_device *pdev, u32 *chipid); + +static inline bool _rev_match(unsigned int id, unsigned int entry) +{ + return (entry == ANY_ID || entry == id); +} + +static const struct adreno_gpu_core * +_get_gpu_core(struct platform_device *pdev, u32 *chipid) +{ + int i; + struct device_node *node; + + /* + * When "qcom,gpu-models" is defined, use gpu model node to match + * on a compatible string, otherwise match using legacy way. + */ + node = adreno_get_gpu_model_node(pdev); + if (!node || !of_find_property(node, "compatible", NULL)) + node = pdev->dev.of_node; + + *chipid = 0; + + /* Check to see if any of the entries match on a compatible string */ + for (i = 0; i < ARRAY_SIZE(adreno_gpulist); i++) { + if (adreno_gpulist[i]->compatible && + of_device_is_compatible(node, + adreno_gpulist[i]->compatible)) { + /* + * We matched compat string, set chipid based on + * dtsi, then gpulist, else fail. 
+ */ + if (adreno_get_chipid(pdev, chipid)) + *chipid = adreno_gpulist[i]->chipid; + + if (*chipid) + return adreno_gpulist[i]; + + dev_crit(&pdev->dev, + "No chipid associated with %s\n", + adreno_gpulist[i]->compatible); + return NULL; + } + } + + /* No compatible string so try and match on chipid */ + if (!adreno_get_chipid(pdev, chipid)) { + unsigned int core = ADRENO_CHIPID_CORE(*chipid); + unsigned int major = ADRENO_CHIPID_MAJOR(*chipid); + unsigned int minor = ADRENO_CHIPID_MINOR(*chipid); + unsigned int patchid = ADRENO_CHIPID_PATCH(*chipid); + + for (i = 0; i < ARRAY_SIZE(adreno_gpulist); i++) { + if (core == adreno_gpulist[i]->core && + _rev_match(major, adreno_gpulist[i]->major) && + _rev_match(minor, adreno_gpulist[i]->minor) && + _rev_match(patchid, adreno_gpulist[i]->patchid)) + return adreno_gpulist[i]; + } + } + + dev_crit(&pdev->dev, "Unknown GPU chip ID %8.8x\n", *chipid); + return NULL; +} + +static struct { + unsigned int quirk; + const char *prop; +} adreno_quirks[] = { + { ADRENO_QUIRK_TWO_PASS_USE_WFI, "qcom,gpu-quirk-two-pass-use-wfi" }, + { ADRENO_QUIRK_CRITICAL_PACKETS, "qcom,gpu-quirk-critical-packets" }, + { ADRENO_QUIRK_FAULT_DETECT_MASK, "qcom,gpu-quirk-fault-detect-mask" }, + { ADRENO_QUIRK_DISABLE_RB_DP2CLOCKGATING, + "qcom,gpu-quirk-dp2clockgating-disable" }, + { ADRENO_QUIRK_DISABLE_LMLOADKILL, + "qcom,gpu-quirk-lmloadkill-disable" }, + { ADRENO_QUIRK_HFI_USE_REG, "qcom,gpu-quirk-hfi-use-reg" }, + { ADRENO_QUIRK_SECVID_SET_ONCE, "qcom,gpu-quirk-secvid-set-once" }, + { ADRENO_QUIRK_LIMIT_UCHE_GBIF_RW, + "qcom,gpu-quirk-limit-uche-gbif-rw" }, + { ADRENO_QUIRK_CX_GDSC, "qcom,gpu-quirk-cx-gdsc" }, +}; + +static int adreno_get_chipid(struct platform_device *pdev, u32 *chipid) +{ + return of_property_read_u32(pdev->dev.of_node, "qcom,chipid", chipid); +} + +static void +adreno_update_soc_hw_revision_quirks(struct adreno_device *adreno_dev, + struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + int i; + + /* 
update quirk */ + for (i = 0; i < ARRAY_SIZE(adreno_quirks); i++) { + if (of_property_read_bool(node, adreno_quirks[i].prop)) + adreno_dev->quirks |= adreno_quirks[i].quirk; + } +} + +static const struct adreno_gpu_core * +adreno_identify_gpu(struct platform_device *pdev, u32 *chipid) +{ + const struct adreno_gpu_core *gpucore; + + gpucore = _get_gpu_core(pdev, chipid); + if (!gpucore) + return ERR_PTR(-ENODEV); + + /* + * Identify no-longer-supported targets and spins and print a helpful + * message + */ + if (gpucore->features & ADRENO_DEPRECATED) { + if (gpucore->compatible) + dev_err(&pdev->dev, + "Support for GPU %s has been deprecated\n", + gpucore->compatible); + else + dev_err(&pdev->dev, + "Support for GPU %x.%d.%x.%d has been deprecated\n", + gpucore->core, gpucore->major, + gpucore->minor, gpucore->patchid); + return ERR_PTR(-ENODEV); + } + + return gpucore; +} + +static const struct of_device_id adreno_match_table[] = { + { .compatible = "qcom,kgsl-3d0", .data = &device_3d0 }, + { }, +}; + +MODULE_DEVICE_TABLE(of, adreno_match_table); + +/* Dynamically build the OPP table for the GPU device */ +static void adreno_build_opp_table(struct device *dev, struct kgsl_pwrctrl *pwr) +{ + int i; + + /* Skip if the table has already been populated */ + if (dev_pm_opp_get_opp_count(dev) > 0) + return; + + /* Add all the supported frequencies into the tree */ + for (i = 0; i < pwr->num_pwrlevels; i++) + dev_pm_opp_add(dev, pwr->pwrlevels[i].gpu_freq, 0); +} + +static int adreno_of_parse_pwrlevels(struct adreno_device *adreno_dev, + struct device_node *node) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct device_node *child; + int ret; + + pwr->num_pwrlevels = 0; + + for_each_child_of_node(node, child) { + u32 index, freq = 0, voltage, bus; + struct kgsl_pwrlevel *level; + + ret = of_property_read_u32(child, "reg", &index); + if (ret) { + dev_err(device->dev, "%pOF: powerlevel index not found\n", + 
child); + goto out; + } + + ret = of_property_read_u32(child, "qcom,gpu-freq", &freq); + if (ret) { + dev_err(device->dev, "%pOF: Unable to read qcom,gpu-freq\n", + child); + goto out; + } + + /* Ignore "zero" powerlevels */ + if (!freq) + continue; + + ret = of_property_read_u32(child, "qcom,level", &voltage); + if (ret) { + dev_err(device->dev, "%pOF: Unable to read qcom,level\n", + child); + goto out; + } + + ret = kgsl_of_property_read_ddrtype(child, "qcom,bus-freq", + &bus); + if (ret) { + dev_err(device->dev, "%pOF:Unable to read qcom,bus-freq\n", + child); + goto out; + } + + if (index >= ARRAY_SIZE(pwr->pwrlevels)) { + dev_err(device->dev, "%pOF: Pwrlevel index %d is out of range\n", + child, index); + continue; + } + + if (index >= pwr->num_pwrlevels) + pwr->num_pwrlevels = index + 1; + + level = &pwr->pwrlevels[index]; + + level->gpu_freq = freq; + level->bus_freq = bus; + level->voltage_level = voltage; + + of_property_read_u32(child, "qcom,acd-level", + &level->acd_level); + + level->bus_min = level->bus_freq; + kgsl_of_property_read_ddrtype(child, + "qcom,bus-min", &level->bus_min); + + level->bus_max = level->bus_freq; + kgsl_of_property_read_ddrtype(child, + "qcom,bus-max", &level->bus_max); + } + + adreno_build_opp_table(&device->pdev->dev, pwr); + return 0; +out: + of_node_put(child); + return ret; +} + +static void adreno_of_get_initial_pwrlevel(struct kgsl_pwrctrl *pwr, + struct device_node *node) +{ + int init_level = 1; + + of_property_read_u32(node, "qcom,initial-pwrlevel", &init_level); + + if (init_level < 0 || init_level >= pwr->num_pwrlevels) + init_level = 1; + + pwr->active_pwrlevel = init_level; + pwr->default_pwrlevel = init_level; +} + +static void adreno_of_get_limits(struct adreno_device *adreno_dev, + struct device_node *node) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwrctrl = &device->pwrctrl; + unsigned int throttle_level; + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM) || 
of_property_read_u32(node, + "qcom,throttle-pwrlevel", &throttle_level)) + return; + + throttle_level = min(throttle_level, pwrctrl->num_pwrlevels - 1); + + pwrctrl->throttle_mask = GENMASK(pwrctrl->num_pwrlevels - 1, + pwrctrl->num_pwrlevels - 1 - throttle_level); + + adreno_dev->lm_enabled = true; +} + +static int adreno_of_get_legacy_pwrlevels(struct adreno_device *adreno_dev, + struct device_node *parent) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct device_node *node; + int ret; + + node = of_find_node_by_name(parent, "qcom,gpu-pwrlevels"); + + if (node == NULL) { + dev_err(&device->pdev->dev, + "Unable to find 'qcom,gpu-pwrlevels'\n"); + return -EINVAL; + } + + ret = adreno_of_parse_pwrlevels(adreno_dev, node); + + if (!ret) { + adreno_of_get_initial_pwrlevel(&device->pwrctrl, parent); + adreno_of_get_limits(adreno_dev, parent); + } + + of_node_put(node); + return ret; +} + +static int adreno_of_get_pwrlevels(struct adreno_device *adreno_dev, + struct device_node *parent) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct device_node *node, *child; + unsigned int bin = 0; + + node = of_find_node_by_name(parent, "qcom,gpu-pwrlevel-bins"); + if (node == NULL) + return adreno_of_get_legacy_pwrlevels(adreno_dev, parent); + + for_each_child_of_node(node, child) { + + if (of_property_read_u32(child, "qcom,speed-bin", &bin)) + continue; + + if (bin == device->speed_bin) { + int ret; + + ret = adreno_of_parse_pwrlevels(adreno_dev, child); + if (ret) { + of_node_put(child); + return ret; + } + + adreno_of_get_initial_pwrlevel(&device->pwrctrl, child); + + /* + * Check for global throttle-pwrlevel first and override + * with speedbin specific one if found. 
+ */ + adreno_of_get_limits(adreno_dev, parent); + adreno_of_get_limits(adreno_dev, child); + + of_node_put(child); + return 0; + } + } + + dev_err(&device->pdev->dev, + "GPU speed_bin:%d mismatch for bin:%d\n", + device->speed_bin, bin); + return -ENODEV; +} + +static int register_l3_voter(struct kgsl_device *device) +{ + int ret = 0; + + mutex_lock(&device->mutex); + + if (!device->l3_vote) + goto done; + + /* This indicates that we are already set up */ + if (device->num_l3_pwrlevels != 0) + goto done; + + memset(device->l3_freq, 0x0, sizeof(device->l3_freq)); + + ret = qcom_dcvs_register_voter(KGSL_L3_DEVICE, DCVS_L3, DCVS_SLOW_PATH); + if (ret) { + dev_err_once(&device->pdev->dev, + "Unable to register l3 dcvs voter: %d\n", ret); + goto done; + } + + ret = qcom_dcvs_hw_minmax_get(DCVS_L3, &device->l3_freq[1], + &device->l3_freq[2]); + if (ret) { + dev_err_once(&device->pdev->dev, + "Unable to get min/max for l3 dcvs: %d\n", ret); + qcom_dcvs_unregister_voter(KGSL_L3_DEVICE, DCVS_L3, + DCVS_SLOW_PATH); + memset(device->l3_freq, 0x0, sizeof(device->l3_freq)); + goto done; + } + + device->num_l3_pwrlevels = 3; + +done: + mutex_unlock(&device->mutex); + + return ret; +} + +static int adreno_of_get_power(struct adreno_device *adreno_dev, + struct platform_device *pdev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + ret = adreno_of_get_pwrlevels(adreno_dev, pdev->dev.of_node); + if (ret) + return ret; + + device->pwrctrl.interval_timeout = CONFIG_QCOM_KGSL_IDLE_TIMEOUT; + + device->pwrctrl.minbw_timeout = 10; + + /* Set default bus control to true on all targets */ + device->pwrctrl.bus_control = true; + + return 0; +} + +static void adreno_cx_dbgc_probe(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct resource *res; + + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, + "cx_dbgc"); + + if (res == NULL) + return; + + adreno_dev->cx_dbgc_base = res->start - 
device->regmap.base->start; + adreno_dev->cx_dbgc_len = resource_size(res); + adreno_dev->cx_dbgc_virt = devm_ioremap(&device->pdev->dev, + device->regmap.base->start + + adreno_dev->cx_dbgc_base, + adreno_dev->cx_dbgc_len); + + if (adreno_dev->cx_dbgc_virt == NULL) + dev_warn(device->dev, "cx_dbgc ioremap failed\n"); +} + +static void adreno_cx_misc_probe(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct resource *res; + + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, + "cx_misc"); + + if (res == NULL) + return; + + adreno_dev->cx_misc_len = resource_size(res); + adreno_dev->cx_misc_virt = devm_ioremap(&device->pdev->dev, + res->start, adreno_dev->cx_misc_len); +} + +static void adreno_isense_probe(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct resource *res; + + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, + "isense_cntl"); + if (res == NULL) + return; + + adreno_dev->isense_base = res->start - device->regmap.base->start; + adreno_dev->isense_len = resource_size(res); + adreno_dev->isense_virt = devm_ioremap(&device->pdev->dev, res->start, + adreno_dev->isense_len); + if (adreno_dev->isense_virt == NULL) + dev_warn(device->dev, "isense ioremap failed\n"); +} + +/* Read the fuse through the new and fancy nvmem method */ +static int adreno_read_speed_bin(struct platform_device *pdev) +{ + struct nvmem_cell *cell = nvmem_cell_get(&pdev->dev, "speed_bin"); + int ret = PTR_ERR_OR_ZERO(cell); + void *buf; + int val = 0; + size_t len; + + if (ret) { + if (ret == -ENOENT) + return 0; + + return ret; + } + + buf = nvmem_cell_read(cell, &len); + nvmem_cell_put(cell); + + if (IS_ERR(buf)) + return PTR_ERR(buf); + + memcpy(&val, buf, min(len, sizeof(val))); + kfree(buf); + + return val; +} + +static int adreno_read_gpu_model_fuse(struct platform_device *pdev) +{ + struct nvmem_cell *cell = nvmem_cell_get(&pdev->dev, "gpu_model"); + 
void *buf; + int val = 0; + size_t len; + + if (IS_ERR(cell)) + return PTR_ERR(cell); + + buf = nvmem_cell_read(cell, &len); + nvmem_cell_put(cell); + + if (IS_ERR(buf)) + return PTR_ERR(buf); + + memcpy(&val, buf, min(len, sizeof(val))); + kfree(buf); + + return val; +} + +static struct device_node * +adreno_get_gpu_model_node(struct platform_device *pdev) +{ + struct device_node *node, *child; + int fuse_model = adreno_read_gpu_model_fuse(pdev); + + if (fuse_model < 0) + return NULL; + + node = of_find_node_by_name(pdev->dev.of_node, "qcom,gpu-models"); + if (node == NULL) + return NULL; + + for_each_child_of_node(node, child) { + u32 model; + + if (of_property_read_u32(child, "qcom,gpu-model-id", &model)) + continue; + + if (model == fuse_model) { + of_node_put(node); + return child; + } + } + + of_node_put(node); + + return NULL; +} + +const char *adreno_get_gpu_model(struct kgsl_device *device) +{ + struct device_node *node; + static char gpu_model[32]; + const char *model; + int ret; + + if (strlen(gpu_model)) + return gpu_model; + + node = adreno_get_gpu_model_node(device->pdev); + if (!node) + node = of_node_get(device->pdev->dev.of_node); + + ret = of_property_read_string(node, "qcom,gpu-model", &model); + of_node_put(node); + + if (!ret) + strlcpy(gpu_model, model, sizeof(gpu_model)); + else + scnprintf(gpu_model, sizeof(gpu_model), "Adreno%d%d%dv%d", + ADRENO_CHIPID_CORE(ADRENO_DEVICE(device)->chipid), + ADRENO_CHIPID_MAJOR(ADRENO_DEVICE(device)->chipid), + ADRENO_CHIPID_MINOR(ADRENO_DEVICE(device)->chipid), + ADRENO_CHIPID_PATCH(ADRENO_DEVICE(device)->chipid) + 1); + + return gpu_model; +} + +static u32 adreno_get_vk_device_id(struct kgsl_device *device) +{ + struct device_node *node; + static u32 device_id; + + if (device_id) + return device_id; + + node = adreno_get_gpu_model_node(device->pdev); + if (!node) + node = of_node_get(device->pdev->dev.of_node); + + if (of_property_read_u32(node, "qcom,vk-device-id", &device_id)) + device_id = 
ADRENO_DEVICE(device)->chipid; + + of_node_put(node); + + return device_id; +} + +#if IS_ENABLED(CONFIG_QCOM_LLCC) +static int adreno_probe_llcc(struct adreno_device *adreno_dev, + struct platform_device *pdev) +{ + int ret; + + /* Get the system cache slice descriptor for GPU */ + adreno_dev->gpu_llc_slice = llcc_slice_getd(LLCC_GPU); + ret = PTR_ERR_OR_ZERO(adreno_dev->gpu_llc_slice); + + if (ret) { + /* Propagate EPROBE_DEFER back to the probe function */ + if (ret == -EPROBE_DEFER) + return ret; + + if (ret != -ENOENT) + dev_warn(&pdev->dev, + "Unable to get the GPU LLC slice: %d\n", ret); + } else + adreno_dev->gpu_llc_slice_enable = true; + + /* Get the system cache slice descriptor for GPU pagetables */ + adreno_dev->gpuhtw_llc_slice = llcc_slice_getd(LLCC_GPUHTW); + ret = PTR_ERR_OR_ZERO(adreno_dev->gpuhtw_llc_slice); + if (ret) { + if (ret == -EPROBE_DEFER) { + llcc_slice_putd(adreno_dev->gpu_llc_slice); + return ret; + } + + if (ret != -ENOENT) + dev_warn(&pdev->dev, + "Unable to get GPU HTW LLC slice: %d\n", ret); + } else + adreno_dev->gpuhtw_llc_slice_enable = true; + + return 0; +} +#else +static int adreno_probe_llcc(struct adreno_device *adreno_dev, + struct platform_device *pdev) +{ + return 0; +} +#endif + +static void adreno_regmap_op_preaccess(struct kgsl_regmap_region *region) +{ + struct kgsl_device *device = region->priv; + /* + * kgsl panic notifier will be called in atomic context to get + * GPU snapshot. Also panic handler will skip snapshot dumping + * incase GPU is in SLUMBER state. So we can safely ignore the + * kgsl_pre_hwaccess(). 
+ */ + if (!device->snapshot_atomic && !in_interrupt()) + kgsl_pre_hwaccess(device); +} + +static const struct kgsl_regmap_ops adreno_regmap_ops = { + .preaccess = adreno_regmap_op_preaccess, +}; + +static const struct kgsl_functable adreno_functable; + +static void adreno_setup_device(struct adreno_device *adreno_dev) +{ + u32 i; + + adreno_dev->dev.name = "kgsl-3d0"; + adreno_dev->dev.ftbl = &adreno_functable; + + init_completion(&adreno_dev->dev.hwaccess_gate); + init_completion(&adreno_dev->dev.halt_gate); + + idr_init(&adreno_dev->dev.context_idr); + + mutex_init(&adreno_dev->dev.mutex); + INIT_LIST_HEAD(&adreno_dev->dev.globals); + + /* Set the fault tolerance policy to replay, skip, throttle */ + adreno_dev->ft_policy = BIT(KGSL_FT_REPLAY) | + BIT(KGSL_FT_SKIPCMD) | BIT(KGSL_FT_THROTTLE); + + /* Enable command timeouts by default */ + adreno_dev->long_ib_detect = true; + + INIT_WORK(&adreno_dev->input_work, adreno_input_work); + + INIT_LIST_HEAD(&adreno_dev->active_list); + spin_lock_init(&adreno_dev->active_list_lock); + + for (i = 0; i < ARRAY_SIZE(adreno_dev->ringbuffers); i++) { + struct adreno_ringbuffer *rb = &adreno_dev->ringbuffers[i]; + + INIT_LIST_HEAD(&rb->events.group); + } +} + +static const struct of_device_id adreno_gmu_match[] = { + { .compatible = "qcom,gen7-gmu" }, + { .compatible = "qcom,gpu-gmu" }, + { .compatible = "qcom,gpu-rgmu" }, + {}, +}; + +int adreno_device_probe(struct platform_device *pdev, + struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct device *dev = &pdev->dev; + unsigned int priv = 0; + int status; + u32 size; + + /* Initialize the adreno device structure */ + adreno_setup_device(adreno_dev); + + dev_set_drvdata(dev, device); + + device->pdev = pdev; + + adreno_update_soc_hw_revision_quirks(adreno_dev, pdev); + + status = adreno_read_speed_bin(pdev); + if (status < 0) + return status; + + device->speed_bin = status; + + status = adreno_of_get_power(adreno_dev, pdev); + 
if (status) + return status; + + status = kgsl_bus_init(device, pdev); + if (status) + goto err; + + status = kgsl_regmap_init(pdev, &device->regmap, "kgsl_3d0_reg_memory", + &adreno_regmap_ops, device); + if (status) + goto err; + + /* + * Bind the GMU components (if applicable) before doing the KGSL + * platform probe + */ + if (of_find_matching_node(dev->of_node, adreno_gmu_match)) { + status = component_bind_all(dev, NULL); + if (status) { + kgsl_bus_close(device); + return status; + } + } + + /* + * The SMMU APIs use unsigned long for virtual addresses which means + * that we cannot use 64 bit virtual addresses on a 32 bit kernel even + * though the hardware and the rest of the KGSL driver supports it. + */ + if (adreno_support_64bit(adreno_dev)) + kgsl_mmu_set_feature(device, KGSL_MMU_64BIT); + + /* + * Set the SMMU aperture on A6XX/Gen7 targets to use per-process + * pagetables. + */ + if (ADRENO_GPUREV(adreno_dev) >= 600) + kgsl_mmu_set_feature(device, KGSL_MMU_SMMU_APERTURE); + + if (ADRENO_FEATURE(adreno_dev, ADRENO_IOCOHERENT)) + kgsl_mmu_set_feature(device, KGSL_MMU_IO_COHERENT); + + device->pwrctrl.bus_width = adreno_dev->gpucore->bus_width; + + device->mmu.secured = (IS_ENABLED(CONFIG_QCOM_SECURE_BUFFER) && + ADRENO_FEATURE(adreno_dev, ADRENO_CONTENT_PROTECTION)); + + /* Probe the LLCC - this could return -EPROBE_DEFER */ + status = adreno_probe_llcc(adreno_dev, pdev); + if (status) + goto err; + + /* + * If the GPU HTW slice was successful set the MMU feature so the + * domain can set the appropriate attributes + */ + if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) + kgsl_mmu_set_feature(device, KGSL_MMU_LLCC_ENABLE); + + status = kgsl_request_irq(pdev, "kgsl_3d0_irq", adreno_irq_handler, device); + if (status < 0) + goto err; + + device->pwrctrl.interrupt_num = status; + + status = kgsl_device_platform_probe(device); + if (status) + goto err; + + adreno_fence_trace_array_init(device); + + /* Probe for the optional CX_DBGC block */ + 
adreno_cx_dbgc_probe(device); + + /* Probe for the optional CX_MISC block */ + adreno_cx_misc_probe(device); + + adreno_isense_probe(device); + + /* Allocate the memstore for storing timestamps and other useful info */ + + if (ADRENO_FEATURE(adreno_dev, ADRENO_APRIV)) + priv |= KGSL_MEMDESC_PRIVILEGED; + + device->memstore = kgsl_allocate_global(device, + KGSL_MEMSTORE_SIZE, 0, 0, priv, "memstore"); + + status = PTR_ERR_OR_ZERO(device->memstore); + if (status) { + kgsl_device_platform_remove(device); + goto err; + } + + /* Initialize the snapshot engine */ + size = adreno_dev->gpucore->snapshot_size; + + /* + * Use a default size if one wasn't specified, but print a warning so + * the developer knows to fix it + */ + + if (WARN(!size, "The snapshot size was not specified in the gpucore\n")) + size = SZ_1M; + + kgsl_device_snapshot_probe(device, size); + + adreno_debugfs_init(adreno_dev); + adreno_profile_init(adreno_dev); + + adreno_sysfs_init(adreno_dev); + + kgsl_pwrscale_init(device, pdev, CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR); + + /* Initialize coresight for the target */ + adreno_coresight_init(adreno_dev); + + if (ADRENO_FEATURE(adreno_dev, ADRENO_L3_VOTE)) + device->l3_vote = true; + +#ifdef CONFIG_INPUT + + if (!of_property_read_bool(pdev->dev.of_node, + "qcom,disable-wake-on-touch")) { + adreno_input_handler.private = device; + /* + * It isn't fatal if we cannot register the input handler. 
Sad, + * perhaps, but not fatal + */ + if (input_register_handler(&adreno_input_handler)) { + adreno_input_handler.private = NULL; + dev_err(device->dev, + "Unable to register the input handler\n"); + } + } +#endif + + kgsl_qcom_va_md_register(device); + + return 0; +err: + device->pdev = NULL; + + if (of_find_matching_node(dev->of_node, adreno_gmu_match)) + component_unbind_all(dev, NULL); + + kgsl_bus_close(device); + + return status; +} + +static int adreno_bind(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + const struct adreno_gpu_core *gpucore; + int ret; + u32 chipid; + + gpucore = adreno_identify_gpu(pdev, &chipid); + if (IS_ERR(gpucore)) + return PTR_ERR(gpucore); + + ret = gpucore->gpudev->probe(pdev, chipid, gpucore); + + if (!ret) { + struct kgsl_device *device = dev_get_drvdata(dev); + + device->pdev_loaded = true; + } + + return ret; +} + +static void adreno_unbind(struct device *dev) +{ + struct adreno_device *adreno_dev; + struct kgsl_device *device; + const struct adreno_gpudev *gpudev; + + device = dev_get_drvdata(dev); + if (!device) + return; + + device->pdev_loaded = false; + + adreno_dev = ADRENO_DEVICE(device); + gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + trace_array_put(device->fence_trace_array); + + if (gpudev->remove != NULL) + gpudev->remove(adreno_dev); + +#ifdef CONFIG_INPUT + if (adreno_input_handler.private) + input_unregister_handler(&adreno_input_handler); +#endif + + adreno_coresight_remove(adreno_dev); + adreno_profile_close(adreno_dev); + + /* Release the system cache slice descriptor */ + if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice)) + llcc_slice_putd(adreno_dev->gpu_llc_slice); + + if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) + llcc_slice_putd(adreno_dev->gpuhtw_llc_slice); + + kgsl_pwrscale_close(device); + + if (adreno_dev->dispatch_ops && adreno_dev->dispatch_ops->close) + adreno_dev->dispatch_ops->close(adreno_dev); + + kgsl_device_platform_remove(device); + + if 
(of_find_matching_node(dev->of_node, adreno_gmu_match)) + component_unbind_all(dev, NULL); + + if (device->num_l3_pwrlevels != 0) + qcom_dcvs_unregister_voter(KGSL_L3_DEVICE, DCVS_L3, + DCVS_SLOW_PATH); + + clear_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv); + clear_bit(ADRENO_DEVICE_INITIALIZED, &adreno_dev->priv); +} + +static void adreno_resume(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (device->state == KGSL_STATE_SUSPEND) { + adreno_put_gpu_halt(adreno_dev); + kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER); + } else if (device->state != KGSL_STATE_INIT) { + /* + * This is an error situation so wait for the device to idle and + * then put the device in SLUMBER state. This will get us to + * the right place when we resume. + */ + if (device->state == KGSL_STATE_ACTIVE) + adreno_idle(device); + kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER); + dev_err(device->dev, "resume invoked without a suspend\n"); + } +} + +static int adreno_pm_resume(struct device *dev) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev); + + mutex_lock(&device->mutex); + ops->pm_resume(adreno_dev); + mutex_unlock(&device->mutex); + + return 0; +} + +static int adreno_suspend(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int status = kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND); + + if (!status && device->state == KGSL_STATE_SUSPEND) + adreno_get_gpu_halt(adreno_dev); + + return status; +} + +static int adreno_pm_suspend(struct device *dev) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev); + int status; + + mutex_lock(&device->mutex); + status = ops->pm_suspend(adreno_dev); + 
mutex_unlock(&device->mutex); + + return status; +} + +void adreno_create_profile_buffer(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int priv = 0; + + if (ADRENO_FEATURE(adreno_dev, ADRENO_APRIV)) + priv = KGSL_MEMDESC_PRIVILEGED; + + adreno_allocate_global(device, &adreno_dev->profile_buffer, + PAGE_SIZE, 0, 0, priv, "alwayson"); + + adreno_dev->profile_index = 0; + + if (!IS_ERR(adreno_dev->profile_buffer)) + set_bit(ADRENO_DEVICE_DRAWOBJ_PROFILE, + &adreno_dev->priv); +} + +static int adreno_init(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int ret; + + ret = kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT); + if (ret) + return ret; + + /* + * initialization only needs to be done once initially until + * device is shutdown + */ + if (test_bit(ADRENO_DEVICE_INITIALIZED, &adreno_dev->priv)) + return 0; + + ret = gpudev->init(adreno_dev); + if (ret) + return ret; + + set_bit(ADRENO_DEVICE_INITIALIZED, &adreno_dev->priv); + + return 0; +} + +static bool regulators_left_on(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + if (gmu_core_gpmu_isenabled(device)) + return false; + + if (!IS_ERR_OR_NULL(pwr->cx_gdsc)) + if (regulator_is_enabled(pwr->cx_gdsc)) + return true; + + if (!IS_ERR_OR_NULL(pwr->gx_gdsc)) + return regulator_is_enabled(pwr->gx_gdsc); + + return false; +} + +void adreno_set_active_ctxs_null(struct adreno_device *adreno_dev) +{ + int i; + struct adreno_ringbuffer *rb; + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + if (rb->drawctxt_active) + kgsl_context_put(&(rb->drawctxt_active->base)); + rb->drawctxt_active = NULL; + + kgsl_sharedmem_writel(rb->pagetable_desc, + PT_INFO_OFFSET(current_rb_ptname), 0); + } +} + +static int adreno_open(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + /* 
+ * active_cnt special case: we are starting up for the first + * time, so use this sequence instead of the kgsl_pwrctrl_wake() + * which will be called by adreno_active_count_get(). + */ + atomic_inc(&device->active_cnt); + + memset(device->memstore->hostptr, 0, device->memstore->size); + + ret = adreno_init(device); + if (ret) + goto err; + + ret = adreno_start(device, 0); + if (ret) + goto err; + + complete_all(&device->hwaccess_gate); + kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + adreno_active_count_put(adreno_dev); + + return 0; +err: + kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT); + atomic_dec(&device->active_cnt); + + return ret; +} + +static int adreno_first_open(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev); + + if (!device->pdev_loaded) + return -ENODEV; + + return ops->first_open(adreno_dev); +} + +static int adreno_close(struct adreno_device *adreno_dev) +{ + return kgsl_pwrctrl_change_state(KGSL_DEVICE(adreno_dev), + KGSL_STATE_INIT); +} + +static int adreno_last_close(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev); + + /* + * Wait up to 1 second for the active count to go low + * and then start complaining about it + */ + if (kgsl_active_count_wait(device, 0, HZ)) { + dev_err(device->dev, + "Waiting for the active count to become 0\n"); + + while (kgsl_active_count_wait(device, 0, HZ)) + dev_err(device->dev, + "Still waiting for the active count\n"); + } + + return ops->last_close(adreno_dev); +} + +static int adreno_pwrctrl_active_count_get(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret = 0; + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return -EINVAL; + + if ((atomic_read(&device->active_cnt) == 0) && + (device->state != KGSL_STATE_ACTIVE)) { + 
mutex_unlock(&device->mutex); + wait_for_completion(&device->hwaccess_gate); + mutex_lock(&device->mutex); + device->pwrctrl.superfast = true; + ret = kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + } + if (ret == 0) + atomic_inc(&device->active_cnt); + trace_kgsl_active_count(device, + (unsigned long) __builtin_return_address(0)); + return ret; +} + +static void adreno_pwrctrl_active_count_put(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return; + + if (WARN(atomic_read(&device->active_cnt) == 0, + "Unbalanced get/put calls to KGSL active count\n")) + return; + + if (atomic_dec_and_test(&device->active_cnt)) { + bool nap_on = !(device->pwrctrl.ctrl_flags & + BIT(KGSL_PWRFLAGS_NAP_OFF)); + if (nap_on && device->state == KGSL_STATE_ACTIVE && + device->requested_state == KGSL_STATE_NONE) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_NAP); + kgsl_schedule_work(&device->idle_check_ws); + } else if (!nap_on) { + kgsl_pwrscale_update_stats(device); + kgsl_pwrscale_update(device); + } + + kgsl_start_idle_timer(device); + } + + trace_kgsl_active_count(device, + (unsigned long) __builtin_return_address(0)); + + wake_up(&device->active_cnt_wq); +} + +int adreno_active_count_get(struct adreno_device *adreno_dev) +{ + const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev); + + return ops->active_count_get(adreno_dev); +} + +void adreno_active_count_put(struct adreno_device *adreno_dev) +{ + const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev); + + ops->active_count_put(adreno_dev); +} + +void adreno_get_bus_counters(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret = 0; + + if (!device->pwrctrl.bus_control) + return; + + /* VBIF waiting for RAM */ + ret |= adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_VBIF_PWR, 0, + &adreno_dev->starved_ram_lo, NULL); + + /* Target has 
GBIF */ + if (adreno_is_gen7(adreno_dev) || + (adreno_is_a6xx(adreno_dev) && !adreno_is_a630(adreno_dev))) { + ret |= adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_VBIF_PWR, 1, + &adreno_dev->starved_ram_lo_ch1, NULL); + + ret |= adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_VBIF, + GBIF_AXI0_READ_DATA_TOTAL_BEATS, + &adreno_dev->ram_cycles_lo, NULL); + + ret |= adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_VBIF, + GBIF_AXI1_READ_DATA_TOTAL_BEATS, + &adreno_dev->ram_cycles_lo_ch1_read, NULL); + + ret |= adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_VBIF, + GBIF_AXI0_WRITE_DATA_TOTAL_BEATS, + &adreno_dev->ram_cycles_lo_ch0_write, NULL); + + ret |= adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_VBIF, + GBIF_AXI1_WRITE_DATA_TOTAL_BEATS, + &adreno_dev->ram_cycles_lo_ch1_write, NULL); + } else { + /* VBIF DDR cycles */ + ret |= adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_VBIF, + VBIF_AXI_TOTAL_BEATS, + &adreno_dev->ram_cycles_lo, NULL); + } + + if (ret) + dev_err(KGSL_DEVICE(adreno_dev)->dev, + "Unable to get perf counters for bus DCVS\n"); +} + +/** + * _adreno_start - Power up the GPU and prepare to accept commands + * @adreno_dev: Pointer to an adreno_device structure + * + * The core function that powers up and initalizes the GPU. 
This function is + * called at init and after coming out of SLUMBER + */ +static int _adreno_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int status; + unsigned int state = device->state; + bool regulator_left_on; + + /* make sure ADRENO_DEVICE_STARTED is not set here */ + WARN_ON(test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv)); + + regulator_left_on = regulators_left_on(device); + + /* Clear any GPU faults that might have been left over */ + adreno_clear_gpu_fault(adreno_dev); + + /* Put the GPU in a responsive state */ + status = kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE); + if (status) + goto error_pwr_off; + + /* Set any stale active contexts to NULL */ + adreno_set_active_ctxs_null(adreno_dev); + + /* Set the bit to indicate that we've just powered on */ + set_bit(ADRENO_DEVICE_PWRON, &adreno_dev->priv); + + adreno_ringbuffer_set_global(adreno_dev, 0); + + /* Clear the busy_data stats - we're starting over from scratch */ + memset(&adreno_dev->busy_data, 0, sizeof(adreno_dev->busy_data)); + + /* Soft reset the GPU if a regulator is stuck on*/ + if (regulator_left_on) + _soft_reset(adreno_dev); + + /* Start the GPU */ + status = gpudev->start(adreno_dev); + if (status) + goto error_pwr_off; + + /* Re-initialize the coresight registers if applicable */ + adreno_coresight_start(adreno_dev); + + adreno_irqctrl(adreno_dev, 1); + + adreno_perfcounter_start(adreno_dev); + + /* Clear FSR here in case it is set from a previous pagefault */ + kgsl_mmu_clear_fsr(&device->mmu); + + status = gpudev->rb_start(adreno_dev); + if (status) + goto error_pwr_off; + + /* + * At this point it is safe to assume that we recovered. Setting + * this field allows us to take a new snapshot for the next failure + * if we are prioritizing the first unrecoverable snapshot. 
+ */ + if (device->snapshot) + device->snapshot->recovered = true; + + /* Start the dispatcher */ + adreno_dispatcher_start(device); + + device->reset_counter++; + + set_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv); + + /* + * There is a possible deadlock scenario during kgsl firmware reading + * (request_firmware) and devfreq update calls. During first boot, kgsl + * device mutex is held and then request_firmware is called for reading + * firmware. request_firmware internally takes dev_pm_qos_mtx lock. + * Whereas in case of devfreq update calls triggered by thermal/bcl or + * devfreq sysfs, it first takes the same dev_pm_qos_mtx lock and then + * tries to take kgsl device mutex as part of get_dev_status/target + * calls. This results in deadlock when both thread are unable to acquire + * the mutex held by other thread. Enable devfreq updates now as we are + * done reading all firmware files. + */ + device->pwrscale.devfreq_enabled = true; + + return 0; + +error_pwr_off: + /* set the state back to original state */ + kgsl_pwrctrl_change_state(device, state); + + return status; +} + +/** + * adreno_start() - Power up and initialize the GPU + * @device: Pointer to the KGSL device to power up + * @priority: Boolean flag to specify of the start should be scheduled in a low + * latency work queue + * + * Power up the GPU and initialize it. 
 * If priority is specified then elevate
 * the thread priority for the duration of the start operation
 */
int adreno_start(struct kgsl_device *device, int priority)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	/* Save the caller's nice level so it can be restored afterwards */
	int nice = task_nice(current);
	int ret;

	/* Only boost if the caller asked for it and it actually raises us */
	if (priority && (adreno_wake_nice < nice))
		set_user_nice(current, adreno_wake_nice);

	ret = _adreno_start(adreno_dev);

	if (priority)
		set_user_nice(current, nice);

	return ret;
}

/*
 * adreno_stop() - Tear down the running GPU state.
 *
 * Counterpart to _adreno_start(): disables interrupts, saves coresight and
 * performance-counter state while the hardware is still powered, then stops
 * the dispatcher and ringbuffers. No-op if the device was never started.
 */
static int adreno_stop(struct kgsl_device *device)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
	/* NOTE(review): 'error' is never assigned, so this always returns 0 */
	int error = 0;

	if (!test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv))
		return 0;

	kgsl_pwrscale_update_stats(device);

	/* Mask GPU interrupts before dismantling anything they could touch */
	adreno_irqctrl(adreno_dev, 0);

	/* Save active coresight registers if applicable */
	adreno_coresight_stop(adreno_dev);

	/* Save physical performance counter values before GPU power down */
	adreno_perfcounter_save(adreno_dev);

	if (gpudev->clear_pending_transactions)
		gpudev->clear_pending_transactions(adreno_dev);

	adreno_dispatcher_stop(adreno_dev);

	adreno_ringbuffer_stop(adreno_dev);

	/* Release system cache slices while powering down */
	if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice))
		llcc_slice_deactivate(adreno_dev->gpu_llc_slice);

	if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice))
		llcc_slice_deactivate(adreno_dev->gpuhtw_llc_slice);

	adreno_set_active_ctxs_null(adreno_dev);

	clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv);

	return error;
}

/**
 * adreno_reset() - Helper function to reset the GPU
 * @device: Pointer to the KGSL device structure for the GPU
 * @fault: Type of fault. Needed to skip soft reset for MMU fault
 *
 * Try to reset the GPU to recover from a fault. First, try to do a low latency
 * soft reset. If the soft reset fails for some reason, then bring out the big
 * guns and toggle the footswitch.
+ */ +int adreno_reset(struct kgsl_device *device, int fault) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int ret = -EINVAL; + int i; + + if (gpudev->reset) + return gpudev->reset(adreno_dev); + + /* + * Try soft reset first Do not do soft reset for a IOMMU fault (because + * the IOMMU hardware needs a reset too) + */ + + if (!(fault & ADRENO_IOMMU_PAGE_FAULT)) + ret = adreno_soft_reset(device); + + if (ret) { + /* If soft reset failed/skipped, then pull the power */ + kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT); + /* since device is officially off now clear start bit */ + clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv); + + /* Try to reset the device */ + ret = adreno_start(device, 0); + + for (i = 0; ret && i < 4; i++) { + msleep(20); + ret = adreno_start(device, 0); + } + + if (ret) + return ret; + + if (i != 0) + dev_warn(device->dev, + "Device hard reset tried %d tries\n", i); + } + + /* + * If active_cnt is non-zero then the system was active before + * going into a reset - put it back in that state + */ + + if (atomic_read(&device->active_cnt)) + kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + else + kgsl_pwrctrl_change_state(device, KGSL_STATE_NAP); + + return ret; +} + +static int copy_prop(struct kgsl_device_getproperty *param, + void *src, size_t size) +{ + if (copy_to_user(param->value, src, + min_t(u32, size, param->sizebytes))) + return -EFAULT; + + return 0; +} + +static int adreno_prop_device_info(struct kgsl_device *device, + struct kgsl_device_getproperty *param) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_devinfo devinfo = { + .device_id = device->id + 1, + .chip_id = adreno_dev->chipid, + .mmu_enabled = kgsl_mmu_has_feature(device, KGSL_MMU_PAGED), + .gmem_gpubaseaddr = adreno_dev->gpucore->gmem_base, + .gmem_sizebytes = adreno_dev->gpucore->gmem_size, + }; + + return copy_prop(param, &devinfo, 
sizeof(devinfo)); +} + +static int adreno_prop_gpu_model(struct kgsl_device *device, + struct kgsl_device_getproperty *param) +{ + struct kgsl_gpu_model model = {0}; + + strlcpy(model.gpu_model, adreno_get_gpu_model(device), + sizeof(model.gpu_model)); + + return copy_prop(param, &model, sizeof(model)); +} + +static int adreno_prop_device_shadow(struct kgsl_device *device, + struct kgsl_device_getproperty *param) +{ + struct kgsl_shadowprop shadowprop = { 0 }; + + if (device->memstore->hostptr) { + /* Pass a dummy address to identify memstore */ + shadowprop.gpuaddr = KGSL_MEMSTORE_TOKEN_ADDRESS; + shadowprop.size = device->memstore->size; + + shadowprop.flags = KGSL_FLAGS_INITIALIZED | + KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS; + } + + return copy_prop(param, &shadowprop, sizeof(shadowprop)); +} + +static int adreno_prop_device_qdss_stm(struct kgsl_device *device, + struct kgsl_device_getproperty *param) +{ + struct kgsl_qdss_stm_prop qdssprop = {0}; + + if (!IS_ERR_OR_NULL(device->qdss_desc)) { + qdssprop.gpuaddr = device->qdss_desc->gpuaddr; + qdssprop.size = device->qdss_desc->size; + } + + return copy_prop(param, &qdssprop, sizeof(qdssprop)); +} + +static int adreno_prop_device_qtimer(struct kgsl_device *device, + struct kgsl_device_getproperty *param) +{ + struct kgsl_qtimer_prop qtimerprop = {0}; + + if (!IS_ERR_OR_NULL(device->qtimer_desc)) { + qtimerprop.gpuaddr = device->qtimer_desc->gpuaddr; + qtimerprop.size = device->qtimer_desc->size; + } + + return copy_prop(param, &qtimerprop, sizeof(qtimerprop)); +} + +static int adreno_prop_s32(struct kgsl_device *device, + struct kgsl_device_getproperty *param) +{ + int val = 0; + + if (param->type == KGSL_PROP_MMU_ENABLE) + val = kgsl_mmu_has_feature(device, KGSL_MMU_PAGED); + else if (param->type == KGSL_PROP_INTERRUPT_WAITS) + val = 1; + + return copy_prop(param, &val, sizeof(val)); +} + +static int adreno_prop_uche_gmem_addr(struct kgsl_device *device, + struct kgsl_device_getproperty *param) +{ + struct 
adreno_device *adreno_dev = ADRENO_DEVICE(device); + u64 vaddr = adreno_dev->gpucore->gmem_base; + + return copy_prop(param, &vaddr, sizeof(vaddr)); +} + +static int adreno_prop_ucode_version(struct kgsl_device *device, + struct kgsl_device_getproperty *param) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_ucode_version ucode = { + .pfp = adreno_dev->fw[ADRENO_FW_PFP].version, + .pm4 = adreno_dev->fw[ADRENO_FW_PM4].version, + }; + + return copy_prop(param, &ucode, sizeof(ucode)); +} + +static int adreno_prop_gaming_bin(struct kgsl_device *device, + struct kgsl_device_getproperty *param) +{ + void *buf; + size_t len; + int ret; + struct nvmem_cell *cell; + + cell = nvmem_cell_get(&device->pdev->dev, "gaming_bin"); + if (IS_ERR(cell)) + return -EINVAL; + + buf = nvmem_cell_read(cell, &len); + nvmem_cell_put(cell); + + if (!IS_ERR(buf)) { + ret = copy_prop(param, buf, len); + kfree(buf); + return ret; + } + + dev_err(device->dev, "failed to read gaming_bin nvmem cell\n"); + return -EINVAL; +} + +static int adreno_prop_u32(struct kgsl_device *device, + struct kgsl_device_getproperty *param) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + u32 val = 0; + + if (param->type == KGSL_PROP_HIGHEST_BANK_BIT) { + val = adreno_dev->highest_bank_bit; + } else if (param->type == KGSL_PROP_MIN_ACCESS_LENGTH) + of_property_read_u32(device->pdev->dev.of_node, + "qcom,min-access-length", &val); + else if (param->type == KGSL_PROP_UBWC_MODE) + of_property_read_u32(device->pdev->dev.of_node, + "qcom,ubwc-mode", &val); + else if (param->type == KGSL_PROP_DEVICE_BITNESS) + val = adreno_support_64bit(adreno_dev) ? 
48 : 32; + else if (param->type == KGSL_PROP_SPEED_BIN) + val = device->speed_bin; + else if (param->type == KGSL_PROP_VK_DEVICE_ID) + val = adreno_get_vk_device_id(device); + + return copy_prop(param, &val, sizeof(val)); +} + +static const struct { + int type; + int (*func)(struct kgsl_device *device, + struct kgsl_device_getproperty *param); +} adreno_property_funcs[] = { + { KGSL_PROP_DEVICE_INFO, adreno_prop_device_info }, + { KGSL_PROP_DEVICE_SHADOW, adreno_prop_device_shadow }, + { KGSL_PROP_DEVICE_QDSS_STM, adreno_prop_device_qdss_stm }, + { KGSL_PROP_DEVICE_QTIMER, adreno_prop_device_qtimer }, + { KGSL_PROP_MMU_ENABLE, adreno_prop_s32 }, + { KGSL_PROP_INTERRUPT_WAITS, adreno_prop_s32 }, + { KGSL_PROP_UCHE_GMEM_VADDR, adreno_prop_uche_gmem_addr }, + { KGSL_PROP_UCODE_VERSION, adreno_prop_ucode_version }, + { KGSL_PROP_HIGHEST_BANK_BIT, adreno_prop_u32 }, + { KGSL_PROP_MIN_ACCESS_LENGTH, adreno_prop_u32 }, + { KGSL_PROP_UBWC_MODE, adreno_prop_u32 }, + { KGSL_PROP_DEVICE_BITNESS, adreno_prop_u32 }, + { KGSL_PROP_SPEED_BIN, adreno_prop_u32 }, + { KGSL_PROP_GAMING_BIN, adreno_prop_gaming_bin }, + { KGSL_PROP_GPU_MODEL, adreno_prop_gpu_model}, + { KGSL_PROP_VK_DEVICE_ID, adreno_prop_u32}, +}; + +static int adreno_getproperty(struct kgsl_device *device, + struct kgsl_device_getproperty *param) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(adreno_property_funcs); i++) { + if (param->type == adreno_property_funcs[i].type) + return adreno_property_funcs[i].func(device, param); + } + + return -ENODEV; +} + +static int adreno_query_property_list(struct kgsl_device *device, u32 *list, + u32 count) +{ + int i; + + if (!list) + return ARRAY_SIZE(adreno_property_funcs); + + for (i = 0; i < count && i < ARRAY_SIZE(adreno_property_funcs); i++) + list[i] = adreno_property_funcs[i].type; + + return i; +} + +int adreno_set_constraint(struct kgsl_device *device, + struct kgsl_context *context, + struct kgsl_device_constraint *constraint) +{ + int status = 0; + + switch 
(constraint->type) { + case KGSL_CONSTRAINT_PWRLEVEL: { + struct kgsl_device_constraint_pwrlevel pwr; + + if (constraint->size != sizeof(pwr)) { + status = -EINVAL; + break; + } + + if (copy_from_user(&pwr, + (void __user *)constraint->data, + sizeof(pwr))) { + status = -EFAULT; + break; + } + if (pwr.level >= KGSL_CONSTRAINT_PWR_MAXLEVELS) { + status = -EINVAL; + break; + } + + context->pwr_constraint.type = + KGSL_CONSTRAINT_PWRLEVEL; + context->pwr_constraint.sub_type = pwr.level; + trace_kgsl_user_pwrlevel_constraint(device, + context->id, + context->pwr_constraint.type, + context->pwr_constraint.sub_type); + } + break; + case KGSL_CONSTRAINT_NONE: + if (context->pwr_constraint.type == KGSL_CONSTRAINT_PWRLEVEL) + trace_kgsl_user_pwrlevel_constraint(device, + context->id, + KGSL_CONSTRAINT_NONE, + context->pwr_constraint.sub_type); + context->pwr_constraint.type = KGSL_CONSTRAINT_NONE; + break; + case KGSL_CONSTRAINT_L3_PWRLEVEL: { + struct kgsl_device_constraint_pwrlevel pwr; + + if (constraint->size != sizeof(pwr)) { + status = -EINVAL; + break; + } + + if (copy_from_user(&pwr, constraint->data, sizeof(pwr))) { + status = -EFAULT; + break; + } + + status = register_l3_voter(device); + if (status) + break; + + if (pwr.level >= KGSL_CONSTRAINT_PWR_MAXLEVELS) + pwr.level = KGSL_CONSTRAINT_PWR_MAXLEVELS - 1; + + context->l3_pwr_constraint.type = KGSL_CONSTRAINT_L3_PWRLEVEL; + context->l3_pwr_constraint.sub_type = pwr.level; + trace_kgsl_user_pwrlevel_constraint(device, context->id, + context->l3_pwr_constraint.type, + context->l3_pwr_constraint.sub_type); + } + break; + case KGSL_CONSTRAINT_L3_NONE: { + unsigned int type = context->l3_pwr_constraint.type; + + if (type == KGSL_CONSTRAINT_L3_PWRLEVEL) + trace_kgsl_user_pwrlevel_constraint(device, context->id, + KGSL_CONSTRAINT_L3_NONE, + context->l3_pwr_constraint.sub_type); + context->l3_pwr_constraint.type = KGSL_CONSTRAINT_L3_NONE; + } + break; + default: + status = -EINVAL; + break; + } + + /* If a new 
constraint has been set for a context, cancel the old one */ + if ((status == 0) && + (context->id == device->pwrctrl.constraint.owner_id)) { + trace_kgsl_constraint(device, device->pwrctrl.constraint.type, + device->pwrctrl.active_pwrlevel, 0); + device->pwrctrl.constraint.type = KGSL_CONSTRAINT_NONE; + } + + return status; +} + +static int adreno_setproperty(struct kgsl_device_private *dev_priv, + unsigned int type, + void __user *value, + unsigned int sizebytes) +{ + int status = -EINVAL; + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + switch (type) { + case KGSL_PROP_PWR_CONSTRAINT: + case KGSL_PROP_L3_PWR_CONSTRAINT: { + struct kgsl_device_constraint constraint; + struct kgsl_context *context; + + if (sizebytes != sizeof(constraint)) + break; + + if (copy_from_user(&constraint, value, + sizeof(constraint))) { + status = -EFAULT; + break; + } + + context = kgsl_context_get_owner(dev_priv, + constraint.context_id); + + if (context == NULL) + break; + + status = adreno_set_constraint(device, context, + &constraint); + + kgsl_context_put(context); + } + break; + default: + status = gpudev->setproperty(dev_priv, type, value, sizebytes); + break; + } + + return status; +} + +/* + * adreno_soft_reset - Do a soft reset of the GPU hardware + * @device: KGSL device to soft reset + * + * "soft reset" the GPU hardware - this is a fast path GPU reset + * The GPU hardware is reset but we never pull power so we can skip + * a lot of the standard adreno_stop/adreno_start sequence + */ +static int adreno_soft_reset(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int ret; + + /* + * Don't allow a soft reset for a304 because the SMMU needs to be hard + * reset + */ + if (adreno_is_a304(adreno_dev)) + return -ENODEV; + + if 
(gpudev->clear_pending_transactions) { + ret = gpudev->clear_pending_transactions(adreno_dev); + if (ret) + return ret; + } + + kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE); + adreno_set_active_ctxs_null(adreno_dev); + + adreno_irqctrl(adreno_dev, 0); + + adreno_clear_gpu_fault(adreno_dev); + /* since device is oficially off now clear start bit */ + clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv); + + /* save physical performance counter values before GPU soft reset */ + adreno_perfcounter_save(adreno_dev); + + _soft_reset(adreno_dev); + + /* Clear the busy_data stats - we're starting over from scratch */ + adreno_dev->busy_data.gpu_busy = 0; + adreno_dev->busy_data.bif_ram_cycles = 0; + adreno_dev->busy_data.bif_ram_cycles_read_ch1 = 0; + adreno_dev->busy_data.bif_ram_cycles_write_ch0 = 0; + adreno_dev->busy_data.bif_ram_cycles_write_ch1 = 0; + adreno_dev->busy_data.bif_starved_ram = 0; + adreno_dev->busy_data.bif_starved_ram_ch1 = 0; + + /* Set the page table back to the default page table */ + adreno_ringbuffer_set_global(adreno_dev, 0); + + /* Reinitialize the GPU */ + gpudev->start(adreno_dev); + + /* Re-initialize the coresight registers if applicable */ + adreno_coresight_start(adreno_dev); + + /* Enable IRQ */ + adreno_irqctrl(adreno_dev, 1); + + /* stop all ringbuffers to cancel RB events */ + adreno_ringbuffer_stop(adreno_dev); + + /* Start the ringbuffer(s) again */ + ret = gpudev->rb_start(adreno_dev); + if (ret == 0) { + device->reset_counter++; + set_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv); + } + + /* Restore physical performance counter values after soft reset */ + adreno_perfcounter_restore(adreno_dev); + + if (ret) + dev_err(device->dev, "Device soft reset failed: %d\n", ret); + + return ret; +} + +bool adreno_isidle(struct adreno_device *adreno_dev) +{ + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct adreno_ringbuffer *rb; + int i; + + if (!kgsl_state_is_awake(KGSL_DEVICE(adreno_dev))) + return true; 
+ + /* + * wptr is updated when we add commands to ringbuffer, add a barrier + * to make sure updated wptr is compared to rptr + */ + smp_mb(); + + /* + * ringbuffer is truly idle when all ringbuffers read and write + * pointers are equal + */ + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + if (!adreno_rb_empty(rb)) + return false; + } + + return gpudev->hw_isidle(adreno_dev); +} + +/** + * adreno_spin_idle() - Spin wait for the GPU to idle + * @adreno_dev: Pointer to an adreno device + * @timeout: milliseconds to wait before returning error + * + * Spin the CPU waiting for the RBBM status to return idle + */ +int adreno_spin_idle(struct adreno_device *adreno_dev, unsigned int timeout) +{ + unsigned long wait = jiffies + msecs_to_jiffies(timeout); + + do { + /* + * If we fault, stop waiting and return an error. The dispatcher + * will clean up the fault from the work queue, but we need to + * make sure we don't block it by waiting for an idle that + * will never come. + */ + + if (adreno_gpu_fault(adreno_dev) != 0) + return -EDEADLK; + + if (adreno_isidle(adreno_dev)) + return 0; + + } while (time_before(jiffies, wait)); + + /* + * Under rare conditions, preemption can cause the while loop to exit + * without checking if the gpu is idle. check one last time before we + * return failure. + */ + if (adreno_gpu_fault(adreno_dev) != 0) + return -EDEADLK; + + if (adreno_isidle(adreno_dev)) + return 0; + + return -ETIMEDOUT; +} + +/** + * adreno_idle() - wait for the GPU hardware to go idle + * @device: Pointer to the KGSL device structure for the GPU + * + * Wait up to ADRENO_IDLE_TIMEOUT milliseconds for the GPU hardware to go quiet. + * Caller must hold the device mutex, and must not hold the dispatcher mutex. 
 */

int adreno_idle(struct kgsl_device *device)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	int ret;

	/*
	 * Make sure the device mutex is held so the dispatcher can't send any
	 * more commands to the hardware
	 */

	if (WARN_ON(!mutex_is_locked(&device->mutex)))
		return -EDEADLK;

	/* Check if we are already idle before idling dispatcher */
	if (adreno_isidle(adreno_dev))
		return 0;
	/*
	 * Wait for dispatcher to finish completing commands
	 * already submitted
	 */
	ret = adreno_dispatcher_idle(adreno_dev);
	if (ret)
		return ret;

	return adreno_spin_idle(adreno_dev, ADRENO_IDLE_TIMEOUT);
}

/*
 * Wait for the active count to drain to zero (up to 1 second), then wait for
 * the hardware itself to idle. Re-arms halt_gate before waiting.
 */
static int adreno_drain_and_idle(struct kgsl_device *device)
{
	int ret;

	reinit_completion(&device->halt_gate);

	ret = kgsl_active_count_wait(device, 0, HZ);
	if (ret)
		return ret;

	return adreno_idle(device);
}

/* Caller must hold the device mutex. */
int adreno_suspend_context(struct kgsl_device *device)
{
	/* process any profiling results that are available */
	adreno_profile_process_results(ADRENO_DEVICE(device));

	/* Wait for the device to go idle */
	return adreno_idle(device);
}

/*
 * Return true if the dword offset falls inside the mapped CX_DBGC register
 * window (cx_dbgc_base/cx_dbgc_len are byte quantities, hence the >> 2).
 */
bool adreno_is_cx_dbgc_register(struct kgsl_device *device,
		unsigned int offsetwords)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);

	return adreno_dev->cx_dbgc_virt &&
		(offsetwords >= (adreno_dev->cx_dbgc_base >> 2)) &&
		(offsetwords < (adreno_dev->cx_dbgc_base +
				adreno_dev->cx_dbgc_len) >> 2);
}

/*
 * Read a CX_DBGC register.
 * NOTE(review): if the offset is outside the CX_DBGC window this returns
 * without touching *value, so callers should pre-initialize it.
 */
void adreno_cx_dbgc_regread(struct kgsl_device *device,
		unsigned int offsetwords, unsigned int *value)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	unsigned int cx_dbgc_offset;

	if (!adreno_is_cx_dbgc_register(device, offsetwords))
		return;

	cx_dbgc_offset = (offsetwords << 2) - adreno_dev->cx_dbgc_base;
	*value = __raw_readl(adreno_dev->cx_dbgc_virt + cx_dbgc_offset);

	/*
	 * ensure this read finishes before the next one.
	 * i.e. act like normal readl()
	 */
	rmb();
}

/* Write a CX_DBGC register; silently ignored if the offset is out of range */
void adreno_cx_dbgc_regwrite(struct kgsl_device *device,
		unsigned int offsetwords, unsigned int value)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	unsigned int cx_dbgc_offset;

	if (!adreno_is_cx_dbgc_register(device, offsetwords))
		return;

	cx_dbgc_offset = (offsetwords << 2) - adreno_dev->cx_dbgc_base;
	trace_kgsl_regwrite(offsetwords, value);

	/*
	 * ensure previous writes post before this one,
	 * i.e. act like normal writel()
	 */
	wmb();
	__raw_writel(value, adreno_dev->cx_dbgc_virt + cx_dbgc_offset);
}

/*
 * Read a CX_MISC register (offset is relative to the CX_MISC block).
 * *value is left untouched if the block is unmapped or the offset is
 * out of range.
 */
void adreno_cx_misc_regread(struct adreno_device *adreno_dev,
		unsigned int offsetwords, unsigned int *value)
{
	unsigned int cx_misc_offset;

	cx_misc_offset = (offsetwords << 2);
	if (!adreno_dev->cx_misc_virt ||
		(cx_misc_offset >= adreno_dev->cx_misc_len))
		return;

	*value = __raw_readl(adreno_dev->cx_misc_virt + cx_misc_offset);

	/*
	 * ensure this read finishes before the next one.
	 * i.e. act like normal readl()
	 */
	rmb();
}

/* Read an isense register; same out-of-range semantics as CX_MISC reads */
void adreno_isense_regread(struct adreno_device *adreno_dev,
		unsigned int offsetwords, unsigned int *value)
{
	unsigned int isense_offset;

	isense_offset = (offsetwords << 2);
	if (!adreno_dev->isense_virt ||
		(isense_offset >= adreno_dev->isense_len))
		return;

	*value = __raw_readl(adreno_dev->isense_virt + isense_offset);

	/*
	 * ensure this read finishes before the next one.
	 * i.e. act like normal readl()
	 */
	rmb();
}

/* Write a CX_MISC register; silently ignored if the offset is out of range */
void adreno_cx_misc_regwrite(struct adreno_device *adreno_dev,
		unsigned int offsetwords, unsigned int value)
{
	unsigned int cx_misc_offset;

	cx_misc_offset = (offsetwords << 2);
	if (!adreno_dev->cx_misc_virt ||
		(cx_misc_offset >= adreno_dev->cx_misc_len))
		return;

	/*
	 * ensure previous writes post before this one,
	 * i.e. act like normal writel()
	 */
	wmb();
	__raw_writel(value, adreno_dev->cx_misc_virt + cx_misc_offset);
}

/* Read-modify-write of a CX_MISC register: clear 'mask', then set 'bits' */
void adreno_cx_misc_regrmw(struct adreno_device *adreno_dev,
		unsigned int offsetwords,
		unsigned int mask, unsigned int bits)
{
	unsigned int val = 0;

	adreno_cx_misc_regread(adreno_dev, offsetwords, &val);
	val &= ~mask;
	adreno_cx_misc_regwrite(adreno_dev, offsetwords, val | bits);
}

/*
 * Fill in the userspace-visible profiling buffer for a command object with
 * the submission timestamps captured in @time. Quietly returns if the
 * drawobj has no profiling buffer attached.
 */
void adreno_profile_submit_time(struct adreno_submit_time *time)
{
	struct kgsl_drawobj *drawobj;
	struct kgsl_drawobj_cmd *cmdobj;
	struct kgsl_mem_entry *entry;
	struct kgsl_drawobj_profiling_buffer *profile_buffer;

	if (!time)
		return;

	drawobj = time->drawobj;
	if (drawobj == NULL)
		return;

	cmdobj = CMDOBJ(drawobj);
	entry = cmdobj->profiling_buf_entry;
	if (!entry)
		return;

	profile_buffer = kgsl_gpuaddr_to_vaddr(&entry->memdesc,
		cmdobj->profiling_buffer_gpuaddr);

	if (profile_buffer == NULL)
		return;

	/* Return kernel clock time to the client if requested */
	if (drawobj->flags & KGSL_DRAWOBJ_PROFILING_KTIME) {
		u64 secs = time->ktime;

		/* do_div() leaves the quotient in secs, returns the remainder */
		profile_buffer->wall_clock_ns =
			do_div(secs, NSEC_PER_SEC);
		profile_buffer->wall_clock_s = secs;
	} else {
		profile_buffer->wall_clock_s = time->utime.tv_sec;
		profile_buffer->wall_clock_ns = time->utime.tv_nsec;
	}

	profile_buffer->gpu_ticks_queued = time->ticks;

	kgsl_memdesc_unmap(&entry->memdesc);
}

/**
 * adreno_waittimestamp - sleep while waiting for the specified timestamp
 * @device - pointer to a KGSL device structure
 * @context - pointer to the active kgsl context
 * @timestamp - GPU timestamp to wait for
 * @msecs - amount of time to wait (in milliseconds)
 *
 * Wait up to 'msecs' milliseconds for the specified timestamp to expire.
 */
static int adreno_waittimestamp(struct kgsl_device *device,
		struct kgsl_context *context,
		unsigned int timestamp,
		unsigned int msecs)
{
	int ret;

	/* Device-global waits (no context) have been deprecated */
	if (context == NULL) {
		/* If they are doing then complain once */
		dev_WARN_ONCE(device->dev, 1,
			"IOCTL_KGSL_DEVICE_WAITTIMESTAMP is deprecated\n");
		return -ENOTTY;
	}

	/* Return -ENOENT if the context has been detached */
	if (kgsl_context_detached(context))
		return -ENOENT;

	ret = adreno_drawctxt_wait(ADRENO_DEVICE(device), context,
		timestamp, msecs);

	/* If the context got invalidated then return a specific error */
	if (kgsl_context_invalid(context))
		ret = -EDEADLK;

	/*
	 * Return -EPROTO if the device has faulted since the last time we
	 * checked. Userspace uses this as a marker for performing post
	 * fault activities
	 */

	if (!ret && test_and_clear_bit(ADRENO_CONTEXT_FAULT, &context->priv))
		ret = -EPROTO;

	return ret;
}

/**
 * __adreno_readtimestamp() - Reads the timestamp from memstore memory
 * @adreno_dev: Pointer to an adreno device
 * @index: Index into the memstore memory
 * @type: Type of timestamp to read
 * @timestamp: The out parameter where the timestamp is read
 *
 * Return: 0 on success, -EINVAL (with *timestamp zeroed) for an
 * unsupported @type. Only CONSUMED and RETIRED are handled here.
 */
static int __adreno_readtimestamp(struct adreno_device *adreno_dev, int index,
		int type, unsigned int *timestamp)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	int status = 0;

	switch (type) {
	case KGSL_TIMESTAMP_CONSUMED:
		/* Start-of-pipeline timestamp (command consumed by the CP) */
		kgsl_sharedmem_readl(device->memstore, timestamp,
			KGSL_MEMSTORE_OFFSET(index, soptimestamp));
		break;
	case KGSL_TIMESTAMP_RETIRED:
		/* End-of-pipeline timestamp (command fully retired) */
		kgsl_sharedmem_readl(device->memstore, timestamp,
			KGSL_MEMSTORE_OFFSET(index, eoptimestamp));
		break;
	default:
		status = -EINVAL;
		*timestamp = 0;
		break;
	}
	return status;
}

/**
 * adreno_rb_readtimestamp(): Return the value of given type of timestamp
 * for a RB
 * @adreno_dev: adreno device whose timestamp values are being queried
 * @priv: The object being queried for a timestamp (expected to be a rb pointer)
 * @type: The type of timestamp (one of 3) to be read
 * @timestamp: Pointer to where the read timestamp is to be written to
 *
 * CONSUMED and RETIRED type timestamps are sorted by id and are constantly
 * updated by the GPU through shared memstore memory. QUEUED type timestamps
 * are read directly from context struct.
 *
 * The function returns 0 on success and timestamp value at the *timestamp
 * address and returns -EINVAL on any read error/invalid type and timestamp = 0.
 */
int adreno_rb_readtimestamp(struct adreno_device *adreno_dev,
		void *priv, enum kgsl_timestamp_type type,
		unsigned int *timestamp)
{
	int status = 0;
	struct adreno_ringbuffer *rb = priv;

	if (type == KGSL_TIMESTAMP_QUEUED)
		*timestamp = rb->timestamp;
	else
		/* RB memstore slots live past the per-context slots */
		status = __adreno_readtimestamp(adreno_dev,
				rb->id + KGSL_MEMSTORE_MAX,
				type, timestamp);

	return status;
}

/**
 * adreno_readtimestamp(): Return the value of given type of timestamp
 * @device: GPU device whose timestamp values are being queried
 * @priv: The object being queried for a timestamp (expected to be a context)
 * @type: The type of timestamp (one of 3) to be read
 * @timestamp: Pointer to where the read timestamp is to be written to
 *
 * CONSUMED and RETIRED type timestamps are sorted by id and are constantly
 * updated by the GPU through shared memstore memory. QUEUED type timestamps
 * are read directly from context struct.
 *
 * The function returns 0 on success and timestamp value at the *timestamp
 * address and returns -EINVAL on any read error/invalid type and timestamp = 0.
 */
static int adreno_readtimestamp(struct kgsl_device *device,
		void *priv, enum kgsl_timestamp_type type,
		unsigned int *timestamp)
{
	int status = 0;
	struct kgsl_context *context = priv;

	if (type == KGSL_TIMESTAMP_QUEUED) {
		struct adreno_context *ctxt = ADRENO_CONTEXT(context);

		*timestamp = ctxt->timestamp;
	} else
		status = __adreno_readtimestamp(ADRENO_DEVICE(device),
				context->id, type, timestamp);

	return status;
}

/**
 * adreno_device_private_create(): Allocate an adreno_device_private structure
 *
 * Return: pointer to the embedded kgsl_device_private on success, NULL on
 * allocation failure.
 */
static struct kgsl_device_private *adreno_device_private_create(void)
{
	struct adreno_device_private *adreno_priv =
		kzalloc(sizeof(*adreno_priv), GFP_KERNEL);

	if (adreno_priv) {
		INIT_LIST_HEAD(&adreno_priv->perfcounter_list);
		return &adreno_priv->dev_priv;
	}
	return NULL;
}

/**
 * adreno_device_private_destroy(): Destroy an adreno_device_private structure
 * and release the perfcounters held by the kgsl fd.
 * @dev_priv: The kgsl device private structure
 */
static void adreno_device_private_destroy(struct kgsl_device_private *dev_priv)
{
	struct kgsl_device *device = dev_priv->device;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	struct adreno_device_private *adreno_priv =
		container_of(dev_priv, struct adreno_device_private,
			dev_priv);
	struct adreno_perfcounter_list_node *p, *tmp;

	/* Release every perfcounter this fd still holds, under device mutex */
	mutex_lock(&device->mutex);
	list_for_each_entry_safe(p, tmp, &adreno_priv->perfcounter_list, node) {
		adreno_perfcounter_put(adreno_dev, p->groupid,
			p->countable, PERFCOUNTER_FLAG_NONE);
		list_del(&p->node);
		kfree(p);
	}
	mutex_unlock(&device->mutex);

	kfree(adreno_priv);
}

/**
 * adreno_power_stats() - Reads the counters needed for freq decisions
 * @device: Pointer to device whose counters are read
 * @stats: Pointer to stats set that needs updating
 * Power: The caller is expected to be in a clock enabled state as this
 * function does reg reads
 */
static void adreno_power_stats(struct kgsl_device *device,
		struct kgsl_power_stats *stats)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);

	/*
	 * NOTE(review): "return <void expression>" is a GNU extension in C
	 * (constraint violation in ISO C); a plain call would be cleaner.
	 */
	return gpudev->power_stats(adreno_dev, stats);
}

/* Delegate regulator enable to the GPU family, if it provides a hook */
static int adreno_regulator_enable(struct kgsl_device *device)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);

	if (gpudev->regulator_enable)
		return gpudev->regulator_enable(adreno_dev);

	return 0;
}

/*
 * adreno_is_hw_collapsible() - Check whether the GPU can be power collapsed.
 *
 * Also drains pending GBIF/VBIF transactions and stops the dispatcher fault
 * timer when collapse is possible, so a "true" return has side effects.
 */
static bool adreno_is_hw_collapsible(struct kgsl_device *device)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);

	if (!gpudev->is_hw_collapsible(adreno_dev))
		return false;

	if (gpudev->clear_pending_transactions(adreno_dev))
		return false;

	adreno_dispatcher_stop_fault_timer(device);

	return true;
}

/* Delegate regulator disable to the GPU family, if it provides a hook */
static void adreno_regulator_disable(struct kgsl_device *device)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);

	if (gpudev->regulator_disable)
		gpudev->regulator_disable(adreno_dev);
}

/*
 * adreno_pwrlevel_change_settings() - Apply GPU-family specific settings
 * around a power level change.
 * @prelevel: Power level before the change
 * @postlevel: Power level after the change
 * @post: false for the pre-change call, true for the post-change call
 */
static void adreno_pwrlevel_change_settings(struct kgsl_device *device,
		unsigned int prelevel, unsigned int postlevel, bool post)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);

	if (gpudev->pwrlevel_change_settings)
		gpudev->pwrlevel_change_settings(adreno_dev, prelevel,
			postlevel, post);
}

/* Delegate per-clock option setup to the GPU family, if it provides a hook */
static void adreno_clk_set_options(struct kgsl_device *device, const char *name,
	struct clk *clk, bool on)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);

	if (gpudev->clk_set_options)
		gpudev->clk_set_options(adreno_dev, name, clk, on);
}

/* Report whether hardware clock gating is currently enabled */
static bool adreno_is_hwcg_on(struct kgsl_device *device)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);

	return adreno_dev->hwcg_enabled;
}

/*
 * adreno_queue_cmds() - Funnel user command submission into the active
 * dispatcher implementation (software or hardware scheduler).
 *
 * Return: result of the dispatcher's queue_cmds, or -ENODEV if no
 * dispatcher is registered (which would be a driver bug, hence WARN_ON).
 */
static int adreno_queue_cmds(struct kgsl_device_private *dev_priv,
	struct kgsl_context *context, struct kgsl_drawobj *drawobj[],
	u32 count, u32 *timestamp)
{
	struct kgsl_device *device = dev_priv->device;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);

	if (WARN_ON(!adreno_dev->dispatch_ops || !adreno_dev->dispatch_ops->queue_cmds))
		return -ENODEV;

	return adreno_dev->dispatch_ops->queue_cmds(dev_priv, context, drawobj,
		count, timestamp);
}

/* Ask the active dispatcher to (re)schedule the given context */
static void adreno_drawctxt_sched(struct kgsl_device *device,
		struct kgsl_context *context)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);

	if (WARN_ON(!adreno_dev->dispatch_ops || !adreno_dev->dispatch_ops->queue_context))
		return;

	adreno_dev->dispatch_ops->queue_context(adreno_dev,
		ADRENO_CONTEXT(context));
}

/*
 * adreno_power_cycle() - Suspend the GPU, run @callback, then resume.
 * @adreno_dev: Pointer to the adreno device
 * @callback: Function to run while the GPU is suspended
 * @priv: Opaque argument passed through to @callback
 *
 * Used to change settings that can only be touched while the GPU is down.
 * @callback runs (and resume happens) only if suspend succeeded.
 *
 * Return: 0 on success or the error from pm_suspend.
 */
int adreno_power_cycle(struct adreno_device *adreno_dev,
	void (*callback)(struct adreno_device *adreno_dev, void *priv),
	void *priv)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev);
	int ret;

	mutex_lock(&device->mutex);
	ret = ops->pm_suspend(adreno_dev);

	if (!ret) {
		callback(adreno_dev, priv);
		ops->pm_resume(adreno_dev);
	}

	mutex_unlock(&device->mutex);

	return ret;
}

/* Generic pointer/value pair for the power-cycle setter helpers below */
struct cycle_data {
	void *ptr;
	void *val;
};

/* Power-cycle callback: store a bool value through data->ptr */
static void cycle_set_bool(struct adreno_device *adreno_dev, void *priv)
{
	struct cycle_data *data = priv;

	*((bool *) data->ptr) = *((bool *) data->val);
}

/*
 * adreno_power_cycle_bool() - Set a bool flag with the GPU powered down.
 * @flag: Address of the flag to update
 * @val: New value for the flag
 */
int adreno_power_cycle_bool(struct adreno_device *adreno_dev,
	bool *flag, bool val)
{
	struct cycle_data data = { .ptr = flag, .val = &val };

	return adreno_power_cycle(adreno_dev, cycle_set_bool, &data);
}

/* Power-cycle callback: store a u32 value through data->ptr */
static void cycle_set_u32(struct adreno_device *adreno_dev, void *priv)
{
	struct cycle_data *data = priv;

	*((u32 *) data->ptr) = *((u32 *) data->val);
}

/*
 * adreno_power_cycle_u32() - Set a u32 value with the GPU powered down.
 * @flag: Address of the value to update
 * @val: New value
 */
int adreno_power_cycle_u32(struct adreno_device *adreno_dev,
	u32 *flag, u32 val)
{
	struct cycle_data data = { .ptr = flag, .val = &val };

	return adreno_power_cycle(adreno_dev, cycle_set_u32, &data);
}

/*
 * adreno_gpu_clock_set() - Set the GPU core clock to the given power level.
 *
 * Prefers the target-specific power op; otherwise sets the first group
 * clock directly via the common clock framework.
 */
static int adreno_gpu_clock_set(struct kgsl_device *device, u32 pwrlevel)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev);
	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
	struct kgsl_pwrlevel *pl = &pwr->pwrlevels[pwrlevel];
	int ret;

	if (ops->gpu_clock_set)
		return ops->gpu_clock_set(adreno_dev, pwrlevel);

	ret = clk_set_rate(pwr->grp_clks[0], pl->gpu_freq);
	if (ret)
		dev_err(device->dev, "GPU clk freq set failure: %d\n", ret);

	return ret;
}

/*
 * adreno_interconnect_bus_set() - Vote GPU DDR bandwidth via interconnect.
 * @level: Index into the DDR frequency table
 * @ab: Average bandwidth in MB/s
 *
 * Skips the vote if neither the level nor the ab value changed.
 */
static int adreno_interconnect_bus_set(struct adreno_device *adreno_dev,
	int level, u32 ab)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct kgsl_pwrctrl *pwr = &device->pwrctrl;

	if ((level == pwr->cur_buslevel) && (ab == pwr->cur_ab))
		return 0;

	pwr->cur_buslevel = level;
	pwr->cur_ab = ab;

	icc_set_bw(pwr->icc_path, MBps_to_icc(ab),
		kBps_to_icc(pwr->ddr_table[level]));

	trace_kgsl_buslevel(device, pwr->active_pwrlevel, level);

	return 0;
}

/* Bus vote: target-specific op if present, otherwise interconnect path */
static int adreno_gpu_bus_set(struct kgsl_device *device, int level, u32 ab)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev);

	if (ops->gpu_bus_set)
		return ops->gpu_bus_set(adreno_dev, level, ab);

	return adreno_interconnect_bus_set(adreno_dev, level, ab);
}

/* Deassert the GBIF halt signal, if the GPU family needs it */
static void adreno_deassert_gbif_halt(struct kgsl_device *device)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);

	if (gpudev->deassert_gbif_halt)
		gpudev->deassert_gbif_halt(adreno_dev);
}

/* KGSL core -> adreno driver dispatch table */
static const struct kgsl_functable adreno_functable = {
	/* Mandatory functions */
	.suspend_context = adreno_suspend_context,
	.first_open = adreno_first_open,
	.start = adreno_start,
	.stop = adreno_stop,
	.last_close = adreno_last_close,
	.getproperty = adreno_getproperty,
	.getproperty_compat = adreno_getproperty_compat,
	.waittimestamp = adreno_waittimestamp,
	.readtimestamp = adreno_readtimestamp,
	.queue_cmds = adreno_queue_cmds,
	.ioctl = adreno_ioctl,
	.compat_ioctl = adreno_compat_ioctl,
	.power_stats = adreno_power_stats,
	.snapshot = adreno_snapshot,
	.drain_and_idle = adreno_drain_and_idle,
	.device_private_create = adreno_device_private_create,
	.device_private_destroy = adreno_device_private_destroy,
	/* Optional functions */
	.drawctxt_create = adreno_drawctxt_create,
	.drawctxt_detach = adreno_drawctxt_detach,
	.drawctxt_destroy = adreno_drawctxt_destroy,
	.drawctxt_dump = adreno_drawctxt_dump,
	.setproperty = adreno_setproperty,
	.setproperty_compat = adreno_setproperty_compat,
	.drawctxt_sched = adreno_drawctxt_sched,
	.resume = adreno_dispatcher_start,
	.regulator_enable = adreno_regulator_enable,
	.is_hw_collapsible = adreno_is_hw_collapsible,
	.regulator_disable = adreno_regulator_disable,
	.pwrlevel_change_settings = adreno_pwrlevel_change_settings,
	.clk_set_options = adreno_clk_set_options,
	.query_property_list = adreno_query_property_list,
	.is_hwcg_on = adreno_is_hwcg_on,
	.gpu_clock_set = adreno_gpu_clock_set,
	.gpu_bus_set = adreno_gpu_bus_set,
	.deassert_gbif_halt = adreno_deassert_gbif_halt,
};

/* Component-framework master ops (used when a GMU child device exists) */
static const struct component_master_ops adreno_ops = {
	.bind = adreno_bind,
	.unbind = adreno_unbind,
};

/* Default power sequencing used by non-GMU targets */
const struct adreno_power_ops adreno_power_operations = {
	.first_open = adreno_open,
	.last_close = adreno_close,
	.active_count_get = adreno_pwrctrl_active_count_get,
	.active_count_put = adreno_pwrctrl_active_count_put,
	.pm_suspend = adreno_suspend,
	.pm_resume = adreno_resume,
	.touch_wakeup = adreno_touch_wakeup,
};

/* Component match callback: match on the device tree node pointer */
static int _compare_of(struct device *dev, void *data)
{
	return (dev->of_node == data);
}

/* Component match release: drop the of_node reference taken at match time */
static void _release_of(struct device *dev, void *data)
{
	of_node_put(data);
}

/*
 * adreno_add_gmu_components() - Add a matching, enabled GMU DT node to the
 * component match list so it binds together with the GPU.
 * @dev: The adreno platform device
 * @match: Component match list to append to
 */
static void adreno_add_gmu_components(struct device *dev,
		struct component_match **match)
{
	struct device_node *node;

	node = of_find_matching_node(NULL, adreno_gmu_match);
	if (!node)
		return;

	if (!of_device_is_available(node)) {
		of_node_put(node);
		return;
	}

	/* The node reference is released by _release_of at teardown */
	component_match_add_release(dev, match, _release_of,
		_compare_of, node);
}

/*
 * adreno_probe() - Platform probe; binds through the component framework
 * when a GMU is present, directly otherwise.
 */
static int adreno_probe(struct platform_device *pdev)
{
	struct component_match *match = NULL;

	adreno_add_gmu_components(&pdev->dev, &match);

	if (match)
		return component_master_add_with_match(&pdev->dev,
				&adreno_ops, match);
	else
		return adreno_bind(&pdev->dev);
}

/* Platform remove: mirror of adreno_probe's two bind paths */
static int adreno_remove(struct platform_device *pdev)
{
	if (of_find_matching_node(NULL, adreno_gmu_match))
		component_master_del(&pdev->dev, &adreno_ops);
	else
		adreno_unbind(&pdev->dev);

	return 0;
}

static const struct dev_pm_ops adreno_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(adreno_pm_suspend, adreno_pm_resume)
};

static struct platform_driver adreno_platform_driver = {
	.probe = adreno_probe,
	.remove = adreno_remove,
	.driver = {
		.name = "kgsl-3d",
		.pm = &adreno_pm_ops,
		.of_match_table = of_match_ptr(adreno_match_table),
	}
};

/* Module init: bring up the KGSL core, then register the GMU and GPU */
static int __init kgsl_3d_init(void)
{
	int ret;

	ret = kgsl_core_init();
	if (ret)
		return ret;

	gmu_core_register();
	ret = platform_driver_register(&adreno_platform_driver);
	if (ret)
		kgsl_core_exit();

	return ret;
}

/* Module exit: tear down in the reverse order of kgsl_3d_init */
static void __exit kgsl_3d_exit(void)
{
	platform_driver_unregister(&adreno_platform_driver);
	gmu_core_unregister();
	kgsl_core_exit();
}

module_init(kgsl_3d_init);
module_exit(kgsl_3d_exit);

MODULE_DESCRIPTION("3D Graphics driver");
MODULE_LICENSE("GPL v2");
MODULE_SOFTDEP("pre: qcom-arm-smmu-mod nvmem_qfprom");
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved.
 */
#ifndef __ADRENO_H
#define __ADRENO_H

/*
 * NOTE(review): the targets of the next two #include directives were lost in
 * extraction (likely <linux/...> headers); restore them from the upstream
 * msm-5.10 adreno.h before building.
 */
#include
#include
#include "adreno_coresight.h"
#include "adreno_dispatch.h"
#include "adreno_drawctxt.h"
#include "adreno_hwsched.h"
#include "adreno_perfcounter.h"
#include "adreno_profile.h"
#include "adreno_ringbuffer.h"
#include "kgsl_sharedmem.h"

/* ADRENO_DEVICE - Given a kgsl_device return the adreno device struct */
#define ADRENO_DEVICE(device) \
		container_of(device, struct adreno_device, dev)

/* KGSL_DEVICE - given an adreno_device, return the KGSL device struct */
#define KGSL_DEVICE(_dev) (&((_dev)->dev))

/* ADRENO_CONTEXT - Given a context return the adreno context struct */
#define ADRENO_CONTEXT(context) \
		container_of(context, struct adreno_context, base)

/* ADRENO_GPU_DEVICE - Given an adreno device return the GPU specific struct */
#define ADRENO_GPU_DEVICE(_a) ((_a)->gpucore->gpudev)

/*
 * ADRENO_POWER_OPS - Given an adreno device return the GPU specific power
 * ops
 */
#define ADRENO_POWER_OPS(_a) ((_a)->gpucore->gpudev->power_ops)

/* Field extractors for the 32-bit chip id: core.major.minor.patch */
#define ADRENO_CHIPID_CORE(_id) FIELD_GET(GENMASK(31, 24), _id)
#define ADRENO_CHIPID_MAJOR(_id) FIELD_GET(GENMASK(23, 16), _id)
#define ADRENO_CHIPID_MINOR(_id) FIELD_GET(GENMASK(15, 8), _id)
#define ADRENO_CHIPID_PATCH(_id) FIELD_GET(GENMASK(7, 0), _id)

/* Repack the chip id into the layout the GMU firmware expects */
#define ADRENO_GMU_CHIPID(_id) \
	(FIELD_PREP(GENMASK(31, 24), ADRENO_CHIPID_CORE(_id)) | \
	 FIELD_PREP(GENMASK(23, 16), ADRENO_CHIPID_MAJOR(_id)) | \
	 FIELD_PREP(GENMASK(15, 12), ADRENO_CHIPID_MINOR(_id)) | \
	 FIELD_PREP(GENMASK(11, 8), ADRENO_CHIPID_PATCH(_id)))

/* ADRENO_GPUREV - Return the GPU ID for the given adreno_device */
#define ADRENO_GPUREV(_a) ((_a)->gpucore->gpurev)

/*
 * ADRENO_FEATURE - return true if the specified feature is supported by the GPU
 * core
 */
#define ADRENO_FEATURE(_dev, _bit) \
	((_dev)->gpucore->features & (_bit))

/**
 * ADRENO_QUIRK - return true if the specified quirk is required by the GPU
 */
#define ADRENO_QUIRK(_dev, _bit) \
	((_dev)->quirks & (_bit))

/* ADRENO_FW - access slot f of the firmware array of adreno device a */
#define ADRENO_FW(a, f) (&(a->fw[f]))

/* Adreno core features */
/* The core supports SP/TP hw controlled power collapse */
#define ADRENO_SPTP_PC BIT(0)
/* The GPU supports content protection */
#define ADRENO_CONTENT_PROTECTION BIT(1)
/* The GPU supports preemption */
#define ADRENO_PREEMPTION BIT(2)
/* The GPMU supports Limits Management */
#define ADRENO_LM BIT(3)
/* The GPU supports retention for cpz registers */
#define ADRENO_CPZ_RETENTION BIT(4)
/* The core has soft fault detection available */
#define ADRENO_SOFT_FAULT_DETECT BIT(5)
/* The GMU supports IFPC power management*/
#define ADRENO_IFPC BIT(6)
/* The core supports IO-coherent memory */
#define ADRENO_IOCOHERENT BIT(7)
/*
 * The GMU supports Adaptive Clock Distribution (ACD)
 * for droop mitigation
 */
#define ADRENO_ACD BIT(8)
/* Cooperative reset enabled GMU */
#define ADRENO_COOP_RESET BIT(9)
/* Indicates that the specific target is no longer supported */
#define ADRENO_DEPRECATED BIT(10)
/* The target supports ringbuffer level APRIV */
#define ADRENO_APRIV BIT(11)
/* The GMU supports Battery Current Limiting */
#define ADRENO_BCL BIT(12)
/* L3 voting is supported with L3 constraints */
#define ADRENO_L3_VOTE BIT(13)

/*
 * Adreno GPU quirks - control bits for various workarounds
 */

/* Set TWOPASSUSEWFI in PC_DBG_ECO_CNTL (5XX/6XX) */
#define ADRENO_QUIRK_TWO_PASS_USE_WFI BIT(0)
/* Submit critical packets at GPU wake up */
#define ADRENO_QUIRK_CRITICAL_PACKETS BIT(1)
/* Mask out RB1-3 activity signals from HW hang detection logic */
#define ADRENO_QUIRK_FAULT_DETECT_MASK BIT(2)
/* Disable RB sampler datapath clock gating optimization */
#define ADRENO_QUIRK_DISABLE_RB_DP2CLOCKGATING BIT(3)
/* Disable local memory(LM) feature to avoid corner case error */
#define ADRENO_QUIRK_DISABLE_LMLOADKILL BIT(4)
/* Allow HFI to use registers to send message to GMU */
#define ADRENO_QUIRK_HFI_USE_REG BIT(5)
/* Only set protected SECVID registers once */
#define ADRENO_QUIRK_SECVID_SET_ONCE BIT(6)
/*
 * Limit number of read and write transactions from
 * UCHE block to GBIF to avoid possible deadlock
 * between GBIF, SMMU and MEMNOC.
 */
#define ADRENO_QUIRK_LIMIT_UCHE_GBIF_RW BIT(8)
/* Do explicit mode control of cx gdsc */
#define ADRENO_QUIRK_CX_GDSC BIT(9)

/* Command identifiers */
#define CONTEXT_TO_MEM_IDENTIFIER 0x2EADBEEF
#define CMD_IDENTIFIER 0x2EEDFACE
#define CMD_INTERNAL_IDENTIFIER 0x2EEDD00D
#define START_IB_IDENTIFIER 0x2EADEABE
#define END_IB_IDENTIFIER 0x2ABEDEAD
#define START_PROFILE_IDENTIFIER 0x2DEFADE1
#define END_PROFILE_IDENTIFIER 0x2DEFADE2
#define PWRON_FIXUP_IDENTIFIER 0x2AFAFAFA

/* One cannot wait forever for the core to idle, so set an upper limit to the
 * amount of time to wait for the core to go idle
 */
#define ADRENO_IDLE_TIMEOUT (20 * 1000)

/* Firmware array slot assignments (PFP/PM4 for legacy, SQE for a6xx+) */
#define ADRENO_FW_PFP 0
#define ADRENO_FW_SQE 0
#define ADRENO_FW_PM4 1

/* Unique revision identifiers for every supported GPU core */
enum adreno_gpurev {
	ADRENO_REV_UNKNOWN = 0,
	ADRENO_REV_A304 = 304,
	ADRENO_REV_A305 = 305,
	ADRENO_REV_A305C = 306,
	ADRENO_REV_A306 = 307,
	ADRENO_REV_A306A = 308,
	ADRENO_REV_A310 = 310,
	ADRENO_REV_A320 = 320,
	ADRENO_REV_A330 = 330,
	ADRENO_REV_A305B = 335,
	ADRENO_REV_A405 = 405,
	ADRENO_REV_A418 = 418,
	ADRENO_REV_A420 = 420,
	ADRENO_REV_A430 = 430,
	ADRENO_REV_A505 = 505,
	ADRENO_REV_A506 = 506,
	ADRENO_REV_A508 = 508,
	ADRENO_REV_A510 = 510,
	ADRENO_REV_A512 = 512,
	ADRENO_REV_A530 = 530,
	ADRENO_REV_A540 = 540,
	ADRENO_REV_A610 = 610,
	ADRENO_REV_A612 = 612,
	ADRENO_REV_A615 = 615,
	ADRENO_REV_A616 = 616,
	ADRENO_REV_A618 = 618,
	ADRENO_REV_A619 = 619,
	ADRENO_REV_A620 = 620,
	ADRENO_REV_A630 = 630,
	ADRENO_REV_A635 = 635,
	ADRENO_REV_A640 = 640,
	ADRENO_REV_A650 = 650,
	ADRENO_REV_A660 = 660,
	ADRENO_REV_A680 = 680,
	/*
	 * Gen7 and higher version numbers may exceed 1 digit
	 * Bits 16-23: Major
	 * Bits 8-15: Minor
	 * Bits 0-7: Patch id
	 */
	ADRENO_REV_GEN7_0_0 = 0x070000,
	ADRENO_REV_GEN7_0_1 = 0x070001,
};

/* Fault type bits reported to/through the dispatcher */
#define ADRENO_SOFT_FAULT BIT(0)
#define ADRENO_HARD_FAULT BIT(1)
#define ADRENO_TIMEOUT_FAULT BIT(2)
#define ADRENO_IOMMU_PAGE_FAULT BIT(3)
#define ADRENO_PREEMPT_FAULT BIT(4)
#define ADRENO_GMU_FAULT BIT(5)
#define ADRENO_CTX_DETATCH_TIMEOUT_FAULT BIT(6)
#define ADRENO_GMU_FAULT_SKIP_SNAPSHOT BIT(7)

/* number of throttle counters for DCVS adjustment */
#define ADRENO_GPMU_THROTTLE_COUNTERS 4

struct adreno_gpudev;

/* Time to allow preemption to complete (in ms) */
#define ADRENO_PREEMPT_TIMEOUT 10000

/**
 * enum adreno_preempt_states
 * ADRENO_PREEMPT_NONE: No preemption is scheduled
 * ADRENO_PREEMPT_START: The S/W has started
 * ADRENO_PREEMPT_TRIGGERED: A preeempt has been triggered in the HW
 * ADRENO_PREEMPT_FAULTED: The preempt timer has fired
 * ADRENO_PREEMPT_PENDING: The H/W has signaled preemption complete
 * ADRENO_PREEMPT_COMPLETE: Preemption could not be finished in the IRQ handler,
 * worker has been scheduled
 */
enum adreno_preempt_states {
	ADRENO_PREEMPT_NONE = 0,
	ADRENO_PREEMPT_START,
	ADRENO_PREEMPT_TRIGGERED,
	ADRENO_PREEMPT_FAULTED,
	ADRENO_PREEMPT_PENDING,
	ADRENO_PREEMPT_COMPLETE,
};

/**
 * struct adreno_protected_regs - container for a protect register span
 */
struct adreno_protected_regs {
	/** @reg: Physical protected mode register to write to */
	u32 reg;
	/** @start: Dword offset of the starting register in the range */
	u32 start;
	/**
	 * @end: Dword offset of the ending register in the range
	 * (inclusive)
	 */
	u32 end;
	/**
	 * @noaccess: 1 if the register should not be accessible from
	 * userspace, 0 if it can be read (but not written)
	 */
	u32 noaccess;
};

/**
 * struct adreno_preemption
 * @state: The current state of preemption
 * @scratch: Per-target scratch memory for implementation specific functionality
 * @timer: A timer to make sure preemption doesn't stall
 * @work: A work struct for the preemption worker (for 5XX)
 * preempt_level: The level of preemption (for 6XX)
 * skipsaverestore: To skip saverestore during L1 preemption (for 6XX)
 * usesgmem: enable GMEM save/restore across preemption (for 6XX)
 * count: Track the number of preemptions triggered
 */
struct adreno_preemption {
	atomic_t state;
	struct kgsl_memdesc *scratch;
	struct timer_list timer;
	struct work_struct work;
	unsigned int preempt_level;
	bool skipsaverestore;
	bool usesgmem;
	unsigned int count;
};

/* Raw busy/stall counter snapshots used for DCVS decisions */
struct adreno_busy_data {
	unsigned int gpu_busy;
	unsigned int bif_ram_cycles;
	unsigned int bif_ram_cycles_read_ch1;
	unsigned int bif_ram_cycles_write_ch0;
	unsigned int bif_ram_cycles_write_ch1;
	unsigned int bif_starved_ram;
	unsigned int bif_starved_ram_ch1;
	unsigned int num_ifpc;
	unsigned int throttle_cycles[ADRENO_GPMU_THROTTLE_COUNTERS];
};

/**
 * struct adreno_firmware - Struct holding fw details
 * @fwvirt: Buffer which holds the ucode
 * @size: Size of ucode buffer
 * @version: Version of ucode
 * @memdesc: Memory descriptor which holds ucode buffer info
 */
struct adreno_firmware {
	unsigned int *fwvirt;
	size_t size;
	unsigned int version;
	struct kgsl_memdesc *memdesc;
};

/**
 * struct adreno_perfcounter_list_node - struct to store perfcounters
 * allocated by a process on a kgsl fd.
 * @groupid: groupid of the allocated perfcounter
 * @countable: countable assigned to the allocated perfcounter
 * @node: list node for perfcounter_list of a process
 */
struct adreno_perfcounter_list_node {
	unsigned int groupid;
	unsigned int countable;
	struct list_head node;
};

/**
 * struct adreno_device_private - Adreno private structure per fd
 * @dev_priv: the kgsl device private structure
 * @perfcounter_list: list of perfcounters used by the process
 */
struct adreno_device_private {
	struct kgsl_device_private dev_priv;
	struct list_head perfcounter_list;
};

/**
 * struct adreno_reglist_list - A container for list of registers and
 * number of registers in the list
 */
struct adreno_reglist_list {
	/** @reg: List of register **/
	const u32 *regs;
	/** @count: Number of registers in the list **/
	u32 count;
};

/**
 * struct adreno_power_ops - Container for target specific power up/down
 * sequences
 */
struct adreno_power_ops {
	/**
	 * @first_open: Target specific function triggered when first kgsl
	 * instance is opened
	 */
	int (*first_open)(struct adreno_device *adreno_dev);
	/**
	 * @last_close: Target specific function triggered when last kgsl
	 * instance is closed
	 */
	int (*last_close)(struct adreno_device *adreno_dev);
	/**
	 * @active_count_get: Target specific function to keep gpu from power
	 * collapsing
	 */
	int (*active_count_get)(struct adreno_device *adreno_dev);
	/**
	 * @active_count_put: Target specific function to allow gpu to power
	 * collapse
	 */
	void (*active_count_put)(struct adreno_device *adreno_dev);
	/** @pm_suspend: Target specific function to suspend the driver */
	int (*pm_suspend)(struct adreno_device *adreno_dev);
	/** @pm_resume: Target specific function to resume the driver */
	void (*pm_resume)(struct adreno_device *adreno_dev);
	/**
	 * @touch_wakeup: Target specific function to start gpu on touch event
	 */
	void (*touch_wakeup)(struct adreno_device *adreno_dev);
	/** @gpu_clock_set: Target specific function to set gpu frequency */
	int (*gpu_clock_set)(struct adreno_device *adreno_dev, u32 pwrlevel);
	/** @gpu_bus_set: Target specific function to set gpu bandwidth */
	int (*gpu_bus_set)(struct adreno_device *adreno_dev, int bus_level,
		u32 ab);
};

/**
 * struct adreno_gpu_core - A specific GPU core definition
 * @gpurev: Unique GPU revision identifier
 * @core: Match for the core version of the GPU
 * @major: Match for the major version of the GPU
 * @minor: Match for the minor version of the GPU
 * @patchid: Match for the patch revision of the GPU
 * @features: Common adreno features supported by this core
 * @gpudev: Pointer to the GPU family specific functions for this core
 * @gmem_base: Base address of binning memory (GMEM/OCMEM)
 * @gmem_size: Amount of binning memory (GMEM/OCMEM) to reserve for the core
 * @bus_width: Bytes transferred in 1 cycle
 */
struct adreno_gpu_core {
	enum adreno_gpurev gpurev;
	/** @chipid: Unique GPU chipid for external identification */
	u32 chipid;
	unsigned int core, major, minor, patchid;
	/**
	 * @compatible: If specified, use the compatible string to match the
	 * device
	 */
	const char *compatible;
	unsigned long features;
	const struct adreno_gpudev *gpudev;
	const struct adreno_perfcounters *perfcounters;
	unsigned long gmem_base;
	size_t gmem_size;
	u32 bus_width;
	/** @snapshot_size: Size of the static snapshot region in bytes */
	u32 snapshot_size;
};

/**
 * struct adreno_dispatch_ops - Common functions for dispatcher operations
 */
struct adreno_dispatch_ops {
	/* @close: Shut down the dispatcher */
	void (*close)(struct adreno_device *adreno_dev);
	/* @queue_cmds: Queue a command on the context */
	int (*queue_cmds)(struct kgsl_device_private *dev_priv,
		struct kgsl_context *context, struct kgsl_drawobj *drawobj[],
		u32 count, u32 *timestamp);
	/* @queue_context: Queue a context to be dispatched */
	void (*queue_context)(struct adreno_device *adreno_dev,
		struct adreno_context *drawctxt);
	/* @setup_context: Per-context dispatcher setup hook */
	void (*setup_context)(struct adreno_device *adreno_dev,
		struct adreno_context *drawctxt);
	/* @fault: Report a fault of the given type to the dispatcher */
	void (*fault)(struct adreno_device *adreno_dev, u32 fault);
	/* @idle: Wait for dipatcher to become idle */
	int (*idle)(struct adreno_device *adreno_dev);
};

/**
 * struct adreno_device - The mothership structure for all adreno related info
 * @dev: Reference to struct kgsl_device
 * @priv: Holds the private flags specific to the adreno_device
 * @chipid: Chip ID specific to the GPU
 * @cx_misc_len: Length of the CX MISC register block
 * @cx_misc_virt: Pointer where the CX MISC block is mapped
 * @isense_base: Base physical address of isense block
 * @isense_len: Length of the isense register block
 * @isense_virt: Pointer where isense block is mapped
 * @gpucore: Pointer to the adreno_gpu_core structure
 * @pfp_fw: Buffer which holds the pfp ucode
 * @pfp_fw_size: Size of pfp ucode buffer
 * @pfp_fw_version: Version of pfp ucode
 * @pfp: Memory descriptor which holds pfp ucode buffer info
 * @pm4_fw: Buffer which holds the pm4 ucode
 * @pm4_fw_size: Size of pm4 ucode buffer
 * @pm4_fw_version: Version of pm4 ucode
 * @pm4: Memory descriptor which holds pm4 ucode buffer info
 * @gpmu_cmds_size: Length of gpmu cmd stream
 * @gpmu_cmds: gpmu cmd stream
 * @ringbuffers: Array of pointers to adreno_ringbuffers
 * @num_ringbuffers: Number of ringbuffers for the GPU
 * @cur_rb: Pointer to the current ringbuffer
 * @next_rb: Ringbuffer we are switching to during preemption
 * @prev_rb: Ringbuffer we are switching from during preemption
 * @fast_hang_detect: Software fault detection availability
 * @ft_policy: Defines the fault tolerance policy
 * @long_ib_detect: Long IB detection availability
 * @cooperative_reset: Indicates if graceful death handshake is enabled
 * between GMU and GPU
 * @profile: Container for adreno profiler information
 *
@dispatcher: Container for adreno GPU dispatcher + * @pwron_fixup: Command buffer to run a post-power collapse shader workaround + * @pwron_fixup_dwords: Number of dwords in the command buffer + * @input_work: Work struct for turning on the GPU after a touch event + * @busy_data: Struct holding GPU VBIF busy stats + * @ram_cycles_lo: Number of DDR clock cycles for the monitor session (Only + * DDR channel 0 read cycles in case of GBIF) + * @ram_cycles_lo_ch1_read: Number of DDR channel 1 Read clock cycles for + * the monitor session + * @ram_cycles_lo_ch0_write: Number of DDR channel 0 Write clock cycles for + * the monitor session + * @ram_cycles_lo_ch1_write: Number of DDR channel 1 Write clock cycles for + * the monitor session + * @starved_ram_lo: Number of cycles VBIF/GBIF is stalled by DDR (Only channel 0 + * stall cycles in case of GBIF) + * @starved_ram_lo_ch1: Number of cycles GBIF is stalled by DDR channel 1 + * @halt: Atomic variable to check whether the GPU is currently halted + * @pending_irq_refcnt: Atomic variable to keep track of running IRQ handlers + * @ctx_d_debugfs: Context debugfs node + * @profile_buffer: Memdesc holding the drawobj profiling buffer + * @profile_index: Index to store the start/stop ticks in the profiling + * buffer + * @pwrup_reglist: Memdesc holding the power up register list + * which is used by CP during preemption and IFPC + * @lm_sequence: Pointer to the start of the register write sequence for LM + * @lm_size: The dword size of the LM sequence + * @lm_limit: limiting value for LM + * @lm_threshold_count: register value for counter for lm threshold breakin + * @lm_threshold_cross: number of current peaks exceeding threshold + * @ifpc_count: Number of times the GPU went into IFPC + * @highest_bank_bit: Value of the highest bank bit + * @csdev: Pointer to a coresight device (if applicable) + * @gpmu_throttle_counters: counters for number of throttled clocks + * @irq_storm_work: Worker to handle possible interrupt storms + 
* @active_list: List to track active contexts + * @active_list_lock: Lock to protect active_list + * @gpu_llc_slice: GPU system cache slice descriptor + * @gpu_llc_slice_enable: To enable the GPU system cache slice or not + * @gpuhtw_llc_slice: GPU pagetables system cache slice descriptor + * @gpuhtw_llc_slice_enable: To enable the GPUHTW system cache slice or not + * @zap_loaded: Used to track if zap was successfully loaded or not + */ +struct adreno_device { + struct kgsl_device dev; /* Must be first field in this struct */ + unsigned long priv; + unsigned int chipid; + unsigned long cx_dbgc_base; + unsigned int cx_dbgc_len; + void __iomem *cx_dbgc_virt; + unsigned int cx_misc_len; + void __iomem *cx_misc_virt; + unsigned long isense_base; + unsigned int isense_len; + void __iomem *isense_virt; + const struct adreno_gpu_core *gpucore; + struct adreno_firmware fw[2]; + size_t gpmu_cmds_size; + unsigned int *gpmu_cmds; + struct adreno_ringbuffer ringbuffers[KGSL_PRIORITY_MAX_RB_LEVELS]; + int num_ringbuffers; + struct adreno_ringbuffer *cur_rb; + struct adreno_ringbuffer *next_rb; + struct adreno_ringbuffer *prev_rb; + unsigned int fast_hang_detect; + unsigned long ft_policy; + bool long_ib_detect; + bool cooperative_reset; + struct adreno_profile profile; + struct adreno_dispatcher dispatcher; + struct kgsl_memdesc *pwron_fixup; + unsigned int pwron_fixup_dwords; + struct work_struct input_work; + struct adreno_busy_data busy_data; + unsigned int ram_cycles_lo; + unsigned int ram_cycles_lo_ch1_read; + unsigned int ram_cycles_lo_ch0_write; + unsigned int ram_cycles_lo_ch1_write; + unsigned int starved_ram_lo; + unsigned int starved_ram_lo_ch1; + atomic_t halt; + atomic_t pending_irq_refcnt; + struct dentry *ctx_d_debugfs; + /** @lm_enabled: True if limits management is enabled for this target */ + bool lm_enabled; + /** @acd_enabled: True if acd is enabled for this target */ + bool acd_enabled; + /** @hwcg_enabled: True if hardware clock gating is enabled */ + bool 
hwcg_enabled; + /** @throttling_enabled: True if LM throttling is enabled on a5xx */ + bool throttling_enabled; + /** @sptp_pc_enabled: True if SPTP power collapse is enabled on a5xx */ + bool sptp_pc_enabled; + /** @bcl_enabled: True if BCL is enabled */ + bool bcl_enabled; + struct kgsl_memdesc *profile_buffer; + unsigned int profile_index; + struct kgsl_memdesc *pwrup_reglist; + uint32_t *lm_sequence; + uint32_t lm_size; + struct adreno_preemption preempt; + struct work_struct gpmu_work; + uint32_t lm_leakage; + uint32_t lm_limit; + uint32_t lm_threshold_count; + uint32_t lm_threshold_cross; + uint32_t ifpc_count; + + unsigned int highest_bank_bit; + unsigned int quirks; + + struct coresight_device *csdev[2]; + uint32_t gpmu_throttle_counters[ADRENO_GPMU_THROTTLE_COUNTERS]; + struct work_struct irq_storm_work; + + struct list_head active_list; + spinlock_t active_list_lock; + + void *gpu_llc_slice; + bool gpu_llc_slice_enable; + void *gpuhtw_llc_slice; + bool gpuhtw_llc_slice_enable; + unsigned int zap_loaded; + /** + * @critpkts: Memory descriptor for 5xx critical packets if applicable + */ + struct kgsl_memdesc *critpkts; + /** + * @critpkts_secure: Memory descriptor for 5xx secure critical packets + */ + struct kgsl_memdesc *critpkts_secure; + /** @irq_mask: The current interrupt mask for the GPU device */ + u32 irq_mask; + /* + * @soft_ft_regs: an array of registers for soft fault detection on a3xx + * targets + */ + u32 *soft_ft_regs; + /* + * @soft_ft_vals: an array of register values for soft fault detection + * on a3xx targets + */ + u32 *soft_ft_vals; + /* + * @soft_ft_count: number of elements in @soft_ft_regs and @soft_ft_vals + */ + int soft_ft_count; + /** @wake_on_touch: If true our last wakeup was due to a touch event */ + bool wake_on_touch; + /* @dispatch_ops: A pointer to a set of adreno dispatch ops */ + const struct adreno_dispatch_ops *dispatch_ops; + /** @hwsched: Container for the hardware dispatcher */ + struct adreno_hwsched hwsched; +}; + +/** 
+ * enum adreno_device_flags - Private flags for the adreno_device + * @ADRENO_DEVICE_PWRON - Set during init after a power collapse + * @ADRENO_DEVICE_PWRON_FIXUP - Set if the target requires the shader fixup + * after power collapse + * @ADRENO_DEVICE_CORESIGHT - Set if the coresight (trace bus) registers should + * be restored after power collapse + * @ADRENO_DEVICE_STARTED - Set if the device start sequence is in progress + * @ADRENO_DEVICE_FAULT - Set if the device is currently in fault (and shouldn't + * send any more commands to the ringbuffer) + * @ADRENO_DEVICE_DRAWOBJ_PROFILE - Set if the device supports drawobj + * profiling via the ALWAYSON counter + * @ADRENO_DEVICE_PREEMPTION - Turn on/off preemption + * @ADRENO_DEVICE_SOFT_FAULT_DETECT - Set if soft fault detect is enabled + * @ADRENO_DEVICE_GPMU_INITIALIZED - Set if GPMU firmware initialization succeed + * @ADRENO_DEVICE_ISDB_ENABLED - Set if the Integrated Shader DeBugger is + * attached and enabled + * @ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED - Set if a CACHE_FLUSH_TS irq storm + * is in progress + */ +enum adreno_device_flags { + ADRENO_DEVICE_PWRON = 0, + ADRENO_DEVICE_PWRON_FIXUP = 1, + ADRENO_DEVICE_INITIALIZED = 2, + ADRENO_DEVICE_CORESIGHT = 3, + ADRENO_DEVICE_STARTED = 5, + ADRENO_DEVICE_FAULT = 6, + ADRENO_DEVICE_DRAWOBJ_PROFILE = 7, + ADRENO_DEVICE_GPU_REGULATOR_ENABLED = 8, + ADRENO_DEVICE_PREEMPTION = 9, + ADRENO_DEVICE_SOFT_FAULT_DETECT = 10, + ADRENO_DEVICE_GPMU_INITIALIZED = 11, + ADRENO_DEVICE_ISDB_ENABLED = 12, + ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED = 13, + ADRENO_DEVICE_CORESIGHT_CX = 14, +}; + +/** + * struct adreno_drawobj_profile_entry - a single drawobj entry in the + * kernel profiling buffer + * @started: Number of GPU ticks at start of the drawobj + * @retired: Number of GPU ticks at the end of the drawobj + */ +struct adreno_drawobj_profile_entry { + uint64_t started; + uint64_t retired; +}; + +#define ADRENO_DRAWOBJ_PROFILE_OFFSET(_index, _member) \ + ((_index) * 
sizeof(struct adreno_drawobj_profile_entry) \ + + offsetof(struct adreno_drawobj_profile_entry, _member)) + + +/** + * adreno_regs: List of registers that are used in kgsl driver for all + * 3D devices. Each device type has different offset value for the same + * register, so an array of register offsets are declared for every device + * and are indexed by the enumeration values defined in this enum + */ +enum adreno_regs { + ADRENO_REG_CP_ME_RAM_DATA, + ADRENO_REG_CP_RB_BASE, + ADRENO_REG_CP_RB_BASE_HI, + ADRENO_REG_CP_RB_RPTR_ADDR_LO, + ADRENO_REG_CP_RB_RPTR_ADDR_HI, + ADRENO_REG_CP_RB_RPTR, + ADRENO_REG_CP_RB_WPTR, + ADRENO_REG_CP_ME_CNTL, + ADRENO_REG_CP_RB_CNTL, + ADRENO_REG_CP_IB1_BASE, + ADRENO_REG_CP_IB1_BASE_HI, + ADRENO_REG_CP_IB1_BUFSZ, + ADRENO_REG_CP_IB2_BASE, + ADRENO_REG_CP_IB2_BASE_HI, + ADRENO_REG_CP_IB2_BUFSZ, + ADRENO_REG_CP_TIMESTAMP, + ADRENO_REG_CP_SCRATCH_REG6, + ADRENO_REG_CP_SCRATCH_REG7, + ADRENO_REG_CP_PROTECT_STATUS, + ADRENO_REG_CP_PREEMPT, + ADRENO_REG_CP_PREEMPT_DEBUG, + ADRENO_REG_CP_PREEMPT_DISABLE, + ADRENO_REG_CP_PROTECT_REG_0, + ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO, + ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI, + ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO, + ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI, + ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO, + ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI, + ADRENO_REG_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO, + ADRENO_REG_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI, + ADRENO_REG_CP_PREEMPT_LEVEL_STATUS, + ADRENO_REG_RBBM_STATUS, + ADRENO_REG_RBBM_STATUS3, + ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0, + ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1, + ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2, + ADRENO_REG_RBBM_PERFCTR_LOAD_CMD3, + ADRENO_REG_RBBM_PERFCTR_PWR_1_LO, + ADRENO_REG_RBBM_INT_0_MASK, + ADRENO_REG_RBBM_PM_OVERRIDE2, + ADRENO_REG_RBBM_SW_RESET_CMD, + ADRENO_REG_RBBM_CLOCK_CTL, + ADRENO_REG_PA_SC_AA_CONFIG, + ADRENO_REG_SQ_GPR_MANAGEMENT, + 
ADRENO_REG_SQ_INST_STORE_MANAGEMENT, + ADRENO_REG_TP0_CHICKEN, + ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO, + ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI, + ADRENO_REG_GMU_AO_HOST_INTERRUPT_MASK, + ADRENO_REG_GMU_AHB_FENCE_STATUS, + ADRENO_REG_GMU_GMU2HOST_INTR_MASK, + ADRENO_REG_GPMU_POWER_COUNTER_ENABLE, + ADRENO_REG_REGISTER_MAX, +}; + +#define ADRENO_REG_UNUSED 0xFFFFFFFF +#define ADRENO_REG_SKIP 0xFFFFFFFE +#define ADRENO_REG_DEFINE(_offset, _reg)[_offset] = _reg + +struct adreno_irq_funcs { + void (*func)(struct adreno_device *adreno_dev, int mask); +}; +#define ADRENO_IRQ_CALLBACK(_c) { .func = _c } + +/* + * struct adreno_debugbus_block - Holds info about debug buses of a chip + * @block_id: Bus identifier + * @dwords: Number of dwords of data that this block holds + */ +struct adreno_debugbus_block { + unsigned int block_id; + unsigned int dwords; +}; + +enum adreno_cp_marker_type { + IFPC_DISABLE, + IFPC_ENABLE, + IB1LIST_START, + IB1LIST_END, +}; + +struct adreno_gpudev { + /* + * These registers are in a different location on different devices, + * so define them in the structure and use them as variables. 
+ */ + unsigned int *const reg_offsets; + + struct adreno_coresight *coresight[2]; + + /* GPU specific function hooks */ + int (*probe)(struct platform_device *pdev, u32 chipid, + const struct adreno_gpu_core *gpucore); + void (*snapshot)(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); + irqreturn_t (*irq_handler)(struct adreno_device *adreno_dev); + int (*init)(struct adreno_device *adreno_dev); + void (*remove)(struct adreno_device *adreno_dev); + int (*rb_start)(struct adreno_device *adreno_dev); + int (*start)(struct adreno_device *adreno_dev); + int (*regulator_enable)(struct adreno_device *adreno_dev); + void (*regulator_disable)(struct adreno_device *adreno_dev); + void (*pwrlevel_change_settings)(struct adreno_device *adreno_dev, + unsigned int prelevel, unsigned int postlevel, + bool post); + void (*preemption_schedule)(struct adreno_device *adreno_dev); + int (*preemption_context_init)(struct kgsl_context *context); + void (*context_detach)(struct adreno_context *drawctxt); + void (*clk_set_options)(struct adreno_device *adreno_dev, + const char *name, struct clk *clk, bool on); + void (*pre_reset)(struct adreno_device *adreno_dev); + void (*gpu_keepalive)(struct adreno_device *adreno_dev, + bool state); + bool (*hw_isidle)(struct adreno_device *adreno_dev); + const char *(*iommu_fault_block)(struct kgsl_device *device, + unsigned int fsynr1); + int (*reset)(struct adreno_device *adreno_dev); + /** @read_alwayson: Return the current value of the alwayson counter */ + u64 (*read_alwayson)(struct adreno_device *adreno_dev); + /** + * @power_ops: Target specific function pointers to power up/down the + * gpu + */ + const struct adreno_power_ops *power_ops; + int (*clear_pending_transactions)(struct adreno_device *adreno_dev); + void (*deassert_gbif_halt)(struct adreno_device *adreno_dev); + int (*ringbuffer_submitcmd)(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj, u32 flags, + struct adreno_submit_time *time); + /** 
+ * @is_hw_collapsible: Return true if the hardware can be collapsed. + * Only used by non GMU/RGMU targets + */ + bool (*is_hw_collapsible)(struct adreno_device *adreno_dev); + /** + * @power_stats - Return the perfcounter statistics for DCVS + */ + void (*power_stats)(struct adreno_device *adreno_dev, + struct kgsl_power_stats *stats); + int (*setproperty)(struct kgsl_device_private *priv, u32 type, + void __user *value, u32 sizebytes); + int (*add_to_va_minidump)(struct adreno_device *adreno_dev); +}; + +/** + * enum kgsl_ft_policy_bits - KGSL fault tolerance policy bits + * @KGSL_FT_OFF: Disable fault detection (not used) + * @KGSL_FT_REPLAY: Replay the faulting command + * @KGSL_FT_SKIPIB: Skip the faulting indirect buffer + * @KGSL_FT_SKIPFRAME: Skip the frame containing the faulting IB + * @KGSL_FT_DISABLE: Tells the dispatcher to disable FT for the command obj + * @KGSL_FT_TEMP_DISABLE: Disables FT for all commands + * @KGSL_FT_THROTTLE: Disable the context if it faults too often + * @KGSL_FT_SKIPCMD: Skip the command containing the faulting IB + */ +enum kgsl_ft_policy_bits { + KGSL_FT_OFF = 0, + KGSL_FT_REPLAY, + KGSL_FT_SKIPIB, + KGSL_FT_SKIPFRAME, + KGSL_FT_DISABLE, + KGSL_FT_TEMP_DISABLE, + KGSL_FT_THROTTLE, + KGSL_FT_SKIPCMD, + /* KGSL_FT_MAX_BITS is used to calculate the mask */ + KGSL_FT_MAX_BITS, + /* Internal bits - set during GFT */ + /* Skip the PM dump on replayed command obj's */ + KGSL_FT_SKIP_PMDUMP = 31, +}; + +#define KGSL_FT_POLICY_MASK GENMASK(KGSL_FT_MAX_BITS - 1, 0) + +#define FOR_EACH_RINGBUFFER(_dev, _rb, _i) \ + for ((_i) = 0, (_rb) = &((_dev)->ringbuffers[0]); \ + (_i) < (_dev)->num_ringbuffers; \ + (_i)++, (_rb)++) + +extern const struct adreno_power_ops adreno_power_operations; + +extern const struct adreno_gpudev adreno_a3xx_gpudev; +extern const struct adreno_gpudev adreno_a5xx_gpudev; +extern const struct adreno_gpudev adreno_a6xx_gpudev; +extern const struct adreno_gpudev adreno_a6xx_rgmu_gpudev; +extern const struct 
adreno_gpudev adreno_a619_holi_gpudev; + +extern int adreno_wake_nice; +extern unsigned int adreno_wake_timeout; + +int adreno_start(struct kgsl_device *device, int priority); +long adreno_ioctl(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg); + +long adreno_ioctl_helper(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg, + const struct kgsl_ioctl *cmds, int len); + +/* + * adreno_switch_to_unsecure_mode - Execute a zap shader + * @adreno_dev: An Adreno GPU handle + * @rb: The ringbuffer to execute on + * + * Execute the zap shader from the CP to take the GPU out of secure mode. + * Return: 0 on success or negative on failure + */ +int adreno_switch_to_unsecure_mode(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb); + +int adreno_spin_idle(struct adreno_device *device, unsigned int timeout); +int adreno_idle(struct kgsl_device *device); + +int adreno_set_constraint(struct kgsl_device *device, + struct kgsl_context *context, + struct kgsl_device_constraint *constraint); + +void adreno_snapshot(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + struct kgsl_context *context); + +int adreno_reset(struct kgsl_device *device, int fault); + +void adreno_fault_skipcmd_detached(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, + struct kgsl_drawobj *drawobj); + +void adreno_hang_int_callback(struct adreno_device *adreno_dev, int bit); +void adreno_cp_callback(struct adreno_device *adreno_dev, int bit); + +int adreno_sysfs_init(struct adreno_device *adreno_dev); + +void adreno_irqctrl(struct adreno_device *adreno_dev, int state); + +long adreno_ioctl_perfcounter_get(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); + +long adreno_ioctl_perfcounter_put(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); + +bool adreno_is_cx_dbgc_register(struct kgsl_device *device, + unsigned int offset); +void adreno_cx_dbgc_regread(struct kgsl_device 
*adreno_device, + unsigned int offsetwords, unsigned int *value); +void adreno_cx_dbgc_regwrite(struct kgsl_device *device, + unsigned int offsetwords, unsigned int value); +void adreno_cx_misc_regread(struct adreno_device *adreno_dev, + unsigned int offsetwords, unsigned int *value); +void adreno_cx_misc_regwrite(struct adreno_device *adreno_dev, + unsigned int offsetwords, unsigned int value); +void adreno_cx_misc_regrmw(struct adreno_device *adreno_dev, + unsigned int offsetwords, + unsigned int mask, unsigned int bits); +void adreno_isense_regread(struct adreno_device *adreno_dev, + unsigned int offsetwords, unsigned int *value); + +/** + * adreno_active_count_get - Wrapper for target specific active count get + * @adreno_dev: pointer to the adreno device + * + * Increase the active count for the KGSL device and execute slumber exit + * sequence if this is the first reference. Code paths that need to touch the + * hardware or wait for the hardware to complete an operation must hold an + * active count reference until they are finished. The device mutex must be held + * while calling this function. + * + * Return: 0 on success or negative error on failure to wake up the device + */ +int adreno_active_count_get(struct adreno_device *adreno_dev); + +/** + * adreno_active_count_put - Wrapper for target specific active count put + * @adreno_dev: pointer to the adreno device + * + * Decrease the active or the KGSL device and schedule the idle thread to + * execute the slumber sequence if there are no remaining references. The + * device mutex must be held while calling this function. 
+ */ +void adreno_active_count_put(struct adreno_device *adreno_dev); + +#define ADRENO_TARGET(_name, _id) \ +static inline int adreno_is_##_name(struct adreno_device *adreno_dev) \ +{ \ + return (ADRENO_GPUREV(adreno_dev) == (_id)); \ +} + +static inline int adreno_is_a3xx(struct adreno_device *adreno_dev) +{ + return ((ADRENO_GPUREV(adreno_dev) >= 300) && + (ADRENO_GPUREV(adreno_dev) < 400)); +} + +ADRENO_TARGET(a304, ADRENO_REV_A304) +ADRENO_TARGET(a306, ADRENO_REV_A306) +ADRENO_TARGET(a306a, ADRENO_REV_A306A) + +static inline int adreno_is_a5xx(struct adreno_device *adreno_dev) +{ + return ADRENO_GPUREV(adreno_dev) >= 500 && + ADRENO_GPUREV(adreno_dev) < 600; +} + +ADRENO_TARGET(a505, ADRENO_REV_A505) +ADRENO_TARGET(a506, ADRENO_REV_A506) +ADRENO_TARGET(a508, ADRENO_REV_A508) +ADRENO_TARGET(a510, ADRENO_REV_A510) +ADRENO_TARGET(a512, ADRENO_REV_A512) +ADRENO_TARGET(a530, ADRENO_REV_A530) +ADRENO_TARGET(a540, ADRENO_REV_A540) + +static inline int adreno_is_a530v2(struct adreno_device *adreno_dev) +{ + return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A530) && + (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 1); +} + +static inline int adreno_is_a530v3(struct adreno_device *adreno_dev) +{ + return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A530) && + (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 2); +} + +static inline int adreno_is_a505_or_a506(struct adreno_device *adreno_dev) +{ + return ADRENO_GPUREV(adreno_dev) >= 505 && + ADRENO_GPUREV(adreno_dev) <= 506; +} + +static inline int adreno_is_a6xx(struct adreno_device *adreno_dev) +{ + return ADRENO_GPUREV(adreno_dev) >= 600 && + ADRENO_GPUREV(adreno_dev) < 700; +} + +static inline int adreno_is_a660_shima(struct adreno_device *adreno_dev) +{ + return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A660) && + (adreno_dev->gpucore->compatible && + !strcmp(adreno_dev->gpucore->compatible, + "qcom,adreno-gpu-a660-shima")); +} + +ADRENO_TARGET(a610, ADRENO_REV_A610) +ADRENO_TARGET(a612, ADRENO_REV_A612) +ADRENO_TARGET(a618, 
ADRENO_REV_A618) +ADRENO_TARGET(a619, ADRENO_REV_A619) +ADRENO_TARGET(a620, ADRENO_REV_A620) +ADRENO_TARGET(a630, ADRENO_REV_A630) +ADRENO_TARGET(a635, ADRENO_REV_A635) +ADRENO_TARGET(a640, ADRENO_REV_A640) +ADRENO_TARGET(a650, ADRENO_REV_A650) +ADRENO_TARGET(a680, ADRENO_REV_A680) + +/* A635 is derived from A660 and shares same logic */ +static inline int adreno_is_a660(struct adreno_device *adreno_dev) +{ + unsigned int rev = ADRENO_GPUREV(adreno_dev); + + return (rev == ADRENO_REV_A660 || rev == ADRENO_REV_A635); +} + +/* + * All the derived chipsets from A615 needs to be added to this + * list such as A616, A618, A619 etc. + */ +static inline int adreno_is_a615_family(struct adreno_device *adreno_dev) +{ + unsigned int rev = ADRENO_GPUREV(adreno_dev); + + return (rev == ADRENO_REV_A615 || rev == ADRENO_REV_A616 || + rev == ADRENO_REV_A618 || rev == ADRENO_REV_A619); +} + +/* + * Derived GPUs from A640 needs to be added to this list. + * A640 and A680 belongs to this family. + */ +static inline int adreno_is_a640_family(struct adreno_device *adreno_dev) +{ + unsigned int rev = ADRENO_GPUREV(adreno_dev); + + return (rev == ADRENO_REV_A640 || rev == ADRENO_REV_A680); +} + +/* + * Derived GPUs from A650 needs to be added to this list. + * A650 is derived from A640 but register specs has been + * changed hence do not belongs to A640 family. A620, + * A660, A690 follows the register specs of A650. 
+ * + */ +static inline int adreno_is_a650_family(struct adreno_device *adreno_dev) +{ + unsigned int rev = ADRENO_GPUREV(adreno_dev); + + return (rev == ADRENO_REV_A650 || rev == ADRENO_REV_A620 || + rev == ADRENO_REV_A660 || rev == ADRENO_REV_A635); +} + +static inline int adreno_is_a619_holi(struct adreno_device *adreno_dev) +{ + return of_device_is_compatible(adreno_dev->dev.pdev->dev.of_node, + "qcom,adreno-gpu-a619-holi"); +} + +static inline int adreno_is_a620v1(struct adreno_device *adreno_dev) +{ + return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A620) && + (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 0); +} + +static inline int adreno_is_a640v2(struct adreno_device *adreno_dev) +{ + return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A640) && + (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 1); +} + +static inline int adreno_is_gen7(struct adreno_device *adreno_dev) +{ + return ADRENO_GPUREV(adreno_dev) >= 0x070000 && + ADRENO_GPUREV(adreno_dev) < 0x080000; +} + +ADRENO_TARGET(gen7_0_0, ADRENO_REV_GEN7_0_0) +ADRENO_TARGET(gen7_0_1, ADRENO_REV_GEN7_0_1) + +/* + * adreno_checkreg_off() - Checks the validity of a register enum + * @adreno_dev: Pointer to adreno device + * @offset_name: The register enum that is checked + */ +static inline bool adreno_checkreg_off(struct adreno_device *adreno_dev, + enum adreno_regs offset_name) +{ + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (offset_name >= ADRENO_REG_REGISTER_MAX || + gpudev->reg_offsets[offset_name] == ADRENO_REG_UNUSED) + return false; + + /* + * GPU register programming is kept common as much as possible + * across the cores, Use ADRENO_REG_SKIP when certain register + * programming needs to be skipped for certain GPU cores. + * Example: Certain registers on a5xx like IB1_BASE are 64 bit. + * Common programming programs 64bit register but upper 32 bits + * are skipped in a3xx using ADRENO_REG_SKIP. 
+ */ + if (gpudev->reg_offsets[offset_name] == ADRENO_REG_SKIP) + return false; + + return true; +} + +/* + * adreno_readreg() - Read a register by getting its offset from the + * offset array defined in gpudev node + * @adreno_dev: Pointer to the adreno device + * @offset_name: The register enum that is to be read + * @val: Register value read is placed here + */ +static inline void adreno_readreg(struct adreno_device *adreno_dev, + enum adreno_regs offset_name, unsigned int *val) +{ + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (adreno_checkreg_off(adreno_dev, offset_name)) + kgsl_regread(KGSL_DEVICE(adreno_dev), + gpudev->reg_offsets[offset_name], val); + else + *val = 0; +} + +/* + * adreno_writereg() - Write a register by getting its offset from the + * offset array defined in gpudev node + * @adreno_dev: Pointer to the adreno device + * @offset_name: The register enum that is to be written + * @val: Value to write + */ +static inline void adreno_writereg(struct adreno_device *adreno_dev, + enum adreno_regs offset_name, unsigned int val) +{ + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (adreno_checkreg_off(adreno_dev, offset_name)) + kgsl_regwrite(KGSL_DEVICE(adreno_dev), + gpudev->reg_offsets[offset_name], val); +} + +/* + * adreno_getreg() - Returns the offset value of a register from the + * register offset array in the gpudev node + * @adreno_dev: Pointer to the adreno device + * @offset_name: The register enum whose offset is returned + */ +static inline unsigned int adreno_getreg(struct adreno_device *adreno_dev, + enum adreno_regs offset_name) +{ + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (!adreno_checkreg_off(adreno_dev, offset_name)) + return ADRENO_REG_REGISTER_MAX; + return gpudev->reg_offsets[offset_name]; +} + +/* + * adreno_write_gmureg() - Write a GMU register by getting its offset from the + * offset array defined in gpudev node + * 
@adreno_dev: Pointer to the the adreno device + * @offset_name: The register enum that is to be written + * @val: Value to write + */ +static inline void adreno_write_gmureg(struct adreno_device *adreno_dev, + enum adreno_regs offset_name, unsigned int val) +{ + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (adreno_checkreg_off(adreno_dev, offset_name)) + gmu_core_regwrite(KGSL_DEVICE(adreno_dev), + gpudev->reg_offsets[offset_name], val); +} + +/** + * adreno_gpu_fault() - Return the current state of the GPU + * @adreno_dev: A pointer to the adreno_device to query + * + * Return 0 if there is no fault or positive with the last type of fault that + * occurred + */ +static inline unsigned int adreno_gpu_fault(struct adreno_device *adreno_dev) +{ + /* make sure we're reading the latest value */ + smp_rmb(); + return atomic_read(&adreno_dev->dispatcher.fault); +} + +/** + * adreno_set_gpu_fault() - Set the current fault status of the GPU + * @adreno_dev: A pointer to the adreno_device to set + * @state: fault state to set + * + */ +static inline void adreno_set_gpu_fault(struct adreno_device *adreno_dev, + int state) +{ + /* only set the fault bit w/o overwriting other bits */ + atomic_or(state, &adreno_dev->dispatcher.fault); + + /* make sure other CPUs see the update */ + smp_wmb(); +} + +static inline bool adreno_gmu_gpu_fault(struct adreno_device *adreno_dev) +{ + return adreno_gpu_fault(adreno_dev) & ADRENO_GMU_FAULT; +} + +/** + * adreno_clear_gpu_fault() - Clear the GPU fault register + * @adreno_dev: A pointer to an adreno_device structure + * + * Clear the GPU fault status for the adreno device + */ + +static inline void adreno_clear_gpu_fault(struct adreno_device *adreno_dev) +{ + atomic_set(&adreno_dev->dispatcher.fault, 0); + + /* make sure other CPUs see the update */ + smp_wmb(); +} + +/** + * adreno_gpu_halt() - Return the GPU halt refcount + * @adreno_dev: A pointer to the adreno_device + */ +static inline int 
adreno_gpu_halt(struct adreno_device *adreno_dev) +{ + /* make sure we're reading the latest value */ + smp_rmb(); + return atomic_read(&adreno_dev->halt); +} + + +/** + * adreno_clear_gpu_halt() - Clear the GPU halt refcount + * @adreno_dev: A pointer to the adreno_device + */ +static inline void adreno_clear_gpu_halt(struct adreno_device *adreno_dev) +{ + atomic_set(&adreno_dev->halt, 0); + + /* make sure other CPUs see the update */ + smp_wmb(); +} + +/** + * adreno_get_gpu_halt() - Increment GPU halt refcount + * @adreno_dev: A pointer to the adreno_device + */ +static inline void adreno_get_gpu_halt(struct adreno_device *adreno_dev) +{ + atomic_inc(&adreno_dev->halt); +} + +/** + * adreno_put_gpu_halt() - Decrement GPU halt refcount + * @adreno_dev: A pointer to the adreno_device + */ +static inline void adreno_put_gpu_halt(struct adreno_device *adreno_dev) +{ + /* Make sure the refcount is good */ + int ret = atomic_dec_if_positive(&adreno_dev->halt); + + WARN(ret < 0, "GPU halt refcount unbalanced\n"); +} + + +#ifdef CONFIG_DEBUG_FS +void adreno_debugfs_init(struct adreno_device *adreno_dev); +void adreno_context_debugfs_init(struct adreno_device *adreno_dev, + struct adreno_context *ctx); +#else +static inline void adreno_debugfs_init(struct adreno_device *adreno_dev) { } +static inline void adreno_context_debugfs_init(struct adreno_device *device, + struct adreno_context *context) +{ + context->debug_root = NULL; +} +#endif + +/** + * adreno_compare_pm4_version() - Compare the PM4 microcode version + * @adreno_dev: Pointer to the adreno_device struct + * @version: Version number to compare again + * + * Compare the current version against the specified version and return -1 if + * the current code is older, 0 if equal or 1 if newer. 
+ */ +static inline int adreno_compare_pm4_version(struct adreno_device *adreno_dev, + unsigned int version) +{ + if (adreno_dev->fw[ADRENO_FW_PM4].version == version) + return 0; + + return (adreno_dev->fw[ADRENO_FW_PM4].version > version) ? 1 : -1; +} + +/** + * adreno_compare_pfp_version() - Compare the PFP microcode version + * @adreno_dev: Pointer to the adreno_device struct + * @version: Version number to compare against + * + * Compare the current version against the specified version and return -1 if + * the current code is older, 0 if equal or 1 if newer. + */ +static inline int adreno_compare_pfp_version(struct adreno_device *adreno_dev, + unsigned int version) +{ + if (adreno_dev->fw[ADRENO_FW_PFP].version == version) + return 0; + + return (adreno_dev->fw[ADRENO_FW_PFP].version > version) ? 1 : -1; +} + +/** + * adreno_in_preempt_state() - Check if preemption state is equal to given state + * @adreno_dev: Device whose preemption state is checked + * @state: State to compare against + */ +static inline bool adreno_in_preempt_state(struct adreno_device *adreno_dev, + enum adreno_preempt_states state) +{ + return atomic_read(&adreno_dev->preempt.state) == state; +} +/** + * adreno_set_preempt_state() - Set the specified preemption state + * @adreno_dev: Device to change preemption state + * @state: State to set + */ +static inline void adreno_set_preempt_state(struct adreno_device *adreno_dev, + enum adreno_preempt_states state) +{ + /* + * atomic_set doesn't use barriers, so we need to do it ourselves. One + * before... + */ + smp_wmb(); + atomic_set(&adreno_dev->preempt.state, state); + + /* ... 
and one after */ + smp_wmb(); +} + +static inline bool adreno_is_preemption_enabled( + struct adreno_device *adreno_dev) +{ + return test_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv); +} + +/* + * adreno_compare_prio_level() - Compares 2 priority levels based on enum values + * @p1: First priority level + * @p2: Second priority level + * + * Returns greater than 0 if p1 is higher priority, 0 if levels are equal else + * less than 0 + */ +static inline int adreno_compare_prio_level(int p1, int p2) +{ + return p2 - p1; +} + +void adreno_readreg64(struct adreno_device *adreno_dev, + enum adreno_regs lo, enum adreno_regs hi, uint64_t *val); + +void adreno_writereg64(struct adreno_device *adreno_dev, + enum adreno_regs lo, enum adreno_regs hi, uint64_t val); + +unsigned int adreno_get_rptr(struct adreno_ringbuffer *rb); + +static inline bool adreno_rb_empty(struct adreno_ringbuffer *rb) +{ + return (adreno_get_rptr(rb) == rb->wptr); +} + +static inline bool adreno_soft_fault_detect(struct adreno_device *adreno_dev) +{ + return adreno_dev->fast_hang_detect && + !test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv); +} + +static inline bool adreno_long_ib_detect(struct adreno_device *adreno_dev) +{ + return adreno_dev->long_ib_detect && + !test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv); +} + +/** + * adreno_support_64bit - Return true if the GPU supports 64 bit addressing + * @adreno_dev: An Adreno GPU device handle + * + * Return: True if the device supports 64 bit addressing + */ +static inline bool adreno_support_64bit(struct adreno_device *adreno_dev) +{ + /* + * The IOMMU API takes a unsigned long for the iova so we can't support + * 64 bit addresses when the kernel is in 32 bit mode even if we wanted + * so we need to check that we are using a5xx or newer and that the + * unsigned long is big enough for our purposes. 
+ */ + return (BITS_PER_LONG > 32 && ADRENO_GPUREV(adreno_dev) >= 500); +} + +static inline void adreno_ringbuffer_set_global( + struct adreno_device *adreno_dev, int name) +{ + kgsl_sharedmem_writel(adreno_dev->ringbuffers[0].pagetable_desc, + PT_INFO_OFFSET(current_global_ptname), name); +} + +static inline void adreno_ringbuffer_set_pagetable(struct adreno_ringbuffer *rb, + struct kgsl_pagetable *pt) +{ + unsigned long flags; + + spin_lock_irqsave(&rb->preempt_lock, flags); + + kgsl_sharedmem_writel(rb->pagetable_desc, + PT_INFO_OFFSET(current_rb_ptname), pt->name); + + kgsl_sharedmem_writeq(rb->pagetable_desc, + PT_INFO_OFFSET(ttbr0), kgsl_mmu_pagetable_get_ttbr0(pt)); + + kgsl_sharedmem_writel(rb->pagetable_desc, + PT_INFO_OFFSET(contextidr), 0); + + spin_unlock_irqrestore(&rb->preempt_lock, flags); +} + +static inline u32 counter_delta(struct kgsl_device *device, + unsigned int reg, unsigned int *counter) +{ + u32 val, ret = 0; + + if (!reg) + return 0; + + kgsl_regread(device, reg, &val); + + if (*counter) { + if (val >= *counter) + ret = val - *counter; + else + ret = (UINT_MAX - *counter) + val; + } + + *counter = val; + return ret; +} + +static inline int adreno_perfcntr_active_oob_get( + struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret = adreno_active_count_get(adreno_dev); + + if (!ret) { + ret = gmu_core_dev_oob_set(device, oob_perfcntr); + if (ret) + adreno_active_count_put(adreno_dev); + } + + return ret; +} + +static inline void adreno_perfcntr_active_oob_put( + struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + gmu_core_dev_oob_clear(device, oob_perfcntr); + adreno_active_count_put(adreno_dev); +} + +/** + * adreno_wait_for_halt_ack - wait for acknowledgement for a bus halt request + * @ack_reg: register offset to wait for acknowledge + * @mask: A mask value to wait for + * + * Return: 0 on success or -ETIMEDOUT if the request timed out + */ +static
inline int adreno_wait_for_halt_ack(struct kgsl_device *device, + int ack_reg, unsigned int mask) +{ + u32 val; + int ret = kgsl_regmap_read_poll_timeout(&device->regmap, ack_reg, + val, (val & mask) == mask, 100, 100 * 1000); + + if (ret) + dev_err(device->dev, + "GBIF/VBIF Halt ack timeout: reg=%08x mask=%08x status=%08x\n", + ack_reg, mask, val); + + return ret; +} + +/** + * adreno_move_preempt_state - Update the preemption state + * @adreno_dev: An Adreno GPU device handle + * @old: The current state of the preemption + * @new: The new state of the preemption + * + * Return: True if the state was updated or false if not + */ +static inline bool adreno_move_preempt_state(struct adreno_device *adreno_dev, + enum adreno_preempt_states old, enum adreno_preempt_states new) +{ + return (atomic_cmpxchg(&adreno_dev->preempt.state, old, new) == old); +} + +/** + * adreno_reg_offset_init - Helper function to initialize reg_offsets + * @reg_offsets: Pointer to an array of register offsets + * + * Helper function to setup register_offsets for a target. Go through + * and set ADRENO_REG_UNUSED for all unused entries in the list. + */ +static inline void adreno_reg_offset_init(u32 *reg_offsets) +{ + int i; + + /* + * Initialize uninitialized gpu registers, only needs to be done once. + * Make all offsets that are not initialized to ADRENO_REG_UNUSED + */ + for (i = 0; i < ADRENO_REG_REGISTER_MAX; i++) { + if (!reg_offsets[i]) + reg_offsets[i] = ADRENO_REG_UNUSED; + } +} + +static inline u32 adreno_get_level(u32 priority) +{ + u32 level = priority / KGSL_PRIORITY_MAX_RB_LEVELS; + + return min_t(u32, level, KGSL_PRIORITY_MAX_RB_LEVELS - 1); +} + + +/** + * adreno_get_firmware - Load firmware into an adreno_firmware struct + * @adreno_dev: An Adreno GPU device handle + * @fwfile: Firmware file to load + * @firmware: A &struct adreno_firmware container for the firmware.
+ * + * Load the specified firmware file into the memdesc in &struct adreno_firmware + * and get the size and version from the data. + * + * Return: 0 on success or negative on failure + */ +int adreno_get_firmware(struct adreno_device *adreno_dev, + const char *fwfile, struct adreno_firmware *firmware); +/** + * adreno_zap_shader_load - Helper function for loading the zap shader + * @adreno_dev: A handle to an Adreno GPU device + * @name: Name of the zap shader to load + * + * A target independent helper function for loading the zap shader. + * + * Return: 0 on success or negative on failure. + */ +int adreno_zap_shader_load(struct adreno_device *adreno_dev, + const char *name); + +/** + * adreno_irq_callbacks - Helper function to handle IRQ callbacks + * @adreno_dev: Adreno GPU device handle + * @funcs: List of callback functions + * @status: Interrupt status + * + * Walk the bits in the interrupt status and call any applicable callbacks. + * Return: IRQ_HANDLED if one or more interrupt callbacks were called. + */ +irqreturn_t adreno_irq_callbacks(struct adreno_device *adreno_dev, + const struct adreno_irq_funcs *funcs, u32 status); + + +/** + * adreno_device_probe - Generic adreno device probe function + * @pdev: Pointer to the platform device + * @adreno_dev: Adreno GPU device handle + * + * Do the generic setup for the Adreno device. Called from the target specific + * probe functions. + * + * Return: 0 on success or negative on failure + */ +int adreno_device_probe(struct platform_device *pdev, + struct adreno_device *adreno_dev); + +/** + * adreno_power_cycle - Suspend and resume the device + * @adreno_dev: Pointer to the adreno device + * @callback: Function that needs to be executed + * @priv: Argument to be passed to the callback + * + * Certain properties that can be set via sysfs need to power + * cycle the device to take effect. This function suspends + * the device, executes the callback, and resumes the device.
+ * + * Return: 0 on success or negative on failure + */ +int adreno_power_cycle(struct adreno_device *adreno_dev, + void (*callback)(struct adreno_device *adreno_dev, void *priv), + void *priv); + +/** + * adreno_power_cycle_bool - Power cycle the device to change device setting + * @adreno_dev: Pointer to the adreno device + * @flag: Flag that needs to be set + * @val: The value flag should be set to + * + * Certain properties that can be set via sysfs need to power cycle the device + * to take effect. This function suspends the device, sets the flag, and + * resumes the device. + * + * Return: 0 on success or negative on failure + */ +int adreno_power_cycle_bool(struct adreno_device *adreno_dev, + bool *flag, bool val); + +/** + * adreno_power_cycle_u32 - Power cycle the device to change device setting + * @adreno_dev: Pointer to the adreno device + * @flag: Flag that needs to be set + * @val: The value flag should be set to + * + * Certain properties that can be set via sysfs need to power cycle the device + * to take effect. This function suspends the device, sets the flag, and + * resumes the device. + * + * Return: 0 on success or negative on failure + */ +int adreno_power_cycle_u32(struct adreno_device *adreno_dev, + u32 *flag, u32 val); + +/** + * adreno_set_active_ctxs_null - Give up active context refcount + * @adreno_dev: Adreno GPU device handle + * + * This puts back the reference for that last active context on + * each ringbuffer when going in and out of slumber. 
+ */ +void adreno_set_active_ctxs_null(struct adreno_device *adreno_dev); + +/** + * adreno_get_bus_counters - Allocate the bus dcvs counters + * @adreno_dev: Adreno GPU device handle + * + * This function allocates the various gpu counters to measure + * gpu bus usage for bus dcvs + */ +void adreno_get_bus_counters(struct adreno_device *adreno_dev); + +/** + * adreno_suspend_context - Make sure device is idle + * @device: Pointer to the kgsl device + * + * This function processes the profiling results and checks if the + * device is idle so that it can be turned off safely + * + * Return: 0 on success or negative error on failure + */ +int adreno_suspend_context(struct kgsl_device *device); + +/* + * adreno_profile_submit_time - Populate profiling buffer with timestamps + * @time: Container for the statistics + * + * Populate the draw object user profiling buffer with the timestamps + * recorded in the adreno_submit_time structure at the time of draw object + * submission. + */ +void adreno_profile_submit_time(struct adreno_submit_time *time); + +void adreno_preemption_timer(struct timer_list *t); + +/** + * adreno_create_profile_buffer - Create a buffer to store profiling data + * @adreno_dev: Adreno GPU device handle + */ +void adreno_create_profile_buffer(struct adreno_device *adreno_dev); + +/** + * adreno_isidle - return true if the hardware is idle + * @adreno_dev: Adreno GPU device handle + * + * Return: True if the hardware is idle + */ +bool adreno_isidle(struct adreno_device *adreno_dev); + +/** + * adreno_allocate_global - Helper function to allocate a global GPU object + * @device: A GPU device handle + * @memdesc: Pointer to a &struct kgsl_memdesc pointer + * @size: Size of the allocation in bytes + * @padding: Amount of extra padding to add to the VA allocation + * @flags: Control flags for the allocation + * @priv: Internal flags for the allocation + * @name: Name of the allocation (for the debugfs file) + * + * Allocate a global object if it hasn't
already been allocated and put it in + * the pointer pointed to by @memdesc. + * Return: 0 on success or negative on error + */ +static inline int adreno_allocate_global(struct kgsl_device *device, + struct kgsl_memdesc **memdesc, u64 size, u32 padding, u64 flags, + u32 priv, const char *name) +{ + if (!IS_ERR_OR_NULL(*memdesc)) + return 0; + + *memdesc = kgsl_allocate_global(device, size, padding, flags, priv, name); + return PTR_ERR_OR_ZERO(*memdesc); +} + +/** + * adreno_regulator_disable_poll - Disable the regulator and wait for it to + * complete + * @device: A GPU device handle + * @reg: Pointer to the regulator to disable + * @offset: Offset of the register to poll for success + * @timeout: Timeout (in milliseconds) + * + * Return: true if the regulator got disabled or false on timeout + */ +bool adreno_regulator_disable_poll(struct kgsl_device *device, + struct regulator *reg, u32 offset, u32 timeout); + +static inline void adreno_set_dispatch_ops(struct adreno_device *adreno_dev, + const struct adreno_dispatch_ops *ops) +{ + adreno_dev->dispatch_ops = ops; +} + +/** + * adreno_fence_trace_array_init - Initialize an always on trace array + * @device: A GPU device handle + * + * Register an always-on trace array for fence timeout debugging + */ +void adreno_fence_trace_array_init(struct kgsl_device *device); + +/* + * adreno_drawobj_set_constraint - Set a power constraint + * @device: Pointer to a KGSL device structure + * @drawobj: Draw object for which constraint is to be set + * + * Set the power constraint if requested by this context + */ +void adreno_drawobj_set_constraint(struct kgsl_device *device, + struct kgsl_drawobj *drawobj); + +/** + * adreno_get_gpu_model - Gets gpu model name from device tree (or) chipid + * @device: A GPU device handle + * + * Return: GPU model name string + */ +const char *adreno_get_gpu_model(struct kgsl_device *device); +#endif /*__ADRENO_H */ diff --git a/adreno_a3xx.c b/adreno_a3xx.c new file mode 100644 index
0000000000..c4dbaf2803 --- /dev/null +++ b/adreno_a3xx.c @@ -0,0 +1,1569 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include +#include + +#include "adreno.h" +#include "adreno_cp_parser.h" +#include "adreno_a3xx.h" +#include "adreno_pm4types.h" +#include "adreno_snapshot.h" +#include "adreno_trace.h" + +/* + * Define registers for a3xx that contain addresses used by the + * cp parser logic + */ +const unsigned int a3xx_cp_addr_regs[ADRENO_CP_ADDR_MAX] = { + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0, + A3XX_VSC_PIPE_DATA_ADDRESS_0), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_0, + A3XX_VSC_PIPE_DATA_LENGTH_0), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_1, + A3XX_VSC_PIPE_DATA_ADDRESS_1), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_1, + A3XX_VSC_PIPE_DATA_LENGTH_1), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_2, + A3XX_VSC_PIPE_DATA_ADDRESS_2), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_2, + A3XX_VSC_PIPE_DATA_LENGTH_2), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_3, + A3XX_VSC_PIPE_DATA_ADDRESS_3), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_3, + A3XX_VSC_PIPE_DATA_LENGTH_3), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_4, + A3XX_VSC_PIPE_DATA_ADDRESS_4), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_4, + A3XX_VSC_PIPE_DATA_LENGTH_4), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_5, + A3XX_VSC_PIPE_DATA_ADDRESS_5), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_5, + A3XX_VSC_PIPE_DATA_LENGTH_5), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_6, + A3XX_VSC_PIPE_DATA_ADDRESS_6), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_6, + A3XX_VSC_PIPE_DATA_LENGTH_6), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_7, + A3XX_VSC_PIPE_DATA_ADDRESS_7), + 
ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7, + A3XX_VSC_PIPE_DATA_LENGTH_7), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0, + A3XX_VFD_FETCH_INSTR_1_0), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_1, + A3XX_VFD_FETCH_INSTR_1_1), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_2, + A3XX_VFD_FETCH_INSTR_1_2), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_3, + A3XX_VFD_FETCH_INSTR_1_3), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_4, + A3XX_VFD_FETCH_INSTR_1_4), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_5, + A3XX_VFD_FETCH_INSTR_1_5), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_6, + A3XX_VFD_FETCH_INSTR_1_6), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_7, + A3XX_VFD_FETCH_INSTR_1_7), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_8, + A3XX_VFD_FETCH_INSTR_1_8), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_9, + A3XX_VFD_FETCH_INSTR_1_9), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_10, + A3XX_VFD_FETCH_INSTR_1_A), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_11, + A3XX_VFD_FETCH_INSTR_1_B), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_12, + A3XX_VFD_FETCH_INSTR_1_C), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_13, + A3XX_VFD_FETCH_INSTR_1_D), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_14, + A3XX_VFD_FETCH_INSTR_1_E), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15, + A3XX_VFD_FETCH_INSTR_1_F), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_SIZE_ADDRESS, + A3XX_VSC_SIZE_ADDRESS), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR, + A3XX_SP_VS_PVT_MEM_ADDR_REG), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR, + A3XX_SP_FS_PVT_MEM_ADDR_REG), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_VS_OBJ_START_REG, + A3XX_SP_VS_OBJ_START_REG), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_FS_OBJ_START_REG, + A3XX_SP_FS_OBJ_START_REG), +}; + +static const unsigned int _a3xx_pwron_fixup_fs_instructions[] = { + 0x00000000, 0x302CC300, 0x00000000, 
0x302CC304, + 0x00000000, 0x302CC308, 0x00000000, 0x302CC30C, + 0x00000000, 0x302CC310, 0x00000000, 0x302CC314, + 0x00000000, 0x302CC318, 0x00000000, 0x302CC31C, + 0x00000000, 0x302CC320, 0x00000000, 0x302CC324, + 0x00000000, 0x302CC328, 0x00000000, 0x302CC32C, + 0x00000000, 0x302CC330, 0x00000000, 0x302CC334, + 0x00000000, 0x302CC338, 0x00000000, 0x302CC33C, + 0x00000000, 0x00000400, 0x00020000, 0x63808003, + 0x00060004, 0x63828007, 0x000A0008, 0x6384800B, + 0x000E000C, 0x6386800F, 0x00120010, 0x63888013, + 0x00160014, 0x638A8017, 0x001A0018, 0x638C801B, + 0x001E001C, 0x638E801F, 0x00220020, 0x63908023, + 0x00260024, 0x63928027, 0x002A0028, 0x6394802B, + 0x002E002C, 0x6396802F, 0x00320030, 0x63988033, + 0x00360034, 0x639A8037, 0x003A0038, 0x639C803B, + 0x003E003C, 0x639E803F, 0x00000000, 0x00000400, + 0x00000003, 0x80D60003, 0x00000007, 0x80D60007, + 0x0000000B, 0x80D6000B, 0x0000000F, 0x80D6000F, + 0x00000013, 0x80D60013, 0x00000017, 0x80D60017, + 0x0000001B, 0x80D6001B, 0x0000001F, 0x80D6001F, + 0x00000023, 0x80D60023, 0x00000027, 0x80D60027, + 0x0000002B, 0x80D6002B, 0x0000002F, 0x80D6002F, + 0x00000033, 0x80D60033, 0x00000037, 0x80D60037, + 0x0000003B, 0x80D6003B, 0x0000003F, 0x80D6003F, + 0x00000000, 0x03000000, 0x00000000, 0x00000000, +}; + +/** + * _a3xx_pwron_fixup() - Initialize a special command buffer to run a + * post-power collapse shader workaround + * @adreno_dev: Pointer to a adreno_device struct + * + * Some targets require a special workaround shader to be executed after + * power-collapse. 
Construct the IB once at init time and keep it + * handy + * + * Returns: 0 on success or negative on error + */ +static int _a3xx_pwron_fixup(struct adreno_device *adreno_dev) +{ + unsigned int *cmds; + int count = ARRAY_SIZE(_a3xx_pwron_fixup_fs_instructions); + + /* Return if the fixup is already in place */ + if (test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv)) + return 0; + + adreno_dev->pwron_fixup = kgsl_allocate_global(KGSL_DEVICE(adreno_dev), + PAGE_SIZE, 0, KGSL_MEMFLAGS_GPUREADONLY, 0, "pwron_fixup"); + + if (IS_ERR(adreno_dev->pwron_fixup)) + return PTR_ERR(adreno_dev->pwron_fixup); + + cmds = adreno_dev->pwron_fixup->hostptr; + + *cmds++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2); + *cmds++ = 0x00000000; + *cmds++ = 0x90000000; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_REG_RMW, 3); + *cmds++ = A3XX_RBBM_CLOCK_CTL; + *cmds++ = 0xFFFCFFFF; + *cmds++ = 0x00010000; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1); + *cmds++ = 0x1E000150; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); + *cmds++ = 0x1E000150; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1); + *cmds++ = 0x1E000150; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_1_REG, 1); + *cmds++ = 0x00000040; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_2_REG, 1); + *cmds++ = 0x80000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_3_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_VS_CONTROL_REG, 1); + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A3XX_HLSQ_FS_CONTROL_REG, 1); + *cmds++ = 0x0D001002; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONST_VSPRESV_RANGE_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = 
cp_type0_packet(A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_0_REG, 1); + *cmds++ = 0x00401101; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_1_REG, 1); + *cmds++ = 0x00000400; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_2_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_3_REG, 1); + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_4_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_5_REG, 1); + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_6_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_0_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_1_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_CONST_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_X_REG, 1); + *cmds++ = 0x00000010; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG, 1); + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG, 1); + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_WG_OFFSET_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_SP_CTRL_REG, 1); + *cmds++ = 0x00040000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1); + *cmds++ = 0x0000000A; + *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG1, 1); + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A3XX_SP_VS_PARAM_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_4, 1); + *cmds++ = 0x00000000; + *cmds++ = 
cp_type0_packet(A3XX_SP_VS_OUT_REG_5, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_6, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_7, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OBJ_OFFSET_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OBJ_START_REG, 1); + *cmds++ = 0x00000004; + *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_PARAM_REG, 1); + *cmds++ = 0x04008001; + *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_ADDR_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_LENGTH_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG0, 1); + *cmds++ = 0x0DB0400A; + *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG1, 1); + *cmds++ = 0x00300402; + *cmds++ = cp_type0_packet(A3XX_SP_FS_OBJ_OFFSET_REG, 1); + *cmds++ = 0x00010000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_OBJ_START_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_PARAM_REG, 1); + *cmds++ = 0x04008001; + *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_ADDR_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_FLAT_SHAD_MODE_REG_1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_OUTPUT_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_0, 1); + *cmds++ = 0x00000000; + *cmds++ = 
cp_type0_packet(A3XX_SP_FS_MRT_REG_1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_LENGTH_REG, 1); + *cmds++ = 0x0000000D; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_CLIP_CNTL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_GB_CLIP_ADJ, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_XOFFSET, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_XSCALE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_YOFFSET, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_YSCALE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_ZOFFSET, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_ZSCALE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W1, 1); + *cmds++ = 0x00000000; + *cmds++ = 
cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X4, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y4, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z4, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W4, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X5, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y5, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z5, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W5, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SU_POINT_MINMAX, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SU_POINT_SIZE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SU_POLY_OFFSET_OFFSET, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SU_POLY_OFFSET_SCALE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SU_MODE_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SC_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SC_SCREEN_SCISSOR_BR, 1); + *cmds++ = 
0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SC_WINDOW_SCISSOR_BR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_TSE_DEBUG_ECO, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER0_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER1_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER2_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER3_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MODE_CONTROL, 1); + *cmds++ = 0x00008000; + *cmds++ = cp_type0_packet(A3XX_RB_RENDER_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MSAA_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_ALPHA_REFERENCE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL0, 1); + *cmds++ = 0x00000000; + *cmds++ = 
cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_BLEND_RED, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_BLEND_GREEN, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_BLEND_BLUE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_BLEND_ALPHA, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_COPY_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_COPY_DEST_BASE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_COPY_DEST_PITCH, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_COPY_DEST_INFO, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_DEPTH_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_DEPTH_CLEAR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_DEPTH_BUF_INFO, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_DEPTH_BUF_PITCH, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_CLEAR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_BUF_INFO, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_BUF_PITCH, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_REF_MASK, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_REF_MASK_BF, 1); + *cmds++ = 0x00000000; + 
*cmds++ = cp_type0_packet(A3XX_RB_LRZ_VSC_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_WINDOW_OFFSET, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_SAMPLE_COUNT_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_SAMPLE_COUNT_ADDR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_Z_CLAMP_MIN, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_Z_CLAMP_MAX, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_GMEM_BASE_ADDR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_DEBUG_ECO_CONTROLS_ADDR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_PERFCOUNTER0_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_PERFCOUNTER1_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_FRAME_BUFFER_DIMENSION, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4); + *cmds++ = (1 << CP_LOADSTATE_DSTOFFSET_SHIFT) | + (0 << CP_LOADSTATE_STATESRC_SHIFT) | + (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) | + (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (1 << CP_LOADSTATE_STATETYPE_SHIFT) | + (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + *cmds++ = 0x00400000; + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4); + *cmds++ = (2 << CP_LOADSTATE_DSTOFFSET_SHIFT) | + (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) | + (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (1 << CP_LOADSTATE_STATETYPE_SHIFT); + *cmds++ = 0x00400220; + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4); + *cmds++ = (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) | + (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (1 << CP_LOADSTATE_STATETYPE_SHIFT); + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 2 + count); + *cmds++ = (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) | + (13 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = 0x00000000; + + memcpy(cmds, 
		_a3xx_pwron_fixup_fs_instructions, count << 2);

	cmds += count;

	/* Run the shader and wait for the GPU to go idle again */
	*cmds++ = cp_type3_packet(CP_EXEC_CL, 1);
	*cmds++ = 0x00000000;
	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmds++ = 0x00000000;
	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_0_REG, 1);
	*cmds++ = 0x00000000;
	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmds++ = 0x00000000;
	*cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1);
	*cmds++ = 0x1E000150;
	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmds++ = 0x00000000;
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
	*cmds++ = 0x1E000050;
	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmds++ = 0x00000000;
	/* Restore the clock control bits cleared earlier in the fixup */
	*cmds++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmds++ = A3XX_RBBM_CLOCK_CTL;
	*cmds++ = 0xFFFCFFFF;
	*cmds++ = 0x00000000;
	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmds++ = 0x00000000;

	/*
	 * Remember the number of dwords in the command buffer for when we
	 * program the indirect buffer call in the ringbuffer
	 */
	adreno_dev->pwron_fixup_dwords =
		(cmds - (unsigned int *) adreno_dev->pwron_fixup->hostptr);

	/* Mark the flag in ->priv to show that we have the fix */
	set_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv);
	return 0;
}

/*
 * a3xx_probe() - Target probe entry point for a3xx GPUs
 * @pdev: Platform device for the GPU
 * @chipid: Chip id identified from the hardware/DT
 * @gpucore: Matched GPU core definition table entry
 *
 * Zero-initializes the statically matched adreno_device, records the core
 * and chipid, sets up the idle timer/work, and hands off to the generic
 * adreno probe and dispatcher init.
 *
 * Return: 0 on success or negative error on failure
 */
static int a3xx_probe(struct platform_device *pdev,
	u32 chipid, const struct adreno_gpu_core *gpucore)
{
	struct adreno_device *adreno_dev;
	struct kgsl_device *device;
	int ret;

	/* The per-target adreno_device is provided via the OF match data */
	adreno_dev = (struct adreno_device *)
		of_device_get_match_data(&pdev->dev);

	memset(adreno_dev, 0, sizeof(*adreno_dev));

	adreno_dev->gpucore = gpucore;
	adreno_dev->chipid = chipid;

	adreno_reg_offset_init(gpucore->gpudev->reg_offsets);


	device = KGSL_DEVICE(adreno_dev);

	timer_setup(&device->idle_timer, kgsl_timer, 0);

	INIT_WORK(&device->idle_check_ws, kgsl_idle_check);

	ret = adreno_device_probe(pdev, adreno_dev);
	if (ret)
		return ret;

	return adreno_dispatcher_init(adreno_dev);
}


/*
 * a3xx_send_me_init() - Send the CP_ME_INIT packet to bring up the CP
 * @adreno_dev: Pointer to the adreno device
 * @rb: Ringbuffer to submit the packet on
 *
 * Writes the 17 dword ME_INIT sequence directly into the ringbuffer,
 * kicks the write pointer and spins until the GPU goes idle.  On timeout
 * a snapshot is taken for debugging.
 *
 * Return: 0 on success or negative error if the CP failed to idle
 */
static int a3xx_send_me_init(struct adreno_device *adreno_dev,
	struct adreno_ringbuffer *rb)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	unsigned int *cmds;
	int ret;

	/* 1 dword packet header + 17 payload dwords */
	cmds = adreno_ringbuffer_allocspace(rb, 18);
	if (IS_ERR(cmds))
		return PTR_ERR(cmds);

	*cmds++ = cp_type3_packet(CP_ME_INIT, 17);

	*cmds++ = 0x000003f7;
	*cmds++ = 0x00000000;
	*cmds++ = 0x00000000;
	*cmds++ = 0x00000000;
	*cmds++ = 0x00000080;
	*cmds++ = 0x00000100;
	*cmds++ = 0x00000180;
	*cmds++ = 0x00006600;
	*cmds++ = 0x00000150;
	*cmds++ = 0x0000014e;
	*cmds++ = 0x00000154;
	*cmds++ = 0x00000001;
	*cmds++ = 0x00000000;
	*cmds++ = 0x00000000;

	/* Enable protected mode registers for A3XX */
	*cmds++ = 0x20000000;

	*cmds++ = 0x00000000;
	*cmds++ = 0x00000000;

	/* Submit the command to the ringbuffer */
	kgsl_pwrscale_busy(device);
	kgsl_regwrite(device, A3XX_CP_RB_WPTR, rb->_wptr);
	rb->wptr = rb->_wptr;

	ret = adreno_spin_idle(adreno_dev, 2000);
	if (ret) {
		struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

		dev_err(device->dev, "CP initialization failed to idle\n");
		kgsl_device_snapshot(device, NULL, false);
	}

	return ret;
}

static void a3xx_microcode_load(struct adreno_device *adreno_dev);

/*
 * a3xx_rb_start() - Reset and start the ringbuffer and the CP
 * @adreno_dev: Pointer to the adreno device
 *
 * Clears the ringbuffer contents and pointers, programs the ringbuffer
 * size/base registers, loads the CP microcode, releases the micro engine
 * halt and finally sends the ME_INIT sequence.
 *
 * Return: 0 on success or negative error on failure
 */
static int a3xx_rb_start(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev);

	/* Poison the buffer so stale data is recognizable in dumps */
	memset(rb->buffer_desc->hostptr, 0xaa, KGSL_RB_SIZE);
	rb->wptr = 0;
	rb->_wptr = 0;
	rb->wptr_preempt_end = ~0;

	/*
	 * The size of the ringbuffer in the hardware is the log2
	 * representation of the size in quadwords (sizedwords / 2).
	 * Also disable the host RPTR shadow register as it might be unreliable
	 * in certain circumstances.
	 */

	kgsl_regwrite(device, A3XX_CP_RB_CNTL,
		(ilog2(KGSL_RB_DWORDS >> 1) & 0x3F) |
		(1 << 27));

	kgsl_regwrite(device, A3XX_CP_RB_BASE, rb->buffer_desc->gpuaddr);

	a3xx_microcode_load(adreno_dev);

	/* clear ME_HALT to start micro engine */
	kgsl_regwrite(device, A3XX_CP_ME_CNTL, 0);

	return a3xx_send_me_init(adreno_dev, rb);
}

/*
 * a3xx soft fault detection
 *
 * a3xx targets do not have hardware fault detection so we need to do it the old
 * fashioned way by periodically reading a set of registers and counters and
 * checking that they are advancing. There are 6 registers and four 64 bit
 * counters that we keep an eye on.
 */

#define A3XX_SOFT_FAULT_DETECT_REGS 6
#define A3XX_SOFT_FAULT_DETECT_COUNTERS 4
#define A3XX_SOFT_FAULT_DETECT_COUNT \
	(A3XX_SOFT_FAULT_DETECT_REGS + (A3XX_SOFT_FAULT_DETECT_COUNTERS * 2))

/*
 * a3xx_soft_fault_detect_isidle() - Check whether the GPU is idle
 * @adreno_dev: Pointer to the adreno device
 *
 * Considers the GPU idle when the current ringbuffer is empty and the
 * RBBM busy bits are clear (only checked while awake).  When idle, the
 * stored fault detect values are reset so the next comparison starts
 * fresh.
 *
 * Return: true if the GPU is idle
 */
static bool a3xx_soft_fault_detect_isidle(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	u32 reg;

	if (kgsl_state_is_awake(device)) {
		if (!adreno_rb_empty(adreno_dev->cur_rb))
			return false;

		/* only check rbbm status to determine if GPU is idle */
		kgsl_regread(device, A3XX_RBBM_STATUS, &reg);

		if (reg & 0x7ffffffe)
			return false;
	}

	memset(adreno_dev->soft_ft_vals, 0, A3XX_SOFT_FAULT_DETECT_COUNT << 2);
	return true;
}

/* Read the fault detect registers and compare them to the stored version */
static int a3xx_soft_fault_detect_read_compare(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev);
	int i, ret = 0;
	unsigned int ts;

	if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv))
		return 1;

	/* Check to see if the device is idle - if so report no hang */
	if (a3xx_soft_fault_detect_isidle(adreno_dev))
		ret = 1;

	/* Any register that changed since last time means progress was made */
	for (i = 0; i < A3XX_SOFT_FAULT_DETECT_COUNT; i++) {
		unsigned int val;

		if (!adreno_dev->soft_ft_regs[i])
			continue;

		kgsl_regread(device, adreno_dev->soft_ft_regs[i], &val);
		if (val != adreno_dev->soft_ft_vals[i])
			ret = 1;
		adreno_dev->soft_ft_vals[i] = val;
	}

	/* An advancing retired timestamp also counts as progress */
	if (!adreno_rb_readtimestamp(adreno_dev, adreno_dev->cur_rb,
		KGSL_TIMESTAMP_RETIRED, &ts)) {
		if (ts != rb->fault_detect_ts)
			ret = 1;

		rb->fault_detect_ts = ts;
	}

	return ret;
}

/*
 * This is called on a regular basis while cmdobjs are inflight.  Fault
 * detection registers are read and compared to the existing values - if they
 * changed then the GPU is still running.  If they are the same between
 * subsequent calls then the GPU may have faulted
 */
static void a3xx_soft_fault_timer(struct timer_list *t)
{
	struct adreno_dispatcher *dispatcher = from_timer(dispatcher,
			t, fault_timer);
	struct adreno_device *adreno_dev = container_of(dispatcher,
			struct adreno_device, dispatcher);
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	/* Leave if the user decided to turn off fast hang detection */
	if (!adreno_soft_fault_detect(adreno_dev))
		return;

	if (adreno_gpu_fault(adreno_dev)) {
		adreno_dispatcher_schedule(device);
		return;
	}

	/*
	 * Read the fault registers - if it returns 0 then they haven't changed
	 * so mark the dispatcher as faulted and schedule the work loop.
	 */

	if (!a3xx_soft_fault_detect_read_compare(adreno_dev))
		adreno_dispatcher_fault(adreno_dev, ADRENO_SOFT_FAULT);
	else if (dispatcher->inflight > 0)
		adreno_dispatcher_start_fault_timer(adreno_dev);
}

/*
 * Start fault detection.
The counters are only assigned while fault detection + * is running so that they can be used for other purposes if fault detection is + * disabled + */ +static void a3xx_soft_fault_detect_start(struct adreno_device *adreno_dev) +{ + u32 *regs = &adreno_dev->soft_ft_regs[A3XX_SOFT_FAULT_DETECT_COUNTERS]; + int ret = 0; + + if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv)) + return; + + if (adreno_dev->fast_hang_detect == 1) + return; + + ret |= adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_SP, SP_ALU_ACTIVE_CYCLES, + ®s[0], ®s[1]); + + ret |= adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_SP, SP0_ICL1_MISSES, + ®s[2], ®s[3]); + + ret |= adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_SP, SP_FS_CFLOW_INSTRUCTIONS, + ®s[4], ®s[5]); + + ret |= adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_TSE, TSE_INPUT_PRIM_NUM, + ®s[6], ®s[7]); + + WARN(ret, "Unable to allocate one or more fault detect counters\n"); + adreno_dev->fast_hang_detect = 1; +} + +/* Helper function to put back a counter */ +static void put_counter(struct adreno_device *adreno_dev, + int group, int countable, u32 *lo, u32 *hi) +{ + adreno_perfcounter_put(adreno_dev, group, countable, + PERFCOUNTER_FLAG_KERNEL); + + *lo = 0; + *hi = 0; +} + +/* Stop fault detection and return the counters */ +static void a3xx_soft_fault_detect_stop(struct adreno_device *adreno_dev) +{ + u32 *regs = &adreno_dev->soft_ft_regs[A3XX_SOFT_FAULT_DETECT_COUNTERS]; + + if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv)) + return; + + if (!adreno_dev->fast_hang_detect) + return; + + put_counter(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP, SP_ALU_ACTIVE_CYCLES, + ®s[0], ®s[1]); + + put_counter(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP, SP0_ICL1_MISSES, + ®s[2], ®s[3]); + + put_counter(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP, + SP_FS_CFLOW_INSTRUCTIONS, ®s[4], ®s[5]); + + put_counter(adreno_dev, KGSL_PERFCOUNTER_GROUP_TSE, 
TSE_INPUT_PRIM_NUM, + ®s[6], ®s[7]); + + adreno_dev->fast_hang_detect = 0; +} + +/* Initialize the registers and set up the data structures */ +static void a3xx_soft_fault_detect_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_SOFT_FAULT_DETECT)) + return; + + /* Disable the fast hang detect bit until we know its a go */ + adreno_dev->fast_hang_detect = 0; + + adreno_dev->soft_ft_regs = devm_kcalloc(&device->pdev->dev, + A3XX_SOFT_FAULT_DETECT_COUNT, sizeof(u32), GFP_KERNEL); + + adreno_dev->soft_ft_vals = devm_kcalloc(&device->pdev->dev, + A3XX_SOFT_FAULT_DETECT_COUNT, sizeof(u32), GFP_KERNEL); + + if (!adreno_dev->soft_ft_regs || !adreno_dev->soft_ft_vals) + return; + + adreno_dev->soft_ft_count = A3XX_SOFT_FAULT_DETECT_COUNT; + + adreno_dev->soft_ft_regs[0] = A3XX_RBBM_STATUS; + adreno_dev->soft_ft_regs[1] = A3XX_CP_RB_RPTR; + adreno_dev->soft_ft_regs[2] = A3XX_CP_IB1_BASE; + adreno_dev->soft_ft_regs[3] = A3XX_CP_IB1_BUFSZ; + adreno_dev->soft_ft_regs[4] = A3XX_CP_IB2_BASE; + adreno_dev->soft_ft_regs[5] = A3XX_CP_IB2_BUFSZ; + + set_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv); + + a3xx_soft_fault_detect_start(adreno_dev); +} + +static void a3xx_remove(struct adreno_device *adreno_dev) +{ + a3xx_soft_fault_detect_stop(adreno_dev); +} + +static int a3xx_microcode_read(struct adreno_device *adreno_dev); + +/* + * a3xx_init() - Initialize gpu specific data + * @adreno_dev: Pointer to adreno device + */ +static int a3xx_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + int ret; + + /* + * Set up the a3xx only soft fault timer before heading into the generic + * dispatcher setup + */ + if (ADRENO_FEATURE(adreno_dev, ADRENO_SOFT_FAULT_DETECT)) + timer_setup(&dispatcher->fault_timer, a3xx_soft_fault_timer, 
			0);

	ret = a3xx_ringbuffer_init(adreno_dev);
	if (ret)
		return ret;

	ret = a3xx_microcode_read(adreno_dev);
	if (ret)
		return ret;

	_a3xx_pwron_fixup(adreno_dev);

	/* Best effort - a missing setstate buffer is tolerated */
	ret = adreno_allocate_global(device, &iommu->setstate, PAGE_SIZE,
		0, KGSL_MEMFLAGS_GPUREADONLY, 0, "setstate");

	if (!ret)
		kgsl_sharedmem_writel(iommu->setstate,
			KGSL_IOMMU_SETSTATE_NOP_OFFSET,
			cp_type3_packet(CP_NOP, 1));

	kgsl_mmu_set_feature(device, KGSL_MMU_NEED_GUARD_PAGE);

	/* Put the hardware in a responsive state to set up fault detection*/
	ret = kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE);
	if (ret)
		return ret;

	a3xx_soft_fault_detect_init(adreno_dev);

	kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER);
	return 0;
}

/*
 * a3xx_err_callback() - Call back for a3xx error interrupts
 * @adreno_dev: Pointer to device
 * @bit: Interrupt bit
 */
static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	unsigned int reg;

	switch (bit) {
	case A3XX_INT_RBBM_AHB_ERROR: {
		kgsl_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, &reg);

		/*
		 * Return the word address of the erroring register so that it
		 * matches the register specification
		 */
		dev_crit_ratelimited(device->dev,
			"RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n",
			reg & (1 << 28) ? "WRITE" : "READ",
			(reg & 0xFFFFF) >> 2,
			(reg >> 20) & 0x3,
			(reg >> 24) & 0xF);

		/* Clear the error */
		kgsl_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3));
		break;
	}
	case A3XX_INT_RBBM_ATB_BUS_OVERFLOW:
		dev_crit_ratelimited(device->dev,
			"RBBM: ATB bus oveflow\n");
		break;
	case A3XX_INT_CP_T0_PACKET_IN_IB:
		dev_crit_ratelimited(device->dev,
			"ringbuffer TO packet in IB interrupt\n");
		break;
	case A3XX_INT_CP_OPCODE_ERROR:
		dev_crit_ratelimited(device->dev,
			"ringbuffer opcode error interrupt\n");
		break;
	case A3XX_INT_CP_RESERVED_BIT_ERROR:
		dev_crit_ratelimited(device->dev,
			"ringbuffer reserved bit error interrupt\n");
		break;
	case A3XX_INT_CP_HW_FAULT:
		kgsl_regread(device, A3XX_CP_HW_FAULT, &reg);
		dev_crit_ratelimited(device->dev,
			"CP | Ringbuffer HW fault | status=%x\n",
			reg);
		break;
	case A3XX_INT_CP_REG_PROTECT_FAULT:
		kgsl_regread(device, A3XX_CP_PROTECT_STATUS, &reg);
		dev_crit_ratelimited(device->dev,
			"CP | Protected mode error| %s | addr=%x\n",
			reg & (1 << 24) ? "WRITE" : "READ",
			(reg & 0xFFFFF) >> 2);
		break;
	case A3XX_INT_CP_AHB_ERROR_HALT:
		dev_crit_ratelimited(device->dev,
			"ringbuffer AHB error interrupt\n");
		break;
	case A3XX_INT_UCHE_OOB_ACCESS:
		dev_crit_ratelimited(device->dev,
			"UCHE: Out of bounds access\n");
		break;
	default:
		dev_crit_ratelimited(device->dev, "Unknown interrupt\n");
	}
}

/* Mask of interrupt bits (the A3XX_INT_* macros are bit positions) */
#define A3XX_INT_MASK \
	((1 << A3XX_INT_RBBM_AHB_ERROR) |        \
	 (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \
	 (1 << A3XX_INT_CP_T0_PACKET_IN_IB) |    \
	 (1 << A3XX_INT_CP_OPCODE_ERROR) |       \
	 (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \
	 (1 << A3XX_INT_CP_HW_FAULT) |           \
	 (1 << A3XX_INT_CP_IB1_INT) |            \
	 (1 << A3XX_INT_CP_IB2_INT) |            \
	 (1 << A3XX_INT_CP_RB_INT) |             \
	 (1 << A3XX_INT_CACHE_FLUSH_TS) |        \
	 (1 << A3XX_INT_CP_REG_PROTECT_FAULT) |  \
	 (1 << A3XX_INT_CP_AHB_ERROR_HALT) |     \
	 (1 << A3XX_INT_UCHE_OOB_ACCESS))

/* Per-bit interrupt dispatch table, indexed by interrupt bit number */
static const struct adreno_irq_funcs a3xx_irq_funcs[32] = {
	ADRENO_IRQ_CALLBACK(NULL),              /* 0 - RBBM_GPU_IDLE */
	ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 1 - RBBM_AHB_ERROR */
	ADRENO_IRQ_CALLBACK(NULL),              /* 2 - RBBM_REG_TIMEOUT */
	ADRENO_IRQ_CALLBACK(NULL),              /* 3 - RBBM_ME_MS_TIMEOUT */
	ADRENO_IRQ_CALLBACK(NULL),              /* 4 - RBBM_PFP_MS_TIMEOUT */
	ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 5 - RBBM_ATB_BUS_OVERFLOW */
	ADRENO_IRQ_CALLBACK(NULL),              /* 6 - RBBM_VFD_ERROR */
	ADRENO_IRQ_CALLBACK(NULL),              /* 7 - CP_SW */
	ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 8 - CP_T0_PACKET_IN_IB */
	ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 9 - CP_OPCODE_ERROR */
	/* 10 - CP_RESERVED_BIT_ERROR */
	ADRENO_IRQ_CALLBACK(a3xx_err_callback),
	ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 11 - CP_HW_FAULT */
	ADRENO_IRQ_CALLBACK(NULL),              /* 12 - CP_DMA */
	ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 13 - CP_IB2_INT */
	ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 14 - CP_IB1_INT */
	ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 15 - CP_RB_INT */
	/* 16 - CP_REG_PROTECT_FAULT */
	ADRENO_IRQ_CALLBACK(a3xx_err_callback),
	ADRENO_IRQ_CALLBACK(NULL),              /* 17 - CP_RB_DONE_TS */
	ADRENO_IRQ_CALLBACK(NULL),              /* 18 - CP_VS_DONE_TS */
	ADRENO_IRQ_CALLBACK(NULL),              /* 19 - CP_PS_DONE_TS */
	ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */
	/* 21 - CP_AHB_ERROR_FAULT */
	ADRENO_IRQ_CALLBACK(a3xx_err_callback),
	ADRENO_IRQ_CALLBACK(NULL),              /* 22 - Unused */
	ADRENO_IRQ_CALLBACK(NULL),              /* 23 - Unused */
	/* 24 - MISC_HANG_DETECT */
	ADRENO_IRQ_CALLBACK(adreno_hang_int_callback),
	ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 25 - UCHE_OOB_ACCESS */
};

/*
 * CP protected mode ranges: each entry programs one A3XX_CP_PROTECT_REG_n
 * with a base register (word address) and a log2 count of registers to
 * protect from untrusted command streams.
 */
static struct {
	u32 reg;
	u32 base;
	u32 count;
} a3xx_protected_blocks[] = {
	/* RBBM */
	{ A3XX_CP_PROTECT_REG_0, 0x0018, 0 },
	{ A3XX_CP_PROTECT_REG_0 + 1, 0x0020, 2 },
	{ A3XX_CP_PROTECT_REG_0 + 2, 0x0033, 0 },
	{ A3XX_CP_PROTECT_REG_0 + 3, 0x0042, 0 },
	{ A3XX_CP_PROTECT_REG_0 + 4, 0x0050, 4 },
	{ A3XX_CP_PROTECT_REG_0 + 5, 0x0063, 0 },
	{ A3XX_CP_PROTECT_REG_0 + 6, 0x0100, 4 },
	/* CP */
	{ A3XX_CP_PROTECT_REG_0 + 7, 0x01c0, 5 },
	{ A3XX_CP_PROTECT_REG_0 + 8, 0x01ec, 1 },
	{ A3XX_CP_PROTECT_REG_0 + 9, 0x01f6, 1 },
	{ A3XX_CP_PROTECT_REG_0 + 10, 0x01f8, 2 },
	{ A3XX_CP_PROTECT_REG_0 + 11, 0x045e, 2 },
	{ A3XX_CP_PROTECT_REG_0 + 12, 0x0460, 4 },
	/* RB */
	{ A3XX_CP_PROTECT_REG_0 + 13, 0x0cc0, 0 },
	/* VBIF */
	{ A3XX_CP_PROTECT_REG_0 + 14, 0x3000, 6 },
	/* SMMU */
	{ A3XX_CP_PROTECT_REG_0 + 15, 0xa000, 12 },
	/* There are no remaining protected mode registers for a3xx */
};

/* Enable CP register protection and program the protected ranges */
static void a3xx_protect_init(struct kgsl_device *device)
{
	int i;

	kgsl_regwrite(device, A3XX_CP_PROTECT_CTRL, 0x00000007);

	for (i = 0; i < ARRAY_SIZE(a3xx_protected_blocks); i++) {
		/* 0x6 in the top nibble marks the entry valid for read/write */
		u32 val = 0x60000000 |
			(a3xx_protected_blocks[i].count << 24) |
			(a3xx_protected_blocks[i].base << 2);

		kgsl_regwrite(device, a3xx_protected_blocks[i].reg, val);
	}
}

/*
 * a3xx_start() - Per-power-up hardware initialization
 * @adreno_dev: Pointer to the adreno device
 *
 * Starts the MMU and programs the static hardware configuration: VBIF,
 * busy counters, error reporting, hang detection, cache behavior, clock
 * gating, register protection and perfcounters.
 *
 * Return: 0 on success or negative error on failure
 */
static int a3xx_start(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	const struct adreno_a3xx_core *a3xx_core = to_a3xx_core(adreno_dev);
	int ret;

	ret = kgsl_mmu_start(device);
	if (ret)
		return ret;

	adreno_get_bus_counters(adreno_dev);
	adreno_perfcounter_restore(adreno_dev);

	/* Reset the stored soft fault detect values across power cycles */
	if (adreno_dev->soft_ft_regs)
		memset(adreno_dev->soft_ft_regs, 0,
			adreno_dev->soft_ft_count << 2);

	adreno_dev->irq_mask = A3XX_INT_MASK;

	/* Set up VBIF registers from the GPU core definition */
	kgsl_regmap_multi_write(&device->regmap, a3xx_core->vbif,
		a3xx_core->vbif_count);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	kgsl_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Tune the hystersis counters for SP and CP idle detection */
	kgsl_regwrite(device, A3XX_RBBM_SP_HYST_CNT, 0x10);
	kgsl_regwrite(device, A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	/*
	 * Enable the RBBM error reporting bits.  This lets us get
	 * useful information on failure
	 */

	kgsl_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	kgsl_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF);

	/* Turn on the power counters */
	kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00030000);

	/*
	 * Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang
	 */
	kgsl_regwrite(device, A3XX_RBBM_INTERFACE_HANG_INT_CTL,
			(1 << 16) | 0xFFF);

	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0). */
	kgsl_regwrite(device, A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);

	/* Enable VFD to access most of the UCHE (7 ways out of 8) */
	kgsl_regwrite(device, A3XX_UCHE_CACHE_WAYS_VFD, 0x07);

	/* Enable Clock gating */
	kgsl_regwrite(device, A3XX_RBBM_CLOCK_CTL, A3XX_RBBM_CLOCK_CTL_DEFAULT);

	/* Turn on protection */
	a3xx_protect_init(device);

	/* Turn on performance counters */
	kgsl_regwrite(device, A3XX_RBBM_PERFCTR_CTL, 0x01);

	kgsl_regwrite(device, A3XX_CP_DEBUG, A3XX_CP_DEBUG_DEFAULT);

	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
	kgsl_regwrite(device, A3XX_CP_QUEUE_THRESHOLDS, 0x000E0602);
	return 0;
}

#ifdef CONFIG_QCOM_KGSL_CORESIGHT
/* Coresight debug bus registers and their programming defaults */
static struct adreno_coresight_register a3xx_coresight_registers[] = {
	{ A3XX_RBBM_DEBUG_BUS_CTL, 0x0001093F },
	{ A3XX_RBBM_EXT_TRACE_STOP_CNT, 0x00017fff },
	{ A3XX_RBBM_EXT_TRACE_START_CNT, 0x0001000f },
	{ A3XX_RBBM_EXT_TRACE_PERIOD_CNT, 0x0001ffff },
	{ A3XX_RBBM_EXT_TRACE_CMD, 0x00000001 },
	{ A3XX_RBBM_EXT_TRACE_BUS_CTL, 0x89100010 },
	{ A3XX_RBBM_DEBUG_BUS_STB_CTL0, 0x00000000 },
	{ A3XX_RBBM_DEBUG_BUS_STB_CTL1, 0xFFFFFFFE },
	{ A3XX_RBBM_INT_TRACE_BUS_CTL, 0x00201111 },
};

static ADRENO_CORESIGHT_ATTR(config_debug_bus,
	&a3xx_coresight_registers[0]);
static ADRENO_CORESIGHT_ATTR(config_trace_stop_cnt,
	&a3xx_coresight_registers[1]);
static ADRENO_CORESIGHT_ATTR(config_trace_start_cnt,
	&a3xx_coresight_registers[2]);
static ADRENO_CORESIGHT_ATTR(config_trace_period_cnt,
	&a3xx_coresight_registers[3]);
static ADRENO_CORESIGHT_ATTR(config_trace_cmd,
	&a3xx_coresight_registers[4]);
static ADRENO_CORESIGHT_ATTR(config_trace_bus_ctl,
	&a3xx_coresight_registers[5]);

static struct attribute *a3xx_coresight_attrs[] = {
	&coresight_attr_config_debug_bus.attr.attr,
	&coresight_attr_config_trace_start_cnt.attr.attr,
	&coresight_attr_config_trace_stop_cnt.attr.attr,
	&coresight_attr_config_trace_period_cnt.attr.attr,
&coresight_attr_config_trace_cmd.attr.attr, + &coresight_attr_config_trace_bus_ctl.attr.attr, + NULL, +}; + +static const struct attribute_group a3xx_coresight_group = { + .attrs = a3xx_coresight_attrs, +}; + +static const struct attribute_group *a3xx_coresight_groups[] = { + &a3xx_coresight_group, + NULL, +}; + +static struct adreno_coresight a3xx_coresight = { + .registers = a3xx_coresight_registers, + .count = ARRAY_SIZE(a3xx_coresight_registers), + .groups = a3xx_coresight_groups, +}; +#endif + +/* Register offset defines for A3XX */ +static unsigned int a3xx_register_offsets[ADRENO_REG_REGISTER_MAX] = { + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A3XX_CP_RB_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A3XX_CP_RB_RPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A3XX_CP_RB_WPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, A3XX_CP_ME_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_CNTL, A3XX_CP_RB_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, A3XX_CP_IB1_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BUFSZ, A3XX_CP_IB1_BUFSZ), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, A3XX_CP_IB2_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, A3XX_CP_IB2_BUFSZ), + ADRENO_REG_DEFINE(ADRENO_REG_CP_TIMESTAMP, A3XX_CP_SCRATCH_REG0), + ADRENO_REG_DEFINE(ADRENO_REG_CP_SCRATCH_REG6, A3XX_CP_SCRATCH_REG6), + ADRENO_REG_DEFINE(ADRENO_REG_CP_SCRATCH_REG7, A3XX_CP_SCRATCH_REG7), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PROTECT_REG_0, A3XX_CP_PROTECT_REG_0), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, A3XX_RBBM_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_PWR_1_LO, + A3XX_RBBM_PERFCTR_PWR_1_LO), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, A3XX_RBBM_INT_0_MASK), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_CLOCK_CTL, A3XX_RBBM_CLOCK_CTL), + ADRENO_REG_DEFINE(ADRENO_REG_PA_SC_AA_CONFIG, 
A3XX_PA_SC_AA_CONFIG), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PM_OVERRIDE2, A3XX_RBBM_PM_OVERRIDE2), + ADRENO_REG_DEFINE(ADRENO_REG_SQ_GPR_MANAGEMENT, A3XX_SQ_GPR_MANAGEMENT), + ADRENO_REG_DEFINE(ADRENO_REG_SQ_INST_STORE_MANAGEMENT, + A3XX_SQ_INST_STORE_MANAGEMENT), + ADRENO_REG_DEFINE(ADRENO_REG_TP0_CHICKEN, A3XX_TP0_CHICKEN), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, A3XX_RBBM_SW_RESET_CMD), +}; + +static int _load_firmware(struct kgsl_device *device, const char *fwfile, + void **buf, int *len) +{ + const struct firmware *fw = NULL; + int ret; + + ret = request_firmware(&fw, fwfile, &device->pdev->dev); + + if (ret) { + dev_err(&device->pdev->dev, "request_firmware(%s) failed: %d\n", + fwfile, ret); + return ret; + } + + if (!fw) + return -EINVAL; + + *buf = devm_kmemdup(&device->pdev->dev, fw->data, fw->size, GFP_KERNEL); + *len = fw->size; + + release_firmware(fw); + return (*buf) ? 0 : -ENOMEM; +} + +static int a3xx_microcode_read(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_firmware *pm4_fw = ADRENO_FW(adreno_dev, ADRENO_FW_PM4); + struct adreno_firmware *pfp_fw = ADRENO_FW(adreno_dev, ADRENO_FW_PFP); + const struct adreno_a3xx_core *a3xx_core = to_a3xx_core(adreno_dev); + + if (pm4_fw->fwvirt == NULL) { + int len; + void *ptr; + + int ret = _load_firmware(device, + a3xx_core->pm4fw_name, &ptr, &len); + + if (ret) { + dev_err(device->dev, "Failed to read pm4 ucode %s\n", + a3xx_core->pm4fw_name); + return ret; + } + + /* PM4 size is 3 dword aligned plus 1 dword of version */ + if (len % ((sizeof(uint32_t) * 3)) != sizeof(uint32_t)) { + dev_err(device->dev, + "Bad pm4 microcode size: %d\n", + len); + kfree(ptr); + return -ENOMEM; + } + + pm4_fw->size = len / sizeof(uint32_t); + pm4_fw->fwvirt = ptr; + pm4_fw->version = pm4_fw->fwvirt[1]; + } + + if (pfp_fw->fwvirt == NULL) { + int len; + void *ptr; + + int ret = _load_firmware(device, + a3xx_core->pfpfw_name, &ptr, &len); + if (ret) { + 
dev_err(device->dev, "Failed to read pfp ucode %s\n", + a3xx_core->pfpfw_name); + return ret; + } + + /* PFP size shold be dword aligned */ + if (len % sizeof(uint32_t) != 0) { + dev_err(device->dev, + "Bad PFP microcode size: %d\n", + len); + kfree(ptr); + return -ENOMEM; + } + + pfp_fw->size = len / sizeof(uint32_t); + pfp_fw->fwvirt = ptr; + pfp_fw->version = pfp_fw->fwvirt[1]; + } + + return 0; +} + +static void a3xx_microcode_load(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + size_t pm4_size = adreno_dev->fw[ADRENO_FW_PM4].size; + size_t pfp_size = adreno_dev->fw[ADRENO_FW_PFP].size; + + /* load the CP ucode using AHB writes */ + kgsl_regwrite(device, A3XX_CP_ME_RAM_WADDR, 0); + + kgsl_regmap_bulk_write(&device->regmap, A3XX_CP_ME_RAM_DATA, + &adreno_dev->fw[ADRENO_FW_PM4].fwvirt[1], pm4_size - 1); + + kgsl_regwrite(device, A3XX_CP_PFP_UCODE_ADDR, 0); + + kgsl_regmap_bulk_write(&device->regmap, A3XX_CP_PFP_UCODE_DATA, + &adreno_dev->fw[ADRENO_FW_PFP].fwvirt[1], pfp_size - 1); +} + +#if IS_ENABLED(CONFIG_COMMON_CLK_QCOM) +static void a3xx_clk_set_options(struct adreno_device *adreno_dev, + const char *name, struct clk *clk, bool on) +{ + if (!clk || !adreno_is_a306a(adreno_dev)) + return; + + /* Handle clock settings for GFX PSCBCs */ + if (on) { + if (!strcmp(name, "mem_iface_clk")) { + qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_PERIPH); + qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_MEM); + } else if (!strcmp(name, "core_clk")) { + qcom_clk_set_flags(clk, CLKFLAG_RETAIN_PERIPH); + qcom_clk_set_flags(clk, CLKFLAG_RETAIN_MEM); + } + } else { + if (!strcmp(name, "core_clk")) { + qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_PERIPH); + qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_MEM); + } + } +} +#endif + +static u64 a3xx_read_alwayson(struct adreno_device *adreno_dev) +{ + /* A3XX does not have a always on timer */ + return 0; +} + +static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev) +{ + struct kgsl_device 
*device = KGSL_DEVICE(adreno_dev); + irqreturn_t ret; + u32 status; + + /* Get the current interrupt status */ + kgsl_regread(device, A3XX_RBBM_INT_0_STATUS, &status); + + /* + * Clear all the interrupt bits except A3XX_INT_RBBM_AHB_ERROR. + * The interrupt will stay asserted until it is cleared by the handler + * so don't touch it yet to avoid a storm + */ + + kgsl_regwrite(device, A3XX_RBBM_INT_CLEAR_CMD, + status & ~A3XX_INT_RBBM_AHB_ERROR); + + /* Call the helper to execute the callbacks */ + ret = adreno_irq_callbacks(adreno_dev, a3xx_irq_funcs, status); + + trace_kgsl_a3xx_irq_status(adreno_dev, status); + + /* Now clear AHB_ERROR if it was set */ + if (status & A3XX_INT_RBBM_AHB_ERROR) + kgsl_regwrite(device, A3XX_RBBM_INT_CLEAR_CMD, + A3XX_INT_RBBM_AHB_ERROR); + + return ret; +} + +static bool a3xx_hw_isidle(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 status; + + kgsl_regread(device, A3XX_RBBM_STATUS, &status); + + if (status & 0x7ffffffe) + return false; + + kgsl_regread(device, A3XX_RBBM_INT_0_STATUS, &status); + + /* Return busy if a interrupt is pending */ + return !((status & adreno_dev->irq_mask) || + atomic_read(&adreno_dev->pending_irq_refcnt)); +} + +static int a3xx_clear_pending_transactions(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 mask = A30X_VBIF_XIN_HALT_CTRL0_MASK; + int ret; + + kgsl_regwrite(device, A3XX_VBIF_XIN_HALT_CTRL0, mask); + ret = adreno_wait_for_halt_ack(device, A3XX_VBIF_XIN_HALT_CTRL1, mask); + kgsl_regwrite(device, A3XX_VBIF_XIN_HALT_CTRL0, 0); + + return ret; +} + +static bool a3xx_is_hw_collapsible(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* + * Skip power collapse for A304, if power ctrl flag is set to + * non zero. As A304 soft_reset will not work, power collapse + * needs to disable to avoid soft_reset. 
	 */
	if (adreno_is_a304(adreno_dev) && device->pwrctrl.ctrl_flags)
		return false;

	return adreno_isidle(adreno_dev);
}

/*
 * a3xx_power_stats() - Collect busy/bus statistics for power scaling
 * @adreno_dev: Pointer to the adreno device
 * @stats: Output statistics container
 *
 * Converts the GPU busy cycle delta to time using the active frequency;
 * RAM cycle/starvation deltas are only gathered when bus control is on.
 */
static void a3xx_power_stats(struct adreno_device *adreno_dev,
		struct kgsl_power_stats *stats)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct adreno_busy_data *busy = &adreno_dev->busy_data;
	s64 freq = kgsl_pwrctrl_active_freq(&device->pwrctrl) / 1000000;
	u64 gpu_busy;

	/* Set the GPU busy counter for frequency scaling */
	gpu_busy = counter_delta(device, A3XX_RBBM_PERFCTR_PWR_1_LO,
		&busy->gpu_busy);

	stats->busy_time = gpu_busy / freq;

	if (!device->pwrctrl.bus_control)
		return;

	stats->ram_time = counter_delta(device, adreno_dev->ram_cycles_lo,
		&busy->bif_ram_cycles);

	stats->ram_wait = counter_delta(device, adreno_dev->starved_ram_lo,
		&busy->bif_starved_ram);
}

/*
 * a3xx_setproperty() - Handle KGSL_PROP_PWRCTRL from userspace
 * @dev_priv: Pointer to the per-fd device private
 * @type: Property type (only KGSL_PROP_PWRCTRL is accepted)
 * @value: Userspace pointer to a u32 enable flag
 * @sizebytes: Size of the userspace value
 *
 * Enables or disables dynamic power control; disabling also pins the GPU
 * active and stops soft fault detection.
 *
 * Return: 0 on success or negative error on failure
 */
static int a3xx_setproperty(struct kgsl_device_private *dev_priv,
		u32 type, void __user *value, u32 sizebytes)
{
	struct kgsl_device *device = dev_priv->device;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	u32 enable;

	if (type != KGSL_PROP_PWRCTRL)
		return -ENODEV;

	if (sizebytes != sizeof(enable))
		return -EINVAL;

	if (copy_from_user(&enable, value, sizeof(enable)))
		return -EFAULT;

	mutex_lock(&device->mutex);
	if (enable) {
		device->pwrctrl.ctrl_flags = 0;

		if (!adreno_active_count_get(adreno_dev)) {
			a3xx_soft_fault_detect_start(adreno_dev);
			adreno_active_count_put(adreno_dev);
		}

		kgsl_pwrscale_enable(device);
	} else {
		kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE);
		device->pwrctrl.ctrl_flags = KGSL_PWR_ON;

		a3xx_soft_fault_detect_stop(adreno_dev);
		kgsl_pwrscale_disable(device, true);
	}
	mutex_unlock(&device->mutex);

	return 0;
}

/* a3xx target operations, plugged into the generic adreno core */
const struct adreno_gpudev adreno_a3xx_gpudev = {
	.reg_offsets = a3xx_register_offsets,
	.irq_handler = a3xx_irq_handler,
	.probe = a3xx_probe,
	.rb_start = a3xx_rb_start,
	.init = a3xx_init,
	.start = a3xx_start,
	.snapshot = a3xx_snapshot,
#ifdef CONFIG_QCOM_KGSL_CORESIGHT
	.coresight = {&a3xx_coresight},
#endif
#if IS_ENABLED(CONFIG_COMMON_CLK_QCOM)
	.clk_set_options = a3xx_clk_set_options,
#endif
	.read_alwayson = a3xx_read_alwayson,
	.hw_isidle = a3xx_hw_isidle,
	.power_ops = &adreno_power_operations,
	.clear_pending_transactions = a3xx_clear_pending_transactions,
	.ringbuffer_submitcmd = a3xx_ringbuffer_submitcmd,
	.is_hw_collapsible = a3xx_is_hw_collapsible,
	.power_stats = a3xx_power_stats,
	.setproperty = a3xx_setproperty,
	.remove = a3xx_remove,
};
diff --git a/adreno_a3xx.h b/adreno_a3xx.h
new file mode 100644
index 0000000000..d1e2e908a4
--- /dev/null
+++ b/adreno_a3xx.h
@@ -0,0 +1,69 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2013-2016, 2019-2020, The Linux Foundation. All rights reserved.
 */
#ifndef __A3XX_H
#define __A3XX_H

#include "a3xx_reg.h"
/**
 * struct adreno_a3xx_core - a3xx specific GPU core definitions
 */
struct adreno_a3xx_core {
	/** @base: Container for the generic &struct adreno_gpu_core */
	struct adreno_gpu_core base;
	/** @pm4fw_name: Name of the PM4 microcode file */
	const char *pm4fw_name;
	/** @pfpfw_name: Name of the PFP microcode file */
	const char *pfpfw_name;
	/** @vbif: List of registers and values to write for VBIF */
	const struct kgsl_regmap_list *vbif;
	/** @vbif_count: Number of registers in @vbif */
	u32 vbif_count;
};

struct adreno_device;

/**
 * to_a3xx_core - return the a3xx specific GPU core struct
 * @adreno_dev: An Adreno GPU device handle
 *
 * Returns:
 * A pointer to the a3xx specific GPU core struct
 */
static inline const struct adreno_a3xx_core *
to_a3xx_core(struct adreno_device *adreno_dev)
{
	const struct adreno_gpu_core *core = adreno_dev->gpucore;

	return container_of(core, struct adreno_a3xx_core, base);
}

/* Capture an a3xx GPU state snapshot (implemented in adreno_a3xx_snapshot.c) */
void a3xx_snapshot(struct adreno_device *adreno_dev,
		struct kgsl_snapshot *snapshot);

extern const struct
adreno_perfcounters adreno_a3xx_perfcounters; + +/** + * a3xx_ringbuffer_init - Initialize the ringbuffer + * @adreno_dev: An Adreno GPU handle + * + * Initialize the ringbuffer for a3xx. + * Return: 0 on success or negative on failure + */ +int a3xx_ringbuffer_init(struct adreno_device *adreno_dev); + +/** + * a3xx_ringbuffer_submitcmd - Submit a user command to the ringbuffer + * @adreno_dev: An Adreno GPU handle + * @cmdobj: Pointer to a user command object + * @flags: Internal submit flags + * @time: Optional pointer to a adreno_submit_time container + * + * Return: 0 on success or negative on failure + */ +int a3xx_ringbuffer_submitcmd(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj, u32 flags, + struct adreno_submit_time *time); + +#endif /*__A3XX_H */ diff --git a/adreno_a3xx_perfcounter.c b/adreno_a3xx_perfcounter.c new file mode 100644 index 0000000000..a525fef97b --- /dev/null +++ b/adreno_a3xx_perfcounter.c @@ -0,0 +1,411 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020, The Linux Foundation. All rights reserved. 
+ */ + +#include "adreno.h" +#include "adreno_a3xx.h" +#include "adreno_perfcounter.h" +#include "kgsl_device.h" + +/* Bit flag for RBMM_PERFCTR_CTL */ +#define RBBM_PERFCTR_CTL_ENABLE 0x00000001 +#define VBIF2_PERF_CNT_SEL_MASK 0x7F +/* offset of clear register from select register */ +#define VBIF2_PERF_CLR_REG_SEL_OFF 8 +/* offset of enable register from select register */ +#define VBIF2_PERF_EN_REG_SEL_OFF 16 +/* offset of clear register from the enable register */ +#define VBIF2_PERF_PWR_CLR_REG_EN_OFF 8 + +static void a3xx_counter_load(struct adreno_device *adreno_dev, + struct adreno_perfcount_register *reg) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int index = reg->load_bit / 32; + u32 enable = BIT(reg->load_bit & 31); + + kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_VALUE_LO, + lower_32_bits(reg->value)); + + kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_VALUE_HI, + upper_32_bits(reg->value)); + + if (index == 0) + kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD0, enable); + else + kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD1, enable); +} + +static int a3xx_counter_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + + kgsl_regwrite(device, reg->select, countable); + reg->value = 0; + + return 0; +} + +static u64 a3xx_counter_read(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + u32 val, hi, lo; + + kgsl_regread(device, A3XX_RBBM_PERFCTR_CTL, &val); + kgsl_regwrite(device, A3XX_RBBM_PERFCTR_CTL, + val & ~RBBM_PERFCTR_CTL_ENABLE); + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + kgsl_regwrite(device, 
A3XX_RBBM_PERFCTR_CTL, val); + + return (((u64) hi) << 32) | lo; +} + +static int a3xx_counter_pwr_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + return 0; +} + +static u64 a3xx_counter_pwr_read(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + u32 val, hi, lo; + + kgsl_regread(device, A3XX_RBBM_RBBM_CTL, &val); + + /* Freeze the counter so we can read it */ + if (!counter) + kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, val & ~0x10000); + else + kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, val & ~0x20000); + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, val); + + return ((((u64) hi) << 32) | lo) + reg->value; +} + +static int a3xx_counter_vbif_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + + if (countable > VBIF2_PERF_CNT_SEL_MASK) + return -EINVAL; + + /* + * Write 1, followed by 0 to CLR register for + * clearing the counter + */ + kgsl_regwrite(device, + reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 1); + kgsl_regwrite(device, + reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 0); + kgsl_regwrite(device, + reg->select, countable & VBIF2_PERF_CNT_SEL_MASK); + /* enable reg is 8 DWORDS before select reg */ + kgsl_regwrite(device, + reg->select - VBIF2_PERF_EN_REG_SEL_OFF, 1); + + kgsl_regwrite(device, reg->select, countable); + + reg->value = 0; + return 0; +} + +static u64 a3xx_counter_vbif_read(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter) +{ + struct 
kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + u32 hi, lo; + + /* freeze counter */ + kgsl_regwrite(device, reg->select - VBIF2_PERF_EN_REG_SEL_OFF, 0); + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + /* un-freeze counter */ + kgsl_regwrite(device, reg->select - VBIF2_PERF_EN_REG_SEL_OFF, 1); + + return ((((u64) hi) << 32) | lo) + reg->value; +} + +static int a3xx_counter_vbif_pwr_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + + /* + * Write 1, followed by 0 to CLR register for + * clearing the counter + */ + kgsl_regwrite(device, reg->select + + VBIF2_PERF_PWR_CLR_REG_EN_OFF, 1); + kgsl_regwrite(device, reg->select + + VBIF2_PERF_PWR_CLR_REG_EN_OFF, 0); + kgsl_regwrite(device, reg->select, 1); + + reg->value = 0; + return 0; +} + +static u64 a3xx_counter_vbif_pwr_read(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + u32 hi, lo; + + /* freeze counter */ + kgsl_regwrite(device, reg->select, 0); + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + /* un-freeze counter */ + kgsl_regwrite(device, reg->select, 1); + + return ((((u64) hi) << 32) | lo) + reg->value; +} + +/* + * Define the available perfcounter groups - these get used by + * adreno_perfcounter_get and adreno_perfcounter_put + */ + +static struct adreno_perfcount_register a3xx_perfcounters_cp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_CP_0_LO, + A3XX_RBBM_PERFCTR_CP_0_HI, 0, A3XX_CP_PERFCOUNTER_SELECT }, +}; + +static struct adreno_perfcount_register 
a3xx_perfcounters_rbbm[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RBBM_0_LO, + A3XX_RBBM_PERFCTR_RBBM_0_HI, 1, A3XX_RBBM_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RBBM_1_LO, + A3XX_RBBM_PERFCTR_RBBM_1_HI, 2, A3XX_RBBM_PERFCOUNTER1_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_pc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_0_LO, + A3XX_RBBM_PERFCTR_PC_0_HI, 3, A3XX_PC_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_1_LO, + A3XX_RBBM_PERFCTR_PC_1_HI, 4, A3XX_PC_PERFCOUNTER1_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_2_LO, + A3XX_RBBM_PERFCTR_PC_2_HI, 5, A3XX_PC_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_3_LO, + A3XX_RBBM_PERFCTR_PC_3_HI, 6, A3XX_PC_PERFCOUNTER3_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_vfd[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VFD_0_LO, + A3XX_RBBM_PERFCTR_VFD_0_HI, 7, A3XX_VFD_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VFD_1_LO, + A3XX_RBBM_PERFCTR_VFD_1_HI, 8, A3XX_VFD_PERFCOUNTER1_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_hlsq[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_0_LO, + A3XX_RBBM_PERFCTR_HLSQ_0_HI, 9, + A3XX_HLSQ_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_1_LO, + A3XX_RBBM_PERFCTR_HLSQ_1_HI, 10, + A3XX_HLSQ_PERFCOUNTER1_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_2_LO, + A3XX_RBBM_PERFCTR_HLSQ_2_HI, 11, + A3XX_HLSQ_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_3_LO, + A3XX_RBBM_PERFCTR_HLSQ_3_HI, 12, + A3XX_HLSQ_PERFCOUNTER3_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_4_LO, + A3XX_RBBM_PERFCTR_HLSQ_4_HI, 13, + A3XX_HLSQ_PERFCOUNTER4_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 
0, 0, A3XX_RBBM_PERFCTR_HLSQ_5_LO, + A3XX_RBBM_PERFCTR_HLSQ_5_HI, 14, + A3XX_HLSQ_PERFCOUNTER5_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_vpc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VPC_0_LO, + A3XX_RBBM_PERFCTR_VPC_0_HI, 15, A3XX_VPC_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VPC_1_LO, + A3XX_RBBM_PERFCTR_VPC_1_HI, 16, A3XX_VPC_PERFCOUNTER1_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_tse[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TSE_0_LO, + A3XX_RBBM_PERFCTR_TSE_0_HI, 17, A3XX_GRAS_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TSE_1_LO, + A3XX_RBBM_PERFCTR_TSE_1_HI, 18, A3XX_GRAS_PERFCOUNTER1_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_ras[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RAS_0_LO, + A3XX_RBBM_PERFCTR_RAS_0_HI, 19, A3XX_GRAS_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RAS_1_LO, + A3XX_RBBM_PERFCTR_RAS_1_HI, 20, A3XX_GRAS_PERFCOUNTER3_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_uche[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_0_LO, + A3XX_RBBM_PERFCTR_UCHE_0_HI, 21, + A3XX_UCHE_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_1_LO, + A3XX_RBBM_PERFCTR_UCHE_1_HI, 22, + A3XX_UCHE_PERFCOUNTER1_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_2_LO, + A3XX_RBBM_PERFCTR_UCHE_2_HI, 23, + A3XX_UCHE_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_3_LO, + A3XX_RBBM_PERFCTR_UCHE_3_HI, 24, + A3XX_UCHE_PERFCOUNTER3_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_4_LO, + A3XX_RBBM_PERFCTR_UCHE_4_HI, 25, + A3XX_UCHE_PERFCOUNTER4_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_5_LO, + A3XX_RBBM_PERFCTR_UCHE_5_HI, 26, + 
A3XX_UCHE_PERFCOUNTER5_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_tp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_0_LO, + A3XX_RBBM_PERFCTR_TP_0_HI, 27, A3XX_TP_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_1_LO, + A3XX_RBBM_PERFCTR_TP_1_HI, 28, A3XX_TP_PERFCOUNTER1_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_2_LO, + A3XX_RBBM_PERFCTR_TP_2_HI, 29, A3XX_TP_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_3_LO, + A3XX_RBBM_PERFCTR_TP_3_HI, 30, A3XX_TP_PERFCOUNTER3_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_4_LO, + A3XX_RBBM_PERFCTR_TP_4_HI, 31, A3XX_TP_PERFCOUNTER4_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_5_LO, + A3XX_RBBM_PERFCTR_TP_5_HI, 32, A3XX_TP_PERFCOUNTER5_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_sp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_0_LO, + A3XX_RBBM_PERFCTR_SP_0_HI, 33, A3XX_SP_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_1_LO, + A3XX_RBBM_PERFCTR_SP_1_HI, 34, A3XX_SP_PERFCOUNTER1_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_2_LO, + A3XX_RBBM_PERFCTR_SP_2_HI, 35, A3XX_SP_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_3_LO, + A3XX_RBBM_PERFCTR_SP_3_HI, 36, A3XX_SP_PERFCOUNTER3_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_4_LO, + A3XX_RBBM_PERFCTR_SP_4_HI, 37, A3XX_SP_PERFCOUNTER4_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_5_LO, + A3XX_RBBM_PERFCTR_SP_5_HI, 38, A3XX_SP_PERFCOUNTER5_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_6_LO, + A3XX_RBBM_PERFCTR_SP_6_HI, 39, A3XX_SP_PERFCOUNTER6_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_7_LO, + A3XX_RBBM_PERFCTR_SP_7_HI, 40, A3XX_SP_PERFCOUNTER7_SELECT }, +}; + +static struct 
adreno_perfcount_register a3xx_perfcounters_rb[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RB_0_LO, + A3XX_RBBM_PERFCTR_RB_0_HI, 41, A3XX_RB_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RB_1_LO, + A3XX_RBBM_PERFCTR_RB_1_HI, 42, A3XX_RB_PERFCOUNTER1_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_pwr[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PWR_0_LO, + A3XX_RBBM_PERFCTR_PWR_0_HI, -1, 0 }, + /* + * A3XX_RBBM_PERFCTR_PWR_1_LO is used for frequency scaling and removed + * from the pool of available counters + */ +}; + +static struct adreno_perfcount_register a3xx_perfcounters_vbif2[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW0, + A3XX_VBIF2_PERF_CNT_HIGH0, -1, A3XX_VBIF2_PERF_CNT_SEL0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW1, + A3XX_VBIF2_PERF_CNT_HIGH1, -1, A3XX_VBIF2_PERF_CNT_SEL1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW2, + A3XX_VBIF2_PERF_CNT_HIGH2, -1, A3XX_VBIF2_PERF_CNT_SEL2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW3, + A3XX_VBIF2_PERF_CNT_HIGH3, -1, A3XX_VBIF2_PERF_CNT_SEL3 }, +}; +/* + * Placing EN register in select field since vbif perf counters + * don't have select register to program + */ +static struct adreno_perfcount_register a3xx_perfcounters_vbif2_pwr[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, + 0, A3XX_VBIF2_PERF_PWR_CNT_LOW0, + A3XX_VBIF2_PERF_PWR_CNT_HIGH0, -1, + A3XX_VBIF2_PERF_PWR_CNT_EN0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, + 0, A3XX_VBIF2_PERF_PWR_CNT_LOW1, + A3XX_VBIF2_PERF_PWR_CNT_HIGH1, -1, + A3XX_VBIF2_PERF_PWR_CNT_EN1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, + 0, A3XX_VBIF2_PERF_PWR_CNT_LOW2, + A3XX_VBIF2_PERF_PWR_CNT_HIGH2, -1, + A3XX_VBIF2_PERF_PWR_CNT_EN2 }, +}; + +#define A3XX_PERFCOUNTER_GROUP(offset, name, enable, read, load) \ + ADRENO_PERFCOUNTER_GROUP(a3xx, offset, name, enable, read, load) + +#define A3XX_PERFCOUNTER_GROUP_FLAGS(offset, name, flags, 
enable, read, load) \ + ADRENO_PERFCOUNTER_GROUP_FLAGS(a3xx, offset, name, flags, enable, read, load) + +#define A3XX_REGULAR_PERFCOUNTER_GROUP(offset, name) \ + A3XX_PERFCOUNTER_GROUP(offset, name, a3xx_counter_enable,\ + a3xx_counter_read, a3xx_counter_load) + +static const struct adreno_perfcount_group +a3xx_perfcounter_groups[KGSL_PERFCOUNTER_GROUP_MAX] = { + A3XX_REGULAR_PERFCOUNTER_GROUP(CP, cp), + A3XX_REGULAR_PERFCOUNTER_GROUP(RBBM, rbbm), + A3XX_REGULAR_PERFCOUNTER_GROUP(PC, pc), + A3XX_REGULAR_PERFCOUNTER_GROUP(VFD, vfd), + A3XX_REGULAR_PERFCOUNTER_GROUP(HLSQ, hlsq), + A3XX_REGULAR_PERFCOUNTER_GROUP(VPC, vpc), + A3XX_REGULAR_PERFCOUNTER_GROUP(TSE, tse), + A3XX_REGULAR_PERFCOUNTER_GROUP(RAS, ras), + A3XX_REGULAR_PERFCOUNTER_GROUP(UCHE, uche), + A3XX_REGULAR_PERFCOUNTER_GROUP(TP, tp), + A3XX_REGULAR_PERFCOUNTER_GROUP(SP, sp), + A3XX_REGULAR_PERFCOUNTER_GROUP(RB, rb), + A3XX_PERFCOUNTER_GROUP_FLAGS(PWR, pwr, + ADRENO_PERFCOUNTER_GROUP_FIXED, + a3xx_counter_pwr_enable, a3xx_counter_pwr_read, NULL), + A3XX_PERFCOUNTER_GROUP(VBIF, vbif2, + a3xx_counter_vbif_enable, a3xx_counter_vbif_read, NULL), + A3XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, vbif2_pwr, + ADRENO_PERFCOUNTER_GROUP_FIXED, + a3xx_counter_vbif_pwr_enable, a3xx_counter_vbif_pwr_read, + NULL), + +}; + +const struct adreno_perfcounters adreno_a3xx_perfcounters = { + a3xx_perfcounter_groups, + ARRAY_SIZE(a3xx_perfcounter_groups), +}; diff --git a/adreno_a3xx_ringbuffer.c b/adreno_a3xx_ringbuffer.c new file mode 100644 index 0000000000..9222af6b7c --- /dev/null +++ b/adreno_a3xx_ringbuffer.c @@ -0,0 +1,454 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. 
+ */ + +#include "adreno.h" +#include "adreno_a3xx.h" +#include "adreno_pm4types.h" +#include "adreno_ringbuffer.h" +#include "adreno_trace.h" +#include "kgsl_trace.h" + +static int a3xx_wait_reg(unsigned int *cmds, unsigned int addr, + unsigned int val, unsigned int mask, + unsigned int interval) +{ + cmds[0] = cp_type3_packet(CP_WAIT_REG_EQ, 4); + cmds[1] = addr; + cmds[2] = val; + cmds[3] = mask; + cmds[4] = interval; + + return 5; +} + +static int a3xx_vbif_lock(unsigned int *cmds) +{ + int count; + + /* + * glue commands together until next + * WAIT_FOR_ME + */ + count = a3xx_wait_reg(cmds, A3XX_CP_WFI_PEND_CTR, + 1, 0xFFFFFFFF, 0xF); + + /* MMU-500 VBIF stall */ + cmds[count++] = cp_type3_packet(CP_REG_RMW, 3); + cmds[count++] = A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0; + /* AND to unmask the HALT bit */ + cmds[count++] = ~(VBIF_RECOVERABLE_HALT_CTRL); + /* OR to set the HALT bit */ + cmds[count++] = 0x1; + + /* Wait for acknowledgment */ + count += a3xx_wait_reg(&cmds[count], + A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL1, + 1, 0xFFFFFFFF, 0xF); + + return count; +} + +static int a3xx_vbif_unlock(unsigned int *cmds) +{ + /* MMU-500 VBIF unstall */ + cmds[0] = cp_type3_packet(CP_REG_RMW, 3); + cmds[1] = A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0; + /* AND to unmask the HALT bit */ + cmds[2] = ~(VBIF_RECOVERABLE_HALT_CTRL); + /* OR to reset the HALT bit */ + cmds[3] = 0; + + /* release all commands since _vbif_lock() with wait_for_me */ + cmds[4] = cp_type3_packet(CP_WAIT_FOR_ME, 1); + cmds[5] = 0; + + return 6; +} + +#define A3XX_GPU_OFFSET 0xa000 + +static int a3xx_cp_smmu_reg(unsigned int *cmds, + u32 reg, + unsigned int num) +{ + cmds[0] = cp_type3_packet(CP_REG_WR_NO_CTXT, num + 1); + cmds[1] = (A3XX_GPU_OFFSET + reg) >> 2; + + return 2; +} + +/* This function is only needed for A3xx targets */ +static int a3xx_tlbiall(unsigned int *cmds) +{ + unsigned int tlbstatus = (A3XX_GPU_OFFSET + + KGSL_IOMMU_CTX_TLBSTATUS) >> 2; + int count; + + count = 
a3xx_cp_smmu_reg(cmds, KGSL_IOMMU_CTX_TLBIALL, 1); + cmds[count++] = 1; + + count += a3xx_cp_smmu_reg(&cmds[count], KGSL_IOMMU_CTX_TLBSYNC, 1); + cmds[count++] = 0; + + count += a3xx_wait_reg(&cmds[count], tlbstatus, 0, + KGSL_IOMMU_CTX_TLBSTATUS_SACTIVE, 0xF); + + return count; +} + +/* offset at which a nop command is placed in setstate */ +#define KGSL_IOMMU_SETSTATE_NOP_OFFSET 1024 + +static int a3xx_rb_pagetable_switch(struct adreno_device *adreno_dev, + struct kgsl_pagetable *pagetable, u32 *cmds) +{ + u64 ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pagetable); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + int count = 0; + + /* + * Adding an indirect buffer ensures that the prefetch stalls until + * the commands in indirect buffer have completed. We need to stall + * prefetch with a nop indirect buffer when updating pagetables + * because it provides stabler synchronization. + */ + cmds[count++] = cp_type3_packet(CP_WAIT_FOR_ME, 1); + cmds[count++] = 0; + + cmds[count++] = cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2); + cmds[count++] = lower_32_bits(iommu->setstate->gpuaddr); + cmds[count++] = 2; + + cmds[count++] = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + cmds[count++] = 0; + + cmds[count++] = cp_type3_packet(CP_WAIT_FOR_ME, 1); + cmds[count++] = 0; + + count += a3xx_vbif_lock(&cmds[count]); + + count += a3xx_cp_smmu_reg(&cmds[count], KGSL_IOMMU_CTX_TTBR0, 2); + cmds[count++] = lower_32_bits(ttbr0); + cmds[count++] = upper_32_bits(ttbr0); + + count += a3xx_vbif_unlock(&cmds[count]); + + count += a3xx_tlbiall(&cmds[count]); + + /* wait for me to finish the TLBI */ + cmds[count++] = cp_type3_packet(CP_WAIT_FOR_ME, 1); + cmds[count++] = 0; + cmds[count++] = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + cmds[count++] = 0; + + /* Invalidate the state */ + cmds[count++] = cp_type3_packet(CP_INVALIDATE_STATE, 1); + cmds[count++] = 0x7ffff; + + return count; +} + +#define RB_SOPTIMESTAMP(device, rb) \ + 
MEMSTORE_RB_GPU_ADDR(device, rb, soptimestamp) +#define CTXT_SOPTIMESTAMP(device, drawctxt) \ + MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, soptimestamp) + +#define RB_EOPTIMESTAMP(device, rb) \ + MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp) +#define CTXT_EOPTIMESTAMP(device, drawctxt) \ + MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, eoptimestamp) + +int a3xx_ringbuffer_init(struct adreno_device *adreno_dev) +{ + adreno_dev->num_ringbuffers = 1; + + adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]); + + return adreno_ringbuffer_setup(adreno_dev, + &adreno_dev->ringbuffers[0], 0); +} + +#define A3XX_SUBMIT_MAX 55 + +static int a3xx_ringbuffer_addcmds(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, + u32 flags, u32 *in, u32 dwords, u32 timestamp, + struct adreno_submit_time *time) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 size = A3XX_SUBMIT_MAX + dwords; + u32 *cmds, index = 0; + u64 profile_gpuaddr; + u32 profile_dwords; + + if (adreno_drawctxt_detached(drawctxt)) + return -ENOENT; + + if (adreno_gpu_fault(adreno_dev) != 0) + return -EPROTO; + + rb->timestamp++; + + if (drawctxt) + drawctxt->internal_timestamp = rb->timestamp; + + cmds = adreno_ringbuffer_allocspace(rb, size); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + + /* Identify the start of a command */ + cmds[index++] = cp_type3_packet(CP_NOP, 1); + cmds[index++] = drawctxt ? 
CMD_IDENTIFIER : CMD_INTERNAL_IDENTIFIER; + + if (IS_PWRON_FIXUP(flags)) { + cmds[index++] = cp_type3_packet(CP_SET_PROTECTED_MODE, 1); + cmds[index++] = 0; + + cmds[index++] = cp_type3_packet(CP_NOP, 1); + cmds[index++] = PWRON_FIXUP_IDENTIFIER; + + cmds[index++] = cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2); + cmds[index++] = lower_32_bits(adreno_dev->pwron_fixup->gpuaddr); + cmds[index++] = adreno_dev->pwron_fixup_dwords; + + cmds[index++] = cp_type3_packet(CP_SET_PROTECTED_MODE, 1); + cmds[index++] = 0; + } + + profile_gpuaddr = adreno_profile_preib_processing(adreno_dev, + drawctxt, &profile_dwords); + + if (profile_gpuaddr) { + cmds[index++] = cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2); + cmds[index++] = lower_32_bits(profile_gpuaddr); + cmds[index++] = profile_dwords; + } + + if (drawctxt) { + cmds[index++] = cp_type3_packet(CP_MEM_WRITE, 2); + cmds[index++] = lower_32_bits(CTXT_SOPTIMESTAMP(device, + drawctxt)); + cmds[index++] = timestamp; + } + + cmds[index++] = cp_type3_packet(CP_MEM_WRITE, 2); + cmds[index++] = lower_32_bits(RB_SOPTIMESTAMP(device, rb)); + cmds[index++] = rb->timestamp; + + if (IS_NOTPROTECTED(flags)) { + cmds[index++] = cp_type3_packet(CP_SET_PROTECTED_MODE, 1); + cmds[index++] = 0; + } + + memcpy(&cmds[index], in, dwords << 2); + index += dwords; + + if (IS_NOTPROTECTED(flags)) { + cmds[index++] = cp_type3_packet(CP_SET_PROTECTED_MODE, 1); + cmds[index++] = 1; + } + + /* + * Flush HLSQ lazy updates to make sure there are no resourses pending + * for indirect loads after the timestamp + */ + + cmds[index++] = cp_type3_packet(CP_EVENT_WRITE, 1); + cmds[index++] = 0x07; /* HLSQ FLUSH */ + cmds[index++] = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + cmds[index++] = 0; + + profile_gpuaddr = adreno_profile_postib_processing(adreno_dev, + drawctxt, &profile_dwords); + + if (profile_gpuaddr) { + cmds[index++] = cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2); + cmds[index++] = lower_32_bits(profile_gpuaddr); + cmds[index++] = profile_dwords; + } + + /* 
+ * If this is an internal command, just write the ringbuffer timestamp, + * otherwise, write both + */ + if (!drawctxt) { + cmds[index++] = cp_type3_packet(CP_EVENT_WRITE, 3); + cmds[index++] = CACHE_FLUSH_TS | (1 << 31); + cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb)); + cmds[index++] = rb->timestamp; + } else { + cmds[index++] = cp_type3_packet(CP_EVENT_WRITE, 3); + cmds[index++] = CACHE_FLUSH_TS | (1 << 31); + cmds[index++] = lower_32_bits(CTXT_EOPTIMESTAMP(device, + drawctxt)); + cmds[index++] = timestamp; + + cmds[index++] = cp_type3_packet(CP_EVENT_WRITE, 3); + cmds[index++] = CACHE_FLUSH_TS; + cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb)); + cmds[index++] = rb->timestamp; + } + + /* Trigger a context rollover */ + cmds[index++] = cp_type3_packet(CP_SET_CONSTANT, 2); + cmds[index++] = (4 << 16) | (A3XX_HLSQ_CL_KERNEL_GROUP_X_REG - 0x2000); + cmds[index++] = 0; + + if (IS_WFI(flags)) { + cmds[index++] = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + cmds[index++] = 0; + } + + /* Adjust the thing for the number of bytes we actually wrote */ + rb->_wptr -= (size - index); + + kgsl_pwrscale_busy(device); + kgsl_regwrite(device, A3XX_CP_RB_WPTR, rb->_wptr); + rb->wptr = rb->_wptr; + + return 0; +} + +static int a3xx_rb_context_switch(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, + struct adreno_context *drawctxt) +{ + struct kgsl_pagetable *pagetable = + adreno_drawctxt_get_pagetable(drawctxt); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int count = 0; + u32 cmds[64]; + + if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable) + count += a3xx_rb_pagetable_switch(adreno_dev, pagetable, cmds); + + cmds[count++] = cp_type3_packet(CP_NOP, 1); + cmds[count++] = CONTEXT_TO_MEM_IDENTIFIER; + + cmds[count++] = cp_type3_packet(CP_MEM_WRITE, 2); + cmds[count++] = lower_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb, + current_context)); + cmds[count++] = drawctxt->base.id; + + cmds[count++] = 
cp_type3_packet(CP_MEM_WRITE, 2); + cmds[count++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device, + KGSL_MEMSTORE_GLOBAL, current_context)); + cmds[count++] = drawctxt->base.id; + + cmds[count++] = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2); + cmds[count++] = 0; + cmds[count++] = 0x90000000; + + return a3xx_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED, + cmds, count, 0, NULL); +} + +static int a3xx_drawctxt_switch(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, + struct adreno_context *drawctxt) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (rb->drawctxt_active == drawctxt) + return 0; + + if (kgsl_context_detached(&drawctxt->base)) + return -ENOENT; + + if (!_kgsl_context_get(&drawctxt->base)) + return -ENOENT; + + trace_adreno_drawctxt_switch(rb, drawctxt); + + a3xx_rb_context_switch(adreno_dev, rb, drawctxt); + + /* Release the current drawctxt as soon as the new one is switched */ + adreno_put_drawctxt_on_timestamp(device, rb->drawctxt_active, + rb, rb->timestamp); + + rb->drawctxt_active = drawctxt; + return 0; +} + +#define A3XX_COMMAND_DWORDS 4 + +int a3xx_ringbuffer_submitcmd(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj, u32 flags, + struct adreno_submit_time *time) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context); + struct adreno_ringbuffer *rb = drawctxt->rb; + int ret = 0, numibs = 0, index = 0; + u32 *cmds; + + /* Count the number of IBs (if we are not skipping) */ + if (!IS_SKIP(flags)) { + struct list_head *tmp; + + list_for_each(tmp, &cmdobj->cmdlist) + numibs++; + } + + cmds = kmalloc((A3XX_COMMAND_DWORDS + (numibs * 4)) << 2, GFP_KERNEL); + if (!cmds) { + ret = -ENOMEM; + goto done; + } + + cmds[index++] = cp_type3_packet(CP_NOP, 1); + cmds[index++] = START_IB_IDENTIFIER; + + if (numibs) { + struct kgsl_memobj_node *ib; + + 
list_for_each_entry(ib, &cmdobj->cmdlist, node) { + if (ib->priv & MEMOBJ_SKIP || + (ib->flags & KGSL_CMDLIST_CTXTSWITCH_PREAMBLE + && !IS_PREAMBLE(flags))) + cmds[index++] = cp_type3_packet(CP_NOP, 3); + + cmds[index++] = + cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2); + cmds[index++] = lower_32_bits(ib->gpuaddr); + cmds[index++] = ib->size >> 2; + } + } + + cmds[index++] = cp_type3_packet(CP_NOP, 1); + cmds[index++] = END_IB_IDENTIFIER; + + ret = a3xx_drawctxt_switch(adreno_dev, rb, drawctxt); + + /* + * In the unlikely event of an error in the drawctxt switch, + * treat it like a hang + */ + if (ret) { + /* + * It is "normal" to get a -ENOSPC or a -ENOENT. Don't log it, + * the upper layers know how to handle it + */ + if (ret != -ENOSPC && ret != -ENOENT) + dev_err(device->dev, + "Unable to switch draw context: %d\n", + ret); + goto done; + } + + adreno_drawobj_set_constraint(device, drawobj); + + ret = a3xx_ringbuffer_addcmds(adreno_dev, drawctxt->rb, drawctxt, + flags, cmds, index, drawobj->timestamp, NULL); + +done: + trace_kgsl_issueibcmds(device, drawctxt->base.id, numibs, + drawobj->timestamp, drawobj->flags, ret, drawctxt->type); + + kfree(cmds); + return ret; +} diff --git a/adreno_a3xx_snapshot.c b/adreno_a3xx_snapshot.c new file mode 100644 index 0000000000..b43eb75a31 --- /dev/null +++ b/adreno_a3xx_snapshot.c @@ -0,0 +1,448 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2012-2017,2019-2020, The Linux Foundation. All rights reserved. + */ + +#include + +#include "adreno.h" +#include "adreno_a3xx.h" +#include "adreno_snapshot.h" +#include "kgsl_device.h" + +/* + * Set of registers to dump for A3XX on snapshot. 
+ * Registers in pairs - first value is the start offset, second + * is the stop offset (inclusive) + */ + +static const unsigned int a3xx_registers[] = { + 0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027, + 0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c, + 0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5, + 0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1, + 0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd, + 0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f6, 0x01f8, 0x01f9, + 0x01fc, 0x01ff, + 0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f, + 0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f, + 0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e, + 0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f, + 0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7, + 0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, + 0x0e41, 0x0e45, 0x0e64, 0x0e65, + 0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7, + 0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09, + 0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069, + 0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075, + 0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109, + 0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115, + 0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0, + 0x2240, 0x227e, + 0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8, + 0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7, + 0x22ff, 0x22ff, 0x2340, 0x2343, + 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d, + 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472, + 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef, + 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511, + 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed, + 0x25f0, 0x25f0, + 0x2640, 0x267e, 0x2680, 0x268b, 
0x26c0, 0x26c0, 0x26c4, 0x26ce, + 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec, + 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, + 0x300C, 0x300E, 0x301C, 0x301D, + 0x302A, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, 0x3034, 0x3036, + 0x303C, 0x303C, 0x305E, 0x305F, +}; + +/* Removed the following HLSQ register ranges from being read during + * fault tolerance since reading the registers may cause the device to hang: + */ +static const unsigned int a3xx_hlsq_registers[] = { + 0x0e00, 0x0e05, 0x0e0c, 0x0e0c, 0x0e22, 0x0e23, + 0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, + 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a, +}; + +/* Shader memory size in words */ +#define SHADER_MEMORY_SIZE 0x4000 + +/** + * _rbbm_debug_bus_read - Helper function to read data from the RBBM + * debug bus. + * @device - GPU device to read/write registers + * @block_id - Debug bus block to read from + * @index - Index in the debug bus block to read + * @ret - Value of the register read + */ +static void _rbbm_debug_bus_read(struct kgsl_device *device, + unsigned int block_id, unsigned int index, unsigned int *val) +{ + unsigned int block = (block_id << 8) | 1 << 16; + + kgsl_regwrite(device, A3XX_RBBM_DEBUG_BUS_CTL, block | index); + kgsl_regread(device, A3XX_RBBM_DEBUG_BUS_DATA_STATUS, val); +} + +/** + * a3xx_snapshot_shader_memory - Helper function to dump the GPU shader + * memory to the snapshot buffer. 
+ * @device: GPU device whose shader memory is to be dumped + * @buf: Pointer to binary snapshot data blob being made + * @remain: Number of remaining bytes in the snapshot blob + * @priv: Unused parameter + * + */ +static size_t a3xx_snapshot_shader_memory(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + void *data = buf + sizeof(*header); + unsigned int shader_read_len = SHADER_MEMORY_SIZE; + + if (remain < DEBUG_SECTION_SZ(shader_read_len)) { + SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_SHADER_MEMORY; + header->size = shader_read_len; + + /* Map shader memory to kernel, for dumping */ + if (IS_ERR_OR_NULL(device->shader_mem_virt)) { + struct resource *res; + + res = platform_get_resource_byname(device->pdev, + IORESOURCE_MEM, "kgsl_3d0_shader_memory"); + + if (res) + device->shader_mem_virt = + devm_ioremap_resource(&device->pdev->dev, res); + } + + if (IS_ERR_OR_NULL(device->shader_mem_virt)) { + dev_err(device->dev, "Unable to map the shader memory\n"); + return 0; + } + + memcpy_fromio(data, device->shader_mem_virt, shader_read_len << 2); + + return DEBUG_SECTION_SZ(shader_read_len); +} + +static size_t a3xx_snapshot_debugbus_block(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debugbus *header + = (struct kgsl_snapshot_debugbus *)buf; + struct adreno_debugbus_block *block = priv; + int i; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + size_t size; + + size = (0x40 * sizeof(unsigned int)) + sizeof(*header); + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + + header->id = block->block_id; + header->count = 0x40; + + for (i = 0; i < 0x40; i++) + _rbbm_debug_bus_read(device, block->block_id, i, &data[i]); + + return size; +} + +static struct adreno_debugbus_block debugbus_blocks[] = { + { RBBM_BLOCK_ID_CP, 0x52, }, + { 
RBBM_BLOCK_ID_RBBM, 0x40, }, + { RBBM_BLOCK_ID_VBIF, 0x40, }, + { RBBM_BLOCK_ID_HLSQ, 0x40, }, + { RBBM_BLOCK_ID_UCHE, 0x40, }, + { RBBM_BLOCK_ID_PC, 0x40, }, + { RBBM_BLOCK_ID_VFD, 0x40, }, + { RBBM_BLOCK_ID_VPC, 0x40, }, + { RBBM_BLOCK_ID_TSE, 0x40, }, + { RBBM_BLOCK_ID_RAS, 0x40, }, + { RBBM_BLOCK_ID_VSC, 0x40, }, + { RBBM_BLOCK_ID_SP_0, 0x40, }, + { RBBM_BLOCK_ID_SP_1, 0x40, }, + { RBBM_BLOCK_ID_SP_2, 0x40, }, + { RBBM_BLOCK_ID_SP_3, 0x40, }, + { RBBM_BLOCK_ID_TPL1_0, 0x40, }, + { RBBM_BLOCK_ID_TPL1_1, 0x40, }, + { RBBM_BLOCK_ID_TPL1_2, 0x40, }, + { RBBM_BLOCK_ID_TPL1_3, 0x40, }, + { RBBM_BLOCK_ID_RB_0, 0x40, }, + { RBBM_BLOCK_ID_RB_1, 0x40, }, + { RBBM_BLOCK_ID_RB_2, 0x40, }, + { RBBM_BLOCK_ID_RB_3, 0x40, }, + { RBBM_BLOCK_ID_MARB_0, 0x40, }, + { RBBM_BLOCK_ID_MARB_1, 0x40, }, + { RBBM_BLOCK_ID_MARB_2, 0x40, }, + { RBBM_BLOCK_ID_MARB_3, 0x40, }, +}; + +static void a3xx_snapshot_debugbus(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(debugbus_blocks); i++) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, snapshot, + a3xx_snapshot_debugbus_block, + (void *) &debugbus_blocks[i]); + } +} + +static void _snapshot_hlsq_regs(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + unsigned int next_pif = 0; + + /* + * Trying to read HLSQ registers when the HLSQ block is busy + * will cause the device to hang. The RBBM_DEBUG_BUS has information + * that will tell us if the HLSQ block is busy or not. Read values + * from the debug bus to ensure the HLSQ block is not busy (this + * is hardware dependent). If the HLSQ block is busy do not + * dump the registers, otherwise dump the HLSQ registers. 
+ */ + + /* + * tpif status bits: RBBM_BLOCK_ID_HLSQ index 4 [4:0] + * spif status bits: RBBM_BLOCK_ID_HLSQ index 7 [5:0] + * + * if ((tpif == 0, 1, 28) && (spif == 0, 1, 10)) + * then dump HLSQ registers + */ + + /* check tpif */ + _rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 4, &next_pif); + next_pif &= 0x1f; + if (next_pif != 0 && next_pif != 1 && next_pif != 28) + return; + + /* check spif */ + _rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 7, &next_pif); + next_pif &= 0x3f; + if (next_pif != 0 && next_pif != 1 && next_pif != 10) + return; + + SNAPSHOT_REGISTERS(device, snapshot, a3xx_hlsq_registers); +} + +#define VPC_MEM_SIZE 512 + +static size_t a3xx_snapshot_vpc_memory(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + size_t size = 4 * VPC_MEM_SIZE; + int bank, addr, i = 0; + + if (remain < DEBUG_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "VPC MEMORY"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_VPC_MEMORY; + header->size = size; + + for (bank = 0; bank < 4; bank++) { + for (addr = 0; addr < VPC_MEM_SIZE; addr++) { + unsigned int val = bank | (addr << 4); + + kgsl_regwrite(device, A3XX_VPC_VPC_DEBUG_RAM_SEL, val); + kgsl_regread(device, A3XX_VPC_VPC_DEBUG_RAM_READ, + &data[i++]); + } + } + + return DEBUG_SECTION_SZ(size); +} + +static size_t a3xx_snapshot_cp_pm4_ram(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_PM4); + size_t size = fw->size - 1; + + if (remain < DEBUG_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "CP PM4 RAM DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_PM4_RAM; + 
header->size = size; + + /* + * Read the firmware from the GPU rather than use our cache in order to + * try to catch mis-programming or corruption in the hardware. We do + * use the cached version of the size, however, instead of trying to + * maintain always changing hardcoded constants + */ + kgsl_regmap_read_indexed(&device->regmap, A3XX_CP_ME_RAM_RADDR, + A3XX_CP_ME_RAM_DATA, data, size); + + return DEBUG_SECTION_SZ(size); +} + +static size_t a3xx_snapshot_cp_pfp_ram(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_PFP); + int size = fw->size - 1; + + if (remain < DEBUG_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "CP PFP RAM DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_PFP_RAM; + header->size = size; + + /* + * Read the firmware from the GPU rather than use our cache in order to + * try to catch mis-programming or corruption in the hardware. 
We do + * use the cached version of the size, however, instead of trying to + * maintain always changing hardcoded constants + */ + kgsl_regmap_read_indexed(&device->regmap, A3XX_CP_PFP_UCODE_ADDR, + A3XX_CP_PFP_UCODE_DATA, data, size); + + return DEBUG_SECTION_SZ(size); +} + +static size_t a3xx_snapshot_cp_roq(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *) buf; + u32 *data = (u32 *) (buf + sizeof(*header)); + + if (remain < DEBUG_SECTION_SZ(128)) { + SNAPSHOT_ERR_NOMEM(device, "CP ROQ DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_ROQ; + header->size = 128; + + kgsl_regmap_read_indexed(&device->regmap, A3XX_CP_ROQ_ADDR, + A3XX_CP_ROQ_DATA, data, 128); + + return DEBUG_SECTION_SZ(128); +} + +static size_t a3xx_snapshot_cp_meq(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *) buf; + u32 *data = (u32 *) (buf + sizeof(*header)); + + if (remain < DEBUG_SECTION_SZ(16)) { + SNAPSHOT_ERR_NOMEM(device, "CP MEQ DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_MEQ; + header->size = 16; + + kgsl_regmap_read_indexed(&device->regmap, A3XX_CP_MEQ_ADDR, + A3XX_CP_MEQ_DATA, data, 16); + + return DEBUG_SECTION_SZ(16); +} + +/* + * a3xx_snapshot() - A3XX GPU snapshot function + * @adreno_dev: Device being snapshotted + * @snapshot: Snapshot meta data + * @remain: Amount of space left in snapshot memory + * + * This is where all of the A3XX specific bits and pieces are grabbed + * into the snapshot memory + */ +void a3xx_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int reg; + + /* Disable Clock gating temporarily for the debug bus to work */ + kgsl_regwrite(device, A3XX_RBBM_CLOCK_CTL, 0x0); + + /* Save some CP information that the generic snapshot uses */ + kgsl_regread(device, 
A3XX_CP_IB1_BASE, ®); + snapshot->ib1base = (u64) reg; + + kgsl_regread(device, A3XX_CP_IB2_BASE, ®); + snapshot->ib2base = (u64) reg; + + kgsl_regread(device, A3XX_CP_IB1_BUFSZ, &snapshot->ib1size); + kgsl_regread(device, A3XX_CP_IB2_BUFSZ, &snapshot->ib2size); + + SNAPSHOT_REGISTERS(device, snapshot, a3xx_registers); + + _snapshot_hlsq_regs(device, snapshot); + + kgsl_snapshot_indexed_registers(device, snapshot, + A3XX_CP_STATE_DEBUG_INDEX, A3XX_CP_STATE_DEBUG_DATA, 0, 0x14); + + /* CP_ME indexed registers */ + kgsl_snapshot_indexed_registers(device, snapshot, + A3XX_CP_ME_CNTL, A3XX_CP_ME_STATUS, 64, 44); + + /* VPC memory */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a3xx_snapshot_vpc_memory, NULL); + + /* CP MEQ */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, + a3xx_snapshot_cp_meq, NULL); + + /* Shader working/shadow memory */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a3xx_snapshot_shader_memory, NULL); + + + /* CP PFP and PM4 */ + + /* + * Reading the microcode while the CP is running will + * basically move the CP instruction pointer to + * whatever address we read. Big badaboom ensues. Stop the CP + * (if it isn't already stopped) to ensure that we are safe. + * We do this here and not earlier to avoid corrupting the RBBM + * status and CP registers - by the time we get here we don't + * care about the contents of the CP anymore. 
+ */ + + kgsl_regread(device, A3XX_CP_ME_CNTL, ®); + reg |= (1 << 27) | (1 << 28); + kgsl_regwrite(device, A3XX_CP_ME_CNTL, reg); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a3xx_snapshot_cp_pfp_ram, NULL); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a3xx_snapshot_cp_pm4_ram, NULL); + + /* CP ROQ */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a3xx_snapshot_cp_roq, NULL); + + a3xx_snapshot_debugbus(device, snapshot); +} diff --git a/adreno_a5xx.c b/adreno_a5xx.c new file mode 100644 index 0000000000..0b48af11fb --- /dev/null +++ b/adreno_a5xx.c @@ -0,0 +1,2726 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2014-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "adreno.h" +#include "adreno_a5xx.h" +#include "adreno_a5xx_packets.h" +#include "adreno_pm4types.h" +#include "adreno_trace.h" +#include "kgsl_trace.h" + +static int critical_packet_constructed; +static unsigned int crit_pkts_dwords; + +static void a5xx_irq_storm_worker(struct work_struct *work); +static int _read_fw2_block_header(struct kgsl_device *device, + uint32_t *header, uint32_t remain, + uint32_t id, uint32_t major, uint32_t minor); +static void a5xx_gpmu_reset(struct work_struct *work); +static int a5xx_gpmu_init(struct adreno_device *adreno_dev); + +/** + * Number of times to check if the regulator enabled before + * giving up and returning failure. + */ +#define PWR_RETRY 100 + +/** + * Number of times to check if the GPMU firmware is initialized before + * giving up and returning failure. 
+ */ +#define GPMU_FW_INIT_RETRY 5000 + +#define A530_QFPROM_RAW_PTE_ROW0_MSB 0x134 +#define A530_QFPROM_RAW_PTE_ROW2_MSB 0x144 + +#define A5XX_INT_MASK \ + ((1 << A5XX_INT_RBBM_AHB_ERROR) | \ + (1 << A5XX_INT_RBBM_TRANSFER_TIMEOUT) | \ + (1 << A5XX_INT_RBBM_ME_MS_TIMEOUT) | \ + (1 << A5XX_INT_RBBM_PFP_MS_TIMEOUT) | \ + (1 << A5XX_INT_RBBM_ETS_MS_TIMEOUT) | \ + (1 << A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW) | \ + (1 << A5XX_INT_RBBM_GPC_ERROR) | \ + (1 << A5XX_INT_CP_HW_ERROR) | \ + (1 << A5XX_INT_CP_CACHE_FLUSH_TS) | \ + (1 << A5XX_INT_RBBM_ATB_BUS_OVERFLOW) | \ + (1 << A5XX_INT_MISC_HANG_DETECT) | \ + (1 << A5XX_INT_UCHE_OOB_ACCESS) | \ + (1 << A5XX_INT_UCHE_TRAP_INTR) | \ + (1 << A5XX_INT_CP_SW) | \ + (1 << A5XX_INT_GPMU_FIRMWARE) | \ + (1 << A5XX_INT_GPMU_VOLTAGE_DROOP)) + +static int a5xx_probe(struct platform_device *pdev, + u32 chipid, const struct adreno_gpu_core *gpucore) +{ + struct adreno_device *adreno_dev; + struct kgsl_device *device; + int ret; + + adreno_dev = (struct adreno_device *) + of_device_get_match_data(&pdev->dev); + + memset(adreno_dev, 0, sizeof(*adreno_dev)); + + adreno_dev->gpucore = gpucore; + adreno_dev->chipid = chipid; + + adreno_reg_offset_init(gpucore->gpudev->reg_offsets); + + adreno_dev->sptp_pc_enabled = + ADRENO_FEATURE(adreno_dev, ADRENO_SPTP_PC); + + if (adreno_is_a540(adreno_dev)) + adreno_dev->throttling_enabled = true; + + adreno_dev->hwcg_enabled = true; + adreno_dev->lm_enabled = + ADRENO_FEATURE(adreno_dev, ADRENO_LM); + + /* Setup defaults that might get changed by the fuse bits */ + adreno_dev->lm_leakage = 0x4e001a; + + device = KGSL_DEVICE(adreno_dev); + + timer_setup(&device->idle_timer, kgsl_timer, 0); + + INIT_WORK(&device->idle_check_ws, kgsl_idle_check); + + ret = adreno_device_probe(pdev, adreno_dev); + if (ret) + return ret; + + return adreno_dispatcher_init(adreno_dev); +} + +static void _do_fixup(const struct adreno_critical_fixup *fixups, int count, + uint64_t *gpuaddrs, unsigned int *buffer) +{ + int i; + + for 
(i = 0; i < count; i++) { + buffer[fixups[i].lo_offset] = + lower_32_bits(gpuaddrs[fixups[i].buffer]) | + fixups[i].mem_offset; + + buffer[fixups[i].hi_offset] = + upper_32_bits(gpuaddrs[fixups[i].buffer]); + } +} + +static int a5xx_critical_packet_construct(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int *cmds; + uint64_t gpuaddrs[4]; + + adreno_dev->critpkts = kgsl_allocate_global(device, + PAGE_SIZE * 4, 0, 0, 0, "crit_pkts"); + if (IS_ERR(adreno_dev->critpkts)) + return PTR_ERR(adreno_dev->critpkts); + + adreno_dev->critpkts_secure = kgsl_allocate_global(device, + PAGE_SIZE, 0, KGSL_MEMFLAGS_SECURE, 0, "crit_pkts_secure"); + if (IS_ERR(adreno_dev->critpkts_secure)) + return PTR_ERR(adreno_dev->critpkts_secure); + + cmds = adreno_dev->critpkts->hostptr; + + gpuaddrs[0] = adreno_dev->critpkts_secure->gpuaddr; + gpuaddrs[1] = adreno_dev->critpkts->gpuaddr + PAGE_SIZE; + gpuaddrs[2] = adreno_dev->critpkts->gpuaddr + (PAGE_SIZE * 2); + gpuaddrs[3] = adreno_dev->critpkts->gpuaddr + (PAGE_SIZE * 3); + + crit_pkts_dwords = ARRAY_SIZE(_a5xx_critical_pkts); + + memcpy(cmds, _a5xx_critical_pkts, crit_pkts_dwords << 2); + + _do_fixup(critical_pkt_fixups, ARRAY_SIZE(critical_pkt_fixups), + gpuaddrs, cmds); + + cmds = adreno_dev->critpkts->hostptr + PAGE_SIZE; + memcpy(cmds, _a5xx_critical_pkts_mem01, + ARRAY_SIZE(_a5xx_critical_pkts_mem01) << 2); + + cmds = adreno_dev->critpkts->hostptr + (PAGE_SIZE * 2); + memcpy(cmds, _a5xx_critical_pkts_mem02, + ARRAY_SIZE(_a5xx_critical_pkts_mem02) << 2); + + cmds = adreno_dev->critpkts->hostptr + (PAGE_SIZE * 3); + memcpy(cmds, _a5xx_critical_pkts_mem03, + ARRAY_SIZE(_a5xx_critical_pkts_mem03) << 2); + + _do_fixup(critical_pkt_mem03_fixups, + ARRAY_SIZE(critical_pkt_mem03_fixups), gpuaddrs, cmds); + + critical_packet_constructed = 1; + + return 0; +} + +static int a5xx_microcode_read(struct adreno_device *adreno_dev); + +static int a5xx_init(struct adreno_device *adreno_dev) 
+{ + const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev); + int ret; + + ret = a5xx_ringbuffer_init(adreno_dev); + if (ret) + return ret; + + ret = a5xx_microcode_read(adreno_dev); + if (ret) + return ret; + + if (a5xx_has_gpmu(adreno_dev)) + INIT_WORK(&adreno_dev->gpmu_work, a5xx_gpmu_reset); + + adreno_dev->highest_bank_bit = a5xx_core->highest_bank_bit; + + INIT_WORK(&adreno_dev->irq_storm_work, a5xx_irq_storm_worker); + + if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CRITICAL_PACKETS)) + a5xx_critical_packet_construct(adreno_dev); + + adreno_create_profile_buffer(adreno_dev); + a5xx_crashdump_init(adreno_dev); + + return 0; +} + +static const struct { + u32 reg; + u32 base; + u32 count; +} a5xx_protected_blocks[] = { + /* RBBM */ + { A5XX_CP_PROTECT_REG_0, 0x004, 2 }, + { A5XX_CP_PROTECT_REG_0 + 1, 0x008, 3 }, + { A5XX_CP_PROTECT_REG_0 + 2, 0x010, 4 }, + { A5XX_CP_PROTECT_REG_0 + 3, 0x020, 5 }, + { A5XX_CP_PROTECT_REG_0 + 4, 0x040, 6 }, + { A5XX_CP_PROTECT_REG_0 + 5, 0x080, 6 }, + /* Content protection */ + { A5XX_CP_PROTECT_REG_0 + 6, A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 4 }, + { A5XX_CP_PROTECT_REG_0 + 7, A5XX_RBBM_SECVID_TRUST_CNTL, 1 }, + /* CP */ + { A5XX_CP_PROTECT_REG_0 + 8, 0x800, 6 }, + { A5XX_CP_PROTECT_REG_0 + 9, 0x840, 3 }, + { A5XX_CP_PROTECT_REG_0 + 10, 0x880, 5 }, + { A5XX_CP_PROTECT_REG_0 + 11, 0xaa0, 0 }, + /* RB */ + { A5XX_CP_PROTECT_REG_0 + 12, 0xcc0, 0 }, + { A5XX_CP_PROTECT_REG_0 + 13, 0xcf0, 1 }, + /* VPC */ + { A5XX_CP_PROTECT_REG_0 + 14, 0xe68, 3 }, + { A5XX_CP_PROTECT_REG_0 + 15, 0xe70, 4 }, + /* UCHE */ + { A5XX_CP_PROTECT_REG_0 + 16, 0xe80, 4 }, + /* A5XX_CP_PROTECT_REG_17 will be used for SMMU */ + /* A5XX_CP_PROTECT_REG_18 - A5XX_CP_PROTECT_REG_31 are available */ +}; + +static void _setprotectreg(struct kgsl_device *device, u32 offset, + u32 base, u32 count) +{ + kgsl_regwrite(device, offset, 0x60000000 | (count << 24) | (base << 2)); +} + +static void a5xx_protect_init(struct adreno_device *adreno_dev) +{ + struct 
kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 reg; + int i; + + /* enable access protection to privileged registers */ + kgsl_regwrite(device, A5XX_CP_PROTECT_CNTL, 0x00000007); + + for (i = 0; i < ARRAY_SIZE(a5xx_protected_blocks); i++) { + reg = a5xx_protected_blocks[i].reg; + + _setprotectreg(device, reg, a5xx_protected_blocks[i].base, + a5xx_protected_blocks[i].count); + } + + /* + * For a530 and a540 the SMMU region is 0x20000 bytes long and 0x10000 + * bytes on all other targets. The base offset for both is 0x40000. + * Write it to the next available slot + */ + if (adreno_is_a530(adreno_dev) || adreno_is_a540(adreno_dev)) + _setprotectreg(device, reg + 1, 0x40000, ilog2(0x20000)); + else + _setprotectreg(device, reg + 1, 0x40000, ilog2(0x10000)); +} + +/* + * _poll_gdsc_status() - Poll the GDSC status register + * @adreno_dev: The adreno device pointer + * @status_reg: Offset of the status register + * @status_value: The expected bit value + * + * Poll the status register till the power-on bit is equal to the + * expected value or the max retries are exceeded. 
+ */ +static int _poll_gdsc_status(struct adreno_device *adreno_dev, + unsigned int status_reg, + unsigned int status_value) +{ + unsigned int reg, retry = PWR_RETRY; + + /* Bit 20 is the power on bit of SPTP and RAC GDSC status register */ + do { + udelay(1); + kgsl_regread(KGSL_DEVICE(adreno_dev), status_reg, ®); + } while (((reg & BIT(20)) != (status_value << 20)) && retry--); + if ((reg & BIT(20)) != (status_value << 20)) + return -ETIMEDOUT; + return 0; +} + +static void a5xx_restore_isense_regs(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int reg, i, ramp = GPMU_ISENSE_SAVE; + static unsigned int isense_regs[6] = {0xFFFF}, isense_reg_addr[] = { + A5XX_GPU_CS_DECIMAL_ALIGN, + A5XX_GPU_CS_SENSOR_PARAM_CORE_1, + A5XX_GPU_CS_SENSOR_PARAM_CORE_2, + A5XX_GPU_CS_SW_OV_FUSE_EN, + A5XX_GPU_CS_ENDPOINT_CALIBRATION_DONE, + A5XX_GPMU_TEMP_SENSOR_CONFIG}; + + if (!adreno_is_a540(adreno_dev)) + return; + + /* read signature */ + kgsl_regread(device, ramp++, ®); + + if (reg == 0xBABEFACE) { + /* store memory locations in buffer */ + for (i = 0; i < ARRAY_SIZE(isense_regs); i++) + kgsl_regread(device, ramp + i, isense_regs + i); + + /* clear signature */ + kgsl_regwrite(device, GPMU_ISENSE_SAVE, 0x0); + } + + /* if we never stored memory locations - do nothing */ + if (isense_regs[0] == 0xFFFF) + return; + + /* restore registers from memory */ + for (i = 0; i < ARRAY_SIZE(isense_reg_addr); i++) + kgsl_regwrite(device, isense_reg_addr[i], isense_regs[i]); + +} + +/* + * a5xx_regulator_enable() - Enable any necessary HW regulators + * @adreno_dev: The adreno device pointer + * + * Some HW blocks may need their regulators explicitly enabled + * on a restart. Clocks must be on during this call. 
+ */ +static int a5xx_regulator_enable(struct adreno_device *adreno_dev) +{ + unsigned int ret; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (test_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, + &adreno_dev->priv)) + return 0; + + if (!(adreno_is_a530(adreno_dev) || adreno_is_a540(adreno_dev))) { + /* Halt the sp_input_clk at HM level */ + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL, 0x00000055); + a5xx_hwcg_set(adreno_dev, true); + /* Turn on sp_input_clk at HM level */ + kgsl_regrmw(device, A5XX_RBBM_CLOCK_CNTL, 0xFF, 0); + + set_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, + &adreno_dev->priv); + return 0; + } + + /* + * Turn on smaller power domain first to reduce voltage droop. + * Set the default register values; set SW_COLLAPSE to 0. + */ + kgsl_regwrite(device, A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000); + /* Insert a delay between RAC and SPTP GDSC to reduce voltage droop */ + udelay(3); + ret = _poll_gdsc_status(adreno_dev, A5XX_GPMU_RBCCU_PWR_CLK_STATUS, 1); + if (ret) { + dev_err(device->dev, "RBCCU GDSC enable failed\n"); + return ret; + } + + kgsl_regwrite(device, A5XX_GPMU_SP_POWER_CNTL, 0x778000); + ret = _poll_gdsc_status(adreno_dev, A5XX_GPMU_SP_PWR_CLK_STATUS, 1); + if (ret) { + dev_err(device->dev, "SPTP GDSC enable failed\n"); + return ret; + } + + /* Disable SP clock */ + kgsl_regrmw(device, A5XX_GPMU_GPMU_SP_CLOCK_CONTROL, + CNTL_IP_CLK_ENABLE, 0); + /* Enable hardware clockgating */ + a5xx_hwcg_set(adreno_dev, true); + /* Enable SP clock */ + kgsl_regrmw(device, A5XX_GPMU_GPMU_SP_CLOCK_CONTROL, + CNTL_IP_CLK_ENABLE, 1); + + a5xx_restore_isense_regs(adreno_dev); + + set_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, &adreno_dev->priv); + return 0; +} + +/* + * a5xx_regulator_disable() - Disable any necessary HW regulators + * @adreno_dev: The adreno device pointer + * + * Some HW blocks may need their regulators explicitly disabled + * on a power down to prevent current spikes. Clocks must be on + * during this call. 
+ */ +static void a5xx_regulator_disable(struct adreno_device *adreno_dev) +{ + unsigned int reg; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (adreno_is_a512(adreno_dev) || adreno_is_a508(adreno_dev)) + return; + + if (!test_and_clear_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, + &adreno_dev->priv)) + return; + + /* If feature is not supported or not enabled */ + if (!adreno_dev->sptp_pc_enabled) { + /* Set the default register values; set SW_COLLAPSE to 1 */ + kgsl_regwrite(device, A5XX_GPMU_SP_POWER_CNTL, 0x778001); + /* + * Insert a delay between SPTP and RAC GDSC to reduce voltage + * droop. + */ + udelay(3); + if (_poll_gdsc_status(adreno_dev, + A5XX_GPMU_SP_PWR_CLK_STATUS, 0)) + dev_warn(device->dev, "SPTP GDSC disable failed\n"); + + kgsl_regwrite(device, A5XX_GPMU_RBCCU_POWER_CNTL, 0x778001); + if (_poll_gdsc_status(adreno_dev, + A5XX_GPMU_RBCCU_PWR_CLK_STATUS, 0)) + dev_warn(device->dev, "RBCCU GDSC disable failed\n"); + } else if (test_bit(ADRENO_DEVICE_GPMU_INITIALIZED, + &adreno_dev->priv)) { + /* GPMU firmware is supposed to turn off SPTP & RAC GDSCs. */ + kgsl_regread(device, A5XX_GPMU_SP_PWR_CLK_STATUS, ®); + if (reg & BIT(20)) + dev_warn(device->dev, "SPTP GDSC is not disabled\n"); + kgsl_regread(device, A5XX_GPMU_RBCCU_PWR_CLK_STATUS, ®); + if (reg & BIT(20)) + dev_warn(device->dev, "RBCCU GDSC is not disabled\n"); + /* + * GPMU firmware is supposed to set GMEM to non-retention. + * Bit 14 is the memory core force on bit. 
+ */ + kgsl_regread(device, A5XX_GPMU_RBCCU_CLOCK_CNTL, ®); + if (reg & BIT(14)) + dev_warn(device->dev, "GMEM is forced on\n"); + } + + if (adreno_is_a530(adreno_dev)) { + /* Reset VBIF before PC to avoid popping bogus FIFO entries */ + kgsl_regwrite(device, A5XX_RBBM_BLOCK_SW_RESET_CMD, + 0x003C0000); + kgsl_regwrite(device, A5XX_RBBM_BLOCK_SW_RESET_CMD, 0); + } +} + +/* + * a5xx_enable_pc() - Enable the GPMU based power collapse of the SPTP and RAC + * blocks + * @adreno_dev: The adreno device pointer + */ +static void a5xx_enable_pc(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (!adreno_dev->sptp_pc_enabled) + return; + + kgsl_regwrite(device, A5XX_GPMU_PWR_COL_INTER_FRAME_CTRL, 0x0000007F); + kgsl_regwrite(device, A5XX_GPMU_PWR_COL_BINNING_CTRL, 0); + kgsl_regwrite(device, A5XX_GPMU_PWR_COL_INTER_FRAME_HYST, 0x000A0080); + kgsl_regwrite(device, A5XX_GPMU_PWR_COL_STAGGER_DELAY, 0x00600040); + + trace_adreno_sp_tp((unsigned long) __builtin_return_address(0)); +}; + +/* + * The maximum payload of a type4 packet is the max size minus one for the + * opcode + */ +#define TYPE4_MAX_PAYLOAD (PM4_TYPE4_PKT_SIZE_MAX - 1) + +static int _gpmu_create_load_cmds(struct adreno_device *adreno_dev, + uint32_t *ucode, uint32_t size) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + uint32_t *start, *cmds; + uint32_t offset = 0; + uint32_t cmds_size = size; + + /* Add a dword for each PM4 packet */ + cmds_size += (size / TYPE4_MAX_PAYLOAD) + 1; + + /* Add 4 dwords for the protected mode */ + cmds_size += 4; + + if (adreno_dev->gpmu_cmds != NULL) + return 0; + + adreno_dev->gpmu_cmds = devm_kmalloc(&device->pdev->dev, + cmds_size << 2, GFP_KERNEL); + if (adreno_dev->gpmu_cmds == NULL) + return -ENOMEM; + + cmds = adreno_dev->gpmu_cmds; + start = cmds; + + /* Turn CP protection OFF */ + cmds += cp_protected_mode(adreno_dev, cmds, 0); + + /* + * Prebuild the cmd stream to send to the GPU to load + * the GPMU firmware 
+ */ + while (size > 0) { + int tmp_size = size; + + if (size >= TYPE4_MAX_PAYLOAD) + tmp_size = TYPE4_MAX_PAYLOAD; + + *cmds++ = cp_type4_packet( + A5XX_GPMU_INST_RAM_BASE + offset, + tmp_size); + + memcpy(cmds, &ucode[offset], tmp_size << 2); + + cmds += tmp_size; + offset += tmp_size; + size -= tmp_size; + } + + /* Turn CP protection ON */ + cmds += cp_protected_mode(adreno_dev, cmds, 1); + + adreno_dev->gpmu_cmds_size = (size_t) (cmds - start); + + return 0; +} + + +/* + * _load_gpmu_firmware() - Load the ucode into the GPMU RAM + * @adreno_dev: Pointer to adreno device + */ +static int _load_gpmu_firmware(struct adreno_device *adreno_dev) +{ + uint32_t *data; + const struct firmware *fw = NULL; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev); + uint32_t *cmds, cmd_size; + int ret = -EINVAL; + u32 gmu_major = 1; + + if (!a5xx_has_gpmu(adreno_dev)) + return 0; + + /* a530 used GMU major 1 and A540 used GMU major 3 */ + if (adreno_is_a540(adreno_dev)) + gmu_major = 3; + + /* gpmu fw already saved and verified so do nothing new */ + if (adreno_dev->gpmu_cmds_size != 0) + return 0; + + if (a5xx_core->gpmufw_name == NULL) + return 0; + + ret = request_firmware(&fw, a5xx_core->gpmufw_name, &device->pdev->dev); + if (ret || fw == NULL) { + dev_err(&device->pdev->dev, + "request_firmware (%s) failed: %d\n", + a5xx_core->gpmufw_name, ret); + return ret; + } + + data = (uint32_t *)fw->data; + + if (data[0] >= (fw->size / sizeof(uint32_t)) || data[0] < 2) + goto err; + + if (data[1] != GPMU_FIRMWARE_ID) + goto err; + ret = _read_fw2_block_header(device, &data[2], + data[0] - 2, GPMU_FIRMWARE_ID, gmu_major, 0); + if (ret) + goto err; + + /* Integer overflow check for cmd_size */ + if (data[2] > (data[0] - 2)) + goto err; + + cmds = data + data[2] + 3; + cmd_size = data[0] - data[2] - 2; + + if (cmd_size > GPMU_INST_RAM_SIZE) { + dev_err(device->dev, + "GPMU firmware block size is larger than RAM 
size\n"); + goto err; + } + + /* Everything is cool, so create some commands */ + ret = _gpmu_create_load_cmds(adreno_dev, cmds, cmd_size); +err: + if (fw) + release_firmware(fw); + + return ret; +} + +static void a5xx_spin_idle_debug(struct adreno_device *adreno_dev, + const char *str) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int rptr, wptr; + unsigned int status, status3, intstatus; + unsigned int hwfault; + + dev_err(device->dev, str); + + kgsl_regread(device, A5XX_CP_RB_RPTR, &rptr); + kgsl_regread(device, A5XX_CP_RB_WPTR, &wptr); + + kgsl_regread(device, A5XX_RBBM_STATUS, &status); + kgsl_regread(device, A5XX_RBBM_STATUS3, &status3); + kgsl_regread(device, A5XX_RBBM_INT_0_STATUS, &intstatus); + kgsl_regread(device, A5XX_CP_HW_FAULT, &hwfault); + + + dev_err(device->dev, + "rb=%d pos=%X/%X rbbm_status=%8.8X/%8.8X int_0_status=%8.8X\n", + adreno_dev->cur_rb->id, rptr, wptr, status, status3, intstatus); + + dev_err(device->dev, " hwfault=%8.8X\n", hwfault); + + kgsl_device_snapshot(device, NULL, false); +} + +static int _gpmu_send_init_cmds(struct adreno_device *adreno_dev) +{ + struct adreno_ringbuffer *rb = adreno_dev->cur_rb; + uint32_t *cmds; + uint32_t size = adreno_dev->gpmu_cmds_size; + int ret; + + if (size == 0 || adreno_dev->gpmu_cmds == NULL) + return -EINVAL; + + cmds = adreno_ringbuffer_allocspace(rb, size); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + if (cmds == NULL) + return -ENOSPC; + + /* Copy to the RB the predefined fw sequence cmds */ + memcpy(cmds, adreno_dev->gpmu_cmds, size << 2); + + ret = a5xx_ringbuffer_submit(rb, NULL, true); + if (!ret) { + ret = adreno_spin_idle(adreno_dev, 2000); + if (ret) + a5xx_spin_idle_debug(adreno_dev, + "gpmu initialization failed to idle\n"); + } + return ret; +} + +/* + * a5xx_gpmu_start() - Initialize and start the GPMU + * @adreno_dev: Pointer to adreno device + * + * Load the GPMU microcode, set up any features such as hardware clock gating + * or IFPC, and take the GPMU out of 
 reset.
 */
static int a5xx_gpmu_start(struct adreno_device *adreno_dev)
{
	int ret;
	unsigned int reg, retry = GPMU_FW_INIT_RETRY;
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	if (!a5xx_has_gpmu(adreno_dev))
		return 0;

	ret = _gpmu_send_init_cmds(adreno_dev);
	if (ret)
		return ret;

	if (adreno_is_a530(adreno_dev)) {
		/* GPMU clock gating setup */
		kgsl_regwrite(device, A5XX_GPMU_WFI_CONFIG, 0x00004014);
	}
	/* Kick off GPMU firmware */
	kgsl_regwrite(device, A5XX_GPMU_CM3_SYSRESET, 0);
	/*
	 * The hardware team's estimation of GPMU firmware initialization
	 * latency is about 3000 cycles, that's about 5 to 24 usec.
	 */
	do {
		udelay(1);
		/* GPMU firmware writes 0xBABEFACE here once it is up */
		kgsl_regread(device, A5XX_GPMU_GENERAL_0, &reg);
	} while ((reg != 0xBABEFACE) && retry--);

	if (reg != 0xBABEFACE) {
		dev_err(device->dev,
				"GPMU firmware initialization timed out\n");
		return -ETIMEDOUT;
	}

	if (!adreno_is_a530(adreno_dev)) {
		/* Non-a530 parts report a firmware error code in GENERAL_1 */
		kgsl_regread(device, A5XX_GPMU_GENERAL_1, &reg);

		if (reg) {
			dev_err(device->dev,
				"GPMU firmware initialization failed: %d\n",
				reg);
			return -EIO;
		}
	}
	set_bit(ADRENO_DEVICE_GPMU_INITIALIZED, &adreno_dev->priv);
	/*
	 * We are in AWARE state and IRQ line from GPU to host is
	 * disabled.
	 * Read pending GPMU interrupts and clear GPMU_RBBM_INTR_INFO.
	 */
	kgsl_regread(device, A5XX_GPMU_RBBM_INTR_INFO, &reg);
	/*
	 * Clear RBBM interrupt mask if any of GPMU interrupts
	 * are pending.
	 */
	if (reg)
		kgsl_regwrite(device,
			A5XX_RBBM_INT_CLEAR_CMD,
			1 << A5XX_INT_GPMU_FIRMWARE);
	return ret;
}

/*
 * a5xx_hwcg_set() - Enable or disable hardware clock gating
 * @adreno_dev: Pointer to the adreno device
 * @on: true to program the per-core HWCG values, false to clear them
 *
 * Writes the core-specific HWCG register table and then the top-level
 * clock control registers. A no-op if HWCG is disabled for this device.
 */
void a5xx_hwcg_set(struct adreno_device *adreno_dev, bool on)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev);
	int i;

	if (!adreno_dev->hwcg_enabled)
		return;

	for (i = 0; i < a5xx_core->hwcg_count; i++)
		kgsl_regwrite(device, a5xx_core->hwcg[i].offset,
			on ? a5xx_core->hwcg[i].val : 0);

	/* enable top level HWCG */
	kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL, on ? 0xAAA8AA00 : 0);
	kgsl_regwrite(device, A5XX_RBBM_ISDB_CNT, on ? 0x00000182 : 0x00000180);
}

/*
 * _read_fw2_block_header() - Validate one block header of the register
 * sequence firmware file
 * @device: Pointer to the KGSL device
 * @header: Pointer to the header words (header[0] is the header size)
 * @remain: Number of words remaining in the block, used as an upper bound
 * @id: Expected sequence ID (e.g. GPMU_SEQUENCE_ID)
 * @major: Driver-supported major version to check against the header
 * @minor: Driver-supported minor version (mismatch only logs a warning)
 *
 * Return: 0 if the header is well formed and compatible, negative errno
 * otherwise.
 */
static int _read_fw2_block_header(struct kgsl_device *device,
		uint32_t *header, uint32_t remain,
		uint32_t id, uint32_t major, uint32_t minor)
{
	uint32_t header_size;
	int i = 1;

	if (header == NULL)
		return -ENOMEM;

	header_size = header[0];
	/* Headers have limited size and always occur as pairs of words */
	if (header_size > MAX_HEADER_SIZE || header_size >= remain ||
			header_size % 2 || header_size == 0)
		return -EINVAL;
	/* Sequences must have an identifying id first thing in their header */
	if (id == GPMU_SEQUENCE_ID) {
		if (header[i] != HEADER_SEQUENCE ||
			(header[i + 1] >= MAX_SEQUENCE_ID))
			return -EINVAL;
		i += 2;
	}
	/* Remaining fields are (ID, data) pairs */
	for (; i < header_size; i += 2) {
		switch (header[i]) {
		/* Major Version */
		case HEADER_MAJOR:
			if ((major > header[i + 1]) &&
				header[i + 1]) {
				dev_err(device->dev,
					"GPMU major version mis-match %d, %d\n",
					major, header[i + 1]);
				return -EINVAL;
			}
			break;
		case HEADER_MINOR:
			/* Minor mismatch is tolerated, only logged */
			if (minor > header[i + 1])
				dev_err(device->dev,
					"GPMU minor version mis-match %d %d\n",
					minor, header[i + 1]);
			break;
		case HEADER_DATE:
		case HEADER_TIME:
			break;
		default:
			dev_err(device->dev, "GPMU unknown header ID %d\n",
					header[i]);
		}
	}
	return 0;
}

/*
 * Read in the register sequence file and save pointers to the
 * necessary sequences.
 *
 * GPU sequence file format (one dword per field unless noted):
 * Block 1 length (length dword field not inclusive)
 * Block 1 type = Sequence = 3
 * Block Header length (length dword field not inclusive)
 * BH field ID = Sequence field ID
 * BH field data = Sequence ID
 * BH field ID
 * BH field data
 * ...
 * Opcode 0 ID
 * Opcode 0 data M words
 * Opcode 1 ID
 * Opcode 1 data N words
 * ...
 * Opcode X ID
 * Opcode X data O words
 * Block 2 length...
 */
static void _load_regfile(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev);
	const struct firmware *fw;
	uint64_t block_size = 0, block_total = 0;
	uint32_t fw_size, *block;
	int ret = -EINVAL;
	u32 lm_major = 1;

	if (!a5xx_core->regfw_name)
		return;

	ret = request_firmware(&fw, a5xx_core->regfw_name, &device->pdev->dev);
	if (ret) {
		dev_err(&device->pdev->dev, "request firmware failed %d, %s\n",
				ret, a5xx_core->regfw_name);
		return;
	}

	/* a530v2 lm_major was 3. a530v3 lm_major was 1 */
	if (adreno_is_a530v2(adreno_dev))
		lm_major = 3;

	fw_size = fw->size / sizeof(uint32_t);
	/* Min valid file of size 6, see file description */
	if (fw_size < 6)
		goto err;
	block = (uint32_t *)fw->data;
	/* All offset numbers calculated from file description */
	while (block_total < fw_size) {
		block_size = block[0];
		if (((block_total + block_size) >= fw_size)
				|| block_size < 5)
			goto err;
		if (block[1] != GPMU_SEQUENCE_ID)
			goto err;

		/* For now ignore blocks other than the LM sequence */
		if (block[4] == LM_SEQUENCE_ID) {
			ret = _read_fw2_block_header(device, &block[2],
				block_size - 2, GPMU_SEQUENCE_ID,
				lm_major, 0);
			if (ret)
				goto err;

			if (block[2] > (block_size - 2))
				goto err;
			/* lm_sequence points into fw->data */
			adreno_dev->lm_sequence = block + block[2] + 3;
			adreno_dev->lm_size = block_size - block[2] - 2;
		}
		block_total += (block_size + 1);
		block += (block_size + 1);
	}
	/*
	 * On success the firmware is deliberately NOT released:
	 * lm_sequence points directly into fw->data and must remain
	 * valid for later _execute_reg_sequence() calls.
	 */
	if (adreno_dev->lm_sequence)
		return;

err:
	release_firmware(fw);
	dev_err(device->dev,
		"Register file failed to load sz=%d bsz=%llu header=%d\n",
		fw_size, block_size, ret);
}

/*
 * _execute_reg_sequence() - Interpret and execute an LM opcode stream
 * @adreno_dev: Pointer to the adreno device
 * @opcode: Start of the opcode stream (from the register sequence file)
 * @length: Stream length in dwords
 *
 * Opcode 1: write a 32-bit value to a 64-bit register address (4 dwords).
 * Opcode 2: write a 64-bit value to a 64-bit register address (5 dwords).
 * Opcode 3: delay for the given number of microseconds (2 dwords).
 *
 * Return: 0 on success, -EINVAL on an unknown or truncated opcode.
 */
static int _execute_reg_sequence(struct adreno_device *adreno_dev,
		uint32_t *opcode, uint32_t length)
{
	uint32_t *cur = opcode;
	uint64_t reg, val;

	/* todo double check the reg writes */
	while ((cur - opcode) < length) {
		if (cur[0] == 1 && (length - (cur - opcode) >= 4)) {
			/* Write a 32 bit value to a 64 bit reg */
			reg = cur[2];
			reg = (reg << 32) | cur[1];
			kgsl_regwrite(KGSL_DEVICE(adreno_dev), reg, cur[3]);
			cur += 4;
		} else if (cur[0] == 2 && (length - (cur - opcode) >= 5)) {
			/* Write a 64 bit value to a 64 bit reg */
			reg = cur[2];
			reg = (reg << 32) | cur[1];
			val = cur[4];
			val = (val << 32) | cur[3];
			kgsl_regwrite(KGSL_DEVICE(adreno_dev), reg, val);
			cur += 5;
		} else if (cur[0] == 3 && (length - (cur - opcode) >= 2)) {
			/* Delay for X usec */
			udelay(cur[1]);
			cur += 2;
		} else
			return -EINVAL;
	}
	return 0;
}

/*
 * _write_voltage_table() - Write the power level voltage/frequency table
 * into GPMU message payload registers
 * @adreno_dev: Pointer to the adreno device
 * @addr: First payload register to write
 *
 * Layout: max_power, number of levels, then (mV, MHz) per power level.
 *
 * Return: the number of dwords written (2 + 2 per power level).
 */
static uint32_t _write_voltage_table(struct adreno_device *adreno_dev,
			unsigned int addr)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
	const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev);
	int i;
	struct dev_pm_opp *opp;
	unsigned int mvolt = 0;

	kgsl_regwrite(device, addr++, a5xx_core->max_power);
	kgsl_regwrite(device, addr++, pwr->num_pwrlevels);

	/* Write voltage in mV and frequency in MHz */
	for (i = 0; i < pwr->num_pwrlevels; i++) {
		opp = dev_pm_opp_find_freq_exact(&device->pdev->dev,
				pwr->pwrlevels[i].gpu_freq, true);
		/* _opp_get returns uV, convert to mV */
		if (!IS_ERR(opp)) {
			mvolt = dev_pm_opp_get_voltage(opp) / 1000;
			dev_pm_opp_put(opp);
		}
		/*
		 * NOTE(review): if the OPP lookup fails, the previous
		 * level's mvolt (or 0 for the first level) is reused.
		 */
		kgsl_regwrite(device, addr++, mvolt);
		kgsl_regwrite(device, addr++,
				pwr->pwrlevels[i].gpu_freq / 1000000);
	}
	return (pwr->num_pwrlevels * 2 + 2);
}

/*
 * lm_limit() - Get the limits-management power limit
 * @adreno_dev: Pointer to the adreno device
 *
 * Lazily reads "qcom,lm-limit" from the device tree, falling back to
 * LM_DEFAULT_LIMIT, and caches the result in adreno_dev->lm_limit.
 */
static uint32_t lm_limit(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	if (adreno_dev->lm_limit)
		return adreno_dev->lm_limit;

	if (of_property_read_u32(device->pdev->dev.of_node, "qcom,lm-limit",
		&adreno_dev->lm_limit))
		adreno_dev->lm_limit = LM_DEFAULT_LIMIT;

	return adreno_dev->lm_limit;
}
/*
 * a530_lm_init() - Initialize LM/DPM
 on the GPMU
 * @adreno_dev: The adreno device pointer
 */
static void a530_lm_init(struct adreno_device *adreno_dev)
{
	uint32_t length;
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev);

	if (!adreno_dev->lm_enabled)
		return;

	/* If something was wrong with the sequence file, return */
	if (adreno_dev->lm_sequence == NULL)
		return;

	/* Write LM registers including DPM ucode, coefficients, and config */
	if (_execute_reg_sequence(adreno_dev, adreno_dev->lm_sequence,
		adreno_dev->lm_size)) {
		/* If the sequence is invalid, it's not getting better */
		adreno_dev->lm_sequence = NULL;
		dev_warn(device->dev,
				"Invalid LM sequence\n");
		return;
	}

	kgsl_regwrite(device, A5XX_GPMU_TEMP_SENSOR_ID, a5xx_core->gpmu_tsens);
	kgsl_regwrite(device, A5XX_GPMU_DELTA_TEMP_THRESHOLD, 0x1);
	kgsl_regwrite(device, A5XX_GPMU_TEMP_SENSOR_CONFIG, 0x1);

	kgsl_regwrite(device, A5XX_GPMU_GPMU_VOLTAGE,
			(0x80000000 | device->pwrctrl.active_pwrlevel));
	/* use the leakage to set this value at runtime */
	kgsl_regwrite(device, A5XX_GPMU_BASE_LEAKAGE,
		adreno_dev->lm_leakage);

	/* Enable the power threshold and set it to 6000m */
	kgsl_regwrite(device, A5XX_GPMU_GPMU_PWR_THRESHOLD,
		0x80000000 | lm_limit(adreno_dev));

	kgsl_regwrite(device, A5XX_GPMU_BEC_ENABLE, 0x10001FFF);
	kgsl_regwrite(device, A5XX_GDPM_CONFIG1, 0x00201FF1);

	/* Send an initial message to the GPMU with the LM voltage table */
	kgsl_regwrite(device, AGC_MSG_STATE, 1);
	kgsl_regwrite(device, AGC_MSG_COMMAND, AGC_POWER_CONFIG_PRODUCTION_ID);
	length = _write_voltage_table(adreno_dev, AGC_MSG_PAYLOAD);
	kgsl_regwrite(device, AGC_MSG_PAYLOAD_SIZE, length * sizeof(uint32_t));
	kgsl_regwrite(device, AGC_INIT_MSG_MAGIC, AGC_INIT_MSG_VALUE);
}

/*
 * a530_lm_enable() - Enable the LM/DPM feature on the GPMU
 * @adreno_dev: The adreno device pointer
 */
static void a530_lm_enable(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	if (!adreno_dev->lm_enabled)
		return;

	/* If no sequence properly initialized, return */
	if (adreno_dev->lm_sequence == NULL)
		return;

	kgsl_regwrite(device, A5XX_GDPM_INT_MASK, 0x00000000);
	kgsl_regwrite(device, A5XX_GDPM_INT_EN, 0x0000000A);
	kgsl_regwrite(device, A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK, 0x00000001);
	kgsl_regwrite(device, A5XX_GPMU_TEMP_THRESHOLD_INTR_EN_MASK,
			0x00050000);
	kgsl_regwrite(device, A5XX_GPMU_THROTTLE_UNMASK_FORCE_CTRL,
			0x00030000);

	if (adreno_is_a530(adreno_dev))
		/* Program throttle control, do not enable idle DCS on v3+ */
		kgsl_regwrite(device, A5XX_GPMU_CLOCK_THROTTLE_CTRL,
			adreno_is_a530v2(adreno_dev) ? 0x00060011 : 0x00000011);
}

/*
 * a540_lm_init() - Initialize limits management on a540
 * @adreno_dev: The adreno device pointer
 *
 * Builds the AGC LM configuration word, sends the voltage table and the
 * configuration to the GPMU, and arms the power threshold.
 */
static void a540_lm_init(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	uint32_t agc_lm_config = AGC_BCL_DISABLED |
		((ADRENO_CHIPID_PATCH(adreno_dev->chipid) & 0x3)
		<< AGC_GPU_VERSION_SHIFT);
	unsigned int r;

	if (!adreno_dev->throttling_enabled)
		agc_lm_config |= AGC_THROTTLE_DISABLE;

	if (adreno_dev->lm_enabled) {
		agc_lm_config |=
			AGC_LM_CONFIG_ENABLE_GPMU_ADAPTIVE |
			AGC_LM_CONFIG_ISENSE_ENABLE;

		kgsl_regread(device, A5XX_GPMU_TEMP_SENSOR_CONFIG, &r);

		if ((r & GPMU_ISENSE_STATUS) == GPMU_ISENSE_END_POINT_CAL_ERR) {
			dev_err(device->dev,
				"GPMU: ISENSE end point calibration failure\n");
			agc_lm_config |= AGC_LM_CONFIG_ENABLE_ERROR;
		}
	}

	kgsl_regwrite(device, AGC_MSG_STATE, 0x80000001);
	kgsl_regwrite(device, AGC_MSG_COMMAND, AGC_POWER_CONFIG_PRODUCTION_ID);
	(void) _write_voltage_table(adreno_dev, AGC_MSG_PAYLOAD);
	kgsl_regwrite(device, AGC_MSG_PAYLOAD + AGC_LM_CONFIG, agc_lm_config);
	kgsl_regwrite(device, AGC_MSG_PAYLOAD + AGC_LEVEL_CONFIG,
		(unsigned int) ~(GENMASK(LM_DCVS_LIMIT, 0) |
				GENMASK(16+LM_DCVS_LIMIT, 16)));

	kgsl_regwrite(device, AGC_MSG_PAYLOAD_SIZE,
		(AGC_LEVEL_CONFIG + 1) * sizeof(uint32_t));
	kgsl_regwrite(device, AGC_INIT_MSG_MAGIC, AGC_INIT_MSG_VALUE);

	kgsl_regwrite(device, A5XX_GPMU_GPMU_VOLTAGE,
			(0x80000000 | device->pwrctrl.active_pwrlevel));

	kgsl_regwrite(device, A5XX_GPMU_GPMU_PWR_THRESHOLD,
		PWR_THRESHOLD_VALID | lm_limit(adreno_dev));

	kgsl_regwrite(device, A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK,
		VOLTAGE_INTR_EN);
}


/* Dispatch to the per-target LM enable routine (a530 only) */
static void a5xx_lm_enable(struct adreno_device *adreno_dev)
{
	if (adreno_is_a530(adreno_dev))
		a530_lm_enable(adreno_dev);
}

/* Dispatch to the per-target LM init routine */
static void a5xx_lm_init(struct adreno_device *adreno_dev)
{
	if (adreno_is_a530(adreno_dev))
		a530_lm_init(adreno_dev);
	else if (adreno_is_a540(adreno_dev))
		a540_lm_init(adreno_dev);
}

/*
 * gpmu_set_level() - Request a power level change from the GPMU
 * @adreno_dev: The adreno device pointer
 * @val: Level request word; bit 31 set marks the request as pending
 *
 * Polls until the GPMU clears the handshake bit (31) or ~100 iterations
 * elapse.
 *
 * Return: 0 on success, -ETIMEDOUT if the GPMU never acknowledged.
 */
static int gpmu_set_level(struct adreno_device *adreno_dev, unsigned int val)
{
	unsigned int reg;
	int retry = 100;

	kgsl_regwrite(KGSL_DEVICE(adreno_dev), A5XX_GPMU_GPMU_VOLTAGE, val);

	do {
		kgsl_regread(KGSL_DEVICE(adreno_dev), A5XX_GPMU_GPMU_VOLTAGE,
			&reg);
	} while ((reg & 0x80000000) && retry--);

	return (reg & 0x80000000) ? -ETIMEDOUT : 0;
}

/*
 * a5xx_pwrlevel_change_settings() - Program the hardware during power level
 * transitions
 * @adreno_dev: The adreno device pointer
 * @prelevel: The previous power level
 * @postlevel: The new power level
 * @post: True if called after the clock change has taken effect
 */
static void a5xx_pwrlevel_change_settings(struct adreno_device *adreno_dev,
				unsigned int prelevel, unsigned int postlevel,
				bool post)
{
	/*
	 * On pre A540 HW only call through if LMx is supported and enabled, and
	 * always call through for a540
	 */
	if (!adreno_is_a540(adreno_dev) && !adreno_dev->lm_enabled)
		return;

	if (!post) {
		if (gpmu_set_level(adreno_dev, (0x80000010 | postlevel)))
			dev_err(KGSL_DEVICE(adreno_dev)->dev,
				"GPMU pre powerlevel did not stabilize\n");
	} else {
		if (gpmu_set_level(adreno_dev, (0x80000000 | postlevel)))
			dev_err(KGSL_DEVICE(adreno_dev)->dev,
				"GPMU post powerlevel did not stabilize\n");
	}
}

#if IS_ENABLED(CONFIG_COMMON_CLK_QCOM)
/*
 * a5xx_clk_set_options() - Apply retention flags to GFX clocks
 * @adreno_dev: The adreno device pointer
 * @name: Clock name as registered in the clock table
 * @clk: Clock handle (may be NULL)
 * @on: True when the clock is being enabled
 *
 * Only applies to a540/a512/a508 which have GFX PSCBCs.
 */
static void a5xx_clk_set_options(struct adreno_device *adreno_dev,
	const char *name, struct clk *clk, bool on)
{
	if (!clk)
		return;

	if (!adreno_is_a540(adreno_dev) && !adreno_is_a512(adreno_dev) &&
		!adreno_is_a508(adreno_dev))
		return;

	/* Handle clock settings for GFX PSCBCs */
	if (on) {
		if (!strcmp(name, "mem_iface_clk")) {
			qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_PERIPH);
			qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_MEM);
		} else if (!strcmp(name, "core_clk")) {
			qcom_clk_set_flags(clk, CLKFLAG_RETAIN_PERIPH);
			qcom_clk_set_flags(clk, CLKFLAG_RETAIN_MEM);
		}
	} else {
		if (!strcmp(name, "core_clk")) {
			qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_PERIPH);
			qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_MEM);
		}
	}
}
#endif

/* FW driven idle 10% throttle */
#define IDLE_10PCT 0
/* number of cycles when clock is throttled by 50% (CRC) */
#define CRC_50PCT 1
/* number of cycles when clock is throttled by more than 50% (CRC) */
#define CRC_MORE50PCT 2
/* number of cycles when clock is throttled by less than 50% (CRC) */
#define CRC_LESS50PCT 3

/*
 * a5xx_read_throttling_counters() - Compute the busy-cycle adjustment
 * caused by GPMU clock throttling
 * @adreno_dev: The adreno device pointer
 *
 * Reads the deltas of the four GPMU throttle perfcounters and weights
 * them into a single signed cycle adjustment for busy accounting.
 *
 * Return: the adjustment in cycles, or 0 if throttling is disabled or a
 * counter was never reserved.
 */
static int64_t a5xx_read_throttling_counters(struct adreno_device *adreno_dev)
{
	int i;
	int64_t adj;
	uint32_t th[ADRENO_GPMU_THROTTLE_COUNTERS];
	struct adreno_busy_data *busy = &adreno_dev->busy_data;

	if (!adreno_dev->throttling_enabled)
		return 0;

	for (i = 0; i < ADRENO_GPMU_THROTTLE_COUNTERS; i++) {
		if (!adreno_dev->gpmu_throttle_counters[i])
			return 0;

		th[i] = counter_delta(KGSL_DEVICE(adreno_dev),
			adreno_dev->gpmu_throttle_counters[i],
			&busy->throttle_cycles[i]);
	}
	adj = th[CRC_MORE50PCT] - th[IDLE_10PCT];
	adj = th[CRC_50PCT] + th[CRC_LESS50PCT] / 3 + (adj < 0 ? 0 : adj) * 3;

	trace_kgsl_clock_throttling(
		th[IDLE_10PCT], th[CRC_50PCT],
		th[CRC_MORE50PCT], th[CRC_LESS50PCT],
		adj);
	return adj;
}

/*
 * a5xx_gpmu_reset() - Re-enable GPMU based power features and restart GPMU
 * @work: Pointer to the work struct for gpmu reset
 *
 * Load the GPMU microcode, set up any features such as hardware clock gating
 * or IFPC, and take the GPMU out of reset.
 */
static void a5xx_gpmu_reset(struct work_struct *work)
{
	struct adreno_device *adreno_dev = container_of(work,
			struct adreno_device, gpmu_work);
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	if (test_bit(ADRENO_DEVICE_GPMU_INITIALIZED, &adreno_dev->priv))
		return;

	/*
	 * If GPMU has already experienced a restart or is in the process of it
	 * after the watchdog timeout, then there is no need to reset GPMU
	 * again.
 */
	if (device->state != KGSL_STATE_NAP &&
		device->state != KGSL_STATE_AWARE &&
		device->state != KGSL_STATE_ACTIVE)
		return;

	mutex_lock(&device->mutex);

	if (device->state == KGSL_STATE_NAP)
		kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE);

	if (a5xx_regulator_enable(adreno_dev))
		goto out;

	/* Soft reset of the GPMU block */
	kgsl_regwrite(device, A5XX_RBBM_BLOCK_SW_RESET_CMD, BIT(16));

	/* GPU comes up in secured mode, make it unsecured by default */
	if (!ADRENO_FEATURE(adreno_dev, ADRENO_CONTENT_PROTECTION))
		kgsl_regwrite(device, A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);


	a5xx_gpmu_init(adreno_dev);

out:
	mutex_unlock(&device->mutex);
}

/*
 * _setup_throttling_counters() - Reserve the GPMU throttle perfcounters
 * @adreno_dev: The adreno device pointer
 *
 * a540 only: reserves the four GPMU power-group countables used by
 * a5xx_read_throttling_counters() and resets their cached cycle values.
 */
static void _setup_throttling_counters(struct adreno_device *adreno_dev)
{
	int i, ret = 0;

	if (!adreno_is_a540(adreno_dev))
		return;

	for (i = 0; i < ADRENO_GPMU_THROTTLE_COUNTERS; i++) {
		/* reset throttled cycles value */
		adreno_dev->busy_data.throttle_cycles[i] = 0;

		/* Throttle countables start at offset 43 */
		ret |= adreno_perfcounter_kernel_get(adreno_dev,
			KGSL_PERFCOUNTER_GROUP_GPMU_PWR, 43 + i,
			&adreno_dev->gpmu_throttle_counters[i], NULL);
	}

	WARN_ONCE(ret, "Unable to get one or more clock throttling registers\n");
}

/*
 * a5xx_start() - Device start
 * @adreno_dev: Pointer to adreno device
 *
 * a5xx device start
 */
static int a5xx_start(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev);
	unsigned int bit;
	int ret;

	ret = kgsl_mmu_start(device);
	if (ret)
		return ret;

	adreno_get_bus_counters(adreno_dev);
	adreno_perfcounter_restore(adreno_dev);

	adreno_dev->irq_mask = A5XX_INT_MASK;

	/* a530 with LM uses GPMU power countable 27 as the threshold count */
	if (adreno_is_a530(adreno_dev) &&
		ADRENO_FEATURE(adreno_dev, ADRENO_LM))
		adreno_perfcounter_kernel_get(adreno_dev,
			KGSL_PERFCOUNTER_GROUP_GPMU_PWR, 27,
			&adreno_dev->lm_threshold_count, NULL);

	/* Enable 64 bit addressing */
	kgsl_regwrite(device, A5XX_CP_ADDR_MODE_CNTL, 0x1);
	kgsl_regwrite(device, A5XX_VSC_ADDR_MODE_CNTL, 0x1);
	kgsl_regwrite(device, A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
	kgsl_regwrite(device, A5XX_RB_ADDR_MODE_CNTL, 0x1);
	kgsl_regwrite(device, A5XX_PC_ADDR_MODE_CNTL, 0x1);
	kgsl_regwrite(device, A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
	kgsl_regwrite(device, A5XX_VFD_ADDR_MODE_CNTL, 0x1);
	kgsl_regwrite(device, A5XX_VPC_ADDR_MODE_CNTL, 0x1);
	kgsl_regwrite(device, A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
	kgsl_regwrite(device, A5XX_SP_ADDR_MODE_CNTL, 0x1);
	kgsl_regwrite(device, A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
	kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);

	_setup_throttling_counters(adreno_dev);

	/* Set up VBIF registers from the GPU core definition */
	kgsl_regmap_multi_write(&device->regmap, a5xx_core->vbif,
		a5xx_core->vbif_count);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	kgsl_regwrite(device, A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Program RBBM counter 0 to report GPU busy for frequency scaling */
	kgsl_regwrite(device, A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);

	/*
	 * Enable the RBBM error reporting bits. This lets us get
	 * useful information on failure
	 */
	kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL0, 0x00000001);

	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_FAULT_DETECT_MASK)) {
		/*
		 * We have 4 RB units, and only RB0 activity signals are
		 * working correctly. Mask out RB1-3 activity signals
		 * from the HW hang detection logic as per
		 * recommendation of hardware team.
		 */
		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
				0xF0000000);
		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
				0xFFFFFFFF);
		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
				0xFFFFFFFF);
		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
				0xFFFFFFFF);
		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
				0xFFFFFFFF);
		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
				0xFFFFFFFF);
		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
				0xFFFFFFFF);
		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
				0xFFFFFFFF);
	}

	/*
	 * Set hang detection threshold to 4 million cycles
	 * (0x3FFFF*16)
	 */
	kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
			(1 << 30) | 0x3FFFF);

	/* Turn on performance counters */
	kgsl_regwrite(device, A5XX_RBBM_PERFCTR_CNTL, 0x01);

	/*
	 * This is to increase performance by restricting VFD's cache access,
	 * so that LRZ and other data get evicted less.
	 */
	kgsl_regwrite(device, A5XX_UCHE_CACHE_WAYS, 0x02);

	/*
	 * Set UCHE_WRITE_THRU_BASE to the UCHE_TRAP_BASE effectively
	 * disabling L2 bypass
	 */
	kgsl_regwrite(device, A5XX_UCHE_TRAP_BASE_LO, 0xffff0000);
	kgsl_regwrite(device, A5XX_UCHE_TRAP_BASE_HI, 0x0001ffff);
	kgsl_regwrite(device, A5XX_UCHE_WRITE_THRU_BASE_LO, 0xffff0000);
	kgsl_regwrite(device, A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001ffff);

	/* Program the GMEM VA range for the UCHE path */
	kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MIN_LO,
			adreno_dev->gpucore->gmem_base);
	kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x0);
	kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MAX_LO,
			adreno_dev->gpucore->gmem_base +
			adreno_dev->gpucore->gmem_size - 1);
	kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x0);

	/*
	 * Below CP registers are 0x0 by default, program init
	 * values based on a5xx flavor.
	 */
	if (adreno_is_a505_or_a506(adreno_dev) || adreno_is_a508(adreno_dev)) {
		kgsl_regwrite(device, A5XX_CP_MEQ_THRESHOLDS, 0x20);
		kgsl_regwrite(device, A5XX_CP_MERCIU_SIZE, 0x400);
		kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
		kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
	} else if (adreno_is_a510(adreno_dev)) {
		kgsl_regwrite(device, A5XX_CP_MEQ_THRESHOLDS, 0x20);
		kgsl_regwrite(device, A5XX_CP_MERCIU_SIZE, 0x20);
		kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
		kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
	} else if (adreno_is_a540(adreno_dev) || adreno_is_a512(adreno_dev)) {
		kgsl_regwrite(device, A5XX_CP_MEQ_THRESHOLDS, 0x40);
		kgsl_regwrite(device, A5XX_CP_MERCIU_SIZE, 0x400);
		kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
		kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
	} else {
		kgsl_regwrite(device, A5XX_CP_MEQ_THRESHOLDS, 0x40);
		kgsl_regwrite(device, A5XX_CP_MERCIU_SIZE, 0x40);
		kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
		kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
	}

	/*
	 * vtxFifo and primFifo thresholds default values
	 * are different.
	 */
	if (adreno_is_a505_or_a506(adreno_dev) || adreno_is_a508(adreno_dev))
		kgsl_regwrite(device, A5XX_PC_DBG_ECO_CNTL,
				(0x100 << 11 | 0x100 << 22));
	else if (adreno_is_a510(adreno_dev) || adreno_is_a512(adreno_dev))
		kgsl_regwrite(device, A5XX_PC_DBG_ECO_CNTL,
				(0x200 << 11 | 0x200 << 22));
	else
		kgsl_regwrite(device, A5XX_PC_DBG_ECO_CNTL,
				(0x400 << 11 | 0x300 << 22));

	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_TWO_PASS_USE_WFI)) {
		/*
		 * Set TWOPASSUSEWFI in A5XX_PC_DBG_ECO_CNTL for
		 * microcodes after v77
		 */
		if ((adreno_compare_pfp_version(adreno_dev, 0x5FF077) >= 0))
			kgsl_regrmw(device, A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
	}

	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_DISABLE_RB_DP2CLOCKGATING)) {
		/*
		 * Disable RB sampler datapath DP2 clock gating
		 * optimization for 1-SP GPU's, by default it is enabled.
		 */
		kgsl_regrmw(device, A5XX_RB_DBG_ECO_CNT, 0, (1 << 9));
	}
	/*
	 * Disable UCHE global filter as SP can invalidate/flush
	 * independently
	 */
	kgsl_regwrite(device, A5XX_UCHE_MODE_CNTL, BIT(29));
	/* Set the USE_RETENTION_FLOPS chicken bit */
	kgsl_regwrite(device, A5XX_CP_CHICKEN_DBG, 0x02000000);

	/* Enable ISDB mode if requested */
	if (test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv)) {
		if (!adreno_active_count_get(adreno_dev)) {
			/*
			 * Disable ME/PFP split timeouts when the debugger is
			 * enabled because the CP doesn't know when a shader is
			 * in active debug
			 */
			kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL1, 0x06FFFFFF);

			/* Force the SP0/SP1 clocks on to enable ISDB */
			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP0, 0x0);
			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP1, 0x0);
			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP2, 0x0);
			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP3, 0x0);
			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP0, 0x0);
			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP1, 0x0);
			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP2, 0x0);
			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP3, 0x0);

			/* disable HWCG */
			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL, 0x0);
			kgsl_regwrite(device, A5XX_RBBM_ISDB_CNT, 0x0);
		} else
			dev_err(device->dev,
				"Active count failed while turning on ISDB\n");
	} else {
		/* if not in ISDB mode enable ME/PFP split notification */
		kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
	}

	kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL2, 0x0000003F);
	bit = adreno_dev->highest_bank_bit ?
		(adreno_dev->highest_bank_bit - 13) & 0x03 : 0;
	/*
	 * Program the highest DDR bank bit that was passed in
	 * from the DT in a handful of registers. Some of these
	 * registers will also be written by the UMD, but we
	 * want to program them in case we happen to use the
	 * UCHE before the UMD does
	 */

	kgsl_regwrite(device, A5XX_TPL1_MODE_CNTL, bit << 7);
	kgsl_regwrite(device, A5XX_RB_MODE_CNTL, bit << 1);
	if (adreno_is_a540(adreno_dev) || adreno_is_a512(adreno_dev))
		kgsl_regwrite(device, A5XX_UCHE_DBG_ECO_CNTL_2, bit);

	/* Disable All flat shading optimization */
	kgsl_regrmw(device, A5XX_VPC_DBG_ECO_CNTL, 0, 0x1 << 10);

	/*
	 * VPC corner case with local memory load kill leads to corrupt
	 * internal state. Normal Disable does not work for all a5x chips.
	 * So do the following setting to disable it.
	 */
	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_DISABLE_LMLOADKILL)) {
		kgsl_regrmw(device, A5XX_VPC_DBG_ECO_CNTL, 0, 0x1 << 23);
		kgsl_regrmw(device, A5XX_HLSQ_DBG_ECO_CNTL, 0x1 << 18, 0);
	}

	/* Program the secure (content protection) carveout if present */
	if (device->mmu.secured) {
		kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_CNTL, 0x0);
		kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
			lower_32_bits(KGSL_IOMMU_SECURE_BASE(&device->mmu)));
		kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI,
			upper_32_bits(KGSL_IOMMU_SECURE_BASE(&device->mmu)));
		kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE,
			KGSL_IOMMU_SECURE_SIZE(&device->mmu));
	}

	a5xx_preemption_start(adreno_dev);
	a5xx_protect_init(adreno_dev);

	return 0;
}

/*
 * Follow the ME_INIT sequence with a preemption yield to allow the GPU to move
 * to a different ringbuffer, if desired
 */
static int _preemption_init(
			struct adreno_device *adreno_dev,
			struct adreno_ringbuffer *rb, unsigned int *cmds,
			struct kgsl_context *context)
{
	unsigned int *cmds_orig = cmds;
	uint64_t gpuaddr = rb->preemption_desc->gpuaddr;

	/* Turn CP protection OFF */
	cmds += cp_protected_mode(adreno_dev, cmds, 0);
	/*
	 * CP during context switch will save context switch info to
	 * a5xx_cp_preemption_record pointed by CONTEXT_SWITCH_SAVE_ADDR
	 */
	*cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 1);
	*cmds++ = lower_32_bits(gpuaddr);
	*cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI, 1);
	*cmds++ = upper_32_bits(gpuaddr);

	/* Turn CP protection ON */
	cmds += cp_protected_mode(adreno_dev, cmds, 1);

	*cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_GLOBAL, 1);
	*cmds++ = 0;

	*cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_LOCAL, 1);
	*cmds++ = 1;

	/* Enable yield in RB only */
	*cmds++ = cp_type7_packet(CP_YIELD_ENABLE, 1);
	*cmds++ = 1;

	*cmds++ = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4);
	cmds += cp_gpuaddr(adreno_dev, cmds, 0x0);
	*cmds++ = 0;
	/* generate interrupt on
preemption completion */ + *cmds++ = 1; + + return cmds - cmds_orig; +} + +static int a5xx_post_start(struct adreno_device *adreno_dev) +{ + int ret; + unsigned int *cmds, *start; + struct adreno_ringbuffer *rb = adreno_dev->cur_rb; + + if (!adreno_is_a530(adreno_dev) && + !adreno_is_preemption_enabled(adreno_dev)) + return 0; + + cmds = adreno_ringbuffer_allocspace(rb, 42); + if (IS_ERR(cmds)) { + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + dev_err(device->dev, + "error allocating preemtion init cmds\n"); + return PTR_ERR(cmds); + } + start = cmds; + + /* + * Send a pipeline stat event whenever the GPU gets powered up + * to cause misbehaving perf counters to start ticking + */ + if (adreno_is_a530(adreno_dev)) { + *cmds++ = cp_packet(adreno_dev, CP_EVENT_WRITE, 1); + *cmds++ = 0xF; + } + + if (adreno_is_preemption_enabled(adreno_dev)) { + cmds += _preemption_init(adreno_dev, rb, cmds, NULL); + rb->_wptr = rb->_wptr - (42 - (cmds - start)); + ret = a5xx_ringbuffer_submit(rb, NULL, false); + } else { + rb->_wptr = rb->_wptr - (42 - (cmds - start)); + ret = a5xx_ringbuffer_submit(rb, NULL, true); + } + + if (!ret) { + ret = adreno_spin_idle(adreno_dev, 2000); + if (ret) + a5xx_spin_idle_debug(adreno_dev, + "hw initialization failed to idle\n"); + } + + return ret; +} + +static int a5xx_gpmu_init(struct adreno_device *adreno_dev) +{ + int ret; + + /* Set up LM before initializing the GPMU */ + a5xx_lm_init(adreno_dev); + + /* Enable SPTP based power collapse before enabling GPMU */ + a5xx_enable_pc(adreno_dev); + + ret = a5xx_gpmu_start(adreno_dev); + if (ret) + return ret; + + /* Enable limits management */ + a5xx_lm_enable(adreno_dev); + return 0; +} + +static int a5xx_zap_shader_resume(struct kgsl_device *device) +{ + int ret = qcom_scm_set_remote_state(0, 13); + + if (ret) + dev_err(device->dev, + "SCM zap resume call failed: %d\n", ret); + + return ret; +} + +/* + * a5xx_microcode_load() - Load microcode + * @adreno_dev: Pointer to adreno device + 
 */
static int a5xx_microcode_load(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct adreno_firmware *pm4_fw = ADRENO_FW(adreno_dev, ADRENO_FW_PM4);
	struct adreno_firmware *pfp_fw = ADRENO_FW(adreno_dev, ADRENO_FW_PFP);
	const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev);
	uint64_t gpuaddr;

	/* Point the CP at the PM4 and PFP microcode GPU addresses */
	gpuaddr = pm4_fw->memdesc->gpuaddr;
	kgsl_regwrite(device, A5XX_CP_PM4_INSTR_BASE_LO,
				lower_32_bits(gpuaddr));
	kgsl_regwrite(device, A5XX_CP_PM4_INSTR_BASE_HI,
				upper_32_bits(gpuaddr));

	gpuaddr = pfp_fw->memdesc->gpuaddr;
	kgsl_regwrite(device, A5XX_CP_PFP_INSTR_BASE_LO,
				lower_32_bits(gpuaddr));
	kgsl_regwrite(device, A5XX_CP_PFP_INSTR_BASE_HI,
				upper_32_bits(gpuaddr));

	/*
	 * Do not invoke to load zap shader if MMU does
	 * not support secure mode.
	 */
	if (!device->mmu.secured)
		return 0;

	/* Already loaded and no CPZ retention: just resume via SCM */
	if (adreno_dev->zap_loaded && !(ADRENO_FEATURE(adreno_dev,
		ADRENO_CPZ_RETENTION)))
		return a5xx_zap_shader_resume(device);

	return adreno_zap_shader_load(adreno_dev, a5xx_core->zap_name);
}

/*
 * _me_init_ucode_workarounds() - Select the CP_ME_INIT ucode workaround
 * mask for the current GPU revision
 * @adreno_dev: Pointer to the adreno device
 *
 * Return: the workaround bitmask to place in the ME_INIT packet.
 */
static int _me_init_ucode_workarounds(struct adreno_device *adreno_dev)
{
	switch (ADRENO_GPUREV(adreno_dev)) {
	case ADRENO_REV_A510:
		return 0x00000001; /* Ucode workaround for token end syncs */
	case ADRENO_REV_A505:
	case ADRENO_REV_A506:
	case ADRENO_REV_A530:
		/*
		 * Ucode workarounds for token end syncs,
		 * WFI after every direct-render 3D mode draw and
		 * WFI after every 2D Mode 3 draw.
		 */
		return 0x0000000B;
	default:
		return 0x00000000; /* No ucode workarounds enabled */
	}
}

/*
 * CP_INIT_MAX_CONTEXT bit tells if the multiple hardware contexts can
 * be used at once or if they should be serialized
 */
#define CP_INIT_MAX_CONTEXT BIT(0)

/* Enables register protection mode */
#define CP_INIT_ERROR_DETECTION_CONTROL BIT(1)

/* Header dump information */
#define CP_INIT_HEADER_DUMP BIT(2) /* Reserved */

/* Default Reset states enabled for PFP and ME */
#define CP_INIT_DEFAULT_RESET_STATE BIT(3)

/* Drawcall filter range */
#define CP_INIT_DRAWCALL_FILTER_RANGE BIT(4)

/* Ucode workaround masks */
#define CP_INIT_UCODE_WORKAROUND_MASK BIT(5)

#define CP_INIT_MASK (CP_INIT_MAX_CONTEXT | \
		CP_INIT_ERROR_DETECTION_CONTROL | \
		CP_INIT_HEADER_DUMP | \
		CP_INIT_DEFAULT_RESET_STATE | \
		CP_INIT_UCODE_WORKAROUND_MASK)

/*
 * a5xx_critical_packet_submit() - Submit the prebuilt critical packets IB
 * @adreno_dev: Pointer to the adreno device
 * @rb: Ringbuffer to submit on
 *
 * Runs the quirk-mandated critical packet sequence (built elsewhere into
 * adreno_dev->critpkts) via an indirect buffer and spins for idle.
 *
 * Return: 0 on success or a negative error code on failure.
 */
static int a5xx_critical_packet_submit(struct adreno_device *adreno_dev,
					struct adreno_ringbuffer *rb)
{
	unsigned int *cmds;
	int ret;

	if (!critical_packet_constructed)
		return 0;

	cmds = adreno_ringbuffer_allocspace(rb, 4);
	if (IS_ERR(cmds))
		return PTR_ERR(cmds);

	*cmds++ = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1);
	cmds += cp_gpuaddr(adreno_dev, cmds, adreno_dev->critpkts->gpuaddr);
	*cmds++ = crit_pkts_dwords;

	ret = a5xx_ringbuffer_submit(rb, NULL, true);
	if (!ret) {
		ret = adreno_spin_idle(adreno_dev, 20);
		if (ret)
			a5xx_spin_idle_debug(adreno_dev,
				"Critical packet submission failed to idle\n");
	}

	return ret;
}

/*
 * a5xx_send_me_init() - Initialize ringbuffer
 * @adreno_dev: Pointer to adreno device
 * @rb: Pointer to the ringbuffer of device
 *
 * Submit commands for ME initialization,
 */
static int a5xx_send_me_init(struct adreno_device *adreno_dev,
			struct adreno_ringbuffer *rb)
{
	unsigned int *cmds;
	int i = 0, ret;

	cmds = adreno_ringbuffer_allocspace(rb, 9);
	if (IS_ERR(cmds))
		return PTR_ERR(cmds);

	cmds[i++] = cp_type7_packet(CP_ME_INIT, 8);

	/* Enabled ordinal mask */
	cmds[i++] = CP_INIT_MASK;

	if (CP_INIT_MASK & CP_INIT_MAX_CONTEXT)
		cmds[i++] = 0x00000003;

	if (CP_INIT_MASK & CP_INIT_ERROR_DETECTION_CONTROL)
		cmds[i++] = 0x20000000;

	if (CP_INIT_MASK & CP_INIT_HEADER_DUMP) {
		/* Header dump address */
		cmds[i++] = 0x00000000;
		/* Header dump enable and dump size */
		cmds[i++] = 0x00000000;
	}

	if (CP_INIT_MASK & CP_INIT_DRAWCALL_FILTER_RANGE) {
		/* Start range */
		cmds[i++] = 0x00000000;
		/* End range (inclusive) */
		cmds[i++] = 0x00000000;
	}

	if (CP_INIT_MASK & CP_INIT_UCODE_WORKAROUND_MASK)
		cmds[i++] = _me_init_ucode_workarounds(adreno_dev);

	ret = a5xx_ringbuffer_submit(rb, NULL, true);
	if (!ret) {
		ret = adreno_spin_idle(adreno_dev, 2000);
		if (ret)
			a5xx_spin_idle_debug(adreno_dev,
				"CP initialization failed to idle\n");
	}

	return ret;
}

/*
 * a5xx_rb_start() - Start the ringbuffer
 * @adreno_dev: Pointer to adreno device
 */
static int a5xx_rb_start(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct adreno_ringbuffer *rb;
	uint64_t addr;
	unsigned int *cmds;
	int ret, i;

	/* Clear all the ringbuffers */
	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
		memset(rb->buffer_desc->hostptr, 0xaa, KGSL_RB_SIZE);
		kgsl_sharedmem_writel(device->scratch,
			SCRATCH_RPTR_OFFSET(rb->id), 0);

		rb->wptr = 0;
		rb->_wptr = 0;
		rb->wptr_preempt_end = ~0;
	}

	/* Set up the current ringbuffer */
	rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev);
	addr = SCRATCH_RPTR_GPU_ADDR(device, rb->id);

	kgsl_regwrite(device, A5XX_CP_RB_RPTR_ADDR_LO, lower_32_bits(addr));
	kgsl_regwrite(device, A5XX_CP_RB_RPTR_ADDR_HI, upper_32_bits(addr));

	/*
	 * The size of the ringbuffer in the hardware is the log2
	 * representation of the size in quadwords (sizedwords / 2).
	 * Also disable the host RPTR shadow register as it might be unreliable
	 * in certain circumstances.
	 */

	kgsl_regwrite(device, A5XX_CP_RB_CNTL,
		A5XX_CP_RB_CNTL_DEFAULT);

	kgsl_regwrite(device, A5XX_CP_RB_BASE,
		lower_32_bits(rb->buffer_desc->gpuaddr));
	kgsl_regwrite(device, A5XX_CP_RB_BASE_HI,
		upper_32_bits(rb->buffer_desc->gpuaddr));

	ret = a5xx_microcode_load(adreno_dev);
	if (ret)
		return ret;

	/* clear ME_HALT to start micro engine */

	kgsl_regwrite(device, A5XX_CP_ME_CNTL, 0);

	ret = a5xx_send_me_init(adreno_dev, rb);
	if (ret)
		return ret;

	/* Run the critical packets if we need to */
	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CRITICAL_PACKETS)) {
		ret = a5xx_critical_packet_submit(adreno_dev, rb);
		if (ret)
			return ret;
	}

	/*
	 * Try to execute the zap shader if it exists, otherwise just try
	 * directly writing to the control register
	 */
	if (!adreno_dev->zap_loaded)
		kgsl_regwrite(device, A5XX_RBBM_SECVID_TRUST_CNTL, 0);
	else {
		/* Ask the CP (via the zap shader) to drop secure mode */
		cmds = adreno_ringbuffer_allocspace(rb, 2);
		if (IS_ERR(cmds))
			return PTR_ERR(cmds);

		*cmds++ = cp_packet(adreno_dev, CP_SET_SECURE_MODE, 1);
		*cmds++ = 0;

		ret = a5xx_ringbuffer_submit(rb, NULL, true);
		if (!ret) {
			ret = adreno_spin_idle(adreno_dev, 2000);
			if (ret) {
				a5xx_spin_idle_debug(adreno_dev,
					"Switch to unsecure failed to idle\n");
				return ret;
			}
		}
	}

	ret = a5xx_gpmu_init(adreno_dev);
	if (ret)
		return ret;

	a5xx_post_start(adreno_dev);

	return 0;
}

/*
 * a5xx_microcode_read() - Read microcode
 * @adreno_dev: Pointer to adreno device
 */
static int a5xx_microcode_read(struct adreno_device *adreno_dev)
{
	int ret;
	struct adreno_firmware *pm4_fw = ADRENO_FW(adreno_dev, ADRENO_FW_PM4);
	struct adreno_firmware *pfp_fw = ADRENO_FW(adreno_dev, ADRENO_FW_PFP);
	const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev);

	ret = adreno_get_firmware(adreno_dev, a5xx_core->pm4fw_name, pm4_fw);
	if (ret)
		return ret;

	ret = adreno_get_firmware(adreno_dev, a5xx_core->pfpfw_name, pfp_fw);
	if (ret)
		return ret;

	ret
= _load_gpmu_firmware(adreno_dev); + if (ret) + return ret; + + _load_regfile(adreno_dev); + + return ret; +} + +/* Register offset defines for A5XX, in order of enum adreno_regs */ +static unsigned int a5xx_register_offsets[ADRENO_REG_REGISTER_MAX] = { + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A5XX_CP_RB_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, A5XX_CP_RB_BASE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR_ADDR_LO, + A5XX_CP_RB_RPTR_ADDR_LO), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR_ADDR_HI, + A5XX_CP_RB_RPTR_ADDR_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A5XX_CP_RB_RPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A5XX_CP_RB_WPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, A5XX_CP_ME_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_CNTL, A5XX_CP_RB_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, A5XX_CP_IB1_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, A5XX_CP_IB1_BASE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BUFSZ, A5XX_CP_IB1_BUFSZ), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, A5XX_CP_IB2_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, A5XX_CP_IB2_BASE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, A5XX_CP_IB2_BUFSZ), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PROTECT_REG_0, A5XX_CP_PROTECT_REG_0), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT, A5XX_CP_CONTEXT_SWITCH_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_DEBUG, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_DISABLE, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO, + A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO), + ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI, + A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, A5XX_RBBM_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS3, A5XX_RBBM_STATUS3), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, A5XX_RBBM_INT_0_MASK), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_CLOCK_CTL, A5XX_RBBM_CLOCK_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, 
A5XX_RBBM_SW_RESET_CMD), + ADRENO_REG_DEFINE(ADRENO_REG_GPMU_POWER_COUNTER_ENABLE, + A5XX_GPMU_POWER_COUNTER_ENABLE), +}; + +static void a5xx_cp_hw_err_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int status1, status2; + + kgsl_regread(device, A5XX_CP_INTERRUPT_STATUS, &status1); + + if (status1 & BIT(A5XX_CP_OPCODE_ERROR)) { + unsigned int val; + + kgsl_regwrite(device, A5XX_CP_PFP_STAT_ADDR, 0); + + /* + * A5XX_CP_PFP_STAT_DATA is indexed, so read it twice to get the + * value we want + */ + kgsl_regread(device, A5XX_CP_PFP_STAT_DATA, &val); + kgsl_regread(device, A5XX_CP_PFP_STAT_DATA, &val); + + dev_crit_ratelimited(device->dev, + "ringbuffer opcode error | possible opcode=0x%8.8X\n", + val); + } + if (status1 & BIT(A5XX_CP_RESERVED_BIT_ERROR)) + dev_crit_ratelimited(device->dev, + "ringbuffer reserved bit error interrupt\n"); + if (status1 & BIT(A5XX_CP_HW_FAULT_ERROR)) { + kgsl_regread(device, A5XX_CP_HW_FAULT, &status2); + dev_crit_ratelimited(device->dev, + "CP | Ringbuffer HW fault | status=%x\n", + status2); + } + if (status1 & BIT(A5XX_CP_DMA_ERROR)) + dev_crit_ratelimited(device->dev, "CP | DMA error\n"); + if (status1 & BIT(A5XX_CP_REGISTER_PROTECTION_ERROR)) { + kgsl_regread(device, A5XX_CP_PROTECT_STATUS, &status2); + dev_crit_ratelimited(device->dev, + "CP | Protected mode error| %s | addr=%x | status=%x\n", + status2 & (1 << 24) ? 
"WRITE" : "READ", + (status2 & 0xFFFFF) >> 2, status2); + } + if (status1 & BIT(A5XX_CP_AHB_ERROR)) { + kgsl_regread(device, A5XX_CP_AHB_FAULT, &status2); + dev_crit_ratelimited(device->dev, + "ringbuffer AHB error interrupt | status=%x\n", + status2); + } +} + +static void a5xx_err_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int reg; + + switch (bit) { + case A5XX_INT_RBBM_AHB_ERROR: { + kgsl_regread(device, A5XX_RBBM_AHB_ERROR_STATUS, ®); + + /* + * Return the word address of the erroring register so that it + * matches the register specification + */ + dev_crit_ratelimited(device->dev, + "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n", + reg & (1 << 28) ? "WRITE" : "READ", + (reg & 0xFFFFF) >> 2, + (reg >> 20) & 0x3, + (reg >> 24) & 0xF); + + /* Clear the error */ + kgsl_regwrite(device, A5XX_RBBM_AHB_CMD, (1 << 4)); + break; + } + case A5XX_INT_RBBM_TRANSFER_TIMEOUT: + dev_crit_ratelimited(device->dev, + "RBBM: AHB transfer timeout\n"); + break; + case A5XX_INT_RBBM_ME_MS_TIMEOUT: + kgsl_regread(device, A5XX_RBBM_AHB_ME_SPLIT_STATUS, ®); + dev_crit_ratelimited(device->dev, + "RBBM | ME master split timeout | status=%x\n", + reg); + break; + case A5XX_INT_RBBM_PFP_MS_TIMEOUT: + kgsl_regread(device, A5XX_RBBM_AHB_PFP_SPLIT_STATUS, ®); + dev_crit_ratelimited(device->dev, + "RBBM | PFP master split timeout | status=%x\n", + reg); + break; + case A5XX_INT_RBBM_ETS_MS_TIMEOUT: + dev_crit_ratelimited(device->dev, + "RBBM: ME master split timeout\n"); + break; + case A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW: + dev_crit_ratelimited(device->dev, + "RBBM: ATB ASYNC overflow\n"); + break; + case A5XX_INT_RBBM_ATB_BUS_OVERFLOW: + dev_crit_ratelimited(device->dev, + "RBBM: ATB bus overflow\n"); + break; + case A5XX_INT_UCHE_OOB_ACCESS: + dev_crit_ratelimited(device->dev, + "UCHE: Out of bounds access\n"); + break; + case A5XX_INT_UCHE_TRAP_INTR: + dev_crit_ratelimited(device->dev, "UCHE: Trap 
interrupt\n"); + break; + case A5XX_INT_GPMU_VOLTAGE_DROOP: + dev_crit_ratelimited(device->dev, "GPMU: Voltage droop\n"); + break; + default: + dev_crit_ratelimited(device->dev, "Unknown interrupt %d\n", + bit); + } +} + +static void a5xx_irq_storm_worker(struct work_struct *work) +{ + struct adreno_device *adreno_dev = container_of(work, + struct adreno_device, irq_storm_work); + struct kgsl_device *device = &adreno_dev->dev; + unsigned int status; + + mutex_lock(&device->mutex); + + /* Wait for the storm to clear up */ + do { + kgsl_regwrite(device, A5XX_RBBM_INT_CLEAR_CMD, + BIT(A5XX_INT_CP_CACHE_FLUSH_TS)); + kgsl_regread(device, A5XX_RBBM_INT_0_STATUS, &status); + } while (status & BIT(A5XX_INT_CP_CACHE_FLUSH_TS)); + + /* Re-enable the interrupt bit in the mask */ + adreno_dev->irq_mask |= BIT(A5XX_INT_CP_CACHE_FLUSH_TS); + kgsl_regwrite(device, A5XX_RBBM_INT_0_MASK, adreno_dev->irq_mask); + clear_bit(ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED, &adreno_dev->priv); + + dev_warn(device->dev, "Re-enabled A5XX_INT_CP_CACHE_FLUSH_TS\n"); + mutex_unlock(&device->mutex); + + /* Reschedule just to make sure everything retires */ + adreno_dispatcher_schedule(device); +} + +static void a5xx_cp_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int cur; + static unsigned int count; + static unsigned int prev; + + if (test_bit(ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED, &adreno_dev->priv)) + return; + + kgsl_sharedmem_readl(device->memstore, &cur, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + ref_wait_ts)); + + /* + * prev holds a previously read value + * from memory. It should be changed by the GPU with every + * interrupt. If the value we know about and the value we just + * read are the same, then we are likely in a storm. + * If this happens twice, disable the interrupt in the mask + * so the dispatcher can take care of the issue. 
It is then + * up to the dispatcher to re-enable the mask once all work + * is done and the storm has ended. + */ + if (prev == cur) { + count++; + if (count == 2) { + /* disable interrupt from the mask */ + set_bit(ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED, + &adreno_dev->priv); + + adreno_dev->irq_mask &= + ~BIT(A5XX_INT_CP_CACHE_FLUSH_TS); + + kgsl_regwrite(device, A5XX_RBBM_INT_0_MASK, + adreno_dev->irq_mask); + + kgsl_schedule_work(&adreno_dev->irq_storm_work); + + return; + } + } else { + count = 0; + prev = cur; + } + + a5xx_preemption_trigger(adreno_dev); + adreno_dispatcher_schedule(device); +} + +static const char *gpmu_int_msg[32] = { + [FW_INTR_INFO] = "FW_INTR_INFO", + [LLM_ACK_ERR_INTR] = "LLM_ACK_ERR_INTR", + [ISENS_TRIM_ERR_INTR] = "ISENS_TRIM_ERR_INTR", + [ISENS_ERR_INTR] = "ISENS_ERR_INTR", + [ISENS_IDLE_ERR_INTR] = "ISENS_IDLE_ERR_INTR", + [ISENS_PWR_ON_ERR_INTR] = "ISENS_PWR_ON_ERR_INTR", + [6 ... 30] = "", + [WDOG_EXPITED] = "WDOG_EXPITED"}; + +static void a5xx_gpmu_int_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int reg, i; + + kgsl_regread(device, A5XX_GPMU_RBBM_INTR_INFO, ®); + + if (reg & (~VALID_GPMU_IRQ)) { + dev_crit_ratelimited(device->dev, + "GPMU: Unknown IRQ mask 0x%08lx in 0x%08x\n", + reg & (~VALID_GPMU_IRQ), reg); + } + + for (i = 0; i < 32; i++) + switch (reg & BIT(i)) { + case BIT(WDOG_EXPITED): + if (test_and_clear_bit(ADRENO_DEVICE_GPMU_INITIALIZED, + &adreno_dev->priv)) { + /* Stop GPMU */ + kgsl_regwrite(device, + A5XX_GPMU_CM3_SYSRESET, 1); + kgsl_schedule_work(&adreno_dev->gpmu_work); + } + /* fallthrough */ + case BIT(FW_INTR_INFO): + case BIT(LLM_ACK_ERR_INTR): + case BIT(ISENS_TRIM_ERR_INTR): + case BIT(ISENS_ERR_INTR): + case BIT(ISENS_IDLE_ERR_INTR): + case BIT(ISENS_PWR_ON_ERR_INTR): + dev_crit_ratelimited(device->dev, + "GPMU: interrupt %s(%08lx)\n", + gpmu_int_msg[i], + BIT(i)); + break; + } +} + +/* + * a5x_gpc_err_int_callback() - Isr for 
GPC error interrupts + * @adreno_dev: Pointer to device + * @bit: Interrupt bit + */ +static void a5x_gpc_err_int_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* + * GPC error is typically the result of mistake SW programming. + * Force GPU fault for this interrupt so that we can debug it + * with help of register dump. + */ + + dev_crit(device->dev, "RBBM: GPC error\n"); + adreno_irqctrl(adreno_dev, 0); + + /* Trigger a fault in the dispatcher - this will effect a restart */ + adreno_dispatcher_fault(adreno_dev, ADRENO_SOFT_FAULT); + adreno_dispatcher_schedule(device); +} + +u64 a5xx_read_alwayson(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 lo = 0, hi = 0; + + kgsl_regread(device, A5XX_RBBM_ALWAYSON_COUNTER_LO, &lo); + + /* The upper 32 bits are only reliable on A540 targets */ + if (adreno_is_a540(adreno_dev)) + kgsl_regread(device, A5XX_RBBM_ALWAYSON_COUNTER_HI, &hi); + + return (((u64) hi) << 32) | lo; +} + + +static const struct adreno_irq_funcs a5xx_irq_funcs[32] = { + ADRENO_IRQ_CALLBACK(NULL), /* 0 - RBBM_GPU_IDLE */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 1 - RBBM_AHB_ERROR */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 2 - RBBM_TRANSFER_TIMEOUT */ + /* 3 - RBBM_ME_MASTER_SPLIT_TIMEOUT */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), + /* 4 - RBBM_PFP_MASTER_SPLIT_TIMEOUT */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), + /* 5 - RBBM_ETS_MASTER_SPLIT_TIMEOUT */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), + /* 6 - RBBM_ATB_ASYNC_OVERFLOW */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), + ADRENO_IRQ_CALLBACK(a5x_gpc_err_int_callback), /* 7 - GPC_ERR */ + ADRENO_IRQ_CALLBACK(a5xx_preempt_callback),/* 8 - CP_SW */ + ADRENO_IRQ_CALLBACK(a5xx_cp_hw_err_callback), /* 9 - CP_HW_ERROR */ + /* 10 - CP_CCU_FLUSH_DEPTH_TS */ + ADRENO_IRQ_CALLBACK(NULL), + /* 11 - CP_CCU_FLUSH_COLOR_TS */ + ADRENO_IRQ_CALLBACK(NULL), + /* 12 - CP_CCU_RESOLVE_TS */ + 
ADRENO_IRQ_CALLBACK(NULL), + ADRENO_IRQ_CALLBACK(NULL), /* 13 - CP_IB2_INT */ + ADRENO_IRQ_CALLBACK(NULL), /* 14 - CP_IB1_INT */ + ADRENO_IRQ_CALLBACK(NULL), /* 15 - CP_RB_INT */ + /* 16 - CCP_UNUSED_1 */ + ADRENO_IRQ_CALLBACK(NULL), + ADRENO_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 18 - CP_WT_DONE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 19 - UNKNOWN_1 */ + ADRENO_IRQ_CALLBACK(a5xx_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */ + /* 21 - UNUSED_2 */ + ADRENO_IRQ_CALLBACK(NULL), + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 22 - RBBM_ATB_BUS_OVERFLOW */ + /* 23 - MISC_HANG_DETECT */ + ADRENO_IRQ_CALLBACK(adreno_hang_int_callback), + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 24 - UCHE_OOB_ACCESS */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 25 - UCHE_TRAP_INTR */ + ADRENO_IRQ_CALLBACK(NULL), /* 26 - DEBBUS_INTR_0 */ + ADRENO_IRQ_CALLBACK(NULL), /* 27 - DEBBUS_INTR_1 */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 28 - GPMU_VOLTAGE_DROOP */ + ADRENO_IRQ_CALLBACK(a5xx_gpmu_int_callback), /* 29 - GPMU_FIRMWARE */ + ADRENO_IRQ_CALLBACK(NULL), /* 30 - ISDB_CPU_IRQ */ + ADRENO_IRQ_CALLBACK(NULL), /* 31 - ISDB_UNDER_DEBUG */ +}; + +static irqreturn_t a5xx_irq_handler(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + irqreturn_t ret; + u32 status; + + kgsl_regread(device, A5XX_RBBM_INT_0_STATUS, &status); + + /* + * Clear all the interrupt bits except A5XX_INT_RBBM_AHB_ERROR. 
+ * The interrupt will stay asserted until it is cleared by the handler + * so don't touch it yet to avoid a storm + */ + kgsl_regwrite(device, A5XX_RBBM_INT_CLEAR_CMD, + status & ~A5XX_INT_RBBM_AHB_ERROR); + + /* Call the helper function for callbacks */ + ret = adreno_irq_callbacks(adreno_dev, a5xx_irq_funcs, status); + + trace_kgsl_a5xx_irq_status(adreno_dev, status); + + /* Now chear AHB_ERROR if it was set */ + if (status & A5XX_INT_RBBM_AHB_ERROR) + kgsl_regwrite(device, A5XX_RBBM_INT_CLEAR_CMD, + A5XX_INT_RBBM_AHB_ERROR); + + return ret; +} + +static bool a5xx_hw_isidle(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 status; + + /* + * Due to CRC idle throttling the GPU idle hysteresis on a540 can take + * up to 5uS to expire + */ + if (adreno_is_a540(adreno_dev)) + udelay(5); + + kgsl_regread(device, A5XX_RBBM_STATUS, &status); + + if (status & 0xfffffffe) + return false; + + kgsl_regread(device, A5XX_RBBM_INT_0_STATUS, &status); + + /* Return busy if a interrupt is pending */ + return !((status & adreno_dev->irq_mask) || + atomic_read(&adreno_dev->pending_irq_refcnt)); +} + +static int a5xx_clear_pending_transactions(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 mask = A5XX_VBIF_XIN_HALT_CTRL0_MASK; + int ret; + + kgsl_regwrite(device, A5XX_VBIF_XIN_HALT_CTRL0, mask); + ret = adreno_wait_for_halt_ack(device, A5XX_VBIF_XIN_HALT_CTRL1, mask); + kgsl_regwrite(device, A5XX_VBIF_XIN_HALT_CTRL0, 0); + + return ret; +} + +static bool a5xx_is_hw_collapsible(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int reg; + + if (!adreno_isidle(adreno_dev)) + return false; + + /* If feature is not supported or enabled, no worry */ + if (!adreno_dev->sptp_pc_enabled) + return true; + kgsl_regread(device, A5XX_GPMU_SP_PWR_CLK_STATUS, ®); + if (reg & BIT(20)) + return false; + kgsl_regread(device, 
A5XX_GPMU_RBCCU_PWR_CLK_STATUS, ®); + return !(reg & BIT(20)); +} + +static void a5xx_remove(struct adreno_device *adreno_dev) +{ + if (ADRENO_FEATURE(adreno_dev, ADRENO_PREEMPTION)) + del_timer(&adreno_dev->preempt.timer); +} + +static void a5xx_power_stats(struct adreno_device *adreno_dev, + struct kgsl_power_stats *stats) +{ + static u32 rbbm0_hi; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + s64 freq = kgsl_pwrctrl_active_freq(&device->pwrctrl) / 1000000; + struct adreno_busy_data *busy = &adreno_dev->busy_data; + s64 gpu_busy = 0; + u32 lo, hi; + s64 adj; + + /* Sometimes this counter can go backwards, so try to detect that */ + kgsl_regread(device, A5XX_RBBM_PERFCTR_RBBM_0_LO, &lo); + kgsl_regread(device, A5XX_RBBM_PERFCTR_RBBM_0_HI, &hi); + + if (busy->gpu_busy) { + if (lo < busy->gpu_busy) { + if (hi == rbbm0_hi) { + dev_warn_once(device->dev, + "abmormal value from RBBM_0 perfcounter: %x %x\n", + lo, busy->gpu_busy); + gpu_busy = 0; + } else { + gpu_busy = (UINT_MAX - busy->gpu_busy) + lo; + rbbm0_hi = hi; + } + } else + gpu_busy = lo - busy->gpu_busy; + } else { + gpu_busy = 0; + rbbm0_hi = 0; + } + + busy->gpu_busy = lo; + + adj = a5xx_read_throttling_counters(adreno_dev); + if (-adj <= gpu_busy) + gpu_busy += adj; + else + gpu_busy = 0; + + stats->busy_time = gpu_busy / freq; + + if (adreno_is_a530(adreno_dev) && adreno_dev->lm_threshold_count) + kgsl_regread(device, adreno_dev->lm_threshold_count, + &adreno_dev->lm_threshold_cross); + else if (adreno_is_a540(adreno_dev)) + adreno_dev->lm_threshold_cross = adj; + + if (!device->pwrctrl.bus_control) + return; + + stats->ram_time = counter_delta(device, adreno_dev->ram_cycles_lo, + &busy->bif_ram_cycles); + + stats->ram_wait = counter_delta(device, adreno_dev->starved_ram_lo, + &busy->bif_starved_ram); +} + +static int a5xx_setproperty(struct kgsl_device_private *dev_priv, + u32 type, void __user *value, u32 sizebytes) +{ + struct kgsl_device *device = dev_priv->device; + u32 enable; + + if 
(type != KGSL_PROP_PWRCTRL) + return -ENODEV; + + if (sizebytes != sizeof(enable)) + return -EINVAL; + + if (copy_from_user(&enable, value, sizeof(enable))) + return -EFAULT; + + mutex_lock(&device->mutex); + + if (enable) { + device->pwrctrl.ctrl_flags = 0; + kgsl_pwrscale_enable(device); + } else { + kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + device->pwrctrl.ctrl_flags = KGSL_PWR_ON; + kgsl_pwrscale_disable(device, true); + } + + mutex_unlock(&device->mutex); + + return 0; +} + +#ifdef CONFIG_QCOM_KGSL_CORESIGHT +static struct adreno_coresight_register a5xx_coresight_registers[] = { + { A5XX_RBBM_CFG_DBGBUS_SEL_A }, + { A5XX_RBBM_CFG_DBGBUS_SEL_B }, + { A5XX_RBBM_CFG_DBGBUS_SEL_C }, + { A5XX_RBBM_CFG_DBGBUS_SEL_D }, + { A5XX_RBBM_CFG_DBGBUS_CNTLT }, + { A5XX_RBBM_CFG_DBGBUS_CNTLM }, + { A5XX_RBBM_CFG_DBGBUS_OPL }, + { A5XX_RBBM_CFG_DBGBUS_OPE }, + { A5XX_RBBM_CFG_DBGBUS_IVTL_0 }, + { A5XX_RBBM_CFG_DBGBUS_IVTL_1 }, + { A5XX_RBBM_CFG_DBGBUS_IVTL_2 }, + { A5XX_RBBM_CFG_DBGBUS_IVTL_3 }, + { A5XX_RBBM_CFG_DBGBUS_MASKL_0 }, + { A5XX_RBBM_CFG_DBGBUS_MASKL_1 }, + { A5XX_RBBM_CFG_DBGBUS_MASKL_2 }, + { A5XX_RBBM_CFG_DBGBUS_MASKL_3 }, + { A5XX_RBBM_CFG_DBGBUS_BYTEL_0 }, + { A5XX_RBBM_CFG_DBGBUS_BYTEL_1 }, + { A5XX_RBBM_CFG_DBGBUS_IVTE_0 }, + { A5XX_RBBM_CFG_DBGBUS_IVTE_1 }, + { A5XX_RBBM_CFG_DBGBUS_IVTE_2 }, + { A5XX_RBBM_CFG_DBGBUS_IVTE_3 }, + { A5XX_RBBM_CFG_DBGBUS_MASKE_0 }, + { A5XX_RBBM_CFG_DBGBUS_MASKE_1 }, + { A5XX_RBBM_CFG_DBGBUS_MASKE_2 }, + { A5XX_RBBM_CFG_DBGBUS_MASKE_3 }, + { A5XX_RBBM_CFG_DBGBUS_NIBBLEE }, + { A5XX_RBBM_CFG_DBGBUS_PTRC0 }, + { A5XX_RBBM_CFG_DBGBUS_PTRC1 }, + { A5XX_RBBM_CFG_DBGBUS_LOADREG }, + { A5XX_RBBM_CFG_DBGBUS_IDX }, + { A5XX_RBBM_CFG_DBGBUS_CLRC }, + { A5XX_RBBM_CFG_DBGBUS_LOADIVT }, + { A5XX_RBBM_CFG_DBGBUS_EVENT_LOGIC }, + { A5XX_RBBM_CFG_DBGBUS_OVER }, + { A5XX_RBBM_CFG_DBGBUS_COUNT0 }, + { A5XX_RBBM_CFG_DBGBUS_COUNT1 }, + { A5XX_RBBM_CFG_DBGBUS_COUNT2 }, + { A5XX_RBBM_CFG_DBGBUS_COUNT3 }, + { A5XX_RBBM_CFG_DBGBUS_COUNT4 
}, + { A5XX_RBBM_CFG_DBGBUS_COUNT5 }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_ADDR }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_BUF0 }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1 }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2 }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_BUF3 }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_BUF4 }, + { A5XX_RBBM_CFG_DBGBUS_MISR0 }, + { A5XX_RBBM_CFG_DBGBUS_MISR1 }, + { A5XX_RBBM_AHB_DBG_CNTL }, + { A5XX_RBBM_READ_AHB_THROUGH_DBG }, + { A5XX_RBBM_DBG_LO_HI_GPIO }, + { A5XX_RBBM_EXT_TRACE_BUS_CNTL }, + { A5XX_RBBM_EXT_VBIF_DBG_CNTL }, +}; + +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_a, &a5xx_coresight_registers[0]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_b, &a5xx_coresight_registers[1]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_c, &a5xx_coresight_registers[2]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_d, &a5xx_coresight_registers[3]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlt, &a5xx_coresight_registers[4]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlm, &a5xx_coresight_registers[5]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_opl, &a5xx_coresight_registers[6]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ope, &a5xx_coresight_registers[7]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_0, &a5xx_coresight_registers[8]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_1, &a5xx_coresight_registers[9]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_2, &a5xx_coresight_registers[10]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_3, &a5xx_coresight_registers[11]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_0, &a5xx_coresight_registers[12]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_1, &a5xx_coresight_registers[13]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_2, &a5xx_coresight_registers[14]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_3, &a5xx_coresight_registers[15]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_0, &a5xx_coresight_registers[16]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_1, &a5xx_coresight_registers[17]); +static 
ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_0, &a5xx_coresight_registers[18]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_1, &a5xx_coresight_registers[19]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_2, &a5xx_coresight_registers[20]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_3, &a5xx_coresight_registers[21]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_0, &a5xx_coresight_registers[22]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_1, &a5xx_coresight_registers[23]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_2, &a5xx_coresight_registers[24]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_3, &a5xx_coresight_registers[25]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_nibblee, &a5xx_coresight_registers[26]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc0, &a5xx_coresight_registers[27]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc1, &a5xx_coresight_registers[28]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadreg, &a5xx_coresight_registers[29]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_idx, &a5xx_coresight_registers[30]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_clrc, &a5xx_coresight_registers[31]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadivt, &a5xx_coresight_registers[32]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_event_logic, + &a5xx_coresight_registers[33]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_over, &a5xx_coresight_registers[34]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count0, &a5xx_coresight_registers[35]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count1, &a5xx_coresight_registers[36]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count2, &a5xx_coresight_registers[37]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count3, &a5xx_coresight_registers[38]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count4, &a5xx_coresight_registers[39]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count5, &a5xx_coresight_registers[40]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_addr, + &a5xx_coresight_registers[41]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf0, 
+ &a5xx_coresight_registers[42]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf1, + &a5xx_coresight_registers[43]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf2, + &a5xx_coresight_registers[44]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf3, + &a5xx_coresight_registers[45]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf4, + &a5xx_coresight_registers[46]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_misr0, &a5xx_coresight_registers[47]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_misr1, &a5xx_coresight_registers[48]); +static ADRENO_CORESIGHT_ATTR(ahb_dbg_cntl, &a5xx_coresight_registers[49]); +static ADRENO_CORESIGHT_ATTR(read_ahb_through_dbg, + &a5xx_coresight_registers[50]); +static ADRENO_CORESIGHT_ATTR(dbg_lo_hi_gpio, &a5xx_coresight_registers[51]); +static ADRENO_CORESIGHT_ATTR(ext_trace_bus_cntl, &a5xx_coresight_registers[52]); +static ADRENO_CORESIGHT_ATTR(ext_vbif_dbg_cntl, &a5xx_coresight_registers[53]); + +static struct attribute *a5xx_coresight_attrs[] = { + &coresight_attr_cfg_dbgbus_sel_a.attr.attr, + &coresight_attr_cfg_dbgbus_sel_b.attr.attr, + &coresight_attr_cfg_dbgbus_sel_c.attr.attr, + &coresight_attr_cfg_dbgbus_sel_d.attr.attr, + &coresight_attr_cfg_dbgbus_cntlt.attr.attr, + &coresight_attr_cfg_dbgbus_cntlm.attr.attr, + &coresight_attr_cfg_dbgbus_opl.attr.attr, + &coresight_attr_cfg_dbgbus_ope.attr.attr, + &coresight_attr_cfg_dbgbus_ivtl_0.attr.attr, + &coresight_attr_cfg_dbgbus_ivtl_1.attr.attr, + &coresight_attr_cfg_dbgbus_ivtl_2.attr.attr, + &coresight_attr_cfg_dbgbus_ivtl_3.attr.attr, + &coresight_attr_cfg_dbgbus_maskl_0.attr.attr, + &coresight_attr_cfg_dbgbus_maskl_1.attr.attr, + &coresight_attr_cfg_dbgbus_maskl_2.attr.attr, + &coresight_attr_cfg_dbgbus_maskl_3.attr.attr, + &coresight_attr_cfg_dbgbus_bytel_0.attr.attr, + &coresight_attr_cfg_dbgbus_bytel_1.attr.attr, + &coresight_attr_cfg_dbgbus_ivte_0.attr.attr, + &coresight_attr_cfg_dbgbus_ivte_1.attr.attr, + &coresight_attr_cfg_dbgbus_ivte_2.attr.attr, + 
&coresight_attr_cfg_dbgbus_ivte_3.attr.attr, + &coresight_attr_cfg_dbgbus_maske_0.attr.attr, + &coresight_attr_cfg_dbgbus_maske_1.attr.attr, + &coresight_attr_cfg_dbgbus_maske_2.attr.attr, + &coresight_attr_cfg_dbgbus_maske_3.attr.attr, + &coresight_attr_cfg_dbgbus_nibblee.attr.attr, + &coresight_attr_cfg_dbgbus_ptrc0.attr.attr, + &coresight_attr_cfg_dbgbus_ptrc1.attr.attr, + &coresight_attr_cfg_dbgbus_loadreg.attr.attr, + &coresight_attr_cfg_dbgbus_idx.attr.attr, + &coresight_attr_cfg_dbgbus_clrc.attr.attr, + &coresight_attr_cfg_dbgbus_loadivt.attr.attr, + &coresight_attr_cfg_dbgbus_event_logic.attr.attr, + &coresight_attr_cfg_dbgbus_over.attr.attr, + &coresight_attr_cfg_dbgbus_count0.attr.attr, + &coresight_attr_cfg_dbgbus_count1.attr.attr, + &coresight_attr_cfg_dbgbus_count2.attr.attr, + &coresight_attr_cfg_dbgbus_count3.attr.attr, + &coresight_attr_cfg_dbgbus_count4.attr.attr, + &coresight_attr_cfg_dbgbus_count5.attr.attr, + &coresight_attr_cfg_dbgbus_trace_addr.attr.attr, + &coresight_attr_cfg_dbgbus_trace_buf0.attr.attr, + &coresight_attr_cfg_dbgbus_trace_buf1.attr.attr, + &coresight_attr_cfg_dbgbus_trace_buf2.attr.attr, + &coresight_attr_cfg_dbgbus_trace_buf3.attr.attr, + &coresight_attr_cfg_dbgbus_trace_buf4.attr.attr, + &coresight_attr_cfg_dbgbus_misr0.attr.attr, + &coresight_attr_cfg_dbgbus_misr1.attr.attr, + &coresight_attr_ahb_dbg_cntl.attr.attr, + &coresight_attr_read_ahb_through_dbg.attr.attr, + &coresight_attr_dbg_lo_hi_gpio.attr.attr, + &coresight_attr_ext_trace_bus_cntl.attr.attr, + &coresight_attr_ext_vbif_dbg_cntl.attr.attr, + NULL, +}; + +static const struct attribute_group a5xx_coresight_group = { + .attrs = a5xx_coresight_attrs, +}; + +static const struct attribute_group *a5xx_coresight_groups[] = { + &a5xx_coresight_group, + NULL, +}; + +static struct adreno_coresight a5xx_coresight = { + .registers = a5xx_coresight_registers, + .count = ARRAY_SIZE(a5xx_coresight_registers), + .groups = a5xx_coresight_groups, +}; +#endif + +const struct 
adreno_gpudev adreno_a5xx_gpudev = { + .reg_offsets = a5xx_register_offsets, +#ifdef CONFIG_QCOM_KGSL_CORESIGHT + .coresight = {&a5xx_coresight}, +#endif + .probe = a5xx_probe, + .start = a5xx_start, + .snapshot = a5xx_snapshot, + .init = a5xx_init, + .irq_handler = a5xx_irq_handler, + .rb_start = a5xx_rb_start, + .regulator_enable = a5xx_regulator_enable, + .regulator_disable = a5xx_regulator_disable, + .pwrlevel_change_settings = a5xx_pwrlevel_change_settings, + .preemption_schedule = a5xx_preemption_schedule, +#if IS_ENABLED(CONFIG_COMMON_CLK_QCOM) + .clk_set_options = a5xx_clk_set_options, +#endif + .read_alwayson = a5xx_read_alwayson, + .hw_isidle = a5xx_hw_isidle, + .power_ops = &adreno_power_operations, + .clear_pending_transactions = a5xx_clear_pending_transactions, + .remove = a5xx_remove, + .ringbuffer_submitcmd = a5xx_ringbuffer_submitcmd, + .is_hw_collapsible = a5xx_is_hw_collapsible, + .power_stats = a5xx_power_stats, + .setproperty = a5xx_setproperty, +}; diff --git a/adreno_a5xx.h b/adreno_a5xx.h new file mode 100644 index 0000000000..7a03e5f86d --- /dev/null +++ b/adreno_a5xx.h @@ -0,0 +1,307 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2015-2017,2019-2020 The Linux Foundation. All rights reserved. 
+ */ + +#ifndef _ADRENO_A5XX_H_ +#define _ADRENO_A5XX_H_ + +#include "a5xx_reg.h" + +/** + * struct adreno_a5xx_core - a5xx specific GPU core definitions + */ +struct adreno_a5xx_core { + /** @base: Container for the generic &struct adreno_gpu_core */ + struct adreno_gpu_core base; + /** @gpmu_tsens: ID for the temperature sensor used by the GPMU */ + unsigned int gpmu_tsens; + /** @max_power: Max possible power draw of a core */ + unsigned int max_power; + /** pm4fw_name: Name of the PM4 microcode file */ + const char *pm4fw_name; + /** pfpfw_name: Name of the PFP microcode file */ + const char *pfpfw_name; + /** gpmufw_name: Name of the GPMU microcode file */ + const char *gpmufw_name; + /** @regfw_name: Filename for the LM registers if applicable */ + const char *regfw_name; + /** @zap_name: Name of the CPZ zap file */ + const char *zap_name; + /** @hwcg: List of registers and values to write for HWCG */ + const struct kgsl_regmap_list *hwcg; + /** @hwcg_count: Number of registers in @hwcg */ + u32 hwcg_count; + /** @vbif: List of registers and values to write for VBIF */ + const struct kgsl_regmap_list *vbif; + /** @vbif_count: Number of registers in @vbif */ + u32 vbif_count; + /** @highest_bank_bit: The bit of the highest DDR bank */ + u32 highest_bank_bit; +}; + +#define A5XX_CP_CTXRECORD_MAGIC_REF 0x27C4BAFCUL +/* Size of each CP preemption record */ +#define A5XX_CP_CTXRECORD_SIZE_IN_BYTES 0x10000 +/* Size of the preemption counter block (in bytes) */ +#define A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE (16 * 4) + +/** + * struct a5xx_cp_preemption_record - CP context record for + * preemption. + * @magic: (00) Value at this offset must be equal to + * A5XX_CP_CTXRECORD_MAGIC_REF. + * @info: (04) Type of record. Written non-zero (usually) by CP. + * we must set to zero for all ringbuffers. + * @data: (08) DATA field in SET_RENDER_MODE or checkpoint packets. + * Written by CP when switching out. Not used on switch-in. + * we must initialize to zero. 
+ * @cntl: (12) RB_CNTL, saved and restored by CP. + * @rptr: (16) RB_RPTR, saved and restored by CP. + * @wptr: (20) RB_WPTR, saved and restored by CP. + * @rptr_addr: (24) RB_RPTR_ADDR_LO|HI saved and restored. + * rbase: (32) RB_BASE_LO|HI saved and restored. + * counter: (40) Pointer to preemption counter + */ +struct a5xx_cp_preemption_record { + uint32_t magic; + uint32_t info; + uint32_t data; + uint32_t cntl; + uint32_t rptr; + uint32_t wptr; + uint64_t rptr_addr; + uint64_t rbase; + uint64_t counter; +}; + +#define A5XX_CP_SMMU_INFO_MAGIC_REF 0x3618CDA3UL + +/** + * struct a5xx_cp_smmu_info - CP preemption SMMU info. + * @magic: (00) The value at this offset must be equal to + * A5XX_CP_SMMU_INFO_MAGIC_REF. + * @_pad4: (04) Reserved/padding + * @ttbr0: (08) Base address of the page table for the + * incoming context. + * @context_idr: (16) Context Identification Register value. + */ +struct a5xx_cp_smmu_info { + uint32_t magic; + uint32_t _pad4; + uint64_t ttbr0; + uint32_t asid; + uint32_t context_idr; +}; + +void a5xx_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); +unsigned int a5xx_num_registers(void); + +void a5xx_crashdump_init(struct adreno_device *adreno_dev); + +void a5xx_hwcg_set(struct adreno_device *adreno_dev, bool on); + +#define A5XX_CP_RB_CNTL_DEFAULT ((1 << 27) | ((ilog2(4) << 8) & 0x1F00) | \ + (ilog2(KGSL_RB_DWORDS >> 1) & 0x3F)) +/* GPMU interrupt multiplexor */ +#define FW_INTR_INFO (0) +#define LLM_ACK_ERR_INTR (1) +#define ISENS_TRIM_ERR_INTR (2) +#define ISENS_ERR_INTR (3) +#define ISENS_IDLE_ERR_INTR (4) +#define ISENS_PWR_ON_ERR_INTR (5) +#define WDOG_EXPITED (31) + +#define VALID_GPMU_IRQ (\ + BIT(FW_INTR_INFO) | \ + BIT(LLM_ACK_ERR_INTR) | \ + BIT(ISENS_TRIM_ERR_INTR) | \ + BIT(ISENS_ERR_INTR) | \ + BIT(ISENS_IDLE_ERR_INTR) | \ + BIT(ISENS_PWR_ON_ERR_INTR) | \ + BIT(WDOG_EXPITED)) + +/* A5XX_GPMU_GPMU_LLM_GLM_SLEEP_CTRL */ +#define STATE_OF_CHILD GENMASK(5, 4) +#define STATE_OF_CHILD_01 BIT(4) 
+#define STATE_OF_CHILD_11 (BIT(4) | BIT(5)) +#define IDLE_FULL_LM_SLEEP BIT(0) + +/* A5XX_GPMU_GPMU_LLM_GLM_SLEEP_STATUS */ +#define WAKEUP_ACK BIT(1) +#define IDLE_FULL_ACK BIT(0) + +/* A5XX_GPMU_GPMU_ISENSE_CTRL */ +#define ISENSE_CGC_EN_DISABLE BIT(0) + +/* A5XX_GPMU_TEMP_SENSOR_CONFIG */ +#define GPMU_BCL_ENABLED BIT(4) +#define GPMU_LLM_ENABLED BIT(9) +#define GPMU_ISENSE_STATUS GENMASK(3, 0) +#define GPMU_ISENSE_END_POINT_CAL_ERR BIT(0) + +#define AMP_CALIBRATION_RETRY_CNT 3 +#define AMP_CALIBRATION_TIMEOUT 6 + +/* A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK */ +#define VOLTAGE_INTR_EN BIT(0) + +/* A5XX_GPMU_GPMU_PWR_THRESHOLD */ +#define PWR_THRESHOLD_VALID 0x80000000 + +/* A5XX_GPMU_GPMU_SP_CLOCK_CONTROL */ +#define CNTL_IP_CLK_ENABLE BIT(0) +/* AGC */ +#define AGC_INIT_BASE A5XX_GPMU_DATA_RAM_BASE +#define AGC_INIT_MSG_MAGIC (AGC_INIT_BASE + 5) +#define AGC_MSG_BASE (AGC_INIT_BASE + 7) + +#define AGC_MSG_STATE (AGC_MSG_BASE + 0) +#define AGC_MSG_COMMAND (AGC_MSG_BASE + 1) +#define AGC_MSG_PAYLOAD_SIZE (AGC_MSG_BASE + 3) +#define AGC_MSG_PAYLOAD (AGC_MSG_BASE + 5) + +#define AGC_INIT_MSG_VALUE 0xBABEFACE +#define AGC_POWER_CONFIG_PRODUCTION_ID 1 + +#define AGC_LM_CONFIG (136/4) +#define AGC_LM_CONFIG_ENABLE_GPMU_ADAPTIVE (1) + +#define AGC_LM_CONFIG_ENABLE_ERROR (3 << 4) +#define AGC_LM_CONFIG_ISENSE_ENABLE (1 << 4) + +#define AGC_THROTTLE_SEL_DCS (1 << 8) +#define AGC_THROTTLE_DISABLE (2 << 8) + + +#define AGC_LLM_ENABLED (1 << 16) +#define AGC_GPU_VERSION_MASK GENMASK(18, 17) +#define AGC_GPU_VERSION_SHIFT 17 +#define AGC_BCL_DISABLED (1 << 24) + + +#define AGC_LEVEL_CONFIG (140/4) + +#define LM_DCVS_LIMIT 1 +/* FW file tages */ +#define GPMU_FIRMWARE_ID 2 +#define GPMU_SEQUENCE_ID 3 +#define GPMU_INST_RAM_SIZE 0xFFF + +#define HEADER_MAJOR 1 +#define HEADER_MINOR 2 +#define HEADER_DATE 3 +#define HEADER_TIME 4 +#define HEADER_SEQUENCE 5 + +#define MAX_HEADER_SIZE 10 + +#define LM_SEQUENCE_ID 1 +#define MAX_SEQUENCE_ID 3 + +#define GPMU_ISENSE_SAVE 
(A5XX_GPMU_DATA_RAM_BASE + 200/4) +/* LM defaults */ +#define LM_DEFAULT_LIMIT 6000 +#define A530_DEFAULT_LEAKAGE 0x004E001A + +/** + * to_a5xx_core - return the a5xx specific GPU core struct + * @adreno_dev: An Adreno GPU device handle + * + * Returns: + * A pointer to the a5xx specific GPU core struct + */ +static inline const struct adreno_a5xx_core * +to_a5xx_core(struct adreno_device *adreno_dev) +{ + const struct adreno_gpu_core *core = adreno_dev->gpucore; + + return container_of(core, struct adreno_a5xx_core, base); +} + +/* Preemption functions */ +void a5xx_preemption_trigger(struct adreno_device *adreno_dev); +void a5xx_preemption_schedule(struct adreno_device *adreno_dev); +void a5xx_preemption_start(struct adreno_device *adreno_dev); +int a5xx_preemption_init(struct adreno_device *adreno_dev); + +/** + * a5xx_preemption_post_ibsubmit - Insert commands following a submission + * @adreno_dev: Adreno GPU handle + * @cmds: Pointer to the ringbuffer to insert opcodes + * + * Return: The number of opcodes written to @cmds + */ +u32 a5xx_preemption_post_ibsubmit(struct adreno_device *adreno_dev, u32 *cmds); + +/** + * a5xx_preemption_post_ibsubmit - Insert opcodes before a submission + * @adreno_dev: Adreno GPU handle + * @rb: The ringbuffer being written + * @drawctxt: The draw context being written + * @cmds: Pointer to the ringbuffer to insert opcodes + * + * Return: The number of opcodes written to @cmds + */ +u32 a5xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, + u32 *cmds); + +void a5xx_preempt_callback(struct adreno_device *adreno_dev, int bit); + +u64 a5xx_read_alwayson(struct adreno_device *adreno_dev); + +extern const struct adreno_perfcounters adreno_a5xx_perfcounters; + +/** + * a5xx_ringbuffer_init - Initialize the ringbuffers + * @adreno_dev: An Adreno GPU handle + * + * Initialize the ringbuffer(s) for a5xx. 
+ * Return: 0 on success or negative on failure + */ +int a5xx_ringbuffer_init(struct adreno_device *adreno_dev); + +/** +* a5xx_ringbuffer_addcmds - Submit a command to the ringbuffer +* @adreno_dev: An Adreno GPU handle +* @rb: Pointer to the ringbuffer to submit on +* @drawctxt: Pointer to the draw context for the submission, or NULL for +* internal submissions +* @flags: Flags for the submission +* @in: Commands to write to the ringbuffer +* @dwords: Size of @in (in dwords) +* @timestamp: Timestamp for the submission +* @time: Optional pointer to a submit time structure +* +* Submit a command to the ringbuffer. +* Return: 0 on success or negative on failure +*/ +int a5xx_ringbuffer_addcmds(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, + u32 flags, u32 *in, u32 dwords, u32 timestamp, + struct adreno_submit_time *time); + +/** + * a5xx_ringbuffer_submitcmd - Submit a user command to the ringbuffer + * @adreno_dev: An Adreno GPU handle + * @cmdobj: Pointer to a user command object + * @flags: Internal submit flags + * @time: Optional pointer to a adreno_submit_time container + * + * Return: 0 on success or negative on failure + */ +int a5xx_ringbuffer_submitcmd(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj, u32 flags, + struct adreno_submit_time *time); + +int a5xx_ringbuffer_submit(struct adreno_ringbuffer *rb, + struct adreno_submit_time *time, bool sync); + +static inline bool a5xx_has_gpmu(struct adreno_device *adreno_dev) +{ + return (adreno_is_a530(adreno_dev) || adreno_is_a540(adreno_dev)); +} + +#endif diff --git a/adreno_a5xx_packets.h b/adreno_a5xx_packets.h new file mode 100644 index 0000000000..55276e46bc --- /dev/null +++ b/adreno_a5xx_packets.h @@ -0,0 +1,1406 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2016,2019, The Linux Foundation. All rights reserved. 
+ */ + +struct adreno_critical_fixup { + unsigned int lo_offset; + unsigned int hi_offset; + int buffer; + uint64_t mem_offset; +}; + +static unsigned int _a5xx_critical_pkts[] = { + 0x400E0601, /* [0x0000] == TYPE4 == */ + 0x00000002, /* [0x0001] A5X_HLSQ_MODE_CNTL (0x0E06)*/ + 0x40E78A01, /* [0x0002] == TYPE4 == */ + 0x000FFFFF, /* [0x0003] A5X_HLSQ_UPDATE_CNTL_CTX_0 (0xE78A)*/ + 0x48E78401, /* [0x0004] == TYPE4 == */ + 0x00000005, /* [0x0005] A5X_HLSQ_CNTL_0_CTX_0 (0xE784)*/ + 0x40E78501, /* [0x0006] == TYPE4 == */ + 0x00000009, /* [0x0007] A5X_HLSQ_CNTL_1_CTX_0 (0xE785)*/ + 0x48E78B85, /* [0x0008] == TYPE4 == */ + 0x00000001, /* [0x0009] A5X_HLSQ_VS_CONFIG_CTX_0 (0xE78B)*/ + 0x00002085, /* [0x000A] A5X_HLSQ_PS_CONFIG_CTX_0 (0xE78C)*/ + 0x00002084, /* [0x000B] A5X_HLSQ_HS_CONFIG_CTX_0 (0xE78D)*/ + 0x00002084, /* [0x000C] A5X_HLSQ_DS_CONFIG_CTX_0 (0xE78E)*/ + 0x00002084, /* [0x000D] A5X_HLSQ_GS_CONFIG_CTX_0 (0xE78F)*/ + 0x40E58485, /* [0x000E] == TYPE4 == */ + 0x00000001, /* [0x000F] A5X_SP_VS_CONFIG_CTX_0 (0xE584)*/ + 0x00002085, /* [0x0010] A5X_SP_PS_CONFIG_CTX_0 (0xE585)*/ + 0x00002084, /* [0x0011] A5X_SP_HS_CONFIG_CTX_0 (0xE586)*/ + 0x00002084, /* [0x0012] A5X_SP_DS_CONFIG_CTX_0 (0xE587)*/ + 0x00002084, /* [0x0013] A5X_SP_GS_CONFIG_CTX_0 (0xE588)*/ + 0x40E79101, /* [0x0014] == TYPE4 == */ + 0x00000004, /* [0x0015] A5X_HLSQ_VS_CNTL_CTX_0 (0xE791)*/ + 0x40E79201, /* [0x0016] == TYPE4 == */ + 0x00000002, /* [0x0017] A5X_HLSQ_PS_CNTL_CTX_0 (0xE792)*/ + 0x48E58001, /* [0x0018] == TYPE4 == */ + 0x00000010, /* [0x0019] A5X_SP_SP_CNTL_CTX_0 (0xE580)*/ + 0x70B00043, /* [0x001A] == TYPE7: LOAD_STATE (30) == */ + 0x00A00000, /* [0x001B] */ + 0x00000000, /* [0x001C] */ + 0x00000000, /* [0x001D] */ + 0x20020003, /* [0x001E] */ + 0x56D81803, /* [0x001F] */ + 0x00000003, /* [0x0020] */ + 0x20150000, /* [0x0021] */ + 0x00000000, /* [0x0022] */ + 0x00000200, /* [0x0023] */ + 0x00000000, /* [0x0024] */ + 0x201100F4, /* [0x0025] */ + 0x00000000, /* [0x0026] */ + 0x00000500, /* 
[0x0027] */ + 0x00000C21, /* [0x0028] */ + 0x20154004, /* [0x0029] */ + 0x00000C20, /* [0x002A] */ + 0x20154003, /* [0x002B] */ + 0x00000C23, /* [0x002C] */ + 0x20154008, /* [0x002D] */ + 0x00000C22, /* [0x002E] */ + 0x20156007, /* [0x002F] */ + 0x00000000, /* [0x0030] */ + 0x20554005, /* [0x0031] */ + 0x3F800000, /* [0x0032] */ + 0x20554006, /* [0x0033] */ + 0x00000000, /* [0x0034] */ + 0x03000000, /* [0x0035] */ + 0x20050000, /* [0x0036] */ + 0x46F00009, /* [0x0037] */ + 0x201F0000, /* [0x0038] */ + 0x4398000A, /* [0x0039] */ + 0x201F0009, /* [0x003A] */ + 0x43980809, /* [0x003B] */ + 0x20180009, /* [0x003C] */ + 0x46100809, /* [0x003D] */ + 0x00091014, /* [0x003E] */ + 0x62050009, /* [0x003F] */ + 0x00000000, /* [0x0040] */ + 0x00000500, /* [0x0041] */ + 0x04800006, /* [0x0042] */ + 0xC2C61300, /* [0x0043] */ + 0x0280000E, /* [0x0044] */ + 0xC2C61310, /* [0x0045] */ + 0x00000000, /* [0x0046] */ + 0x04800000, /* [0x0047] */ + 0x00000000, /* [0x0048] */ + 0x05000000, /* [0x0049] */ + 0x00000000, /* [0x004A] */ + 0x00000000, /* [0x004B] */ + 0x00000000, /* [0x004C] */ + 0x00000000, /* [0x004D] */ + 0x00000000, /* [0x004E] */ + 0x00000000, /* [0x004F] */ + 0x00000000, /* [0x0050] */ + 0x00000000, /* [0x0051] */ + 0x00000000, /* [0x0052] */ + 0x00000000, /* [0x0053] */ + 0x00000000, /* [0x0054] */ + 0x00000000, /* [0x0055] */ + 0x00000000, /* [0x0056] */ + 0x00000000, /* [0x0057] */ + 0x00000000, /* [0x0058] */ + 0x00000000, /* [0x0059] */ + 0x00000000, /* [0x005A] */ + 0x00000000, /* [0x005B] */ + 0x00000000, /* [0x005C] */ + 0x00000000, /* [0x005D] */ + 0x70B00023, /* [0x005E] == TYPE7: LOAD_STATE (30) == */ + 0x00700000, /* [0x005F] */ + 0x00000000, /* [0x0060] */ + 0x00000000, /* [0x0061] */ + 0x00000000, /* [0x0062] */ + 0x03000000, /* [0x0063] */ + 0x00000000, /* [0x0064] */ + 0x00000000, /* [0x0065] */ + 0x00000000, /* [0x0066] */ + 0x00000000, /* [0x0067] */ + 0x00000000, /* [0x0068] */ + 0x00000000, /* [0x0069] */ + 0x00000000, /* [0x006A] */ + 0x00000000, 
/* [0x006B] */ + 0x00000000, /* [0x006C] */ + 0x00000000, /* [0x006D] */ + 0x00000000, /* [0x006E] */ + 0x00000000, /* [0x006F] */ + 0x00000000, /* [0x0070] */ + 0x00000000, /* [0x0071] */ + 0x00000000, /* [0x0072] */ + 0x00000000, /* [0x0073] */ + 0x00000000, /* [0x0074] */ + 0x00000000, /* [0x0075] */ + 0x00000000, /* [0x0076] */ + 0x00000000, /* [0x0077] */ + 0x00000000, /* [0x0078] */ + 0x00000000, /* [0x0079] */ + 0x00000000, /* [0x007A] */ + 0x00000000, /* [0x007B] */ + 0x00000000, /* [0x007C] */ + 0x00000000, /* [0x007D] */ + 0x00000000, /* [0x007E] */ + 0x00000000, /* [0x007F] */ + 0x00000000, /* [0x0080] */ + 0x00000000, /* [0x0081] */ + 0x70B08003, /* [0x0082] == TYPE7: LOAD_STATE (30) == */ + 0x00620000, /* [0x0083] */ + 0x00000000, /* [0x0084] */ + 0x00000000, /* [0x0085] */ + 0x70B08003, /* [0x0086] == TYPE7: LOAD_STATE (30) == */ + 0x01220008, /* [0x0087] */ + 0x00000000, /* [0x0088] */ + 0x00000000, /* [0x0089] */ + 0x70B0000B, /* [0x008A] == TYPE7: LOAD_STATE (30) == */ + 0x01180000, /* [0x008B] */ + 0x00000001, /* [0x008C] */ + 0x00000000, /* [0x008D] */ + 0x00000000, /* [0x008E] */ + 0x00000000, /* [0x008F] */ + 0x00000000, /* [0x0090] */ + 0x00000000, /* [0x0091] */ + 0x00000000, /* [0x0092] */ + 0x00000000, /* [0x0093] */ + 0x00000000, /* [0x0094] */ + 0x01400000, /* [0x0095] */ + 0x70460001, /* [0x0096] == TYPE7: EVENT_WRITE (46) == */ + 0x00000019, /* [0x0097] */ + 0x70460004, /* [0x0098] == TYPE7: EVENT_WRITE (46) == */ + 0x0000001D, /* [0x0099] */ + 0x00000000, /* [0x009A] */ + 0x00000000, /* [0x009B] */ + 0x00000001, /* [0x009C] */ + 0x70460004, /* [0x009D] == TYPE7: EVENT_WRITE (46) == */ + 0x0000001C, /* [0x009E] */ + 0x00000000, /* [0x009F] */ + 0x00000000, /* [0x00A0] */ + 0x00000001, /* [0x00A1] */ + 0x480E9185, /* [0x00A2] == TYPE4 == */ + 0x00000000, /* [0x00A3] A5X_UCHE_CACHE_INVALIDATE_MIN_LO (0x0E91)*/ + 0x00000000, /* [0x00A4] A5X_UCHE_CACHE_INVALIDATE_MIN_HI (0x0E92)*/ + 0x00000000, /* [0x00A5] A5X_UCHE_CACHE_INVALIDATE_MAX_LO 
(0x0E93)*/ + 0x00000000, /* [0x00A6] A5X_UCHE_CACHE_INVALIDATE_MAX_HI (0x0E94)*/ + 0x00000012, /* [0x00A7] A5X_UCHE_CACHE_INVALIDATE (0x0E95)*/ + 0x70268000, /* [0x00A8] == TYPE7: WAIT_FOR_IDLE (26) == */ + 0x40E78A01, /* [0x00A9] == TYPE4 == */ + 0x000FFFFF, /* [0x00AA] A5X_HLSQ_UPDATE_CNTL_CTX_0 (0xE78A)*/ + 0x70D08003, /* [0x00AB] == TYPE7: PERFCOUNTER_ACTION (50) == */ + 0x00000000, /* [0x00AC] */ + 0x00000000, /* [0x00AD] */ + 0x00000000, /* [0x00AE] */ + 0x70D08003, /* [0x00AF] == TYPE7: PERFCOUNTER_ACTION (50) == */ + 0x00000010, /* [0x00B0] */ + 0x00000000, /* [0x00B1] */ + 0x00000000, /* [0x00B2] */ + 0x70268000, /* [0x00B3] == TYPE7: WAIT_FOR_IDLE (26) == */ + 0x48E38C01, /* [0x00B4] == TYPE4 == */ + 0xFFFFFFFF, /* [0x00B5] A5X_PC_RESTART_INDEX_CTX_0 (0xE38C)*/ + 0x40E38801, /* [0x00B6] == TYPE4 == */ + 0x00000012, /* [0x00B7] A5X_PC_RASTER_CNTL_CTX_0 (0xE388)*/ + 0x48E09102, /* [0x00B8] == TYPE4 == */ + 0xFFC00010, /* [0x00B9] A5X_GRAS_SU_POINT_MINMAX_CTX_0 (0xE091)*/ + 0x00000008, /* [0x00BA] A5X_GRAS_SU_POINT_SIZE_CTX_0 (0xE092)*/ + 0x40E09901, /* [0x00BB] == TYPE4 == */ + 0x00000000, /* [0x00BC] A5X_GRAS_SU_CONSERVATIVE_RAS_CNTL_CTX_0 + * (0xE099) + */ + 0x48E0A401, /* [0x00BD] == TYPE4 == */ + 0x00000000, /* [0x00BE] A5X_GRAS_SC_SCREEN_SCISSOR_CNTL_CTX_0 (0xE0A4)*/ + 0x48E58A01, /* [0x00BF] == TYPE4 == */ + 0x00000000, /* [0x00C0] A5X_SP_VS_CONFIG_MAX_CONST_CTX_0 (0xE58A)*/ + 0x40E58B01, /* [0x00C1] == TYPE4 == */ + 0x00000000, /* [0x00C2] A5X_SP_PS_CONFIG_MAX_CONST_CTX_0 (0xE58B)*/ + 0x480CC601, /* [0x00C3] == TYPE4 == */ + 0x00000044, /* [0x00C4] A5X_RB_MODE_CNTL (0x0CC6)*/ + 0x400CC401, /* [0x00C5] == TYPE4 == */ + 0x00100000, /* [0x00C6] A5X_RB_DBG_ECO_CNTL (0x0CC4)*/ + 0x400E4201, /* [0x00C7] == TYPE4 == */ + 0x00000000, /* [0x00C8] A5X_VFD_MODE_CNTL (0x0E42)*/ + 0x480D0201, /* [0x00C9] == TYPE4 == */ + 0x0000001F, /* [0x00CA] A5X_PC_MODE_CNTL (0x0D02)*/ + 0x480EC201, /* [0x00CB] == TYPE4 == */ + 0x0000001E, /* [0x00CC] A5X_SP_MODE_CNTL 
(0x0EC2)*/ + 0x400EC001, /* [0x00CD] == TYPE4 == */ + 0x40000800, /* [0x00CE] A5X_SP_DBG_ECO_CNTL (0x0EC0)*/ + 0x400F0201, /* [0x00CF] == TYPE4 == */ + 0x00000544, /* [0x00D0] A5X_TPL1_MODE_CNTL (0x0F02)*/ + 0x400E0002, /* [0x00D1] == TYPE4 == */ + 0x00000080, /* [0x00D2] A5X_HLSQ_TIMEOUT_THRESHOLD_0 (0x0E00)*/ + 0x00000000, /* [0x00D3] A5X_HLSQ_TIMEOUT_THRESHOLD_1 (0x0E01)*/ + 0x400E6001, /* [0x00D4] == TYPE4 == */ + 0x00000400, /* [0x00D5] A5X_VPC_DBG_ECO_CNTL (0x0E60)*/ + 0x400E0601, /* [0x00D6] == TYPE4 == */ + 0x00000001, /* [0x00D7] A5X_HLSQ_MODE_CNTL (0x0E06)*/ + 0x480E6201, /* [0x00D8] == TYPE4 == */ + 0x00000000, /* [0x00D9] A5X_VPC_MODE_CNTL (0x0E62)*/ + 0x70EC8005, /* [0x00DA] == TYPE7: SET_RENDER_MODE (6C) == */ + 0x00000002, /* [0x00DB] */ + 0x00000000, /* [0x00DC] */ + 0x00000000, /* [0x00DD] */ + 0x00000008, /* [0x00DE] */ + 0x00000001, /* [0x00DF] */ + 0x40E14001, /* [0x00E0] == TYPE4 == */ + 0x00000204, /* [0x00E1] A5X_RB_CNTL_CTX_0 (0xE140)*/ + 0x709D0001, /* [0x00E2] == TYPE7: SKIP_IB2_ENABLE_GLOBAL (1D) == */ + 0x00000000, /* [0x00E3] */ + 0x48E0EA02, /* [0x00E4] == TYPE4 == */ + 0x00000000, /* [0x00E5] A5X_GRAS_SC_WINDOW_SCISSOR_TL_CTX_0 (0xE0EA)*/ + 0x001F0073, /* [0x00E6] A5X_GRAS_SC_WINDOW_SCISSOR_BR_CTX_0 (0xE0EB)*/ + 0x48E21102, /* [0x00E7] == TYPE4 == */ + 0x00000000, /* [0x00E8] A5X_RB_RESOLVE_CNTL_1_CTX_0 (0xE211)*/ + 0x00000000, /* [0x00E9] A5X_RB_RESOLVE_CNTL_2_CTX_0 (0xE212)*/ + 0x480BC283, /* [0x00EA] == TYPE4 == */ + 0x00000204, /* [0x00EB] UNKNOWN (0x0BC2)*/ + 0x00000000, /* [0x00EC] UNKNOWN (0x0BC3)*/ + 0x00000000, /* [0x00ED] UNKNOWN (0x0BC4)*/ + 0x400BC502, /* [0x00EE] == TYPE4 == */ + 0x00000000, /* [0x00EF] UNKNOWN (0x0BC5)*/ + 0x00000000, /* [0x00F0] UNKNOWN (0x0BC6)*/ + 0x480BD001, /* [0x00F1] == TYPE4 == */ + 0x01100000, /* [0x00F2] UNKNOWN (0x0BD0)*/ + 0x480BE002, /* [0x00F3] == TYPE4 == */ + 0x00000000, /* [0x00F4] UNKNOWN (0x0BE0)*/ + 0x00000000, /* [0x00F5] UNKNOWN (0x0BE1)*/ + 0x480C0001, /* [0x00F6] == TYPE4 == */ + 
0x00000020, /* [0x00F7] A5X_VSC_PIPE_DATA_LENGTH_0 (0x0C00)*/ + 0x48E3B001, /* [0x00F8] == TYPE4 == */ + 0x00000003, /* [0x00F9] A5X_PC_POWER_CNTL_CTX_0 (0xE3B0)*/ + 0x48E4F001, /* [0x00FA] == TYPE4 == */ + 0x00000003, /* [0x00FB] A5X_VFD_POWER_CNTL_CTX_0 (0xE4F0)*/ + 0x480E6201, /* [0x00FC] == TYPE4 == */ + 0x00000001, /* [0x00FD] A5X_VPC_MODE_CNTL (0x0E62)*/ + 0x70460001, /* [0x00FE] == TYPE7: EVENT_WRITE (46) == */ + 0x0000002C, /* [0x00FF] */ + 0x40E1D001, /* [0x0100] == TYPE4 == */ + 0x00000000, /* [0x0101] A5X_RB_WINDOW_OFFSET_CTX_0 (0xE1D0)*/ + 0x70BF8003, /* [0x0102] == TYPE7: INDIRECT_BUFFER_PFE (3F) == */ + 0x00000000, /* [0x0103] */ + 0x00000000, /* [0x0104] */ + 0x000000A0, /* [0x0105] */ + 0x70460001, /* [0x0106] == TYPE7: EVENT_WRITE (46) == */ + 0x0000002D, /* [0x0107] */ + 0x70460004, /* [0x0108] == TYPE7: EVENT_WRITE (46) == */ + 0x00000004, /* [0x0109] */ + 0x00000000, /* [0x010A] */ + 0x00000000, /* [0x010B] */ + 0x00000000, /* [0x010C] */ + 0x70268000, /* [0x010D] == TYPE7: WAIT_FOR_IDLE (26) == */ + 0x480E6201, /* [0x010E] == TYPE4 == */ + 0x00000000, /* [0x010F] A5X_VPC_MODE_CNTL (0x0E62)*/ + 0x48E3B001, /* [0x0110] == TYPE4 == */ + 0x00000003, /* [0x0111] A5X_PC_POWER_CNTL_CTX_0 (0xE3B0)*/ + 0x48E4F001, /* [0x0112] == TYPE4 == */ + 0x00000003, /* [0x0113] A5X_VFD_POWER_CNTL_CTX_0 (0xE4F0)*/ + 0x70268000, /* [0x0114] == TYPE7: WAIT_FOR_IDLE (26) == */ + 0x400CC701, /* [0x0115] == TYPE4 == */ + 0x7C13C080, /* [0x0116] A5X_RB_CCU_CNTL (0x0CC7)*/ + 0x70EC8005, /* [0x0117] == TYPE7: SET_RENDER_MODE (6C) == */ + 0x00000001, /* [0x0118] */ + 0x00000000, /* [0x0119] */ + 0x00000000, /* [0x011A] */ + 0x00000010, /* [0x011B] */ + 0x00000001, /* [0x011C] */ + 0x70EA0001, /* [0x011D] == TYPE7: PREEMPT_ENABLE_LOCAL (6A) == */ + 0x00000000, /* [0x011E] */ + 0x48E0EA02, /* [0x011F] == TYPE4 == */ + 0x00000000, /* [0x0120] A5X_GRAS_SC_WINDOW_SCISSOR_TL_CTX_0 (0xE0EA)*/ + 0x001F0073, /* [0x0121] A5X_GRAS_SC_WINDOW_SCISSOR_BR_CTX_0 (0xE0EB)*/ + 0x48E21102, /* 
[0x0122] == TYPE4 == */ + 0x00000000, /* [0x0123] A5X_RB_RESOLVE_CNTL_1_CTX_0 (0xE211)*/ + 0x00030007, /* [0x0124] A5X_RB_RESOLVE_CNTL_2_CTX_0 (0xE212)*/ + 0x70138000, /* [0x0125] == TYPE7: WAIT_FOR_ME (13) == */ + 0x70640001, /* [0x0126] == TYPE7: SET_VISIBILITY_OVERRIDE (64) == */ + 0x00000000, /* [0x0127] */ + 0x702F8005, /* [0x0128] == TYPE7: SET_BIN_DATA (2F) == */ + 0x00010000, /* [0x0129] */ + 0x00000000, /* [0x012A] */ + 0x00000000, /* [0x012B] */ + 0x00000000, /* [0x012C] */ + 0x00000000, /* [0x012D] */ + 0x40E1D001, /* [0x012E] == TYPE4 == */ + 0x00000000, /* [0x012F] A5X_RB_WINDOW_OFFSET_CTX_0 (0xE1D0)*/ + 0x40E2A201, /* [0x0130] == TYPE4 == */ + 0x00000001, /* [0x0131] A5X_VPC_SO_OVERRIDE_CTX_0 (0xE2A2)*/ + 0x70640001, /* [0x0132] == TYPE7: SET_VISIBILITY_OVERRIDE (64) == */ + 0x00000000, /* [0x0133] */ + 0x48E1B285, /* [0x0134] == TYPE4 == */ + 0x00000001, /* [0x0135] A5X_RB_DEPTH_BUFFER_INFO_CTX_0 (0xE1B2)*/ + 0x00004000, /* [0x0136] A5X_RB_DEPTH_BUFFER_BASE_LO_CTX_0 (0xE1B3)*/ + 0x00000000, /* [0x0137] A5X_RB_DEPTH_BUFFER_BASE_HI_CTX_0 (0xE1B4)*/ + 0x00000004, /* [0x0138] A5X_RB_DEPTH_BUFFER_PITCH_CTX_0 (0xE1B5)*/ + 0x000000C0, /* [0x0139] A5X_RB_DEPTH_BUFFER_ARRAY_PITCH_CTX_0 (0xE1B6)*/ + 0x48E09801, /* [0x013A] == TYPE4 == */ + 0x00000001, /* [0x013B] A5X_GRAS_SU_DEPTH_BUFFER_INFO_CTX_0 (0xE098)*/ + 0x40E24083, /* [0x013C] == TYPE4 == */ + 0x00000000, /* [0x013D] A5X_RB_DEPTH_FLAG_BUFFER_BASE_LO_CTX_0 + * (0xE240) + */ + 0x00000000, /* [0x013E] A5X_RB_DEPTH_FLAG_BUFFER_BASE_HI_CTX_0 + * (0xE241) + */ + 0x00000000, /* [0x013F] A5X_RB_DEPTH_FLAG_BUFFER_PITCH_CTX_0 (0xE242)*/ + 0x40E15285, /* [0x0140] == TYPE4 == */ + 0x00001230, /* [0x0141] A5X_RB_MRT_BUFFER_INFO_0_CTX_0 (0xE152)*/ + 0x00000008, /* [0x0142] A5X_RB_MRT_BUFFER_PITCH_0_CTX_0 (0xE153)*/ + 0x00000100, /* [0x0143] A5X_RB_MRT_BUFFER_ARRAY_PITCH_0_CTX_0 (0xE154)*/ + 0x00000000, /* [0x0144] A5X_RB_MRT_BUFFER_BASE_LO_0_CTX_0 (0xE155)*/ + 0x00000000, /* [0x0145] 
A5X_RB_MRT_BUFFER_BASE_HI_0_CTX_0 (0xE156)*/ + 0x40E40801, /* [0x0146] == TYPE4 == */ + 0x00000000, /* [0x0147] A5X_VFD_INDEX_OFFSET_CTX_0 (0xE408)*/ + 0x48E40901, /* [0x0148] == TYPE4 == */ + 0x00000000, /* [0x0149] A5X_VFD_INSTANCE_START_OFFSET_CTX_0 (0xE409)*/ + 0x70BF8003, /* [0x014A] == TYPE7: INDIRECT_BUFFER_PFE (3F) == */ + 0x00000000, /* [0x014B] */ + 0x00000000, /* [0x014C] */ + 0x00000112, /* [0x014D] */ + 0x70230001, /* [0x014E] == TYPE7: SKIP_IB2_ENABLE_LOCAL (23) == */ + 0x00000000, /* [0x014F] */ + 0x70BF8003, /* [0x0150] == TYPE7: INDIRECT_BUFFER_PFE (3F) == */ + 0x00000000, /* [0x0151] */ + 0x00000000, /* [0x0152] */ + 0x0000001B, /* [0x0153] */ + 0x70EC8005, /* [0x0154] == TYPE7: SET_RENDER_MODE (6C) == */ + 0x00000001, /* [0x0155] */ + 0x00000000, /* [0x0156] */ + 0x00000000, /* [0x0157] */ + 0x00000000, /* [0x0158] */ + 0x00000001, /* [0x0159] */ + 0x70438003, /* [0x015A] == TYPE7: SET_DRAW_STATE (43) == */ + 0x00080059, /* [0x015B] */ + 0x00000000, /* [0x015C] */ + 0x00000000, /* [0x015D] */ + 0x70388003, /* [0x015E] == TYPE7: DRAW_INDX_OFFSET (38) == */ + 0x00000888, /* [0x015F] */ + 0x00000000, /* [0x0160] */ + 0x00000002, /* [0x0161] */ + 0x70A88003, /* [0x0162] == TYPE7: DRAW_INDIRECT (28) == */ + 0x00200884, /* [0x0163] */ + 0x00000000, /* [0x0164] */ + 0x00000000, /* [0x0165] */ + 0x70298006, /* [0x0166] == TYPE7: DRAW_INDX_INDIRECT (29) == */ + 0x00200404, /* [0x0167] */ + 0x00000000, /* [0x0168] */ + 0x00000000, /* [0x0169] */ + 0x00000006, /* [0x016A] */ + 0x00000000, /* [0x016B] */ + 0x00000000, /* [0x016C] */ + 0x40E2A783, /* [0x016D] == TYPE4 == */ + 0x00000000, /* [0x016E] A5X_VPC_SO_BUFFER_BASE_LO_0_CTX_0 (0xE2A7)*/ + 0x00000000, /* [0x016F] A5X_VPC_SO_BUFFER_BASE_HI_0_CTX_0 (0xE2A8)*/ + 0x00000004, /* [0x0170] A5X_VPC_SO_BUFFER_SIZE_0_CTX_0 (0xE2A9)*/ + 0x48E2AC02, /* [0x0171] == TYPE4 == */ + 0x00000000, /* [0x0172] A5X_VPC_SO_FLUSH_BASE_LO_0_CTX_0 (0xE2AC)*/ + 0x00000000, /* [0x0173] A5X_VPC_SO_FLUSH_BASE_HI_0_CTX_0 (0xE2AD)*/ + 
0x70460001, /* [0x0174] == TYPE7: EVENT_WRITE (46) == */ + 0x00000011, /* [0x0175] */ + 0x48E10001, /* [0x0176] == TYPE4 == */ + 0x00000009, /* [0x0177] A5X_GRAS_LRZ_CNTL_CTX_0 (0xE100)*/ + 0x70460001, /* [0x0178] == TYPE7: EVENT_WRITE (46) == */ + 0x00000026, /* [0x0179] */ + 0x48E10001, /* [0x017A] == TYPE4 == */ + 0x00000008, /* [0x017B] A5X_GRAS_LRZ_CNTL_CTX_0 (0xE100)*/ + 0x40E10185, /* [0x017C] == TYPE4 == */ + 0x00000000, /* [0x017D] A5X_GRAS_LRZ_BUFFER_BASE_LO_CTX_0 (0xE101)*/ + 0x00000000, /* [0x017E] A5X_GRAS_LRZ_BUFFER_BASE_HI_CTX_0 (0xE102)*/ + 0x00000001, /* [0x017F] A5X_GRAS_LRZ_BUFFER_PITCH_CTX_0 (0xE103)*/ + 0x00000000, /* [0x0180] A5X_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO_CTX_0 + * (0xE104) + */ + 0x00000000, /* [0x0181] A5X_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI_CTX_0 + * (0xE105) + */ + 0x70460001, /* [0x0182] == TYPE7: EVENT_WRITE (46) == */ + 0x00000025, /* [0x0183] */ + 0x70460001, /* [0x0184] == TYPE7: EVENT_WRITE (46) == */ + 0x00000019, /* [0x0185] */ + 0x70460001, /* [0x0186] == TYPE7: EVENT_WRITE (46) == */ + 0x00000018, /* [0x0187] */ + 0x70EA0001, /* [0x0188] == TYPE7: PREEMPT_ENABLE_LOCAL (6A) == */ + 0x00000000, /* [0x0189] */ + 0x70EC0001, /* [0x018A] == TYPE7: SET_RENDER_MODE (6C) == */ + 0x00000006, /* [0x018B] */ + 0x70438003, /* [0x018C] == TYPE7: SET_DRAW_STATE (43) == */ + 0x00080059, /* [0x018D] */ + 0x00000000, /* [0x018E] */ + 0x00000000, /* [0x018F] */ + 0x70DC0002, /* [0x0190] == TYPE7: CONTEXT_REG_BUNCH (5C) == */ + 0x0000E2A1, /* [0x0191] */ + 0x00008001, /* [0x0192] */ + 0x709D0001, /* [0x0193] == TYPE7: SKIP_IB2_ENABLE_GLOBAL (1D) == */ + 0x00000000, /* [0x0194] */ + 0x70138000, /* [0x0195] == TYPE7: WAIT_FOR_ME (13) == */ + 0x70640001, /* [0x0196] == TYPE7: SET_VISIBILITY_OVERRIDE (64) == */ + 0x00000001, /* [0x0197] */ + 0x70380007, /* [0x0198] == TYPE7: DRAW_INDX_OFFSET (38) == */ + 0x00200506, /* [0x0199] */ + 0x00000000, /* [0x019A] */ + 0x00000004, /* [0x019B] */ + 0x00000000, /* [0x019C] */ + 0x00000000, /* [0x019D] */ 
+ 0x00000000, /* [0x019E] */ + 0x00000004, /* [0x019F] */ + 0x703D8005, /* [0x01A0] == TYPE7: MEM_WRITE (3D) == */ + 0x00000000, /* [0x01A1] */ + 0x00000000, /* [0x01A2] */ + 0x00000001, /* [0x01A3] */ + 0x00000001, /* [0x01A4] */ + 0x00000001, /* [0x01A5] */ + 0x70928000, /* [0x01A6] == TYPE7: WAIT_MEM_WRITES (12) == */ + 0x70BF8003, /* [0x01A7] == TYPE7: INDIRECT_BUFFER_PFE (3F) == */ + 0x00000000, /* [0x01A8] */ + 0x00000000, /* [0x01A9] */ + 0x00000028, /* [0x01AA] */ + 0x70C48006, /* [0x01AB] == TYPE7: COND_EXEC (44) == */ + 0x00000000, /* [0x01AC] */ + 0x00000000, /* [0x01AD] */ + 0x00000000, /* [0x01AE] */ + 0x00000000, /* [0x01AF] */ + 0x00000001, /* [0x01B0] */ + 0x00000002, /* [0x01B1] */ + 0x70100001, /* [0x01B2] == TYPE7: NOP (10) == */ + 0x00000000, /* [0x01B3] */ + 0x70C28003, /* [0x01B4] == TYPE7: MEM_TO_REG (42) == */ + 0xC000E2AB, /* [0x01B5] */ + 0x00000000, /* [0x01B6] */ + 0x00000000, /* [0x01B7] */ + 0x70230001, /* [0x01B8] == TYPE7: SKIP_IB2_ENABLE_LOCAL (23) == */ + 0x00000000, /* [0x01B9] */ + 0x70E90001, /* [0x01BA] == TYPE7: PREEMPT_ENABLE_GLOBAL (69) == */ + 0x00000000, /* [0x01BB] */ + 0x70BC8006, /* [0x01BC] == TYPE7: WAIT_REG_MEM (3C) == */ + 0x00000010, /* [0x01BD] */ + 0x00000000, /* [0x01BE] */ + 0x00000000, /* [0x01BF] */ + 0x00000001, /* [0x01C0] */ + 0xFFFFFFFF, /* [0x01C1] */ + 0x00000001, /* [0x01C2] */ + 0x70738009, /* [0x01C3] == TYPE7: MEM_TO_MEM (73) == */ + 0x20000004, /* [0x01C4] */ + 0x00000000, /* [0x01C5] */ + 0x00000000, /* [0x01C6] */ + 0x00000000, /* [0x01C7] */ + 0x00000000, /* [0x01C8] */ + 0x00000000, /* [0x01C9] */ + 0x00000000, /* [0x01CA] */ + 0x00000000, /* [0x01CB] */ + 0x00000000, /* [0x01CC] */ + 0x70738009, /* [0x01CD] == TYPE7: MEM_TO_MEM (73) == */ + 0xE0000004, /* [0x01CE] */ + 0x00000000, /* [0x01CF] */ + 0x00000000, /* [0x01D0] */ + 0x00000000, /* [0x01D1] */ + 0x00000000, /* [0x01D2] */ + 0x00000000, /* [0x01D3] */ + 0x00000000, /* [0x01D4] */ + 0x00000000, /* [0x01D5] */ + 0x00000000, /* [0x01D6] 
*/ + 0x70B50001, /* [0x01D7] == TYPE7: SET_SUBDRAW_SIZE (35) == */ + 0x00000001, /* [0x01D8] */ + 0x40E78A01, /* [0x01D9] == TYPE4 == */ + 0x000FFFFF, /* [0x01DA] A5X_HLSQ_UPDATE_CNTL_CTX_0 (0xE78A)*/ + 0x70268000, /* [0x01DB] == TYPE7: WAIT_FOR_IDLE (26) == */ + 0x400E0601, /* [0x01DC] == TYPE4 == */ + 0x00000001, /* [0x01DD] A5X_HLSQ_MODE_CNTL (0x0E06)*/ + 0x706E0004, /* [0x01DE] == TYPE7: COMPUTE_CHECKPOINT (6E) == */ + 0x00000000, /* [0x01DF] */ + 0x00000000, /* [0x01E0] */ + 0x00000018, /* [0x01E1] */ + 0x00000001, /* [0x01E2] */ + 0x40E14001, /* [0x01E3] == TYPE4 == */ + 0x00020000, /* [0x01E4] A5X_RB_CNTL_CTX_0 (0xE140)*/ + 0x40E78A01, /* [0x01E5] == TYPE4 == */ + 0x01F00000, /* [0x01E6] A5X_HLSQ_UPDATE_CNTL_CTX_0 (0xE78A)*/ + 0x70268000, /* [0x01E7] == TYPE7: WAIT_FOR_IDLE (26) == */ + 0x48E38C01, /* [0x01E8] == TYPE4 == */ + 0xFFFFFFFF, /* [0x01E9] A5X_PC_RESTART_INDEX_CTX_0 (0xE38C)*/ + 0x480D0201, /* [0x01EA] == TYPE4 == */ + 0x0000001F, /* [0x01EB] A5X_PC_MODE_CNTL (0x0D02)*/ + 0x480EC201, /* [0x01EC] == TYPE4 == */ + 0x0000001E, /* [0x01ED] A5X_SP_MODE_CNTL (0x0EC2)*/ + 0x48E58001, /* [0x01EE] == TYPE4 == */ + 0x00000000, /* [0x01EF] A5X_SP_SP_CNTL_CTX_0 (0xE580)*/ + 0x40E2A201, /* [0x01F0] == TYPE4 == */ + 0x00000001, /* [0x01F1] A5X_VPC_SO_OVERRIDE_CTX_0 (0xE2A2)*/ + 0x70640001, /* [0x01F2] == TYPE7: SET_VISIBILITY_OVERRIDE (64) == */ + 0x00000001, /* [0x01F3] */ + 0x48E78401, /* [0x01F4] == TYPE4 == */ + 0x00000881, /* [0x01F5] A5X_HLSQ_CNTL_0_CTX_0 (0xE784)*/ + 0x40E5F001, /* [0x01F6] == TYPE4 == */ + 0x00000C06, /* [0x01F7] A5X_SP_CS_CNTL_0_CTX_0 (0xE5F0)*/ + 0x48E79001, /* [0x01F8] == TYPE4 == */ + 0x00000001, /* [0x01F9] A5X_HLSQ_CS_CONFIG_CTX_0 (0xE790)*/ + 0x48E79601, /* [0x01FA] == TYPE4 == */ + 0x00000005, /* [0x01FB] A5X_HLSQ_CS_CNTL_CTX_0 (0xE796)*/ + 0x48E58901, /* [0x01FC] == TYPE4 == */ + 0x00000001, /* [0x01FD] A5X_SP_CS_CONFIG_CTX_0 (0xE589)*/ + 0x40E7DC01, /* [0x01FE] == TYPE4 == */ + 0x00000030, /* [0x01FF] 
A5X_HLSQ_CONTEXT_SWITCH_CS_SW_3_CTX_0 (0xE7DC)*/ + 0x48E7DD01, /* [0x0200] == TYPE4 == */ + 0x00000002, /* [0x0201] A5X_HLSQ_CONTEXT_SWITCH_CS_SW_4_CTX_0 (0xE7DD)*/ + 0x40E7B001, /* [0x0202] == TYPE4 == */ + 0x00000003, /* [0x0203] A5X_HLSQ_CS_NDRANGE_0_CTX_0 (0xE7B0)*/ + 0x48E7B702, /* [0x0204] == TYPE4 == */ + 0x00FCC0CF, /* [0x0205] A5X_HLSQ_CS_CNTL_0_CTX_0 (0xE7B7)*/ + 0x00000000, /* [0x0206] A5X_HLSQ_CS_CNTL_1_CTX_0 (0xE7B8)*/ + 0x40E7B983, /* [0x0207] == TYPE4 == */ + 0x00000001, /* [0x0208] A5X_HLSQ_CS_KERNEL_GROUP_X_CTX_0 (0xE7B9)*/ + 0x00000001, /* [0x0209] A5X_HLSQ_CS_KERNEL_GROUP_Y_CTX_0 (0xE7BA)*/ + 0x00000001, /* [0x020A] A5X_HLSQ_CS_KERNEL_GROUP_Z_CTX_0 (0xE7BB)*/ + 0x70B08003, /* [0x020B] == TYPE7: LOAD_STATE (30) == */ + 0x00B60000, /* [0x020C] */ + 0x00000000, /* [0x020D] */ + 0x00000000, /* [0x020E] */ + 0x70B08003, /* [0x020F] == TYPE7: LOAD_STATE (30) == */ + 0x01360008, /* [0x0210] */ + 0x00000000, /* [0x0211] */ + 0x00000000, /* [0x0212] */ + 0x70B0000B, /* [0x0213] == TYPE7: LOAD_STATE (30) == */ + 0x00BC0000, /* [0x0214] */ + 0x00000000, /* [0x0215] */ + 0x00000000, /* [0x0216] */ + 0x00000000, /* [0x0217] */ + 0x00000000, /* [0x0218] */ + 0x00000000, /* [0x0219] */ + 0x00000000, /* [0x021A] */ + 0x00000000, /* [0x021B] */ + 0x00000000, /* [0x021C] */ + 0x00000000, /* [0x021D] */ + 0x00000000, /* [0x021E] */ + 0x70B00007, /* [0x021F] == TYPE7: LOAD_STATE (30) == */ + 0x00BC0000, /* [0x0220] */ + 0x00000001, /* [0x0221] */ + 0x00000000, /* [0x0222] */ + 0x00040000, /* [0x0223] */ + 0x00000000, /* [0x0224] */ + 0x00040000, /* [0x0225] */ + 0x00000000, /* [0x0226] */ + 0x70B00007, /* [0x0227] == TYPE7: LOAD_STATE (30) == */ + 0x00BC0000, /* [0x0228] */ + 0x00000002, /* [0x0229] */ + 0x00000000, /* [0x022A] */ + 0x00000000, /* [0x022B] */ + 0x00000000, /* [0x022C] */ + 0x00000000, /* [0x022D] */ + 0x00000000, /* [0x022E] */ + 0x48E7B186, /* [0x022F] == TYPE4 == */ + 0x00000001, /* [0x0230] A5X_HLSQ_CS_NDRANGE_1_CTX_0 (0xE7B1)*/ + 0x00000000, /* 
[0x0231] A5X_HLSQ_CS_NDRANGE_2_CTX_0 (0xE7B2)*/ + 0x00000001, /* [0x0232] A5X_HLSQ_CS_NDRANGE_3_CTX_0 (0xE7B3)*/ + 0x00000000, /* [0x0233] A5X_HLSQ_CS_NDRANGE_4_CTX_0 (0xE7B4)*/ + 0x00000001, /* [0x0234] A5X_HLSQ_CS_NDRANGE_5_CTX_0 (0xE7B5)*/ + 0x00000000, /* [0x0235] A5X_HLSQ_CS_NDRANGE_6_CTX_0 (0xE7B6)*/ + 0x70B30004, /* [0x0236] == TYPE7: EXEC_CS (33) == */ + 0x00000000, /* [0x0237] */ + 0x00000001, /* [0x0238] */ + 0x00000001, /* [0x0239] */ + 0x00000001, /* [0x023A] */ + 0x480E6201, /* [0x023B] == TYPE4 == */ + 0x00000001, /* [0x023C] A5X_VPC_MODE_CNTL (0x0E62)*/ +}; + +/* + * These are fixups for the addresses _a5xx_critical_pkts[]. The first two + * numbers are the dword offsets into the buffer above. The third enum is a + * clue as to which buffer is being patched in and the final number is an offset + * in said buffer. + */ +static const struct adreno_critical_fixup critical_pkt_fixups[] = { + { 132, 133, 2, 0x0000 }, + { 136, 137, 2, 0x0001 }, + { 154, 155, 2, 0x0100 }, + { 159, 160, 2, 0x0104 }, + { 173, 174, 2, 0x0200 }, + { 177, 178, 2, 0x0300 }, + { 236, 237, 0, 0x0000 }, + { 244, 245, 0, 0x0040 }, + { 259, 260, 3, 0x0000 }, + { 266, 267, 2, 0x0108 }, + { 298, 299, 0, 0x0040 }, + { 300, 301, 2, 0x0080 }, + { 331, 332, 3, 0x02A0 }, + { 337, 338, 3, 0x0700 }, + { 348, 349, 3, 0x0920 }, + { 356, 357, 1, 0x008C }, + { 360, 361, 1, 0x0080 }, + { 363, 364, 1, 0x008C }, + { 366, 367, 0, 0x0100 }, + { 370, 371, 0, 0x0120 }, + { 381, 382, 1, 0x0480 }, + { 384, 385, 1, 0x0400 }, + { 398, 399, 3, 0x0920 }, + { 413, 414, 1, 0x0080 }, + { 417, 418, 1, 0x0300 }, + { 424, 425, 3, 0x0880 }, + { 428, 429, 1, 0x0300 }, + { 430, 431, 1, 0x0300 }, + { 438, 439, 1, 0x0300 }, + { 446, 447, 1, 0x0300 }, + { 453, 454, 1, 0x0320 }, + { 455, 456, 1, 0x0300 }, + { 457, 458, 1, 0x0304 }, + { 459, 460, 1, 0x0308 }, + { 463, 464, 1, 0x0320 }, + { 465, 466, 1, 0x0300 }, + { 467, 468, 1, 0x0304 }, + { 469, 470, 1, 0x0308 }, + { 525, 526, 1, 0x0160 }, + { 529, 530, 1, 0x0101 }, + { 
535, 536, 1, 0x0140 }, + { 539, 540, 0, 0x0800 }, + { 555, 556, 1, 0x0140 }, + { 557, 558, 0, 0x0800 }, +}; + +static unsigned int _a5xx_critical_pkts_mem01[] = { + 0xBECCCCCD, 0x00000000, 0x3ECCCCCD, 0x00000000, 0x3ECCCCCD, 0x3ECCCCCD, + 0xBECCCCCD, 0x00000000, 0xBECCCCCD, 0x3ECCCCCD, 0x3ECCCCCD, 0x00000000, + 0xBECCCCCD, 0x00000000, 0x3ECCCCCD, 0x00000000, 0x3ECCCCCD, 0xBECCCCCD, + 0xBECCCCCD, 0x00000000, 0xBECCCCCD, 0xBECCCCCD, 0x3ECCCCCD, 0x00000000, + 0x3ECCCCCD, 0x00000000, 0x3ECCCCCD, 0x00000000, 0x3ECCCCCD, 0x00000000, + 0x00000000, 0x00000000, 0x00040003, 0x00090005, 0x000B000A, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000003, 0x00000001, + 0x00000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000003, 0x00000001, 0x00000003, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x3EF5C28F, 0x3ED1EB85, 0x3E6147AE, 0x3F800000, + 0x3E947AE1, 0x3E6147AE, 0x3D4CCCCD, 0x3F800000, 0x00000000, 0x20554002, + 0x3F800000, 0x20444003, 0x000000CF, 0x20044904, 0x00000000, 0x00000200, + 0x00050001, 0x42300001, 0x00000002, 0x20154005, 0x00000020, 0x20244006, + 0x00000000, 0x00000000, 0x10200001, 0x46500007, 0x20030004, 0x46D00004, + 0x00000000, 0x20554008, 0x00070001, 0x61830806, 0x00061020, 0x61808001, + 0x00040000, 0x42380800, 0x00010000, 0x42380800, 0x20040000, 0x46D80800, + 0x00000000, 0x20154007, 0x20020000, 0x46F80000, 0x00000007, 0x20154001, + 0x00000000, 0x00000200, 0x60030001, 0x43900004, 0x60030001, 0x43900001, + 0x00000000, 0x00000400, 0x00013600, 0xC6E20004, 0x40040003, 0x50180104, + 0x40060003, 0x40180803, 0x00000003, 0x20044006, 0x00000000, 0x00000500, + 0x00003609, 0xC7260201, 0x00000000, 0x03000000, 
0x00000000, 0x00000000, + 0x00000000, 0x00000000, +}; + +static unsigned int _a5xx_critical_pkts_mem02[] = { + 0x00000000, 0x03000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x0000000C, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x8ACFE7F3, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +static unsigned int _a5xx_critical_pkts_mem03[] = { + 0x70438003, /* [0x0000] == TYPE7: SET_DRAW_STATE (43) == */ + 0x0008003A, /* [0x0001] */ + 0x00000000, /* [0x0002] */ + 0x00000000, /* [0x0003] */ + 0x70B08003, /* [0x0004] == TYPE7: LOAD_STATE (30) == */ + 0x00620000, /* [0x0005] */ + 0x00000000, /* [0x0006] */ + 0x00000000, /* [0x0007] */ + 0x40E29801, /* [0x0008] == TYPE4 == */ + 0x0000FFFF, /* [0x0009] A5X_VPC_GS_SIV_CNTL_CTX_0 (0xE298)*/ + 0x48E2A001, /* [0x000A] == TYPE4 == */ + 0x000000FF, /* [0x000B] A5X_VPC_PS_PRIMITIVEID_CNTL_CTX_0 (0xE2A0)*/ + 0x40E40185, /* [0x000C] == TYPE4 == */ + 0x00FCFCFC, /* [0x000D] A5X_VFD_CNTL_1_CTX_0 (0xE401)*/ + 0x0000FCFC, /* [0x000E] A5X_VFD_CNTL_2_CTX_0 (0xE402)*/ + 0x0000FCFC, /* [0x000F] A5X_VFD_CNTL_3_CTX_0 (0xE403)*/ + 0x000000FC, /* [0x0010] A5X_VFD_CNTL_4_CTX_0 (0xE404)*/ + 0x00000000, /* [0x0011] A5X_VFD_CNTL_5_CTX_0 (0xE405)*/ + 0x48E38F01, /* [0x0012] == TYPE4 == */ + 0x00000000, /* [0x0013] A5X_PC_HS_PARAM_CTX_0 (0xE38F)*/ + 0x48E58001, /* [0x0014] == TYPE4 == */ + 0x00000010, /* [0x0015] A5X_SP_SP_CNTL_CTX_0 (0xE580)*/ + 0x40E00001, /* [0x0016] == TYPE4 == */ + 0x00000080, /* [0x0017] A5X_GRAS_CL_CNTL_CTX_0 (0xE000)*/ + 0x40E09583, /* [0x0018] == TYPE4 == */ + 0x00000000, /* [0x0019] 
A5X_GRAS_SU_POLY_OFFSET_SCALE_CTX_0 (0xE095)*/ + 0x00000000, /* [0x001A] A5X_GRAS_SU_POLY_OFFSET_OFFSET_CTX_0 (0xE096)*/ + 0x00000000, /* [0x001B] A5X_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP_CTX_0 + * (0xE097) + */ + 0x40E09001, /* [0x001C] == TYPE4 == */ + 0x00000010, /* [0x001D] A5X_GRAS_SU_CNTL_CTX_0 (0xE090)*/ + 0x40E0AA02, /* [0x001E] == TYPE4 == */ + 0x00000000, /* [0x001F] A5X_GRAS_SC_SCREEN_SCISSOR_TL_0_CTX_0 (0xE0AA)*/ + 0x001F0073, /* [0x0020] A5X_GRAS_SC_SCREEN_SCISSOR_BR_0_CTX_0 (0xE0AB)*/ + 0x48E01086, /* [0x0021] == TYPE4 == */ + 0x42680000, /* [0x0022] A5X_GRAS_CL_VIEWPORT_XOFFSET_0_CTX_0 (0xE010)*/ + 0x42680000, /* [0x0023] A5X_GRAS_CL_VIEWPORT_XSCALE_0_CTX_0 (0xE011)*/ + 0x41800000, /* [0x0024] A5X_GRAS_CL_VIEWPORT_YOFFSET_0_CTX_0 (0xE012)*/ + 0xC1800000, /* [0x0025] A5X_GRAS_CL_VIEWPORT_YSCALE_0_CTX_0 (0xE013)*/ + 0x3EFFFEE0, /* [0x0026] A5X_GRAS_CL_VIEWPORT_ZOFFSET_0_CTX_0 (0xE014)*/ + 0x3EFFFEE0, /* [0x0027] A5X_GRAS_CL_VIEWPORT_ZSCALE_0_CTX_0 (0xE015)*/ + 0x40E0CA02, /* [0x0028] == TYPE4 == */ + 0x00000000, /* [0x0029] A5X_GRAS_SC_VIEWPORT_SCISSOR_TL_0_CTX_0 + * (0xE0CA) + */ + 0x001F0073, /* [0x002A] A5X_GRAS_SC_VIEWPORT_SCISSOR_BR_0_CTX_0 + * (0xE0CB) + */ + 0x40E00601, /* [0x002B] == TYPE4 == */ + 0x0007FDFF, /* [0x002C] A5X_GRAS_CL_GUARDBAND_CLIP_ADJ_CTX_0 (0xE006)*/ + 0x40E70401, /* [0x002D] == TYPE4 == */ + 0x00000000, /* [0x002E] A5X_TPL1_TP_RAS_MSAA_CNTL_CTX_0 (0xE704)*/ + 0x48E70501, /* [0x002F] == TYPE4 == */ + 0x00000004, /* [0x0030] A5X_TPL1_TP_DEST_MSAA_CNTL_CTX_0 (0xE705)*/ + 0x48E14201, /* [0x0031] == TYPE4 == */ + 0x00000000, /* [0x0032] A5X_RB_RAS_MSAA_CNTL_CTX_0 (0xE142)*/ + 0x40E14301, /* [0x0033] == TYPE4 == */ + 0x00000004, /* [0x0034] A5X_RB_DEST_MSAA_CNTL_CTX_0 (0xE143)*/ + 0x40E78683, /* [0x0035] == TYPE4 == */ + 0xFCFCFCFC, /* [0x0036] A5X_HLSQ_CNTL_2_CTX_0 (0xE786)*/ + 0xFCFCFCFC, /* [0x0037] A5X_HLSQ_CNTL_3_CTX_0 (0xE787)*/ + 0xFCFCFCFC, /* [0x0038] A5X_HLSQ_CNTL_4_CTX_0 (0xE788)*/ + 0x48E0A201, /* [0x0039] == TYPE4 == */ + 
0x00000000, /* [0x003A] A5X_GRAS_SC_RAS_MSAA_CNTL_CTX_0 (0xE0A2)*/ + 0x40E0A301, /* [0x003B] == TYPE4 == */ + 0x00000004, /* [0x003C] A5X_GRAS_SC_DEST_MSAA_CNTL_CTX_0 (0xE0A3)*/ + 0x48E14101, /* [0x003D] == TYPE4 == */ + 0x0000C089, /* [0x003E] A5X_RB_RENDER_CNTL_CTX_0 (0xE141)*/ + 0x40E0A001, /* [0x003F] == TYPE4 == */ + 0x00000009, /* [0x0040] A5X_GRAS_SC_CNTL_CTX_0 (0xE0A0)*/ + 0x40E28001, /* [0x0041] == TYPE4 == */ + 0x00010004, /* [0x0042] A5X_VPC_CNTL_0_CTX_0 (0xE280)*/ + 0x40E38401, /* [0x0043] == TYPE4 == */ + 0x00000404, /* [0x0044] A5X_PC_PRIMITIVE_CNTL_CTX_0 (0xE384)*/ + 0x40E78501, /* [0x0045] == TYPE4 == */ + 0x0000003F, /* [0x0046] A5X_HLSQ_CNTL_1_CTX_0 (0xE785)*/ + 0x48E5D301, /* [0x0047] == TYPE4 == */ + 0x00000030, /* [0x0048] A5X_SP_PS_MRT_0_CTX_0 (0xE5D3)*/ + 0x48E5CB01, /* [0x0049] == TYPE4 == */ + 0x00000100, /* [0x004A] A5X_SP_PS_OUTPUT_0_CTX_0 (0xE5CB)*/ + 0x40E5CA01, /* [0x004B] == TYPE4 == */ + 0x001F9F81, /* [0x004C] A5X_SP_PS_OUTPUT_CNTL_CTX_0 (0xE5CA)*/ + 0x40E14601, /* [0x004D] == TYPE4 == */ + 0x00000001, /* [0x004E] A5X_RB_PS_OUTPUT_CNTL_CTX_0 (0xE146)*/ + 0x40E38E01, /* [0x004F] == TYPE4 == */ + 0x00000000, /* [0x0050] A5X_PC_GS_PARAM_CTX_0 (0xE38E)*/ + 0x40E28A01, /* [0x0051] == TYPE4 == */ + 0x00000000, /* [0x0052] A5X_VPC_VARYING_REPLACE_MODE_0_CTX_0 (0xE28A)*/ + 0x48E1A901, /* [0x0053] == TYPE4 == */ + 0xFFFF0100, /* [0x0054] A5X_RB_BLEND_CNTL_CTX_0 (0xE1A9)*/ + 0x40E5C901, /* [0x0055] == TYPE4 == */ + 0x00000100, /* [0x0056] A5X_SP_BLEND_CNTL_CTX_0 (0xE5C9)*/ + 0x40E76401, /* [0x0057] == TYPE4 == */ + 0x00000000, /* [0x0058] A5X_TPL1_TP_PS_ROTATION_CNTL_CTX_0 (0xE764)*/ + 0x48E09401, /* [0x0059] == TYPE4 == */ + 0x00000000, /* [0x005A] A5X_GRAS_SU_DEPTH_PLANE_CNTL_CTX_0 (0xE094)*/ + 0x40E1B001, /* [0x005B] == TYPE4 == */ + 0x00000000, /* [0x005C] A5X_RB_DEPTH_PLANE_CNTL_CTX_0 (0xE1B0)*/ + 0x48E1B101, /* [0x005D] == TYPE4 == */ + 0x00000000, /* [0x005E] A5X_RB_DEPTH_CNTL_CTX_0 (0xE1B1)*/ + 0x48E40001, /* [0x005F] == TYPE4 == */ + 
0x00000001, /* [0x0060] A5X_VFD_CNTL_0_CTX_0 (0xE400)*/ + 0x48E40A04, /* [0x0061] == TYPE4 == */ + 0x00000000, /* [0x0062] A5X_VFD_VERTEX_BUFFER_BASE_LO_0_CTX_0 (0xE40A)*/ + 0x00000000, /* [0x0063] A5X_VFD_VERTEX_BUFFER_BASE_HI_0_CTX_0 (0xE40B)*/ + 0x00000078, /* [0x0064] A5X_VFD_VERTEX_BUFFER_SIZE_0_CTX_0 (0xE40C)*/ + 0x00000008, /* [0x0065] A5X_VFD_VERTEX_BUFFER_STRIDE_0_CTX_0 (0xE40D)*/ + 0x40E48A02, /* [0x0066] == TYPE4 == */ + 0xC6700000, /* [0x0067] A5X_VFD_FETCH_INSTR_0_CTX_0 (0xE48A)*/ + 0x00000001, /* [0x0068] A5X_VFD_FETCH_INSTR_STEP_RATE_0_CTX_0 (0xE48B)*/ + 0x48E4CA01, /* [0x0069] == TYPE4 == */ + 0x0000000F, /* [0x006A] A5X_VFD_DEST_CNTL_0_CTX_0 (0xE4CA)*/ + 0x48E10001, /* [0x006B] == TYPE4 == */ + 0x00000008, /* [0x006C] A5X_GRAS_LRZ_CNTL_CTX_0 (0xE100)*/ + 0x48E0A101, /* [0x006D] == TYPE4 == */ + 0x00000004, /* [0x006E] A5X_GRAS_SC_BIN_CNTL_CTX_0 (0xE0A1)*/ + 0x40E10185, /* [0x006F] == TYPE4 == */ + 0x00000000, /* [0x0070] A5X_GRAS_LRZ_BUFFER_BASE_LO_CTX_0 (0xE101)*/ + 0x00000000, /* [0x0071] A5X_GRAS_LRZ_BUFFER_BASE_HI_CTX_0 (0xE102)*/ + 0x00000001, /* [0x0072] A5X_GRAS_LRZ_BUFFER_PITCH_CTX_0 (0xE103)*/ + 0x00000000, /* [0x0073] A5X_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO_CTX_0 + * (0xE104) + */ + 0x00000000, /* [0x0074] A5X_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI_CTX_0 + * (0xE105) + */ + 0x70388003, /* [0x0075] == TYPE7: DRAW_INDX_OFFSET (38) == */ + 0x00200884, /* [0x0076] */ + 0x00000001, /* [0x0077] */ + 0x00000003, /* [0x0078] */ + 0x70380007, /* [0x0079] == TYPE7: DRAW_INDX_OFFSET (38) == */ + 0x00200404, /* [0x007A] */ + 0x00000001, /* [0x007B] */ + 0x00000003, /* [0x007C] */ + 0x00000000, /* [0x007D] */ + 0x00000000, /* [0x007E] */ + 0x00000000, /* [0x007F] */ + 0x00000006, /* [0x0080] */ + 0x70460004, /* [0x0081] == TYPE7: EVENT_WRITE (46) == */ + 0x00000004, /* [0x0082] */ + 0x00000000, /* [0x0083] */ + 0x00000000, /* [0x0084] */ + 0x00000001, /* [0x0085] */ + 0x70268000, /* [0x0086] == TYPE7: WAIT_FOR_IDLE (26) == */ + 0x70A88003, /* [0x0087] == 
TYPE7: DRAW_INDIRECT (28) == */ + 0x00200884, /* [0x0088] */ + 0x00000000, /* [0x0089] */ + 0x00000000, /* [0x008A] */ + 0x70460004, /* [0x008B] == TYPE7: EVENT_WRITE (46) == */ + 0x00000004, /* [0x008C] */ + 0x00000000, /* [0x008D] */ + 0x00000000, /* [0x008E] */ + 0x00000001, /* [0x008F] */ + 0x70268000, /* [0x0090] == TYPE7: WAIT_FOR_IDLE (26) == */ + 0x70298006, /* [0x0091] == TYPE7: DRAW_INDX_INDIRECT (29) == */ + 0x00200404, /* [0x0092] */ + 0x00000000, /* [0x0093] */ + 0x00000000, /* [0x0094] */ + 0x00000006, /* [0x0095] */ + 0x00000000, /* [0x0096] */ + 0x00000000, /* [0x0097] */ + 0x40E40801, /* [0x0098] == TYPE4 == */ + 0x0000000D, /* [0x0099] A5X_VFD_INDEX_OFFSET_CTX_0 (0xE408)*/ + 0x48E40901, /* [0x009A] == TYPE4 == */ + 0x00000000, /* [0x009B] A5X_VFD_INSTANCE_START_OFFSET_CTX_0 (0xE409)*/ + 0x70388003, /* [0x009C] == TYPE7: DRAW_INDX_OFFSET (38) == */ + 0x00200884, /* [0x009D] */ + 0x00000001, /* [0x009E] */ + 0x00000003, /* [0x009F] */ + 0x00000000, /* [0x00A0] */ + 0x00000000, /* [0x00A1] */ + 0x00000000, /* [0x00A2] */ + 0x00000000, /* [0x00A3] */ + 0x00000000, /* [0x00A4] */ + 0x00000000, /* [0x00A5] */ + 0x00000000, /* [0x00A6] */ + 0x00000000, /* [0x00A7] */ + 0x48E78401, /* [0x00A8] */ + 0x00000881, /* [0x00A9] */ + 0x40E5C001, /* [0x00AA] */ + 0x0004001E, /* [0x00AB] */ + 0x70438003, /* [0x00AC] */ + 0x0000003A, /* [0x00AD] */ + 0x00000000, /* [0x00AE] */ + 0x00000000, /* [0x00AF] */ + 0x70B00023, /* [0x00B0] */ + 0x00600000, /* [0x00B1] */ + 0x00000000, /* [0x00B2] */ + 0x00000000, /* [0x00B3] */ + 0x00000000, /* [0x00B4] */ + 0x03000000, /* [0x00B5] */ + 0x00000000, /* [0x00B6] */ + 0x00000000, /* [0x00B7] */ + 0x00000000, /* [0x00B8] */ + 0x00000000, /* [0x00B9] */ + 0x00000000, /* [0x00BA] */ + 0x00000000, /* [0x00BB] */ + 0x00000000, /* [0x00BC] */ + 0x00000000, /* [0x00BD] */ + 0x00000000, /* [0x00BE] */ + 0x00000000, /* [0x00BF] */ + 0x00000000, /* [0x00C0] */ + 0x00000000, /* [0x00C1] */ + 0x00000000, /* [0x00C2] */ + 0x00000000, /* 
[0x00C3] */ + 0x00000000, /* [0x00C4] */ + 0x00000000, /* [0x00C5] */ + 0x00000000, /* [0x00C6] */ + 0x00000000, /* [0x00C7] */ + 0x00000000, /* [0x00C8] */ + 0x00000000, /* [0x00C9] */ + 0x00000000, /* [0x00CA] */ + 0x00000000, /* [0x00CB] */ + 0x00000000, /* [0x00CC] */ + 0x00000000, /* [0x00CD] */ + 0x00000000, /* [0x00CE] */ + 0x00000000, /* [0x00CF] */ + 0x00000000, /* [0x00D0] */ + 0x00000000, /* [0x00D1] */ + 0x00000000, /* [0x00D2] */ + 0x00000000, /* [0x00D3] */ + 0x40E09301, /* [0x00D4] */ + 0x00000000, /* [0x00D5] */ + 0x40E38D01, /* [0x00D6] */ + 0x00000000, /* [0x00D7] */ + 0x40E29801, /* [0x00D8] */ + 0x0000FFFF, /* [0x00D9] */ + 0x48E28201, /* [0x00DA] */ + 0xEAEAEAEA, /* [0x00DB] */ + 0x40E29404, /* [0x00DC] */ + 0xFFFFFFFF, /* [0x00DD] */ + 0xFFFFFFFF, /* [0x00DE] */ + 0xFFFFFFFF, /* [0x00DF] */ + 0xFFFFFFFF, /* [0x00E0] */ + 0x40E5DB01, /* [0x00E1] */ + 0x00000000, /* [0x00E2] */ + 0x48E14701, /* [0x00E3] */ + 0x0000000F, /* [0x00E4] */ + 0x70B00023, /* [0x00E5] */ + 0x00700000, /* [0x00E6] */ + 0x00000000, /* [0x00E7] */ + 0x00000000, /* [0x00E8] */ + 0x00003C00, /* [0x00E9] */ + 0x20400000, /* [0x00EA] */ + 0x00000000, /* [0x00EB] */ + 0x20400001, /* [0x00EC] */ + 0x00000000, /* [0x00ED] */ + 0x20400002, /* [0x00EE] */ + 0x00003C00, /* [0x00EF] */ + 0x20400003, /* [0x00F0] */ + 0x00000000, /* [0x00F1] */ + 0x03000000, /* [0x00F2] */ + 0x00000000, /* [0x00F3] */ + 0x00000000, /* [0x00F4] */ + 0x00000000, /* [0x00F5] */ + 0x00000000, /* [0x00F6] */ + 0x00000000, /* [0x00F7] */ + 0x00000000, /* [0x00F8] */ + 0x00000000, /* [0x00F9] */ + 0x00000000, /* [0x00FA] */ + 0x00000000, /* [0x00FB] */ + 0x00000000, /* [0x00FC] */ + 0x00000000, /* [0x00FD] */ + 0x00000000, /* [0x00FE] */ + 0x00000000, /* [0x00FF] */ + 0x00000000, /* [0x0100] */ + 0x00000000, /* [0x0101] */ + 0x00000000, /* [0x0102] */ + 0x00000000, /* [0x0103] */ + 0x00000000, /* [0x0104] */ + 0x00000000, /* [0x0105] */ + 0x00000000, /* [0x0106] */ + 0x00000000, /* [0x0107] */ + 0x00000000, 
/* [0x0108] */ + 0x48E2A001, /* [0x0109] */ + 0x000000FF, /* [0x010A] */ + 0x40E40185, /* [0x010B] */ + 0x00FCFCFC, /* [0x010C] */ + 0x0000FCFC, /* [0x010D] */ + 0x0000FCFC, /* [0x010E] */ + 0x000000FC, /* [0x010F] */ + 0x00000000, /* [0x0110] */ + 0x48E38F01, /* [0x0111] */ + 0x00000000, /* [0x0112] */ + 0x48E58001, /* [0x0113] */ + 0x00000010, /* [0x0114] */ + 0x40E1A801, /* [0x0115] */ + 0x00000E00, /* [0x0116] */ + 0x48E15001, /* [0x0117] */ + 0x000007E0, /* [0x0118] */ + 0x40E15101, /* [0x0119] */ + 0x00000000, /* [0x011A] */ + 0x40E00001, /* [0x011B] */ + 0x00000080, /* [0x011C] */ + 0x40E09583, /* [0x011D] */ + 0x00000000, /* [0x011E] */ + 0x00000000, /* [0x011F] */ + 0x00000000, /* [0x0120] */ + 0x40E09001, /* [0x0121] */ + 0x00000010, /* [0x0122] */ + 0x40E0AA02, /* [0x0123] */ + 0x00000000, /* [0x0124] */ + 0x001F0073, /* [0x0125] */ + 0x48E01086, /* [0x0126] */ + 0x42680000, /* [0x0127] */ + 0x42680000, /* [0x0128] */ + 0x41800000, /* [0x0129] */ + 0xC1800000, /* [0x012A] */ + 0x3EFFFEE0, /* [0x012B] */ + 0x3EFFFEE0, /* [0x012C] */ + 0x40E0CA02, /* [0x012D] */ + 0x00000000, /* [0x012E] */ + 0x001F0073, /* [0x012F] */ + 0x40E00601, /* [0x0130] */ + 0x0007FDFF, /* [0x0131] */ + 0x40E70401, /* [0x0132] */ + 0x00000000, /* [0x0133] */ + 0x48E70501, /* [0x0134] */ + 0x00000004, /* [0x0135] */ + 0x48E14201, /* [0x0136] */ + 0x00000000, /* [0x0137] */ + 0x40E14301, /* [0x0138] */ + 0x00000004, /* [0x0139] */ + 0x40E78683, /* [0x013A] */ + 0xFCFCFCFC, /* [0x013B] */ + 0xFCFCFCFC, /* [0x013C] */ + 0xFCFCFCFC, /* [0x013D] */ + 0x48E0A201, /* [0x013E] */ + 0x00000000, /* [0x013F] */ + 0x40E0A301, /* [0x0140] */ + 0x00000004, /* [0x0141] */ + 0x48E1B285, /* [0x0142] */ + 0x00000001, /* [0x0143] */ + 0x00004000, /* [0x0144] */ + 0x00000000, /* [0x0145] */ + 0x00000004, /* [0x0146] */ + 0x000000C0, /* [0x0147] */ + 0x48E09801, /* [0x0148] */ + 0x00000001, /* [0x0149] */ + 0x48E00401, /* [0x014A] */ + 0x00000000, /* [0x014B] */ + 0x480CDD02, /* [0x014C] */ + 
0x00200074, /* [0x014D] */ + 0x00000000, /* [0x014E] */ + 0x40E15285, /* [0x014F] */ + 0x00000A30, /* [0x0150] */ + 0x00000008, /* [0x0151] */ + 0x00000100, /* [0x0152] */ + 0x00000000, /* [0x0153] */ + 0x00000000, /* [0x0154] */ + 0x48E14101, /* [0x0155] */ + 0x0000C008, /* [0x0156] */ + 0x40E0A001, /* [0x0157] */ + 0x00000008, /* [0x0158] */ + 0x40E28001, /* [0x0159] */ + 0x00010004, /* [0x015A] */ + 0x40E38401, /* [0x015B] */ + 0x00000404, /* [0x015C] */ + 0x40E78501, /* [0x015D] */ + 0x0000003F, /* [0x015E] */ + 0x48E5D301, /* [0x015F] */ + 0x00000030, /* [0x0160] */ + 0x48E5CB01, /* [0x0161] */ + 0x00000100, /* [0x0162] */ + 0x40E5CA01, /* [0x0163] */ + 0x001F9F81, /* [0x0164] */ + 0x40E14601, /* [0x0165] */ + 0x00000001, /* [0x0166] */ + 0x40E38E01, /* [0x0167] */ + 0x00000000, /* [0x0168] */ + 0x40E28A01, /* [0x0169] */ + 0x00000000, /* [0x016A] */ + 0x48E1A901, /* [0x016B] */ + 0xFFFF0100, /* [0x016C] */ + 0x40E5C901, /* [0x016D] */ + 0x00000100, /* [0x016E] */ + 0x40E76401, /* [0x016F] */ + 0x00000000, /* [0x0170] */ + 0x48E09401, /* [0x0171] */ + 0x00000000, /* [0x0172] */ + 0x40E1B001, /* [0x0173] */ + 0x00000000, /* [0x0174] */ + 0x48E1B101, /* [0x0175] */ + 0x00000006, /* [0x0176] */ + 0x48E40001, /* [0x0177] */ + 0x00000001, /* [0x0178] */ + 0x48E40A04, /* [0x0179] */ + 0x00000000, /* [0x017A] */ + 0x00000000, /* [0x017B] */ + 0x00000078, /* [0x017C] */ + 0x00000008, /* [0x017D] */ + 0x40E48A02, /* [0x017E] */ + 0xC6700000, /* [0x017F] */ + 0x00000001, /* [0x0180] */ + 0x48E4CA01, /* [0x0181] */ + 0x0000000F, /* [0x0182] */ + 0x48E10001, /* [0x0183] */ + 0x00000008, /* [0x0184] */ + 0x48E0A101, /* [0x0185] */ + 0x00000000, /* [0x0186] */ + 0x40E10185, /* [0x0187] */ + 0x00000000, /* [0x0188] */ + 0x00000000, /* [0x0189] */ + 0x00000001, /* [0x018A] */ + 0x00000000, /* [0x018B] */ + 0x00000000, /* [0x018C] */ + 0x70230001, /* [0x018D] */ + 0x00000000, /* [0x018E] */ + 0x70388003, /* [0x018F] */ + 0x00200984, /* [0x0190] */ + 0x00000001, /* [0x0191] */ 
+ 0x00000003, /* [0x0192] */ + 0x70380007, /* [0x0193] */ + 0x00200504, /* [0x0194] */ + 0x00000001, /* [0x0195] */ + 0x00000003, /* [0x0196] */ + 0x00000000, /* [0x0197] */ + 0x00000000, /* [0x0198] */ + 0x00000000, /* [0x0199] */ + 0x00000006, /* [0x019A] */ + 0x70460004, /* [0x019B] */ + 0x00000004, /* [0x019C] */ + 0x00000000, /* [0x019D] */ + 0x00000000, /* [0x019E] */ + 0x00000000, /* [0x019F] */ + 0x70268000, /* [0x01A0] */ + 0x70A88003, /* [0x01A1] */ + 0x00200984, /* [0x01A2] */ + 0x00000000, /* [0x01A3] */ + 0x00000000, /* [0x01A4] */ + 0x70460004, /* [0x01A5] */ + 0x00000004, /* [0x01A6] */ + 0x00000000, /* [0x01A7] */ + 0x00000000, /* [0x01A8] */ + 0x00000001, /* [0x01A9] */ + 0x70268000, /* [0x01AA] */ + 0x70298006, /* [0x01AB] */ + 0x00200504, /* [0x01AC] */ + 0x00000000, /* [0x01AD] */ + 0x00000000, /* [0x01AE] */ + 0x00000006, /* [0x01AF] */ + 0x00000000, /* [0x01B0] */ + 0x00000000, /* [0x01B1] */ + 0x40E40801, /* [0x01B2] */ + 0x0000000D, /* [0x01B3] */ + 0x48E40901, /* [0x01B4] */ + 0x00000000, /* [0x01B5] */ + 0x70388003, /* [0x01B6] */ + 0x00200984, /* [0x01B7] */ + 0x00000001, /* [0x01B8] */ + 0x00000003, /* [0x01B9] */ + 0x00000000, /* [0x01BA] */ + 0x00000000, /* [0x01BB] */ + 0x00000000, /* [0x01BC] */ + 0x00000000, /* [0x01BD] */ + 0x00000000, /* [0x01BE] */ + 0x00000000, /* [0x01BF] */ + 0x70EA0001, /* [0x01C0] */ + 0x00000000, /* [0x01C1] */ + 0x40E78A01, /* [0x01C2] */ + 0x000FFFFF, /* [0x01C3] */ + 0x40E09001, /* [0x01C4] */ + 0x00000000, /* [0x01C5] */ + 0x40E00501, /* [0x01C6] */ + 0x00000000, /* [0x01C7] */ + 0x40E00001, /* [0x01C8] */ + 0x00000181, /* [0x01C9] */ + 0x48E10001, /* [0x01CA] */ + 0x00000000, /* [0x01CB] */ + 0x40E21385, /* [0x01CC] */ + 0x00000004, /* [0x01CD] */ + 0x00000000, /* [0x01CE] */ + 0x00000000, /* [0x01CF] */ + 0x00000001, /* [0x01D0] */ + 0x00000001, /* [0x01D1] */ + 0x40E21C01, /* [0x01D2] */ + 0x00000000, /* [0x01D3] */ + 0x40E21001, /* [0x01D4] */ + 0x00000000, /* [0x01D5] */ + 0x70460004, /* [0x01D6] 
*/ + 0x0000001E, /* [0x01D7] */ + 0x00000000, /* [0x01D8] */ + 0x00000000, /* [0x01D9] */ + 0x00000001, /* [0x01DA] */ + 0x00000000, /* [0x01DB] */ + 0x00000000, /* [0x01DC] */ + 0x00000000, /* [0x01DD] */ + 0x00000000, /* [0x01DE] */ + 0x00000000, /* [0x01DF] */ + 0x40E78A01, /* [0x01E0] */ + 0x020FFFFF, /* [0x01E1] */ + 0x48E78B85, /* [0x01E2] */ + 0x00000001, /* [0x01E3] */ + 0x00003F05, /* [0x01E4] */ + 0x00003F04, /* [0x01E5] */ + 0x00003F04, /* [0x01E6] */ + 0x00003F04, /* [0x01E7] */ + 0x48E79001, /* [0x01E8] */ + 0x00000000, /* [0x01E9] */ + 0x40E79101, /* [0x01EA] */ + 0x00000002, /* [0x01EB] */ + 0x40E79201, /* [0x01EC] */ + 0x00000002, /* [0x01ED] */ + 0x40E58485, /* [0x01EE] */ + 0x00000001, /* [0x01EF] */ + 0x00003F05, /* [0x01F0] */ + 0x00003F04, /* [0x01F1] */ + 0x00003F04, /* [0x01F2] */ + 0x00003F04, /* [0x01F3] */ + 0x48E58901, /* [0x01F4] */ + 0x00000000, /* [0x01F5] */ + 0x48E7C302, /* [0x01F6] */ + 0x00000002, /* [0x01F7] */ + 0x00000001, /* [0x01F8] */ + 0x48E7D702, /* [0x01F9] */ + 0x00000002, /* [0x01FA] */ + 0x00000001, /* [0x01FB] */ + 0x40E7C802, /* [0x01FC] */ + 0x00000000, /* [0x01FD] */ + 0x00000000, /* [0x01FE] */ + 0x40E7CD02, /* [0x01FF] */ + 0x00000000, /* [0x0200] */ + 0x00000000, /* [0x0201] */ + 0x48E7D202, /* [0x0202] */ + 0x00000000, /* [0x0203] */ + 0x00000000, /* [0x0204] */ + 0x40E7DC02, /* [0x0205] */ + 0x00000000, /* [0x0206] */ + 0x00000000, /* [0x0207] */ + 0x48E38901, /* [0x0208] */ + 0x00000000, /* [0x0209] */ + 0x48E29A01, /* [0x020A] */ + 0x00FFFF00, /* [0x020B] */ + 0x48E00101, /* [0x020C] */ + 0x00000000, /* [0x020D] */ + 0x40E29D01, /* [0x020E] */ + 0x0000FF00, /* [0x020F] */ + 0x40E59001, /* [0x0210] */ + 0x00000406, /* [0x0211] */ + 0x48E59201, /* [0x0212] */ + 0x00000001, /* [0x0213] */ + 0x40E59301, /* [0x0214] */ + 0x00000F00, /* [0x0215] */ + 0x40E5A301, /* [0x0216] */ + 0x00000000, /* [0x0217] */ + 0x48E38501, /* [0x0218] */ + 0x00000000, /* [0x0219] */ + 0x00000000, /* [0x021A] */ + 0x00000000, /* 
[0x021B] */ + 0x00000000, /* [0x021C] */ + 0x00000000, /* [0x021D] */ + 0x00000000, /* [0x021E] */ + 0x00000000, /* [0x021F] */ + 0x48210001, /* [0x0220] */ + 0x86000000, /* [0x0221] */ + 0x40218001, /* [0x0222] */ + 0x86000000, /* [0x0223] */ + 0x40211089, /* [0x0224] */ + 0x00001331, /* [0x0225] */ + 0x00000000, /* [0x0226] */ + 0x00000000, /* [0x0227] */ + 0x00020001, /* [0x0228] */ + 0x00000000, /* [0x0229] */ + 0x00000000, /* [0x022A] */ + 0x00000000, /* [0x022B] */ + 0x00000000, /* [0x022C] */ + 0x00000000, /* [0x022D] */ + 0x48218201, /* [0x022E] */ + 0x00001331, /* [0x022F] */ + 0x40214383, /* [0x0230] */ + 0x00000000, /* [0x0231] */ + 0x00000000, /* [0x0232] */ + 0x00000001, /* [0x0233] */ + 0x40210789, /* [0x0234] */ + 0x00000021, /* [0x0235] */ + 0x00000000, /* [0x0236] */ + 0x00000000, /* [0x0237] */ + 0x00020001, /* [0x0238] */ + 0x00000000, /* [0x0239] */ + 0x00000000, /* [0x023A] */ + 0x00000000, /* [0x023B] */ + 0x00000000, /* [0x023C] */ + 0x00000000, /* [0x023D] */ + 0x48218101, /* [0x023E] */ + 0x00000021, /* [0x023F] */ + 0x48218401, /* [0x0240] */ + 0x00000001, /* [0x0241] */ + 0x702C8005, /* [0x0242] */ + 0x00000002, /* [0x0243] */ + 0x00000000, /* [0x0244] */ + 0x00010001, /* [0x0245] */ + 0x00000000, /* [0x0246] */ + 0x00010001, /* [0x0247] */ + 0x70B00023, /* [0x0248] */ + 0x00600000, /* [0x0249] */ + 0x00000000, /* [0x024A] */ + 0x00000000, /* [0x024B] */ + 0x00000000, /* [0x024C] */ + 0x03000000, /* [0x024D] */ + 0x00000000, /* [0x024E] */ + 0x00000000, /* [0x024F] */ + 0x00000000, /* [0x0250] */ + 0x00000000, /* [0x0251] */ + 0x00000000, /* [0x0252] */ + 0x00000000, /* [0x0253] */ + 0x00000000, /* [0x0254] */ + 0x00000000, /* [0x0255] */ + 0x00000000, /* [0x0256] */ + 0x00000000, /* [0x0257] */ + 0x00000000, /* [0x0258] */ + 0x00000000, /* [0x0259] */ + 0x00000000, /* [0x025A] */ + 0x00000000, /* [0x025B] */ + 0x00000000, /* [0x025C] */ + 0x00000000, /* [0x025D] */ + 0x00000000, /* [0x025E] */ + 0x00000000, /* [0x025F] */ + 0x00000000, 
/* [0x0260] */ + 0x00000000, /* [0x0261] */ + 0x00000000, /* [0x0262] */ + 0x00000000, /* [0x0263] */ + 0x00000000, /* [0x0264] */ + 0x00000000, /* [0x0265] */ + 0x00000000, /* [0x0266] */ + 0x00000000, /* [0x0267] */ + 0x00000000, /* [0x0268] */ + 0x00000000, /* [0x0269] */ + 0x00000000, /* [0x026A] */ + 0x00000000, /* [0x026B] */ + 0x40E09301, /* [0x026C] */ + 0x00000000, /* [0x026D] */ + 0x40E38D01, /* [0x026E] */ + 0x00000000, /* [0x026F] */ + 0x40E29801, /* [0x0270] */ + 0x0000FFFF, /* [0x0271] */ + 0x48E28201, /* [0x0272] */ + 0xEAEAEAEA, /* [0x0273] */ + 0x40E29404, /* [0x0274] */ + 0xFFFFFFFF, /* [0x0275] */ + 0xFFFFFFFF, /* [0x0276] */ + 0xFFFFFFFF, /* [0x0277] */ + 0xFFFFFFFF, /* [0x0278] */ + 0x40E5DB01, /* [0x0279] */ + 0x00000000, /* [0x027A] */ + 0x48E14701, /* [0x027B] */ + 0x0000000F, /* [0x027C] */ + 0x70B00023, /* [0x027D] */ + 0x00700000, /* [0x027E] */ + 0x00000000, /* [0x027F] */ + 0x00000000, /* [0x0280] */ + 0x00003C00, /* [0x0281] */ + 0x20400000, /* [0x0282] */ + 0x00000000, /* [0x0283] */ + 0x20400001, /* [0x0284] */ + 0x00000000, /* [0x0285] */ + 0x20400002, /* [0x0286] */ + 0x00003C00, /* [0x0287] */ + 0x20400003, /* [0x0288] */ + 0x00000000, /* [0x0289] */ + 0x03000000, /* [0x028A] */ + 0x00000000, /* [0x028B] */ + 0x00000000, /* [0x028C] */ + 0x00000000, /* [0x028D] */ + 0x00000000, /* [0x028E] */ + 0x00000000, /* [0x028F] */ + 0x00000000, /* [0x0290] */ + 0x00000000, /* [0x0291] */ + 0x00000000, /* [0x0292] */ + 0x00000000, /* [0x0293] */ + 0x00000000, /* [0x0294] */ + 0x00000000, /* [0x0295] */ + 0x00000000, /* [0x0296] */ + 0x00000000, /* [0x0297] */ + 0x00000000, /* [0x0298] */ + 0x00000000, /* [0x0299] */ + 0x00000000, /* [0x029A] */ + 0x00000000, /* [0x029B] */ + 0x00000000, /* [0x029C] */ + 0x00000000, /* [0x029D] */ + 0x00000000, /* [0x029E] */ + 0x00000000, /* [0x029F] */ + 0x00000000, /* [0x02A0] */ +}; + +/* Fixups for the IBs in _a5xx_critical_pkts_mem03 */ +static const struct adreno_critical_fixup 
critical_pkt_mem03_fixups[] = { + { 2, 3, 3, 0x0780 }, + { 6, 7, 2, 0x0000 }, + { 98, 99, 1, 0x0000 }, + { 112, 113, 1, 0x0480 }, + { 115, 116, 1, 0x0400 }, + { 126, 127, 1, 0x0080 }, + { 131, 132, 2, 0x0108 }, + { 137, 138, 1, 0x00A0 }, + { 141, 142, 2, 0x0108 }, + { 147, 148, 1, 0x0080 }, + { 150, 151, 1, 0x00C0 }, + { 174, 175, 3, 0x0780 }, + { 378, 379, 1, 0x0000 }, + { 392, 393, 1, 0x0480 }, + { 395, 396, 1, 0x0400 }, + { 408, 409, 1, 0x0080 }, + { 413, 414, 2, 0x0108 }, + { 419, 420, 1, 0x00A0 }, + { 423, 424, 2, 0x0108 }, + { 429, 430, 1, 0x0080 }, + { 432, 433, 1, 0x00C0 }, + { 462, 463, 0, 0x0700 }, + { 472, 473, 2, 0x0110 }, + { 550, 551, 1, 0x0500 }, + { 561, 562, 1, 0x0600 }, + { 566, 567, 1, 0x0700 }, +}; diff --git a/adreno_a5xx_perfcounter.c b/adreno_a5xx_perfcounter.c new file mode 100644 index 0000000000..8886ee24ba --- /dev/null +++ b/adreno_a5xx_perfcounter.c @@ -0,0 +1,695 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. 
+ */ + +#include "adreno.h" +#include "adreno_a5xx.h" +#include "adreno_perfcounter.h" +#include "adreno_pm4types.h" +#include "kgsl_device.h" + +#define VBIF2_PERF_CNT_SEL_MASK 0x7F +/* offset of clear register from select register */ +#define VBIF2_PERF_CLR_REG_SEL_OFF 8 +/* offset of enable register from select register */ +#define VBIF2_PERF_EN_REG_SEL_OFF 16 +/* offset of clear register from the enable register */ +#define VBIF2_PERF_PWR_CLR_REG_EN_OFF 8 + +static void a5xx_counter_load(struct adreno_device *adreno_dev, + struct adreno_perfcount_register *reg) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int index = reg->load_bit / 32; + u32 enable = BIT(reg->load_bit & 31); + + kgsl_regwrite(device, A5XX_RBBM_PERFCTR_LOAD_VALUE_LO, + lower_32_bits(reg->value)); + + kgsl_regwrite(device, A5XX_RBBM_PERFCTR_LOAD_VALUE_HI, + upper_32_bits(reg->value)); + + kgsl_regwrite(device, A5XX_RBBM_PERFCTR_LOAD_CMD0 + index, enable); +} + +static u64 a5xx_counter_read_norestore(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + u32 hi, lo; + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + return ((((u64) hi) << 32) | lo) + reg->value; +} + +static int a5xx_counter_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + + kgsl_regwrite(device, reg->select, countable); + reg->value = 0; + + return 0; +} + +static int a5xx_counter_inline_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct 
adreno_perfcount_register *reg = &group->regs[counter]; + struct adreno_ringbuffer *rb = &adreno_dev->ringbuffers[0]; + u32 cmds[3]; + int ret; + + if (!(device->state == KGSL_STATE_ACTIVE)) + return a5xx_counter_enable(adreno_dev, group, counter, + countable); + + cmds[0] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); + cmds[1] = cp_type4_packet(reg->select, 1); + cmds[2] = countable; + + /* submit to highest priority RB always */ + ret = a5xx_ringbuffer_addcmds(adreno_dev, rb, NULL, + F_NOTPROTECTED, cmds, 3, 0, NULL); + + if (ret) + return ret; + + /* + * schedule dispatcher to make sure rb[0] is run, because + * if the current RB is not rb[0] and gpu is idle then + * rb[0] will not get scheduled to run + */ + if (adreno_dev->cur_rb != rb) + adreno_dispatcher_schedule(device); + + /* wait for the above commands submitted to complete */ + ret = adreno_ringbuffer_waittimestamp(rb, rb->timestamp, + ADRENO_IDLE_TIMEOUT); + + if (ret) { + /* + * If we were woken up because of cancelling rb events + * either due to soft reset or adreno_stop, ignore the + * error and return 0 here. The perfcounter is already + * set up in software and it will be programmed in + * hardware when we wake up or come up after soft reset, + * by adreno_perfcounter_restore. 
+ */ + if (ret == -EAGAIN) + ret = 0; + else + dev_err(device->dev, + "Perfcounter %s/%u/%u start via commands failed %d\n", + group->name, counter, countable, ret); + } + + if (!ret) + reg->value = 0; + + return ret; +} + +static int a5xx_counter_rbbm_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + if (adreno_is_a540(adreno_dev) && countable == A5XX_RBBM_ALWAYS_COUNT) + return -EINVAL; + + return a5xx_counter_inline_enable(adreno_dev, group, counter, + countable); +} + +static u64 a5xx_counter_read(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + u32 hi, lo; + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + return (((u64) hi) << 32) | lo; +} + +static int a5xx_counter_vbif_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + + if (countable > VBIF2_PERF_CNT_SEL_MASK) + return -EINVAL; + + /* + * Write 1, followed by 0 to CLR register for + * clearing the counter + */ + kgsl_regwrite(device, + reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 1); + kgsl_regwrite(device, + reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 0); + kgsl_regwrite(device, + reg->select, countable & VBIF2_PERF_CNT_SEL_MASK); + /* enable reg is 8 DWORDS before select reg */ + kgsl_regwrite(device, + reg->select - VBIF2_PERF_EN_REG_SEL_OFF, 1); + + kgsl_regwrite(device, reg->select, countable); + + reg->value = 0; + return 0; +} + +static int a5xx_counter_vbif_pwr_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int 
countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + + /* + * Write 1, followed by 0 to CLR register for + * clearing the counter + */ + kgsl_regwrite(device, reg->select + + VBIF2_PERF_PWR_CLR_REG_EN_OFF, 1); + kgsl_regwrite(device, reg->select + + VBIF2_PERF_PWR_CLR_REG_EN_OFF, 0); + kgsl_regwrite(device, reg->select, 1); + + reg->value = 0; + + return 0; +} + +static int a5xx_counter_alwayson_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + return 0; +} + +static u64 a5xx_counter_alwayson_read(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter) +{ + struct adreno_perfcount_register *reg = &group->regs[counter]; + + return a5xx_read_alwayson(adreno_dev) + reg->value; +} + +static int a5xx_counter_pwr_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + + kgsl_regwrite(device, reg->select, countable); + kgsl_regwrite(device, A5XX_GPMU_POWER_COUNTER_ENABLE, 1); + + reg->value = 0; + return 0; +} + +static int a5xx_counter_pwr_gpmu_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + unsigned int shift = (counter << 3) % (sizeof(unsigned int) * 8); + + if (adreno_is_a530(adreno_dev)) { + if (countable > 43) + return -EINVAL; + } else if (adreno_is_a540(adreno_dev)) { + if (countable > 47) + return -EINVAL; + } + + kgsl_regrmw(device, reg->select, 0xff << shift, countable << shift); + kgsl_regwrite(device, 
A5XX_GPMU_POWER_COUNTER_ENABLE, 1);

	reg->value = 0;
	return 0;
}

/* Reset the GPMU always-on power counter; it runs on its own after that */
static int a5xx_counter_pwr_alwayson_enable(struct adreno_device *adreno_dev,
		const struct adreno_perfcount_group *group,
		unsigned int counter, unsigned int countable)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct adreno_perfcount_register *reg = &group->regs[counter];

	kgsl_regwrite(device, A5XX_GPMU_ALWAYS_ON_COUNTER_RESET, 1);

	reg->value = 0;
	return 0;
}

/*
 * Per-group counter tables. Entry layout:
 * { usage, kernelcount, usercount, LO read reg, HI read reg,
 *   load offset, select reg }
 * NOTE(review): the load offset (-1 for counters that are not restored)
 * presumably indexes the restore path in a5xx_counter_load - confirm
 * against that helper, which is not visible here.
 */
static struct adreno_perfcount_register a5xx_perfcounters_cp[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_0_LO,
		A5XX_RBBM_PERFCTR_CP_0_HI, 0, A5XX_CP_PERFCTR_CP_SEL_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_1_LO,
		A5XX_RBBM_PERFCTR_CP_1_HI, 1, A5XX_CP_PERFCTR_CP_SEL_1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_2_LO,
		A5XX_RBBM_PERFCTR_CP_2_HI, 2, A5XX_CP_PERFCTR_CP_SEL_2 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_3_LO,
		A5XX_RBBM_PERFCTR_CP_3_HI, 3, A5XX_CP_PERFCTR_CP_SEL_3 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_4_LO,
		A5XX_RBBM_PERFCTR_CP_4_HI, 4, A5XX_CP_PERFCTR_CP_SEL_4 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_5_LO,
		A5XX_RBBM_PERFCTR_CP_5_HI, 5, A5XX_CP_PERFCTR_CP_SEL_5 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_6_LO,
		A5XX_RBBM_PERFCTR_CP_6_HI, 6, A5XX_CP_PERFCTR_CP_SEL_6 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_7_LO,
		A5XX_RBBM_PERFCTR_CP_7_HI, 7, A5XX_CP_PERFCTR_CP_SEL_7 },
};

static struct adreno_perfcount_register a5xx_perfcounters_rbbm[] = {
	/*
	 * A5XX_RBBM_PERFCTR_RBBM_0 is used for frequency scaling and omitted
	 * from the pool of available counters
	 */
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_1_LO,
		A5XX_RBBM_PERFCTR_RBBM_1_HI, 9, A5XX_RBBM_PERFCTR_RBBM_SEL_1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_2_LO,
		A5XX_RBBM_PERFCTR_RBBM_2_HI, 10, A5XX_RBBM_PERFCTR_RBBM_SEL_2 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_3_LO,
		A5XX_RBBM_PERFCTR_RBBM_3_HI, 11, A5XX_RBBM_PERFCTR_RBBM_SEL_3 },
};

static struct adreno_perfcount_register a5xx_perfcounters_pc[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_0_LO,
		A5XX_RBBM_PERFCTR_PC_0_HI, 12, A5XX_PC_PERFCTR_PC_SEL_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_1_LO,
		A5XX_RBBM_PERFCTR_PC_1_HI, 13, A5XX_PC_PERFCTR_PC_SEL_1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_2_LO,
		A5XX_RBBM_PERFCTR_PC_2_HI, 14, A5XX_PC_PERFCTR_PC_SEL_2 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_3_LO,
		A5XX_RBBM_PERFCTR_PC_3_HI, 15, A5XX_PC_PERFCTR_PC_SEL_3 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_4_LO,
		A5XX_RBBM_PERFCTR_PC_4_HI, 16, A5XX_PC_PERFCTR_PC_SEL_4 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_5_LO,
		A5XX_RBBM_PERFCTR_PC_5_HI, 17, A5XX_PC_PERFCTR_PC_SEL_5 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_6_LO,
		A5XX_RBBM_PERFCTR_PC_6_HI, 18, A5XX_PC_PERFCTR_PC_SEL_6 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_7_LO,
		A5XX_RBBM_PERFCTR_PC_7_HI, 19, A5XX_PC_PERFCTR_PC_SEL_7 },
};

static struct adreno_perfcount_register a5xx_perfcounters_vfd[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_0_LO,
		A5XX_RBBM_PERFCTR_VFD_0_HI, 20, A5XX_VFD_PERFCTR_VFD_SEL_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_1_LO,
		A5XX_RBBM_PERFCTR_VFD_1_HI, 21, A5XX_VFD_PERFCTR_VFD_SEL_1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_2_LO,
		A5XX_RBBM_PERFCTR_VFD_2_HI, 22, A5XX_VFD_PERFCTR_VFD_SEL_2 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_3_LO,
		A5XX_RBBM_PERFCTR_VFD_3_HI, 23, A5XX_VFD_PERFCTR_VFD_SEL_3 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_4_LO,
		A5XX_RBBM_PERFCTR_VFD_4_HI, 24, A5XX_VFD_PERFCTR_VFD_SEL_4 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_5_LO,
		A5XX_RBBM_PERFCTR_VFD_5_HI, 25, A5XX_VFD_PERFCTR_VFD_SEL_5 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_6_LO,
		A5XX_RBBM_PERFCTR_VFD_6_HI, 26, A5XX_VFD_PERFCTR_VFD_SEL_6 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_7_LO,
		A5XX_RBBM_PERFCTR_VFD_7_HI, 27, A5XX_VFD_PERFCTR_VFD_SEL_7 },
};

static struct adreno_perfcount_register a5xx_perfcounters_hlsq[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_0_LO,
		A5XX_RBBM_PERFCTR_HLSQ_0_HI, 28, A5XX_HLSQ_PERFCTR_HLSQ_SEL_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_1_LO,
		A5XX_RBBM_PERFCTR_HLSQ_1_HI, 29, A5XX_HLSQ_PERFCTR_HLSQ_SEL_1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_2_LO,
		A5XX_RBBM_PERFCTR_HLSQ_2_HI, 30, A5XX_HLSQ_PERFCTR_HLSQ_SEL_2 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_3_LO,
		A5XX_RBBM_PERFCTR_HLSQ_3_HI, 31, A5XX_HLSQ_PERFCTR_HLSQ_SEL_3 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_4_LO,
		A5XX_RBBM_PERFCTR_HLSQ_4_HI, 32, A5XX_HLSQ_PERFCTR_HLSQ_SEL_4 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_5_LO,
		A5XX_RBBM_PERFCTR_HLSQ_5_HI, 33, A5XX_HLSQ_PERFCTR_HLSQ_SEL_5 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_6_LO,
		A5XX_RBBM_PERFCTR_HLSQ_6_HI, 34, A5XX_HLSQ_PERFCTR_HLSQ_SEL_6 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_7_LO,
		A5XX_RBBM_PERFCTR_HLSQ_7_HI, 35, A5XX_HLSQ_PERFCTR_HLSQ_SEL_7 },
};

static struct adreno_perfcount_register a5xx_perfcounters_vpc[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_0_LO,
		A5XX_RBBM_PERFCTR_VPC_0_HI, 36, A5XX_VPC_PERFCTR_VPC_SEL_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_1_LO,
		A5XX_RBBM_PERFCTR_VPC_1_HI, 37, A5XX_VPC_PERFCTR_VPC_SEL_1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_2_LO,
		A5XX_RBBM_PERFCTR_VPC_2_HI, 38, A5XX_VPC_PERFCTR_VPC_SEL_2 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_3_LO,
		A5XX_RBBM_PERFCTR_VPC_3_HI, 39, A5XX_VPC_PERFCTR_VPC_SEL_3 },
};

static struct adreno_perfcount_register a5xx_perfcounters_ccu[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_0_LO,
		A5XX_RBBM_PERFCTR_CCU_0_HI, 40, A5XX_RB_PERFCTR_CCU_SEL_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_1_LO,
		A5XX_RBBM_PERFCTR_CCU_1_HI, 41, A5XX_RB_PERFCTR_CCU_SEL_1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_2_LO,
		A5XX_RBBM_PERFCTR_CCU_2_HI, 42, A5XX_RB_PERFCTR_CCU_SEL_2 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_3_LO,
		A5XX_RBBM_PERFCTR_CCU_3_HI, 43, A5XX_RB_PERFCTR_CCU_SEL_3 },
};

static struct adreno_perfcount_register a5xx_perfcounters_tse[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_0_LO,
		A5XX_RBBM_PERFCTR_TSE_0_HI, 44, A5XX_GRAS_PERFCTR_TSE_SEL_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_1_LO,
		A5XX_RBBM_PERFCTR_TSE_1_HI, 45, A5XX_GRAS_PERFCTR_TSE_SEL_1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_2_LO,
		A5XX_RBBM_PERFCTR_TSE_2_HI, 46, A5XX_GRAS_PERFCTR_TSE_SEL_2 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_3_LO,
		A5XX_RBBM_PERFCTR_TSE_3_HI, 47, A5XX_GRAS_PERFCTR_TSE_SEL_3 },
};


static struct adreno_perfcount_register a5xx_perfcounters_ras[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_0_LO,
		A5XX_RBBM_PERFCTR_RAS_0_HI, 48, A5XX_GRAS_PERFCTR_RAS_SEL_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_1_LO,
		A5XX_RBBM_PERFCTR_RAS_1_HI, 49, A5XX_GRAS_PERFCTR_RAS_SEL_1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_2_LO,
		A5XX_RBBM_PERFCTR_RAS_2_HI, 50, A5XX_GRAS_PERFCTR_RAS_SEL_2 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_3_LO,
		A5XX_RBBM_PERFCTR_RAS_3_HI, 51, A5XX_GRAS_PERFCTR_RAS_SEL_3 },
};

static struct adreno_perfcount_register a5xx_perfcounters_uche[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_0_LO,
		A5XX_RBBM_PERFCTR_UCHE_0_HI, 52, A5XX_UCHE_PERFCTR_UCHE_SEL_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_1_LO,
		A5XX_RBBM_PERFCTR_UCHE_1_HI, 53, A5XX_UCHE_PERFCTR_UCHE_SEL_1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_2_LO,
		A5XX_RBBM_PERFCTR_UCHE_2_HI, 54, A5XX_UCHE_PERFCTR_UCHE_SEL_2 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_3_LO,
		A5XX_RBBM_PERFCTR_UCHE_3_HI, 55, A5XX_UCHE_PERFCTR_UCHE_SEL_3 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_4_LO,
		A5XX_RBBM_PERFCTR_UCHE_4_HI, 56, A5XX_UCHE_PERFCTR_UCHE_SEL_4 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_5_LO,
		A5XX_RBBM_PERFCTR_UCHE_5_HI, 57, A5XX_UCHE_PERFCTR_UCHE_SEL_5 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_6_LO,
		A5XX_RBBM_PERFCTR_UCHE_6_HI, 58, A5XX_UCHE_PERFCTR_UCHE_SEL_6 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_7_LO,
		A5XX_RBBM_PERFCTR_UCHE_7_HI, 59, A5XX_UCHE_PERFCTR_UCHE_SEL_7 },
};

static struct adreno_perfcount_register a5xx_perfcounters_tp[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_0_LO,
		A5XX_RBBM_PERFCTR_TP_0_HI, 60, A5XX_TPL1_PERFCTR_TP_SEL_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_1_LO,
		A5XX_RBBM_PERFCTR_TP_1_HI, 61, A5XX_TPL1_PERFCTR_TP_SEL_1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_2_LO,
		A5XX_RBBM_PERFCTR_TP_2_HI, 62, A5XX_TPL1_PERFCTR_TP_SEL_2 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_3_LO,
		A5XX_RBBM_PERFCTR_TP_3_HI, 63, A5XX_TPL1_PERFCTR_TP_SEL_3 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_4_LO,
		A5XX_RBBM_PERFCTR_TP_4_HI, 64, A5XX_TPL1_PERFCTR_TP_SEL_4 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_5_LO,
		A5XX_RBBM_PERFCTR_TP_5_HI, 65, A5XX_TPL1_PERFCTR_TP_SEL_5 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_6_LO,
		A5XX_RBBM_PERFCTR_TP_6_HI, 66, A5XX_TPL1_PERFCTR_TP_SEL_6 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_7_LO,
		A5XX_RBBM_PERFCTR_TP_7_HI, 67, A5XX_TPL1_PERFCTR_TP_SEL_7 },
};

static struct adreno_perfcount_register a5xx_perfcounters_sp[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_0_LO,
		A5XX_RBBM_PERFCTR_SP_0_HI, 68, A5XX_SP_PERFCTR_SP_SEL_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_1_LO,
		A5XX_RBBM_PERFCTR_SP_1_HI, 69, A5XX_SP_PERFCTR_SP_SEL_1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_2_LO,
		A5XX_RBBM_PERFCTR_SP_2_HI, 70, A5XX_SP_PERFCTR_SP_SEL_2 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_3_LO,
		A5XX_RBBM_PERFCTR_SP_3_HI, 71, A5XX_SP_PERFCTR_SP_SEL_3 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_4_LO,
		A5XX_RBBM_PERFCTR_SP_4_HI, 72, A5XX_SP_PERFCTR_SP_SEL_4 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_5_LO,
		A5XX_RBBM_PERFCTR_SP_5_HI, 73, A5XX_SP_PERFCTR_SP_SEL_5 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_6_LO,
		A5XX_RBBM_PERFCTR_SP_6_HI, 74, A5XX_SP_PERFCTR_SP_SEL_6 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_7_LO,
		A5XX_RBBM_PERFCTR_SP_7_HI, 75, A5XX_SP_PERFCTR_SP_SEL_7 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_8_LO,
		A5XX_RBBM_PERFCTR_SP_8_HI, 76, A5XX_SP_PERFCTR_SP_SEL_8 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_9_LO,
		A5XX_RBBM_PERFCTR_SP_9_HI, 77, A5XX_SP_PERFCTR_SP_SEL_9 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_10_LO,
		A5XX_RBBM_PERFCTR_SP_10_HI, 78, A5XX_SP_PERFCTR_SP_SEL_10 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_11_LO,
		A5XX_RBBM_PERFCTR_SP_11_HI, 79, A5XX_SP_PERFCTR_SP_SEL_11 },
};

static struct adreno_perfcount_register a5xx_perfcounters_rb[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_0_LO,
		A5XX_RBBM_PERFCTR_RB_0_HI, 80, A5XX_RB_PERFCTR_RB_SEL_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_1_LO,
		A5XX_RBBM_PERFCTR_RB_1_HI, 81, A5XX_RB_PERFCTR_RB_SEL_1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_2_LO,
		A5XX_RBBM_PERFCTR_RB_2_HI, 82, A5XX_RB_PERFCTR_RB_SEL_2 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_3_LO,
		A5XX_RBBM_PERFCTR_RB_3_HI, 83, A5XX_RB_PERFCTR_RB_SEL_3 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_4_LO,
		A5XX_RBBM_PERFCTR_RB_4_HI, 84, A5XX_RB_PERFCTR_RB_SEL_4 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_5_LO,
		A5XX_RBBM_PERFCTR_RB_5_HI, 85, A5XX_RB_PERFCTR_RB_SEL_5 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_6_LO,
		A5XX_RBBM_PERFCTR_RB_6_HI, 86, A5XX_RB_PERFCTR_RB_SEL_6 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_7_LO,
		A5XX_RBBM_PERFCTR_RB_7_HI, 87, A5XX_RB_PERFCTR_RB_SEL_7 },
};

static struct adreno_perfcount_register a5xx_perfcounters_vsc[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VSC_0_LO,
		A5XX_RBBM_PERFCTR_VSC_0_HI, 88, A5XX_VSC_PERFCTR_VSC_SEL_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VSC_1_LO,
		A5XX_RBBM_PERFCTR_VSC_1_HI, 89, A5XX_VSC_PERFCTR_VSC_SEL_1 },
};

static struct adreno_perfcount_register a5xx_perfcounters_lrz[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_0_LO,
		A5XX_RBBM_PERFCTR_LRZ_0_HI, 90, A5XX_GRAS_PERFCTR_LRZ_SEL_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_1_LO,
		A5XX_RBBM_PERFCTR_LRZ_1_HI, 91, A5XX_GRAS_PERFCTR_LRZ_SEL_1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_2_LO,
		A5XX_RBBM_PERFCTR_LRZ_2_HI, 92, A5XX_GRAS_PERFCTR_LRZ_SEL_2 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_3_LO,
		A5XX_RBBM_PERFCTR_LRZ_3_HI, 93, A5XX_GRAS_PERFCTR_LRZ_SEL_3 },
};

static struct adreno_perfcount_register a5xx_perfcounters_cmp[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_0_LO,
		A5XX_RBBM_PERFCTR_CMP_0_HI, 94, A5XX_RB_PERFCTR_CMP_SEL_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_1_LO,
		A5XX_RBBM_PERFCTR_CMP_1_HI, 95, A5XX_RB_PERFCTR_CMP_SEL_1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_2_LO,
		A5XX_RBBM_PERFCTR_CMP_2_HI, 96, A5XX_RB_PERFCTR_CMP_SEL_2 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_3_LO,
		A5XX_RBBM_PERFCTR_CMP_3_HI, 97, A5XX_RB_PERFCTR_CMP_SEL_3 },
};

/* VBIF counters are not restored across power collapse (load offset -1) */
static struct adreno_perfcount_register a5xx_perfcounters_vbif[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW0,
		A5XX_VBIF_PERF_CNT_HIGH0, -1, A5XX_VBIF_PERF_CNT_SEL0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW1,
		A5XX_VBIF_PERF_CNT_HIGH1, -1, A5XX_VBIF_PERF_CNT_SEL1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW2,
		A5XX_VBIF_PERF_CNT_HIGH2, -1, A5XX_VBIF_PERF_CNT_SEL2 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW3,
		A5XX_VBIF_PERF_CNT_HIGH3, -1, A5XX_VBIF_PERF_CNT_SEL3 },
};

static struct adreno_perfcount_register a5xx_perfcounters_vbif_pwr[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_PWR_CNT_LOW0,
		A5XX_VBIF_PERF_PWR_CNT_HIGH0, -1, A5XX_VBIF_PERF_PWR_CNT_EN0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_PWR_CNT_LOW1,
		A5XX_VBIF_PERF_PWR_CNT_HIGH1, -1, A5XX_VBIF_PERF_PWR_CNT_EN1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_PWR_CNT_LOW2,
		A5XX_VBIF_PERF_PWR_CNT_HIGH2, -1, A5XX_VBIF_PERF_PWR_CNT_EN2 },
};

/* The always-on counter has no select register */
static struct adreno_perfcount_register a5xx_perfcounters_alwayson[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_ALWAYSON_COUNTER_LO,
		A5XX_RBBM_ALWAYSON_COUNTER_HI, -1 },
};

static struct adreno_perfcount_register a5xx_pwrcounters_sp[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_0_LO,
		A5XX_SP_POWER_COUNTER_0_HI, -1, A5XX_SP_POWERCTR_SP_SEL_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_1_LO,
		A5XX_SP_POWER_COUNTER_1_HI, -1, A5XX_SP_POWERCTR_SP_SEL_1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_2_LO,
		A5XX_SP_POWER_COUNTER_2_HI, -1, A5XX_SP_POWERCTR_SP_SEL_2 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_3_LO,
		A5XX_SP_POWER_COUNTER_3_HI, -1, A5XX_SP_POWERCTR_SP_SEL_3 },
};

static struct adreno_perfcount_register a5xx_pwrcounters_tp[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_0_LO,
		A5XX_TP_POWER_COUNTER_0_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_1_LO,
		A5XX_TP_POWER_COUNTER_1_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_2_LO,
		A5XX_TP_POWER_COUNTER_2_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_2 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_3_LO,
		A5XX_TP_POWER_COUNTER_3_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_3 },
};

static struct adreno_perfcount_register a5xx_pwrcounters_rb[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_0_LO,
		A5XX_RB_POWER_COUNTER_0_HI, -1, A5XX_RB_POWERCTR_RB_SEL_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_1_LO,
		A5XX_RB_POWER_COUNTER_1_HI, -1, A5XX_RB_POWERCTR_RB_SEL_1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_2_LO,
		A5XX_RB_POWER_COUNTER_2_HI, -1, A5XX_RB_POWERCTR_RB_SEL_2 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_3_LO,
		A5XX_RB_POWER_COUNTER_3_HI, -1, A5XX_RB_POWERCTR_RB_SEL_3 },
};

static struct adreno_perfcount_register a5xx_pwrcounters_ccu[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CCU_POWER_COUNTER_0_LO,
		A5XX_CCU_POWER_COUNTER_0_HI, -1, A5XX_RB_POWERCTR_CCU_SEL_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CCU_POWER_COUNTER_1_LO,
		A5XX_CCU_POWER_COUNTER_1_HI, -1, A5XX_RB_POWERCTR_CCU_SEL_1 },
};

static struct adreno_perfcount_register a5xx_pwrcounters_uche[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_0_LO,
		A5XX_UCHE_POWER_COUNTER_0_HI, -1,
		A5XX_UCHE_POWERCTR_UCHE_SEL_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_1_LO,
		A5XX_UCHE_POWER_COUNTER_1_HI, -1,
		A5XX_UCHE_POWERCTR_UCHE_SEL_1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_2_LO,
		A5XX_UCHE_POWER_COUNTER_2_HI, -1,
		A5XX_UCHE_POWERCTR_UCHE_SEL_2 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_3_LO,
		A5XX_UCHE_POWER_COUNTER_3_HI, -1,
		A5XX_UCHE_POWERCTR_UCHE_SEL_3 },
};

static struct adreno_perfcount_register a5xx_pwrcounters_cp[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_0_LO,
		A5XX_CP_POWER_COUNTER_0_HI, -1, A5XX_CP_POWERCTR_CP_SEL_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_1_LO,
		A5XX_CP_POWER_COUNTER_1_HI, -1, A5XX_CP_POWERCTR_CP_SEL_1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_2_LO,
		A5XX_CP_POWER_COUNTER_2_HI, -1, A5XX_CP_POWERCTR_CP_SEL_2 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_3_LO,
		A5XX_CP_POWER_COUNTER_3_HI, -1, A5XX_CP_POWERCTR_CP_SEL_3 },
};

/*
 * GPMU counters 0-3 share SELECT_0 and counters 4-5 share SELECT_1:
 * each select register packs four 8-bit countable fields (see the shift
 * computation in a5xx_counter_pwr_gpmu_enable).
 */
static struct adreno_perfcount_register a5xx_pwrcounters_gpmu[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_0_LO,
		A5XX_GPMU_POWER_COUNTER_0_HI, -1,
		A5XX_GPMU_POWER_COUNTER_SELECT_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_1_LO,
		A5XX_GPMU_POWER_COUNTER_1_HI, -1,
		A5XX_GPMU_POWER_COUNTER_SELECT_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_2_LO,
		A5XX_GPMU_POWER_COUNTER_2_HI, -1,
		A5XX_GPMU_POWER_COUNTER_SELECT_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_3_LO,
		A5XX_GPMU_POWER_COUNTER_3_HI, -1,
		A5XX_GPMU_POWER_COUNTER_SELECT_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_4_LO,
		A5XX_GPMU_POWER_COUNTER_4_HI, -1,
		A5XX_GPMU_POWER_COUNTER_SELECT_1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_5_LO,
		A5XX_GPMU_POWER_COUNTER_5_HI, -1,
		A5XX_GPMU_POWER_COUNTER_SELECT_1 },
};

static struct adreno_perfcount_register a5xx_pwrcounters_alwayson[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_ALWAYS_ON_COUNTER_LO,
		A5XX_GPMU_ALWAYS_ON_COUNTER_HI, -1 },
};

/* Wrap the generic group macros with the a5xx prefix */
#define A5XX_PERFCOUNTER_GROUP(offset, name, enable, read, load) \
	ADRENO_PERFCOUNTER_GROUP(a5xx, offset, name, enable, read, load)

#define A5XX_PERFCOUNTER_GROUP_FLAGS(offset, name, flags, enable, read, load) \
	ADRENO_PERFCOUNTER_GROUP_FLAGS(a5xx, offset, name, flags, enable, \
		read, load)

#define A5XX_POWER_COUNTER_GROUP(offset, name, enable, read) \
	[KGSL_PERFCOUNTER_GROUP_##offset##_PWR] = { a5xx_pwrcounters_##name, \
	ARRAY_SIZE(a5xx_pwrcounters_##name), __stringify(name##_pwr), 0, \
	enable, read, NULL }

/* Most groups use the inline (command stream) enable path */
#define A5XX_REGULAR_PERFCOUNTER_GROUP(offset, name) \
	A5XX_PERFCOUNTER_GROUP(offset, name, a5xx_counter_inline_enable, \
		a5xx_counter_read, a5xx_counter_load)

static struct adreno_perfcount_group a5xx_perfcounter_groups
	[KGSL_PERFCOUNTER_GROUP_MAX] = {
	A5XX_REGULAR_PERFCOUNTER_GROUP(CP, cp),
	A5XX_PERFCOUNTER_GROUP(RBBM, rbbm,
		a5xx_counter_rbbm_enable, a5xx_counter_read, a5xx_counter_load),
	A5XX_REGULAR_PERFCOUNTER_GROUP(PC, pc),
	A5XX_REGULAR_PERFCOUNTER_GROUP(VFD, vfd),
	A5XX_REGULAR_PERFCOUNTER_GROUP(HLSQ, hlsq),
	A5XX_REGULAR_PERFCOUNTER_GROUP(VPC, vpc),
	A5XX_REGULAR_PERFCOUNTER_GROUP(CCU, ccu),
	A5XX_REGULAR_PERFCOUNTER_GROUP(CMP, cmp),
	A5XX_REGULAR_PERFCOUNTER_GROUP(TSE, tse),
	A5XX_REGULAR_PERFCOUNTER_GROUP(RAS, ras),
	A5XX_REGULAR_PERFCOUNTER_GROUP(LRZ, lrz),
	A5XX_REGULAR_PERFCOUNTER_GROUP(UCHE, uche),
	A5XX_REGULAR_PERFCOUNTER_GROUP(TP, tp),
	A5XX_REGULAR_PERFCOUNTER_GROUP(SP, sp),
	A5XX_REGULAR_PERFCOUNTER_GROUP(RB, rb),
	A5XX_REGULAR_PERFCOUNTER_GROUP(VSC, vsc),
	A5XX_PERFCOUNTER_GROUP(VBIF, vbif,
		a5xx_counter_vbif_enable, a5xx_counter_read_norestore, NULL),
	A5XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, vbif_pwr,
		ADRENO_PERFCOUNTER_GROUP_FIXED,
		a5xx_counter_vbif_pwr_enable,
		a5xx_counter_read_norestore, NULL),
	A5XX_PERFCOUNTER_GROUP_FLAGS(ALWAYSON, alwayson,
		ADRENO_PERFCOUNTER_GROUP_FIXED,
		a5xx_counter_alwayson_enable, a5xx_counter_alwayson_read, NULL),
	A5XX_POWER_COUNTER_GROUP(SP, sp,
		a5xx_counter_pwr_enable, a5xx_counter_read_norestore),
A5XX_POWER_COUNTER_GROUP(TP, tp,
		a5xx_counter_pwr_enable, a5xx_counter_read_norestore),
	A5XX_POWER_COUNTER_GROUP(RB, rb,
		a5xx_counter_pwr_enable, a5xx_counter_read_norestore),
	A5XX_POWER_COUNTER_GROUP(CCU, ccu,
		a5xx_counter_pwr_enable, a5xx_counter_read_norestore),
	A5XX_POWER_COUNTER_GROUP(UCHE, uche,
		a5xx_counter_pwr_enable, a5xx_counter_read_norestore),
	A5XX_POWER_COUNTER_GROUP(CP, cp,
		a5xx_counter_pwr_enable, a5xx_counter_read_norestore),
	A5XX_POWER_COUNTER_GROUP(GPMU, gpmu,
		a5xx_counter_pwr_gpmu_enable, a5xx_counter_read_norestore),
	A5XX_POWER_COUNTER_GROUP(ALWAYSON, alwayson,
		a5xx_counter_pwr_alwayson_enable, a5xx_counter_read_norestore),
};

/* Exported perfcounter description consumed by the generic adreno core */
const struct adreno_perfcounters adreno_a5xx_perfcounters = {
	a5xx_perfcounter_groups,
	ARRAY_SIZE(a5xx_perfcounter_groups),
};
diff --git a/adreno_a5xx_preempt.c b/adreno_a5xx_preempt.c
new file mode 100644
index 0000000000..bf3126ef74
--- /dev/null
+++ b/adreno_a5xx_preempt.c
@@ -0,0 +1,552 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2014-2017,2021 The Linux Foundation. All rights reserved.
 */

#include "adreno.h"
#include "adreno_a5xx.h"
#include "adreno_pm4types.h"
#include "adreno_trace.h"

/* Byte offset of a field within the CP preemption record */
#define PREEMPT_RECORD(_field) \
	offsetof(struct a5xx_cp_preemption_record, _field)

/* Byte offset of a field within the CP SMMU info record */
#define PREEMPT_SMMU_RECORD(_field) \
	offsetof(struct a5xx_cp_smmu_info, _field)

/*
 * Sync the hardware WPTR with the software copy for the current
 * ringbuffer and optionally re-arm the dispatch queue timeout.
 */
static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct adreno_ringbuffer *rb = adreno_dev->cur_rb;
	unsigned int wptr;
	unsigned long flags;

	spin_lock_irqsave(&rb->preempt_lock, flags);

	kgsl_regread(device, A5XX_CP_RB_WPTR, &wptr);

	if (wptr != rb->wptr) {
		kgsl_regwrite(device, A5XX_CP_RB_WPTR, rb->wptr);
		/*
		 * In case something got submitted while preemption was on
		 * going, reset the timer.
		 */
		reset_timer = true;
	}

	if (reset_timer)
		rb->dispatch_q.expires = jiffies +
			msecs_to_jiffies(adreno_drawobj_timeout);

	spin_unlock_irqrestore(&rb->preempt_lock, flags);
}

/* Finish a preemption that the callback left in the COMPLETE state */
static void _a5xx_preemption_done(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	unsigned int status;

	/*
	 * In the very unlikely case that the power is off, do nothing - the
	 * state will be reset on power up and everybody will be happy
	 */

	if (!kgsl_state_is_awake(device))
		return;

	kgsl_regread(device, A5XX_CP_CONTEXT_SWITCH_CNTL, &status);

	/* Non-zero status means the CP never finished the context switch */
	if (status != 0) {
		dev_err(device->dev,
			"Preemption not complete: status=%X cur=%d R/W=%X/%X next=%d R/W=%X/%X\n",
			status, adreno_dev->cur_rb->id,
			adreno_get_rptr(adreno_dev->cur_rb),
			adreno_dev->cur_rb->wptr,
			adreno_dev->next_rb->id,
			adreno_get_rptr(adreno_dev->next_rb),
			adreno_dev->next_rb->wptr);

		/* Set a fault and restart */
		adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT);

		return;
	}

	del_timer_sync(&adreno_dev->preempt.timer);

	trace_adreno_preempt_done(adreno_dev->cur_rb, adreno_dev->next_rb, 0);

	/* Clean up all the bits */
	adreno_dev->prev_rb = adreno_dev->cur_rb;
	adreno_dev->cur_rb = adreno_dev->next_rb;
	adreno_dev->next_rb = NULL;

	/* Update the wptr for the new command queue */
	_update_wptr(adreno_dev, true);

	/* Update the dispatcher timer for the new command queue */
	mod_timer(&adreno_dev->dispatcher.timer,
		adreno_dev->cur_rb->dispatch_q.expires);

	/* Clear the preempt state */
	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
}

/* Handle a preemption that timed out (FAULTED state) */
static void _a5xx_preemption_fault(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	unsigned int status;

	/*
	 * If the power is on check the preemption status one more time - if it
	 * was successful then just transition to the complete state
	 */
	if (kgsl_state_is_awake(device)) {
		kgsl_regread(device, A5XX_CP_CONTEXT_SWITCH_CNTL, &status);

		if (status == 0) {
			adreno_set_preempt_state(adreno_dev,
				ADRENO_PREEMPT_COMPLETE);

			adreno_dispatcher_schedule(device);
			return;
		}
	}

	dev_err(device->dev,
		"Preemption timed out: cur=%d R/W=%X/%X, next=%d R/W=%X/%X\n",
		adreno_dev->cur_rb->id,
		adreno_get_rptr(adreno_dev->cur_rb),
		adreno_dev->cur_rb->wptr,
		adreno_dev->next_rb->id,
		adreno_get_rptr(adreno_dev->next_rb),
		adreno_dev->next_rb->wptr);

	adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
}

/* Deferred handler for the preemption timeout timer */
static void _a5xx_preemption_worker(struct work_struct *work)
{
	struct adreno_preemption *preempt = container_of(work,
		struct adreno_preemption, work);
	struct adreno_device *adreno_dev = container_of(preempt,
		struct adreno_device, preempt);
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	/* Need to take the mutex to make sure that the power stays on */
	mutex_lock(&device->mutex);

	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_FAULTED))
		_a5xx_preemption_fault(adreno_dev);

	mutex_unlock(&device->mutex);
}

/* Find the highest priority active ringbuffer */
static struct adreno_ringbuffer *a5xx_next_ringbuffer(
		struct adreno_device *adreno_dev)
{
	struct adreno_ringbuffer *rb;
	unsigned long flags;
	unsigned int i;

	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
		bool empty;

		spin_lock_irqsave(&rb->preempt_lock, flags);
		empty = adreno_rb_empty(rb);
		spin_unlock_irqrestore(&rb->preempt_lock, flags);

		if (!empty)
			return rb;
	}

	return NULL;
}

/* Kick off a preemption to the highest priority non-empty ringbuffer */
void a5xx_preemption_trigger(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct kgsl_iommu *iommu = KGSL_IOMMU(device);
	struct adreno_ringbuffer *next;
	uint64_t ttbr0;
	unsigned int contextidr;
	unsigned long flags;

	/* Put ourselves into a possible trigger state */
	if (!adreno_move_preempt_state(adreno_dev,
		ADRENO_PREEMPT_NONE, ADRENO_PREEMPT_START))
		return;

	/* Get the next ringbuffer to preempt in */
	next = a5xx_next_ringbuffer(adreno_dev);

	/*
	 * Nothing to do if every ringbuffer is empty or if the current
	 * ringbuffer is the only active one
	 */
	if (next == NULL || next == adreno_dev->cur_rb) {
		/*
		 * Update any critical things that might have been skipped while
		 * we were looking for a new ringbuffer
		 */

		if (next != NULL) {
			_update_wptr(adreno_dev, false);

			mod_timer(&adreno_dev->dispatcher.timer,
				adreno_dev->cur_rb->dispatch_q.expires);
		}

		adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
		return;
	}

	/* Turn off the dispatcher timer */
	del_timer(&adreno_dev->dispatcher.timer);

	/*
	 * This is the most critical section - we need to take care not to race
	 * until we have programmed the CP for the switch
	 */

	spin_lock_irqsave(&next->preempt_lock, flags);

	/*
	 * Get the pagetable from the pagetable info.
	 * The pagetable_desc is allocated and mapped at probe time, and
	 * preemption_desc at init time, so no need to check if
	 * sharedmem accesses to these memdescs succeed.
	 */
	kgsl_sharedmem_readq(next->pagetable_desc, &ttbr0,
		PT_INFO_OFFSET(ttbr0));
	kgsl_sharedmem_readl(next->pagetable_desc, &contextidr,
		PT_INFO_OFFSET(contextidr));

	kgsl_sharedmem_writel(next->preemption_desc,
		PREEMPT_RECORD(wptr), next->wptr);

	spin_unlock_irqrestore(&next->preempt_lock, flags);

	/* And write it to the smmu info */
	if (kgsl_mmu_is_perprocess(&device->mmu)) {
		kgsl_sharedmem_writeq(iommu->smmu_info,
			PREEMPT_SMMU_RECORD(ttbr0), ttbr0);
		kgsl_sharedmem_writel(iommu->smmu_info,
			PREEMPT_SMMU_RECORD(context_idr), contextidr);
	}

	/* Point the CP at the record it should restore from */
	kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO,
		lower_32_bits(next->preemption_desc->gpuaddr));
	kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI,
		upper_32_bits(next->preemption_desc->gpuaddr));

	adreno_dev->next_rb = next;

	/* Start the timer to detect a stuck preemption */
	mod_timer(&adreno_dev->preempt.timer,
		jiffies + msecs_to_jiffies(ADRENO_PREEMPT_TIMEOUT));

	trace_adreno_preempt_trigger(adreno_dev->cur_rb, adreno_dev->next_rb,
		1);

	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_TRIGGERED);

	/* Trigger the preemption */
	kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_CNTL, 1);
}

/* Interrupt handler for the CP context switch (preemption done) IRQ */
void a5xx_preempt_callback(struct adreno_device *adreno_dev, int bit)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	unsigned int status;

	if (!adreno_move_preempt_state(adreno_dev,
		ADRENO_PREEMPT_TRIGGERED, ADRENO_PREEMPT_PENDING))
		return;

	kgsl_regread(device, A5XX_CP_CONTEXT_SWITCH_CNTL, &status);

	if (status != 0) {
		dev_err(KGSL_DEVICE(adreno_dev)->dev,
			"preempt interrupt with non-zero status: %X\n",
			status);

		/*
		 * Under the assumption that this is a race between the
		 * interrupt and the register, schedule the worker to clean up.
		 * If the status still hasn't resolved itself by the time we get
		 * there then we have to assume something bad happened
		 */
		adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE);
		adreno_dispatcher_schedule(device);
		return;
	}

	del_timer(&adreno_dev->preempt.timer);

	trace_adreno_preempt_done(adreno_dev->cur_rb, adreno_dev->next_rb, 0);

	adreno_dev->prev_rb = adreno_dev->cur_rb;
	adreno_dev->cur_rb = adreno_dev->next_rb;
	adreno_dev->next_rb = NULL;

	/* Update the wptr if it changed while preemption was ongoing */
	_update_wptr(adreno_dev, true);

	/* Update the dispatcher timer for the new command queue */
	mod_timer(&adreno_dev->dispatcher.timer,
		adreno_dev->cur_rb->dispatch_q.expires);

	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);

	/* Chain into the next pending preemption, if any */
	a5xx_preemption_trigger(adreno_dev);
}

/* Complete any pending preemption and try to trigger a new one */
void a5xx_preemption_schedule(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	if (!adreno_is_preemption_enabled(adreno_dev))
		return;

	mutex_lock(&device->mutex);

	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE))
		_a5xx_preemption_done(adreno_dev);

	a5xx_preemption_trigger(adreno_dev);

	mutex_unlock(&device->mutex);
}

/*
 * Emit the per-submission preemption preamble into the ringbuffer.
 * Returns the number of dwords written to cmds.
 */
u32 a5xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev,
		struct adreno_ringbuffer *rb,
		struct adreno_context *drawctxt, u32 *cmds)
{
	unsigned int *cmds_orig = cmds;
	uint64_t gpuaddr = rb->preemption_desc->gpuaddr;
	unsigned int preempt_style = 0;

	if (!adreno_is_preemption_enabled(adreno_dev))
		return 0;

	if (drawctxt) {
		/*
		 * Preemption from secure to unsecure needs Zap shader to be
		 * run to clear all secure content. CP does not know during
		 * preemption if it is switching between secure and unsecure
		 * contexts so restrict Secure contexts to be preempted at
		 * ringbuffer level.
		 */
		if (drawctxt->base.flags & KGSL_CONTEXT_SECURE)
			preempt_style = KGSL_CONTEXT_PREEMPT_STYLE_RINGBUFFER;
		else
			preempt_style = FIELD_GET(KGSL_CONTEXT_PREEMPT_STYLE_MASK,
				drawctxt->base.flags);
	}

	/*
	 * CP_PREEMPT_ENABLE_GLOBAL(global preemption) can only be set by KMD
	 * in ringbuffer.
	 * 1) set global preemption to 0x0 to disable global preemption.
	 *    Only RB level preemption is allowed in this mode
	 * 2) Set global preemption to defer(0x2) for finegrain preemption.
	 *    when global preemption is set to defer(0x2),
	 *    CP_PREEMPT_ENABLE_LOCAL(local preemption) determines the
	 *    preemption point. Local preemption
	 *    can be enabled by both UMD(within IB) and KMD.
	 */
	*cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_GLOBAL, 1);
	*cmds++ = ((preempt_style == KGSL_CONTEXT_PREEMPT_STYLE_FINEGRAIN)
		? 2 : 0);

	/* Turn CP protection OFF */
	cmds += cp_protected_mode(adreno_dev, cmds, 0);

	/*
	 * CP during context switch will save context switch info to
	 * a5xx_cp_preemption_record pointed by CONTEXT_SWITCH_SAVE_ADDR
	 */
	*cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 1);
	*cmds++ = lower_32_bits(gpuaddr);
	*cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI, 1);
	*cmds++ = upper_32_bits(gpuaddr);

	/* Turn CP protection ON */
	cmds += cp_protected_mode(adreno_dev, cmds, 1);

	/*
	 * Enable local preemption for finegrain preemption in case of
	 * a misbehaving IB
	 */
	if (preempt_style == KGSL_CONTEXT_PREEMPT_STYLE_FINEGRAIN) {
		*cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_LOCAL, 1);
		*cmds++ = 1;
	} else {
		*cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_LOCAL, 1);
		*cmds++ = 0;
	}

	/* Enable CP_CONTEXT_SWITCH_YIELD packets in the IB2s */
	*cmds++ = cp_type7_packet(CP_YIELD_ENABLE, 1);
	*cmds++ = 2;

	return (unsigned int) (cmds - cmds_orig);
}

/* Emit the post-submission preemption packets; returns dwords written */
unsigned int a5xx_preemption_post_ibsubmit(struct adreno_device *adreno_dev,
	unsigned int *cmds)
{
	int dwords = 0;

	if
(!adreno_is_preemption_enabled(adreno_dev)) + return 0; + + cmds[dwords++] = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4); + /* Write NULL to the address to skip the data write */ + dwords += cp_gpuaddr(adreno_dev, &cmds[dwords], 0x0); + cmds[dwords++] = 1; + /* generate interrupt on preemption completion */ + cmds[dwords++] = 1; + + return dwords; +} + +void a5xx_preemption_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + struct adreno_ringbuffer *rb; + unsigned int i; + + if (!adreno_is_preemption_enabled(adreno_dev)) + return; + + /* Force the state to be clear */ + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE); + + /* Only set up smmu info when per-process pagetables are enabled */ + + if (kgsl_mmu_is_perprocess(&device->mmu)) { + /* smmu_info is allocated and mapped in a5xx_preemption_iommu_init */ + kgsl_sharedmem_writel(iommu->smmu_info, + PREEMPT_SMMU_RECORD(magic), A5XX_CP_SMMU_INFO_MAGIC_REF); + kgsl_sharedmem_writeq(iommu->smmu_info, + PREEMPT_SMMU_RECORD(ttbr0), MMU_DEFAULT_TTBR0(device)); + + /* The CP doesn't use the asid record, so poison it */ + kgsl_sharedmem_writel(iommu->smmu_info, + PREEMPT_SMMU_RECORD(asid), 0xDECAFBAD); + kgsl_sharedmem_writel(iommu->smmu_info, + PREEMPT_SMMU_RECORD(context_idr), 0); + + kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO, + lower_32_bits(iommu->smmu_info->gpuaddr)); + + kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI, + upper_32_bits(iommu->smmu_info->gpuaddr)); + } + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + /* + * preemption_desc is allocated and mapped at init time, + * so no need to check sharedmem_writel return value + */ + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(rptr), 0); + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(wptr), 0); + + adreno_ringbuffer_set_pagetable(rb, + device->mmu.defaultpagetable); + } + +} + +static int 
a5xx_preemption_ringbuffer_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, uint64_t counteraddr) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (IS_ERR_OR_NULL(rb->preemption_desc)) + rb->preemption_desc = kgsl_allocate_global(device, + A5XX_CP_CTXRECORD_SIZE_IN_BYTES, SZ_16K, 0, + KGSL_MEMDESC_PRIVILEGED, "preemption_desc"); + + if (IS_ERR(rb->preemption_desc)) + return PTR_ERR(rb->preemption_desc); + + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(magic), A5XX_CP_CTXRECORD_MAGIC_REF); + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(info), 0); + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(data), 0); + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(cntl), A5XX_CP_RB_CNTL_DEFAULT); + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(rptr), 0); + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(wptr), 0); + kgsl_sharedmem_writeq(rb->preemption_desc, + PREEMPT_RECORD(rptr_addr), SCRATCH_RPTR_GPU_ADDR(device, + rb->id)); + kgsl_sharedmem_writeq(rb->preemption_desc, + PREEMPT_RECORD(rbase), rb->buffer_desc->gpuaddr); + kgsl_sharedmem_writeq(rb->preemption_desc, + PREEMPT_RECORD(counter), counteraddr); + + return 0; +} + +int a5xx_preemption_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + struct adreno_preemption *preempt = &adreno_dev->preempt; + struct adreno_ringbuffer *rb; + int ret; + unsigned int i; + uint64_t addr; + + /* We are dependent on IOMMU to make preemption go on the CP side */ + if (kgsl_mmu_get_mmutype(device) != KGSL_MMU_TYPE_IOMMU) + return -ENODEV; + + INIT_WORK(&preempt->work, _a5xx_preemption_worker); + + /* Allocate mem for storing preemption counters */ + if (IS_ERR_OR_NULL(preempt->scratch)) + preempt->scratch = kgsl_allocate_global(device, + adreno_dev->num_ringbuffers * + A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE, 0, 0, 0, + 
"preemption_counters"); + + ret = PTR_ERR_OR_ZERO(preempt->scratch); + if (ret) + return ret; + + addr = preempt->scratch->gpuaddr; + + /* Allocate mem for storing preemption switch record */ + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + ret = a5xx_preemption_ringbuffer_init(adreno_dev, rb, addr); + if (ret) + return ret; + + addr += A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE; + } + + /* Allocate mem for storing preemption smmu record */ + if (kgsl_mmu_is_perprocess(&device->mmu) && IS_ERR_OR_NULL(iommu->smmu_info)) + iommu->smmu_info = kgsl_allocate_global(device, PAGE_SIZE, 0, + KGSL_MEMFLAGS_GPUREADONLY, KGSL_MEMDESC_PRIVILEGED, + "smmu_info"); + + if (IS_ERR(iommu->smmu_info)) + return PTR_ERR(iommu->smmu_info); + + set_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv); + return 0; +} diff --git a/adreno_a5xx_ringbuffer.c b/adreno_a5xx_ringbuffer.c new file mode 100644 index 0000000000..fb973e9412 --- /dev/null +++ b/adreno_a5xx_ringbuffer.c @@ -0,0 +1,521 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. + */ + +#include "adreno.h" +#include "adreno_a5xx.h" +#include "adreno_pm4types.h" +#include "adreno_ringbuffer.h" +#include "adreno_trace.h" +#include "kgsl_trace.h" + +static int a5xx_rb_pagetable_switch(struct kgsl_device *device, + struct adreno_context *drawctxt, + struct adreno_ringbuffer *rb, + struct kgsl_pagetable *pagetable, u32 *cmds) +{ + u64 ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pagetable); + u32 id = drawctxt ? 
drawctxt->base.id : 0; + + if (pagetable == device->mmu.defaultpagetable) + return 0; + + cmds[0] = cp_type7_packet(CP_SMMU_TABLE_UPDATE, 3); + cmds[1] = lower_32_bits(ttbr0); + cmds[2] = upper_32_bits(ttbr0); + cmds[3] = id; + + cmds[4] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); + cmds[5] = cp_type7_packet(CP_WAIT_FOR_ME, 0); + cmds[6] = cp_type4_packet(A5XX_CP_CNTL, 1); + cmds[7] = 1; + + cmds[8] = cp_type7_packet(CP_MEM_WRITE, 5); + cmds[9] = lower_32_bits(rb->pagetable_desc->gpuaddr + + PT_INFO_OFFSET(ttbr0)); + cmds[10] = upper_32_bits(rb->pagetable_desc->gpuaddr + + PT_INFO_OFFSET(ttbr0)); + cmds[11] = lower_32_bits(ttbr0); + cmds[12] = upper_32_bits(ttbr0); + cmds[13] = id; + + cmds[14] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); + cmds[15] = cp_type7_packet(CP_WAIT_FOR_ME, 0); + cmds[16] = cp_type4_packet(A5XX_CP_CNTL, 1); + cmds[17] = 0; + + return 18; +} + +#define RB_SOPTIMESTAMP(device, rb) \ + MEMSTORE_RB_GPU_ADDR(device, rb, soptimestamp) +#define CTXT_SOPTIMESTAMP(device, drawctxt) \ + MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, soptimestamp) + +#define RB_EOPTIMESTAMP(device, rb) \ + MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp) +#define CTXT_EOPTIMESTAMP(device, drawctxt) \ + MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, eoptimestamp) + +int a5xx_ringbuffer_submit(struct adreno_ringbuffer *rb, + struct adreno_submit_time *time, bool sync) +{ + struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned long flags; + + adreno_get_submit_time(adreno_dev, rb, time); + adreno_profile_submit_time(time); + + if (sync) { + u32 *cmds = adreno_ringbuffer_allocspace(rb, 3); + + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + + cmds[0] = cp_type7_packet(CP_WHERE_AM_I, 2); + cmds[1] = lower_32_bits(SCRATCH_RPTR_GPU_ADDR(device, rb->id)); + cmds[2] = upper_32_bits(SCRATCH_RPTR_GPU_ADDR(device, rb->id)); + } + + spin_lock_irqsave(&rb->preempt_lock, flags); + if (adreno_in_preempt_state(adreno_dev, 
ADRENO_PREEMPT_NONE)) { + if (adreno_dev->cur_rb == rb) { + kgsl_pwrscale_busy(device); + kgsl_regwrite(device, A5XX_CP_RB_WPTR, rb->_wptr); + } + } + + rb->wptr = rb->_wptr; + spin_unlock_irqrestore(&rb->preempt_lock, flags); + + return 0; +} + +int a5xx_ringbuffer_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int i; + + if (IS_ERR_OR_NULL(device->scratch)) + device->scratch = kgsl_allocate_global(device, PAGE_SIZE, + 0, 0, KGSL_MEMDESC_RANDOM | KGSL_MEMDESC_PRIVILEGED, + "scratch"); + + if (IS_ERR(device->scratch)) + return PTR_ERR(device->scratch); + + adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]); + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_PREEMPTION)) { + adreno_dev->num_ringbuffers = 1; + return adreno_ringbuffer_setup(adreno_dev, + &adreno_dev->ringbuffers[0], 0); + } + + adreno_dev->num_ringbuffers = ARRAY_SIZE(adreno_dev->ringbuffers); + + for (i = 0; i < adreno_dev->num_ringbuffers; i++) { + int ret; + + ret = adreno_ringbuffer_setup(adreno_dev, + &adreno_dev->ringbuffers[i], i); + if (ret) + return ret; + } + + timer_setup(&adreno_dev->preempt.timer, adreno_preemption_timer, 0); + a5xx_preemption_init(adreno_dev); + return 0; +} + +#define A5XX_SUBMIT_MAX 64 + +int a5xx_ringbuffer_addcmds(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, + u32 flags, u32 *in, u32 dwords, u32 timestamp, + struct adreno_submit_time *time) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + static u32 sequence; + u32 size = A5XX_SUBMIT_MAX + dwords; + u32 *cmds, index = 0; + u64 profile_gpuaddr; + u32 profile_dwords; + + if (adreno_drawctxt_detached(drawctxt)) + return -ENOENT; + + if (adreno_gpu_fault(adreno_dev) != 0) + return -EPROTO; + + rb->timestamp++; + + if (drawctxt) + drawctxt->internal_timestamp = rb->timestamp; + + cmds = adreno_ringbuffer_allocspace(rb, size); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + + /* Identify the start of a command */ + 
cmds[index++] = cp_type7_packet(CP_NOP, 1); + cmds[index++] = drawctxt ? CMD_IDENTIFIER : CMD_INTERNAL_IDENTIFIER; + + /* 14 dwords */ + index += a5xx_preemption_pre_ibsubmit(adreno_dev, rb, drawctxt, + &cmds[index]); + + profile_gpuaddr = adreno_profile_preib_processing(adreno_dev, + drawctxt, &profile_dwords); + + if (profile_gpuaddr) { + cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3); + cmds[index++] = lower_32_bits(profile_gpuaddr); + cmds[index++] = upper_32_bits(profile_gpuaddr); + cmds[index++] = profile_dwords; + } + + if (drawctxt) { + cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3); + cmds[index++] = lower_32_bits(CTXT_SOPTIMESTAMP(device, + drawctxt)); + cmds[index++] = upper_32_bits(CTXT_SOPTIMESTAMP(device, + drawctxt)); + cmds[index++] = timestamp; + } + + cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3); + cmds[index++] = lower_32_bits(RB_SOPTIMESTAMP(device, rb)); + cmds[index++] = upper_32_bits(RB_SOPTIMESTAMP(device, rb)); + cmds[index++] = rb->timestamp; + + if (IS_SECURE(flags)) { + cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1); + cmds[index++] = 1; + } + + if (IS_NOTPROTECTED(flags)) { + cmds[index++] = cp_type7_packet(CP_SET_PROTECTED_MODE, 1); + cmds[index++] = 0; + } + + memcpy(&cmds[index], in, dwords << 2); + index += dwords; + + if (IS_NOTPROTECTED(flags)) { + cmds[index++] = cp_type7_packet(CP_SET_PROTECTED_MODE, 1); + cmds[index++] = 1; + } + + /* 4 dwords */ + profile_gpuaddr = adreno_profile_postib_processing(adreno_dev, + drawctxt, &profile_dwords); + + if (profile_gpuaddr) { + cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3); + cmds[index++] = lower_32_bits(profile_gpuaddr); + cmds[index++] = upper_32_bits(profile_gpuaddr); + cmds[index++] = profile_dwords; + } + + if (!adreno_is_a510(adreno_dev) && + test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, + &device->mmu.pfpolicy)) + cmds[index++] = cp_type7_packet(CP_WAIT_MEM_WRITES, 0); + + /* + * Do a unique memory write from the GPU to assist in early 
detection of + * interrupt storms + */ + + cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3); + cmds[index++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device, + KGSL_MEMSTORE_GLOBAL, ref_wait_ts)); + cmds[index++] = upper_32_bits(MEMSTORE_ID_GPU_ADDR(device, + KGSL_MEMSTORE_GLOBAL, ref_wait_ts)); + cmds[index++] = ++sequence; + + /* + * If this is an internal command, just write the ringbuffer timestamp, + * otherwise, write both + */ + if (!drawctxt) { + cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4); + cmds[index++] = CACHE_FLUSH_TS | (1 << 31); + cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb)); + cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb)); + cmds[index++] = rb->timestamp; + } else { + cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4); + cmds[index++] = CACHE_FLUSH_TS | (1 << 31); + cmds[index++] = lower_32_bits(CTXT_EOPTIMESTAMP(device, + drawctxt)); + cmds[index++] = upper_32_bits(CTXT_EOPTIMESTAMP(device, + drawctxt)); + cmds[index++] = timestamp; + + cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4); + cmds[index++] = CACHE_FLUSH_TS; + cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb)); + cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb)); + cmds[index++] = rb->timestamp; + } + + if (IS_WFI(flags)) + cmds[index++] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); + + if (IS_SECURE(flags)) { + cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1); + cmds[index++] = 0; + } + + /* 5 dwords */ + index += a5xx_preemption_post_ibsubmit(adreno_dev, &cmds[index]); + + /* Adjust the thing for the number of bytes we actually wrote */ + rb->_wptr -= (size - index); + + a5xx_ringbuffer_submit(rb, time, + !adreno_is_preemption_enabled(adreno_dev)); + + return 0; +} + +static u32 a5xx_get_alwayson_counter(struct adreno_device *adreno_dev, + u32 *cmds, u64 gpuaddr) +{ + cmds[0] = cp_type7_packet(CP_REG_TO_MEM, 3); + cmds[1] = A5XX_RBBM_ALWAYSON_COUNTER_LO; + + /* On some targets the upper 32 bits are not reliable */ + if 
(ADRENO_GPUREV(adreno_dev) > ADRENO_REV_A530) + cmds[1] |= (1 << 30) | (2 << 18); + + cmds[2] = lower_32_bits(gpuaddr); + cmds[3] = upper_32_bits(gpuaddr); + + return 4; +} + +/* This is the maximum possible size for 64 bit targets */ +#define PROFILE_IB_DWORDS 4 +#define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2)) + +static u64 a5xx_get_user_profiling_ib(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, struct kgsl_drawobj_cmd *cmdobj, + u32 target_offset, u32 *cmds) +{ + u32 offset = rb->profile_index * (PROFILE_IB_DWORDS << 2); + u32 *ib = rb->profile_desc->hostptr + offset; + u64 gpuaddr = rb->profile_desc->gpuaddr + offset; + u32 dwords = a5xx_get_alwayson_counter(adreno_dev, ib, + cmdobj->profiling_buffer_gpuaddr + target_offset); + + cmds[0] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3); + cmds[1] = lower_32_bits(gpuaddr); + cmds[2] = upper_32_bits(gpuaddr); + cmds[3] = dwords; + + rb->profile_index = (rb->profile_index + 1) % PROFILE_IB_SLOTS; + + return 4; +} + +static int a5xx_rb_context_switch(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, + struct adreno_context *drawctxt) +{ + struct kgsl_pagetable *pagetable = + adreno_drawctxt_get_pagetable(drawctxt); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int count = 0; + u32 cmds[32]; + + if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable) + count += a5xx_rb_pagetable_switch(device, drawctxt, + rb, pagetable, cmds); + + cmds[count++] = cp_type7_packet(CP_NOP, 1); + cmds[count++] = CONTEXT_TO_MEM_IDENTIFIER; + + cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3); + cmds[count++] = lower_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb, + current_context)); + cmds[count++] = upper_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb, + current_context)); + cmds[count++] = drawctxt->base.id; + + cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3); + cmds[count++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device, + KGSL_MEMSTORE_GLOBAL, current_context)); + 
cmds[count++] = upper_32_bits(MEMSTORE_ID_GPU_ADDR(device, + KGSL_MEMSTORE_GLOBAL, current_context)); + cmds[count++] = drawctxt->base.id; + + cmds[count++] = cp_type4_packet(A5XX_UCHE_INVALIDATE0, 1); + cmds[count++] = 0x12; + + return a5xx_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED, + cmds, count, 0, NULL); +} + +static int a5xx_drawctxt_switch(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, + struct adreno_context *drawctxt) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (rb->drawctxt_active == drawctxt) + return 0; + + if (kgsl_context_detached(&drawctxt->base)) + return -ENOENT; + + if (!_kgsl_context_get(&drawctxt->base)) + return -ENOENT; + + trace_adreno_drawctxt_switch(rb, drawctxt); + + a5xx_rb_context_switch(adreno_dev, rb, drawctxt); + + /* Release the current drawctxt as soon as the new one is switched */ + adreno_put_drawctxt_on_timestamp(device, rb->drawctxt_active, + rb, rb->timestamp); + + rb->drawctxt_active = drawctxt; + return 0; +} + + +#define A5XX_USER_PROFILE_IB(dev, rb, cmdobj, cmds, field) \ + a5xx_get_user_profiling_ib((dev), (rb), (cmdobj), \ + offsetof(struct kgsl_drawobj_profiling_buffer, field), \ + (cmds)) + +#define A5XX_KERNEL_PROFILE(dev, cmdobj, cmds, field) \ + a5xx_get_alwayson_counter((dev), (cmds), \ + (dev)->profile_buffer->gpuaddr + \ + ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \ + field)) + +#define A5XX_COMMAND_DWORDS 32 + +int a5xx_ringbuffer_submitcmd(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj, u32 flags, + struct adreno_submit_time *time) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context); + struct adreno_ringbuffer *rb = drawctxt->rb; + int ret = 0, numibs = 0, index = 0; + u32 *cmds; + + /* Count the number of IBs (if we are not skipping) */ + if (!IS_SKIP(flags)) { + struct list_head *tmp; + + 
list_for_each(tmp, &cmdobj->cmdlist) + numibs++; + } + + cmds = kmalloc((A5XX_COMMAND_DWORDS + (numibs * 5)) << 2, GFP_KERNEL); + if (!cmds) { + ret = -ENOMEM; + goto done; + } + + cmds[index++] = cp_type7_packet(CP_NOP, 1); + cmds[index++] = START_IB_IDENTIFIER; + + /* Kernel profiling: 4 dwords */ + if (IS_KERNEL_PROFILE(flags)) + index += A5XX_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index], + started); + + /* User profiling: 4 dwords */ + if (IS_USER_PROFILE(flags)) + index += A5XX_USER_PROFILE_IB(adreno_dev, rb, cmdobj, + &cmds[index], gpu_ticks_submitted); + + if (numibs) { + struct kgsl_memobj_node *ib; + + list_for_each_entry(ib, &cmdobj->cmdlist, node) { + if (ib->priv & MEMOBJ_SKIP || + (ib->flags & KGSL_CMDLIST_CTXTSWITCH_PREAMBLE + && !IS_PREAMBLE(flags))) + cmds[index++] = cp_type7_packet(CP_NOP, 4); + + cmds[index++] = + cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3); + cmds[index++] = lower_32_bits(ib->gpuaddr); + cmds[index++] = upper_32_bits(ib->gpuaddr); + + /* Double check that IB_PRIV is never set */ + cmds[index++] = (ib->size >> 2) & 0xfffff; + } + } + + /* + * SRM -- set render mode (ex binning, direct render etc) + * SRM is set by UMD usually at start of IB to tell CP the type of + * preemption. + * KMD needs to set SRM to NULL to indicate CP that rendering is + * done by IB. 
+ */ + cmds[index++] = cp_type7_packet(CP_SET_RENDER_MODE, 5); + cmds[index++] = 0; + cmds[index++] = 0; + cmds[index++] = 0; + cmds[index++] = 0; + cmds[index++] = 0; + + cmds[index++] = cp_type7_packet(CP_YIELD_ENABLE, 1); + cmds[index++] = 1; + + /* 4 dwords */ + if (IS_KERNEL_PROFILE(flags)) + index += A5XX_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index], + retired); + + /* 4 dwords */ + if (IS_USER_PROFILE(flags)) + index += A5XX_USER_PROFILE_IB(adreno_dev, rb, cmdobj, + &cmds[index], gpu_ticks_retired); + + cmds[index++] = cp_type7_packet(CP_NOP, 1); + cmds[index++] = END_IB_IDENTIFIER; + + ret = a5xx_drawctxt_switch(adreno_dev, rb, drawctxt); + + /* + * In the unlikely event of an error in the drawctxt switch, + * treat it like a hang + */ + if (ret) { + /* + * It is "normal" to get a -ENOSPC or a -ENOENT. Don't log it, + * the upper layers know how to handle it + */ + if (ret != -ENOSPC && ret != -ENOENT) + dev_err(device->dev, + "Unable to switch draw context: %d\n", + ret); + goto done; + } + + adreno_drawobj_set_constraint(device, drawobj); + + ret = a5xx_ringbuffer_addcmds(adreno_dev, drawctxt->rb, drawctxt, + flags, cmds, index, drawobj->timestamp, time); + +done: + trace_kgsl_issueibcmds(device, drawctxt->base.id, numibs, + drawobj->timestamp, drawobj->flags, ret, drawctxt->type); + + kfree(cmds); + return ret; +} diff --git a/adreno_a5xx_snapshot.c b/adreno_a5xx_snapshot.c new file mode 100644 index 0000000000..a871e5e980 --- /dev/null +++ b/adreno_a5xx_snapshot.c @@ -0,0 +1,1220 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2015-2020, The Linux Foundation. All rights reserved. 
+ */ + +#include "adreno.h" +#include "adreno_a5xx.h" +#include "adreno_snapshot.h" + +enum a5xx_rbbm_debbus_id { + A5XX_RBBM_DBGBUS_CP = 0x1, + A5XX_RBBM_DBGBUS_RBBM = 0x2, + A5XX_RBBM_DBGBUS_VBIF = 0x3, + A5XX_RBBM_DBGBUS_HLSQ = 0x4, + A5XX_RBBM_DBGBUS_UCHE = 0x5, + A5XX_RBBM_DBGBUS_DPM = 0x6, + A5XX_RBBM_DBGBUS_TESS = 0x7, + A5XX_RBBM_DBGBUS_PC = 0x8, + A5XX_RBBM_DBGBUS_VFDP = 0x9, + A5XX_RBBM_DBGBUS_VPC = 0xa, + A5XX_RBBM_DBGBUS_TSE = 0xb, + A5XX_RBBM_DBGBUS_RAS = 0xc, + A5XX_RBBM_DBGBUS_VSC = 0xd, + A5XX_RBBM_DBGBUS_COM = 0xe, + A5XX_RBBM_DBGBUS_DCOM = 0xf, + A5XX_RBBM_DBGBUS_LRZ = 0x10, + A5XX_RBBM_DBGBUS_A2D_DSP = 0x11, + A5XX_RBBM_DBGBUS_CCUFCHE = 0x12, + A5XX_RBBM_DBGBUS_GPMU = 0x13, + A5XX_RBBM_DBGBUS_RBP = 0x14, + A5XX_RBBM_DBGBUS_HM = 0x15, + A5XX_RBBM_DBGBUS_RBBM_CFG = 0x16, + A5XX_RBBM_DBGBUS_VBIF_CX = 0x17, + A5XX_RBBM_DBGBUS_GPC = 0x1d, + A5XX_RBBM_DBGBUS_LARC = 0x1e, + A5XX_RBBM_DBGBUS_HLSQ_SPTP = 0x1f, + A5XX_RBBM_DBGBUS_RB_0 = 0x20, + A5XX_RBBM_DBGBUS_RB_1 = 0x21, + A5XX_RBBM_DBGBUS_RB_2 = 0x22, + A5XX_RBBM_DBGBUS_RB_3 = 0x23, + A5XX_RBBM_DBGBUS_CCU_0 = 0x28, + A5XX_RBBM_DBGBUS_CCU_1 = 0x29, + A5XX_RBBM_DBGBUS_CCU_2 = 0x2a, + A5XX_RBBM_DBGBUS_CCU_3 = 0x2b, + A5XX_RBBM_DBGBUS_A2D_RAS_0 = 0x30, + A5XX_RBBM_DBGBUS_A2D_RAS_1 = 0x31, + A5XX_RBBM_DBGBUS_A2D_RAS_2 = 0x32, + A5XX_RBBM_DBGBUS_A2D_RAS_3 = 0x33, + A5XX_RBBM_DBGBUS_VFD_0 = 0x38, + A5XX_RBBM_DBGBUS_VFD_1 = 0x39, + A5XX_RBBM_DBGBUS_VFD_2 = 0x3a, + A5XX_RBBM_DBGBUS_VFD_3 = 0x3b, + A5XX_RBBM_DBGBUS_SP_0 = 0x40, + A5XX_RBBM_DBGBUS_SP_1 = 0x41, + A5XX_RBBM_DBGBUS_SP_2 = 0x42, + A5XX_RBBM_DBGBUS_SP_3 = 0x43, + A5XX_RBBM_DBGBUS_TPL1_0 = 0x48, + A5XX_RBBM_DBGBUS_TPL1_1 = 0x49, + A5XX_RBBM_DBGBUS_TPL1_2 = 0x4a, + A5XX_RBBM_DBGBUS_TPL1_3 = 0x4b +}; + +static const struct adreno_debugbus_block a5xx_debugbus_blocks[] = { + { A5XX_RBBM_DBGBUS_CP, 0x100, }, + { A5XX_RBBM_DBGBUS_RBBM, 0x100, }, + { A5XX_RBBM_DBGBUS_VBIF, 0x100, }, + { A5XX_RBBM_DBGBUS_HLSQ, 0x100, }, + { A5XX_RBBM_DBGBUS_UCHE, 0x100, }, + { 
A5XX_RBBM_DBGBUS_DPM, 0x100, }, + { A5XX_RBBM_DBGBUS_TESS, 0x100, }, + { A5XX_RBBM_DBGBUS_PC, 0x100, }, + { A5XX_RBBM_DBGBUS_VFDP, 0x100, }, + { A5XX_RBBM_DBGBUS_VPC, 0x100, }, + { A5XX_RBBM_DBGBUS_TSE, 0x100, }, + { A5XX_RBBM_DBGBUS_RAS, 0x100, }, + { A5XX_RBBM_DBGBUS_VSC, 0x100, }, + { A5XX_RBBM_DBGBUS_COM, 0x100, }, + { A5XX_RBBM_DBGBUS_DCOM, 0x100, }, + { A5XX_RBBM_DBGBUS_LRZ, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_DSP, 0x100, }, + { A5XX_RBBM_DBGBUS_CCUFCHE, 0x100, }, + { A5XX_RBBM_DBGBUS_GPMU, 0x100, }, + { A5XX_RBBM_DBGBUS_RBP, 0x100, }, + { A5XX_RBBM_DBGBUS_HM, 0x100, }, + { A5XX_RBBM_DBGBUS_RBBM_CFG, 0x100, }, + { A5XX_RBBM_DBGBUS_VBIF_CX, 0x100, }, + { A5XX_RBBM_DBGBUS_GPC, 0x100, }, + { A5XX_RBBM_DBGBUS_LARC, 0x100, }, + { A5XX_RBBM_DBGBUS_HLSQ_SPTP, 0x100, }, + { A5XX_RBBM_DBGBUS_RB_0, 0x100, }, + { A5XX_RBBM_DBGBUS_RB_1, 0x100, }, + { A5XX_RBBM_DBGBUS_RB_2, 0x100, }, + { A5XX_RBBM_DBGBUS_RB_3, 0x100, }, + { A5XX_RBBM_DBGBUS_CCU_0, 0x100, }, + { A5XX_RBBM_DBGBUS_CCU_1, 0x100, }, + { A5XX_RBBM_DBGBUS_CCU_2, 0x100, }, + { A5XX_RBBM_DBGBUS_CCU_3, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_RAS_0, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_RAS_1, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_RAS_2, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_RAS_3, 0x100, }, + { A5XX_RBBM_DBGBUS_VFD_0, 0x100, }, + { A5XX_RBBM_DBGBUS_VFD_1, 0x100, }, + { A5XX_RBBM_DBGBUS_VFD_2, 0x100, }, + { A5XX_RBBM_DBGBUS_VFD_3, 0x100, }, + { A5XX_RBBM_DBGBUS_SP_0, 0x100, }, + { A5XX_RBBM_DBGBUS_SP_1, 0x100, }, + { A5XX_RBBM_DBGBUS_SP_2, 0x100, }, + { A5XX_RBBM_DBGBUS_SP_3, 0x100, }, + { A5XX_RBBM_DBGBUS_TPL1_0, 0x100, }, + { A5XX_RBBM_DBGBUS_TPL1_1, 0x100, }, + { A5XX_RBBM_DBGBUS_TPL1_2, 0x100, }, + { A5XX_RBBM_DBGBUS_TPL1_3, 0x100, }, +}; + +#define A5XX_NUM_AXI_ARB_BLOCKS 2 +#define A5XX_NUM_XIN_BLOCKS 4 + +/* Width of A5XX_CP_DRAW_STATE_ADDR is 8 bits */ +#define A5XX_CP_DRAW_STATE_ADDR_WIDTH 8 + +/* a5xx_snapshot_cp_pm4() - Dump PM4 data in snapshot */ +static size_t a5xx_snapshot_cp_pm4(struct kgsl_device *device, u8 *buf, 
+ size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_PM4); + size_t size = fw->size; + + if (remain < DEBUG_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "CP PM4 RAM DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_PM4_RAM; + header->size = size; + + memcpy(data, fw->memdesc->hostptr, size * sizeof(uint32_t)); + + return DEBUG_SECTION_SZ(size); +} + +/* a5xx_snapshot_cp_pfp() - Dump the PFP data on snapshot */ +static size_t a5xx_snapshot_cp_pfp(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_PFP); + int size = fw->size; + + if (remain < DEBUG_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "CP PFP RAM DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_PFP_RAM; + header->size = size; + + memcpy(data, fw->memdesc->hostptr, size * sizeof(uint32_t)); + + return DEBUG_SECTION_SZ(size); +} + +/* a5xx_rbbm_debug_bus_read() - Read data from trace bus */ +static void a5xx_rbbm_debug_bus_read(struct kgsl_device *device, + unsigned int block_id, unsigned int index, unsigned int *val) +{ + unsigned int reg; + + reg = (block_id << A5XX_RBBM_CFG_DBGBUS_SEL_PING_BLK_SEL_SHIFT) | + (index << A5XX_RBBM_CFG_DBGBUS_SEL_PING_INDEX_SHIFT); + kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_A, reg); + kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_B, reg); + kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_C, reg); + kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_D, reg); + + kgsl_regread(device, A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2, val); + val++; + 
kgsl_regread(device, A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1, val); + +} + +/* a5xx_snapshot_vbif_debugbus() - Dump the VBIF debug data */ +static size_t a5xx_snapshot_vbif_debugbus(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debugbus *header = + (struct kgsl_snapshot_debugbus *)buf; + struct adreno_debugbus_block *block = priv; + int i, j; + /* + * Total number of VBIF data words considering 3 sections: + * 2 arbiter blocks of 16 words + * 4 AXI XIN blocks of 18 dwords each + * 4 core clock side XIN blocks of 12 dwords each + */ + unsigned int dwords = (16 * A5XX_NUM_AXI_ARB_BLOCKS) + + (18 * A5XX_NUM_XIN_BLOCKS) + (12 * A5XX_NUM_XIN_BLOCKS); + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + size_t size; + unsigned int reg_clk; + + size = (dwords * sizeof(unsigned int)) + sizeof(*header); + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + header->id = block->block_id; + header->count = dwords; + + kgsl_regread(device, A5XX_VBIF_CLKON, ®_clk); + kgsl_regwrite(device, A5XX_VBIF_CLKON, reg_clk | + (A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_MASK << + A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_SHIFT)); + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS1_CTRL0, 0); + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS_OUT_CTRL, + (A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_MASK << + A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_SHIFT)); + for (i = 0; i < A5XX_NUM_AXI_ARB_BLOCKS; i++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL0, + (1 << (i + 16))); + for (j = 0; j < 16; j++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL1, + ((j & A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK) + << A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT)); + kgsl_regread(device, A5XX_VBIF_TEST_BUS_OUT, + data); + data++; + } + } + + /* XIN blocks AXI side */ + for (i = 0; i < A5XX_NUM_XIN_BLOCKS; i++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL0, 1 << i); + for (j = 0; j < 18; j++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL1, + ((j & 
A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK) + << A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT)); + kgsl_regread(device, A5XX_VBIF_TEST_BUS_OUT, + data); + data++; + } + } + + /* XIN blocks core clock side */ + for (i = 0; i < A5XX_NUM_XIN_BLOCKS; i++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS1_CTRL0, 1 << i); + for (j = 0; j < 12; j++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS1_CTRL1, + ((j & A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_MASK) + << A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_SHIFT)); + kgsl_regread(device, A5XX_VBIF_TEST_BUS_OUT, + data); + data++; + } + } + /* restore the clock of VBIF */ + kgsl_regwrite(device, A5XX_VBIF_CLKON, reg_clk); + return size; +} + +/* a5xx_snapshot_debugbus_block() - Capture debug data for a gpu block */ +static size_t a5xx_snapshot_debugbus_block(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debugbus *header = + (struct kgsl_snapshot_debugbus *)buf; + struct adreno_debugbus_block *block = priv; + int i; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + unsigned int dwords; + size_t size; + + dwords = block->dwords; + + /* For a5xx each debug bus data unit is 2 DWRODS */ + size = (dwords * sizeof(unsigned int) * 2) + sizeof(*header); + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + + header->id = block->block_id; + header->count = dwords * 2; + + for (i = 0; i < dwords; i++) + a5xx_rbbm_debug_bus_read(device, block->block_id, i, + &data[i*2]); + + return size; +} + +/* a5xx_snapshot_debugbus() - Capture debug bus data */ +static void a5xx_snapshot_debugbus(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + int i; + + kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_CNTLM, + 0xf << A5XX_RBBM_CFG_DEBBUS_CTLTM_ENABLE_SHIFT); + + for (i = 0; i < ARRAY_SIZE(a5xx_debugbus_blocks); i++) { + if (a5xx_debugbus_blocks[i].block_id == A5XX_RBBM_DBGBUS_VBIF) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, 
a5xx_snapshot_vbif_debugbus, + (void *) &a5xx_debugbus_blocks[i]); + else + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, a5xx_snapshot_debugbus_block, + (void *) &a5xx_debugbus_blocks[i]); + } +} + +static const unsigned int a5xx_vbif_registers[] = { + 0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x302C, 0x3030, 0x3030, + 0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, + 0x3042, 0x3042, 0x3049, 0x3049, 0x3058, 0x3058, 0x305A, 0x3061, + 0x3064, 0x3068, 0x306C, 0x306D, 0x3080, 0x3088, 0x308C, 0x308C, + 0x3090, 0x3094, 0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, + 0x30C8, 0x30C8, 0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, + 0x3100, 0x3100, 0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, + 0x3120, 0x3120, 0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, + 0x340C, 0x340C, 0x3410, 0x3410, 0x3800, 0x3801, +}; + +/* + * Set of registers to dump for A5XX on snapshot. + * Registers in pairs - first value is the start offset, second + * is the stop offset (inclusive) + */ + +static const unsigned int a5xx_registers[] = { + /* RBBM */ + 0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B, + 0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095, + 0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3, + 0x04E0, 0x04F4, 0X04F8, 0x0529, 0x0531, 0x0533, 0x0540, 0x0555, + 0xF400, 0xF400, 0xF800, 0xF807, + /* CP */ + 0x0800, 0x0803, 0x0806, 0x081A, 0x081F, 0x0841, 0x0860, 0x0860, + 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0X0B1C, 0X0B1E, 0x0B28, + 0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, + /* VSC */ + 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53, 0x0C60, 0x0C61, + /* GRAS */ + 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98, 0x0CA0, 0x0CA0, + 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585, + /* RB */ + 0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8, + 0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E, + 0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545, + /* PC */ + 0x0D10, 0x0D17, 
0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0, + 0x24C0, 0x24C0, + /* VFD */ + 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57, + /* VPC */ + 0x0E60, 0x0E7C, + /* UCHE */ + 0x0E80, 0x0E8F, 0x0E90, 0x0E96, 0xEA0, 0xEA8, 0xEB0, 0xEB2, + + /* RB CTX 0 */ + 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9, 0xE1B0, 0xE1B6, + 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201, 0xE210, 0xE21C, + 0xE240, 0xE268, + /* GRAS CTX 0 */ + 0xE000, 0xE006, 0xE010, 0xE09A, 0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, + 0xE100, 0xE105, + /* PC CTX 0 */ + 0xE380, 0xE38F, 0xE3B0, 0xE3B0, + /* VFD CTX 0 */ + 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0, + /* VPC CTX 0 */ + 0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, + + /* RB CTX 1 */ + 0xE940, 0xE947, 0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, + 0xE9C0, 0xE9C7, 0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, + 0xEA40, 0xEA68, + /* GRAS CTX 1 */ + 0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB, + 0xE900, 0xE905, + /* PC CTX 1 */ + 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, + /* VFD CTX 1 */ + 0xEC00, 0xEC05, 0xEC08, 0xECE9, 0xECF0, 0xECF0, + /* VPC CTX 1 */ + 0xEA80, 0xEA80, 0xEA82, 0xEAA3, 0xEAA5, 0xEAC2, +}; + +/* + * GPMU registers to dump for A5XX on snapshot. + * Registers in pairs - first value is the start offset, second + * is the stop offset (inclusive) + */ + +static const unsigned int a5xx_gpmu_registers[] = { + /* GPMU */ + 0xA800, 0xA8FF, 0xAC60, 0xAC60, +}; + +/* + * Set of registers to dump for A5XX before actually triggering crash dumper. 
+ * Registers in pairs - first value is the start offset, second + * is the stop offset (inclusive) + */ +static const unsigned int a5xx_pre_crashdumper_registers[] = { + /* RBBM: RBBM_STATUS - RBBM_STATUS3 */ + 0x04F5, 0x04F7, 0x0530, 0x0530, + /* CP: CP_STATUS_1 */ + 0x0B1D, 0x0B1D, +}; + + +struct a5xx_hlsq_sp_tp_regs { + unsigned int statetype; + unsigned int ahbaddr; + unsigned int size; + uint64_t offset; +}; + +static struct a5xx_hlsq_sp_tp_regs a5xx_hlsq_sp_tp_registers[] = { + /* HSLQ non context. 0xe32 - 0xe3f are holes so don't include them */ + { 0x35, 0xE00, 0x32 }, + /* HLSQ CTX 0 2D */ + { 0x31, 0x2080, 0x1 }, + /* HLSQ CTX 1 2D */ + { 0x33, 0x2480, 0x1 }, + /* HLSQ CTX 0 3D. 0xe7e2 - 0xe7ff are holes so don't include them */ + { 0x32, 0xE780, 0x62 }, + /* HLSQ CTX 1 3D. 0xefe2 - 0xefff are holes so don't include them */ + { 0x34, 0xEF80, 0x62 }, + + /* SP non context */ + { 0x3f, 0x0EC0, 0x40 }, + /* SP CTX 0 2D */ + { 0x3d, 0x2040, 0x1 }, + /* SP CTX 1 2D */ + { 0x3b, 0x2440, 0x1 }, + /* SP CTX 0 3D */ + { 0x3e, 0xE580, 0x180 }, + /* SP CTX 1 3D */ + { 0x3c, 0xED80, 0x180 }, + + /* TP non context. 0x0f1c - 0x0f3f are holes so don't include them */ + { 0x3a, 0x0F00, 0x1c }, + /* TP CTX 0 2D. 0x200a - 0x200f are holes so don't include them */ + { 0x38, 0x2000, 0xa }, + /* TP CTX 1 2D. 
0x240a - 0x240f are holes so don't include them */ + { 0x36, 0x2400, 0xa }, + /* TP CTX 0 3D */ + { 0x39, 0xE700, 0x80 }, + /* TP CTX 1 3D */ + { 0x37, 0xEF00, 0x80 }, +}; + + +#define A5XX_NUM_SHADER_BANKS 4 +#define A5XX_SHADER_STATETYPE_SHIFT 8 + +enum a5xx_shader_obj { + A5XX_TP_W_MEMOBJ = 1, + A5XX_TP_W_SAMPLER = 2, + A5XX_TP_W_MIPMAP_BASE = 3, + A5XX_TP_W_MEMOBJ_TAG = 4, + A5XX_TP_W_SAMPLER_TAG = 5, + A5XX_TP_S_3D_MEMOBJ = 6, + A5XX_TP_S_3D_SAMPLER = 0x7, + A5XX_TP_S_3D_MEMOBJ_TAG = 0x8, + A5XX_TP_S_3D_SAMPLER_TAG = 0x9, + A5XX_TP_S_CS_MEMOBJ = 0xA, + A5XX_TP_S_CS_SAMPLER = 0xB, + A5XX_TP_S_CS_MEMOBJ_TAG = 0xC, + A5XX_TP_S_CS_SAMPLER_TAG = 0xD, + A5XX_SP_W_INSTR = 0xE, + A5XX_SP_W_CONST = 0xF, + A5XX_SP_W_UAV_SIZE = 0x10, + A5XX_SP_W_CB_SIZE = 0x11, + A5XX_SP_W_UAV_BASE = 0x12, + A5XX_SP_W_CB_BASE = 0x13, + A5XX_SP_W_INST_TAG = 0x14, + A5XX_SP_W_STATE = 0x15, + A5XX_SP_S_3D_INSTR = 0x16, + A5XX_SP_S_3D_CONST = 0x17, + A5XX_SP_S_3D_CB_BASE = 0x18, + A5XX_SP_S_3D_CB_SIZE = 0x19, + A5XX_SP_S_3D_UAV_BASE = 0x1A, + A5XX_SP_S_3D_UAV_SIZE = 0x1B, + A5XX_SP_S_CS_INSTR = 0x1C, + A5XX_SP_S_CS_CONST = 0x1D, + A5XX_SP_S_CS_CB_BASE = 0x1E, + A5XX_SP_S_CS_CB_SIZE = 0x1F, + A5XX_SP_S_CS_UAV_BASE = 0x20, + A5XX_SP_S_CS_UAV_SIZE = 0x21, + A5XX_SP_S_3D_INSTR_DIRTY = 0x22, + A5XX_SP_S_3D_CONST_DIRTY = 0x23, + A5XX_SP_S_3D_CB_BASE_DIRTY = 0x24, + A5XX_SP_S_3D_CB_SIZE_DIRTY = 0x25, + A5XX_SP_S_3D_UAV_BASE_DIRTY = 0x26, + A5XX_SP_S_3D_UAV_SIZE_DIRTY = 0x27, + A5XX_SP_S_CS_INSTR_DIRTY = 0x28, + A5XX_SP_S_CS_CONST_DIRTY = 0x29, + A5XX_SP_S_CS_CB_BASE_DIRTY = 0x2A, + A5XX_SP_S_CS_CB_SIZE_DIRTY = 0x2B, + A5XX_SP_S_CS_UAV_BASE_DIRTY = 0x2C, + A5XX_SP_S_CS_UAV_SIZE_DIRTY = 0x2D, + A5XX_HLSQ_ICB = 0x2E, + A5XX_HLSQ_ICB_DIRTY = 0x2F, + A5XX_HLSQ_ICB_CB_BASE_DIRTY = 0x30, + A5XX_SP_POWER_RESTORE_RAM = 0x40, + A5XX_SP_POWER_RESTORE_RAM_TAG = 0x41, + A5XX_TP_POWER_RESTORE_RAM = 0x42, + A5XX_TP_POWER_RESTORE_RAM_TAG = 0x43, + +}; + +struct a5xx_shader_block { + unsigned int statetype; + 
unsigned int sz; + uint64_t offset; +}; + +struct a5xx_shader_block_info { + struct a5xx_shader_block *block; + unsigned int bank; + uint64_t offset; +}; + +static struct a5xx_shader_block a5xx_shader_blocks[] = { + {A5XX_TP_W_MEMOBJ, 0x200}, + {A5XX_TP_W_MIPMAP_BASE, 0x3C0}, + {A5XX_TP_W_SAMPLER_TAG, 0x40}, + {A5XX_TP_S_3D_SAMPLER, 0x80}, + {A5XX_TP_S_3D_SAMPLER_TAG, 0x20}, + {A5XX_TP_S_CS_SAMPLER, 0x40}, + {A5XX_TP_S_CS_SAMPLER_TAG, 0x10}, + {A5XX_SP_W_CONST, 0x800}, + {A5XX_SP_W_CB_SIZE, 0x30}, + {A5XX_SP_W_CB_BASE, 0xF0}, + {A5XX_SP_W_STATE, 0x1}, + {A5XX_SP_S_3D_CONST, 0x800}, + {A5XX_SP_S_3D_CB_SIZE, 0x28}, + {A5XX_SP_S_3D_UAV_SIZE, 0x80}, + {A5XX_SP_S_CS_CONST, 0x400}, + {A5XX_SP_S_CS_CB_SIZE, 0x8}, + {A5XX_SP_S_CS_UAV_SIZE, 0x80}, + {A5XX_SP_S_3D_CONST_DIRTY, 0x12}, + {A5XX_SP_S_3D_CB_SIZE_DIRTY, 0x1}, + {A5XX_SP_S_3D_UAV_SIZE_DIRTY, 0x2}, + {A5XX_SP_S_CS_CONST_DIRTY, 0xA}, + {A5XX_SP_S_CS_CB_SIZE_DIRTY, 0x1}, + {A5XX_SP_S_CS_UAV_SIZE_DIRTY, 0x2}, + {A5XX_HLSQ_ICB_DIRTY, 0xB}, + {A5XX_SP_POWER_RESTORE_RAM_TAG, 0xA}, + {A5XX_TP_POWER_RESTORE_RAM_TAG, 0xA}, + {A5XX_TP_W_SAMPLER, 0x80}, + {A5XX_TP_W_MEMOBJ_TAG, 0x40}, + {A5XX_TP_S_3D_MEMOBJ, 0x200}, + {A5XX_TP_S_3D_MEMOBJ_TAG, 0x20}, + {A5XX_TP_S_CS_MEMOBJ, 0x100}, + {A5XX_TP_S_CS_MEMOBJ_TAG, 0x10}, + {A5XX_SP_W_INSTR, 0x800}, + {A5XX_SP_W_UAV_SIZE, 0x80}, + {A5XX_SP_W_UAV_BASE, 0x80}, + {A5XX_SP_W_INST_TAG, 0x40}, + {A5XX_SP_S_3D_INSTR, 0x800}, + {A5XX_SP_S_3D_CB_BASE, 0xC8}, + {A5XX_SP_S_3D_UAV_BASE, 0x80}, + {A5XX_SP_S_CS_INSTR, 0x400}, + {A5XX_SP_S_CS_CB_BASE, 0x28}, + {A5XX_SP_S_CS_UAV_BASE, 0x80}, + {A5XX_SP_S_3D_INSTR_DIRTY, 0x1}, + {A5XX_SP_S_3D_CB_BASE_DIRTY, 0x5}, + {A5XX_SP_S_3D_UAV_BASE_DIRTY, 0x2}, + {A5XX_SP_S_CS_INSTR_DIRTY, 0x1}, + {A5XX_SP_S_CS_CB_BASE_DIRTY, 0x1}, + {A5XX_SP_S_CS_UAV_BASE_DIRTY, 0x2}, + {A5XX_HLSQ_ICB, 0x200}, + {A5XX_HLSQ_ICB_CB_BASE_DIRTY, 0x4}, + {A5XX_SP_POWER_RESTORE_RAM, 0x140}, + {A5XX_TP_POWER_RESTORE_RAM, 0x40}, +}; + +static struct kgsl_memdesc *capturescript; 
+static struct kgsl_memdesc *registers; +static bool crash_dump_valid; + +static size_t a5xx_snapshot_shader_memory(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_shader *header = + (struct kgsl_snapshot_shader *) buf; + struct a5xx_shader_block_info *info = + (struct a5xx_shader_block_info *) priv; + struct a5xx_shader_block *block = info->block; + unsigned int *data = (unsigned int *) (buf + sizeof(*header)); + + if (remain < SHADER_SECTION_SZ(block->sz)) { + SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY"); + return 0; + } + + header->type = block->statetype; + header->index = info->bank; + header->size = block->sz; + + memcpy(data, registers->hostptr + info->offset, + block->sz * sizeof(unsigned int)); + + return SHADER_SECTION_SZ(block->sz); +} + +static void a5xx_snapshot_shader(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + unsigned int i, j; + struct a5xx_shader_block_info info; + + /* Shader blocks can only be read by the crash dumper */ + if (!crash_dump_valid) + return; + + for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) { + for (j = 0; j < A5XX_NUM_SHADER_BANKS; j++) { + info.block = &a5xx_shader_blocks[i]; + info.bank = j; + info.offset = a5xx_shader_blocks[i].offset + + (j * a5xx_shader_blocks[i].sz); + + /* Shader working/shadow memory */ + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SHADER, + snapshot, a5xx_snapshot_shader_memory, &info); + } + } +} + +/* Dump registers which get affected by crash dumper trigger */ +static size_t a5xx_snapshot_pre_crashdump_regs(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_registers pre_cdregs = { + .regs = a5xx_pre_crashdumper_registers, + .count = ARRAY_SIZE(a5xx_pre_crashdumper_registers)/2, + }; + + return kgsl_snapshot_dump_registers(device, buf, remain, &pre_cdregs); +} + +struct registers { + const unsigned int *regs; + size_t size; +}; + +static size_t 
a5xx_legacy_snapshot_registers(struct kgsl_device *device, + u8 *buf, size_t remain, const unsigned int *regs, size_t size) +{ + struct kgsl_snapshot_registers snapshot_regs = { + .regs = regs, + .count = size / 2, + }; + + return kgsl_snapshot_dump_registers(device, buf, remain, + &snapshot_regs); +} + +#define REG_PAIR_COUNT(_a, _i) \ + (((_a)[(2 * (_i)) + 1] - (_a)[2 * (_i)]) + 1) + +static size_t a5xx_snapshot_registers(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + unsigned int *src = (unsigned int *) registers->hostptr; + struct registers *regs = (struct registers *)priv; + unsigned int j, k; + unsigned int count = 0; + + if (!crash_dump_valid) + return a5xx_legacy_snapshot_registers(device, buf, remain, + regs->regs, regs->size); + + if (remain < sizeof(*header)) { + SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); + return 0; + } + + remain -= sizeof(*header); + + for (j = 0; j < regs->size / 2; j++) { + unsigned int start = regs->regs[2 * j]; + unsigned int end = regs->regs[(2 * j) + 1]; + + if (remain < ((end - start) + 1) * 8) { + SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); + goto out; + } + + remain -= ((end - start) + 1) * 8; + + for (k = start; k <= end; k++, count++) { + *data++ = k; + *data++ = *src++; + } + } + +out: + header->count = count; + + /* Return the size of the section */ + return (count * 8) + sizeof(*header); +} + +/* Snapshot a preemption record buffer */ +static size_t snapshot_preemption_record(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_memdesc *memdesc = priv; + + struct kgsl_snapshot_gpu_object_v2 *header = + (struct kgsl_snapshot_gpu_object_v2 *)buf; + + u8 *ptr = buf + sizeof(*header); + + if (remain < (SZ_64K + sizeof(*header))) { + SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); + return 0; + } + + header->size = SZ_64K >> 2; + 
header->gpuaddr = memdesc->gpuaddr; + header->ptbase = + kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); + header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; + + memcpy(ptr, memdesc->hostptr, SZ_64K); + + return SZ_64K + sizeof(*header); +} + + +static void _a5xx_do_crashdump(struct kgsl_device *device) +{ + unsigned long wait_time; + unsigned int reg = 0; + unsigned int val; + + crash_dump_valid = false; + + if (!device->snapshot_crashdumper) + return; + + if (IS_ERR_OR_NULL(capturescript) || IS_ERR_OR_NULL(registers)) + return; + + /* IF the SMMU is stalled we cannot do a crash dump */ + kgsl_regread(device, A5XX_RBBM_STATUS3, &val); + if (val & BIT(24)) + return; + + /* Turn on APRIV so we can access the buffers */ + kgsl_regwrite(device, A5XX_CP_CNTL, 1); + + kgsl_regwrite(device, A5XX_CP_CRASH_SCRIPT_BASE_LO, + lower_32_bits(capturescript->gpuaddr)); + kgsl_regwrite(device, A5XX_CP_CRASH_SCRIPT_BASE_HI, + upper_32_bits(capturescript->gpuaddr)); + kgsl_regwrite(device, A5XX_CP_CRASH_DUMP_CNTL, 1); + + wait_time = jiffies + msecs_to_jiffies(CP_CRASH_DUMPER_TIMEOUT); + while (!time_after(jiffies, wait_time)) { + kgsl_regread(device, A5XX_CP_CRASH_DUMP_CNTL, ®); + if (reg & 0x4) + break; + cpu_relax(); + } + + kgsl_regwrite(device, A5XX_CP_CNTL, 0); + + if (!(reg & 0x4)) { + dev_err(device->dev, "Crash dump timed out: 0x%X\n", reg); + return; + } + + crash_dump_valid = true; +} + +static int get_hlsq_registers(struct kgsl_device *device, + const struct a5xx_hlsq_sp_tp_regs *regs, unsigned int *data) +{ + unsigned int i; + unsigned int *src = registers->hostptr + regs->offset; + + for (i = 0; i < regs->size; i++) { + *data++ = regs->ahbaddr + i; + *data++ = *(src + i); + } + + return (2 * regs->size); +} + +static size_t a5xx_snapshot_dump_hlsq_sp_tp_regs(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + 
int count = 0, i; + + /* Figure out how many registers we are going to dump */ + for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++) + count += a5xx_hlsq_sp_tp_registers[i].size; + + if (remain < (count * 8) + sizeof(*header)) { + SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); + return 0; + } + + for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++) + data += get_hlsq_registers(device, + &a5xx_hlsq_sp_tp_registers[i], data); + + header->count = count; + + /* Return the size of the section */ + return (count * 8) + sizeof(*header); +} + +static size_t a5xx_snapshot_cp_merciu(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + int i, size; + + if (adreno_is_a505_or_a506(adreno_dev) || adreno_is_a508(adreno_dev) || + adreno_is_a540(adreno_dev) || adreno_is_a512(adreno_dev)) + size = 1024; + else if (adreno_is_a510(adreno_dev)) + size = 32; + else + size = 64; + + /* The MERCIU data is two dwords per entry */ + size = size << 1; + + if (remain < DEBUG_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "CP MERCIU DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_MERCIU; + header->size = size; + + kgsl_regwrite(device, A5XX_CP_MERCIU_DBG_ADDR, 0); + + for (i = 0; i < size; i++) { + kgsl_regread(device, A5XX_CP_MERCIU_DBG_DATA_1, + &data[(i * 2)]); + kgsl_regread(device, A5XX_CP_MERCIU_DBG_DATA_2, + &data[(i * 2) + 1]); + } + + return DEBUG_SECTION_SZ(size); +} + +static size_t a5xx_snapshot_cp_roq(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *) buf; + u32 size, *data = (u32 *) (buf + sizeof(*header)); + + if (adreno_is_a505_or_a506(adreno_dev) || adreno_is_a508(adreno_dev) || + 
adreno_is_a510(adreno_dev)) + size = 256; + else + size = 512; + + if (remain < DEBUG_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "CP ROQ DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_ROQ; + header->size = size; + + kgsl_regmap_read_indexed(&device->regmap, A5XX_CP_ROQ_DBG_ADDR, + A5XX_CP_ROQ_DBG_DATA, data, size); + + return DEBUG_SECTION_SZ(size); +} + +static size_t a5xx_snapshot_cp_meq(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *) buf; + u32 size, *data = (u32 *) (buf + sizeof(*header)); + + if (adreno_is_a505_or_a506(adreno_dev) || adreno_is_a508(adreno_dev) || + adreno_is_a510(adreno_dev)) + size = 32; + else + size = 64; + + if (remain < DEBUG_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "CP MEQ DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_MEQ; + header->size = size; + + kgsl_regmap_read_indexed(&device->regmap, A5XX_CP_MEQ_DBG_ADDR, + A5XX_CP_MEQ_DBG_DATA, data, size); + + return DEBUG_SECTION_SZ(size); +} + +/* + * a5xx_snapshot() - A5XX GPU snapshot function + * @adreno_dev: Device being snapshotted + * @snapshot: Pointer to the snapshot instance + * + * This is where all of the A5XX specific bits and pieces are grabbed + * into the snapshot memory + */ +void a5xx_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int i; + u32 hi, lo; + struct adreno_ringbuffer *rb; + struct registers regs; + + /* Disable Clock gating temporarily for the debug bus to work */ + a5xx_hwcg_set(adreno_dev, false); + + /* Save some CP information that the generic snapshot uses */ + kgsl_regread(device, A5XX_CP_IB1_BASE, &lo); + kgsl_regread(device, A5XX_CP_IB1_BASE_HI, &hi); + + snapshot->ib1base = (((u64) hi) << 32) | lo; + + kgsl_regread(device, A5XX_CP_IB2_BASE, &lo); + 
kgsl_regread(device, A5XX_CP_IB2_BASE_HI, &hi); + + snapshot->ib2base = (((u64) hi) << 32) | lo; + + kgsl_regread(device, A5XX_CP_IB1_BUFSZ, &snapshot->ib1size); + kgsl_regread(device, A5XX_CP_IB2_BUFSZ, &snapshot->ib2size); + + /* Dump the registers which get affected by crash dumper trigger */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, + snapshot, a5xx_snapshot_pre_crashdump_regs, NULL); + + /* Dump vbif registers as well which get affected by crash dumper */ + SNAPSHOT_REGISTERS(device, snapshot, a5xx_vbif_registers); + + /* Try to run the crash dumper */ + _a5xx_do_crashdump(device); + + regs.regs = a5xx_registers; + regs.size = ARRAY_SIZE(a5xx_registers); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, snapshot, + a5xx_snapshot_registers, ®s); + + if (a5xx_has_gpmu(adreno_dev)) { + regs.regs = a5xx_gpmu_registers; + regs.size = ARRAY_SIZE(a5xx_gpmu_registers); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, + snapshot, a5xx_snapshot_registers, ®s); + } + + + /* Dump SP TP HLSQ registers */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, snapshot, + a5xx_snapshot_dump_hlsq_sp_tp_regs, NULL); + + /* CP_PFP indexed registers */ + kgsl_snapshot_indexed_registers(device, snapshot, + A5XX_CP_PFP_STAT_ADDR, A5XX_CP_PFP_STAT_DATA, 0, 36); + + /* CP_ME indexed registers */ + kgsl_snapshot_indexed_registers(device, snapshot, + A5XX_CP_ME_STAT_ADDR, A5XX_CP_ME_STAT_DATA, 0, 29); + + /* CP_DRAW_STATE */ + kgsl_snapshot_indexed_registers(device, snapshot, + A5XX_CP_DRAW_STATE_ADDR, A5XX_CP_DRAW_STATE_DATA, + 0, 1 << A5XX_CP_DRAW_STATE_ADDR_WIDTH); + + /* ME_UCODE Cache */ + kgsl_snapshot_indexed_registers(device, snapshot, + A5XX_CP_ME_UCODE_DBG_ADDR, A5XX_CP_ME_UCODE_DBG_DATA, + 0, 0x53F); + + /* PFP_UCODE Cache */ + kgsl_snapshot_indexed_registers(device, snapshot, + A5XX_CP_PFP_UCODE_DBG_ADDR, A5XX_CP_PFP_UCODE_DBG_DATA, + 0, 0x53F); + + /* CP MEQ */ + kgsl_snapshot_add_section(device, 
KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a5xx_snapshot_cp_meq, NULL); + + /* CP ROQ */ + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a5xx_snapshot_cp_roq, NULL); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a5xx_snapshot_cp_merciu, NULL); + + /* CP PFP and PM4 */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a5xx_snapshot_cp_pfp, NULL); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a5xx_snapshot_cp_pm4, NULL); + + /* Shader memory */ + a5xx_snapshot_shader(device, snapshot); + + /* Debug bus */ + a5xx_snapshot_debugbus(device, snapshot); + + /* Preemption record */ + if (adreno_is_preemption_enabled(adreno_dev)) { + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, snapshot_preemption_record, + rb->preemption_desc); + } + } + +} + +static int _a5xx_crashdump_init_shader(struct a5xx_shader_block *block, + uint64_t *ptr, uint64_t *offset) +{ + int qwords = 0; + unsigned int j; + + /* Capture each bank in the block */ + for (j = 0; j < A5XX_NUM_SHADER_BANKS; j++) { + /* Program the aperture */ + ptr[qwords++] = + (block->statetype << A5XX_SHADER_STATETYPE_SHIFT) | j; + ptr[qwords++] = (((uint64_t) A5XX_HLSQ_DBG_READ_SEL << 44)) | + (1 << 21) | 1; + + /* Read all the data in one chunk */ + ptr[qwords++] = registers->gpuaddr + *offset; + ptr[qwords++] = + (((uint64_t) A5XX_HLSQ_DBG_AHB_READ_APERTURE << 44)) | + block->sz; + + /* Remember the offset of the first bank for easy access */ + if (j == 0) + block->offset = *offset; + + *offset += block->sz * sizeof(unsigned int); + } + + return qwords; +} + +static int _a5xx_crashdump_init_hlsq(struct a5xx_hlsq_sp_tp_regs *regs, + uint64_t *ptr, uint64_t *offset) +{ + int qwords = 0; + + /* Program the aperture */ + ptr[qwords++] = + (regs->statetype << A5XX_SHADER_STATETYPE_SHIFT); + ptr[qwords++] = (((uint64_t) 
A5XX_HLSQ_DBG_READ_SEL << 44)) | + (1 << 21) | 1; + + /* Read all the data in one chunk */ + ptr[qwords++] = registers->gpuaddr + *offset; + ptr[qwords++] = + (((uint64_t) A5XX_HLSQ_DBG_AHB_READ_APERTURE << 44)) | + regs->size; + + /* Remember the offset of the first bank for easy access */ + regs->offset = *offset; + + *offset += regs->size * sizeof(unsigned int); + + return qwords; +} + +void a5xx_crashdump_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int script_size = 0; + unsigned int data_size = 0; + unsigned int i, j; + uint64_t *ptr; + uint64_t offset = 0; + + if (!IS_ERR_OR_NULL(capturescript) && !IS_ERR_OR_NULL(registers)) + return; + + /* + * We need to allocate two buffers: + * 1 - the buffer to hold the draw script + * 2 - the buffer to hold the data + */ + + /* + * To save the registers, we need 16 bytes per register pair for the + * script and a dword for each register int the data + */ + + /* Each pair needs 16 bytes (2 qwords) */ + script_size += (ARRAY_SIZE(a5xx_registers) / 2) * 16; + + /* Each register needs a dword in the data */ + for (j = 0; j < ARRAY_SIZE(a5xx_registers) / 2; j++) + data_size += REG_PAIR_COUNT(a5xx_registers, j) * + sizeof(unsigned int); + + if (a5xx_has_gpmu(adreno_dev)) { + /* Each pair needs 16 bytes (2 qwords) */ + script_size += (ARRAY_SIZE(a5xx_gpmu_registers) / 2) * 16; + + /* Each register needs a dword in the data */ + for (j = 0; j < ARRAY_SIZE(a5xx_gpmu_registers) / 2; j++) + data_size += REG_PAIR_COUNT(a5xx_gpmu_registers, j) * + sizeof(unsigned int); + } + + /* + * To save the shader blocks for each block in each type we need 32 + * bytes for the script (16 bytes to program the aperture and 16 to + * read the data) and then a block specific number of bytes to hold + * the data + */ + for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) { + script_size += 32 * A5XX_NUM_SHADER_BANKS; + data_size += a5xx_shader_blocks[i].sz * sizeof(unsigned int) * + 
A5XX_NUM_SHADER_BANKS; + } + for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++) { + script_size += 32; + data_size += + a5xx_hlsq_sp_tp_registers[i].size * sizeof(unsigned int); + } + + /* Now allocate the script and data buffers */ + + /* The script buffers needs 2 extra qwords on the end */ + if (!IS_ERR_OR_NULL(capturescript)) + capturescript = kgsl_allocate_global(device, + script_size + 16, 0, KGSL_MEMFLAGS_GPUREADONLY, + KGSL_MEMDESC_PRIVILEGED, "capturescript"); + + if (IS_ERR(capturescript)) + return; + + if (!IS_ERR_OR_NULL(registers)) + registers = kgsl_allocate_global(device, data_size, 0, 0, + KGSL_MEMDESC_PRIVILEGED, "capturescript_regs"); + + if (IS_ERR(registers)) + return; + + /* Build the crash script */ + + ptr = (uint64_t *) capturescript->hostptr; + + /* For the registers, program a read command for each pair */ + + for (j = 0; j < ARRAY_SIZE(a5xx_registers) / 2; j++) { + unsigned int r = REG_PAIR_COUNT(a5xx_registers, j); + *ptr++ = registers->gpuaddr + offset; + *ptr++ = (((uint64_t) a5xx_registers[2 * j]) << 44) + | r; + offset += r * sizeof(unsigned int); + } + + if (a5xx_has_gpmu(adreno_dev)) { + for (j = 0; j < ARRAY_SIZE(a5xx_gpmu_registers) / 2; j++) { + unsigned int r = REG_PAIR_COUNT(a5xx_gpmu_registers, j); + *ptr++ = registers->gpuaddr + offset; + *ptr++ = (((uint64_t) a5xx_gpmu_registers[2 * j]) << 44) + | r; + offset += r * sizeof(unsigned int); + } + } + + /* Program each shader block */ + for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) { + ptr += _a5xx_crashdump_init_shader(&a5xx_shader_blocks[i], ptr, + &offset); + } + /* Program the hlsq sp tp register sets */ + for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++) + ptr += _a5xx_crashdump_init_hlsq(&a5xx_hlsq_sp_tp_registers[i], + ptr, &offset); + + *ptr++ = 0; + *ptr++ = 0; +} diff --git a/adreno_a6xx.c b/adreno_a6xx.c new file mode 100644 index 0000000000..44e69b9d94 --- /dev/null +++ b/adreno_a6xx.c @@ -0,0 +1,2774 @@ +// SPDX-License-Identifier: 
GPL-2.0-only +/* + * Copyright (c) 2017-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "adreno.h" +#include "adreno_a6xx.h" +#include "adreno_a6xx_hwsched.h" +#include "adreno_pm4types.h" +#include "adreno_trace.h" +#include "kgsl_trace.h" +#include "kgsl_util.h" + +/* IFPC & Preemption static powerup restore list */ +static u32 a6xx_pwrup_reglist[] = { + A6XX_VSC_ADDR_MODE_CNTL, + A6XX_GRAS_ADDR_MODE_CNTL, + A6XX_RB_ADDR_MODE_CNTL, + A6XX_PC_ADDR_MODE_CNTL, + A6XX_HLSQ_ADDR_MODE_CNTL, + A6XX_VFD_ADDR_MODE_CNTL, + A6XX_VPC_ADDR_MODE_CNTL, + A6XX_UCHE_ADDR_MODE_CNTL, + A6XX_SP_ADDR_MODE_CNTL, + A6XX_TPL1_ADDR_MODE_CNTL, + A6XX_UCHE_WRITE_RANGE_MAX_LO, + A6XX_UCHE_WRITE_RANGE_MAX_HI, + A6XX_UCHE_TRAP_BASE_LO, + A6XX_UCHE_TRAP_BASE_HI, + A6XX_UCHE_WRITE_THRU_BASE_LO, + A6XX_UCHE_WRITE_THRU_BASE_HI, + A6XX_UCHE_GMEM_RANGE_MIN_LO, + A6XX_UCHE_GMEM_RANGE_MIN_HI, + A6XX_UCHE_GMEM_RANGE_MAX_LO, + A6XX_UCHE_GMEM_RANGE_MAX_HI, + A6XX_UCHE_FILTER_CNTL, + A6XX_UCHE_CACHE_WAYS, + A6XX_UCHE_MODE_CNTL, + A6XX_RB_NC_MODE_CNTL, + A6XX_TPL1_NC_MODE_CNTL, + A6XX_SP_NC_MODE_CNTL, + A6XX_PC_DBG_ECO_CNTL, + A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE, +}; + +/* IFPC only static powerup restore list */ +static u32 a6xx_ifpc_pwrup_reglist[] = { + A6XX_CP_CHICKEN_DBG, + A6XX_CP_DBG_ECO_CNTL, + A6XX_CP_PROTECT_CNTL, + A6XX_CP_PROTECT_REG, + A6XX_CP_PROTECT_REG+1, + A6XX_CP_PROTECT_REG+2, + A6XX_CP_PROTECT_REG+3, + A6XX_CP_PROTECT_REG+4, + A6XX_CP_PROTECT_REG+5, + A6XX_CP_PROTECT_REG+6, + A6XX_CP_PROTECT_REG+7, + A6XX_CP_PROTECT_REG+8, + A6XX_CP_PROTECT_REG+9, + A6XX_CP_PROTECT_REG+10, + A6XX_CP_PROTECT_REG+11, + A6XX_CP_PROTECT_REG+12, + A6XX_CP_PROTECT_REG+13, + A6XX_CP_PROTECT_REG+14, + A6XX_CP_PROTECT_REG+15, + A6XX_CP_PROTECT_REG+16, + A6XX_CP_PROTECT_REG+17, + A6XX_CP_PROTECT_REG+18, + A6XX_CP_PROTECT_REG+19, + A6XX_CP_PROTECT_REG+20, + A6XX_CP_PROTECT_REG+21, + A6XX_CP_PROTECT_REG+22, + 
A6XX_CP_PROTECT_REG+23, + A6XX_CP_PROTECT_REG+24, + A6XX_CP_PROTECT_REG+25, + A6XX_CP_PROTECT_REG+26, + A6XX_CP_PROTECT_REG+27, + A6XX_CP_PROTECT_REG+28, + A6XX_CP_PROTECT_REG+29, + A6XX_CP_PROTECT_REG+30, + A6XX_CP_PROTECT_REG+31, + A6XX_CP_AHB_CNTL, +}; + +/* Applicable to a620, a635, a650 and a660 */ +static u32 a650_pwrup_reglist[] = { + A6XX_CP_PROTECT_REG + 47, /* Programmed for infinite span */ + A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, + A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1, + A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2, + A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3, + A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4, + A6XX_UCHE_CMDQ_CONFIG, +}; + +static u32 a615_pwrup_reglist[] = { + A6XX_UCHE_GBIF_GX_CONFIG, +}; + +int a6xx_fenced_write(struct adreno_device *adreno_dev, u32 offset, + u32 value, u32 mask) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int status, i; + + kgsl_regwrite(device, offset, value); + + if (!gmu_core_isenabled(device)) + return 0; + + for (i = 0; i < GMU_CORE_LONG_WAKEUP_RETRY_LIMIT; i++) { + /* + * Make sure the previous register write is posted before + * checking the fence status + */ + mb(); + + kgsl_regread(device, A6XX_GMU_AHB_FENCE_STATUS, &status); + + /* + * If !writedropped0/1, then the write to fenced register + * was successful + */ + if (!(status & mask)) + break; + + /* Wait a small amount of time before trying again */ + udelay(GMU_CORE_WAKEUP_DELAY_US); + + /* Try to write the fenced register again */ + kgsl_regwrite(device, offset, value); + } + + if (i < GMU_CORE_SHORT_WAKEUP_RETRY_LIMIT) + return 0; + + if (i == GMU_CORE_LONG_WAKEUP_RETRY_LIMIT) { + dev_err(adreno_dev->dev.dev, + "Timed out waiting %d usecs to write fenced register 0x%x\n", + i * GMU_CORE_WAKEUP_DELAY_US, offset); + return -ETIMEDOUT; + } + + dev_err(adreno_dev->dev.dev, + "Waited %d usecs to write fenced register 0x%x\n", + i * GMU_CORE_WAKEUP_DELAY_US, offset); + + return 0; +} + +int a6xx_init(struct adreno_device *adreno_dev) +{ + const struct adreno_a6xx_core 
*a6xx_core = to_a6xx_core(adreno_dev); + + adreno_dev->highest_bank_bit = a6xx_core->highest_bank_bit; + + adreno_dev->cooperative_reset = ADRENO_FEATURE(adreno_dev, + ADRENO_COOP_RESET); + + /* If the memory type is DDR 4, override the existing configuration */ + if (of_fdt_get_ddrtype() == 0x7) { + if (adreno_is_a660_shima(adreno_dev) || + adreno_is_a635(adreno_dev)) + adreno_dev->highest_bank_bit = 14; + else if ((adreno_is_a650(adreno_dev) || + adreno_is_a660(adreno_dev))) + adreno_dev->highest_bank_bit = 15; + } + + a6xx_crashdump_init(adreno_dev); + + return adreno_allocate_global(KGSL_DEVICE(adreno_dev), + &adreno_dev->pwrup_reglist, + PAGE_SIZE, 0, 0, KGSL_MEMDESC_PRIVILEGED, + "powerup_register_list"); +} + +static int a6xx_nogmu_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + ret = a6xx_ringbuffer_init(adreno_dev); + if (ret) + return ret; + + ret = a6xx_microcode_read(adreno_dev); + if (ret) + return ret; + + /* Try to map the GMU wrapper region if applicable */ + ret = kgsl_regmap_add_region(&device->regmap, device->pdev, + "gmu_wrapper", NULL, NULL); + if (ret && ret != -ENODEV) + dev_err(device->dev, "Couldn't map the GMU wrapper registers\n"); + + adreno_create_profile_buffer(adreno_dev); + + return a6xx_init(adreno_dev); +} + +static void a6xx_protect_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev); + const struct adreno_protected_regs *regs = a6xx_core->protected_regs; + int i; + + /* + * Enable access protection to privileged registers, fault on an access + * protect violation and select the last span to protect from the start + * address all the way to the end of the register address space + */ + kgsl_regwrite(device, A6XX_CP_PROTECT_CNTL, + (1 << 0) | (1 << 1) | (1 << 3)); + + /* Program each register defined by the core definition */ + for (i = 0; regs[i].reg; 
i++) { + u32 count; + + /* + * This is the offset of the end register as counted from the + * start, i.e. # of registers in the range - 1 + */ + count = regs[i].end - regs[i].start; + + kgsl_regwrite(device, regs[i].reg, + (regs[i].start & 0x3ffff) | ((count & 0x1fff) << 18) | + (regs[i].noaccess << 31)); + } +} + +static inline unsigned int +__get_rbbm_clock_cntl_on(struct adreno_device *adreno_dev) +{ + if (adreno_is_a630(adreno_dev)) + return 0x8AA8AA02; + else if (adreno_is_a612(adreno_dev) || adreno_is_a610(adreno_dev)) + return 0xAAA8AA82; + else + return 0x8AA8AA82; +} + +static inline unsigned int +__get_gmu_ao_cgc_mode_cntl(struct adreno_device *adreno_dev) +{ + if (adreno_is_a612(adreno_dev)) + return 0x00000022; + else if (adreno_is_a615_family(adreno_dev)) + return 0x00000222; + else if (adreno_is_a660(adreno_dev)) + return 0x00020000; + else + return 0x00020202; +} + +static inline unsigned int +__get_gmu_ao_cgc_delay_cntl(struct adreno_device *adreno_dev) +{ + if (adreno_is_a612(adreno_dev)) + return 0x00000011; + else if (adreno_is_a615_family(adreno_dev)) + return 0x00000111; + else + return 0x00010111; +} + +static inline unsigned int +__get_gmu_ao_cgc_hyst_cntl(struct adreno_device *adreno_dev) +{ + if (adreno_is_a612(adreno_dev)) + return 0x00000055; + else if (adreno_is_a615_family(adreno_dev)) + return 0x00000555; + else + return 0x00005555; +} + +static unsigned int __get_gmu_wfi_config(struct adreno_device *adreno_dev) +{ + if (adreno_is_a620(adreno_dev) || adreno_is_a640(adreno_dev) || + adreno_is_a650(adreno_dev)) + return 0x00000002; + + return 0x00000000; +} + +void a6xx_cx_regulator_disable_wait(struct regulator *reg, + struct kgsl_device *device, u32 timeout) +{ + if (!adreno_regulator_disable_poll(device, reg, A6XX_GPU_CC_CX_GDSCR, timeout)) { + dev_err(device->dev, "GPU CX wait timeout. 
Dumping CX votes:\n"); + /* Dump the cx regulator consumer list */ + qcom_clk_dump(NULL, reg, false); + } +} + +static void set_holi_sptprac_clock(struct kgsl_device *device, bool enable) +{ + u32 val = 0; + + kgsl_regread(device, A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, &val); + val &= ~1; + kgsl_regwrite(device, A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, + val | (enable ? 1 : 0)); +} + +static void a6xx_hwcg_set(struct adreno_device *adreno_dev, bool on) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev); + unsigned int value; + int i; + + if (!adreno_dev->hwcg_enabled) + on = false; + + if (gmu_core_isenabled(device)) { + gmu_core_regwrite(device, A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL, + on ? __get_gmu_ao_cgc_mode_cntl(adreno_dev) : 0); + gmu_core_regwrite(device, A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL, + on ? __get_gmu_ao_cgc_delay_cntl(adreno_dev) : 0); + gmu_core_regwrite(device, A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL, + on ? __get_gmu_ao_cgc_hyst_cntl(adreno_dev) : 0); + gmu_core_regwrite(device, A6XX_GMU_CX_GMU_WFI_CONFIG, + on ? __get_gmu_wfi_config(adreno_dev) : 0); + } + + kgsl_regread(device, A6XX_RBBM_CLOCK_CNTL, &value); + + if (value == __get_rbbm_clock_cntl_on(adreno_dev) && on) + return; + + if (value == 0 && !on) + return; + + /* + * Disable SP clock before programming HWCG registers. + * A612 and A610 GPU is not having the GX power domain. + * Hence skip GMU_GX registers for A12 and A610. + */ + + if (gmu_core_isenabled(device) && !adreno_is_a612(adreno_dev) && + !adreno_is_a610(adreno_dev)) + gmu_core_regrmw(device, + A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0); + else if (adreno_is_a619_holi(adreno_dev)) + set_holi_sptprac_clock(device, false); + + for (i = 0; i < a6xx_core->hwcg_count; i++) + kgsl_regwrite(device, a6xx_core->hwcg[i].offset, + on ? a6xx_core->hwcg[i].val : 0); + + /* + * Enable SP clock after programming HWCG registers. 
+ * A612 and A610 GPU is not having the GX power domain. + * Hence skip GMU_GX registers for A612. + */ + if (gmu_core_isenabled(device) && !adreno_is_a612(adreno_dev) && + !adreno_is_a610(adreno_dev)) + gmu_core_regrmw(device, + A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1); + else if (adreno_is_a619_holi(adreno_dev)) + set_holi_sptprac_clock(device, true); + + /* enable top level HWCG */ + kgsl_regwrite(device, A6XX_RBBM_CLOCK_CNTL, + on ? __get_rbbm_clock_cntl_on(adreno_dev) : 0); +} + +struct a6xx_reglist_list { + u32 *regs; + u32 count; +}; + +#define REGLIST(_a) \ + (struct a6xx_reglist_list) { .regs = _a, .count = ARRAY_SIZE(_a), } + +static void a6xx_patch_pwrup_reglist(struct adreno_device *adreno_dev) +{ + struct a6xx_reglist_list reglist[3]; + void *ptr = adreno_dev->pwrup_reglist->hostptr; + struct cpu_gpu_lock *lock = ptr; + int items = 0, i, j; + u32 *dest = ptr + sizeof(*lock); + + /* Static IFPC-only registers */ + reglist[items++] = REGLIST(a6xx_ifpc_pwrup_reglist); + + /* Static IFPC + preemption registers */ + reglist[items++] = REGLIST(a6xx_pwrup_reglist); + + /* Add target specific registers */ + if (adreno_is_a615_family(adreno_dev)) + reglist[items++] = REGLIST(a615_pwrup_reglist); + else if (adreno_is_a650_family(adreno_dev)) + reglist[items++] = REGLIST(a650_pwrup_reglist); + + /* + * For each entry in each of the lists, write the offset and the current + * register value into the GPU buffer + */ + for (i = 0; i < items; i++) { + u32 *r = reglist[i].regs; + + for (j = 0; j < reglist[i].count; j++) { + *dest++ = r[j]; + kgsl_regread(KGSL_DEVICE(adreno_dev), r[j], dest++); + } + + lock->list_length += reglist[i].count * 2; + } + + if (adreno_is_a630(adreno_dev)) { + *dest++ = A6XX_RBBM_VBIF_CLIENT_QOS_CNTL; + kgsl_regread(KGSL_DEVICE(adreno_dev), + A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, dest++); + } else { + *dest++ = A6XX_RBBM_GBIF_CLIENT_QOS_CNTL; + kgsl_regread(KGSL_DEVICE(adreno_dev), + A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, dest++); + } + + 
lock->list_length += 2; + + *dest++ = A6XX_RBBM_PERFCTR_CNTL; + *dest++ = 1; + lock->list_length += 2; + + /* + * The overall register list is composed of + * 1. Static IFPC-only registers + * 2. Static IFPC + preemption registers + * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects) + * + * The CP views the second and third entries as one dynamic list + * starting from list_offset. list_length should be the total dwords in + * all the lists and list_offset should be specified as the size in + * dwords of the first entry in the list. + */ + lock->list_offset = reglist[0].count * 2; +} + + +static void a6xx_llc_configure_gpu_scid(struct adreno_device *adreno_dev); +static void a6xx_llc_configure_gpuhtw_scid(struct adreno_device *adreno_dev); +static void a6xx_llc_enable_overrides(struct adreno_device *adreno_dev); + +static void a6xx_set_secvid(struct kgsl_device *device) +{ + static bool set; + + if (set || !device->mmu.secured) + return; + + kgsl_regwrite(device, A6XX_RBBM_SECVID_TSB_CNTL, 0x0); + kgsl_regwrite(device, A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, + lower_32_bits(KGSL_IOMMU_SECURE_BASE(&device->mmu))); + kgsl_regwrite(device, A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, + upper_32_bits(KGSL_IOMMU_SECURE_BASE(&device->mmu))); + kgsl_regwrite(device, A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, + KGSL_IOMMU_SECURE_SIZE(&device->mmu)); + + if (ADRENO_QUIRK(ADRENO_DEVICE(device), ADRENO_QUIRK_SECVID_SET_ONCE)) + set = true; +} + +static void a6xx_deassert_gbif_halt(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + kgsl_regwrite(device, A6XX_GBIF_HALT, 0x0); + + if (adreno_is_a619_holi(adreno_dev)) + kgsl_regwrite(device, A6XX_RBBM_GPR0_CNTL, 0x0); + else + kgsl_regwrite(device, A6XX_RBBM_GBIF_HALT, 0x0); +} + +/* + * Some targets support marking certain transactions as always privileged which + * allows us to mark more memory as privileged without having to explicitly set + * the APRIV bit. 
For those targets, choose the following transactions to be + * privileged by default: + * CDWRITE [6:6] - Crashdumper writes + * CDREAD [5:5] - Crashdumper reads + * RBRPWB [3:3] - RPTR shadow writes + * RBPRIVLEVEL [2:2] - Memory accesses from PM4 packets in the ringbuffer + * RBFETCH [1:1] - Ringbuffer reads + */ +#define A6XX_APRIV_DEFAULT \ + ((1 << 6) | (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1)) + +void a6xx_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev); + unsigned int mal, mode, hbb_hi = 0, hbb_lo = 0; + unsigned int uavflagprd_inv; + unsigned int amsbc = 0; + unsigned int rgb565_predicator = 0; + static bool patch_reglist; + + /* Enable 64 bit addressing */ + kgsl_regwrite(device, A6XX_CP_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A6XX_VSC_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A6XX_GRAS_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A6XX_RB_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A6XX_PC_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A6XX_HLSQ_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A6XX_VFD_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A6XX_VPC_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A6XX_UCHE_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A6XX_SP_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A6XX_TPL1_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1); + + /* Set up VBIF registers from the GPU core definition */ + kgsl_regmap_multi_write(&device->regmap, a6xx_core->vbif, + a6xx_core->vbif_count); + + if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_LIMIT_UCHE_GBIF_RW)) + kgsl_regwrite(device, A6XX_UCHE_GBIF_GX_CONFIG, 0x10200F9); + + /* Make all blocks contribute to the GPU BUSY perf counter */ + kgsl_regwrite(device, A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF); + + /* + * Set UCHE_WRITE_THRU_BASE to the UCHE_TRAP_BASE effectively + * disabling L2 bypass + */ + 
kgsl_regwrite(device, A6XX_UCHE_WRITE_RANGE_MAX_LO, 0xffffffc0); + kgsl_regwrite(device, A6XX_UCHE_WRITE_RANGE_MAX_HI, 0x0001ffff); + kgsl_regwrite(device, A6XX_UCHE_TRAP_BASE_LO, 0xfffff000); + kgsl_regwrite(device, A6XX_UCHE_TRAP_BASE_HI, 0x0001ffff); + kgsl_regwrite(device, A6XX_UCHE_WRITE_THRU_BASE_LO, 0xfffff000); + kgsl_regwrite(device, A6XX_UCHE_WRITE_THRU_BASE_HI, 0x0001ffff); + + /* + * Some A6xx targets no longer use a programmed GMEM base address + * so only write the registers if a non zero address is given + * in the GPU list + */ + if (adreno_dev->gpucore->gmem_base) { + kgsl_regwrite(device, A6XX_UCHE_GMEM_RANGE_MIN_LO, + adreno_dev->gpucore->gmem_base); + kgsl_regwrite(device, A6XX_UCHE_GMEM_RANGE_MIN_HI, 0x0); + kgsl_regwrite(device, A6XX_UCHE_GMEM_RANGE_MAX_LO, + adreno_dev->gpucore->gmem_base + + adreno_dev->gpucore->gmem_size - 1); + kgsl_regwrite(device, A6XX_UCHE_GMEM_RANGE_MAX_HI, 0x0); + } + + kgsl_regwrite(device, A6XX_UCHE_FILTER_CNTL, 0x804); + kgsl_regwrite(device, A6XX_UCHE_CACHE_WAYS, 0x4); + + /* ROQ sizes are twice as big on a640/a680 than on a630 */ + if (ADRENO_GPUREV(adreno_dev) >= ADRENO_REV_A640) { + kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140); + kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362C); + } else if (adreno_is_a612(adreno_dev) || adreno_is_a610(adreno_dev)) { + kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_2, 0x00800060); + kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_1, 0x40201b16); + } else { + kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_2, 0x010000C0); + kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362C); + } + + if (adreno_is_a660(adreno_dev)) + kgsl_regwrite(device, A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020); + + if (adreno_is_a612(adreno_dev) || adreno_is_a610(adreno_dev)) { + /* For A612 and A610 Mem pool size is reduced to 48 */ + kgsl_regwrite(device, A6XX_CP_MEM_POOL_SIZE, 48); + kgsl_regwrite(device, A6XX_CP_MEM_POOL_DBG_ADDR, 47); + } else { + kgsl_regwrite(device, 
A6XX_CP_MEM_POOL_SIZE, 128); + } + + /* Setting the primFifo thresholds values */ + kgsl_regwrite(device, A6XX_PC_DBG_ECO_CNTL, + a6xx_core->prim_fifo_threshold); + + /* Set the AHB default slave response to "ERROR" */ + kgsl_regwrite(device, A6XX_CP_AHB_CNTL, 0x1); + + /* Turn on performance counters */ + kgsl_regwrite(device, A6XX_RBBM_PERFCTR_CNTL, 0x1); + + /* Turn on the IFPC counter (countable 4 on XOCLK4) */ + if (gmu_core_isenabled(device)) + gmu_core_regrmw(device, A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1, + 0xff, 0x4); + + /* Turn on GX_MEM retention */ + if (gmu_core_isenabled(device) && adreno_is_a612(adreno_dev)) { + kgsl_regwrite(device, A6XX_RBBM_BLOCK_GX_RETENTION_CNTL, 0x7FB); + /* For CP IPC interrupt */ + kgsl_regwrite(device, A6XX_RBBM_INT_2_MASK, 0x00000010); + } + + if (of_property_read_u32(device->pdev->dev.of_node, + "qcom,min-access-length", &mal)) + mal = 32; + + if (of_property_read_u32(device->pdev->dev.of_node, + "qcom,ubwc-mode", &mode)) + mode = 0; + + switch (mode) { + case KGSL_UBWC_1_0: + mode = 1; + break; + case KGSL_UBWC_2_0: + mode = 0; + break; + case KGSL_UBWC_3_0: + mode = 0; + amsbc = 1; /* Only valid for A640 and A680 */ + break; + case KGSL_UBWC_4_0: + mode = 0; + rgb565_predicator = 1; + amsbc = 1; + break; + default: + break; + } + + if (!WARN_ON(!adreno_dev->highest_bank_bit)) { + hbb_lo = (adreno_dev->highest_bank_bit - 13) & 3; + hbb_hi = ((adreno_dev->highest_bank_bit - 13) >> 2) & 1; + } + + mal = (mal == 64) ? 1 : 0; + + uavflagprd_inv = (adreno_is_a650_family(adreno_dev)) ? 
2 : 0; + + kgsl_regwrite(device, A6XX_RB_NC_MODE_CNTL, (rgb565_predicator << 11)| + (hbb_hi << 10) | (amsbc << 4) | (mal << 3) | + (hbb_lo << 1) | mode); + + kgsl_regwrite(device, A6XX_TPL1_NC_MODE_CNTL, (hbb_hi << 4) | + (mal << 3) | (hbb_lo << 1) | mode); + + kgsl_regwrite(device, A6XX_SP_NC_MODE_CNTL, (hbb_hi << 10) | + (mal << 3) | (uavflagprd_inv << 4) | + (hbb_lo << 1) | mode); + + kgsl_regwrite(device, A6XX_UCHE_MODE_CNTL, (mal << 23) | + (hbb_lo << 21)); + + kgsl_regwrite(device, A6XX_RBBM_INTERFACE_HANG_INT_CNTL, + (1 << 30) | a6xx_core->hang_detect_cycles); + + kgsl_regwrite(device, A6XX_UCHE_CLIENT_PF, 1); + + /* Set weights for bicubic filtering */ + if (adreno_is_a650_family(adreno_dev)) { + kgsl_regwrite(device, A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0); + kgsl_regwrite(device, A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1, + 0x3FE05FF4); + kgsl_regwrite(device, A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2, + 0x3FA0EBEE); + kgsl_regwrite(device, A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3, + 0x3F5193ED); + kgsl_regwrite(device, A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4, + 0x3F0243F0); + } + + /* Set TWOPASSUSEWFI in A6XX_PC_DBG_ECO_CNTL if requested */ + if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_TWO_PASS_USE_WFI)) + kgsl_regrmw(device, A6XX_PC_DBG_ECO_CNTL, 0, (1 << 8)); + + /* Set the bit vccCacheSkipDis=1 to get rid of TSEskip logic */ + if (a6xx_core->disable_tseskip) + kgsl_regrmw(device, A6XX_PC_DBG_ECO_CNTL, 0, (1 << 9)); + + /* Enable the GMEM save/restore feature for preemption */ + if (adreno_is_preemption_enabled(adreno_dev)) + kgsl_regwrite(device, A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE, + 0x1); + + /* + * Enable GMU power counter 0 to count GPU busy. 
This is applicable to + * all a6xx targets + */ + kgsl_regwrite(device, A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000); + kgsl_regrmw(device, A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0, 0xff, 0x20); + kgsl_regwrite(device, A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 0x1); + + a6xx_protect_init(adreno_dev); + /* + * We start LM here because we want all the following to be up + * 1. GX HS + * 2. SPTPRAC + * 3. HFI + * At this point, we are guaranteed all. + */ + + /* Configure LLCC */ + a6xx_llc_configure_gpu_scid(adreno_dev); + a6xx_llc_configure_gpuhtw_scid(adreno_dev); + + a6xx_llc_enable_overrides(adreno_dev); + + if (adreno_is_a660(adreno_dev)) { + kgsl_regwrite(device, A6XX_CP_CHICKEN_DBG, 0x1); + kgsl_regwrite(device, A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0); + + /* Set dualQ + disable afull for A660 GPU but not for A635 */ + if (!adreno_is_a635(adreno_dev)) + kgsl_regwrite(device, A6XX_UCHE_CMDQ_CONFIG, 0x66906); + } + + if (ADRENO_FEATURE(adreno_dev, ADRENO_APRIV)) + kgsl_regwrite(device, A6XX_CP_APRIV_CNTL, A6XX_APRIV_DEFAULT); + + a6xx_set_secvid(device); + + /* + * Enable hardware clock gating here to prevent any register access + * issue due to internal clock gating. + */ + a6xx_hwcg_set(adreno_dev, true); + + /* + * All registers must be written before this point so that we don't + * miss any register programming when we patch the power up register + * list. + */ + if (!patch_reglist && (adreno_dev->pwrup_reglist->gpuaddr != 0)) { + a6xx_patch_pwrup_reglist(adreno_dev); + patch_reglist = true; + } + + /* + * During adreno_stop, GBIF halt is asserted to ensure + * no further transaction can go through GPU before GPU + * headswitch is turned off. + * + * This halt is deasserted once headswitch goes off but + * incase headswitch doesn't goes off clear GBIF halt + * here to ensure GPU wake-up doesn't fail because of + * halted GPU transactions. 
+ */ + a6xx_deassert_gbif_halt(adreno_dev); + +} + +/* Offsets into the MX/CX mapped register regions */ +#define RDPM_MX_OFFSET 0xf00 +#define RDPM_CX_OFFSET 0xf18 + +void a6xx_rdpm_mx_freq_update(struct a6xx_gmu_device *gmu, + u32 freq) +{ + if (gmu->rdpm_mx_virt) { + writel_relaxed(freq/1000, + (gmu->rdpm_mx_virt + RDPM_MX_OFFSET)); + + /* + * ensure previous writes post before this one, + * i.e. act like normal writel() + */ + wmb(); + } +} + +void a6xx_rdpm_cx_freq_update(struct a6xx_gmu_device *gmu, + u32 freq) +{ + if (gmu->rdpm_cx_virt) { + writel_relaxed(freq/1000, + (gmu->rdpm_cx_virt + RDPM_CX_OFFSET)); + + /* + * ensure previous writes post before this one, + * i.e. act like normal writel() + */ + wmb(); + } +} + +/* This is the start point for non GMU/RGMU targets */ +static int a6xx_nogmu_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + /* + * During adreno_stop() GBIF halt is asserted to ensure that + * no further transactions go through the GPU before the + * GPU headswitch is turned off. 
+ * + * The halt is supposed to be deasserted when the headswitch goes off + * but clear it again during start to be sure + */ + kgsl_regwrite(device, A6XX_GBIF_HALT, 0x0); + kgsl_regwrite(device, A6XX_RBBM_GBIF_HALT, 0x0); + + ret = kgsl_mmu_start(device); + if (ret) + return ret; + + adreno_get_bus_counters(adreno_dev); + adreno_perfcounter_restore(adreno_dev); + + a6xx_start(adreno_dev); + return 0; +} + +/* + * CP_INIT_MAX_CONTEXT bit tells if the multiple hardware contexts can + * be used at once of if they should be serialized + */ +#define CP_INIT_MAX_CONTEXT BIT(0) + +/* Enables register protection mode */ +#define CP_INIT_ERROR_DETECTION_CONTROL BIT(1) + +/* Header dump information */ +#define CP_INIT_HEADER_DUMP BIT(2) /* Reserved */ + +/* Default Reset states enabled for PFP and ME */ +#define CP_INIT_DEFAULT_RESET_STATE BIT(3) + +/* Drawcall filter range */ +#define CP_INIT_DRAWCALL_FILTER_RANGE BIT(4) + +/* Ucode workaround masks */ +#define CP_INIT_UCODE_WORKAROUND_MASK BIT(5) + +/* + * Operation mode mask + * + * This ordinal provides the option to disable the + * save/restore of performance counters across preemption. 
+ */ +#define CP_INIT_OPERATION_MODE_MASK BIT(6) + +/* Register initialization list */ +#define CP_INIT_REGISTER_INIT_LIST BIT(7) + +/* Register initialization list with spinlock */ +#define CP_INIT_REGISTER_INIT_LIST_WITH_SPINLOCK BIT(8) + +#define CP_INIT_MASK (CP_INIT_MAX_CONTEXT | \ + CP_INIT_ERROR_DETECTION_CONTROL | \ + CP_INIT_HEADER_DUMP | \ + CP_INIT_DEFAULT_RESET_STATE | \ + CP_INIT_UCODE_WORKAROUND_MASK | \ + CP_INIT_OPERATION_MODE_MASK | \ + CP_INIT_REGISTER_INIT_LIST_WITH_SPINLOCK) + +void a6xx_cp_init_cmds(struct adreno_device *adreno_dev, u32 *cmds) +{ + int i = 0; + + cmds[i++] = cp_type7_packet(CP_ME_INIT, A6XX_CP_INIT_DWORDS - 1); + + /* Enabled ordinal mask */ + cmds[i++] = CP_INIT_MASK; + + if (CP_INIT_MASK & CP_INIT_MAX_CONTEXT) + cmds[i++] = 0x00000003; + + if (CP_INIT_MASK & CP_INIT_ERROR_DETECTION_CONTROL) + cmds[i++] = 0x20000000; + + if (CP_INIT_MASK & CP_INIT_HEADER_DUMP) { + /* Header dump address */ + cmds[i++] = 0x00000000; + /* Header dump enable and dump size */ + cmds[i++] = 0x00000000; + } + + if (CP_INIT_MASK & CP_INIT_UCODE_WORKAROUND_MASK) + cmds[i++] = 0x00000000; + + if (CP_INIT_MASK & CP_INIT_OPERATION_MODE_MASK) + cmds[i++] = 0x00000002; + + if (CP_INIT_MASK & CP_INIT_REGISTER_INIT_LIST_WITH_SPINLOCK) { + uint64_t gpuaddr = adreno_dev->pwrup_reglist->gpuaddr; + + cmds[i++] = lower_32_bits(gpuaddr); + cmds[i++] = upper_32_bits(gpuaddr); + cmds[i++] = 0; + } +} + +void a6xx_spin_idle_debug(struct adreno_device *adreno_dev, + const char *str) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int rptr, wptr; + unsigned int status, status3, intstatus; + unsigned int hwfault; + + dev_err(device->dev, str); + + kgsl_regread(device, A6XX_CP_RB_RPTR, &rptr); + kgsl_regread(device, A6XX_CP_RB_WPTR, &wptr); + + kgsl_regread(device, A6XX_RBBM_STATUS, &status); + kgsl_regread(device, A6XX_RBBM_STATUS3, &status3); + kgsl_regread(device, A6XX_RBBM_INT_0_STATUS, &intstatus); + kgsl_regread(device, A6XX_CP_HW_FAULT, &hwfault); + 
+ + dev_err(device->dev, + "rb=%d pos=%X/%X rbbm_status=%8.8X/%8.8X int_0_status=%8.8X\n", + adreno_dev->cur_rb ? adreno_dev->cur_rb->id : -1, rptr, wptr, + status, status3, intstatus); + + dev_err(device->dev, " hwfault=%8.8X\n", hwfault); + + kgsl_device_snapshot(device, NULL, false); + +} + +/* + * a6xx_send_cp_init() - Initialize ringbuffer + * @adreno_dev: Pointer to adreno device + * @rb: Pointer to the ringbuffer of device + * + * Submit commands for ME initialization, + */ +static int a6xx_send_cp_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int *cmds; + int ret; + + cmds = adreno_ringbuffer_allocspace(rb, A6XX_CP_INIT_DWORDS); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + + a6xx_cp_init_cmds(adreno_dev, cmds); + + ret = a6xx_ringbuffer_submit(rb, NULL, true); + if (!ret) { + ret = adreno_spin_idle(adreno_dev, 2000); + if (ret) { + a6xx_spin_idle_debug(adreno_dev, + "CP initialization failed to idle\n"); + + kgsl_sharedmem_writel(device->scratch, + SCRATCH_RPTR_OFFSET(rb->id), 0); + rb->wptr = 0; + rb->_wptr = 0; + } + } + + return ret; +} + +/* + * Follow the ME_INIT sequence with a preemption yield to allow the GPU to move + * to a different ringbuffer, if desired + */ +static int _preemption_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, unsigned int *cmds, + struct kgsl_context *context) +{ + unsigned int *cmds_orig = cmds; + + /* Turn CP protection OFF on legacy targets */ + if (!ADRENO_FEATURE(adreno_dev, ADRENO_APRIV)) + cmds += cp_protected_mode(adreno_dev, cmds, 0); + + *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 6); + *cmds++ = 1; + cmds += cp_gpuaddr(adreno_dev, cmds, + rb->preemption_desc->gpuaddr); + + *cmds++ = 2; + cmds += cp_gpuaddr(adreno_dev, cmds, + rb->secure_preemption_desc->gpuaddr); + + /* Turn CP protection back ON */ + if (!ADRENO_FEATURE(adreno_dev, ADRENO_APRIV)) + cmds += cp_protected_mode(adreno_dev, 
cmds, 1); + + *cmds++ = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4); + cmds += cp_gpuaddr(adreno_dev, cmds, 0x0); + *cmds++ = 0; + /* generate interrupt on preemption completion */ + *cmds++ = 0; + + return cmds - cmds_orig; +} + +static int a6xx_post_start(struct adreno_device *adreno_dev) +{ + int ret; + unsigned int *cmds, *start; + struct adreno_ringbuffer *rb = adreno_dev->cur_rb; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (!adreno_is_preemption_enabled(adreno_dev)) + return 0; + + cmds = adreno_ringbuffer_allocspace(rb, 42); + if (IS_ERR(cmds)) { + dev_err(device->dev, + "error allocating preemption init cmds\n"); + return PTR_ERR(cmds); + } + start = cmds; + + cmds += _preemption_init(adreno_dev, rb, cmds, NULL); + + rb->_wptr = rb->_wptr - (42 - (cmds - start)); + + ret = a6xx_ringbuffer_submit(rb, NULL, false); + if (!ret) { + ret = adreno_spin_idle(adreno_dev, 2000); + if (ret) + a6xx_spin_idle_debug(adreno_dev, + "hw preemption initialization failed to idle\n"); + } + + return ret; +} + +int a6xx_rb_start(struct adreno_device *adreno_dev) +{ + const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 cp_rb_cntl = A6XX_CP_RB_CNTL_DEFAULT | + (ADRENO_FEATURE(adreno_dev, ADRENO_APRIV) ? 
0 : (1 << 27)); + struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE); + struct adreno_ringbuffer *rb; + uint64_t addr; + int ret, i; + unsigned int *cmds; + + /* Clear all the ringbuffers */ + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + memset(rb->buffer_desc->hostptr, 0xaa, KGSL_RB_SIZE); + kgsl_sharedmem_writel(device->scratch, + SCRATCH_RPTR_OFFSET(rb->id), 0); + + rb->wptr = 0; + rb->_wptr = 0; + rb->wptr_preempt_end = ~0; + } + + a6xx_preemption_start(adreno_dev); + + /* Set up the current ringbuffer */ + rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); + addr = SCRATCH_RPTR_GPU_ADDR(device, rb->id); + + kgsl_regwrite(device, A6XX_CP_RB_RPTR_ADDR_LO, lower_32_bits(addr)); + kgsl_regwrite(device, A6XX_CP_RB_RPTR_ADDR_HI, upper_32_bits(addr)); + + /* + * The size of the ringbuffer in the hardware is the log2 + * representation of the size in quadwords (sizedwords / 2). + */ + kgsl_regwrite(device, A6XX_CP_RB_CNTL, cp_rb_cntl); + + kgsl_regwrite(device, A6XX_CP_RB_BASE, + lower_32_bits(rb->buffer_desc->gpuaddr)); + + kgsl_regwrite(device, A6XX_CP_RB_BASE_HI, + upper_32_bits(rb->buffer_desc->gpuaddr)); + + /* Program the ucode base for CP */ + kgsl_regwrite(device, A6XX_CP_SQE_INSTR_BASE_LO, + lower_32_bits(fw->memdesc->gpuaddr)); + kgsl_regwrite(device, A6XX_CP_SQE_INSTR_BASE_HI, + upper_32_bits(fw->memdesc->gpuaddr)); + + /* Clear the SQE_HALT to start the CP engine */ + kgsl_regwrite(device, A6XX_CP_SQE_CNTL, 1); + + ret = a6xx_send_cp_init(adreno_dev, rb); + if (ret) + return ret; + + ret = adreno_zap_shader_load(adreno_dev, a6xx_core->zap_name); + if (ret) + return ret; + + /* + * Take the GPU out of secure mode. 
Try the zap shader if it is loaded, + * otherwise just try to write directly to the secure control register + */ + if (!adreno_dev->zap_loaded) + kgsl_regwrite(device, A6XX_RBBM_SECVID_TRUST_CNTL, 0); + else { + cmds = adreno_ringbuffer_allocspace(rb, 2); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + + *cmds++ = cp_packet(adreno_dev, CP_SET_SECURE_MODE, 1); + *cmds++ = 0; + + ret = a6xx_ringbuffer_submit(rb, NULL, true); + if (!ret) { + ret = adreno_spin_idle(adreno_dev, 2000); + if (ret) { + a6xx_spin_idle_debug(adreno_dev, + "Switch to unsecure failed to idle\n"); + return ret; + } + } + } + + return a6xx_post_start(adreno_dev); +} + +/* + * a6xx_sptprac_enable() - Power on SPTPRAC + * @adreno_dev: Pointer to Adreno device + */ +static int a6xx_sptprac_enable(struct adreno_device *adreno_dev) +{ + return a6xx_gmu_sptprac_enable(adreno_dev); +} + +/* + * a6xx_sptprac_disable() - Power off SPTPRAC + * @adreno_dev: Pointer to Adreno device + */ +static void a6xx_sptprac_disable(struct adreno_device *adreno_dev) +{ + a6xx_gmu_sptprac_disable(adreno_dev); +} + +/* + * a6xx_gpu_keepalive() - GMU reg write to request GPU stays on + * @adreno_dev: Pointer to the adreno device that has the GMU + * @state: State to set: true is ON, false is OFF + */ +static void a6xx_gpu_keepalive(struct adreno_device *adreno_dev, + bool state) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (!gmu_core_isenabled(device)) + return; + + gmu_core_regwrite(device, A6XX_GMU_GMU_PWR_COL_KEEPALIVE, state); +} + +static bool a6xx_irq_pending(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 status; + + kgsl_regread(device, A6XX_RBBM_INT_0_STATUS, &status); + + /* Return busy if a interrupt is pending */ + return ((status & adreno_dev->irq_mask) || + atomic_read(&adreno_dev->pending_irq_refcnt)); +} + +static bool a619_holi_hw_isidle(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + 
unsigned int reg; + + kgsl_regread(device, A6XX_RBBM_STATUS, ®); + if (reg & 0xfffffffe) + return false; + + return a6xx_irq_pending(adreno_dev) ? false : true; +} + +bool a6xx_hw_isidle(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int reg; + + /* Non GMU devices monitor the RBBM status */ + if (!gmu_core_isenabled(device)) { + kgsl_regread(device, A6XX_RBBM_STATUS, ®); + if (reg & 0xfffffffe) + return false; + + return a6xx_irq_pending(adreno_dev) ? false : true; + } + + gmu_core_regread(device, A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS, ®); + + /* Bit 23 is GPUBUSYIGNAHB */ + return (reg & BIT(23)) ? false : true; +} + +int a6xx_microcode_read(struct adreno_device *adreno_dev) +{ + struct adreno_firmware *sqe_fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE); + const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev); + + return adreno_get_firmware(adreno_dev, a6xx_core->sqefw_name, sqe_fw); +} + +static int64_t a6xx_read_throttling_counters(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int64_t adj = -1; + u32 a, b, c; + struct adreno_busy_data *busy = &adreno_dev->busy_data; + + if (!(adreno_dev->lm_enabled || adreno_dev->bcl_enabled)) + return 0; + + a = counter_delta(device, A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_L, + &busy->throttle_cycles[0]); + + b = counter_delta(device, A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_L, + &busy->throttle_cycles[1]); + + c = counter_delta(device, A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_L, + &busy->throttle_cycles[2]); + + /* + * Currently there are no a6xx targets with both LM and BCL enabled. + * So if BCL is enabled, we can log bcl counters and return. + */ + if (adreno_dev->bcl_enabled) { + trace_kgsl_bcl_clock_throttling(a, b, c); + return 0; + } + + /* + * The adjustment is the number of cycles lost to throttling, which + * is calculated as a weighted average of the cycles throttled + * at different levels. 
The adjustment is negative because in A6XX, + * the busy count includes the throttled cycles. Therefore, we want + * to remove them to prevent appearing to be busier than + * we actually are. + */ + if (adreno_is_a620(adreno_dev) || adreno_is_a650(adreno_dev)) + /* + * With the newer generations, CRC throttle from SIDs of 0x14 + * and above cannot be observed in power counters. Since 90% + * throttle uses SID 0x16 the adjustment calculation needs + * correction. The throttling is in increments of 4.2%, and the + * 91.7% counter does a weighted count by the value of sid used + * which are taken into consideration for the final formula. + */ + adj *= div_s64((a * 42) + (b * 500) + + (div_s64((int64_t)c - a - b * 12, 22) * 917), 1000); + else + adj *= ((a * 5) + (b * 50) + (c * 90)) / 100; + + trace_kgsl_clock_throttling(0, b, c, a, adj); + + return adj; +} +#define GPU_CPR_FSM_CTL_OFFSET 0x4 +static void a6xx_gx_cpr_toggle(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev); + static void __iomem *gx_cpr_virt; + struct resource *res; + u32 val = 0; + + if (!a6xx_core->gx_cpr_toggle) + return; + + if (!gx_cpr_virt) { + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, + "gx_cpr"); + if (res == NULL) + return; + + gx_cpr_virt = devm_ioremap_resource(&device->pdev->dev, res); + if (!gx_cpr_virt) { + dev_err(device->dev, "Failed to map GX CPR\n"); + return; + } + } + + /* + * Toggle(disable -> enable) closed loop functionality to recover + * CPR measurements stall happened under certain conditions. 
+ */ + + val = readl_relaxed(gx_cpr_virt + GPU_CPR_FSM_CTL_OFFSET); + /* Make sure memory is updated before access */ + rmb(); + + writel_relaxed(val & 0xfffffff0, gx_cpr_virt + GPU_CPR_FSM_CTL_OFFSET); + /* make sure register write committed */ + wmb(); + + /* Wait for small time before we enable GX CPR */ + udelay(5); + + writel_relaxed(val | 0x00000001, gx_cpr_virt + GPU_CPR_FSM_CTL_OFFSET); + /* make sure register write committed */ + wmb(); +} + +/* This is only defined for non-GMU and non-RGMU targets */ +static int a6xx_clear_pending_transactions(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + if (adreno_is_a619_holi(adreno_dev)) { + kgsl_regwrite(device, A6XX_RBBM_GPR0_CNTL, 0x1e0); + ret = adreno_wait_for_halt_ack(device, + A6XX_RBBM_VBIF_GX_RESET_STATUS, 0xf0); + } else { + kgsl_regwrite(device, A6XX_RBBM_GBIF_HALT, + A6XX_GBIF_GX_HALT_MASK); + ret = adreno_wait_for_halt_ack(device, A6XX_RBBM_GBIF_HALT_ACK, + A6XX_GBIF_GX_HALT_MASK); + } + + if (ret) + return ret; + + return a6xx_halt_gbif(adreno_dev); +} + +/** + * a6xx_reset() - Helper function to reset the GPU + * @adreno_dev: Pointer to the adreno device structure for the GPU + * + * Try to reset the GPU to recover from a fault for targets without + * a GMU. 
+ */ +static int a6xx_reset(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + unsigned long flags = device->pwrctrl.ctrl_flags; + + ret = a6xx_clear_pending_transactions(adreno_dev); + if (ret) + return ret; + + /* Clear ctrl_flags to ensure clocks and regulators are turned off */ + device->pwrctrl.ctrl_flags = 0; + + kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT); + + /* since device is officially off now clear start bit */ + clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv); + + a6xx_reset_preempt_records(adreno_dev); + + ret = adreno_start(device, 0); + if (ret) + return ret; + + kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + + device->pwrctrl.ctrl_flags = flags; + + /* Toggle GX CPR on demand */ + a6xx_gx_cpr_toggle(device); + + /* + * If active_cnt is zero, there is no need to keep the GPU active. So, + * we should transition to SLUMBER. + */ + if (!atomic_read(&device->active_cnt)) + kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER); + + return 0; +} + +static void a6xx_cp_hw_err_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int status1, status2; + + kgsl_regread(device, A6XX_CP_INTERRUPT_STATUS, &status1); + + if (status1 & BIT(A6XX_CP_OPCODE_ERROR)) { + unsigned int opcode; + + kgsl_regwrite(device, A6XX_CP_SQE_STAT_ADDR, 1); + kgsl_regread(device, A6XX_CP_SQE_STAT_DATA, &opcode); + dev_crit_ratelimited(device->dev, + "CP opcode error interrupt | opcode=0x%8.8x\n", opcode); + } + if (status1 & BIT(A6XX_CP_UCODE_ERROR)) + dev_crit_ratelimited(device->dev, "CP ucode error interrupt\n"); + if (status1 & BIT(A6XX_CP_HW_FAULT_ERROR)) { + kgsl_regread(device, A6XX_CP_HW_FAULT, &status2); + dev_crit_ratelimited(device->dev, + "CP | Ringbuffer HW fault | status=%x\n", status2); + } + if (status1 & BIT(A6XX_CP_REGISTER_PROTECTION_ERROR)) { + kgsl_regread(device, A6XX_CP_PROTECT_STATUS, &status2); + 
dev_crit_ratelimited(device->dev, + "CP | Protected mode error | %s | addr=%x | status=%x\n", + status2 & (1 << 20) ? "READ" : "WRITE", + status2 & 0x3FFFF, status2); + } + if (status1 & BIT(A6XX_CP_AHB_ERROR)) + dev_crit_ratelimited(device->dev, + "CP AHB error interrupt\n"); + if (status1 & BIT(A6XX_CP_VSD_PARITY_ERROR)) + dev_crit_ratelimited(device->dev, + "CP VSD decoder parity error\n"); + if (status1 & BIT(A6XX_CP_ILLEGAL_INSTR_ERROR)) + dev_crit_ratelimited(device->dev, + "CP Illegal instruction error\n"); + +} + +static void a6xx_err_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + switch (bit) { + case A6XX_INT_CP_AHB_ERROR: + dev_crit_ratelimited(device->dev, "CP: AHB bus error\n"); + break; + case A6XX_INT_ATB_ASYNCFIFO_OVERFLOW: + dev_crit_ratelimited(device->dev, + "RBBM: ATB ASYNC overflow\n"); + break; + case A6XX_INT_RBBM_ATB_BUS_OVERFLOW: + dev_crit_ratelimited(device->dev, + "RBBM: ATB bus overflow\n"); + break; + case A6XX_INT_UCHE_OOB_ACCESS: + dev_crit_ratelimited(device->dev, + "UCHE: Out of bounds access\n"); + break; + case A6XX_INT_UCHE_TRAP_INTR: + dev_crit_ratelimited(device->dev, "UCHE: Trap interrupt\n"); + break; + case A6XX_INT_TSB_WRITE_ERROR: + dev_crit_ratelimited(device->dev, "TSB: Write error interrupt\n"); + break; + default: + dev_crit_ratelimited(device->dev, "Unknown interrupt %d\n", + bit); + } +} + +/* + * a6xx_llc_configure_gpu_scid() - Program the sub-cache ID for all GPU blocks + * @adreno_dev: The adreno device pointer + */ +static void a6xx_llc_configure_gpu_scid(struct adreno_device *adreno_dev) +{ + uint32_t gpu_scid; + uint32_t gpu_cntl1_val = 0; + int i; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_mmu *mmu = &device->mmu; + + if (IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice) || + !adreno_dev->gpu_llc_slice_enable) + return; + + if (llcc_slice_activate(adreno_dev->gpu_llc_slice)) + return; + + gpu_scid = 
llcc_get_slice_id(adreno_dev->gpu_llc_slice); + for (i = 0; i < A6XX_LLC_NUM_GPU_SCIDS; i++) + gpu_cntl1_val = (gpu_cntl1_val << A6XX_GPU_LLC_SCID_NUM_BITS) + | gpu_scid; + + if (mmu->subtype == KGSL_IOMMU_SMMU_V500) + kgsl_regrmw(device, A6XX_GBIF_SCACHE_CNTL1, + A6XX_GPU_LLC_SCID_MASK, gpu_cntl1_val); + else + adreno_cx_misc_regrmw(adreno_dev, + A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1, + A6XX_GPU_LLC_SCID_MASK, gpu_cntl1_val); + + /* + * On A660, the SCID programming for UCHE traffic is done in + * A6XX_GBIF_SCACHE_CNTL0[14:10] + * GFO ENABLE BIT(8) : LLC uses a 64 byte cache line size enabling + * GFO allows it allocate partial cache lines + */ + if (adreno_is_a660(adreno_dev)) + kgsl_regrmw(device, A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) | + BIT(8), (gpu_scid << 10) | BIT(8)); +} + +/* + * a6xx_llc_configure_gpuhtw_scid() - Program the SCID for GPU pagetables + * @adreno_dev: The adreno device pointer + */ +static void a6xx_llc_configure_gpuhtw_scid(struct adreno_device *adreno_dev) +{ + uint32_t gpuhtw_scid; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_mmu *mmu = &device->mmu; + + if (IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice) || + !adreno_dev->gpuhtw_llc_slice_enable) + return; + + if (llcc_slice_activate(adreno_dev->gpuhtw_llc_slice)) + return; + + /* + * On SMMU-v500, the GPUHTW SCID is configured via a NoC override in + * the XBL image. 
+ */ + if (mmu->subtype == KGSL_IOMMU_SMMU_V500) + return; + + gpuhtw_scid = llcc_get_slice_id(adreno_dev->gpuhtw_llc_slice); + + adreno_cx_misc_regrmw(adreno_dev, + A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1, + A6XX_GPUHTW_LLC_SCID_MASK, + gpuhtw_scid << A6XX_GPUHTW_LLC_SCID_SHIFT); +} + +/* + * a6xx_llc_enable_overrides() - Override the page attributes + * @adreno_dev: The adreno device pointer + */ +static void a6xx_llc_enable_overrides(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_mmu *mmu = &device->mmu; + + /* + * Attributes override through GBIF is not supported with MMU-500. + * Attributes are used as configured through SMMU pagetable entries. + */ + if (mmu->subtype == KGSL_IOMMU_SMMU_V500) + return; + + /* + * 0x3: readnoallocoverrideen=0 + * read-no-alloc=0 - Allocate lines on read miss + * writenoallocoverrideen=1 + * write-no-alloc=1 - Do not allocates lines on write miss + */ + adreno_cx_misc_regwrite(adreno_dev, + A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_0, 0x3); +} + +static const char *uche_client[7][3] = { + {"SP | VSC | VPC | HLSQ | PC | LRZ", "TP", "VFD"}, + {"VSC | VPC | HLSQ | PC | LRZ", "TP | VFD", "SP"}, + {"SP | VPC | HLSQ | PC | LRZ", "TP | VFD", "VSC"}, + {"SP | VSC | HLSQ | PC | LRZ", "TP | VFD", "VPC"}, + {"SP | VSC | VPC | PC | LRZ", "TP | VFD", "HLSQ"}, + {"SP | VSC | VPC | HLSQ | LRZ", "TP | VFD", "PC"}, + {"SP | VSC | VPC | HLSQ | PC", "TP | VFD", "LRZ"}, +}; + +static const char *const uche_client_a660[] = { "VFD", "SP", "VSC", "VPC", + "HLSQ", "PC", "LRZ", "TP" }; + +#define SCOOBYDOO 0x5c00bd00 + +static const char *a6xx_fault_block_uche(struct kgsl_device *device, + unsigned int mid) +{ + unsigned int uche_client_id = 0; + static char str[40]; + + /* + * Smmu driver takes a vote on CX gdsc before calling the kgsl + * pagefault handler. 
If there is contention for device mutex in this + * path and the dispatcher fault handler is holding this lock, trying + * to turn off CX gdsc will fail during the reset. So to avoid blocking + * here, try to lock device mutex and return if it fails. + */ + if (!mutex_trylock(&device->mutex)) + return "UCHE: unknown"; + + if (!kgsl_state_is_awake(device)) { + mutex_unlock(&device->mutex); + return "UCHE: unknown"; + } + + kgsl_regread(device, A6XX_UCHE_CLIENT_PF, &uche_client_id); + mutex_unlock(&device->mutex); + + /* Ignore the value if the gpu is in IFPC */ + if (uche_client_id == SCOOBYDOO) + return "UCHE: unknown"; + + if (adreno_is_a660(ADRENO_DEVICE(device))) { + + /* Mask is 7 bits for A660 */ + uche_client_id &= 0x7F; + if (uche_client_id >= ARRAY_SIZE(uche_client_a660) || + (mid == 2)) + return "UCHE: Unknown"; + + if (mid == 1) + snprintf(str, sizeof(str), "UCHE: Not %s", + uche_client_a660[uche_client_id]); + else if (mid == 3) + snprintf(str, sizeof(str), "UCHE: %s", + uche_client_a660[uche_client_id]); + } else { + uche_client_id &= A6XX_UCHE_CLIENT_PF_CLIENT_ID_MASK; + if (uche_client_id >= ARRAY_SIZE(uche_client)) + return "UCHE: Unknown"; + + snprintf(str, sizeof(str), "UCHE: %s", + uche_client[uche_client_id][mid - 1]); + } + + return str; +} + +static const char *a6xx_iommu_fault_block(struct kgsl_device *device, + unsigned int fsynr1) +{ + unsigned int mid = fsynr1 & 0xff; + + switch (mid) { + case 0: + return "CP"; + case 1: + case 2: + case 3: + return a6xx_fault_block_uche(device, mid); + case 4: + return "CCU"; + case 6: + return "CDP Prefetch"; + case 7: + return "GPMU"; + } + + return "Unknown"; +} + +static void a6xx_cp_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (adreno_is_preemption_enabled(adreno_dev)) + a6xx_preemption_trigger(adreno_dev, true); + + adreno_dispatcher_schedule(device); +} + +/* + * a6xx_gpc_err_int_callback() - Isr for GPC error interrupts + * 
@adreno_dev: Pointer to device + * @bit: Interrupt bit + */ +static void a6xx_gpc_err_int_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* + * GPC error is typically the result of mistake SW programming. + * Force GPU fault for this interrupt so that we can debug it + * with help of register dump. + */ + + dev_crit(device->dev, "RBBM: GPC error\n"); + adreno_irqctrl(adreno_dev, 0); + + /* Trigger a fault in the dispatcher - this will effect a restart */ + adreno_dispatcher_fault(adreno_dev, ADRENO_SOFT_FAULT); +} + +static const struct adreno_irq_funcs a6xx_irq_funcs[32] = { + ADRENO_IRQ_CALLBACK(NULL), /* 0 - RBBM_GPU_IDLE */ + ADRENO_IRQ_CALLBACK(a6xx_err_callback), /* 1 - RBBM_AHB_ERROR */ + ADRENO_IRQ_CALLBACK(NULL), /* 2 - UNUSED */ + ADRENO_IRQ_CALLBACK(NULL), /* 3 - UNUSED */ + ADRENO_IRQ_CALLBACK(NULL), /* 4 - UNUSED */ + ADRENO_IRQ_CALLBACK(NULL), /* 5 - UNUSED */ + /* 6 - RBBM_ATB_ASYNC_OVERFLOW */ + ADRENO_IRQ_CALLBACK(a6xx_err_callback), + ADRENO_IRQ_CALLBACK(a6xx_gpc_err_int_callback), /* 7 - GPC_ERR */ + ADRENO_IRQ_CALLBACK(a6xx_preemption_callback),/* 8 - CP_SW */ + ADRENO_IRQ_CALLBACK(a6xx_cp_hw_err_callback), /* 9 - CP_HW_ERROR */ + ADRENO_IRQ_CALLBACK(NULL), /* 10 - CP_CCU_FLUSH_DEPTH_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 11 - CP_CCU_FLUSH_COLOR_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 12 - CP_CCU_RESOLVE_TS */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 13 - CP_IB2_INT */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 14 - CP_IB1_INT */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 15 - CP_RB_INT */ + ADRENO_IRQ_CALLBACK(NULL), /* 16 - UNUSED */ + ADRENO_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 18 - CP_WT_DONE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 19 - UNUSED */ + ADRENO_IRQ_CALLBACK(a6xx_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 21 - UNUSED */ + ADRENO_IRQ_CALLBACK(a6xx_err_callback), /* 22 - 
RBBM_ATB_BUS_OVERFLOW */ + /* 23 - MISC_HANG_DETECT */ + ADRENO_IRQ_CALLBACK(adreno_hang_int_callback), + ADRENO_IRQ_CALLBACK(a6xx_err_callback), /* 24 - UCHE_OOB_ACCESS */ + ADRENO_IRQ_CALLBACK(a6xx_err_callback), /* 25 - UCHE_TRAP_INTR */ + ADRENO_IRQ_CALLBACK(NULL), /* 26 - DEBBUS_INTR_0 */ + ADRENO_IRQ_CALLBACK(NULL), /* 27 - DEBBUS_INTR_1 */ + ADRENO_IRQ_CALLBACK(a6xx_err_callback), /* 28 - TSBWRITEERROR */ + ADRENO_IRQ_CALLBACK(NULL), /* 29 - UNUSED */ + ADRENO_IRQ_CALLBACK(NULL), /* 30 - ISDB_CPU_IRQ */ + ADRENO_IRQ_CALLBACK(NULL), /* 31 - ISDB_UNDER_DEBUG */ +}; + +/* + * If the AHB fence is not in ALLOW mode when we receive an RBBM + * interrupt, something went wrong. This means that we cannot proceed + * since the IRQ status and clear registers are not accessible. + * This is usually harmless because the GMU will abort power collapse + * and change the fence back to ALLOW. Poll so that this can happen. + */ +static int a6xx_irq_poll_fence(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 status, fence, fence_retries = 0; + u64 a, b, c; + + if (!gmu_core_isenabled(device)) + return 0; + + a = a6xx_read_alwayson(adreno_dev); + + kgsl_regread(device, A6XX_GMU_AO_AHB_FENCE_CTRL, &fence); + + while (fence != 0) { + b = a6xx_read_alwayson(adreno_dev); + + /* Wait for small time before trying again */ + udelay(1); + kgsl_regread(device, A6XX_GMU_AO_AHB_FENCE_CTRL, &fence); + + if (fence_retries == 100 && fence != 0) { + c = a6xx_read_alwayson(adreno_dev); + + kgsl_regread(device, A6XX_GMU_RBBM_INT_UNMASKED_STATUS, + &status); + + dev_crit_ratelimited(device->dev, + "status=0x%x Unmasked status=0x%x Mask=0x%x timestamps: %llx %llx %llx\n", + status & adreno_dev->irq_mask, status, + adreno_dev->irq_mask, a, b, c); + return -ETIMEDOUT; + } + + fence_retries++; + } + + return 0; +} + +static irqreturn_t a6xx_irq_handler(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + 
irqreturn_t ret = IRQ_NONE; + u32 status; + + /* + * On A6xx, the GPU can power down once the INT_0_STATUS is read + * below. But there still might be some register reads required + * so force the GMU/GPU into KEEPALIVE mode until done with the ISR. + */ + a6xx_gpu_keepalive(adreno_dev, true); + + if (a6xx_irq_poll_fence(adreno_dev)) { + adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT); + goto done; + } + + kgsl_regread(device, A6XX_RBBM_INT_0_STATUS, &status); + + kgsl_regwrite(device, A6XX_RBBM_INT_CLEAR_CMD, status); + + ret = adreno_irq_callbacks(adreno_dev, a6xx_irq_funcs, status); + + trace_kgsl_a5xx_irq_status(adreno_dev, status); + +done: + /* If hard fault, then let snapshot turn off the keepalive */ + if (!(adreno_gpu_fault(adreno_dev) & ADRENO_HARD_FAULT)) + a6xx_gpu_keepalive(adreno_dev, false); + + return ret; +} + +#ifdef CONFIG_QCOM_KGSL_CORESIGHT +static struct adreno_coresight_register a6xx_coresight_regs[] = { + { A6XX_DBGC_CFG_DBGBUS_SEL_A }, + { A6XX_DBGC_CFG_DBGBUS_SEL_B }, + { A6XX_DBGC_CFG_DBGBUS_SEL_C }, + { A6XX_DBGC_CFG_DBGBUS_SEL_D }, + { A6XX_DBGC_CFG_DBGBUS_CNTLT }, + { A6XX_DBGC_CFG_DBGBUS_CNTLM }, + { A6XX_DBGC_CFG_DBGBUS_OPL }, + { A6XX_DBGC_CFG_DBGBUS_OPE }, + { A6XX_DBGC_CFG_DBGBUS_IVTL_0 }, + { A6XX_DBGC_CFG_DBGBUS_IVTL_1 }, + { A6XX_DBGC_CFG_DBGBUS_IVTL_2 }, + { A6XX_DBGC_CFG_DBGBUS_IVTL_3 }, + { A6XX_DBGC_CFG_DBGBUS_MASKL_0 }, + { A6XX_DBGC_CFG_DBGBUS_MASKL_1 }, + { A6XX_DBGC_CFG_DBGBUS_MASKL_2 }, + { A6XX_DBGC_CFG_DBGBUS_MASKL_3 }, + { A6XX_DBGC_CFG_DBGBUS_BYTEL_0 }, + { A6XX_DBGC_CFG_DBGBUS_BYTEL_1 }, + { A6XX_DBGC_CFG_DBGBUS_IVTE_0 }, + { A6XX_DBGC_CFG_DBGBUS_IVTE_1 }, + { A6XX_DBGC_CFG_DBGBUS_IVTE_2 }, + { A6XX_DBGC_CFG_DBGBUS_IVTE_3 }, + { A6XX_DBGC_CFG_DBGBUS_MASKE_0 }, + { A6XX_DBGC_CFG_DBGBUS_MASKE_1 }, + { A6XX_DBGC_CFG_DBGBUS_MASKE_2 }, + { A6XX_DBGC_CFG_DBGBUS_MASKE_3 }, + { A6XX_DBGC_CFG_DBGBUS_NIBBLEE }, + { A6XX_DBGC_CFG_DBGBUS_PTRC0 }, + { A6XX_DBGC_CFG_DBGBUS_PTRC1 }, + { A6XX_DBGC_CFG_DBGBUS_LOADREG }, + { 
A6XX_DBGC_CFG_DBGBUS_IDX }, + { A6XX_DBGC_CFG_DBGBUS_CLRC }, + { A6XX_DBGC_CFG_DBGBUS_LOADIVT }, + { A6XX_DBGC_VBIF_DBG_CNTL }, + { A6XX_DBGC_DBG_LO_HI_GPIO }, + { A6XX_DBGC_EXT_TRACE_BUS_CNTL }, + { A6XX_DBGC_READ_AHB_THROUGH_DBG }, + { A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1 }, + { A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2 }, + { A6XX_DBGC_EVT_CFG }, + { A6XX_DBGC_EVT_INTF_SEL_0 }, + { A6XX_DBGC_EVT_INTF_SEL_1 }, + { A6XX_DBGC_PERF_ATB_CFG }, + { A6XX_DBGC_PERF_ATB_COUNTER_SEL_0 }, + { A6XX_DBGC_PERF_ATB_COUNTER_SEL_1 }, + { A6XX_DBGC_PERF_ATB_COUNTER_SEL_2 }, + { A6XX_DBGC_PERF_ATB_COUNTER_SEL_3 }, + { A6XX_DBGC_PERF_ATB_TRIG_INTF_SEL_0 }, + { A6XX_DBGC_PERF_ATB_TRIG_INTF_SEL_1 }, + { A6XX_DBGC_PERF_ATB_DRAIN_CMD }, + { A6XX_DBGC_ECO_CNTL }, + { A6XX_DBGC_AHB_DBG_CNTL }, +}; + +static struct adreno_coresight_register a6xx_coresight_regs_cx[] = { + { A6XX_CX_DBGC_CFG_DBGBUS_SEL_A }, + { A6XX_CX_DBGC_CFG_DBGBUS_SEL_B }, + { A6XX_CX_DBGC_CFG_DBGBUS_SEL_C }, + { A6XX_CX_DBGC_CFG_DBGBUS_SEL_D }, + { A6XX_CX_DBGC_CFG_DBGBUS_CNTLT }, + { A6XX_CX_DBGC_CFG_DBGBUS_CNTLM }, + { A6XX_CX_DBGC_CFG_DBGBUS_OPL }, + { A6XX_CX_DBGC_CFG_DBGBUS_OPE }, + { A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0 }, + { A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1 }, + { A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2 }, + { A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3 }, + { A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0 }, + { A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1 }, + { A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2 }, + { A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3 }, + { A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0 }, + { A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1 }, + { A6XX_CX_DBGC_CFG_DBGBUS_IVTE_0 }, + { A6XX_CX_DBGC_CFG_DBGBUS_IVTE_1 }, + { A6XX_CX_DBGC_CFG_DBGBUS_IVTE_2 }, + { A6XX_CX_DBGC_CFG_DBGBUS_IVTE_3 }, + { A6XX_CX_DBGC_CFG_DBGBUS_MASKE_0 }, + { A6XX_CX_DBGC_CFG_DBGBUS_MASKE_1 }, + { A6XX_CX_DBGC_CFG_DBGBUS_MASKE_2 }, + { A6XX_CX_DBGC_CFG_DBGBUS_MASKE_3 }, + { A6XX_CX_DBGC_CFG_DBGBUS_NIBBLEE }, + { A6XX_CX_DBGC_CFG_DBGBUS_PTRC0 }, + { A6XX_CX_DBGC_CFG_DBGBUS_PTRC1 }, + { A6XX_CX_DBGC_CFG_DBGBUS_LOADREG }, + { 
A6XX_CX_DBGC_CFG_DBGBUS_IDX }, + { A6XX_CX_DBGC_CFG_DBGBUS_CLRC }, + { A6XX_CX_DBGC_CFG_DBGBUS_LOADIVT }, + { A6XX_CX_DBGC_VBIF_DBG_CNTL }, + { A6XX_CX_DBGC_DBG_LO_HI_GPIO }, + { A6XX_CX_DBGC_EXT_TRACE_BUS_CNTL }, + { A6XX_CX_DBGC_READ_AHB_THROUGH_DBG }, + { A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1 }, + { A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2 }, + { A6XX_CX_DBGC_EVT_CFG }, + { A6XX_CX_DBGC_EVT_INTF_SEL_0 }, + { A6XX_CX_DBGC_EVT_INTF_SEL_1 }, + { A6XX_CX_DBGC_PERF_ATB_CFG }, + { A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_0 }, + { A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_1 }, + { A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_2 }, + { A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_3 }, + { A6XX_CX_DBGC_PERF_ATB_TRIG_INTF_SEL_0 }, + { A6XX_CX_DBGC_PERF_ATB_TRIG_INTF_SEL_1 }, + { A6XX_CX_DBGC_PERF_ATB_DRAIN_CMD }, + { A6XX_CX_DBGC_ECO_CNTL }, + { A6XX_CX_DBGC_AHB_DBG_CNTL }, +}; + +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_a, &a6xx_coresight_regs[0]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_b, &a6xx_coresight_regs[1]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_c, &a6xx_coresight_regs[2]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_d, &a6xx_coresight_regs[3]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlt, &a6xx_coresight_regs[4]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlm, &a6xx_coresight_regs[5]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_opl, &a6xx_coresight_regs[6]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ope, &a6xx_coresight_regs[7]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_0, &a6xx_coresight_regs[8]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_1, &a6xx_coresight_regs[9]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_2, &a6xx_coresight_regs[10]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_3, &a6xx_coresight_regs[11]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_0, &a6xx_coresight_regs[12]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_1, &a6xx_coresight_regs[13]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_2, &a6xx_coresight_regs[14]); +static 
ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_3, &a6xx_coresight_regs[15]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_0, &a6xx_coresight_regs[16]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_1, &a6xx_coresight_regs[17]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_0, &a6xx_coresight_regs[18]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_1, &a6xx_coresight_regs[19]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_2, &a6xx_coresight_regs[20]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_3, &a6xx_coresight_regs[21]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_0, &a6xx_coresight_regs[22]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_1, &a6xx_coresight_regs[23]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_2, &a6xx_coresight_regs[24]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_3, &a6xx_coresight_regs[25]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_nibblee, &a6xx_coresight_regs[26]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc0, &a6xx_coresight_regs[27]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc1, &a6xx_coresight_regs[28]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadreg, &a6xx_coresight_regs[29]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_idx, &a6xx_coresight_regs[30]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_clrc, &a6xx_coresight_regs[31]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadivt, &a6xx_coresight_regs[32]); +static ADRENO_CORESIGHT_ATTR(vbif_dbg_cntl, &a6xx_coresight_regs[33]); +static ADRENO_CORESIGHT_ATTR(dbg_lo_hi_gpio, &a6xx_coresight_regs[34]); +static ADRENO_CORESIGHT_ATTR(ext_trace_bus_cntl, &a6xx_coresight_regs[35]); +static ADRENO_CORESIGHT_ATTR(read_ahb_through_dbg, &a6xx_coresight_regs[36]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf1, &a6xx_coresight_regs[37]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf2, &a6xx_coresight_regs[38]); +static ADRENO_CORESIGHT_ATTR(evt_cfg, &a6xx_coresight_regs[39]); +static ADRENO_CORESIGHT_ATTR(evt_intf_sel_0, &a6xx_coresight_regs[40]); +static 
ADRENO_CORESIGHT_ATTR(evt_intf_sel_1, &a6xx_coresight_regs[41]); +static ADRENO_CORESIGHT_ATTR(perf_atb_cfg, &a6xx_coresight_regs[42]); +static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_0, &a6xx_coresight_regs[43]); +static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_1, &a6xx_coresight_regs[44]); +static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_2, &a6xx_coresight_regs[45]); +static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_3, &a6xx_coresight_regs[46]); +static ADRENO_CORESIGHT_ATTR(perf_atb_trig_intf_sel_0, + &a6xx_coresight_regs[47]); +static ADRENO_CORESIGHT_ATTR(perf_atb_trig_intf_sel_1, + &a6xx_coresight_regs[48]); +static ADRENO_CORESIGHT_ATTR(perf_atb_drain_cmd, &a6xx_coresight_regs[49]); +static ADRENO_CORESIGHT_ATTR(eco_cntl, &a6xx_coresight_regs[50]); +static ADRENO_CORESIGHT_ATTR(ahb_dbg_cntl, &a6xx_coresight_regs[51]); + +/*CX debug registers*/ +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_a, + &a6xx_coresight_regs_cx[0]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_b, + &a6xx_coresight_regs_cx[1]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_c, + &a6xx_coresight_regs_cx[2]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_d, + &a6xx_coresight_regs_cx[3]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_cntlt, + &a6xx_coresight_regs_cx[4]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_cntlm, + &a6xx_coresight_regs_cx[5]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_opl, + &a6xx_coresight_regs_cx[6]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ope, + &a6xx_coresight_regs_cx[7]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_0, + &a6xx_coresight_regs_cx[8]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_1, + &a6xx_coresight_regs_cx[9]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_2, + &a6xx_coresight_regs_cx[10]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_3, + &a6xx_coresight_regs_cx[11]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_0, + &a6xx_coresight_regs_cx[12]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_1, + 
&a6xx_coresight_regs_cx[13]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_2, + &a6xx_coresight_regs_cx[14]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_3, + &a6xx_coresight_regs_cx[15]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_bytel_0, + &a6xx_coresight_regs_cx[16]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_bytel_1, + &a6xx_coresight_regs_cx[17]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_0, + &a6xx_coresight_regs_cx[18]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_1, + &a6xx_coresight_regs_cx[19]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_2, + &a6xx_coresight_regs_cx[20]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_3, + &a6xx_coresight_regs_cx[21]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_0, + &a6xx_coresight_regs_cx[22]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_1, + &a6xx_coresight_regs_cx[23]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_2, + &a6xx_coresight_regs_cx[24]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_3, + &a6xx_coresight_regs_cx[25]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_nibblee, + &a6xx_coresight_regs_cx[26]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ptrc0, + &a6xx_coresight_regs_cx[27]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ptrc1, + &a6xx_coresight_regs_cx[28]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_loadreg, + &a6xx_coresight_regs_cx[29]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_idx, + &a6xx_coresight_regs_cx[30]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_clrc, + &a6xx_coresight_regs_cx[31]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_loadivt, + &a6xx_coresight_regs_cx[32]); +static ADRENO_CORESIGHT_ATTR(cx_vbif_dbg_cntl, + &a6xx_coresight_regs_cx[33]); +static ADRENO_CORESIGHT_ATTR(cx_dbg_lo_hi_gpio, + &a6xx_coresight_regs_cx[34]); +static ADRENO_CORESIGHT_ATTR(cx_ext_trace_bus_cntl, + &a6xx_coresight_regs_cx[35]); +static ADRENO_CORESIGHT_ATTR(cx_read_ahb_through_dbg, + &a6xx_coresight_regs_cx[36]); +static 
ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_trace_buf1, + &a6xx_coresight_regs_cx[37]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_trace_buf2, + &a6xx_coresight_regs_cx[38]); +static ADRENO_CORESIGHT_ATTR(cx_evt_cfg, + &a6xx_coresight_regs_cx[39]); +static ADRENO_CORESIGHT_ATTR(cx_evt_intf_sel_0, + &a6xx_coresight_regs_cx[40]); +static ADRENO_CORESIGHT_ATTR(cx_evt_intf_sel_1, + &a6xx_coresight_regs_cx[41]); +static ADRENO_CORESIGHT_ATTR(cx_perf_atb_cfg, + &a6xx_coresight_regs_cx[42]); +static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_0, + &a6xx_coresight_regs_cx[43]); +static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_1, + &a6xx_coresight_regs_cx[44]); +static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_2, + &a6xx_coresight_regs_cx[45]); +static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_3, + &a6xx_coresight_regs_cx[46]); +static ADRENO_CORESIGHT_ATTR(cx_perf_atb_trig_intf_sel_0, + &a6xx_coresight_regs_cx[47]); +static ADRENO_CORESIGHT_ATTR(cx_perf_atb_trig_intf_sel_1, + &a6xx_coresight_regs_cx[48]); +static ADRENO_CORESIGHT_ATTR(cx_perf_atb_drain_cmd, + &a6xx_coresight_regs_cx[49]); +static ADRENO_CORESIGHT_ATTR(cx_eco_cntl, + &a6xx_coresight_regs_cx[50]); +static ADRENO_CORESIGHT_ATTR(cx_ahb_dbg_cntl, + &a6xx_coresight_regs_cx[51]); + +static struct attribute *a6xx_coresight_attrs[] = { + &coresight_attr_cfg_dbgbus_sel_a.attr.attr, + &coresight_attr_cfg_dbgbus_sel_b.attr.attr, + &coresight_attr_cfg_dbgbus_sel_c.attr.attr, + &coresight_attr_cfg_dbgbus_sel_d.attr.attr, + &coresight_attr_cfg_dbgbus_cntlt.attr.attr, + &coresight_attr_cfg_dbgbus_cntlm.attr.attr, + &coresight_attr_cfg_dbgbus_opl.attr.attr, + &coresight_attr_cfg_dbgbus_ope.attr.attr, + &coresight_attr_cfg_dbgbus_ivtl_0.attr.attr, + &coresight_attr_cfg_dbgbus_ivtl_1.attr.attr, + &coresight_attr_cfg_dbgbus_ivtl_2.attr.attr, + &coresight_attr_cfg_dbgbus_ivtl_3.attr.attr, + &coresight_attr_cfg_dbgbus_maskl_0.attr.attr, + &coresight_attr_cfg_dbgbus_maskl_1.attr.attr, + 
&coresight_attr_cfg_dbgbus_maskl_2.attr.attr, + &coresight_attr_cfg_dbgbus_maskl_3.attr.attr, + &coresight_attr_cfg_dbgbus_bytel_0.attr.attr, + &coresight_attr_cfg_dbgbus_bytel_1.attr.attr, + &coresight_attr_cfg_dbgbus_ivte_0.attr.attr, + &coresight_attr_cfg_dbgbus_ivte_1.attr.attr, + &coresight_attr_cfg_dbgbus_ivte_2.attr.attr, + &coresight_attr_cfg_dbgbus_ivte_3.attr.attr, + &coresight_attr_cfg_dbgbus_maske_0.attr.attr, + &coresight_attr_cfg_dbgbus_maske_1.attr.attr, + &coresight_attr_cfg_dbgbus_maske_2.attr.attr, + &coresight_attr_cfg_dbgbus_maske_3.attr.attr, + &coresight_attr_cfg_dbgbus_nibblee.attr.attr, + &coresight_attr_cfg_dbgbus_ptrc0.attr.attr, + &coresight_attr_cfg_dbgbus_ptrc1.attr.attr, + &coresight_attr_cfg_dbgbus_loadreg.attr.attr, + &coresight_attr_cfg_dbgbus_idx.attr.attr, + &coresight_attr_cfg_dbgbus_clrc.attr.attr, + &coresight_attr_cfg_dbgbus_loadivt.attr.attr, + &coresight_attr_vbif_dbg_cntl.attr.attr, + &coresight_attr_dbg_lo_hi_gpio.attr.attr, + &coresight_attr_ext_trace_bus_cntl.attr.attr, + &coresight_attr_read_ahb_through_dbg.attr.attr, + &coresight_attr_cfg_dbgbus_trace_buf1.attr.attr, + &coresight_attr_cfg_dbgbus_trace_buf2.attr.attr, + &coresight_attr_evt_cfg.attr.attr, + &coresight_attr_evt_intf_sel_0.attr.attr, + &coresight_attr_evt_intf_sel_1.attr.attr, + &coresight_attr_perf_atb_cfg.attr.attr, + &coresight_attr_perf_atb_counter_sel_0.attr.attr, + &coresight_attr_perf_atb_counter_sel_1.attr.attr, + &coresight_attr_perf_atb_counter_sel_2.attr.attr, + &coresight_attr_perf_atb_counter_sel_3.attr.attr, + &coresight_attr_perf_atb_trig_intf_sel_0.attr.attr, + &coresight_attr_perf_atb_trig_intf_sel_1.attr.attr, + &coresight_attr_perf_atb_drain_cmd.attr.attr, + &coresight_attr_eco_cntl.attr.attr, + &coresight_attr_ahb_dbg_cntl.attr.attr, + NULL, +}; + +/*cx*/ +static struct attribute *a6xx_coresight_attrs_cx[] = { + &coresight_attr_cx_cfg_dbgbus_sel_a.attr.attr, + &coresight_attr_cx_cfg_dbgbus_sel_b.attr.attr, + 
&coresight_attr_cx_cfg_dbgbus_sel_c.attr.attr, + &coresight_attr_cx_cfg_dbgbus_sel_d.attr.attr, + &coresight_attr_cx_cfg_dbgbus_cntlt.attr.attr, + &coresight_attr_cx_cfg_dbgbus_cntlm.attr.attr, + &coresight_attr_cx_cfg_dbgbus_opl.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ope.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ivtl_0.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ivtl_1.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ivtl_2.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ivtl_3.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maskl_0.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maskl_1.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maskl_2.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maskl_3.attr.attr, + &coresight_attr_cx_cfg_dbgbus_bytel_0.attr.attr, + &coresight_attr_cx_cfg_dbgbus_bytel_1.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ivte_0.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ivte_1.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ivte_2.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ivte_3.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maske_0.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maske_1.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maske_2.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maske_3.attr.attr, + &coresight_attr_cx_cfg_dbgbus_nibblee.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ptrc0.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ptrc1.attr.attr, + &coresight_attr_cx_cfg_dbgbus_loadreg.attr.attr, + &coresight_attr_cx_cfg_dbgbus_idx.attr.attr, + &coresight_attr_cx_cfg_dbgbus_clrc.attr.attr, + &coresight_attr_cx_cfg_dbgbus_loadivt.attr.attr, + &coresight_attr_cx_vbif_dbg_cntl.attr.attr, + &coresight_attr_cx_dbg_lo_hi_gpio.attr.attr, + &coresight_attr_cx_ext_trace_bus_cntl.attr.attr, + &coresight_attr_cx_read_ahb_through_dbg.attr.attr, + &coresight_attr_cx_cfg_dbgbus_trace_buf1.attr.attr, + &coresight_attr_cx_cfg_dbgbus_trace_buf2.attr.attr, + &coresight_attr_cx_evt_cfg.attr.attr, + &coresight_attr_cx_evt_intf_sel_0.attr.attr, + &coresight_attr_cx_evt_intf_sel_1.attr.attr, + 
&coresight_attr_cx_perf_atb_cfg.attr.attr, + &coresight_attr_cx_perf_atb_counter_sel_0.attr.attr, + &coresight_attr_cx_perf_atb_counter_sel_1.attr.attr, + &coresight_attr_cx_perf_atb_counter_sel_2.attr.attr, + &coresight_attr_cx_perf_atb_counter_sel_3.attr.attr, + &coresight_attr_cx_perf_atb_trig_intf_sel_0.attr.attr, + &coresight_attr_cx_perf_atb_trig_intf_sel_1.attr.attr, + &coresight_attr_cx_perf_atb_drain_cmd.attr.attr, + &coresight_attr_cx_eco_cntl.attr.attr, + &coresight_attr_cx_ahb_dbg_cntl.attr.attr, + NULL, +}; + +static const struct attribute_group a6xx_coresight_group = { + .attrs = a6xx_coresight_attrs, +}; + +static const struct attribute_group *a6xx_coresight_groups[] = { + &a6xx_coresight_group, + NULL, +}; + +static const struct attribute_group a6xx_coresight_group_cx = { + .attrs = a6xx_coresight_attrs_cx, +}; + +static const struct attribute_group *a6xx_coresight_groups_cx[] = { + &a6xx_coresight_group_cx, + NULL, +}; + +static struct adreno_coresight a6xx_coresight = { + .registers = a6xx_coresight_regs, + .count = ARRAY_SIZE(a6xx_coresight_regs), + .groups = a6xx_coresight_groups, +}; + +static struct adreno_coresight a6xx_coresight_cx = { + .registers = a6xx_coresight_regs_cx, + .count = ARRAY_SIZE(a6xx_coresight_regs_cx), + .groups = a6xx_coresight_groups_cx, +}; +#endif + +int a6xx_probe_common(struct platform_device *pdev, + struct adreno_device *adreno_dev, u32 chipid, + const struct adreno_gpu_core *gpucore) +{ + const struct adreno_gpudev *gpudev = gpucore->gpudev; + + adreno_dev->gpucore = gpucore; + adreno_dev->chipid = chipid; + + adreno_reg_offset_init(gpudev->reg_offsets); + + adreno_dev->hwcg_enabled = true; + + adreno_dev->preempt.preempt_level = 1; + adreno_dev->preempt.skipsaverestore = true; + adreno_dev->preempt.usesgmem = true; + + return adreno_device_probe(pdev, adreno_dev); +} + +static int a6xx_probe(struct platform_device *pdev, + u32 chipid, const struct adreno_gpu_core *gpucore) +{ + struct adreno_device *adreno_dev; + 
struct kgsl_device *device; + int ret; + + adreno_dev = (struct adreno_device *) + of_device_get_match_data(&pdev->dev); + + memset(adreno_dev, 0, sizeof(*adreno_dev)); + + ret = a6xx_probe_common(pdev, adreno_dev, chipid, gpucore); + if (ret) + return ret; + + ret = adreno_dispatcher_init(adreno_dev); + if (ret) + return ret; + + device = KGSL_DEVICE(adreno_dev); + + timer_setup(&device->idle_timer, kgsl_timer, 0); + + INIT_WORK(&device->idle_check_ws, kgsl_idle_check); + + adreno_dev->irq_mask = A6XX_INT_MASK; + + return 0; +} + +/* Register offset defines for A6XX, in order of enum adreno_regs */ +static unsigned int a6xx_register_offsets[ADRENO_REG_REGISTER_MAX] = { + + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A6XX_CP_RB_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, A6XX_CP_RB_BASE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR_ADDR_LO, + A6XX_CP_RB_RPTR_ADDR_LO), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR_ADDR_HI, + A6XX_CP_RB_RPTR_ADDR_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A6XX_CP_RB_RPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A6XX_CP_RB_WPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_CNTL, A6XX_CP_RB_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, A6XX_CP_SQE_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, A6XX_CP_IB1_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, A6XX_CP_IB1_BASE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BUFSZ, A6XX_CP_IB1_REM_SIZE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, A6XX_CP_IB2_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, A6XX_CP_IB2_BASE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, A6XX_CP_IB2_REM_SIZE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT, A6XX_CP_CONTEXT_SWITCH_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO, + A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO), + ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI, + A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI), + ADRENO_REG_DEFINE( + ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO, + 
A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO), + ADRENO_REG_DEFINE( + ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI, + A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI), + ADRENO_REG_DEFINE( + ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO, + A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO), + ADRENO_REG_DEFINE( + ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI, + A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO, + A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO), + ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI, + A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_LEVEL_STATUS, + A6XX_CP_CONTEXT_SWITCH_LEVEL_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, A6XX_RBBM_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS3, A6XX_RBBM_STATUS3), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, A6XX_RBBM_INT_0_MASK), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_CLOCK_CTL, A6XX_RBBM_CLOCK_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, A6XX_RBBM_SW_RESET_CMD), + ADRENO_REG_DEFINE(ADRENO_REG_GMU_AO_HOST_INTERRUPT_MASK, + A6XX_GMU_AO_HOST_INTERRUPT_MASK), + ADRENO_REG_DEFINE(ADRENO_REG_GMU_AHB_FENCE_STATUS, + A6XX_GMU_AHB_FENCE_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_GMU_GMU2HOST_INTR_MASK, + A6XX_GMU_GMU2HOST_INTR_MASK), +}; + +int a6xx_perfcounter_update(struct adreno_device *adreno_dev, + struct adreno_perfcount_register *reg, bool update_reg) +{ + void *ptr = adreno_dev->pwrup_reglist->hostptr; + struct cpu_gpu_lock *lock = ptr; + u32 *data = ptr + sizeof(*lock); + int i, offset = 0; + + if (kgsl_hwlock(lock)) { + kgsl_hwunlock(lock); + return -EBUSY; + } + + /* + * If the perfcounter select register is already present in reglist + * update it, otherwise append the pair to + * the end of the list. 
+ */ + for (i = 0; i < lock->list_length >> 1; i++) { + if (data[offset] == reg->select) { + data[offset + 1] = reg->countable; + goto update; + } + + if (data[offset] == GEN7_RBBM_PERFCTR_CNTL) + break; + + offset += 2; + } + + /* + * For all targets GEN7_RBBM_PERFCTR_CNTL needs to be the last entry, + * so overwrite the existing GEN7_RBBM_PERFCNTL_CTRL and add it back to + * the end. + */ + data[offset] = reg->select; + data[offset + 1] = reg->countable; + data[offset + 2] = GEN7_RBBM_PERFCTR_CNTL; + data[offset + 3] = 1; + + lock->list_length += 2; + +update: + if (update_reg) + kgsl_regwrite(KGSL_DEVICE(adreno_dev), reg->select, + reg->countable); + + kgsl_hwunlock(lock); + return 0; +} + +u64 gen7_read_alwayson(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 lo = 0, hi = 0, tmp = 0; + + /* Always use the GMU AO counter when doing a AHB read */ + gmu_core_regread(device, GEN7_GMU_ALWAYS_ON_COUNTER_H, &hi); + gmu_core_regread(device, GEN7_GMU_ALWAYS_ON_COUNTER_L, &lo); + + /* Check for overflow */ + gmu_core_regread(device, GEN7_GMU_ALWAYS_ON_COUNTER_H, &tmp); + + if (hi != tmp) { + gmu_core_regread(device, GEN7_GMU_ALWAYS_ON_COUNTER_L, + &lo); + hi = tmp; + } + + return (((u64) hi) << 32) | lo; +} + +static void gen7_remove(struct adreno_device *adreno_dev) +{ + if (ADRENO_FEATURE(adreno_dev, ADRENO_PREEMPTION)) + del_timer(&adreno_dev->preempt.timer); +} + +static void gen7_read_bus_stats(struct kgsl_device *device, + struct kgsl_power_stats *stats, + struct adreno_busy_data *busy) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + u64 ram_cycles, starved_ram; + + ram_cycles = counter_delta(device, adreno_dev->ram_cycles_lo, + &busy->bif_ram_cycles); + + starved_ram = counter_delta(device, adreno_dev->starved_ram_lo, + &busy->bif_starved_ram); + + ram_cycles += counter_delta(device, + adreno_dev->ram_cycles_lo_ch1_read, + &busy->bif_ram_cycles_read_ch1); + + ram_cycles += counter_delta(device, + 
adreno_dev->ram_cycles_lo_ch0_write, + &busy->bif_ram_cycles_write_ch0); + + ram_cycles += counter_delta(device, + adreno_dev->ram_cycles_lo_ch1_write, + &busy->bif_ram_cycles_write_ch1); + + starved_ram += counter_delta(device, + adreno_dev->starved_ram_lo_ch1, + &busy->bif_starved_ram_ch1); + + stats->ram_time = ram_cycles; + stats->ram_wait = starved_ram; +} + +static void gen7_power_stats(struct adreno_device *adreno_dev, + struct kgsl_power_stats *stats) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_busy_data *busy = &adreno_dev->busy_data; + u64 gpu_busy; + + /* Set the GPU busy counter for frequency scaling */ + gpu_busy = counter_delta(device, GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L, + &busy->gpu_busy); + + stats->busy_time = gpu_busy * 10; + do_div(stats->busy_time, 192); + + if (ADRENO_FEATURE(adreno_dev, ADRENO_IFPC)) { + u32 ifpc = counter_delta(device, + GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_L, + &busy->num_ifpc); + + adreno_dev->ifpc_count += ifpc; + if (ifpc > 0) + trace_adreno_ifpc_count(adreno_dev->ifpc_count); + } + + if (device->pwrctrl.bus_control) + gen7_read_bus_stats(device, stats, busy); + + if (adreno_dev->bcl_enabled) { + u32 a, b, c; + + a = counter_delta(device, GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_L, + &busy->throttle_cycles[0]); + + b = counter_delta(device, GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_L, + &busy->throttle_cycles[1]); + + c = counter_delta(device, GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_L, + &busy->throttle_cycles[2]); + + trace_kgsl_bcl_clock_throttling(a, b, c); + } +} + +static int gen7_setproperty(struct kgsl_device_private *dev_priv, + u32 type, void __user *value, u32 sizebytes) +{ + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + u32 enable; + + if (type != KGSL_PROP_PWRCTRL) + return -ENODEV; + + if (sizebytes != sizeof(enable)) + return -EINVAL; + + if (copy_from_user(&enable, value, sizeof(enable))) + return -EFAULT; + + 
mutex_lock(&device->mutex); + + if (enable) { + clear_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags); + + kgsl_pwrscale_enable(device); + } else { + set_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags); + + if (!adreno_active_count_get(adreno_dev)) + adreno_active_count_put(adreno_dev); + + kgsl_pwrscale_disable(device, true); + } + + mutex_unlock(&device->mutex); + + return 0; +} + +const struct gen7_gpudev adreno_gen7_hwsched_gpudev = { + .base = { + .reg_offsets = gen7_register_offsets, + .probe = gen7_hwsched_probe, + .snapshot = gen7_hwsched_snapshot, + .irq_handler = gen7_irq_handler, + .iommu_fault_block = gen7_iommu_fault_block, + .preemption_context_init = gen7_preemption_context_init, + .context_detach = gen7_hwsched_context_detach, + .read_alwayson = gen7_read_alwayson, + .reset = gen7_hwsched_reset, + .power_ops = &gen7_hwsched_power_ops, + .power_stats = gen7_power_stats, + .setproperty = gen7_setproperty, + .hw_isidle = gen7_hw_isidle, + .add_to_va_minidump = gen7_hwsched_add_to_minidump, + }, + .hfi_probe = gen7_hwsched_hfi_probe, + .hfi_remove = gen7_hwsched_hfi_remove, + .handle_watchdog = gen7_hwsched_handle_watchdog, +}; + +const struct gen7_gpudev adreno_gen7_gmu_gpudev = { + .base = { + .reg_offsets = gen7_register_offsets, + .probe = gen7_gmu_device_probe, + .snapshot = gen7_gmu_snapshot, + .irq_handler = gen7_irq_handler, + .rb_start = gen7_rb_start, + .gpu_keepalive = gen7_gpu_keepalive, + .hw_isidle = gen7_hw_isidle, + .iommu_fault_block = gen7_iommu_fault_block, + .reset = gen7_gmu_reset, + .preemption_schedule = gen7_preemption_schedule, + .preemption_context_init = gen7_preemption_context_init, + .read_alwayson = gen7_read_alwayson, + .power_ops = &gen7_gmu_power_ops, + .remove = gen7_remove, + .ringbuffer_submitcmd = gen7_ringbuffer_submitcmd, + .power_stats = gen7_power_stats, + .setproperty = gen7_setproperty, + .add_to_va_minidump = gen7_gmu_add_to_minidump, + }, + .hfi_probe = gen7_gmu_hfi_probe, + .handle_watchdog = 
gen7_gmu_handle_watchdog, +}; diff --git a/adreno_gen7.h b/adreno_gen7.h new file mode 100644 index 0000000000..7e4c910231 --- /dev/null +++ b/adreno_gen7.h @@ -0,0 +1,436 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#ifndef _ADRENO_GEN7_H_ +#define _ADRENO_GEN7_H_ + +#include + +#include "gen7_reg.h" +#include "adreno_gen7_gmu.h" + +extern const struct adreno_power_ops gen7_gmu_power_ops; +extern const struct adreno_power_ops gen7_hwsched_power_ops; +extern const struct adreno_perfcounters adreno_gen7_perfcounters; + +struct gen7_gpudev { + struct adreno_gpudev base; + int (*hfi_probe)(struct adreno_device *adreno_dev); + void (*hfi_remove)(struct adreno_device *adreno_dev); + void (*handle_watchdog)(struct adreno_device *adreno_dev); +}; + +extern const struct gen7_gpudev adreno_gen7_gmu_gpudev; +extern const struct gen7_gpudev adreno_gen7_hwsched_gpudev; + +/** + * struct gen7_device - Container for the gen7_device + */ +struct gen7_device { + /** @gmu: Container for the gen7 GMU device */ + struct gen7_gmu_device gmu; + /** @adreno_dev: Container for the generic adreno device */ + struct adreno_device adreno_dev; +}; + +/** + * struct gen7_protected_regs - container for a protect register span + */ +struct gen7_protected_regs { + /** @reg: Physical protected mode register to write to */ + u32 reg; + /** @start: Dword offset of the starting register in the range */ + u32 start; + /** + * @end: Dword offset of the ending register in the range + * (inclusive) + */ + u32 end; + /** + * @noaccess: 1 if the register should not be accessible from + * userspace, 0 if it can be read (but not written) + */ + u32 noaccess; +}; + +/** + * struct adreno_gen7_core - gen7 specific GPU core definitions + */ +struct adreno_gen7_core { + /** @base: Container for the generic GPU definitions */ + struct adreno_gpu_core base; + /** @sqefw_name: Name of the SQE microcode file */ + const char 
*sqefw_name; + /** @gmufw_name: Name of the GMU firmware file */ + const char *gmufw_name; + /** @gmufw_name: Name of the backup GMU firmware file */ + const char *gmufw_bak_name; + /** @zap_name: Name of the CPZ zap file */ + const char *zap_name; + /** @hwcg: List of registers and values to write for HWCG */ + const struct kgsl_regmap_list *hwcg; + /** @hwcg_count: Number of registers in @hwcg */ + u32 hwcg_count; + /** @gbif: List of registers and values to write for GBIF */ + const struct kgsl_regmap_list *gbif; + /** @gbif_count: Number of registers in @gbif */ + u32 gbif_count; + /** @hang_detect_cycles: Hang detect counter timeout value */ + u32 hang_detect_cycles; + /** @protected_regs: Array of protected registers for the target */ + const struct gen7_protected_regs *protected_regs; + /** @ctxt_record_size: Size of the preemption record in bytes */ + u64 ctxt_record_size; + /** @highest_bank_bit: Highest bank bit value */ + u32 highest_bank_bit; +}; + +/** + * struct gen7_cp_preemption_record - CP context record for + * preemption. + * @magic: (00) Value at this offset must be equal to + * GEN7_CP_CTXRECORD_MAGIC_REF. + * @info: (04) Type of record. Written non-zero (usually) by CP. + * we must set to zero for all ringbuffers. + * @errno: (08) Error code. Initialize this to GEN7_CP_CTXRECORD_ERROR_NONE. + * CP will update to another value if a preemption error occurs. + * @data: (12) DATA field in YIELD and SET_MARKER packets. + * Written by CP when switching out. Not used on switch-in. Initialized to 0. + * @cntl: (16) RB_CNTL, saved and restored by CP. We must initialize this. + * @rptr: (20) RB_RPTR, saved and restored by CP. We must initialize this. + * @wptr: (24) RB_WPTR, saved and restored by CP. We must initialize this. + * @_pad28: (28) Reserved/padding. + * @rptr_addr: (32) RB_RPTR_ADDR_LO|HI saved and restored. We must initialize. + * rbase: (40) RB_BASE_LO|HI saved and restored. + * counter: (48) Pointer to preemption counter. 
+ * @bv_rptr_addr: (56) BV_RB_RPTR_ADDR_LO|HI save and restored. We must initialize. + */ +struct gen7_cp_preemption_record { + u32 magic; + u32 info; + u32 errno; + u32 data; + u32 cntl; + u32 rptr; + u32 wptr; + u32 _pad28; + u64 rptr_addr; + u64 rbase; + u64 counter; + u64 bv_rptr_addr; +}; + +/** + * struct gen7_cp_smmu_info - CP preemption SMMU info. + * @magic: (00) The value at this offset must be equal to + * GEN7_CP_SMMU_INFO_MAGIC_REF + * @_pad4: (04) Reserved/padding + * @ttbr0: (08) Base address of the page table for the * incoming context + * @asid: (16) Address Space IDentifier (ASID) of the incoming context + * @context_idr: (20) Context Identification Register value + * @context_bank: (24) Which Context Bank in SMMU to update + */ +struct gen7_cp_smmu_info { + u32 magic; + u32 _pad4; + u64 ttbr0; + u32 asid; + u32 context_idr; + u32 context_bank; +}; + +#define GEN7_CP_SMMU_INFO_MAGIC_REF 0x241350d5UL + +#define GEN7_CP_CTXRECORD_MAGIC_REF 0xae399d6eUL +/* Size of each CP preemption record */ +#define GEN7_CP_CTXRECORD_SIZE_IN_BYTES (2860 * 1024) +/* Size of the user context record block (in bytes) */ +#define GEN7_CP_CTXRECORD_USER_RESTORE_SIZE (192 * 1024) +/* Size of the performance counter save/restore block (in bytes) */ +#define GEN7_CP_PERFCOUNTER_SAVE_RESTORE_SIZE (4 * 1024) + +#define GEN7_CP_RB_CNTL_DEFAULT \ + (FIELD_PREP(GENMASK(7, 0), ilog2(KGSL_RB_DWORDS >> 1)) | \ + FIELD_PREP(GENMASK(12, 8), ilog2(4))) + +/* Size of the CP_INIT pm4 stream in dwords */ +#define GEN7_CP_INIT_DWORDS 10 + +#define GEN7_INT_MASK \ + ((1 << GEN7_INT_AHBERROR) | \ + (1 << GEN7_INT_ATBASYNCFIFOOVERFLOW) | \ + (1 << GEN7_INT_GPCERROR) | \ + (1 << GEN7_INT_SWINTERRUPT) | \ + (1 << GEN7_INT_HWERROR) | \ + (1 << GEN7_INT_PM4CPINTERRUPT) | \ + (1 << GEN7_INT_RB_DONE_TS) | \ + (1 << GEN7_INT_CACHE_CLEAN_TS) | \ + (1 << GEN7_INT_ATBBUSOVERFLOW) | \ + (1 << GEN7_INT_HANGDETECTINTERRUPT) | \ + (1 << GEN7_INT_OUTOFBOUNDACCESS) | \ + (1 << GEN7_INT_UCHETRAPINTERRUPT) | 
\ + (1 << GEN7_INT_TSBWRITEERROR)) + +#define GEN7_HWSCHED_INT_MASK \ + ((1 << GEN7_INT_AHBERROR) | \ + (1 << GEN7_INT_ATBASYNCFIFOOVERFLOW) | \ + (1 << GEN7_INT_ATBBUSOVERFLOW) | \ + (1 << GEN7_INT_OUTOFBOUNDACCESS) | \ + (1 << GEN7_INT_UCHETRAPINTERRUPT)) + +/** + * to_gen7_core - return the gen7 specific GPU core struct + * @adreno_dev: An Adreno GPU device handle + * + * Returns: + * A pointer to the gen7 specific GPU core struct + */ +static inline const struct adreno_gen7_core * +to_gen7_core(struct adreno_device *adreno_dev) +{ + const struct adreno_gpu_core *core = adreno_dev->gpucore; + + return container_of(core, struct adreno_gen7_core, base); +} + +/** + * gen7_is_smmu_stalled() - Check whether smmu is stalled or not + * @device: Pointer to KGSL device + * + * Return - True if smmu is stalled or false otherwise + */ +static inline bool gen7_is_smmu_stalled(struct kgsl_device *device) +{ + u32 val; + + kgsl_regread(device, GEN7_RBBM_STATUS3, &val); + + return val & BIT(24); +} + +/** + * gen7_cx_regulator_disable_wait - Disable a cx regulator and wait for it + * @reg: A &struct regulator handle + * @device: kgsl device struct + * @timeout: Time to wait (in milliseconds) + * + * Disable the regulator and wait @timeout milliseconds for it to enter the + * disabled state. 
+ * + */ +void gen7_cx_regulator_disable_wait(struct regulator *reg, + struct kgsl_device *device, u32 timeout); + +/* Preemption functions */ +void gen7_preemption_trigger(struct adreno_device *adreno_dev, bool atomic); +void gen7_preemption_schedule(struct adreno_device *adreno_dev); +void gen7_preemption_start(struct adreno_device *adreno_dev); +int gen7_preemption_init(struct adreno_device *adreno_dev); + +u32 gen7_preemption_post_ibsubmit(struct adreno_device *adreno_dev, + unsigned int *cmds); +u32 gen7_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, + u32 *cmds); + +unsigned int gen7_set_marker(unsigned int *cmds, + enum adreno_cp_marker_type type); + +void gen7_preemption_callback(struct adreno_device *adreno_dev, int bit); + +int gen7_preemption_context_init(struct kgsl_context *context); + +void gen7_preemption_context_destroy(struct kgsl_context *context); + +void gen7_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); +void gen7_crashdump_init(struct adreno_device *adreno_dev); + +/** + * gen7_read_alwayson - Read the current always on clock value + * @adreno_dev: An Adreno GPU handle + * + * Return: The current value of the GMU always on counter + */ +u64 gen7_read_alwayson(struct adreno_device *adreno_dev); + +/** + * gen7_start - Program gen7 registers + * @adreno_dev: An Adreno GPU handle + * + * This function does all gen7 register programming every + * time we boot the gpu + * + * Return: 0 on success or negative on failure + */ +int gen7_start(struct adreno_device *adreno_dev); + +/** + * gen7_init - Initialize gen7 resources + * @adreno_dev: An Adreno GPU handle + * + * This function does gen7 specific one time initialization + * and is invoked when the very first client opens a + * kgsl instance + * + * Return: Zero on success and negative error on failure + */ +int gen7_init(struct adreno_device *adreno_dev); + +/** + * gen7_rb_start - Gen7 
specific ringbuffer setup + * @adreno_dev: An Adreno GPU handle + * + * This function does gen7 specific ringbuffer setup and + * attempts to submit CP INIT and bring GPU out of secure mode + * + * Return: Zero on success and negative error on failure + */ +int gen7_rb_start(struct adreno_device *adreno_dev); + +/** + * gen7_microcode_read - Get the cp microcode from the filesystem + * @adreno_dev: An Adreno GPU handle + * + * This function gets the firmware from filesystem and sets up + * the micorocode global buffer + * + * Return: Zero on success and negative error on failure + */ +int gen7_microcode_read(struct adreno_device *adreno_dev); + +/** + * gen7_probe_common - Probe common gen7 resources + * @pdev: Pointer to the platform device + * @adreno_dev: Pointer to the adreno device + * @chipid: Chipid of the target + * @gpucore: Pointer to the gpucore strucure + * + * This function sets up the gen7 resources common across all + * gen7 targets + */ +int gen7_probe_common(struct platform_device *pdev, + struct adreno_device *adreno_dev, u32 chipid, + const struct adreno_gpu_core *gpucore); + +/** + * gen7_hw_isidle - Check whether gen7 gpu is idle or not + * @adreno_dev: An Adreno GPU handle + * + * Return: True if gpu is idle, otherwise false + */ +bool gen7_hw_isidle(struct adreno_device *adreno_dev); + +/** + * gen7_spin_idle_debug - Debug logging used when gpu fails to idle + * @adreno_dev: An Adreno GPU handle + * + * This function logs interesting registers and triggers a snapshot + */ +void gen7_spin_idle_debug(struct adreno_device *adreno_dev, + const char *str); + +/** + * gen7_perfcounter_update - Update the IFPC perfcounter list + * @adreno_dev: An Adreno GPU handle + * @reg: Perfcounter reg struct to add/remove to the list + * @update_reg: true if the perfcounter needs to be programmed by the CPU + * + * Return: 0 on success or -EBUSY if the lock couldn't be taken + */ +int gen7_perfcounter_update(struct adreno_device *adreno_dev, + struct 
adreno_perfcount_register *reg, bool update_reg); + +/* + * gen7_ringbuffer_init - Initialize the ringbuffers + * @adreno_dev: An Adreno GPU handle + * + * Initialize the ringbuffer(s) for a5xx. + * Return: 0 on success or negative on failure + */ +int gen7_ringbuffer_init(struct adreno_device *adreno_dev); + +/** + * gen7_ringbuffer_submitcmd - Submit a user command to the ringbuffer + * @adreno_dev: An Adreno GPU handle + * @cmdobj: Pointer to a user command object + * @flags: Internal submit flags + * @time: Optional pointer to a adreno_submit_time container + * + * Return: 0 on success or negative on failure + */ +int gen7_ringbuffer_submitcmd(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj, u32 flags, + struct adreno_submit_time *time); + +/** + * gen7_ringbuffer_submit - Submit a command to the ringbuffer + * @rb: Ringbuffer pointer + * @time: Optional pointer to a adreno_submit_time container + * + * Return: 0 on success or negative on failure + */ +int gen7_ringbuffer_submit(struct adreno_ringbuffer *rb, + struct adreno_submit_time *time); + +/** + * gen7_fenced_write - Write to a fenced register + * @adreno_dev: An Adreno GPU handle + * @offset: Register offset + * @value: Value to write + * @mask: Expected FENCE_STATUS for successful write + * + * Return: 0 on success or negative on failure + */ +int gen7_fenced_write(struct adreno_device *adreno_dev, u32 offset, + u32 value, u32 mask); + +/** + * gen77ringbuffer_addcmds - Wrap and submit commands to the ringbuffer + * @adreno_dev: An Adreno GPU handle + * @rb: Ringbuffer pointer + * @drawctxt: Draw context submitting the commands + * @flags: Submission flags + * @in: Input buffer to write to ringbuffer + * @dwords: Dword length of @in + * @timestamp: Draw context timestamp for the submission + * @time: Optional pointer to a adreno_submit_time container + * + * Return: 0 on success or negative on failure + */ +int gen7_ringbuffer_addcmds(struct adreno_device *adreno_dev, + struct 
adreno_ringbuffer *rb, struct adreno_context *drawctxt, + u32 flags, u32 *in, u32 dwords, u32 timestamp, + struct adreno_submit_time *time); + +/** + * gen7_cp_init_cmds - Create the CP_INIT commands + * @adreno_dev: An Adreno GPU handle + * @cmd: Buffer to write the CP_INIT commands into + */ +void gen7_cp_init_cmds(struct adreno_device *adreno_dev, u32 *cmds); + +/** + * gen7_gmu_hfi_probe - Probe Gen7 HFI specific data + * @adreno_dev: An Adreno GPU handle + * + * Return: 0 on success or negative on failure + */ +int gen7_gmu_hfi_probe(struct adreno_device *adreno_dev); + +static inline const struct gen7_gpudev * +to_gen7_gpudev(const struct adreno_gpudev *gpudev) +{ + return container_of(gpudev, struct gen7_gpudev, base); +} + +/** + * gen7_reset_preempt_records - Reset the preemption buffers + * @adreno_dev: Handle to the adreno device + * + * Reset the preemption records at the time of hard reset + */ +void gen7_reset_preempt_records(struct adreno_device *adreno_dev); +#endif diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c new file mode 100644 index 0000000000..8fc1726748 --- /dev/null +++ b/adreno_gen7_gmu.c @@ -0,0 +1,2708 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "adreno.h" +#include "adreno_gen7.h" +#include "adreno_trace.h" +#include "kgsl_bus.h" +#include "kgsl_device.h" +#include "kgsl_trace.h" +#include "kgsl_util.h" + +static struct gmu_vma_entry gen7_gmu_vma[] = { + [GMU_ITCM] = { + .start = 0x00000000, + .size = SZ_16K, + }, + [GMU_CACHE] = { + .start = SZ_16K, + .size = (SZ_16M - SZ_16K), + .next_va = SZ_16K, + }, + [GMU_DTCM] = { + .start = SZ_256M + SZ_16K, + .size = SZ_16K, + }, + [GMU_DCACHE] = { + .start = 0x0, + .size = 0x0, + }, + [GMU_NONCACHED_KERNEL] = { + .start = 0x60000000, + .size = SZ_512M, + .next_va = 0x60000000, + }, +}; + +static ssize_t log_stream_enable_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct gen7_gmu_device *gmu = container_of(kobj, struct gen7_gmu_device, log_kobj); + bool val; + int ret; + + ret = kstrtobool(buf, &val); + if (ret) + return ret; + + gmu->log_stream_enable = val; + return count; +} + +static ssize_t log_stream_enable_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct gen7_gmu_device *gmu = container_of(kobj, struct gen7_gmu_device, log_kobj); + + return scnprintf(buf, PAGE_SIZE, "%d\n", gmu->log_stream_enable); +} + +static ssize_t log_group_mask_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct gen7_gmu_device *gmu = container_of(kobj, struct gen7_gmu_device, log_kobj); + u32 val; + int ret; + + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; + + gmu->log_group_mask = val; + return count; +} + +static ssize_t log_group_mask_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct gen7_gmu_device *gmu = container_of(kobj, struct gen7_gmu_device, log_kobj); + + return scnprintf(buf, PAGE_SIZE, "%x\n", gmu->log_group_mask); 
+} + +static struct kobj_attribute log_stream_enable_attr = + __ATTR(log_stream_enable, 0644, log_stream_enable_show, log_stream_enable_store); + +static struct kobj_attribute log_group_mask_attr = + __ATTR(log_group_mask, 0644, log_group_mask_show, log_group_mask_store); + +static struct attribute *log_attrs[] = { + &log_stream_enable_attr.attr, + &log_group_mask_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(log); + +static struct kobj_type log_kobj_type = { + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = log_groups, +}; + +static int gen7_timed_poll_check_rscc(struct gen7_gmu_device *gmu, + unsigned int offset, unsigned int expected_ret, + unsigned int timeout, unsigned int mask) +{ + u32 value; + + return readl_poll_timeout(gmu->rscc_virt + (offset << 2), value, + (value & mask) == expected_ret, 100, timeout * 1000); +} + +struct gen7_gmu_device *to_gen7_gmu(struct adreno_device *adreno_dev) +{ + struct gen7_device *gen7_dev = container_of(adreno_dev, + struct gen7_device, adreno_dev); + + return &gen7_dev->gmu; +} + +struct adreno_device *gen7_gmu_to_adreno(struct gen7_gmu_device *gmu) +{ + struct gen7_device *gen7_dev = + container_of(gmu, struct gen7_device, gmu); + + return &gen7_dev->adreno_dev; +} + +#define RSC_CMD_OFFSET 2 + +static void _regwrite(void __iomem *regbase, + unsigned int offsetwords, unsigned int value) +{ + void __iomem *reg; + + reg = regbase + (offsetwords << 2); + __raw_writel(value, reg); +} + +void gen7_load_rsc_ucode(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + void __iomem *rscc = gmu->rscc_virt; + + /* Disable SDE clock gating */ + _regwrite(rscc, GEN7_GPU_RSCC_RSC_STATUS0_DRV0, BIT(24)); + + /* Setup RSC PDC handshake for sleep and wakeup */ + _regwrite(rscc, GEN7_RSCC_PDC_SLAVE_ID_DRV0, 1); + _regwrite(rscc, GEN7_RSCC_HIDDEN_TCS_CMD0_DATA, 0); + _regwrite(rscc, GEN7_RSCC_HIDDEN_TCS_CMD0_ADDR, 0); + _regwrite(rscc, GEN7_RSCC_HIDDEN_TCS_CMD0_DATA + RSC_CMD_OFFSET, 0); + 
_regwrite(rscc, GEN7_RSCC_HIDDEN_TCS_CMD0_ADDR + RSC_CMD_OFFSET, 0); + _regwrite(rscc, GEN7_RSCC_HIDDEN_TCS_CMD0_DATA + RSC_CMD_OFFSET * 2, + 0x80000000); + _regwrite(rscc, GEN7_RSCC_HIDDEN_TCS_CMD0_ADDR + RSC_CMD_OFFSET * 2, 0); + _regwrite(rscc, GEN7_RSCC_OVERRIDE_START_ADDR, 0); + _regwrite(rscc, GEN7_RSCC_PDC_SEQ_START_ADDR, 0x4520); + _regwrite(rscc, GEN7_RSCC_PDC_MATCH_VALUE_LO, 0x4510); + _regwrite(rscc, GEN7_RSCC_PDC_MATCH_VALUE_HI, 0x4514); + + /* Load RSC sequencer uCode for sleep and wakeup */ + _regwrite(rscc, GEN7_RSCC_SEQ_MEM_0_DRV0, 0xeaaae5a0); + _regwrite(rscc, GEN7_RSCC_SEQ_MEM_0_DRV0 + 1, 0xe1a1ebab); + _regwrite(rscc, GEN7_RSCC_SEQ_MEM_0_DRV0 + 2, 0xa2e0a581); + _regwrite(rscc, GEN7_RSCC_SEQ_MEM_0_DRV0 + 3, 0xecac82e2); + _regwrite(rscc, GEN7_RSCC_SEQ_MEM_0_DRV0 + 4, 0x0020edad); +} + +int gen7_load_pdc_ucode(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct resource *res_cfg; + void __iomem *cfg = NULL; + + res_cfg = platform_get_resource_byname(gmu->pdev, IORESOURCE_MEM, + "gmu_pdc"); + if (res_cfg) + cfg = ioremap(res_cfg->start, resource_size(res_cfg)); + + if (!cfg) { + dev_err(&gmu->pdev->dev, "Failed to map PDC CFG\n"); + return -ENODEV; + } + + /* Setup GPU PDC */ + _regwrite(cfg, GEN7_PDC_GPU_SEQ_START_ADDR, 0); + _regwrite(cfg, GEN7_PDC_GPU_ENABLE_PDC, 0x80000001); + + iounmap(cfg); + + return 0; +} + +/* Configure and enable GMU low power mode */ +static void gen7_gmu_power_config(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* Disable GMU WB/RB buffer and caches at boot */ + gmu_core_regwrite(device, GEN7_GMU_SYS_BUS_CONFIG, 0x1); + gmu_core_regwrite(device, GEN7_GMU_ICACHE_CONFIG, 0x1); + gmu_core_regwrite(device, GEN7_GMU_DCACHE_CONFIG, 0x1); +} + +static void gmu_ao_sync_event(struct adreno_device *adreno_dev) +{ + unsigned long flags; + u64 ticks; + + /* + * Get the GMU always on ticks and log it in a trace message. 
This + * will be used to map GMU ticks to ftrace time. Do this in atomic + * context to ensure nothing happens between reading the always + * on ticks and doing the trace. + */ + + local_irq_save(flags); + + ticks = gen7_read_alwayson(adreno_dev); + + trace_gmu_ao_sync(ticks); + + local_irq_restore(flags); +} + +int gen7_gmu_device_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + gmu_ao_sync_event(adreno_dev); + + /* Bring GMU out of reset */ + gmu_core_regwrite(device, GEN7_GMU_CM3_SYSRESET, 0); + + /* Make sure the write is posted before moving ahead */ + wmb(); + + if (gmu_core_timed_poll_check(device, GEN7_GMU_CM3_FW_INIT_RESULT, + BIT(8), 100, GENMASK(8, 0))) { + dev_err(&gmu->pdev->dev, "GMU failed to come out of reset\n"); + gmu_core_fault_snapshot(device); + return -ETIMEDOUT; + } + + return 0; +} + +/* + * gen7_gmu_hfi_start() - Write registers and start HFI. + * @device: Pointer to KGSL device + */ +int gen7_gmu_hfi_start(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + gmu_core_regwrite(device, GEN7_GMU_HFI_CTRL_INIT, 1); + + if (gmu_core_timed_poll_check(device, GEN7_GMU_HFI_CTRL_STATUS, + BIT(0), 100, BIT(0))) { + dev_err(&gmu->pdev->dev, "GMU HFI init failed\n"); + gmu_core_fault_snapshot(device); + return -ETIMEDOUT; + } + + return 0; +} + +int gen7_rscc_wakeup_sequence(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct device *dev = &gmu->pdev->dev; + + /* Skip wakeup sequence if we didn't do the sleep sequence */ + if (!test_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags)) + return 0; + + /* RSC wake sequence */ + gmu_core_regwrite(device, GEN7_GMU_RSCC_CONTROL_REQ, BIT(1)); + + /* Write request before polling */ + wmb(); + + if 
(gmu_core_timed_poll_check(device, GEN7_GMU_RSCC_CONTROL_ACK, + BIT(1), 100, BIT(1))) { + dev_err(dev, "Failed to do GPU RSC power on\n"); + return -ETIMEDOUT; + } + + if (gen7_timed_poll_check_rscc(gmu, GEN7_RSCC_SEQ_BUSY_DRV0, + 0x0, 100, UINT_MAX)) { + dev_err(dev, "GPU RSC sequence stuck in waking up GPU\n"); + return -ETIMEDOUT; + } + + gmu_core_regwrite(device, GEN7_GMU_RSCC_CONTROL_REQ, 0); + + clear_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags); + + return 0; +} + +int gen7_rscc_sleep_sequence(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + if (!test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) + return 0; + + if (test_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags)) + return 0; + + gmu_core_regwrite(device, GEN7_GMU_CM3_SYSRESET, 1); + /* Make sure M3 is in reset before going on */ + wmb(); + + gmu_core_regread(device, GEN7_GMU_GENERAL_9, &gmu->log_wptr_retention); + + gmu_core_regwrite(device, GEN7_GMU_RSCC_CONTROL_REQ, BIT(0)); + /* Make sure the request completes before continuing */ + wmb(); + + ret = gen7_timed_poll_check_rscc(gmu, GEN7_GPU_RSCC_RSC_STATUS0_DRV0, + BIT(16), 100, BIT(16)); + if (ret) { + dev_err(&gmu->pdev->dev, "GPU RSC power off fail\n"); + return -ETIMEDOUT; + } + + gmu_core_regwrite(device, GEN7_GMU_RSCC_CONTROL_REQ, 0); + + if (adreno_dev->lm_enabled) + gmu_core_regwrite(device, GEN7_GMU_AO_SPARE_CNTL, 0); + + set_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags); + + return 0; +} + +static struct kgsl_memdesc *find_gmu_memdesc(struct gen7_gmu_device *gmu, + u32 addr, u32 size) +{ + int i; + + for (i = 0; i < gmu->global_entries; i++) { + struct kgsl_memdesc *md = &gmu->gmu_globals[i]; + + if ((addr >= md->gmuaddr) && + (((addr + size) <= (md->gmuaddr + md->size)))) + return md; + } + + return NULL; +} + +static int find_vma_block(struct gen7_gmu_device *gmu, u32 addr, u32 size) +{ + int i; + + for (i = 0; i < GMU_MEM_TYPE_MAX; 
i++) { + struct gmu_vma_entry *vma = &gmu->vma[i]; + + if ((addr >= vma->start) && + ((addr + size) <= (vma->start + vma->size))) + return i; + } + + return -ENOENT; +} + +static void load_tcm(struct adreno_device *adreno_dev, const u8 *src, + u32 tcm_start, u32 base, const struct gmu_block_header *blk) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 tcm_offset = tcm_start + ((blk->addr - base)/sizeof(u32)); + + kgsl_regmap_bulk_write(&device->regmap, tcm_offset, src, + blk->size >> 2); +} + +int gen7_gmu_load_fw(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + const u8 *fw = (const u8 *)gmu->fw_image->data; + + while (fw < gmu->fw_image->data + gmu->fw_image->size) { + const struct gmu_block_header *blk = + (const struct gmu_block_header *)fw; + int id; + + fw += sizeof(*blk); + + /* Don't deal with zero size blocks */ + if (blk->size == 0) + continue; + + id = find_vma_block(gmu, blk->addr, blk->size); + + if (id < 0) { + dev_err(&gmu->pdev->dev, + "Unknown block in GMU FW addr:0x%x size:0x%x\n", + blk->addr, blk->size); + return -EINVAL; + } + + if (id == GMU_ITCM) { + load_tcm(adreno_dev, fw, + GEN7_GMU_CM3_ITCM_START, + gmu->vma[GMU_ITCM].start, blk); + } else if (id == GMU_DTCM) { + load_tcm(adreno_dev, fw, + GEN7_GMU_CM3_DTCM_START, + gmu->vma[GMU_DTCM].start, blk); + } else { + struct kgsl_memdesc *md = + find_gmu_memdesc(gmu, blk->addr, blk->size); + + if (!md) { + dev_err(&gmu->pdev->dev, + "No backing memory for GMU FW block addr:0x%x size:0x%x\n", + blk->addr, blk->size); + return -EINVAL; + } + + memcpy(md->hostptr + (blk->addr - md->gmuaddr), fw, + blk->size); + } + + fw += blk->size; + } + + /* Proceed only after the FW is written */ + wmb(); + return 0; +} + +static const char *oob_to_str(enum oob_request req) +{ + switch (req) { + case oob_gpu: + return "oob_gpu"; + case oob_perfcntr: + return "oob_perfcntr"; + case oob_boot_slumber: + return "oob_boot_slumber"; + case oob_dcvs: + 
return "oob_dcvs";
+	default:
+		return "unknown";
+	}
+}
+
+/*
+ * Ask the dispatcher to fault/recover when a perfcounter OOB request
+ * times out. Only oob_perfcntr triggers recovery because it is the only
+ * OOB that can arrive while the GPU is actively rendering.
+ */
+static void trigger_reset_recovery(struct adreno_device *adreno_dev,
+	enum oob_request req)
+{
+	/*
+	 * Trigger recovery for perfcounter oob only since only
+	 * perfcounter oob can happen alongside an actively rendering gpu.
+	 */
+	if (req != oob_perfcntr)
+		return;
+
+	if (adreno_dev->dispatch_ops && adreno_dev->dispatch_ops->fault)
+		adreno_dev->dispatch_ops->fault(adreno_dev,
+			ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
+}
+
+/*
+ * gen7_gmu_oob_set() - Request an out-of-band (OOB) vote from the GMU.
+ * @device: Pointer to the KGSL device
+ * @req: Which OOB to request (only oob_gpu and oob_perfcntr supported;
+ *       requests at or above oob_boot_slumber are rejected with -EINVAL)
+ *
+ * Each OOB owns a pair of interrupt bits counted down from the top of
+ * the register: bit (30 - 2*req) is written to HOST2GMU to raise the
+ * request and bit (31 - 2*req) is polled in GMU2HOST as the ack (the
+ * same bit gen7_gmu_oob_clear() uses). Returns 0 on success or
+ * -ETIMEDOUT if the GMU never acks (recovery is triggered for
+ * perfcounter OOBs).
+ */
+int gen7_gmu_oob_set(struct kgsl_device *device,
+		enum oob_request req)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret = 0;
+	int set, check;
+
+	if (req >= oob_boot_slumber) {
+		dev_err(&gmu->pdev->dev,
+			"Unsupported OOB request %s\n",
+			oob_to_str(req));
+		return -EINVAL;
+	}
+
+	set = BIT(30 - req * 2);
+	/*
+	 * Fix: the ack bit is the odd member of the per-request bit pair,
+	 * BIT(31 - req * 2), matching gen7_gmu_oob_clear() below. The
+	 * previous BIT(31 - req) polled/cleared the wrong bit for every
+	 * request except oob_gpu.
+	 */
+	check = BIT(31 - req * 2);
+
+	gmu_core_regwrite(device, GEN7_GMU_HOST2GMU_INTR_SET, set);
+
+	if (gmu_core_timed_poll_check(device, GEN7_GMU_GMU2HOST_INTR_INFO, check,
+		100, check)) {
+		gmu_core_fault_snapshot(device);
+		ret = -ETIMEDOUT;
+		WARN(1, "OOB request %s timed out\n", oob_to_str(req));
+		trigger_reset_recovery(adreno_dev, req);
+	}
+
+	gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, check);
+
+	trace_kgsl_gmu_oob_set(set);
+	return ret;
+}
+
+/*
+ * gen7_gmu_oob_clear() - Release a previously granted OOB vote.
+ * @device: Pointer to the KGSL device
+ * @req: Which OOB to release (must be below oob_boot_slumber)
+ *
+ * The clear is signalled to the GMU by setting the odd bit of the
+ * request's pair, BIT(31 - req * 2), in the HOST2GMU set register.
+ */
+void gen7_gmu_oob_clear(struct kgsl_device *device,
+		enum oob_request req)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int clear = BIT(31 - req * 2);
+
+	if (req >= oob_boot_slumber) {
+		dev_err(&gmu->pdev->dev, "Unsupported OOB clear %s\n",
+			oob_to_str(req));
+		return;
+	}
+
+	gmu_core_regwrite(device, GEN7_GMU_HOST2GMU_INTR_SET, clear);
+	trace_kgsl_gmu_oob_clear(clear);
+}
+
+void gen7_gmu_irq_enable(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct gen7_hfi *hfi
= &gmu->hfi;
+
+	/* Clear pending IRQs and Unmask needed IRQs */
+	gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, UINT_MAX);
+	gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_CLR, UINT_MAX);
+
+	gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_MASK,
+		(unsigned int)~HFI_IRQ_MASK);
+	gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_MASK,
+		(unsigned int)~GMU_AO_INT_MASK);
+
+	/* Enable all IRQs on host */
+	enable_irq(hfi->irq);
+	enable_irq(gmu->irq);
+}
+
+/*
+ * Mirror of gen7_gmu_irq_enable(): disable the host-side HFI and GMU
+ * IRQ lines first, then mask and clear everything at the GMU so no
+ * interrupt is left pending across a power transition.
+ */
+void gen7_gmu_irq_disable(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct gen7_hfi *hfi = &gmu->hfi;
+
+	/* Disable all IRQs on host */
+	disable_irq(gmu->irq);
+	disable_irq(hfi->irq);
+
+	/* Mask all IRQs and clear pending IRQs */
+	gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_MASK, UINT_MAX);
+	gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_MASK, UINT_MAX);
+
+	gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, UINT_MAX);
+	gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_CLR, UINT_MAX);
+}
+
+/* Send the H2F_MSG_START HFI message telling the GMU firmware to start */
+static int gen7_gmu_hfi_start_msg(struct adreno_device *adreno_dev)
+{
+	struct hfi_start_cmd req;
+	int ret;
+
+	ret = CMD_MSG_HDR(req, H2F_MSG_START);
+	if (ret)
+		return ret;
+
+	return gen7_hfi_send_generic_req(adreno_dev, &req);
+}
+
+/*
+ * Poll all four RSCC TCS status registers for completion (bit 0).
+ * Results are OR-ed so a timeout on any TCS makes the return non-zero.
+ */
+static int gen7_complete_rpmh_votes(struct gen7_gmu_device *gmu)
+{
+	int ret = 0;
+
+	ret |= gen7_timed_poll_check_rscc(gmu, GEN7_RSCC_TCS0_DRV0_STATUS,
+		BIT(0), 1, BIT(0));
+	ret |= gen7_timed_poll_check_rscc(gmu, GEN7_RSCC_TCS1_DRV0_STATUS,
+		BIT(0), 1, BIT(0));
+	ret |= gen7_timed_poll_check_rscc(gmu, GEN7_RSCC_TCS2_DRV0_STATUS,
+		BIT(0), 1, BIT(0));
+	ret |= gen7_timed_poll_check_rscc(gmu, GEN7_RSCC_TCS3_DRV0_STATUS,
+		BIT(0), 1, BIT(0));
+
+	return ret;
+}
+
+/* GX is "on" only if neither the GDSC-off nor the clock-off bit is set */
+#define GX_GDSC_POWER_OFF	BIT(0)
+#define GX_CLK_OFF		BIT(1)
+#define is_on(val)	(!(val & (GX_GDSC_POWER_OFF | GX_CLK_OFF)))
+
+bool gen7_gmu_gx_is_on(struct kgsl_device *device)
+{
+ unsigned int val; + + gmu_core_regread(device, GEN7_GMU_GFX_PWR_CLK_STATUS, &val); + return is_on(val); +} + +static const char *idle_level_name(int level) +{ + if (level == GPU_HW_ACTIVE) + return "GPU_HW_ACTIVE"; + else if (level == GPU_HW_IFPC) + return "GPU_HW_IFPC"; + + return "(Unknown)"; +} + +int gen7_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + unsigned int reg, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8; + unsigned long t; + u64 ts1, ts2; + + ts1 = gen7_read_alwayson(adreno_dev); + + t = jiffies + msecs_to_jiffies(100); + do { + gmu_core_regread(device, + GEN7_GPU_GMU_CX_GMU_RPMH_POWER_STATE, ®); + gmu_core_regread(device, GEN7_GMU_GFX_PWR_CLK_STATUS, ®1); + + /* + * Check that we are at lowest level. If lowest level is IFPC + * double check that GFX clock is off. + */ + if (gmu->idle_level == reg) + if (!(gmu->idle_level == GPU_HW_IFPC && is_on(reg1))) + return 0; + + /* Wait 100us to reduce unnecessary AHB bus traffic */ + usleep_range(10, 100); + } while (!time_after(jiffies, t)); + + /* Check one last time */ + gmu_core_regread(device, GEN7_GPU_GMU_CX_GMU_RPMH_POWER_STATE, ®); + gmu_core_regread(device, GEN7_GMU_GFX_PWR_CLK_STATUS, ®1); + + /* + * Check that we are at lowest level. If lowest level is IFPC + * double check that GFX clock is off. 
+ */ + if (gmu->idle_level == reg) + if (!(gmu->idle_level == GPU_HW_IFPC && is_on(reg1))) + return 0; + + ts2 = gen7_read_alwayson(adreno_dev); + + /* Collect abort data to help with debugging */ + gmu_core_regread(device, GEN7_GPU_GMU_AO_GPU_CX_BUSY_STATUS, ®2); + gmu_core_regread(device, GEN7_GMU_RBBM_INT_UNMASKED_STATUS, ®3); + gmu_core_regread(device, GEN7_GMU_GMU_PWR_COL_KEEPALIVE, ®4); + gmu_core_regread(device, GEN7_GMU_AO_SPARE_CNTL, ®5); + + dev_err(&gmu->pdev->dev, + "----------------------[ GMU error ]----------------------\n"); + dev_err(&gmu->pdev->dev, + "Timeout waiting for lowest idle level %s\n", + idle_level_name(gmu->idle_level)); + dev_err(&gmu->pdev->dev, "Start: %llx (absolute ticks)\n", ts1); + dev_err(&gmu->pdev->dev, "Poll: %llx (ticks relative to start)\n", + ts2-ts1); + dev_err(&gmu->pdev->dev, + "RPMH_POWER_STATE=%x GFX_PWR_CLK_STATUS=%x\n", reg, reg1); + dev_err(&gmu->pdev->dev, "CX_BUSY_STATUS=%x\n", reg2); + dev_err(&gmu->pdev->dev, + "RBBM_INT_UNMASKED_STATUS=%x PWR_COL_KEEPALIVE=%x\n", + reg3, reg4); + dev_err(&gmu->pdev->dev, "GEN7_GMU_AO_SPARE_CNTL=%x\n", reg5); + + /* Access GX registers only when GX is ON */ + if (is_on(reg1)) { + kgsl_regread(device, GEN7_CP_STATUS_1, ®6); + kgsl_regread(device, GEN7_CP_CP2GMU_STATUS, ®7); + kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_CNTL, ®8); + + dev_err(&gmu->pdev->dev, "GEN7_CP_STATUS_1=%x\n", reg6); + dev_err(&gmu->pdev->dev, + "CP2GMU_STATUS=%x CONTEXT_SWITCH_CNTL=%x\n", + reg7, reg8); + } + + WARN_ON(1); + gmu_core_fault_snapshot(device); + return -ETIMEDOUT; +} + +/* Bitmask for GPU idle status check */ +#define CXGXCPUBUSYIGNAHB BIT(30) +int gen7_gmu_wait_for_idle(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + u32 status2; + u64 ts1; + + ts1 = gen7_read_alwayson(adreno_dev); + if (gmu_core_timed_poll_check(device, GEN7_GPU_GMU_AO_GPU_CX_BUSY_STATUS, + 0, 100, CXGXCPUBUSYIGNAHB)) { 
+ gmu_core_regread(device, + GEN7_GPU_GMU_AO_GPU_CX_BUSY_STATUS2, &status2); + dev_err(&gmu->pdev->dev, + "GMU not idling: status2=0x%x %llx %llx\n", + status2, ts1, + gen7_read_alwayson(ADRENO_DEVICE(device))); + gmu_core_fault_snapshot(device); + return -ETIMEDOUT; + } + + return 0; +} + +void gen7_gmu_version_info(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + /* GMU version info is at a fixed offset in the DTCM */ + gmu_core_regread(device, GEN7_GMU_CM3_DTCM_START + 0xff8, + &gmu->ver.core); + gmu_core_regread(device, GEN7_GMU_CM3_DTCM_START + 0xff9, + &gmu->ver.core_dev); + gmu_core_regread(device, GEN7_GMU_CM3_DTCM_START + 0xffa, + &gmu->ver.pwr); + gmu_core_regread(device, GEN7_GMU_CM3_DTCM_START + 0xffb, + &gmu->ver.pwr_dev); + gmu_core_regread(device, GEN7_GMU_CM3_DTCM_START + 0xffc, + &gmu->ver.hfi); +} + +int gen7_gmu_itcm_shadow(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + u32 i, *dest; + + if (gmu->itcm_shadow) + return 0; + + gmu->itcm_shadow = vzalloc(gmu->vma[GMU_ITCM].size); + if (!gmu->itcm_shadow) + return -ENOMEM; + + dest = (u32 *)gmu->itcm_shadow; + + for (i = 0; i < (gmu->vma[GMU_ITCM].size >> 2); i++) + gmu_core_regread(KGSL_DEVICE(adreno_dev), + GEN7_GMU_CM3_ITCM_START + i, dest++); + + return 0; +} + +void gen7_gmu_register_config(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 val; + + /* Clear any previously set cm3 fault */ + atomic_set(&gmu->cm3_fault, 0); + + /* Vote veto for FAL10 */ + gmu_core_regwrite(device, GEN7_GPU_GMU_CX_GMU_CX_FALNEXT_INTF, 0x1); + gmu_core_regwrite(device, GEN7_GPU_GMU_CX_GMU_CX_FAL_INTF, 0x1); + + /* Turn on TCM retention */ + adreno_cx_misc_regwrite(adreno_dev, GEN7_GPU_CX_MISC_TCM_RET_CNTL, 1); + + /* Clear init result to make sure we are 
getting fresh value */ + gmu_core_regwrite(device, GEN7_GMU_CM3_FW_INIT_RESULT, 0); + gmu_core_regwrite(device, GEN7_GMU_CM3_BOOT_CONFIG, 0x2); + + gmu_core_regwrite(device, GEN7_GMU_HFI_QTBL_ADDR, + gmu->hfi.hfi_mem->gmuaddr); + gmu_core_regwrite(device, GEN7_GMU_HFI_QTBL_INFO, 1); + + gmu_core_regwrite(device, GEN7_GMU_AHB_FENCE_RANGE_0, BIT(31) | + FIELD_PREP(GENMASK(30, 18), 0x32) | + FIELD_PREP(GENMASK(17, 0), 0x8a0)); + + /* + * Make sure that CM3 state is at reset value. Snapshot is changing + * NMI bit and if we boot up GMU with NMI bit set GMU will boot + * straight in to NMI handler without executing __main code + */ + gmu_core_regwrite(device, GEN7_GMU_CM3_CFG, 0x4052); + + /** + * We may have asserted gbif halt as part of reset sequence which may + * not get cleared if the gdsc was not reset. So clear it before + * attempting GMU boot. + */ + kgsl_regwrite(device, GEN7_GBIF_HALT, 0x0); + + /* Set the log wptr index */ + gmu_core_regwrite(device, GEN7_GMU_GENERAL_9, + gmu->log_wptr_retention); + + /* Pass chipid to GMU FW, must happen before starting GMU */ + gmu_core_regwrite(device, GEN7_GMU_GENERAL_10, + ADRENO_GMU_CHIPID(adreno_dev->chipid)); + + /* Log size is encoded in (number of 4K units - 1) */ + val = (gmu->gmu_log->gmuaddr & GENMASK(31, 12)) | + ((GMU_LOG_SIZE/SZ_4K - 1) & GENMASK(7, 0)); + gmu_core_regwrite(device, GEN7_GMU_GENERAL_8, val); + + /* Configure power control and bring the GMU out of reset */ + gen7_gmu_power_config(adreno_dev); + + /* + * Enable BCL throttling - + * XOCLK1: countable: 0x13 (25% throttle) + * XOCLK2: countable: 0x17 (58% throttle) + * XOCLK3: countable: 0x19 (75% throttle) + * POWER_CONTROL_SELECT_0 controls counters 0 - 3, each selector + * is 8 bits wide. 
+ */ + if (adreno_dev->bcl_enabled) + gmu_core_regrmw(device, GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_0, + 0xffffff00, FIELD_PREP(GENMASK(31, 24), 0x19) | + FIELD_PREP(GENMASK(23, 16), 0x17) | + FIELD_PREP(GENMASK(15, 8), 0x13)); +} + +struct kgsl_memdesc *gen7_reserve_gmu_kernel_block(struct gen7_gmu_device *gmu, + u32 addr, u32 size, u32 vma_id) +{ + int ret; + struct kgsl_memdesc *md; + struct gmu_vma_entry *vma = &gmu->vma[vma_id]; + struct kgsl_device *device = KGSL_DEVICE(gen7_gmu_to_adreno(gmu)); + + if (gmu->global_entries == ARRAY_SIZE(gmu->gmu_globals)) + return ERR_PTR(-ENOMEM); + + md = &gmu->gmu_globals[gmu->global_entries]; + + ret = kgsl_allocate_kernel(device, md, size, 0, KGSL_MEMDESC_SYSMEM); + if (ret) { + memset(md, 0x0, sizeof(*md)); + return ERR_PTR(-ENOMEM); + } + + if (!addr) + addr = vma->next_va; + + ret = gmu_core_map_memdesc(gmu->domain, md, addr, + IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV); + if (ret) { + dev_err(&gmu->pdev->dev, + "Unable to map GMU kernel block: addr:0x%08x size:0x%x :%d\n", + addr, md->size, ret); + kgsl_sharedmem_free(md); + memset(md, 0, sizeof(*md)); + return ERR_PTR(-ENOMEM); + } + + md->gmuaddr = addr; + + vma->next_va = md->gmuaddr + md->size; + + gmu->global_entries++; + + return md; +} + +static int gen7_gmu_process_prealloc(struct gen7_gmu_device *gmu, + struct gmu_block_header *blk) +{ + struct kgsl_memdesc *md; + + int id = find_vma_block(gmu, blk->addr, blk->value); + + if (id < 0) { + dev_err(&gmu->pdev->dev, + "Invalid prealloc block addr: 0x%x value:%d\n", + blk->addr, blk->value); + return id; + } + + /* Nothing to do for TCM blocks or user uncached */ + if (id == GMU_ITCM || id == GMU_DTCM || id == GMU_NONCACHED_USER) + return 0; + + /* Check if the block is already allocated */ + md = find_gmu_memdesc(gmu, blk->addr, blk->value); + if (md != NULL) + return 0; + + md = gen7_reserve_gmu_kernel_block(gmu, blk->addr, blk->value, id); + + return PTR_ERR_OR_ZERO(md); +} + +int gen7_gmu_parse_fw(struct 
adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev); + struct gmu_block_header *blk; + int ret, offset = 0; + const char *gmufw_name = gen7_core->gmufw_name; + + /* GMU fw already saved and verified so do nothing new */ + if (gmu->fw_image) + return 0; + + if (gen7_core->gmufw_name == NULL) + return -EINVAL; + + ret = request_firmware(&gmu->fw_image, gmufw_name, &gmu->pdev->dev); + if (ret) { + if (gen7_core->gmufw_bak_name) { + gmufw_name = gen7_core->gmufw_bak_name; + ret = request_firmware(&gmu->fw_image, gmufw_name, + &gmu->pdev->dev); + } + if (ret) { + dev_err(&gmu->pdev->dev, + "request_firmware (%s) failed: %d\n", + gmufw_name, ret); + + return ret; + } + } + + /* + * Zero payload fw blocks contain meta data and are + * guaranteed to precede fw load data. Parse the + * meta data blocks. + */ + while (offset < gmu->fw_image->size) { + blk = (struct gmu_block_header *)&gmu->fw_image->data[offset]; + + if (offset + sizeof(*blk) > gmu->fw_image->size) { + dev_err(&gmu->pdev->dev, "Invalid FW Block\n"); + return -EINVAL; + } + + /* Done with zero length blocks so return */ + if (blk->size) + break; + + offset += sizeof(*blk); + + if (blk->type == GMU_BLK_TYPE_PREALLOC_REQ || + blk->type == GMU_BLK_TYPE_PREALLOC_PERSIST_REQ) { + ret = gen7_gmu_process_prealloc(gmu, blk); + + if (ret) + return ret; + } + } + + return 0; +} + +int gen7_gmu_memory_init(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + /* GMU master log */ + if (IS_ERR_OR_NULL(gmu->gmu_log)) + gmu->gmu_log = gen7_reserve_gmu_kernel_block(gmu, 0, + GMU_LOG_SIZE, GMU_NONCACHED_KERNEL); + + return PTR_ERR_OR_ZERO(gmu->gmu_log); +} + +static int gen7_gmu_init(struct adreno_device *adreno_dev) +{ + int ret; + + ret = gen7_gmu_parse_fw(adreno_dev); + if (ret) + return ret; + + ret = gen7_gmu_memory_init(adreno_dev); + if (ret) + return ret; + + return 
gen7_hfi_init(adreno_dev); +} + +static void _do_gbif_halt(struct kgsl_device *device, u32 reg, u32 ack_reg, + u32 mask, const char *client) +{ + u32 ack; + unsigned long t; + + kgsl_regwrite(device, reg, mask); + + t = jiffies + msecs_to_jiffies(100); + do { + kgsl_regread(device, ack_reg, &ack); + if ((ack & mask) == mask) + return; + + /* + * If we are attempting recovery in case of stall-on-fault + * then the halt sequence will not complete as long as SMMU + * is stalled. + */ + kgsl_mmu_pagefault_resume(&device->mmu, false); + + usleep_range(10, 100); + } while (!time_after(jiffies, t)); + + /* Check one last time */ + kgsl_mmu_pagefault_resume(&device->mmu, false); + + kgsl_regread(device, ack_reg, &ack); + if ((ack & mask) == mask) + return; + + dev_err(device->dev, "%s GBIF halt timed out\n", client); +} + +static void gen7_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) +{ + int ret = 0; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* Disconnect GPU from BUS is not needed if CX GDSC goes off later */ + + /* Check no outstanding RPMh voting */ + gen7_complete_rpmh_votes(gmu); + + /* Clear the WRITEDROPPED fields and set fence to allow mode */ + gmu_core_regwrite(device, GEN7_GMU_AHB_FENCE_STATUS_CLR, 0x7); + gmu_core_regwrite(device, GEN7_GMU_AO_AHB_FENCE_CTRL, 0); + + /* Make sure above writes are committed before we proceed to recovery */ + wmb(); + + gmu_core_regwrite(device, GEN7_GMU_CM3_SYSRESET, 1); + + /* Halt GX traffic */ + if (gen7_gmu_gx_is_on(device)) + _do_gbif_halt(device, GEN7_RBBM_GBIF_HALT, + GEN7_RBBM_GBIF_HALT_ACK, + GEN7_GBIF_GX_HALT_MASK, + "GX"); + + /* Halt CX traffic */ + _do_gbif_halt(device, GEN7_GBIF_HALT, GEN7_GBIF_HALT_ACK, + GEN7_GBIF_ARB_HALT_MASK, "CX"); + + if (gen7_gmu_gx_is_on(device)) + kgsl_regwrite(device, GEN7_RBBM_SW_RESET_CMD, 0x1); + + /* Allow the software reset to complete */ + udelay(100); + + /* + * This is based on the assumption that 
GMU is the only one controlling + * the GX HS. This code path is the only client voting for GX through + * the regulator interface. + */ + if (gmu->gx_gdsc) { + if (gen7_gmu_gx_is_on(device)) { + /* Switch gx gdsc control from GMU to CPU + * force non-zero reference count in clk driver + * so next disable call will turn + * off the GDSC + */ + ret = regulator_enable(gmu->gx_gdsc); + if (ret) + dev_err(&gmu->pdev->dev, + "suspend fail: gx enable %d\n", ret); + + ret = regulator_disable(gmu->gx_gdsc); + if (ret) + dev_err(&gmu->pdev->dev, + "suspend fail: gx disable %d\n", ret); + + if (gen7_gmu_gx_is_on(device)) + dev_err(&gmu->pdev->dev, + "gx is stuck on\n"); + } + } +} + +/* + * gen7_gmu_notify_slumber() - initiate request to GMU to prepare to slumber + * @device: Pointer to KGSL device + */ +static int gen7_gmu_notify_slumber(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int bus_level = pwr->pwrlevels[pwr->default_pwrlevel].bus_freq; + int perf_idx = gmu->hfi.dcvs_table.gpu_level_num - + pwr->default_pwrlevel - 1; + struct hfi_prep_slumber_cmd req = { + .freq = perf_idx, + .bw = bus_level, + }; + int ret; + + /* Disable the power counter so that the GMU is not busy */ + gmu_core_regwrite(device, GEN7_GMU_CX_GMU_POWER_COUNTER_ENABLE, 0); + + ret = CMD_MSG_HDR(req, H2F_MSG_PREPARE_SLUMBER); + if (ret) + return ret; + + ret = gen7_hfi_send_generic_req(adreno_dev, &req); + + /* Make sure the fence is in ALLOW mode */ + gmu_core_regwrite(device, GEN7_GMU_AO_AHB_FENCE_CTRL, 0); + return ret; +} + +void gen7_gmu_suspend(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + gen7_gmu_irq_disable(adreno_dev); + + gen7_gmu_pwrctrl_suspend(adreno_dev); + + clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); + + 
gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + + dev_err(&gmu->pdev->dev, "Suspended GMU\n"); + + device->state = KGSL_STATE_NONE; +} + +static int gen7_gmu_dcvs_set(struct adreno_device *adreno_dev, + int gpu_pwrlevel, int bus_level) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table; + struct hfi_gx_bw_perf_vote_cmd req = { + .ack_type = DCVS_ACK_BLOCK, + .freq = INVALID_DCVS_IDX, + .bw = INVALID_DCVS_IDX, + }; + int ret = 0; + + if (!test_bit(GMU_PRIV_HFI_STARTED, &gmu->flags)) + return 0; + + /* Do not set to XO and lower GPU clock vote from GMU */ + if ((gpu_pwrlevel != INVALID_DCVS_IDX) && + (gpu_pwrlevel >= table->gpu_level_num - 1)) + return -EINVAL; + + if (gpu_pwrlevel < table->gpu_level_num - 1) + req.freq = table->gpu_level_num - gpu_pwrlevel - 1; + + if (bus_level < pwr->ddr_table_count && bus_level > 0) + req.bw = bus_level; + + /* GMU will vote for slumber levels through the sleep sequence */ + if ((req.freq == INVALID_DCVS_IDX) && + (req.bw == INVALID_DCVS_IDX)) { + return 0; + } + + ret = CMD_MSG_HDR(req, H2F_MSG_GX_BW_PERF_VOTE); + if (ret) + return ret; + + ret = gen7_hfi_send_generic_req(adreno_dev, &req); + if (ret) { + dev_err_ratelimited(&gmu->pdev->dev, + "Failed to set GPU perf idx %d, bw idx %d\n", + req.freq, req.bw); + + /* + * If this was a dcvs request along side an active gpu, request + * dispatcher based reset and recovery. 
+ */ + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT | + ADRENO_GMU_FAULT_SKIP_SNAPSHOT); + } + + return ret; +} + +static int gen7_gmu_clock_set(struct adreno_device *adreno_dev, u32 pwrlevel) +{ + return gen7_gmu_dcvs_set(adreno_dev, pwrlevel, INVALID_DCVS_IDX); +} + +static int gen7_gmu_ifpc_store(struct kgsl_device *device, + unsigned int val) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + unsigned int requested_idle_level; + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_IFPC)) + return -EINVAL; + + if (val) + requested_idle_level = GPU_HW_IFPC; + else + requested_idle_level = GPU_HW_ACTIVE; + + if (gmu->idle_level == requested_idle_level) + return 0; + + /* Power down the GPU before changing the idle level */ + return adreno_power_cycle_u32(adreno_dev, &gmu->idle_level, + requested_idle_level); +} + +static unsigned int gen7_gmu_ifpc_show(struct kgsl_device *device) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(ADRENO_DEVICE(device)); + + return gmu->idle_level == GPU_HW_IFPC; +} + +/* Send an NMI to the GMU */ +void gen7_gmu_send_nmi(struct adreno_device *adreno_dev, bool force) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + u32 result; + + /* + * Do not send NMI if the SMMU is stalled because GMU will not be able + * to save cm3 state to DDR. + */ + if (gen7_gmu_gx_is_on(device) && gen7_is_smmu_stalled(device)) { + dev_err(&gmu->pdev->dev, + "Skipping NMI because SMMU is stalled\n"); + return; + } + + if (force) + goto nmi; + + /* + * We should not send NMI if there was a CM3 fault reported because we + * don't want to overwrite the critical CM3 state captured by gmu before + * it sent the CM3 fault interrupt. Also don't send NMI if GMU reset is + * already active. We could have hit a GMU assert and NMI might have + * already been triggered. 
+ */ + + /* make sure we're reading the latest cm3_fault */ + smp_rmb(); + + if (atomic_read(&gmu->cm3_fault)) + return; + + gmu_core_regread(device, GEN7_GMU_CM3_FW_INIT_RESULT, &result); + + if (result & 0xE00) + return; + +nmi: + /* Mask so there's no interrupt caused by NMI */ + gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_MASK, UINT_MAX); + + /* Make sure the interrupt is masked before causing it */ + wmb(); + + /* This will cause the GMU to save it's internal state to ddr */ + gmu_core_regrmw(device, GEN7_GMU_CM3_CFG, BIT(9), BIT(9)); + + /* Make sure the NMI is invoked before we proceed*/ + wmb(); + + /* Wait for the NMI to be handled */ + udelay(200); +} + +static void gen7_gmu_cooperative_reset(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + unsigned int result; + + gmu_core_regwrite(device, GEN7_GMU_CX_GMU_WDOG_CTRL, 0); + gmu_core_regwrite(device, GEN7_GMU_HOST2GMU_INTR_SET, BIT(17)); + + /* + * After triggering graceful death wait for snapshot ready + * indication from GMU. + */ + if (!gmu_core_timed_poll_check(device, GEN7_GMU_CM3_FW_INIT_RESULT, + 0x800, 2, 0x800)) + return; + + gmu_core_regread(device, GEN7_GMU_CM3_FW_INIT_RESULT, &result); + dev_err(&gmu->pdev->dev, + "GMU cooperative reset timed out 0x%x\n", result); + /* + * If we dont get a snapshot ready from GMU, trigger NMI + * and if we still timeout then we just continue with reset. 
+ */ + gen7_gmu_send_nmi(adreno_dev, true); + + gmu_core_regread(device, GEN7_GMU_CM3_FW_INIT_RESULT, &result); + if ((result & 0x800) != 0x800) + dev_err(&gmu->pdev->dev, + "GMU cooperative reset NMI timed out 0x%x\n", result); +} + +static int gen7_gmu_wait_for_active_transition(struct kgsl_device *device) +{ + unsigned int reg; + struct gen7_gmu_device *gmu = to_gen7_gmu(ADRENO_DEVICE(device)); + + if (gmu_core_timed_poll_check(device, GEN7_GPU_GMU_CX_GMU_RPMH_POWER_STATE, + GPU_HW_ACTIVE, 100, GENMASK(3, 0))) { + gmu_core_regread(device, GEN7_GPU_GMU_CX_GMU_RPMH_POWER_STATE, ®); + dev_err(&gmu->pdev->dev, + "GMU failed to move to ACTIVE state, Current state: 0x%x\n", + reg); + + return -ETIMEDOUT; + } + + return 0; +} + +static bool gen7_gmu_scales_bandwidth(struct kgsl_device *device) +{ + return true; +} + +void gen7_gmu_handle_watchdog(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 mask; + + /* Temporarily mask the watchdog interrupt to prevent a storm */ + gmu_core_regread(device, GEN7_GMU_AO_HOST_INTERRUPT_MASK, &mask); + gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_MASK, + (mask | GMU_INT_WDOG_BITE)); + + gen7_gmu_send_nmi(adreno_dev, false); + + dev_err_ratelimited(&gmu->pdev->dev, + "GMU watchdog expired interrupt received\n"); +} + +static irqreturn_t gen7_gmu_irq_handler(int irq, void *data) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + const struct gen7_gpudev *gen7_gpudev = + to_gen7_gpudev(ADRENO_GPU_DEVICE(adreno_dev)); + unsigned int status = 0; + + gmu_core_regread(device, GEN7_GMU_AO_HOST_INTERRUPT_STATUS, &status); + gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_CLR, status); + + if (status & GMU_INT_HOST_AHB_BUS_ERR) + dev_err_ratelimited(&gmu->pdev->dev, + "AHB bus error interrupt received\n"); + + if (status 
& GMU_INT_WDOG_BITE) + gen7_gpudev->handle_watchdog(adreno_dev); + + if (status & GMU_INT_FENCE_ERR) { + unsigned int fence_status; + + gmu_core_regread(device, GEN7_GMU_AHB_FENCE_STATUS, + &fence_status); + dev_err_ratelimited(&gmu->pdev->dev, + "FENCE error interrupt received %x\n", fence_status); + } + + if (status & ~GMU_AO_INT_MASK) + dev_err_ratelimited(&gmu->pdev->dev, + "Unhandled GMU interrupts 0x%lx\n", + status & ~GMU_AO_INT_MASK); + + return IRQ_HANDLED; +} + +void gen7_gmu_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* Send nmi only if it was a gmu fault */ + if (device->gmu_fault) + gen7_gmu_send_nmi(adreno_dev, false); + + gen7_gmu_device_snapshot(device, snapshot); + + gen7_snapshot(adreno_dev, snapshot); + + gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, UINT_MAX); + gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_MASK, HFI_IRQ_MASK); +} + +void gen7_gmu_aop_send_acd_state(struct gen7_gmu_device *gmu, bool flag) +{ + struct qmp_pkt msg; + char msg_buf[36]; + u32 size; + int ret; + + if (IS_ERR_OR_NULL(gmu->mailbox.channel)) + return; + + size = scnprintf(msg_buf, sizeof(msg_buf), + "{class: gpu, res: acd, val: %d}", flag); + + /* mailbox controller expects 4-byte aligned buffer */ + msg.size = ALIGN((size + 1), SZ_4); + msg.data = msg_buf; + + ret = mbox_send_message(gmu->mailbox.channel, &msg); + + if (ret < 0) + dev_err(&gmu->pdev->dev, + "AOP mbox send message failed: %d\n", ret); +} + +int gen7_gmu_enable_gdsc(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + ret = regulator_enable(gmu->cx_gdsc); + if (ret) + dev_err(&gmu->pdev->dev, + "Failed to enable GMU CX gdsc, error %d\n", ret); + + return ret; +} + +int gen7_gmu_enable_clks(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int 
ret; + + ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", + GMU_FREQ_MIN); + if (ret) { + dev_err(&gmu->pdev->dev, "Unable to set the GMU clock\n"); + return ret; + } + + ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "hub_clk", + 150000000); + if (ret && ret != -ENODEV) { + dev_err(&gmu->pdev->dev, "Unable to set the HUB clock\n"); + return ret; + } + + ret = clk_bulk_prepare_enable(gmu->num_clks, gmu->clks); + if (ret) { + dev_err(&gmu->pdev->dev, "Cannot enable GMU clocks\n"); + return ret; + } + + device->state = KGSL_STATE_AWARE; + + return 0; +} + +static int gen7_gmu_first_boot(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int level, ret; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_AWARE); + + gen7_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled); + + ret = gen7_gmu_enable_gdsc(adreno_dev); + if (ret) + return ret; + + ret = gen7_gmu_enable_clks(adreno_dev); + if (ret) + goto gdsc_off; + + ret = gen7_gmu_load_fw(adreno_dev); + if (ret) + goto clks_gdsc_off; + + gen7_gmu_version_info(adreno_dev); + + ret = gen7_gmu_itcm_shadow(adreno_dev); + if (ret) + goto clks_gdsc_off; + + gen7_gmu_register_config(adreno_dev); + + gen7_gmu_irq_enable(adreno_dev); + + /* Vote for minimal DDR BW for GMU to init */ + level = pwr->pwrlevels[pwr->default_pwrlevel].bus_min; + icc_set_bw(pwr->icc_path, 0, kBps_to_icc(pwr->ddr_table[level])); + + ret = gen7_gmu_device_start(adreno_dev); + if (ret) + goto err; + + if (!test_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags)) { + ret = gen7_load_pdc_ucode(adreno_dev); + if (ret) + goto err; + + gen7_load_rsc_ucode(adreno_dev); + set_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags); + } + + ret = gen7_gmu_hfi_start(adreno_dev); + if (ret) + goto err; + + ret = gen7_hfi_start(adreno_dev); + if (ret) + goto err; + + icc_set_bw(pwr->icc_path, 0, 0); + + device->gmu_fault = 
false; + + if (ADRENO_FEATURE(adreno_dev, ADRENO_BCL)) + adreno_dev->bcl_enabled = true; + + trace_kgsl_pwr_set_state(device, KGSL_STATE_AWARE); + + return 0; + +err: + if (device->gmu_fault) { + gen7_gmu_suspend(adreno_dev); + return ret; + } + + gen7_gmu_irq_disable(adreno_dev); + +clks_gdsc_off: + clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); + +gdsc_off: + /* Poll to make sure that the CX is off */ + gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + + return ret; +} + +static int gen7_gmu_boot(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret = 0; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_AWARE); + + ret = gen7_gmu_enable_gdsc(adreno_dev); + if (ret) + return ret; + + ret = gen7_gmu_enable_clks(adreno_dev); + if (ret) + goto gdsc_off; + + ret = gen7_rscc_wakeup_sequence(adreno_dev); + if (ret) + goto clks_gdsc_off; + + ret = gen7_gmu_load_fw(adreno_dev); + if (ret) + goto clks_gdsc_off; + + gen7_gmu_register_config(adreno_dev); + + gen7_gmu_irq_enable(adreno_dev); + + ret = gen7_gmu_device_start(adreno_dev); + if (ret) + goto err; + + ret = gen7_gmu_hfi_start(adreno_dev); + if (ret) + goto err; + + ret = gen7_hfi_start(adreno_dev); + if (ret) + goto err; + + device->gmu_fault = false; + + trace_kgsl_pwr_set_state(device, KGSL_STATE_AWARE); + + return 0; + +err: + if (device->gmu_fault) { + gen7_gmu_suspend(adreno_dev); + return ret; + } + + gen7_gmu_irq_disable(adreno_dev); + +clks_gdsc_off: + clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); + +gdsc_off: + /* Poll to make sure that the CX is off */ + gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + + return ret; +} + +static void set_acd(struct adreno_device *adreno_dev, void *priv) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + adreno_dev->acd_enabled = *((bool *)priv); + gen7_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled); +} + 
+static int gen7_gmu_acd_set(struct kgsl_device *device, bool val) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + if (IS_ERR_OR_NULL(gmu->mailbox.channel)) + return -EINVAL; + + /* Don't do any unneeded work if ACD is already in the correct state */ + if (adreno_dev->acd_enabled == val) + return 0; + + /* Power cycle the GPU for changes to take effect */ + return adreno_power_cycle(adreno_dev, set_acd, &val); +} + +static const struct gmu_dev_ops gen7_gmudev = { + .oob_set = gen7_gmu_oob_set, + .oob_clear = gen7_gmu_oob_clear, + .gx_is_on = gen7_gmu_gx_is_on, + .ifpc_store = gen7_gmu_ifpc_store, + .ifpc_show = gen7_gmu_ifpc_show, + .cooperative_reset = gen7_gmu_cooperative_reset, + .wait_for_active_transition = gen7_gmu_wait_for_active_transition, + .scales_bandwidth = gen7_gmu_scales_bandwidth, + .acd_set = gen7_gmu_acd_set, +}; + +static int gen7_gmu_bus_set(struct adreno_device *adreno_dev, int buslevel, + u32 ab) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int ret = 0; + + if (buslevel != pwr->cur_buslevel) { + ret = gen7_gmu_dcvs_set(adreno_dev, INVALID_DCVS_IDX, buslevel); + if (ret) + return ret; + + pwr->cur_buslevel = buslevel; + + trace_kgsl_buslevel(device, pwr->active_pwrlevel, buslevel); + } + + if (ab != pwr->cur_ab) { + icc_set_bw(pwr->icc_path, MBps_to_icc(ab), 0); + pwr->cur_ab = ab; + } + + return ret; +} + +static void gen7_free_gmu_globals(struct gen7_gmu_device *gmu) +{ + int i; + + for (i = 0; i < gmu->global_entries; i++) { + struct kgsl_memdesc *md = &gmu->gmu_globals[i]; + + if (!md->gmuaddr) + continue; + + iommu_unmap(gmu->domain, + md->gmuaddr, md->size); + + dma_free_attrs(&gmu->pdev->dev, (size_t) md->size, + (void *)md->hostptr, md->physaddr, 0); + + memset(md, 0, sizeof(*md)); + } + + if (gmu->domain) { + iommu_detach_device(gmu->domain, &gmu->pdev->dev); + iommu_domain_free(gmu->domain); + 
gmu->domain = NULL; + } + + gmu->global_entries = 0; +} + +static int gen7_gmu_aop_mailbox_init(struct adreno_device *adreno_dev, + struct gen7_gmu_device *gmu) +{ + struct kgsl_mailbox *mailbox = &gmu->mailbox; + + mailbox->client.dev = &gmu->pdev->dev; + mailbox->client.tx_block = true; + mailbox->client.tx_tout = 1000; + mailbox->client.knows_txdone = false; + + mailbox->channel = mbox_request_channel(&mailbox->client, 0); + if (IS_ERR(mailbox->channel)) + return PTR_ERR(mailbox->channel); + + adreno_dev->acd_enabled = true; + return 0; +} + +static void gen7_gmu_acd_probe(struct kgsl_device *device, + struct gen7_gmu_device *gmu, struct device_node *node) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct kgsl_pwrlevel *pwrlevel = + &pwr->pwrlevels[pwr->num_pwrlevels - 1]; + struct hfi_acd_table_cmd *cmd = &gmu->hfi.acd_table; + int ret, i, cmd_idx = 0; + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_ACD)) + return; + + cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ACD_TBL, sizeof(*cmd), HFI_MSG_CMD); + + cmd->version = 1; + cmd->stride = 1; + cmd->enable_by_level = 0; + + /* + * Iterate through each gpu power level and generate a mask for GMU + * firmware for ACD enabled levels and store the corresponding control + * register configurations to the acd_table structure. 
+ */ + for (i = 0; i < pwr->num_pwrlevels; i++) { + if (pwrlevel->acd_level) { + cmd->enable_by_level |= (1 << (i + 1)); + cmd->data[cmd_idx++] = pwrlevel->acd_level; + } + pwrlevel--; + } + + if (!cmd->enable_by_level) + return; + + cmd->num_levels = cmd_idx; + + ret = gen7_gmu_aop_mailbox_init(adreno_dev, gmu); + if (ret) + dev_err(&gmu->pdev->dev, + "AOP mailbox init failed: %d\n", ret); +} + +static int gen7_gmu_reg_probe(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + ret = kgsl_regmap_add_region(&device->regmap, gmu->pdev, "gmu", NULL, NULL); + + if (ret) + dev_err(&gmu->pdev->dev, "Unable to map the GMU registers\n"); + /* + * gmu_ao_blk_dec1 and gmu_ao_blk_dec2 are contiguous and contained within the gmu region + * mapped above. gmu_ao_blk_dec0 is not within the gmu region and is mapped separately. + */ + kgsl_regmap_add_region(&device->regmap, gmu->pdev, "gmu_ao_blk_dec0", NULL, NULL); + + return ret; +} + +static int gen7_gmu_regulators_probe(struct gen7_gmu_device *gmu, + struct platform_device *pdev) +{ + gmu->cx_gdsc = devm_regulator_get(&pdev->dev, "vddcx"); + if (IS_ERR(gmu->cx_gdsc)) { + if (PTR_ERR(gmu->cx_gdsc) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Couldn't get the vddcx gdsc\n"); + return PTR_ERR(gmu->cx_gdsc); + } + + gmu->gx_gdsc = devm_regulator_get(&pdev->dev, "vdd"); + if (IS_ERR(gmu->gx_gdsc)) { + if (PTR_ERR(gmu->gx_gdsc) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Couldn't get the vdd gdsc\n"); + return PTR_ERR(gmu->gx_gdsc); + } + + return 0; +} + +void gen7_gmu_remove(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + if (!IS_ERR_OR_NULL(gmu->mailbox.channel)) + mbox_free_channel(gmu->mailbox.channel); + + adreno_dev->acd_enabled = false; + + if (gmu->fw_image) + release_firmware(gmu->fw_image); + + 
gen7_free_gmu_globals(gmu); + + vfree(gmu->itcm_shadow); + kobject_put(&gmu->log_kobj); +} + +static int gen7_gmu_iommu_fault_handler(struct iommu_domain *domain, + struct device *dev, unsigned long addr, int flags, void *token) +{ + char *fault_type = "unknown"; + + if (flags & IOMMU_FAULT_TRANSLATION) + fault_type = "translation"; + else if (flags & IOMMU_FAULT_PERMISSION) + fault_type = "permission"; + else if (flags & IOMMU_FAULT_EXTERNAL) + fault_type = "external"; + else if (flags & IOMMU_FAULT_TRANSACTION_STALLED) + fault_type = "transaction stalled"; + + dev_err(dev, "GMU fault addr = %lX, context=kernel (%s %s fault)\n", + addr, + (flags & IOMMU_FAULT_WRITE) ? "write" : "read", + fault_type); + + return 0; +} + +static int gen7_gmu_iommu_init(struct gen7_gmu_device *gmu) +{ + int ret; + int no_stall = 1; + + gmu->domain = iommu_domain_alloc(&platform_bus_type); + if (gmu->domain == NULL) { + dev_err(&gmu->pdev->dev, "Unable to allocate GMU IOMMU domain\n"); + return -ENODEV; + } + + /* + * Disable stall on fault for the GMU context bank. + * This sets SCTLR.CFCFG = 0. + * Also note that, the smmu driver sets SCTLR.HUPCF = 0 by default. 
+ */ + iommu_domain_set_attr(gmu->domain, + DOMAIN_ATTR_FAULT_MODEL_NO_STALL, &no_stall); + + ret = iommu_attach_device(gmu->domain, &gmu->pdev->dev); + if (!ret) { + iommu_set_fault_handler(gmu->domain, + gen7_gmu_iommu_fault_handler, gmu); + return 0; + } + + dev_err(&gmu->pdev->dev, + "Unable to attach GMU IOMMU domain: %d\n", ret); + iommu_domain_free(gmu->domain); + gmu->domain = NULL; + + return ret; +} + +int gen7_gmu_probe(struct kgsl_device *device, + struct platform_device *pdev) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct device *dev = &pdev->dev; + struct resource *res; + int ret, i; + + gmu->pdev = pdev; + + dma_set_coherent_mask(&gmu->pdev->dev, DMA_BIT_MASK(64)); + gmu->pdev->dev.dma_mask = &gmu->pdev->dev.coherent_dma_mask; + set_dma_ops(&gmu->pdev->dev, NULL); + + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, + "rscc"); + if (res) { + gmu->rscc_virt = devm_ioremap(&device->pdev->dev, res->start, + resource_size(res)); + if (!gmu->rscc_virt) { + dev_err(&gmu->pdev->dev, "rscc ioremap failed\n"); + return -ENOMEM; + } + } + + /* Set up GMU regulators */ + ret = gen7_gmu_regulators_probe(gmu, pdev); + if (ret) + return ret; + + ret = devm_clk_bulk_get_all(&pdev->dev, &gmu->clks); + if (ret < 0) + return ret; + + /* + * Voting for apb_pclk will enable power and clocks required for + * QDSS path to function. However, if QCOM_KGSL_QDSS_STM is not enabled, + * QDSS is essentially unusable. Hence, if QDSS cannot be used, + * don't vote for this clock. 
+ */ + if (!IS_ENABLED(CONFIG_QCOM_KGSL_QDSS_STM)) { + for (i = 0; i < ret; i++) { + if (!strcmp(gmu->clks[i].id, "apb_pclk")) { + gmu->clks[i].clk = NULL; + break; + } + } + } + + gmu->num_clks = ret; + + /* Set up GMU IOMMU and shared memory with GMU */ + ret = gen7_gmu_iommu_init(gmu); + if (ret) + goto error; + + gmu->vma = gen7_gmu_vma; + + /* Map and reserve GMU CSRs registers */ + ret = gen7_gmu_reg_probe(adreno_dev); + if (ret) + goto error; + + /* Populates RPMh configurations */ + ret = gen7_build_rpmh_tables(adreno_dev); + if (ret) + goto error; + + /* Set up GMU idle state */ + if (ADRENO_FEATURE(adreno_dev, ADRENO_IFPC)) + gmu->idle_level = GPU_HW_IFPC; + else + gmu->idle_level = GPU_HW_ACTIVE; + + gen7_gmu_acd_probe(device, gmu, pdev->dev.of_node); + + set_bit(GMU_ENABLED, &device->gmu_core.flags); + + device->gmu_core.dev_ops = &gen7_gmudev; + + /* Set default GMU attributes */ + gmu->log_stream_enable = false; + gmu->log_group_mask = 0x3; + + /* GMU sysfs nodes setup */ + kobject_init_and_add(&gmu->log_kobj, &log_kobj_type, &dev->kobj, "log"); + + of_property_read_u32(gmu->pdev->dev.of_node, "qcom,gmu-perf-ddr-bw", + &gmu->perf_ddr_bw); + + gmu->irq = kgsl_request_irq(gmu->pdev, "gmu", + gen7_gmu_irq_handler, device); + + if (gmu->irq >= 0) + return 0; + + ret = gmu->irq; + +error: + gen7_gmu_remove(device); + return ret; +} + +static void gen7_gmu_active_count_put(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return; + + if (WARN(atomic_read(&device->active_cnt) == 0, + "Unbalanced get/put calls to KGSL active count\n")) + return; + + if (atomic_dec_and_test(&device->active_cnt)) { + kgsl_pwrscale_update_stats(device); + kgsl_pwrscale_update(device); + kgsl_start_idle_timer(device); + } + + trace_kgsl_active_count(device, + (unsigned long) __builtin_return_address(0)); + + wake_up(&device->active_cnt_wq); +} + +int gen7_halt_gbif(struct adreno_device 
*adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + /* Halt new client requests */ + kgsl_regwrite(device, GEN7_GBIF_HALT, GEN7_GBIF_CLIENT_HALT_MASK); + ret = adreno_wait_for_halt_ack(device, + GEN7_GBIF_HALT_ACK, GEN7_GBIF_CLIENT_HALT_MASK); + + /* Halt all AXI requests */ + kgsl_regwrite(device, GEN7_GBIF_HALT, GEN7_GBIF_ARB_HALT_MASK); + ret = adreno_wait_for_halt_ack(device, + GEN7_GBIF_HALT_ACK, GEN7_GBIF_ARB_HALT_MASK); + + /* De-assert the halts */ + kgsl_regwrite(device, GEN7_GBIF_HALT, 0x0); + + return ret; +} + +static int gen7_gmu_power_off(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret = 0; + + if (device->gmu_fault) + goto error; + + /* Wait for the lowest idle level we requested */ + ret = gen7_gmu_wait_for_lowest_idle(adreno_dev); + if (ret) + goto error; + + ret = gen7_gmu_notify_slumber(adreno_dev); + if (ret) + goto error; + + ret = gen7_gmu_wait_for_idle(adreno_dev); + if (ret) + goto error; + + ret = gen7_rscc_sleep_sequence(adreno_dev); + if (ret) + goto error; + + /* Now that we are done with GMU and GPU, Clear the GBIF */ + ret = gen7_halt_gbif(adreno_dev); + if (ret) + goto error; + + gen7_gmu_irq_disable(adreno_dev); + + gen7_hfi_stop(adreno_dev); + + clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); + + /* Poll to make sure that the CX is off */ + gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + + device->state = KGSL_STATE_NONE; + + return 0; + +error: + gen7_hfi_stop(adreno_dev); + gen7_gmu_suspend(adreno_dev); + + return ret; +} + +void gen7_enable_gpu_irq(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + kgsl_pwrctrl_irq(device, true); + + adreno_irqctrl(adreno_dev, 1); +} + +void gen7_disable_gpu_irq(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + kgsl_pwrctrl_irq(device, 
false); + + if (gen7_gmu_gx_is_on(device)) + adreno_irqctrl(adreno_dev, 0); +} + +static int gen7_gpu_boot(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + /* Clear any GPU faults that might have been left over */ + adreno_clear_gpu_fault(adreno_dev); + + adreno_set_active_ctxs_null(adreno_dev); + + adreno_ringbuffer_set_global(adreno_dev, 0); + + ret = kgsl_mmu_start(device); + if (ret) + goto err; + + ret = gen7_gmu_oob_set(device, oob_gpu); + if (ret) + goto oob_clear; + + ret = gen7_gmu_hfi_start_msg(adreno_dev); + if (ret) + goto oob_clear; + + /* Clear the busy_data stats - we're starting over from scratch */ + memset(&adreno_dev->busy_data, 0, sizeof(adreno_dev->busy_data)); + + gen7_start(adreno_dev); + + /* Re-initialize the coresight registers if applicable */ + adreno_coresight_start(adreno_dev); + + adreno_perfcounter_start(adreno_dev); + + /* Clear FSR here in case it is set from a previous pagefault */ + kgsl_mmu_clear_fsr(&device->mmu); + + gen7_enable_gpu_irq(adreno_dev); + + ret = gen7_rb_start(adreno_dev); + if (ret) { + gen7_disable_gpu_irq(adreno_dev); + goto oob_clear; + } + + /* Start the dispatcher */ + adreno_dispatcher_start(device); + + device->reset_counter++; + + gen7_gmu_oob_clear(device, oob_gpu); + + return 0; + +oob_clear: + gen7_gmu_oob_clear(device, oob_gpu); + +err: + gen7_gmu_power_off(adreno_dev); + + return ret; +} + +static void gmu_idle_timer(struct timer_list *t) +{ + struct kgsl_device *device = container_of(t, struct kgsl_device, + idle_timer); + + kgsl_schedule_work(&device->idle_check_ws); +} + +static int gen7_boot(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + WARN_ON(test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)); + + trace_kgsl_pwr_request_state(device, KGSL_STATE_ACTIVE); + + ret = gen7_gmu_boot(adreno_dev); + if (ret) + return ret; + + ret = 
gen7_gpu_boot(adreno_dev); + if (ret) + return ret; + + kgsl_start_idle_timer(device); + kgsl_pwrscale_wake(device); + + set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + device->pwrctrl.last_stat_updated = ktime_get(); + device->state = KGSL_STATE_ACTIVE; + + trace_kgsl_pwr_set_state(device, KGSL_STATE_ACTIVE); + + return ret; +} + +static int gen7_first_boot(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + if (test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) + return gen7_boot(adreno_dev); + + ret = gen7_ringbuffer_init(adreno_dev); + if (ret) + return ret; + + ret = gen7_microcode_read(adreno_dev); + if (ret) + return ret; + + ret = gen7_init(adreno_dev); + if (ret) + return ret; + + ret = gen7_gmu_init(adreno_dev); + if (ret) + return ret; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_ACTIVE); + + ret = gen7_gmu_first_boot(adreno_dev); + if (ret) + return ret; + + ret = gen7_gpu_boot(adreno_dev); + if (ret) + return ret; + + adreno_get_bus_counters(adreno_dev); + + adreno_dev->cooperative_reset = ADRENO_FEATURE(adreno_dev, + ADRENO_COOP_RESET); + + adreno_create_profile_buffer(adreno_dev); + + set_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags); + set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + /* + * There is a possible deadlock scenario during kgsl firmware reading + * (request_firmware) and devfreq update calls. During first boot, kgsl + * device mutex is held and then request_firmware is called for reading + * firmware. request_firmware internally takes dev_pm_qos_mtx lock. + * Whereas in case of devfreq update calls triggered by thermal/bcl or + * devfreq sysfs, it first takes the same dev_pm_qos_mtx lock and then + * tries to take kgsl device mutex as part of get_dev_status/target + * calls. This results in deadlock when both threads are unable to acquire + * the mutex held by the other thread. 
Enable devfreq updates now as we are + * done reading all firmware files. + */ + device->pwrscale.devfreq_enabled = true; + + device->pwrctrl.last_stat_updated = ktime_get(); + device->state = KGSL_STATE_ACTIVE; + + trace_kgsl_pwr_set_state(device, KGSL_STATE_ACTIVE); + + return 0; +} + +static int gen7_power_off(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + WARN_ON(!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)); + + trace_kgsl_pwr_request_state(device, KGSL_STATE_SLUMBER); + + adreno_suspend_context(device); + + ret = gen7_gmu_oob_set(device, oob_gpu); + if (!ret) { + kgsl_pwrscale_update_stats(device); + + /* Save active coresight registers if applicable */ + adreno_coresight_stop(adreno_dev); + + adreno_irqctrl(adreno_dev, 0); + } + + gen7_gmu_oob_clear(device, oob_gpu); + + kgsl_pwrctrl_irq(device, false); + + gen7_gmu_power_off(adreno_dev); + + adreno_set_active_ctxs_null(adreno_dev); + + adreno_dispatcher_stop(adreno_dev); + + adreno_ringbuffer_stop(adreno_dev); + + if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice)) + llcc_slice_deactivate(adreno_dev->gpu_llc_slice); + + if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) + llcc_slice_deactivate(adreno_dev->gpuhtw_llc_slice); + + clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + del_timer_sync(&device->idle_timer); + + kgsl_pwrscale_sleep(device); + + kgsl_pwrctrl_clear_l3_vote(device); + + /* + * Reset the context records so that CP can start + * at the correct read pointer for BV thread after + * coming out of slumber. 
+ */ + gen7_reset_preempt_records(adreno_dev); + + trace_kgsl_pwr_set_state(device, KGSL_STATE_SLUMBER); + + return ret; +} + +static void gmu_idle_check(struct work_struct *work) +{ + struct kgsl_device *device = container_of(work, + struct kgsl_device, idle_check_ws); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + mutex_lock(&device->mutex); + + if (test_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags)) + goto done; + + if (!atomic_read(&device->active_cnt)) { + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + gen7_power_off(adreno_dev); + } else { + kgsl_pwrscale_update(device); + kgsl_start_idle_timer(device); + } + +done: + mutex_unlock(&device->mutex); +} + +static int gen7_gmu_first_open(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + /* + * Do the one time settings that need to happen when we + * attempt to boot the gpu the very first time + */ + ret = gen7_first_boot(adreno_dev); + if (ret) + return ret; + + /* + * A client that does a first_open but never closes the device + * may prevent us from going back to SLUMBER. So trigger the idle + * check by incrementing the active count and immediately releasing it. 
+ */ + atomic_inc(&device->active_cnt); + gen7_gmu_active_count_put(adreno_dev); + + return 0; +} + +static int gen7_gmu_last_close(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + return gen7_power_off(adreno_dev); + + return 0; +} + +static int gen7_gmu_active_count_get(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret = 0; + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return -EINVAL; + + if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags)) + return -EINVAL; + + if ((atomic_read(&device->active_cnt) == 0) && + !test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + ret = gen7_boot(adreno_dev); + + if (ret == 0) + atomic_inc(&device->active_cnt); + + trace_kgsl_active_count(device, + (unsigned long) __builtin_return_address(0)); + + return ret; +} + +static int gen7_gmu_pm_suspend(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags)) + return 0; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_SUSPEND); + + /* Halt any new submissions */ + reinit_completion(&device->halt_gate); + + /* wait for active count so device can be put in slumber */ + ret = kgsl_active_count_wait(device, 0, HZ); + if (ret) { + dev_err(device->dev, + "Timed out waiting for the active count\n"); + goto err; + } + + ret = adreno_idle(device); + if (ret) + goto err; + + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + gen7_power_off(adreno_dev); + + set_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags); + + adreno_get_gpu_halt(adreno_dev); + + trace_kgsl_pwr_set_state(device, KGSL_STATE_SUSPEND); + + return 0; +err: + adreno_dispatcher_start(device); + return ret; +} + +static void gen7_gmu_pm_resume(struct adreno_device *adreno_dev) +{ + 
struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + if (WARN(!test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags), + "resume invoked without a suspend\n")) + return; + + adreno_put_gpu_halt(adreno_dev); + + adreno_dispatcher_start(device); + + clear_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags); +} + +static void gen7_gmu_touch_wakeup(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + /* + * Do not wake up a suspended device or until the first boot sequence + * has been completed. + */ + if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags) || + !test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) + return; + + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + goto done; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_ACTIVE); + + ret = gen7_gmu_boot(adreno_dev); + if (ret) + return; + + ret = gen7_gpu_boot(adreno_dev); + if (ret) + return; + + kgsl_pwrscale_wake(device); + + set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + device->pwrctrl.last_stat_updated = ktime_get(); + device->state = KGSL_STATE_ACTIVE; + + trace_kgsl_pwr_set_state(device, KGSL_STATE_ACTIVE); + +done: + /* + * When waking up from a touch event we want to stay active long enough + * for the user to send a draw command. 
The default idle timer timeout + * is shorter than we want so go ahead and push the idle timer out + * further for this special case + */ + mod_timer(&device->idle_timer, jiffies + + msecs_to_jiffies(adreno_wake_timeout)); +} + +const struct adreno_power_ops gen7_gmu_power_ops = { + .first_open = gen7_gmu_first_open, + .last_close = gen7_gmu_last_close, + .active_count_get = gen7_gmu_active_count_get, + .active_count_put = gen7_gmu_active_count_put, + .pm_suspend = gen7_gmu_pm_suspend, + .pm_resume = gen7_gmu_pm_resume, + .touch_wakeup = gen7_gmu_touch_wakeup, + .gpu_clock_set = gen7_gmu_clock_set, + .gpu_bus_set = gen7_gmu_bus_set, +}; + +int gen7_gmu_device_probe(struct platform_device *pdev, + u32 chipid, const struct adreno_gpu_core *gpucore) +{ + struct adreno_device *adreno_dev; + struct kgsl_device *device; + struct gen7_device *gen7_dev; + int ret; + + gen7_dev = devm_kzalloc(&pdev->dev, sizeof(*gen7_dev), + GFP_KERNEL); + if (!gen7_dev) + return -ENOMEM; + + adreno_dev = &gen7_dev->adreno_dev; + + ret = gen7_probe_common(pdev, adreno_dev, chipid, gpucore); + if (ret) + return ret; + + ret = adreno_dispatcher_init(adreno_dev); + if (ret) + return ret; + + device = KGSL_DEVICE(adreno_dev); + + INIT_WORK(&device->idle_check_ws, gmu_idle_check); + + timer_setup(&device->idle_timer, gmu_idle_timer, 0); + + adreno_dev->irq_mask = GEN7_INT_MASK; + + return 0; +} + +int gen7_gmu_reset(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + gen7_hfi_stop(adreno_dev); + + gen7_disable_gpu_irq(adreno_dev); + + /* Hard reset the gmu and gpu */ + gen7_gmu_suspend(adreno_dev); + + gen7_reset_preempt_records(adreno_dev); + + clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + /* Attempt to reboot the gmu and gpu */ + return gen7_boot(adreno_dev); +} + +int gen7_gmu_hfi_probe(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hfi *hfi = &gmu->hfi; + + hfi->irq = 
kgsl_request_irq(gmu->pdev, "hfi", + gen7_hfi_irq_handler, KGSL_DEVICE(adreno_dev)); + + return hfi->irq < 0 ? hfi->irq : 0; +} + +int gen7_gmu_add_to_minidump(struct adreno_device *adreno_dev) +{ + struct gen7_device *gen7_dev = container_of(adreno_dev, + struct gen7_device, adreno_dev); + int ret; + + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_GEN7_DEVICE, + (void *)(gen7_dev), sizeof(struct gen7_device)); + if (ret) + return ret; + + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_GMU_LOG_ENTRY, + gen7_dev->gmu.gmu_log->hostptr, gen7_dev->gmu.gmu_log->size); + if (ret) + return ret; + + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_HFIMEM_ENTRY, + gen7_dev->gmu.hfi.hfi_mem->hostptr, gen7_dev->gmu.hfi.hfi_mem->size); + + return ret; +} + +static int gen7_gmu_bind(struct device *dev, struct device *master, void *data) +{ + struct kgsl_device *device = dev_get_drvdata(master); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + const struct gen7_gpudev *gen7_gpudev = to_gen7_gpudev(gpudev); + int ret; + + ret = gen7_gmu_probe(device, to_platform_device(dev)); + if (ret) + return ret; + + if (gen7_gpudev->hfi_probe) { + ret = gen7_gpudev->hfi_probe(adreno_dev); + + if (ret) { + gen7_gmu_remove(device); + return ret; + } + } + + return 0; +} + +static void gen7_gmu_unbind(struct device *dev, struct device *master, + void *data) +{ + struct kgsl_device *device = dev_get_drvdata(master); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + const struct gen7_gpudev *gen7_gpudev = to_gen7_gpudev(gpudev); + + if (gen7_gpudev->hfi_remove) + gen7_gpudev->hfi_remove(adreno_dev); + + gen7_gmu_remove(device); +} + +static const struct component_ops gen7_gmu_component_ops = { + .bind = gen7_gmu_bind, + .unbind = gen7_gmu_unbind, +}; + +static int gen7_gmu_probe_dev(struct platform_device 
*pdev) +{ + return component_add(&pdev->dev, &gen7_gmu_component_ops); +} + +static int gen7_gmu_remove_dev(struct platform_device *pdev) +{ + component_del(&pdev->dev, &gen7_gmu_component_ops); + return 0; +} + +static const struct of_device_id gen7_gmu_match_table[] = { + { .compatible = "qcom,gen7-gmu" }, + { }, +}; + +struct platform_driver gen7_gmu_driver = { + .probe = gen7_gmu_probe_dev, + .remove = gen7_gmu_remove_dev, + .driver = { + .name = "adreno-gen7-gmu", + .of_match_table = gen7_gmu_match_table, + }, +}; diff --git a/adreno_gen7_gmu.h b/adreno_gen7_gmu.h new file mode 100644 index 0000000000..0702793251 --- /dev/null +++ b/adreno_gen7_gmu.h @@ -0,0 +1,422 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ +#ifndef __ADRENO_GEN7_GMU_H +#define __ADRENO_GEN7_GMU_H + +#include + +#include "adreno_gen7_hfi.h" +#include "kgsl_gmu_core.h" + +/** + * struct gen7_gmu_device - GMU device structure + * @ver: GMU Version information + * @irq: GMU interrupt number + * @fw_image: GMU FW image + * @hfi_mem: pointer to HFI shared memory + * @dump_mem: pointer to GMU debug dump memory + * @gmu_log: gmu event log memory + * @hfi: HFI controller + * @num_gpupwrlevels: number GPU frequencies in GPU freq table + * @num_bwlevel: number of GPU BW levels + * @num_cnocbwlevel: number CNOC BW levels + * @rpmh_votes: RPMh TCS command set for GPU, GMU voltage and bw scaling + * @cx_gdsc: CX headswitch that controls power of GMU and + * subsystem peripherals + * @gx_gdsc: GX headswitch that controls power of GPU subsystem + * @clks: GPU subsystem clocks required for GMU functionality + * @wakeup_pwrlevel: GPU wake up power/DCVS level in case different + * than default power level + * @idle_level: Minimal GPU idle power level + * @fault_count: GMU fault count + * @mailbox: Messages to AOP for ACD enable/disable go through this + * @log_wptr_retention: Store the log wptr offset on slumber + */ +struct 
gen7_gmu_device { + struct { + u32 core; + u32 core_dev; + u32 pwr; + u32 pwr_dev; + u32 hfi; + } ver; + struct platform_device *pdev; + int irq; + const struct firmware *fw_image; + struct kgsl_memdesc *dump_mem; + struct kgsl_memdesc *gmu_log; + struct gen7_hfi hfi; + /** @pwrlevels: Array of GMU power levels */ + struct regulator *cx_gdsc; + struct regulator *gx_gdsc; + struct clk_bulk_data *clks; + /** @num_clks: Number of entries in the @clks array */ + int num_clks; + unsigned int idle_level; + struct kgsl_mailbox mailbox; + /** @gmu_globals: Array to store gmu global buffers */ + struct kgsl_memdesc gmu_globals[GMU_KERNEL_ENTRIES]; + /** @global_entries: To keep track of number of gmu buffers */ + u32 global_entries; + struct gmu_vma_entry *vma; + unsigned int log_wptr_retention; + /** @cm3_fault: whether gmu received a cm3 fault interrupt */ + atomic_t cm3_fault; + /** + * @itcm_shadow: Copy of the itcm block in firmware binary used for + * snapshot + */ + void *itcm_shadow; + /** @flags: Internal gmu flags */ + unsigned long flags; + /** @rscc_virt: Pointer where RSCC block is mapped */ + void __iomem *rscc_virt; + /** @domain: IOMMU domain for the kernel context */ + struct iommu_domain *domain; + /** @log_stream_enable: GMU log streaming enable. Disabled by default */ + bool log_stream_enable; + /** @log_group_mask: Allows overriding default GMU log group mask */ + u32 log_group_mask; + struct kobject log_kobj; + /* + * @perf_ddr_bw: The lowest ddr bandwidth that puts CX at a corner at + * which GMU can run at 500 Mhz. 
+ */ + u32 perf_ddr_bw; +}; + +/* Helper function to get to gen7 gmu device from adreno device */ +struct gen7_gmu_device *to_gen7_gmu(struct adreno_device *adreno_dev); + +/* Helper function to get to adreno device from gen7 gmu device */ +struct adreno_device *gen7_gmu_to_adreno(struct gen7_gmu_device *gmu); + +/** + * gen7_reserve_gmu_kernel_block() - Allocate a gmu buffer + * @gmu: Pointer to the gen7 gmu device + * @addr: Desired gmu virtual address + * @size: Size of the buffer in bytes + * @vma_id: Target gmu vma where this buffer should be mapped + * + * This function allocates a buffer and maps it in + * the desired gmu vma + * + * Return: Pointer to the memory descriptor or error pointer on failure + */ +struct kgsl_memdesc *gen7_reserve_gmu_kernel_block(struct gen7_gmu_device *gmu, + u32 addr, u32 size, u32 vma_id); + +/** + * gen7_build_rpmh_tables - Build the rpmh tables + * @adreno_dev: Pointer to the adreno device + * + * This function creates the gpu dcvs and bw tables + * + * Return: 0 on success and negative error on failure + */ +int gen7_build_rpmh_tables(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_gx_is_on - Check if GX is on + * @device: Pointer to KGSL device + * + * This function reads pwr status registers to check if GX + * is on or off + */ +bool gen7_gmu_gx_is_on(struct kgsl_device *device); + +/** + * gen7_gmu_device_snapshot - GEN7 GMU snapshot function + * @device: Device being snapshotted + * @snapshot: Pointer to the snapshot instance + * + * This is where all of the GEN7 GMU specific bits and pieces are grabbed + * into the snapshot memory + */ +void gen7_gmu_device_snapshot(struct kgsl_device *device, + struct kgsl_snapshot *snapshot); + +/** + * gen7_gmu_device_probe - GEN7 GMU probe function + * @pdev: Pointer to the platform device + * @chipid: Chipid of the target + * @gpucore: Pointer to the gpucore + * + * The target specific probe function for gmu based gen7 targets. 
+ */ +int gen7_gmu_device_probe(struct platform_device *pdev, + u32 chipid, const struct adreno_gpu_core *gpucore); + +/** + * gen7_gmu_reset - Reset and restart the gmu + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_gmu_reset(struct adreno_device *adreno_dev); + +/** + * gen7_enable_gpu_irq - Enable gpu interrupt + * @adreno_dev: Pointer to the adreno device + */ +void gen7_enable_gpu_irq(struct adreno_device *adreno_dev); + +/** + * gen7_disable_gpu_irq - Disable gpu interrupt + * @adreno_dev: Pointer to the adreno device + */ +void gen7_disable_gpu_irq(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_snapshot - Take snapshot for gmu targets + * @adreno_dev: Pointer to the adreno device + * @snapshot: Pointer to the snapshot structure + * + * Send an NMI to gmu if we hit a gmu fault. Then take gmu + * snapshot and carry on with rest of the gen7 snapshot + */ +void gen7_gmu_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); + +/** + * gen7_gmu_probe - Probe gen7 gmu resources + * @device: Pointer to the kgsl device + * @pdev: Pointer to the gmu platform device + * + * Probe the gmu and hfi resources + * + * Return: 0 on success or negative error on failure + */ +int gen7_gmu_probe(struct kgsl_device *device, + struct platform_device *pdev); + +/** + * gen7_gmu_parse_fw - Parse the gmu fw binary + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_gmu_parse_fw(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_memory_init - Allocate gmu memory + * @adreno_dev: Pointer to the adreno device + * + * Allocates the gmu log buffer and others if needed. 
+ * + * Return: 0 on success or negative error on failure + */ +int gen7_gmu_memory_init(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_aop_send_acd_state - Enable or disable acd feature in aop + * @gmu: Pointer to the gen7 gmu device + * @flag: Boolean to enable or disable acd in aop + * + * This function enables or disables gpu acd feature using mailbox + */ +void gen7_gmu_aop_send_acd_state(struct gen7_gmu_device *gmu, bool flag); + +/** + * gen7_gmu_enable_gdsc - Enable gmu gdsc + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_gmu_enable_gdsc(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_load_fw - Load gmu firmware + * @adreno_dev: Pointer to the adreno device + * + * Loads the gmu firmware binary into TCMs and memory + * + * Return: 0 on success or negative error on failure + */ +int gen7_gmu_load_fw(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_device_start - Bring gmu out of reset + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_gmu_device_start(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_hfi_start - Indicate hfi start to gmu + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_gmu_hfi_start(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_itcm_shadow - Create itcm shadow copy for snapshot + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_gmu_itcm_shadow(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_register_config - gmu register configuration + * @adreno_dev: Pointer to the adreno device + * + * Program gmu registers based on features + */ +void gen7_gmu_register_config(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_version_info - Get gmu firmware version + * @adreno_dev: Pointer to the adreno device + * + * Program gmu registers 
based on features + */ +void gen7_gmu_version_info(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_irq_enable - Enable gmu interrupts + * @adreno_dev: Pointer to the adreno device + */ +void gen7_gmu_irq_enable(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_irq_disable - Disable gmu interrupts + * @adreno_dev: Pointer to the adreno device + */ +void gen7_gmu_irq_disable(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_suspend - Hard reset the gpu and gmu + * @adreno_dev: Pointer to the adreno device + * + * In case we hit a gmu fault, hard reset the gpu and gmu + * to recover from the fault + */ +void gen7_gmu_suspend(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_oob_set - send gmu oob request + * @device: Pointer to the kgsl device + * @oob: Type of oob request as defined in enum oob_request + * + * Request gmu to keep gpu powered up till the oob is cleared + * + * Return: 0 on success or negative error on failure + */ +int gen7_gmu_oob_set(struct kgsl_device *device, enum oob_request oob); + +/** + * gen7_gmu_oob_clear - clear an asserted oob request + * @device: Pointer to the kgsl device + * @oob: Type of oob request as defined in enum oob_request + * + * Clear a previously requested oob so that gmu can power + * collapse the gpu + */ +void gen7_gmu_oob_clear(struct kgsl_device *device, enum oob_request oob); + +/** + * gen7_gmu_wait_for_lowest_idle - wait for gmu to complete ifpc + * @adreno_dev: Pointer to the adreno device + * + * If ifpc is enabled, wait for gmu to put gpu into ifpc. 
+ * + * Return: 0 on success or negative error on failure + */ +int gen7_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_wait_for_idle - Wait for gmu to become idle + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_gmu_wait_for_idle(struct adreno_device *adreno_dev); + +/** + * gen7_rscc_sleep_sequence - Trigger rscc sleep sequence + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_rscc_sleep_sequence(struct adreno_device *adreno_dev); + +/** + * gen7_rscc_wakeup_sequence - Trigger rscc wakeup sequence + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_rscc_wakeup_sequence(struct adreno_device *adreno_dev); + +/** + * gen7_halt_gbif - Halt CX and GX requests in GBIF + * @adreno_dev: Pointer to the adreno device + * + * Clear any pending GX or CX transactions in GBIF and + * deassert GBIF halt + * + * Return: 0 on success or negative error on failure + */ +int gen7_halt_gbif(struct adreno_device *adreno_dev); + +/** + * gen7_load_pdc_ucode - Load and enable pdc sequence + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_load_pdc_ucode(struct adreno_device *adreno_dev); + +/** + * gen7_load_rsc_ucode - Load rscc sequence + * @adreno_dev: Pointer to the adreno device + */ +void gen7_load_rsc_ucode(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_remove - Clean up gmu probed resources + * @device: Pointer to the kgsl device + */ +void gen7_gmu_remove(struct kgsl_device *device); + +/** + * gen7_gmu_enable_clks - Enable gmu clocks + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_gmu_enable_clks(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_enable_gdsc - Enable gmu gdsc + * @adreno_dev: 
Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_gmu_enable_gdsc(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_handle_watchdog - Handle watchdog interrupt + * @adreno_dev: Pointer to the adreno device + */ +void gen7_gmu_handle_watchdog(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_send_nmi - Send NMI to GMU + * @adreno_dev: Pointer to the adreno device + * @force: Boolean to forcefully send NMI irrespective of GMU state + */ +void gen7_gmu_send_nmi(struct adreno_device *adreno_dev, bool force); + +/** + * gen7_gmu_add_to_minidump - Register gen7_device with va minidump + * @adreno_dev: Pointer to the adreno device + */ +int gen7_gmu_add_to_minidump(struct adreno_device *adreno_dev); + +#endif diff --git a/adreno_gen7_gmu_snapshot.c b/adreno_gen7_gmu_snapshot.c new file mode 100644 index 0000000000..39c3c184a9 --- /dev/null +++ b/adreno_gen7_gmu_snapshot.c @@ -0,0 +1,326 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
+ */ + +#include "gen7_reg.h" +#include "adreno.h" +#include "adreno_gen7.h" +#include "adreno_gen7_gmu.h" +#include "adreno_snapshot.h" +#include "kgsl_device.h" + +static const u32 gen7_gmu_registers[] = { + 0x10001, 0x10001, 0x10003, 0x10003, 0x10401, 0x10401, 0x10403, 0x10403, + 0x10801, 0x10801, 0x10803, 0x10803, 0x10c01, 0x10c01, 0x10c03, 0x10c03, + 0x11001, 0x11001, 0x11003, 0x11003, 0x11401, 0x11401, 0x11403, 0x11403, + 0x11801, 0x11801, 0x11803, 0x11803, 0x11c01, 0x11c01, 0x11c03, 0x11c03, + 0x1f400, 0x1f40d, 0x1f40f, 0x1f411, 0x1f500, 0x1f500, 0x1f507, 0x1f507, + 0x1f509, 0x1f50b, 0x1f800, 0x1f804, 0x1f807, 0x1f808, 0x1f80b, 0x1f80c, + 0x1f80f, 0x1f80f, 0x1f811, 0x1f811, 0x1f813, 0x1f817, 0x1f819, 0x1f81c, + 0x1f824, 0x1f82a, 0x1f82d, 0x1f830, 0x1f840, 0x1f853, 0x1f860, 0x1f860, + 0x1f870, 0x1f879, 0x1f87f, 0x1f87f, 0x1f888, 0x1f889, 0x1f8a0, 0x1f8a2, + 0x1f8a4, 0x1f8af, 0x1f8c0, 0x1f8c1, 0x1f8c3, 0x1f8c4, 0x1f8d0, 0x1f8d0, + 0x1f8ec, 0x1f8ec, 0x1f8f0, 0x1f8f1, 0x1f910, 0x1f914, 0x1f920, 0x1f921, + 0x1f924, 0x1f925, 0x1f928, 0x1f929, 0x1f92c, 0x1f92d, 0x1f940, 0x1f940, + 0x1f942, 0x1f944, 0x1f948, 0x1f94a, 0x1f94f, 0x1f951, 0x1f958, 0x1f95a, + 0x1f95d, 0x1f95d, 0x1f962, 0x1f962, 0x1f964, 0x1f96b, 0x1f970, 0x1f979, + 0x1f980, 0x1f981, 0x1f984, 0x1f986, 0x1f992, 0x1f993, 0x1f996, 0x1f99e, + 0x1f9c0, 0x1f9c0, 0x1f9c5, 0x1f9d4, 0x1f9f0, 0x1f9f1, 0x1f9f8, 0x1f9fa, + 0x1fa00, 0x1fa03, 0x20000, 0x20005, 0x20008, 0x2000c, 0x20010, 0x20012, + 0x20018, 0x20018, 0x20020, 0x20023, 0x20030, 0x20031, 0x23801, 0x23801, + 0x23803, 0x23803, 0x23805, 0x23805, 0x23807, 0x23807, 0x23809, 0x23809, + 0x2380b, 0x2380b, 0x2380d, 0x2380d, 0x2380f, 0x2380f, 0x23811, 0x23811, + 0x23813, 0x23813, 0x23815, 0x23815, 0x23817, 0x23817, 0x23819, 0x23819, + 0x2381b, 0x2381b, 0x2381d, 0x2381d, 0x2381f, 0x23820, 0x23822, 0x23822, + 0x23824, 0x23824, 0x23826, 0x23826, 0x23828, 0x23828, 0x2382a, 0x2382a, + 0x2382c, 0x2382c, 0x2382e, 0x2382e, 0x23830, 0x23830, 0x23832, 0x23832, + 0x23834, 
0x23834, 0x23836, 0x23836, 0x23838, 0x23838, 0x2383a, 0x2383a, + 0x2383c, 0x2383c, 0x2383e, 0x2383e, 0x23840, 0x23847, 0x23b00, 0x23b01, + 0x23b03, 0x23b03, 0x23b05, 0x23b0e, 0x23b10, 0x23b13, 0x23b15, 0x23b16, + 0x23b20, 0x23b20, 0x23b28, 0x23b28, 0x23b30, 0x23b30, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_gmu_registers), 8)); + +static const u32 gen7_gmu_gx_registers[] = { + 0x1a400, 0x1a41f, 0x1a440, 0x1a45f, 0x1a480, 0x1a49f, 0x1a4c0, 0x1a4df, + 0x1a500, 0x1a51f, 0x1a540, 0x1a55f, 0x1a580, 0x1a59f, 0x1a5c0, 0x1a5df, + 0x1a780, 0x1a781, 0x1a783, 0x1a785, 0x1a787, 0x1a789, 0x1a78b, 0x1a78d, + 0x1a78f, 0x1a791, 0x1a793, 0x1a795, 0x1a797, 0x1a799, 0x1a79b, 0x1a79b, + 0x1a7c0, 0x1a7c1, 0x1a7c4, 0x1a7c5, 0x1a7c8, 0x1a7c9, 0x1a7cc, 0x1a7cd, + 0x1a7d0, 0x1a7d1, 0x1a7d4, 0x1a7d5, 0x1a7d8, 0x1a7d9, 0x1a7fc, 0x1a7fd, + 0x1a800, 0x1a802, 0x1a804, 0x1a804, 0x1a816, 0x1a816, 0x1a81e, 0x1a81e, + 0x1a826, 0x1a826, 0x1a82e, 0x1a82e, 0x1a836, 0x1a836, 0x1a83e, 0x1a83e, + 0x1a846, 0x1a846, 0x1a860, 0x1a862, 0x1a864, 0x1a867, 0x1a870, 0x1a870, + 0x1a883, 0x1a884, 0x1a8c0, 0x1a8c2, 0x1a8c4, 0x1a8c7, 0x1a8d0, 0x1a8d3, + 0x1a900, 0x1a92b, 0x1a940, 0x1a940, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_gmu_gx_registers), 8)); + +static const u32 gen7_rscc_registers[] = { + 0x14000, 0x14036, 0x14040, 0x14042, 0x14080, 0x14084, 0x14089, 0x1408c, + 0x14091, 0x14094, 0x14099, 0x1409c, 0x140a1, 0x140a4, 0x140a9, 0x140ac, + 0x14100, 0x14102, 0x14114, 0x14119, 0x14124, 0x1412e, 0x14140, 0x14143, + 0x14180, 0x14197, 0x14340, 0x14342, 0x14344, 0x14347, 0x1434c, 0x14373, + 0x143ec, 0x143ef, 0x143f4, 0x1441b, 0x14494, 0x14497, 0x1449c, 0x144c3, + 0x1453c, 0x1453f, 0x14544, 0x1456b, 0x145e4, 0x145e7, 0x145ec, 0x14613, + 0x1468c, 0x1468f, 0x14694, 0x146bb, 0x14734, 0x14737, 0x1473c, 0x14763, + 0x147dc, 0x147df, 0x147e4, 0x1480b, 0x14884, 0x14887, 0x1488c, 0x148b3, + 0x1492c, 0x1492f, 0x14934, 0x1495b, 0x14f51, 0x14f54, + UINT_MAX, UINT_MAX, +}; 
+static_assert(IS_ALIGNED(sizeof(gen7_rscc_registers), 8)); + +struct gmu_mem_type_desc { + struct kgsl_memdesc *memdesc; + u32 type; +}; + +static size_t gen7_snapshot_gmu_mem(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_gmu_mem *mem_hdr = + (struct kgsl_snapshot_gmu_mem *)buf; + unsigned int *data = (unsigned int *) + (buf + sizeof(*mem_hdr)); + struct gmu_mem_type_desc *desc = priv; + + if (remain < desc->memdesc->size + sizeof(*mem_hdr)) { + dev_err(device->dev, + "snapshot: Not enough memory for the gmu section %d\n", + desc->type); + return 0; + } + + mem_hdr->type = desc->type; + mem_hdr->hostaddr = (u64)(uintptr_t)desc->memdesc->hostptr; + mem_hdr->gmuaddr = desc->memdesc->gmuaddr; + mem_hdr->gpuaddr = 0; + + memcpy(data, desc->memdesc->hostptr, desc->memdesc->size); + + return desc->memdesc->size + sizeof(*mem_hdr); +} + +static size_t gen7_gmu_snapshot_dtcm(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_gmu_mem *mem_hdr = + (struct kgsl_snapshot_gmu_mem *)buf; + struct gen7_gmu_device *gmu = (struct gen7_gmu_device *)priv; + u32 *data = (u32 *)(buf + sizeof(*mem_hdr)); + u32 i; + + if (remain < gmu->vma[GMU_DTCM].size + sizeof(*mem_hdr)) { + SNAPSHOT_ERR_NOMEM(device, "GMU DTCM Memory"); + return 0; + } + + mem_hdr->type = SNAPSHOT_GMU_MEM_BIN_BLOCK; + mem_hdr->hostaddr = 0; + mem_hdr->gmuaddr = gmu->vma[GMU_DTCM].start; + mem_hdr->gpuaddr = 0; + + for (i = 0; i < (gmu->vma[GMU_DTCM].size >> 2); i++) + gmu_core_regread(device, GEN7_GMU_CM3_DTCM_START + i, data++); + + return gmu->vma[GMU_DTCM].size + sizeof(*mem_hdr); +} + +static size_t gen7_gmu_snapshot_itcm(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_gmu_mem *mem_hdr = + (struct kgsl_snapshot_gmu_mem *)buf; + void *dest = buf + sizeof(*mem_hdr); + struct gen7_gmu_device *gmu = (struct gen7_gmu_device *)priv; + + if (!gmu->itcm_shadow) { + dev_err(&gmu->pdev->dev, 
"No memory allocated for ITCM shadow capture\n"); + return 0; + } + + if (remain < gmu->vma[GMU_ITCM].size + sizeof(*mem_hdr)) { + SNAPSHOT_ERR_NOMEM(device, "GMU ITCM Memory"); + return 0; + } + + mem_hdr->type = SNAPSHOT_GMU_MEM_BIN_BLOCK; + mem_hdr->hostaddr = 0; + mem_hdr->gmuaddr = gmu->vma[GMU_ITCM].start; + mem_hdr->gpuaddr = 0; + + memcpy(dest, gmu->itcm_shadow, gmu->vma[GMU_ITCM].size); + + return gmu->vma[GMU_ITCM].size + sizeof(*mem_hdr); +} + +static void gen7_gmu_snapshot_memories(struct kgsl_device *device, + struct gen7_gmu_device *gmu, struct kgsl_snapshot *snapshot) +{ + struct gmu_mem_type_desc desc; + struct kgsl_memdesc *md; + int i; + + for (i = 0; i < ARRAY_SIZE(gmu->gmu_globals); i++) { + + md = &gmu->gmu_globals[i]; + if (!md->size) + continue; + + desc.memdesc = md; + if (md == gmu->hfi.hfi_mem) + desc.type = SNAPSHOT_GMU_MEM_HFI; + else if (md == gmu->gmu_log) + desc.type = SNAPSHOT_GMU_MEM_LOG; + else if (md == gmu->dump_mem) + desc.type = SNAPSHOT_GMU_MEM_DEBUG; + else + desc.type = SNAPSHOT_GMU_MEM_BIN_BLOCK; + + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GMU_MEMORY, + snapshot, gen7_snapshot_gmu_mem, &desc); + } +} + +struct kgsl_snapshot_gmu_version { + u32 type; + u32 value; +}; + +static size_t gen7_snapshot_gmu_version(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + u32 *data = (u32 *) (buf + sizeof(*header)); + struct kgsl_snapshot_gmu_version *ver = priv; + + if (remain < DEBUG_SECTION_SZ(1)) { + SNAPSHOT_ERR_NOMEM(device, "GMU Version"); + return 0; + } + + header->type = ver->type; + header->size = 1; + + *data = ver->value; + + return DEBUG_SECTION_SZ(1); +} + +static void gen7_gmu_snapshot_versions(struct kgsl_device *device, + struct gen7_gmu_device *gmu, + struct kgsl_snapshot *snapshot) +{ + int i; + + struct kgsl_snapshot_gmu_version gmu_vers[] = { + { .type = SNAPSHOT_DEBUG_GMU_CORE_VERSION, + .value = 
gmu->ver.core, }, + { .type = SNAPSHOT_DEBUG_GMU_CORE_DEV_VERSION, + .value = gmu->ver.core_dev, }, + { .type = SNAPSHOT_DEBUG_GMU_PWR_VERSION, + .value = gmu->ver.pwr, }, + { .type = SNAPSHOT_DEBUG_GMU_PWR_DEV_VERSION, + .value = gmu->ver.pwr_dev, }, + { .type = SNAPSHOT_DEBUG_GMU_HFI_VERSION, + .value = gmu->ver.hfi, }, + }; + + for (i = 0; i < ARRAY_SIZE(gmu_vers); i++) + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, gen7_snapshot_gmu_version, + &gmu_vers[i]); +} + +#define RSCC_OFFSET_DWORDS 0x14000 + +static size_t gen7_snapshot_rscc_registers(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + const u32 *regs = priv; + unsigned int *data = (unsigned int *)buf; + int count = 0, k; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + /* Figure out how many registers we are going to dump */ + count = adreno_snapshot_regs_count(regs); + + if (remain < (count * 4)) { + SNAPSHOT_ERR_NOMEM(device, "RSCC REGISTERS"); + return 0; + } + + for (regs = priv; regs[0] != UINT_MAX; regs += 2) { + unsigned int cnt = REG_COUNT(regs); + + if (cnt == 1) { + *data++ = BIT(31) | regs[0]; + *data++ = __raw_readl(gmu->rscc_virt + + ((regs[0] - RSCC_OFFSET_DWORDS) << 2)); + continue; + } + *data++ = regs[0]; + *data++ = cnt; + for (k = regs[0]; k <= regs[1]; k++) + *data++ = __raw_readl(gmu->rscc_virt + + ((k - RSCC_OFFSET_DWORDS) << 2)); + } + + /* Return the size of the section */ + return (count * 4); +} + +/* + * gen7_gmu_device_snapshot() - GEN7 GMU snapshot function + * @device: Device being snapshotted + * @snapshot: Pointer to the snapshot instance + * + * This is where all of the GEN7 GMU specific bits and pieces are grabbed + * into the snapshot memory + */ +void gen7_gmu_device_snapshot(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen7_gmu_device *gmu = 
to_gen7_gmu(adreno_dev); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY, + snapshot, gen7_gmu_snapshot_itcm, gmu); + + gen7_gmu_snapshot_versions(device, gmu, snapshot); + + gen7_gmu_snapshot_memories(device, gmu, snapshot); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot, + adreno_snapshot_registers_v2, (void *) gen7_gmu_registers); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot, + gen7_snapshot_rscc_registers, (void *) gen7_rscc_registers); + + if (!gen7_gmu_gx_is_on(device)) + goto dtcm; + + /* Set fence to ALLOW mode so registers can be read */ + kgsl_regwrite(device, GEN7_GMU_AO_AHB_FENCE_CTRL, 0); + /* Make sure the previous write posted before reading */ + wmb(); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot, + adreno_snapshot_registers_v2, (void *) gen7_gmu_gx_registers); + + /* A stalled SMMU can lead to NoC timeouts when host accesses DTCM */ + if (gen7_is_smmu_stalled(device)) { + dev_err(&gmu->pdev->dev, + "Not dumping dtcm because SMMU is stalled\n"); + return; + } + +dtcm: + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY, + snapshot, gen7_gmu_snapshot_dtcm, gmu); +} diff --git a/adreno_gen7_hfi.c b/adreno_gen7_hfi.c new file mode 100644 index 0000000000..953b7da45d --- /dev/null +++ b/adreno_gen7_hfi.c @@ -0,0 +1,636 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
+ */ + +#include +#include + +#include "adreno.h" +#include "adreno_gen7.h" +#include "adreno_gen7_hfi.h" +#include "kgsl_device.h" +#include "kgsl_trace.h" + +/* Below section is for all structures related to HFI queues */ +#define HFI_QUEUE_MAX HFI_QUEUE_DEFAULT_CNT + +/* Total header sizes + queue sizes + 16 for alignment */ +#define HFIMEM_SIZE (sizeof(struct hfi_queue_table) + 16 + \ + (HFI_QUEUE_SIZE * HFI_QUEUE_MAX)) + +#define HOST_QUEUE_START_ADDR(hfi_mem, i) \ + ((hfi_mem)->hostptr + HFI_QUEUE_OFFSET(i)) + +struct gen7_hfi *to_gen7_hfi(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + return &gmu->hfi; +} + +/* Size in below functions are in unit of dwords */ +int gen7_hfi_queue_read(struct gen7_gmu_device *gmu, u32 queue_idx, + unsigned int *output, unsigned int max_size) +{ + struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem; + struct hfi_queue_table *tbl = mem_addr->hostptr; + struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx]; + u32 *queue; + u32 msg_hdr; + u32 i, read; + u32 size; + int result = 0; + + if (hdr->status == HFI_QUEUE_STATUS_DISABLED) + return -EINVAL; + + if (hdr->read_index == hdr->write_index) + return -ENODATA; + + /* Clear the output data before populating */ + memset(output, 0, max_size); + + queue = HOST_QUEUE_START_ADDR(mem_addr, queue_idx); + msg_hdr = queue[hdr->read_index]; + size = MSG_HDR_GET_SIZE(msg_hdr); + + if (size > (max_size >> 2)) { + dev_err(&gmu->pdev->dev, + "HFI message too big: hdr:0x%x rd idx=%d\n", + msg_hdr, hdr->read_index); + result = -EMSGSIZE; + goto done; + } + + read = hdr->read_index; + + if (read < hdr->queue_size) { + for (i = 0; i < size && i < (max_size >> 2); i++) { + output[i] = queue[read]; + read = (read + 1)%hdr->queue_size; + } + result = size; + } else { + /* In case FW messed up */ + dev_err(&gmu->pdev->dev, + "Read index %d greater than queue size %d\n", + hdr->read_index, hdr->queue_size); + result = -ENODATA; + } + + read = ALIGN(read, 
SZ_4) % hdr->queue_size; + + hfi_update_read_idx(hdr, read); + + /* For acks, trace the packet for which this ack was sent */ + if (MSG_HDR_GET_TYPE(msg_hdr) == HFI_MSG_ACK) + trace_kgsl_hfi_receive(MSG_HDR_GET_ID(output[1]), + MSG_HDR_GET_SIZE(output[1]), + MSG_HDR_GET_SEQNUM(output[1])); + else + trace_kgsl_hfi_receive(MSG_HDR_GET_ID(msg_hdr), + MSG_HDR_GET_SIZE(msg_hdr), MSG_HDR_GET_SEQNUM(msg_hdr)); + +done: + return result; +} + +/* Size in below functions are in unit of dwords */ +int gen7_hfi_queue_write(struct adreno_device *adreno_dev, u32 queue_idx, + u32 *msg) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct hfi_queue_table *tbl = gmu->hfi.hfi_mem->hostptr; + struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx]; + u32 *queue; + u32 i, write, empty_space; + u32 size = MSG_HDR_GET_SIZE(*msg); + u32 align_size = ALIGN(size, SZ_4); + u32 id = MSG_HDR_GET_ID(*msg); + + if (hdr->status == HFI_QUEUE_STATUS_DISABLED) + return -EINVAL; + + queue = HOST_QUEUE_START_ADDR(gmu->hfi.hfi_mem, queue_idx); + + trace_kgsl_hfi_send(id, size, MSG_HDR_GET_SEQNUM(*msg)); + + empty_space = (hdr->write_index >= hdr->read_index) ? 
+ (hdr->queue_size - (hdr->write_index - hdr->read_index)) + : (hdr->read_index - hdr->write_index); + + if (empty_space <= align_size) + return -ENOSPC; + + write = hdr->write_index; + + for (i = 0; i < size; i++) { + queue[write] = msg[i]; + write = (write + 1) % hdr->queue_size; + } + + /* Cookify any non used data at the end of the write buffer */ + for (; i < align_size; i++) { + queue[write] = 0xfafafafa; + write = (write + 1) % hdr->queue_size; + } + + hfi_update_write_idx(hdr, write); + + return 0; +} + +int gen7_hfi_cmdq_write(struct adreno_device *adreno_dev, u32 *msg) +{ + int ret; + + ret = gen7_hfi_queue_write(adreno_dev, HFI_CMD_ID, msg); + + /* + * Memory barrier to make sure packet and write index are written before + * an interrupt is raised + */ + wmb(); + + /* Send interrupt to GMU to receive the message */ + if (!ret) + gmu_core_regwrite(KGSL_DEVICE(adreno_dev), + GEN7_GMU_HOST2GMU_INTR_SET, 0x1); + + return ret; +} + +/* Sizes of the queue and message are in unit of dwords */ +static void init_queues(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem; + int i; + struct hfi_queue_table *tbl; + struct hfi_queue_header *hdr; + struct { + unsigned int idx; + unsigned int pri; + unsigned int status; + } queue[HFI_QUEUE_MAX] = { + { HFI_CMD_ID, HFI_CMD_PRI, HFI_QUEUE_STATUS_ENABLED }, + { HFI_MSG_ID, HFI_MSG_PRI, HFI_QUEUE_STATUS_ENABLED }, + { HFI_DBG_ID, HFI_DBG_PRI, HFI_QUEUE_STATUS_ENABLED }, + }; + + /* Fill Table Header */ + tbl = mem_addr->hostptr; + tbl->qtbl_hdr.version = 0; + tbl->qtbl_hdr.size = sizeof(struct hfi_queue_table) >> 2; + tbl->qtbl_hdr.qhdr0_offset = sizeof(struct hfi_queue_table_header) >> 2; + tbl->qtbl_hdr.qhdr_size = sizeof(struct hfi_queue_header) >> 2; + tbl->qtbl_hdr.num_q = HFI_QUEUE_MAX; + tbl->qtbl_hdr.num_active_q = HFI_QUEUE_MAX; + + memset(&tbl->qhdr[0], 0, sizeof(tbl->qhdr)); + + /* Fill Individual Queue Headers */ + for 
(i = 0; i < HFI_QUEUE_MAX; i++) { + hdr = &tbl->qhdr[i]; + hdr->start_addr = GMU_QUEUE_START_ADDR(mem_addr->gmuaddr, i); + hdr->type = QUEUE_HDR_TYPE(queue[i].idx, queue[i].pri, 0, 0); + hdr->status = queue[i].status; + hdr->queue_size = HFI_QUEUE_SIZE >> 2; /* convert to dwords */ + } +} + +int gen7_hfi_init(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hfi *hfi = &gmu->hfi; + + /* Allocates & maps memory for HFI */ + if (IS_ERR_OR_NULL(hfi->hfi_mem)) { + hfi->hfi_mem = gen7_reserve_gmu_kernel_block(gmu, 0, + HFIMEM_SIZE, GMU_NONCACHED_KERNEL); + if (!IS_ERR(hfi->hfi_mem)) + init_queues(adreno_dev); + } + + return PTR_ERR_OR_ZERO(hfi->hfi_mem); +} + +int gen7_receive_ack_cmd(struct gen7_gmu_device *gmu, void *rcvd, + struct pending_cmd *ret_cmd) +{ + struct adreno_device *adreno_dev = gen7_gmu_to_adreno(gmu); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 *ack = rcvd; + u32 hdr = ack[0]; + u32 req_hdr = ack[1]; + + if (ret_cmd == NULL) + return -EINVAL; + + if (HDR_CMP_SEQNUM(ret_cmd->sent_hdr, req_hdr)) { + memcpy(&ret_cmd->results, ack, MSG_HDR_GET_SIZE(hdr) << 2); + return 0; + } + + /* Didn't find the sender, list the waiter */ + dev_err_ratelimited(&gmu->pdev->dev, + "HFI ACK: Cannot find sender for 0x%8.8x Waiter: 0x%8.8x\n", + req_hdr, ret_cmd->sent_hdr); + + gmu_core_fault_snapshot(device); + + return -ENODEV; +} + +static int poll_gmu_reg(struct adreno_device *adreno_dev, + u32 offsetdwords, unsigned int expected_val, + unsigned int mask, unsigned int timeout_ms) +{ + unsigned int val; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms); + bool nmi = false; + + while (time_is_after_jiffies(timeout)) { + gmu_core_regread(device, offsetdwords, &val); + if ((val & mask) == expected_val) + return 0; + + /* + * If GMU firmware fails any assertion, error message 
is sent + * to KMD and NMI is triggered. So check if GMU is in NMI and + * timeout early. Bits [11:9] of GEN7_GMU_CM3_FW_INIT_RESULT + * contain GMU reset status. Non zero value here indicates that + * GMU reset is active, NMI handler would eventually complete + * and GMU would wait for recovery. + */ + gmu_core_regread(device, GEN7_GMU_CM3_FW_INIT_RESULT, &val); + if (val & 0xE00) { + nmi = true; + break; + } + + usleep_range(10, 100); + } + + /* Check one last time */ + gmu_core_regread(device, offsetdwords, &val); + if ((val & mask) == expected_val) + return 0; + + dev_err(&gmu->pdev->dev, + "Reg poll %s: offset 0x%x, want 0x%x, got 0x%x\n", + nmi ? "abort" : "timeout", offsetdwords, expected_val, + val & mask); + + return -ETIMEDOUT; +} + +static int gen7_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev, + void *data, struct pending_cmd *ret_cmd) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int rc; + u32 *cmd = data; + struct gen7_hfi *hfi = &gmu->hfi; + unsigned int seqnum = atomic_inc_return(&hfi->seqnum); + + *cmd = MSG_HDR_SET_SEQNUM(*cmd, seqnum); + if (ret_cmd == NULL) + return gen7_hfi_cmdq_write(adreno_dev, cmd); + + ret_cmd->sent_hdr = cmd[0]; + + rc = gen7_hfi_cmdq_write(adreno_dev, cmd); + if (rc) + return rc; + + rc = poll_gmu_reg(adreno_dev, GEN7_GMU_GMU2HOST_INTR_INFO, + HFI_IRQ_MSGQ_MASK, HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT); + + if (rc) { + gmu_core_fault_snapshot(device); + dev_err(&gmu->pdev->dev, + "Timed out waiting on ack for 0x%8.8x (id %d, sequence %d)\n", + cmd[0], MSG_HDR_GET_ID(*cmd), MSG_HDR_GET_SEQNUM(*cmd)); + return rc; + } + + /* Clear the interrupt */ + gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, + HFI_IRQ_MSGQ_MASK); + + rc = gen7_hfi_process_queue(gmu, HFI_MSG_ID, ret_cmd); + + return rc; +} + +#define HFI_ACK_ERROR 0xffffffff + +int gen7_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd) +{ + struct pending_cmd ret_cmd; + 
int rc; + + memset(&ret_cmd, 0, sizeof(ret_cmd)); + + rc = gen7_hfi_send_cmd_wait_inline(adreno_dev, cmd, &ret_cmd); + + if (!rc && ret_cmd.results[2] == HFI_ACK_ERROR) { + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + gmu_core_fault_snapshot(device); + dev_err(&gmu->pdev->dev, "HFI ACK failure: Req 0x%8.8X\n", + ret_cmd.results[1]); + return -EINVAL; + } + + return rc; +} + +int gen7_hfi_send_core_fw_start(struct adreno_device *adreno_dev) +{ + struct hfi_core_fw_start_cmd cmd = { + .handle = 0x0, + }; + int ret; + + ret = CMD_MSG_HDR(cmd, H2F_MSG_CORE_FW_START); + if (ret) + return ret; + + return gen7_hfi_send_generic_req(adreno_dev, &cmd); +} + +static const char *feature_to_string(u32 feature) +{ + if (feature == HFI_FEATURE_ACD) + return "ACD"; + else if (feature == HFI_FEATURE_LM) + return "LM"; + + return "unknown"; +} + +int gen7_hfi_send_feature_ctrl(struct adreno_device *adreno_dev, + u32 feature, u32 enable, u32 data) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct hfi_feature_ctrl_cmd cmd = { + .feature = feature, + .enable = enable, + .data = data, + }; + int ret; + + ret = CMD_MSG_HDR(cmd, H2F_MSG_FEATURE_CTRL); + if (ret) + return ret; + + ret = gen7_hfi_send_generic_req(adreno_dev, &cmd); + if (ret) + dev_err(&gmu->pdev->dev, + "Unable to %s feature %s (%d)\n", + enable ? 
"enable" : "disable", + feature_to_string(feature), + feature); + return ret; +} + +int gen7_hfi_send_set_value(struct adreno_device *adreno_dev, + u32 type, u32 subtype, u32 data) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct hfi_set_value_cmd cmd = { + .type = type, + .subtype = subtype, + .data = data, + }; + int ret; + + ret = CMD_MSG_HDR(cmd, H2F_MSG_SET_VALUE); + if (ret) + return ret; + + ret = gen7_hfi_send_generic_req(adreno_dev, &cmd); + if (ret) + dev_err(&gmu->pdev->dev, + "Unable to set HFI Value %d, %d to %d, error = %d\n", + type, subtype, data, ret); + return ret; +} + +void adreno_gen7_receive_err_req(struct gen7_gmu_device *gmu, void *rcvd) +{ + struct hfi_err_cmd *cmd = rcvd; + + dev_err(&gmu->pdev->dev, "HFI Error Received: %d %d %.16s\n", + ((cmd->error_code >> 16) & 0xffff), + (cmd->error_code & 0xffff), + (char *) cmd->data); +} + +void adreno_gen7_receive_debug_req(struct gen7_gmu_device *gmu, void *rcvd) +{ + struct hfi_debug_cmd *cmd = rcvd; + + dev_dbg(&gmu->pdev->dev, "HFI Debug Received: %d %d %d\n", + cmd->type, cmd->timestamp, cmd->data); +} + +int gen7_hfi_process_queue(struct gen7_gmu_device *gmu, + u32 queue_idx, struct pending_cmd *ret_cmd) +{ + u32 rcvd[MAX_RCVD_SIZE]; + + while (gen7_hfi_queue_read(gmu, queue_idx, rcvd, sizeof(rcvd)) > 0) { + /* ACK Handler */ + if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) { + int ret = gen7_receive_ack_cmd(gmu, rcvd, ret_cmd); + + if (ret) + return ret; + continue; + } + + /* Request Handler */ + switch (MSG_HDR_GET_ID(rcvd[0])) { + case F2H_MSG_ERR: /* No Reply */ + adreno_gen7_receive_err_req(gmu, rcvd); + break; + case F2H_MSG_DEBUG: /* No Reply */ + adreno_gen7_receive_debug_req(gmu, rcvd); + break; + default: /* No Reply */ + dev_err(&gmu->pdev->dev, + "HFI request %d not supported\n", + MSG_HDR_GET_ID(rcvd[0])); + break; + } + } + + return 0; +} + +int gen7_hfi_send_bcl_feature_ctrl(struct adreno_device *adreno_dev) +{ + int ret; + + if (!adreno_dev->bcl_enabled) + 
return 0; + + ret = gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_BCL, 1, 0); + + return ret; +} + +int gen7_hfi_send_acd_feature_ctrl(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret = 0; + + if (adreno_dev->acd_enabled) { + ret = gen7_hfi_send_feature_ctrl(adreno_dev, + HFI_FEATURE_ACD, 1, 0); + + if (!ret) + ret = gen7_hfi_send_generic_req(adreno_dev, + &gmu->hfi.acd_table); + } + + return ret; +} + +int gen7_hfi_send_ifpc_feature_ctrl(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + if (gmu->idle_level == GPU_HW_IFPC) + return gen7_hfi_send_feature_ctrl(adreno_dev, + HFI_FEATURE_IFPC, 1, 0x1680); + return 0; +} + +int gen7_hfi_start(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem; + struct hfi_queue_table *tbl = mem_addr->hostptr; + struct hfi_queue_header *hdr; + int result, i; + + /* Force read_index to the write_index no matter what */ + for (i = 0; i < HFI_QUEUE_MAX; i++) { + hdr = &tbl->qhdr[i]; + if (hdr->status == HFI_QUEUE_STATUS_DISABLED) + continue; + + if (hdr->read_index != hdr->write_index) { + dev_err(&gmu->pdev->dev, + "HFI Q[%d] Index Error: read:0x%X write:0x%X\n", + i, hdr->read_index, hdr->write_index); + hdr->read_index = hdr->write_index; + } + } + + result = gen7_hfi_send_generic_req(adreno_dev, &gmu->hfi.dcvs_table); + if (result) + goto err; + + result = gen7_hfi_send_generic_req(adreno_dev, &gmu->hfi.bw_table); + if (result) + goto err; + + result = gen7_hfi_send_acd_feature_ctrl(adreno_dev); + if (result) + goto err; + + result = gen7_hfi_send_bcl_feature_ctrl(adreno_dev); + if (result) + goto err; + + result = gen7_hfi_send_ifpc_feature_ctrl(adreno_dev); + if (result) + goto err; + + result = gen7_hfi_send_core_fw_start(adreno_dev); + if (result) + goto err; + + 
set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags); + + /* Request default DCVS level */ + result = kgsl_pwrctrl_set_default_gpu_pwrlevel(device); + if (result) + goto err; + + /* Request default BW vote */ + result = kgsl_pwrctrl_axi(device, true); + +err: + if (result) + gen7_hfi_stop(adreno_dev); + + return result; + +} + +void gen7_hfi_stop(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem; + struct hfi_queue_table *tbl = mem_addr->hostptr; + struct hfi_queue_header *hdr; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int i; + + /* Flush HFI queues */ + for (i = 0; i < HFI_QUEUE_MAX; i++) { + hdr = &tbl->qhdr[i]; + if (hdr->status == HFI_QUEUE_STATUS_DISABLED) + continue; + + if (hdr->read_index != hdr->write_index) + dev_err(&gmu->pdev->dev, + "HFI queue[%d] is not empty before close: rd=%d,wt=%d\n", + i, hdr->read_index, hdr->write_index); + } + + kgsl_pwrctrl_axi(device, false); + + clear_bit(GMU_PRIV_HFI_STARTED, &gmu->flags); + +} + +/* HFI interrupt handler */ +irqreturn_t gen7_hfi_irq_handler(int irq, void *data) +{ + struct kgsl_device *device = data; + struct gen7_gmu_device *gmu = to_gen7_gmu(ADRENO_DEVICE(device)); + unsigned int status = 0; + + gmu_core_regread(device, GEN7_GMU_GMU2HOST_INTR_INFO, &status); + gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, HFI_IRQ_MASK); + + if (status & HFI_IRQ_DBGQ_MASK) + gen7_hfi_process_queue(gmu, HFI_DBG_ID, NULL); + if (status & HFI_IRQ_CM3_FAULT_MASK) { + dev_err_ratelimited(&gmu->pdev->dev, + "GMU CM3 fault interrupt received\n"); + atomic_set(&gmu->cm3_fault, 1); + + /* make sure other CPUs see the update */ + smp_wmb(); + } + if (status & ~HFI_IRQ_MASK) + dev_err_ratelimited(&gmu->pdev->dev, + "Unhandled HFI interrupts 0x%lx\n", + status & ~HFI_IRQ_MASK); + + return IRQ_HANDLED; +} diff --git a/adreno_gen7_hfi.h b/adreno_gen7_hfi.h new file mode 100644 index 0000000000..273dc7deb5 --- /dev/null 
+++ b/adreno_gen7_hfi.h @@ -0,0 +1,185 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ +#ifndef __ADRENO_GEN7_HFI_H +#define __ADRENO_GEN7_HFI_H + +#include "adreno_hfi.h" + +/** + * struct gen7_hfi - HFI control structure + */ +struct gen7_hfi { + /** @irq: HFI interrupt line */ + int irq; + /** @seqnum: atomic counter that is incremented for each message sent. + * The value of the counter is used as sequence number for HFI message. + */ + atomic_t seqnum; + /** @hfi_mem: Memory descriptor for the hfi memory */ + struct kgsl_memdesc *hfi_mem; + /** @bw_table: HFI BW table buffer */ + struct hfi_bwtable_cmd bw_table; + /** @acd_table: HFI table for ACD data */ + struct hfi_acd_table_cmd acd_table; + /** @dcvs_table: HFI table for gpu dcvs levels */ + struct hfi_dcvstable_cmd dcvs_table; +}; + +struct gen7_gmu_device; + +/* gen7_hfi_irq_handler - IRQ handler for HFI interrupts */ +irqreturn_t gen7_hfi_irq_handler(int irq, void *data); + +/** + * gen7_hfi_start - Send the various HFIs during device boot up + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_hfi_start(struct adreno_device *adreno_dev); + +/** + * gen7_hfi_stop - Flush the HFI queues and stop the HFI interface + * @adreno_dev: Pointer to the adreno device + * + * Return: none + */ +void gen7_hfi_stop(struct adreno_device *adreno_dev); + +/** + * gen7_hfi_init - Initialize hfi resources + * @adreno_dev: Pointer to the adreno device + * + * This function allocates and sets up hfi queues + * when a process creates the very first kgsl instance + * + * Return: 0 on success or negative error on failure + */ +int gen7_hfi_init(struct adreno_device *adreno_dev); + +/* Helper function to get to gen7 hfi struct from adreno device */ +struct gen7_hfi *to_gen7_hfi(struct adreno_device *adreno_dev); + +/** + * gen7_hfi_queue_write - Write a 
command to hfi queue + * @adreno_dev: Pointer to the adreno device + * @queue_idx: destination queue id + * @msg: Data to be written to the queue + * + * Return: 0 on success or negative error on failure + */ +int gen7_hfi_queue_write(struct adreno_device *adreno_dev, u32 queue_idx, + u32 *msg); + +/** + * gen7_hfi_queue_read - Read data from hfi queue + * @gmu: Pointer to the gen7 gmu device + * @queue_idx: queue id to read from + * @output: Pointer to read the data into + * @max_size: Number of bytes to read from the queue + * + * Return: 0 on success or negative error on failure + */ +int gen7_hfi_queue_read(struct gen7_gmu_device *gmu, u32 queue_idx, + u32 *output, u32 max_size); + +/** + * gen7_receive_ack_cmd - Process ack type packets + * @gmu: Pointer to the gen7 gmu device + * @rcvd: Pointer to the data read from hfi queue + * @ret_cmd: Container for the hfi packet for which this ack is received + * + * Return: 0 on success or negative error on failure + */ +int gen7_receive_ack_cmd(struct gen7_gmu_device *gmu, void *rcvd, + struct pending_cmd *ret_cmd); + +/** + * gen7_hfi_send_feature_ctrl - Enable gmu feature via hfi + * @adreno_dev: Pointer to the adreno device + * @feature: feature to be enabled or disabled + * @enable: Set 1 to enable or 0 to disable a feature + * @data: payload for the send feature hfi packet + * + * Return: 0 on success or negative error on failure + */ +int gen7_hfi_send_feature_ctrl(struct adreno_device *adreno_dev, + u32 feature, u32 enable, u32 data); + +/** + * gen7_hfi_send_set_value - Send gmu set_values via hfi + * @adreno_dev: Pointer to the adreno device + * @type: GMU set_value type + * @subtype: GMU set_value subtype + * @data: Value to set + * + * Return: 0 on success or negative error on failure + */ +int gen7_hfi_send_set_value(struct adreno_device *adreno_dev, + u32 type, u32 subtype, u32 data); + +/** + * gen7_hfi_send_core_fw_start - Send the core fw start hfi + * @adreno_dev: Pointer to the adreno device + * + * 
Return: 0 on success or negative error on failure + */ +int gen7_hfi_send_core_fw_start(struct adreno_device *adreno_dev); + +/** + * gen7_hfi_send_acd_feature_ctrl - Send the acd table and acd feature + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_hfi_send_acd_feature_ctrl(struct adreno_device *adreno_dev); + +/** + * gen7_hfi_send_generic_req - Send a generic hfi packet + * @adreno_dev: Pointer to the adreno device + * @cmd: Pointer to the hfi packet header and data + * + * Return: 0 on success or negative error on failure + */ +int gen7_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd); + +/** + * gen7_hfi_send_bcl_feature_ctrl - Send the bcl feature hfi packet + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_hfi_send_bcl_feature_ctrl(struct adreno_device *adreno_dev); + +/** + * gen7_hfi_send_ifpc_feature_ctrl - Send the ifpc feature hfi packet + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_hfi_send_ifpc_feature_ctrl(struct adreno_device *adreno_dev); + +/* + * gen7_hfi_process_queue - Check hfi queue for messages from gmu + * @gmu: Pointer to the gen7 gmu device + * @queue_idx: queue id to be processed + * @ret_cmd: Container for data needed for waiting for the ack + * + * Return: 0 on success or negative error on failure + */ +int gen7_hfi_process_queue(struct gen7_gmu_device *gmu, + u32 queue_idx, struct pending_cmd *ret_cmd); + +/** + * gen7_hfi_cmdq_write - Write a command to command queue + * @adreno_dev: Pointer to the adreno device + * @msg: Data to be written to the queue + * + * Return: 0 on success or negative error on failure + */ +int gen7_hfi_cmdq_write(struct adreno_device *adreno_dev, u32 *msg); +void adreno_gen7_receive_err_req(struct gen7_gmu_device *gmu, void *rcvd); +void adreno_gen7_receive_debug_req(struct 
gen7_gmu_device *gmu, void *rcvd); +#endif diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c new file mode 100644 index 0000000000..710c696557 --- /dev/null +++ b/adreno_gen7_hwsched.c @@ -0,0 +1,1161 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include + +#include "adreno.h" +#include "adreno_gen7.h" +#include "adreno_gen7_hwsched.h" +#include "adreno_snapshot.h" +#include "kgsl_device.h" +#include "kgsl_trace.h" +#include "kgsl_util.h" + +static size_t adreno_hwsched_snapshot_rb(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf; + u32 *data = (u32 *)(buf + sizeof(*header)); + struct kgsl_memdesc *rb = (struct kgsl_memdesc *)priv; + + if (remain < rb->size + sizeof(*header)) { + SNAPSHOT_ERR_NOMEM(device, "RB"); + return 0; + } + + header->start = 0; + header->end = rb->size >> 2; + header->rptr = 0; + header->rbsize = rb->size >> 2; + header->count = rb->size >> 2; + header->timestamp_queued = 0; + header->timestamp_retired = 0; + header->gpuaddr = rb->gpuaddr; + header->id = 0; + + memcpy(data, rb->hostptr, rb->size); + + return rb->size + sizeof(*header); +} + +static void gen7_hwsched_snapshot_preemption_record(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md, u64 offset) +{ + struct kgsl_snapshot_section_header *section_header = + (struct kgsl_snapshot_section_header *)snapshot->ptr; + u8 *dest = snapshot->ptr + sizeof(*section_header); + struct kgsl_snapshot_gpu_object_v2 *header = + (struct kgsl_snapshot_gpu_object_v2 *)dest; + const struct adreno_gen7_core *gen7_core = to_gen7_core(ADRENO_DEVICE(device)); + u64 ctxt_record_size = GEN7_CP_CTXRECORD_SIZE_IN_BYTES; + size_t section_size; + + if (gen7_core->ctxt_record_size) + ctxt_record_size = gen7_core->ctxt_record_size; + + ctxt_record_size = min_t(u64, 
ctxt_record_size, device->snapshot_ctxt_record_size); + + section_size = sizeof(*section_header) + sizeof(*header) + ctxt_record_size; + if (snapshot->remain < section_size) { + SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); + return; + } + + section_header->magic = SNAPSHOT_SECTION_MAGIC; + section_header->id = KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2; + section_header->size = section_size; + + header->size = ctxt_record_size >> 2; + header->gpuaddr = md->gpuaddr + offset; + header->ptbase = + kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); + header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; + + dest += sizeof(*header); + + memcpy(dest, md->hostptr + offset, ctxt_record_size); + + snapshot->ptr += section_header->size; + snapshot->remain -= section_header->size; + snapshot->size += section_header->size; +} + +static void snapshot_preemption_records(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md) +{ + const struct adreno_gen7_core *gen7_core = + to_gen7_core(ADRENO_DEVICE(device)); + u64 ctxt_record_size = GEN7_CP_CTXRECORD_SIZE_IN_BYTES; + u64 offset; + + if (gen7_core->ctxt_record_size) + ctxt_record_size = gen7_core->ctxt_record_size; + + /* All preemption records exist as a single mem alloc entry */ + for (offset = 0; offset < md->size; offset += ctxt_record_size) + gen7_hwsched_snapshot_preemption_record(device, snapshot, md, + offset); +} + +static u32 gen7_copy_gpu_global(struct adreno_device *adreno_dev, + void *out, u64 gpuaddr, u32 size) +{ + struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev); + u64 offset; + u32 i; + + for (i = 0; i < hw_hfi->mem_alloc_entries; i++) { + struct kgsl_memdesc *md = hw_hfi->mem_alloc_table[i].md; + + if (md && (gpuaddr >= md->gpuaddr) && + ((gpuaddr + size) <= (md->gpuaddr + md->size))) { + offset = gpuaddr - md->gpuaddr; + memcpy(out, md->hostptr + offset, size); + return size; + } + } + + return 0; +} + +static size_t adreno_hwsched_snapshot_rb_payload(struct 
kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf; + u32 *data = (u32 *)(buf + sizeof(*header)); + struct payload_section *payload = (struct payload_section *)priv; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + u32 size = gen7_hwsched_parse_payload(payload, KEY_RB_SIZEDWORDS) << 2; + u64 lo, hi, gpuaddr; + + lo = gen7_hwsched_parse_payload(payload, KEY_RB_GPUADDR_LO); + hi = gen7_hwsched_parse_payload(payload, KEY_RB_GPUADDR_HI); + gpuaddr = hi << 32 | lo; + + /* If the gpuaddress and size don't match any allocation, then abort */ + if ((remain < size + sizeof(*header)) || + !gen7_copy_gpu_global(adreno_dev, data, gpuaddr, size)) { + SNAPSHOT_ERR_NOMEM(device, "RB"); + return 0; + } + + header->start = 0; + header->end = size >> 2; + header->rptr = gen7_hwsched_parse_payload(payload, KEY_RB_RPTR); + header->wptr = gen7_hwsched_parse_payload(payload, KEY_RB_WPTR); + header->rbsize = size >> 2; + header->count = size >> 2; + header->timestamp_queued = gen7_hwsched_parse_payload(payload, + KEY_RB_QUEUED_TS); + header->timestamp_retired = gen7_hwsched_parse_payload(payload, + KEY_RB_RETIRED_TS); + header->gpuaddr = gpuaddr; + header->id = gen7_hwsched_parse_payload(payload, KEY_RB_ID); + + return size + sizeof(*header); +} + +static bool parse_payload_rb(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; + u32 i = 0, payload_bytes; + void *start; + bool ret = false; + + /* Skip if we didn't receive a context bad HFI */ + if (!cmd->hdr) + return false; + + payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - + offsetof(struct hfi_context_bad_cmd, payload); + + start = &cmd->payload[0]; + + while (i < payload_bytes) { + struct payload_section *payload = start + i; + + if (payload->type == PAYLOAD_RB) { + kgsl_snapshot_add_section(KGSL_DEVICE(adreno_dev), + KGSL_SNAPSHOT_SECTION_RB_V2, + 
snapshot, adreno_hwsched_snapshot_rb_payload, + payload); + ret = true; + } + + i += sizeof(*payload) + (payload->dwords << 2); + } + + return ret; +} + +void gen7_hwsched_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev); + bool skip_memkind_rb = false; + u32 i; + + gen7_gmu_snapshot(adreno_dev, snapshot); + + adreno_hwsched_parse_fault_cmdobj(adreno_dev, snapshot); + + /* + * First try to dump ringbuffers using context bad HFI payloads + * because they have all the ringbuffer parameters. If ringbuffer + * payloads are not present, fall back to dumping ringbuffers + * based on MEMKIND_RB + */ + if (parse_payload_rb(adreno_dev, snapshot)) + skip_memkind_rb = true; + + for (i = 0; i < hw_hfi->mem_alloc_entries; i++) { + struct hfi_mem_alloc_entry *entry = &hw_hfi->mem_alloc_table[i]; + + if (entry->desc.mem_kind == HFI_MEMKIND_RB && !skip_memkind_rb) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_RB_V2, + snapshot, adreno_hwsched_snapshot_rb, + entry->md); + + if (entry->desc.mem_kind == HFI_MEMKIND_SCRATCH) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, adreno_snapshot_global, + entry->md); + + if (entry->desc.mem_kind == HFI_MEMKIND_PROFILE) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, adreno_snapshot_global, + entry->md); + + if (entry->desc.mem_kind == HFI_MEMKIND_CSW_SMMU_INFO) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, adreno_snapshot_global, + entry->md); + + if (entry->desc.mem_kind == HFI_MEMKIND_CSW_PRIV_NON_SECURE) + snapshot_preemption_records(device, snapshot, + entry->md); + } + + adreno_hwsched_parse_fault_cmdobj(adreno_dev, snapshot); +} + +static int gen7_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = 
KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int level, ret = 0; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_AWARE); + + gen7_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled); + + ret = gen7_gmu_enable_gdsc(adreno_dev); + if (ret) + return ret; + + ret = gen7_gmu_enable_clks(adreno_dev); + if (ret) + goto gdsc_off; + + ret = gen7_gmu_load_fw(adreno_dev); + if (ret) + goto clks_gdsc_off; + + ret = gen7_gmu_itcm_shadow(adreno_dev); + if (ret) + goto clks_gdsc_off; + + if (!test_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags)) { + ret = gen7_load_pdc_ucode(adreno_dev); + if (ret) + goto clks_gdsc_off; + + gen7_load_rsc_ucode(adreno_dev); + set_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags); + } + + gen7_gmu_register_config(adreno_dev); + + gen7_gmu_version_info(adreno_dev); + + gen7_gmu_irq_enable(adreno_dev); + + /* Vote for minimal DDR BW for GMU to init */ + level = pwr->pwrlevels[pwr->default_pwrlevel].bus_min; + + icc_set_bw(pwr->icc_path, 0, kBps_to_icc(pwr->ddr_table[level])); + + ret = gen7_gmu_device_start(adreno_dev); + if (ret) + goto err; + + ret = gen7_hwsched_hfi_start(adreno_dev); + if (ret) + goto err; + + icc_set_bw(pwr->icc_path, 0, 0); + + device->gmu_fault = false; + + if (ADRENO_FEATURE(adreno_dev, ADRENO_BCL)) + adreno_dev->bcl_enabled = true; + + trace_kgsl_pwr_set_state(device, KGSL_STATE_AWARE); + + return 0; + +err: + if (device->gmu_fault) { + gen7_gmu_suspend(adreno_dev); + + return ret; + } + + gen7_gmu_irq_disable(adreno_dev); + +clks_gdsc_off: + clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); + +gdsc_off: + /* Poll to make sure that the CX is off */ + gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + + return ret; +} + +static int gen7_hwsched_gmu_boot(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret = 0; + + 
trace_kgsl_pwr_request_state(device, KGSL_STATE_AWARE); + + ret = gen7_gmu_enable_gdsc(adreno_dev); + if (ret) + return ret; + + ret = gen7_gmu_enable_clks(adreno_dev); + if (ret) + goto gdsc_off; + + ret = gen7_rscc_wakeup_sequence(adreno_dev); + if (ret) + goto clks_gdsc_off; + + ret = gen7_gmu_load_fw(adreno_dev); + if (ret) + goto clks_gdsc_off; + + gen7_gmu_register_config(adreno_dev); + + gen7_gmu_irq_enable(adreno_dev); + + ret = gen7_gmu_device_start(adreno_dev); + if (ret) + goto err; + + ret = gen7_hwsched_hfi_start(adreno_dev); + if (ret) + goto err; + + device->gmu_fault = false; + + trace_kgsl_pwr_set_state(device, KGSL_STATE_AWARE); + + return 0; +err: + if (device->gmu_fault) { + gen7_gmu_suspend(adreno_dev); + + return ret; + } + + gen7_gmu_irq_disable(adreno_dev); + +clks_gdsc_off: + clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); + +gdsc_off: + /* Poll to make sure that the CX is off */ + gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + + return ret; +} + +void gen7_hwsched_active_count_put(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return; + + if (WARN(atomic_read(&device->active_cnt) == 0, + "Unbalanced get/put calls to KGSL active count\n")) + return; + + if (atomic_dec_and_test(&device->active_cnt)) { + kgsl_pwrscale_update_stats(device); + kgsl_pwrscale_update(device); + kgsl_start_idle_timer(device); + } + + trace_kgsl_active_count(device, + (unsigned long) __builtin_return_address(0)); + + wake_up(&device->active_cnt_wq); +} + +static int gen7_hwsched_notify_slumber(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct hfi_prep_slumber_cmd req; + int ret; + + ret = CMD_MSG_HDR(req, H2F_MSG_PREPARE_SLUMBER); + if (ret) + return ret; + + req.freq = 
gmu->hfi.dcvs_table.gpu_level_num - + pwr->default_pwrlevel - 1; + req.bw = pwr->pwrlevels[pwr->default_pwrlevel].bus_freq; + + /* Disable the power counter so that the GMU is not busy */ + gmu_core_regwrite(device, GEN7_GMU_CX_GMU_POWER_COUNTER_ENABLE, 0); + + return gen7_hfi_send_cmd_async(adreno_dev, &req); + +} +static int gen7_hwsched_gmu_power_off(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret = 0; + + if (device->gmu_fault) + goto error; + + /* Wait for the lowest idle level we requested */ + ret = gen7_gmu_wait_for_lowest_idle(adreno_dev); + if (ret) + goto error; + + ret = gen7_hwsched_notify_slumber(adreno_dev); + if (ret) + goto error; + + ret = gen7_gmu_wait_for_idle(adreno_dev); + if (ret) + goto error; + + ret = gen7_rscc_sleep_sequence(adreno_dev); + + /* Now that we are done with GMU and GPU, Clear the GBIF */ + ret = gen7_halt_gbif(adreno_dev); + + gen7_gmu_irq_disable(adreno_dev); + + gen7_hwsched_hfi_stop(adreno_dev); + + clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); + + /* Poll to make sure that the CX is off */ + gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + + return ret; + +error: + gen7_hwsched_hfi_stop(adreno_dev); + gen7_gmu_suspend(adreno_dev); + + return ret; +} + +static int gen7_hwsched_gpu_boot(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + /* Clear any GPU faults that might have been left over */ + adreno_clear_gpu_fault(adreno_dev); + + ret = kgsl_mmu_start(device); + if (ret) + goto err; + + ret = gen7_gmu_oob_set(device, oob_gpu); + if (ret) + goto err; + + /* Clear the busy_data stats - we're starting over from scratch */ + memset(&adreno_dev->busy_data, 0, sizeof(adreno_dev->busy_data)); + + gen7_start(adreno_dev); + + /* Re-initialize the coresight registers if applicable */ + adreno_coresight_start(adreno_dev); + + 
adreno_perfcounter_start(adreno_dev); + + /* Clear FSR here in case it is set from a previous pagefault */ + kgsl_mmu_clear_fsr(&device->mmu); + + gen7_enable_gpu_irq(adreno_dev); + + ret = gen7_hwsched_cp_init(adreno_dev); + if (ret) { + gen7_disable_gpu_irq(adreno_dev); + goto err; + } + + device->reset_counter++; +err: + gen7_gmu_oob_clear(device, oob_gpu); + + if (ret) + gen7_hwsched_gmu_power_off(adreno_dev); + + return ret; +} + +static void hwsched_idle_timer(struct timer_list *t) +{ + struct kgsl_device *device = container_of(t, struct kgsl_device, + idle_timer); + + kgsl_schedule_work(&device->idle_check_ws); +} + +static int gen7_hwsched_gmu_init(struct adreno_device *adreno_dev) +{ + int ret; + + ret = gen7_gmu_parse_fw(adreno_dev); + if (ret) + return ret; + + ret = gen7_gmu_memory_init(adreno_dev); + if (ret) + return ret; + + return gen7_hwsched_hfi_init(adreno_dev); +} + +static void gen7_hwsched_touch_wakeup(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + /* + * Do not wake up a suspended device or until the first boot sequence + * has been completed. + */ + if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags) || + !test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) + return; + + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + goto done; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_ACTIVE); + + ret = gen7_hwsched_gmu_boot(adreno_dev); + if (ret) + return; + + ret = gen7_hwsched_gpu_boot(adreno_dev); + if (ret) + return; + + kgsl_pwrscale_wake(device); + + set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + device->pwrctrl.last_stat_updated = ktime_get(); + device->state = KGSL_STATE_ACTIVE; + + trace_kgsl_pwr_set_state(device, KGSL_STATE_ACTIVE); + +done: + /* + * When waking up from a touch event we want to stay active long enough + * for the user to send a draw command. 
The default idle timer timeout + * is shorter than we want so go ahead and push the idle timer out + * further for this special case + */ + mod_timer(&device->idle_timer, jiffies + + msecs_to_jiffies(adreno_wake_timeout)); +} + +static int gen7_hwsched_boot(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + return 0; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_ACTIVE); + + adreno_hwsched_start(adreno_dev); + + ret = gen7_hwsched_gmu_boot(adreno_dev); + if (ret) + return ret; + + ret = gen7_hwsched_gpu_boot(adreno_dev); + if (ret) + return ret; + + kgsl_start_idle_timer(device); + kgsl_pwrscale_wake(device); + + set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + device->pwrctrl.last_stat_updated = ktime_get(); + device->state = KGSL_STATE_ACTIVE; + + trace_kgsl_pwr_set_state(device, KGSL_STATE_ACTIVE); + + return ret; +} + +static int gen7_hwsched_first_boot(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + if (test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) + return gen7_hwsched_boot(adreno_dev); + + adreno_hwsched_start(adreno_dev); + + ret = gen7_microcode_read(adreno_dev); + if (ret) + return ret; + + ret = gen7_init(adreno_dev); + if (ret) + return ret; + + ret = gen7_hwsched_gmu_init(adreno_dev); + if (ret) + return ret; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_ACTIVE); + + ret = gen7_hwsched_gmu_first_boot(adreno_dev); + if (ret) + return ret; + + ret = gen7_hwsched_gpu_boot(adreno_dev); + if (ret) + return ret; + + adreno_get_bus_counters(adreno_dev); + + adreno_dev->cooperative_reset = ADRENO_FEATURE(adreno_dev, + ADRENO_COOP_RESET); + + set_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags); + set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + /* + * There is a possible 
deadlock scenario during kgsl firmware reading + * (request_firmware) and devfreq update calls. During first boot, kgsl + * device mutex is held and then request_firmware is called for reading + * firmware. request_firmware internally takes dev_pm_qos_mtx lock. + * Whereas in case of devfreq update calls triggered by thermal/bcl or + * devfreq sysfs, it first takes the same dev_pm_qos_mtx lock and then + * tries to take kgsl device mutex as part of get_dev_status/target + * calls. This results in deadlock when both thread are unable to acquire + * the mutex held by other thread. Enable devfreq updates now as we are + * done reading all firmware files. + */ + device->pwrscale.devfreq_enabled = true; + + device->pwrctrl.last_stat_updated = ktime_get(); + device->state = KGSL_STATE_ACTIVE; + + trace_kgsl_pwr_set_state(device, KGSL_STATE_ACTIVE); + + return 0; +} + +static int gen7_hwsched_power_off(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + return 0; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_SLUMBER); + + /* process any profiling results that are available */ + adreno_profile_process_results(ADRENO_DEVICE(device)); + + if (!gen7_hw_isidle(adreno_dev)) + dev_err(&gmu->pdev->dev, "GPU isn't idle before SLUMBER\n"); + + ret = gen7_gmu_oob_set(device, oob_gpu); + if (ret) { + gen7_gmu_oob_clear(device, oob_gpu); + goto no_gx_power; + } + + kgsl_pwrscale_update_stats(device); + + /* Save active coresight registers if applicable */ + adreno_coresight_stop(adreno_dev); + + adreno_irqctrl(adreno_dev, 0); + + gen7_gmu_oob_clear(device, oob_gpu); + +no_gx_power: + kgsl_pwrctrl_irq(device, false); + + gen7_hwsched_gmu_power_off(adreno_dev); + + adreno_hwsched_unregister_contexts(adreno_dev); + + if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice)) + llcc_slice_deactivate(adreno_dev->gpu_llc_slice); + + 
if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) + llcc_slice_deactivate(adreno_dev->gpuhtw_llc_slice); + + clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + device->state = KGSL_STATE_NONE; + + del_timer_sync(&device->idle_timer); + + kgsl_pwrscale_sleep(device); + + kgsl_pwrctrl_clear_l3_vote(device); + + trace_kgsl_pwr_set_state(device, KGSL_STATE_SLUMBER); + + return ret; +} + +static void hwsched_idle_check(struct work_struct *work) +{ + struct kgsl_device *device = container_of(work, + struct kgsl_device, idle_check_ws); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + mutex_lock(&device->mutex); + + if (test_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags)) + goto done; + + if (!atomic_read(&device->active_cnt)) { + gen7_hwsched_power_off(adreno_dev); + } else { + kgsl_pwrscale_update(device); + kgsl_start_idle_timer(device); + } + +done: + mutex_unlock(&device->mutex); +} + +static int gen7_hwsched_first_open(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + /* + * Do the one time settings that need to happen when we + * attempt to boot the gpu the very first time + */ + ret = gen7_hwsched_first_boot(adreno_dev); + if (ret) + return ret; + + /* + * A client that does a first_open but never closes the device + * may prevent us from going back to SLUMBER. So trigger the idle + * check by incrementing the active count and immediately releasing it. 
+ */ + atomic_inc(&device->active_cnt); + gen7_hwsched_active_count_put(adreno_dev); + + return 0; +} + +int gen7_hwsched_active_count_get(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret = 0; + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return -EINVAL; + + if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags)) + return -EINVAL; + + if ((atomic_read(&device->active_cnt) == 0)) + ret = gen7_hwsched_boot(adreno_dev); + + if (ret == 0) + atomic_inc(&device->active_cnt); + + trace_kgsl_active_count(device, + (unsigned long) __builtin_return_address(0)); + + return ret; +} + +static int gen7_hwsched_dcvs_set(struct adreno_device *adreno_dev, + int gpu_pwrlevel, int bus_level) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table; + struct hfi_gx_bw_perf_vote_cmd req = { + .ack_type = DCVS_ACK_BLOCK, + .freq = INVALID_DCVS_IDX, + .bw = INVALID_DCVS_IDX, + }; + int ret; + + if (!test_bit(GMU_PRIV_HFI_STARTED, &gmu->flags)) + return 0; + + /* Do not set to XO and lower GPU clock vote from GMU */ + if ((gpu_pwrlevel != INVALID_DCVS_IDX) && + (gpu_pwrlevel >= table->gpu_level_num - 1)) { + dev_err(&gmu->pdev->dev, "Invalid gpu dcvs request: %d\n", + gpu_pwrlevel); + return -EINVAL; + } + + if (gpu_pwrlevel < table->gpu_level_num - 1) + req.freq = table->gpu_level_num - gpu_pwrlevel - 1; + + if (bus_level < pwr->ddr_table_count && bus_level > 0) + req.bw = bus_level; + + /* GMU will vote for slumber levels through the sleep sequence */ + if ((req.freq == INVALID_DCVS_IDX) && (req.bw == INVALID_DCVS_IDX)) + return 0; + + ret = CMD_MSG_HDR(req, H2F_MSG_GX_BW_PERF_VOTE); + if (ret) + return ret; + + ret = gen7_hfi_send_cmd_async(adreno_dev, &req); + + if (ret) { + dev_err_ratelimited(&gmu->pdev->dev, + 
"Failed to set GPU perf idx %d, bw idx %d\n", + req.freq, req.bw); + + /* + * If this was a dcvs request along side an active gpu, request + * dispatcher based reset and recovery. + */ + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + } + + return ret; +} + +static int gen7_hwsched_clock_set(struct adreno_device *adreno_dev, + u32 pwrlevel) +{ + return gen7_hwsched_dcvs_set(adreno_dev, pwrlevel, INVALID_DCVS_IDX); +} + +static void scale_gmu_frequency(struct adreno_device *adreno_dev, int buslevel) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + static unsigned long prev_freq; + unsigned long freq = GMU_FREQ_MIN; + + if (!gmu->perf_ddr_bw) + return; + + /* + * Scale the GMU if DDR is at a CX corner at which GMU can run at + * 500 Mhz + */ + if (pwr->ddr_table[buslevel] >= gmu->perf_ddr_bw) + freq = GMU_FREQ_MAX; + + if (prev_freq == freq) + return; + + if (kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", freq)) { + dev_err(&gmu->pdev->dev, "Unable to set the GMU clock to %ld\n", + freq); + return; + } + + trace_kgsl_gmu_pwrlevel(freq, prev_freq); + + prev_freq = freq; +} + +static int gen7_hwsched_bus_set(struct adreno_device *adreno_dev, int buslevel, + u32 ab) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int ret = 0; + + if (buslevel != pwr->cur_buslevel) { + ret = gen7_hwsched_dcvs_set(adreno_dev, INVALID_DCVS_IDX, + buslevel); + if (ret) + return ret; + + scale_gmu_frequency(adreno_dev, buslevel); + + pwr->cur_buslevel = buslevel; + + trace_kgsl_buslevel(device, pwr->active_pwrlevel, buslevel); + } + + if (ab != pwr->cur_ab) { + icc_set_bw(pwr->icc_path, MBps_to_icc(ab), 0); + pwr->cur_ab = ab; + } + + return ret; +} + +static int gen7_hwsched_pm_suspend(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = 
KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags)) + return 0; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_SUSPEND); + + /* Halt any new submissions */ + reinit_completion(&device->halt_gate); + + /** + * Wait for the dispatcher to retire everything by waiting + * for the active count to go to zero. + */ + ret = kgsl_active_count_wait(device, 0, msecs_to_jiffies(100)); + if (ret) { + dev_err(device->dev, "Timed out waiting for the active count\n"); + goto err; + } + + ret = adreno_hwsched_idle(adreno_dev); + if (ret) + goto err; + + gen7_hwsched_power_off(adreno_dev); + + adreno_get_gpu_halt(adreno_dev); + + set_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags); + + trace_kgsl_pwr_set_state(device, KGSL_STATE_SUSPEND); + + return 0; + +err: + adreno_hwsched_start(adreno_dev); + + return ret; +} + +static void gen7_hwsched_pm_resume(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + if (WARN(!test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags), + "resume invoked without a suspend\n")) + return; + + adreno_put_gpu_halt(adreno_dev); + + adreno_hwsched_start(adreno_dev); + + clear_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags); +} + +void gen7_hwsched_handle_watchdog(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 mask; + + /* Temporarily mask the watchdog interrupt to prevent a storm */ + gmu_core_regread(device, GEN7_GMU_AO_HOST_INTERRUPT_MASK, + &mask); + gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_MASK, + (mask | GMU_INT_WDOG_BITE)); + + gen7_gmu_send_nmi(adreno_dev, false); + + dev_err_ratelimited(&gmu->pdev->dev, + "GMU watchdog expired interrupt received\n"); + + adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); +} + +static void gen7_hwsched_drain_ctxt_unregister(struct adreno_device *adreno_dev) +{ + struct gen7_hwsched_hfi 
*hfi = to_gen7_hwsched_hfi(adreno_dev); + struct pending_cmd *cmd = NULL; + + read_lock(&hfi->msglock); + + list_for_each_entry(cmd, &hfi->msglist, node) { + if (MSG_HDR_GET_ID(cmd->sent_hdr) == H2F_MSG_UNREGISTER_CONTEXT) + complete(&cmd->complete); + } + + read_unlock(&hfi->msglock); +} + +int gen7_hwsched_reset(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + /* + * Any pending context unregister packets will be lost + * since we hard reset the GMU. This means any threads waiting + * for context unregister hfi ack will timeout. Wake them + * to avoid false positive ack timeout messages later. + */ + gen7_hwsched_drain_ctxt_unregister(adreno_dev); + + adreno_hwsched_unregister_contexts(adreno_dev); + + if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + return 0; + + gen7_hwsched_hfi_stop(adreno_dev); + + gen7_disable_gpu_irq(adreno_dev); + + gen7_gmu_suspend(adreno_dev); + + /* + * In some corner cases, it is possible that GMU put TS_RETIRE + * on the msgq after we have turned off gmu interrupts. Hence, + * drain the queue one last time before we reboot the GMU. 
+ */ + gen7_hwsched_process_msgq(adreno_dev); + + clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + ret = gen7_hwsched_boot(adreno_dev); + + BUG_ON(ret); + + return ret; +} + +const struct adreno_power_ops gen7_hwsched_power_ops = { + .first_open = gen7_hwsched_first_open, + .last_close = gen7_hwsched_power_off, + .active_count_get = gen7_hwsched_active_count_get, + .active_count_put = gen7_hwsched_active_count_put, + .touch_wakeup = gen7_hwsched_touch_wakeup, + .pm_suspend = gen7_hwsched_pm_suspend, + .pm_resume = gen7_hwsched_pm_resume, + .gpu_clock_set = gen7_hwsched_clock_set, + .gpu_bus_set = gen7_hwsched_bus_set, +}; + +const struct adreno_hwsched_ops gen7_hwsched_ops = { + .submit_cmdobj = gen7_hwsched_submit_cmdobj, + .preempt_count = gen7_hwsched_preempt_count_get, +}; + +int gen7_hwsched_probe(struct platform_device *pdev, + u32 chipid, const struct adreno_gpu_core *gpucore) +{ + struct adreno_device *adreno_dev; + struct kgsl_device *device; + struct gen7_hwsched_device *gen7_hwsched_dev; + int ret; + + gen7_hwsched_dev = devm_kzalloc(&pdev->dev, sizeof(*gen7_hwsched_dev), + GFP_KERNEL); + if (!gen7_hwsched_dev) + return -ENOMEM; + + adreno_dev = &gen7_hwsched_dev->gen7_dev.adreno_dev; + + ret = gen7_probe_common(pdev, adreno_dev, chipid, gpucore); + if (ret) + return ret; + + device = KGSL_DEVICE(adreno_dev); + + INIT_WORK(&device->idle_check_ws, hwsched_idle_check); + + timer_setup(&device->idle_timer, hwsched_idle_timer, 0); + + adreno_dev->irq_mask = GEN7_HWSCHED_INT_MASK; + + if (ADRENO_FEATURE(adreno_dev, ADRENO_PREEMPTION)) + set_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv); + + return adreno_hwsched_init(adreno_dev, &gen7_hwsched_ops); +} + +int gen7_hwsched_add_to_minidump(struct adreno_device *adreno_dev) +{ + struct gen7_device *gen7_dev = container_of(adreno_dev, + struct gen7_device, adreno_dev); + struct gen7_hwsched_device *gen7_hwsched = container_of(gen7_dev, + struct gen7_hwsched_device, gen7_dev); + int ret; + + ret = 
kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_HWSCHED_DEVICE, + (void *)(gen7_hwsched), sizeof(struct gen7_hwsched_device)); + if (ret) + return ret; + + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_GMU_LOG_ENTRY, + gen7_dev->gmu.gmu_log->hostptr, gen7_dev->gmu.gmu_log->size); + if (ret) + return ret; + + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_HFIMEM_ENTRY, + gen7_dev->gmu.hfi.hfi_mem->hostptr, gen7_dev->gmu.hfi.hfi_mem->size); + + return ret; +} diff --git a/adreno_gen7_hwsched.h b/adreno_gen7_hwsched.h new file mode 100644 index 0000000000..b2da557d8d --- /dev/null +++ b/adreno_gen7_hwsched.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#ifndef _ADRENO_GEN7_HWSCHED_H_ +#define _ADRENO_GEN7_HWSCHED_H_ + +#include "adreno_gen7_hwsched_hfi.h" + +/** + * struct gen7_hwsched_device - Container for the gen7 hwscheduling device + */ +struct gen7_hwsched_device { + /** @gen7_dev: Container for the gen7 device */ + struct gen7_device gen7_dev; + /** @hwsched_hfi: Container for hwscheduling specific hfi resources */ + struct gen7_hwsched_hfi hwsched_hfi; +}; + +/** + * gen7_hwsched_probe - Target specific probe for hwsched + * @pdev: Pointer to the platform device + * @chipid: Chipid of the target + * @gpucore: Pointer to the gpucore + * + * The target specific probe function for hwsched enabled gmu targets. 
+ * + * Return: 0 on success or negative error on failure + */ +int gen7_hwsched_probe(struct platform_device *pdev, + u32 chipid, const struct adreno_gpu_core *gpucore); + +/** + * gen7_hwsched_reset - Restart the gmu and gpu + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_hwsched_reset(struct adreno_device *adreno_dev); + +/** + * gen7_hwsched_snapshot - take gen7 hwsched snapshot + * @adreno_dev: Pointer to the adreno device + * @snapshot: Pointer to the snapshot instance + * + * Snapshot the faulty ib and then snapshot rest of gen7 gmu things + */ +void gen7_hwsched_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); + +/** + * gen7_hwsched_handle_watchdog - Handle watchdog interrupt + * @adreno_dev: Pointer to the adreno device + */ +void gen7_hwsched_handle_watchdog(struct adreno_device *adreno_dev); + +/** + * gen7_hwsched_active_count_get - Increment the active count + * @adreno_dev: Pointer to the adreno device + * + * This function increments the active count. If active count + * is 0, this function also powers up the device. + * + * Return: 0 on success or negative error on failure + */ +int gen7_hwsched_active_count_get(struct adreno_device *adreno_dev); + +/** + * gen7_hwsched_active_count_put - Put back the active count + * @adreno_dev: Pointer to the adreno device + * + * This function decrements the active count sets the idle + * timer if active count is zero. 
+ */ +void gen7_hwsched_active_count_put(struct adreno_device *adreno_dev); + +/** + * gen7_hwsched_add_to_minidump - Register hwsched_device with va minidump + * @adreno_dev: Pointer to the adreno device + */ +int gen7_hwsched_add_to_minidump(struct adreno_device *adreno_dev); + +#endif diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c new file mode 100644 index 0000000000..0ee8a7b858 --- /dev/null +++ b/adreno_gen7_hwsched_hfi.c @@ -0,0 +1,1606 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#include +#include + +#include "adreno.h" +#include "adreno_gen7.h" +#include "adreno_gen7_hwsched.h" +#include "adreno_hfi.h" +#include "adreno_pm4types.h" +#include "adreno_trace.h" +#include "kgsl_device.h" +#include "kgsl_eventlog.h" +#include "kgsl_pwrctrl.h" +#include "kgsl_trace.h" + +#define HFI_QUEUE_MAX (HFI_QUEUE_DEFAULT_CNT + HFI_QUEUE_DISPATCH_MAX_CNT) + +#define DEFINE_QHDR(gmuaddr, id, prio) \ + {\ + .status = 1, \ + .start_addr = GMU_QUEUE_START_ADDR(gmuaddr, id), \ + .type = QUEUE_HDR_TYPE(id, prio, 0, 0), \ + .queue_size = SZ_4K >> 2, \ + .msg_size = 0, \ + .unused0 = 0, \ + .unused1 = 0, \ + .unused2 = 0, \ + .unused3 = 0, \ + .unused4 = 0, \ + .read_index = 0, \ + .write_index = 0, \ +} + +static struct dq_info { + /** @max_dq: Maximum number of dispatch queues per RB level */ + u32 max_dq; + /** @base_dq_id: Base dqid for level */ + u32 base_dq_id; + /** @offset: Next dqid to use for roundrobin context assignment */ + u32 offset; +} gen7_hfi_dqs[KGSL_PRIORITY_MAX_RB_LEVELS] = { + { 4, 0, }, /* RB0 */ + { 4, 4, }, /* RB1 */ + { 3, 8, }, /* RB2 */ + { 3, 11, }, /* RB3 */ +}; + +struct gen7_hwsched_hfi *to_gen7_hwsched_hfi( + struct adreno_device *adreno_dev) +{ + struct gen7_device *gen7_dev = container_of(adreno_dev, + struct gen7_device, adreno_dev); + struct gen7_hwsched_device *gen7_hwsched = container_of(gen7_dev, + struct gen7_hwsched_device, gen7_dev); + + 
return &gen7_hwsched->hwsched_hfi; +} + +static void add_waiter(struct gen7_hwsched_hfi *hfi, u32 hdr, + struct pending_cmd *ack) +{ + memset(ack, 0x0, sizeof(*ack)); + + init_completion(&ack->complete); + write_lock_irq(&hfi->msglock); + list_add_tail(&ack->node, &hfi->msglist); + write_unlock_irq(&hfi->msglock); + + ack->sent_hdr = hdr; +} + +static void del_waiter(struct gen7_hwsched_hfi *hfi, struct pending_cmd *ack) +{ + write_lock_irq(&hfi->msglock); + list_del(&ack->node); + write_unlock_irq(&hfi->msglock); +} + +static void gen7_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + struct pending_cmd *cmd = NULL; + u32 waiters[64], num_waiters = 0, i; + u32 *ack = rcvd; + u32 hdr = ack[0]; + u32 req_hdr = ack[1]; + u32 size_bytes = MSG_HDR_GET_SIZE(hdr) << 2; + + if (size_bytes > sizeof(cmd->results)) + dev_err_ratelimited(&gmu->pdev->dev, + "Ack result too big: %d Truncating to: %ld\n", + size_bytes, sizeof(cmd->results)); + + read_lock(&hfi->msglock); + + list_for_each_entry(cmd, &hfi->msglist, node) { + if (HDR_CMP_SEQNUM(cmd->sent_hdr, req_hdr)) { + memcpy(cmd->results, ack, + min_t(u32, size_bytes, + sizeof(cmd->results))); + complete(&cmd->complete); + read_unlock(&hfi->msglock); + return; + } + + if (num_waiters < ARRAY_SIZE(waiters)) + waiters[num_waiters++] = cmd->sent_hdr; + } + + read_unlock(&hfi->msglock); + + /* Didn't find the sender, list the waiter */ + dev_err_ratelimited(&gmu->pdev->dev, + "Unexpectedly got id %d seqnum %d. 
Total waiters: %d Top %d Waiters:\n", + MSG_HDR_GET_ID(req_hdr), MSG_HDR_GET_SEQNUM(req_hdr), + num_waiters, min_t(u32, num_waiters, 5)); + + for (i = 0; i < num_waiters && i < 5; i++) + dev_err_ratelimited(&gmu->pdev->dev, + " id %d seqnum %d\n", + MSG_HDR_GET_ID(waiters[i]), + MSG_HDR_GET_SEQNUM(waiters[i])); +} + +static void log_profiling_info(struct adreno_device *adreno_dev, u32 *rcvd) +{ + struct hfi_ts_retire_cmd *cmd = (struct hfi_ts_retire_cmd *)rcvd; + struct kgsl_context *context; + struct retire_info info = {0}; + + context = kgsl_context_get(KGSL_DEVICE(adreno_dev), cmd->ctxt_id); + if (context == NULL) + return; + + info.timestamp = cmd->ts; + info.rb_id = adreno_get_level(context->priority); + info.gmu_dispatch_queue = context->gmu_dispatch_queue; + info.submitted_to_rb = cmd->submitted_to_rb; + info.sop = cmd->sop; + info.eop = cmd->eop; + info.retired_on_gmu = cmd->retired_on_gmu; + + trace_adreno_cmdbatch_retired(context, &info, 0, 0, 0); + + log_kgsl_cmdbatch_retired_event(context->id, cmd->ts, + context->priority, 0, cmd->sop, cmd->eop); + + kgsl_context_put(context); +} + +u32 gen7_hwsched_parse_payload(struct payload_section *payload, u32 key) +{ + u32 i; + + /* Each key-value pair is 2 dwords */ + for (i = 0; i < payload->dwords; i += 2) { + if (payload->data[i] == key) + return payload->data[i + 1]; + } + + return 0; +} + +/* Look up a particular key's value for a given type of payload */ +static u32 gen7_hwsched_lookup_key_value(struct adreno_device *adreno_dev, + u32 type, u32 key) +{ + struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; + u32 i = 0, payload_bytes; + void *start; + + if (!cmd->hdr) + return 0; + + payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - + offsetof(struct hfi_context_bad_cmd, payload); + + start = &cmd->payload[0]; + + while (i < payload_bytes) { + struct payload_section *payload = start + i; + + if (payload->type == type) + return gen7_hwsched_parse_payload(payload, key); + + i += 
struct_size(payload, data, payload->dwords); + } + + return 0; +} + +static u32 get_payload_rb_key(struct adreno_device *adreno_dev, + u32 rb_id, u32 key) +{ + struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; + u32 i = 0, payload_bytes; + void *start; + + if (!cmd->hdr) + return 0; + + payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - + offsetof(struct hfi_context_bad_cmd, payload); + + start = &cmd->payload[0]; + + while (i < payload_bytes) { + struct payload_section *payload = start + i; + + if (payload->type == PAYLOAD_RB) { + u32 id = gen7_hwsched_parse_payload(payload, KEY_RB_ID); + + if (id == rb_id) + return gen7_hwsched_parse_payload(payload, key); + } + + i += struct_size(payload, data, payload->dwords); + } + + return 0; +} + +static void log_gpu_fault(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct device *dev = &gmu->pdev->dev; + struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; + + switch (cmd->error) { + case GMU_GPU_HW_HANG: + dev_crit_ratelimited(dev, "MISC: GPU hang detected\n"); + break; + case GMU_GPU_SW_HANG: + dev_crit_ratelimited(dev, "gpu timeout ctx %d ts %d\n", + cmd->ctxt_id, cmd->ts); + break; + case GMU_CP_OPCODE_ERROR: + dev_crit_ratelimited(dev, + "CP opcode error interrupt | opcode=0x%8.8x\n", + gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_OPCODE_ERROR)); + break; + case GMU_CP_PROTECTED_ERROR: { + u32 status = gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_PROTECTED_ERROR); + + dev_crit_ratelimited(dev, + "CP | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", + status & (1 << 20) ? 
"READ" : "WRITE", + status & 0x3FFFF, status); + } + break; + case GMU_CP_ILLEGAL_INST_ERROR: + dev_crit_ratelimited(dev, "CP Illegal instruction error\n"); + break; + case GMU_CP_UCODE_ERROR: + dev_crit_ratelimited(dev, "CP ucode error interrupt\n"); + break; + case GMU_CP_HW_FAULT_ERROR: + dev_crit_ratelimited(dev, + "CP | Ringbuffer HW fault | status=0x%8.8x\n", + gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_HW_FAULT)); + break; + case GMU_GPU_PREEMPT_TIMEOUT: { + u32 cur, next, cur_rptr, cur_wptr, next_rptr, next_wptr; + + cur = gen7_hwsched_lookup_key_value(adreno_dev, + PAYLOAD_PREEMPT_TIMEOUT, KEY_PREEMPT_TIMEOUT_CUR_RB_ID); + next = gen7_hwsched_lookup_key_value(adreno_dev, + PAYLOAD_PREEMPT_TIMEOUT, + KEY_PREEMPT_TIMEOUT_NEXT_RB_ID); + cur_rptr = get_payload_rb_key(adreno_dev, cur, KEY_RB_RPTR); + cur_wptr = get_payload_rb_key(adreno_dev, cur, KEY_RB_WPTR); + next_rptr = get_payload_rb_key(adreno_dev, next, KEY_RB_RPTR); + next_wptr = get_payload_rb_key(adreno_dev, next, KEY_RB_WPTR); + + dev_crit_ratelimited(dev, + "Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n", + cur, cur_rptr, cur_wptr, next, next_rptr, next_wptr); + } + break; + case GMU_CP_GPC_ERROR: + dev_crit_ratelimited(dev, "RBBM: GPC error\n"); + break; + case GMU_CP_BV_OPCODE_ERROR: + dev_crit_ratelimited(dev, + "CP BV opcode error | opcode=0x%8.8x\n", + gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_BV_OPCODE_ERROR)); + break; + case GMU_CP_BV_PROTECTED_ERROR: { + u32 status = gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_BV_PROTECTED_ERROR); + + dev_crit_ratelimited(dev, + "CP BV | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", + status & (1 << 20) ? 
"READ" : "WRITE", + status & 0x3FFFF, status); + } + break; + case GMU_CP_BV_HW_FAULT_ERROR: + dev_crit_ratelimited(dev, + "CP BV | Ringbuffer HW fault | status=0x%8.8x\n", + gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_HW_FAULT)); + break; + case GMU_CP_BV_ILLEGAL_INST_ERROR: + dev_crit_ratelimited(dev, "CP BV Illegal instruction error\n"); + break; + case GMU_CP_BV_UCODE_ERROR: + dev_crit_ratelimited(dev, "CP BV ucode error interrupt\n"); + break; + case GMU_CP_UNKNOWN_ERROR: + fallthrough; + default: + dev_crit_ratelimited(dev, "Unknown GPU fault: %u\n", + cmd->error); + break; + } +} + +static u32 peek_next_header(struct gen7_gmu_device *gmu, uint32_t queue_idx) +{ + struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem; + struct hfi_queue_table *tbl = mem_addr->hostptr; + struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx]; + u32 *queue; + + if (hdr->status == HFI_QUEUE_STATUS_DISABLED) + return 0; + + if (hdr->read_index == hdr->write_index) + return 0; + + queue = HOST_QUEUE_START_ADDR(mem_addr, queue_idx); + + return queue[hdr->read_index]; +} + +static void process_ctx_bad(struct adreno_device *adreno_dev) +{ + log_gpu_fault(adreno_dev); + + adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); +} + +void gen7_hwsched_process_msgq(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + u32 rcvd[MAX_RCVD_SIZE], next_hdr; + + for (;;) { + next_hdr = peek_next_header(gmu, HFI_MSG_ID); + + if (!next_hdr) + return; + + if (MSG_HDR_GET_ID(next_hdr) == F2H_MSG_CONTEXT_BAD) { + gen7_hfi_queue_read(gmu, HFI_MSG_ID, + (u32 *)adreno_dev->hwsched.ctxt_bad, + HFI_MAX_MSG_SIZE); + process_ctx_bad(adreno_dev); + continue; + } + + gen7_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)); + + /* + * We are assuming that there is only one outstanding ack + * because hfi sending thread waits for completion while + * holding the device mutex + */ + if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) { + 
gen7_receive_ack_async(adreno_dev, rcvd); + } else if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_TS_RETIRE) { + log_profiling_info(adreno_dev, rcvd); + adreno_hwsched_trigger(adreno_dev); + } + } +} + +static void process_log_block(struct adreno_device *adreno_dev, void *data) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct hfi_log_block *cmd = data; + u32 *log_event = gmu->gmu_log->hostptr; + u32 start, end; + + start = cmd->start_index; + end = cmd->stop_index; + + log_event += start * 4; + while (start != end) { + trace_gmu_event(log_event); + log_event += 4; + start++; + } +} + +static void process_dbgq_irq(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + u32 rcvd[MAX_RCVD_SIZE]; + bool recovery = false; + + while (gen7_hfi_queue_read(gmu, HFI_DBG_ID, rcvd, sizeof(rcvd)) > 0) { + + if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_ERR) { + adreno_gen7_receive_err_req(gmu, rcvd); + recovery = true; + break; + } + + if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_DEBUG) + adreno_gen7_receive_debug_req(gmu, rcvd); + + if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_LOG_BLOCK) + process_log_block(adreno_dev, rcvd); + } + + if (!recovery) + return; + + adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); +} + +/* HFI interrupt handler */ +static irqreturn_t gen7_hwsched_hfi_handler(int irq, void *data) +{ + struct adreno_device *adreno_dev = data; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 status = 0; + + gmu_core_regread(device, GEN7_GMU_GMU2HOST_INTR_INFO, &status); + gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, hfi->irq_mask); + + /* + * If interrupts are not enabled on the HFI message queue, + * the inline message processing loop will process it, + * else, process it here. 
+ */ + if (!(hfi->irq_mask & HFI_IRQ_MSGQ_MASK)) + status &= ~HFI_IRQ_MSGQ_MASK; + + if (status & (HFI_IRQ_MSGQ_MASK | HFI_IRQ_DBGQ_MASK)) { + wake_up_interruptible(&hfi->f2h_wq); + adreno_hwsched_trigger(adreno_dev); + } + if (status & HFI_IRQ_CM3_FAULT_MASK) { + atomic_set(&gmu->cm3_fault, 1); + + /* make sure other CPUs see the update */ + smp_wmb(); + + dev_err_ratelimited(&gmu->pdev->dev, + "GMU CM3 fault interrupt received\n"); + + adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + } + + /* Ignore OOB bits */ + status &= GENMASK(31 - (oob_max - 1), 0); + + if (status & ~hfi->irq_mask) + dev_err_ratelimited(&gmu->pdev->dev, + "Unhandled HFI interrupts 0x%x\n", + status & ~hfi->irq_mask); + + return IRQ_HANDLED; +} + +#define HFI_IRQ_MSGQ_MASK BIT(0) +#define HFI_RSP_TIMEOUT 100 /* msec */ + +static int wait_ack_completion(struct adreno_device *adreno_dev, + struct pending_cmd *ack) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int rc; + + rc = wait_for_completion_timeout(&ack->complete, + HFI_RSP_TIMEOUT); + if (!rc) { + dev_err(&gmu->pdev->dev, + "Ack timeout for id:%d sequence=%d\n", + MSG_HDR_GET_ID(ack->sent_hdr), + MSG_HDR_GET_SEQNUM(ack->sent_hdr)); + gmu_core_fault_snapshot(KGSL_DEVICE(adreno_dev)); + return -ETIMEDOUT; + } + + return 0; +} + +static int check_ack_failure(struct adreno_device *adreno_dev, + struct pending_cmd *ack) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + if (ack->results[2] != 0xffffffff) + return 0; + + dev_err(&gmu->pdev->dev, + "ACK error: sender id %d seqnum %d\n", + MSG_HDR_GET_ID(ack->sent_hdr), + MSG_HDR_GET_SEQNUM(ack->sent_hdr)); + + return -EINVAL; +} + +int gen7_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + u32 *cmd = data; + u32 seqnum = atomic_inc_return(&gmu->hfi.seqnum); + int rc; + struct pending_cmd pending_ack; + + *cmd = 
MSG_HDR_SET_SEQNUM(*cmd, seqnum); + + add_waiter(hfi, *cmd, &pending_ack); + + rc = gen7_hfi_cmdq_write(adreno_dev, cmd); + if (rc) + goto done; + + rc = wait_ack_completion(adreno_dev, &pending_ack); + if (rc) + goto done; + + rc = check_ack_failure(adreno_dev, &pending_ack); + +done: + del_waiter(hfi, &pending_ack); + + return rc; +} + +static void init_queues(struct gen7_hfi *hfi) +{ + u32 gmuaddr = hfi->hfi_mem->gmuaddr; + struct hfi_queue_table hfi_table = { + .qtbl_hdr = { + .version = 0, + .size = sizeof(struct hfi_queue_table) >> 2, + .qhdr0_offset = + sizeof(struct hfi_queue_table_header) >> 2, + .qhdr_size = sizeof(struct hfi_queue_header) >> 2, + .num_q = HFI_QUEUE_MAX, + .num_active_q = HFI_QUEUE_MAX, + }, + .qhdr = { + DEFINE_QHDR(gmuaddr, HFI_CMD_ID, 0), + DEFINE_QHDR(gmuaddr, HFI_MSG_ID, 0), + DEFINE_QHDR(gmuaddr, HFI_DBG_ID, 0), + /* 4 DQs for RB priority 0 */ + DEFINE_QHDR(gmuaddr, 3, 0), + DEFINE_QHDR(gmuaddr, 4, 0), + DEFINE_QHDR(gmuaddr, 5, 0), + DEFINE_QHDR(gmuaddr, 6, 0), + /* 4 DQs for RB priority 1 */ + DEFINE_QHDR(gmuaddr, 7, 1), + DEFINE_QHDR(gmuaddr, 8, 1), + DEFINE_QHDR(gmuaddr, 9, 1), + DEFINE_QHDR(gmuaddr, 10, 1), + /* 3 DQs for RB priority 2 */ + DEFINE_QHDR(gmuaddr, 11, 2), + DEFINE_QHDR(gmuaddr, 12, 2), + DEFINE_QHDR(gmuaddr, 13, 2), + /* 3 DQs for RB priority 3 */ + DEFINE_QHDR(gmuaddr, 14, 3), + DEFINE_QHDR(gmuaddr, 15, 3), + DEFINE_QHDR(gmuaddr, 16, 3), + }, + }; + + memcpy(hfi->hfi_mem->hostptr, &hfi_table, sizeof(hfi_table)); +} + +/* Total header sizes + queue sizes + 16 for alignment */ +#define HFIMEM_SIZE (sizeof(struct hfi_queue_table) + 16 + \ + (SZ_4K * HFI_QUEUE_MAX)) + +static int hfi_f2h_main(void *arg); + +int gen7_hwsched_hfi_init(struct adreno_device *adreno_dev) +{ + struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev); + struct gen7_hfi *hfi = to_gen7_hfi(adreno_dev); + + if (IS_ERR_OR_NULL(hw_hfi->big_ib)) { + hw_hfi->big_ib = gen7_reserve_gmu_kernel_block( + to_gen7_gmu(adreno_dev), 0, + 
HWSCHED_MAX_IBS * sizeof(struct hfi_issue_ib), + GMU_NONCACHED_KERNEL); + if (IS_ERR(hw_hfi->big_ib)) + return PTR_ERR(hw_hfi->big_ib); + } + + if (IS_ERR_OR_NULL(hfi->hfi_mem)) { + hfi->hfi_mem = gen7_reserve_gmu_kernel_block( + to_gen7_gmu(adreno_dev), + 0, HFIMEM_SIZE, GMU_NONCACHED_KERNEL); + if (IS_ERR(hfi->hfi_mem)) + return PTR_ERR(hfi->hfi_mem); + init_queues(hfi); + } + + if (IS_ERR_OR_NULL(hw_hfi->f2h_task)) + hw_hfi->f2h_task = kthread_run(hfi_f2h_main, adreno_dev, "gmu_f2h"); + + return PTR_ERR_OR_ZERO(hw_hfi->f2h_task); +} + +static int get_attrs(u32 flags) +{ + int attrs = IOMMU_READ; + + if (flags & HFI_MEMFLAG_GMU_PRIV) + attrs |= IOMMU_PRIV; + + if (flags & HFI_MEMFLAG_GMU_WRITEABLE) + attrs |= IOMMU_WRITE; + + return attrs; +} + +static int gmu_import_buffer(struct adreno_device *adreno_dev, + struct hfi_mem_alloc_entry *entry, u32 flags) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int attrs = get_attrs(flags); + struct gmu_vma_entry *vma = &gmu->vma[GMU_NONCACHED_KERNEL]; + struct hfi_mem_alloc_desc *desc = &entry->desc; + int ret; + + if (flags & HFI_MEMFLAG_GMU_CACHEABLE) + vma = &gmu->vma[GMU_CACHE]; + + if ((vma->next_va + desc->size) > (vma->start + vma->size)) { + dev_err(&gmu->pdev->dev, + "GMU mapping too big. 
available: %d required: %d\n", + vma->next_va - vma->start, desc->size); + return -ENOMEM; + } + + ret = gmu_core_map_memdesc(gmu->domain, entry->md, vma->next_va, attrs); + if (ret) { + dev_err(&gmu->pdev->dev, "gmu map err: 0x%08x, %x\n", + vma->next_va, attrs); + return ret; + } + + entry->md->gmuaddr = vma->next_va; + + vma->next_va += desc->size; + return 0; +} + +static struct hfi_mem_alloc_entry *lookup_mem_alloc_table( + struct adreno_device *adreno_dev, struct hfi_mem_alloc_desc *desc) +{ + struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev); + int i; + + for (i = 0; i < hw_hfi->mem_alloc_entries; i++) { + struct hfi_mem_alloc_entry *entry = &hw_hfi->mem_alloc_table[i]; + + if ((entry->desc.mem_kind == desc->mem_kind) && + (entry->desc.gmu_mem_handle == desc->gmu_mem_handle)) + return entry; + } + + return NULL; +} + +static struct hfi_mem_alloc_entry *get_mem_alloc_entry( + struct adreno_device *adreno_dev, struct hfi_mem_alloc_desc *desc) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + struct hfi_mem_alloc_entry *entry = + lookup_mem_alloc_table(adreno_dev, desc); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + u64 flags = 0; + u32 priv = 0; + int ret; + const char *memkind_string = desc->mem_kind < HFI_MEMKIND_MAX ? 
+ hfi_memkind_strings[desc->mem_kind] : "UNKNOWN"; + + if (entry) + return entry; + + if (hfi->mem_alloc_entries == ARRAY_SIZE(hfi->mem_alloc_table)) { + dev_err(&gmu->pdev->dev, + "Reached max mem alloc entries\n"); + return ERR_PTR(-ENOMEM); + } + + entry = &hfi->mem_alloc_table[hfi->mem_alloc_entries]; + + memcpy(&entry->desc, desc, sizeof(*desc)); + + entry->desc.host_mem_handle = desc->gmu_mem_handle; + + if (desc->flags & HFI_MEMFLAG_GFX_PRIV) + priv |= KGSL_MEMDESC_PRIVILEGED; + + if (!(desc->flags & HFI_MEMFLAG_GFX_WRITEABLE)) + flags |= KGSL_MEMFLAGS_GPUREADONLY; + + if (desc->flags & HFI_MEMFLAG_GFX_SECURE) + flags |= KGSL_MEMFLAGS_SECURE; + + if (!(desc->flags & HFI_MEMFLAG_GFX_ACC)) { + entry->md = gen7_reserve_gmu_kernel_block(gmu, 0, + desc->size, + (desc->flags & HFI_MEMFLAG_GMU_CACHEABLE) ? + GMU_CACHE : GMU_NONCACHED_KERNEL); + if (IS_ERR(entry->md)) { + int ret = PTR_ERR(entry->md); + + memset(entry, 0, sizeof(*entry)); + return ERR_PTR(ret); + } + entry->desc.size = entry->md->size; + entry->desc.gmu_addr = entry->md->gmuaddr; + + goto done; + } + + entry->md = kgsl_allocate_global(device, desc->size, 0, flags, priv, + memkind_string); + if (IS_ERR(entry->md)) { + int ret = PTR_ERR(entry->md); + + memset(entry, 0, sizeof(*entry)); + return ERR_PTR(ret); + } + + entry->desc.size = entry->md->size; + entry->desc.gpu_addr = entry->md->gpuaddr; + + if (!(desc->flags & HFI_MEMFLAG_GMU_ACC)) + goto done; + + /* + * If gmu mapping fails, then we have to live with + * leaking the gpu global buffer allocated above. 
 */
	ret = gmu_import_buffer(adreno_dev, entry, desc->flags);
	if (ret) {
		/* Buffer stays allocated on the GPU side; report the leak */
		dev_err(&gmu->pdev->dev,
			"gpuaddr: 0x%llx size: %lld bytes lost\n",
			entry->md->gpuaddr, entry->md->size);
		memset(entry, 0, sizeof(*entry));
		return ERR_PTR(ret);
	}

	entry->desc.gmu_addr = entry->md->gmuaddr;
done:
	/* Entry is fully populated; make it visible in the table */
	hfi->mem_alloc_entries++;

	return entry;
}

/*
 * process_mem_alloc - Service a F2H_MSG_MEM_ALLOC request from the GMU
 *
 * Looks up (or allocates) the backing memory described by @mad and writes
 * the resolved GPU/GMU addresses back into the descriptor so they can be
 * returned to the GMU firmware.
 *
 * Return: 0 on success or a negative error code from the allocation path.
 */
static int process_mem_alloc(struct adreno_device *adreno_dev,
	struct hfi_mem_alloc_desc *mad)
{
	struct hfi_mem_alloc_entry *entry;

	entry = get_mem_alloc_entry(adreno_dev, mad);
	if (IS_ERR(entry))
		return PTR_ERR(entry);

	/* md is NULL only if no backing allocation was made for this kind */
	if (entry->md) {
		mad->gpu_addr = entry->md->gpuaddr;
		mad->gmu_addr = entry->md->gmuaddr;
	}

	/*
	 * GMU uses the host_mem_handle to check if this memalloc was
	 * successful
	 */
	mad->host_mem_handle = mad->gmu_mem_handle;

	return 0;
}

/*
 * mem_alloc_reply - Build and send the ack for a GMU memory alloc request
 *
 * Echoes the (now populated) descriptor back to the GMU along with the
 * original request header so the firmware can match the reply.
 */
static int mem_alloc_reply(struct adreno_device *adreno_dev, void *rcvd)
{
	struct hfi_mem_alloc_cmd *in = (struct hfi_mem_alloc_cmd *)rcvd;
	struct hfi_mem_alloc_reply_cmd out = {0};
	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
	int ret;

	ret = process_mem_alloc(adreno_dev, &in->desc);
	if (ret)
		return ret;

	memcpy(&out.desc, &in->desc, sizeof(out.desc));

	out.hdr = ACK_MSG_HDR(F2H_MSG_MEM_ALLOC, sizeof(out));
	out.hdr = MSG_HDR_SET_SEQNUM(out.hdr,
		atomic_inc_return(&gmu->hfi.seqnum));

	/* Echo the request header so GMU can correlate the reply */
	out.req_hdr = in->hdr;

	return gen7_hfi_cmdq_write(adreno_dev, (u32 *)&out);
}

/*
 * send_start_msg - Send H2F_MSG_START and poll for its ack
 *
 * Runs before async HFI interrupts are enabled, so the message queue is
 * polled inline. The GMU may interleave MEM_ALLOC requests before the
 * final ack; those are serviced and polling resumes.
 */
static int send_start_msg(struct adreno_device *adreno_dev)
{
	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	unsigned int seqnum = atomic_inc_return(&gmu->hfi.seqnum);
	int ret, rc = 0;
	u32 rcvd[MAX_RCVD_SIZE];
	struct hfi_start_cmd cmd;
	struct pending_cmd pending_ack = {0};

	ret = CMD_MSG_HDR(cmd, H2F_MSG_START);
	if (ret)
		return ret;

	cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum);

	pending_ack.sent_hdr = cmd.hdr;

	rc =
gen7_hfi_cmdq_write(adreno_dev, (u32 *)&cmd);
	if (rc)
		return rc;

poll:
	/* Wait for the GMU to raise the msgq interrupt bit (polled, no IRQ) */
	rc = gmu_core_timed_poll_check(device, GEN7_GMU_GMU2HOST_INTR_INFO,
		HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT, HFI_IRQ_MSGQ_MASK);

	if (rc) {
		dev_err(&gmu->pdev->dev,
			"Timed out processing MSG_START seqnum: %d\n",
			seqnum);
		gmu_core_fault_snapshot(device);
		return rc;
	}

	/* Clear the interrupt */
	gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR,
		HFI_IRQ_MSGQ_MASK);

	if (gen7_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)) <= 0) {
		dev_err(&gmu->pdev->dev, "MSG_START: no payload\n");
		gmu_core_fault_snapshot(device);
		return -EINVAL;
	}

	/* Final ack for MSG_START: verify it and we are done */
	if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) {
		rc = gen7_receive_ack_cmd(gmu, rcvd, &pending_ack);
		if (rc)
			return rc;

		return check_ack_failure(adreno_dev, &pending_ack);
	}

	/* GMU may request memory allocations before acking MSG_START */
	if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_MEM_ALLOC) {
		rc = mem_alloc_reply(adreno_dev, rcvd);
		if (rc)
			return rc;

		goto poll;
	}

	dev_err(&gmu->pdev->dev,
		"MSG_START: unexpected response id:%d, type:%d\n",
		MSG_HDR_GET_ID(rcvd[0]),
		MSG_HDR_GET_TYPE(rcvd[0]));

	gmu_core_fault_snapshot(device);

	return rc;
}

/*
 * reset_hfi_queues - Drain every enabled HFI queue before shutdown
 *
 * A non-empty queue at close time indicates lost traffic, so it is logged
 * and a GMU snapshot is taken before the read index is forced forward.
 */
static void reset_hfi_queues(struct adreno_device *adreno_dev)
{
	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
	struct hfi_queue_table *tbl = gmu->hfi.hfi_mem->hostptr;
	u32 i;

	/* Flush HFI queues */
	for (i = 0; i < HFI_QUEUE_MAX; i++) {
		struct hfi_queue_header *hdr = &tbl->qhdr[i];

		if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
			continue;

		if (hdr->read_index != hdr->write_index) {
			dev_err(&gmu->pdev->dev,
				"HFI queue[%d] is not empty before close: rd=%d,wt=%d\n",
				i, hdr->read_index, hdr->write_index);
			hdr->read_index = hdr->write_index;

			gmu_core_fault_snapshot(KGSL_DEVICE(adreno_dev));
		}
	}
}

/*
 * gen7_hwsched_hfi_stop - Quiesce HFI when powering down the GMU
 *
 * Masks the message queue interrupt, flushes the queues, drops the AXI
 * bus vote and clears the HFI-started flag.
 */
void gen7_hwsched_hfi_stop(struct adreno_device *adreno_dev)
{
	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
	struct gen7_hwsched_hfi *hfi =
to_gen7_hwsched_hfi(adreno_dev);

	hfi->irq_mask &= ~HFI_IRQ_MSGQ_MASK;

	reset_hfi_queues(adreno_dev);

	/* Drop the default bus vote taken in gen7_hwsched_hfi_start() */
	kgsl_pwrctrl_axi(KGSL_DEVICE(adreno_dev), false);

	clear_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);

}

/*
 * enable_async_hfi - Switch from inline polling to interrupt-driven HFI
 *
 * Re-enables the message queue bit in the interrupt mask once the boot
 * handshake (send_start_msg) has completed.
 */
static void enable_async_hfi(struct adreno_device *adreno_dev)
{
	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);

	hfi->irq_mask |= HFI_IRQ_MSGQ_MASK;

	gmu_core_regwrite(KGSL_DEVICE(adreno_dev), GEN7_GMU_GMU2HOST_INTR_MASK,
		(u32)~hfi->irq_mask);
}

/*
 * enable_preemption - Tell the GMU how preemption is configured
 *
 * No-op when preemption is disabled. Otherwise sends the preemption
 * feature-ctrl packet followed by the per-ringbuffer timeout value.
 *
 * Return: 0 on success or a negative error from the HFI send.
 */
static int enable_preemption(struct adreno_device *adreno_dev)
{
	u32 data;
	int ret;

	if (!adreno_is_preemption_enabled(adreno_dev))
		return 0;

	/*
	 * Bits [0:1] contains the preemption level
	 * Bit 2 is to enable/disable gmem save/restore
	 * Bit 3 is to enable/disable skipsaverestore
	 */
	data = FIELD_PREP(GENMASK(1, 0), adreno_dev->preempt.preempt_level) |
		FIELD_PREP(BIT(2), adreno_dev->preempt.usesgmem) |
		FIELD_PREP(BIT(3), adreno_dev->preempt.skipsaverestore);

	ret = gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_PREEMPTION, 1,
		data);
	if (ret)
		return ret;

	/*
	 * Bits[3:0] contain the preemption timeout enable bit per ringbuffer
	 * Bits[31:4] contain the timeout in ms
	 */
	return gen7_hfi_send_set_value(adreno_dev, HFI_VALUE_BIN_TIME, 1,
		FIELD_PREP(GENMASK(31, 4), 3000) |
		FIELD_PREP(GENMASK(3, 0), 0xf));

}

/*
 * gen7_hwsched_hfi_start - Run the HFI boot sequence for the GMU
 *
 * Sends the DCVS/bandwidth tables and all feature-ctrl packets, starts the
 * firmware, performs the MSG_START handshake and finally enables async HFI
 * plus the default power/bandwidth votes. Any failure unwinds through
 * gen7_hwsched_hfi_stop(). The ordering of these packets is part of the
 * GMU boot contract; do not reorder.
 *
 * Return: 0 on success and negative error on failure.
 */
int gen7_hwsched_hfi_start(struct adreno_device *adreno_dev)
{
	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	int ret;

	ret = gen7_gmu_hfi_start(adreno_dev);
	if (ret)
		goto err;

	ret = gen7_hfi_send_generic_req(adreno_dev, &gmu->hfi.dcvs_table);
	if (ret)
		goto err;

	ret = gen7_hfi_send_generic_req(adreno_dev, &gmu->hfi.bw_table);
	if (ret)
		goto err;

	ret = gen7_hfi_send_acd_feature_ctrl(adreno_dev);
	if (ret)
		goto err;

	ret = gen7_hfi_send_bcl_feature_ctrl(adreno_dev);
	if (ret)
		goto err;

	ret = gen7_hfi_send_ifpc_feature_ctrl(adreno_dev);
	if (ret)
		goto err;

	ret = gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_HWSCHED, 1, 0);
	if (ret)
		goto err;

	ret = gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_KPROF, 1, 0);
	if (ret)
		goto err;

	/* Enable the long ib timeout detection */
	if (adreno_long_ib_detect(adreno_dev)) {
		ret = gen7_hfi_send_feature_ctrl(adreno_dev,
			HFI_FEATURE_BAIL_OUT_TIMER, 1, 0);
		if (ret)
			goto err;
	}

	/* Log settings are best-effort: failures are deliberately ignored */
	if (gmu->log_stream_enable)
		gen7_hfi_send_set_value(adreno_dev,
			HFI_VALUE_LOG_STREAM_ENABLE, 0, 1);

	if (gmu->log_group_mask)
		gen7_hfi_send_set_value(adreno_dev,
			HFI_VALUE_LOG_GROUP, 0, gmu->log_group_mask);

	ret = gen7_hfi_send_core_fw_start(adreno_dev);
	if (ret)
		goto err;

	ret = enable_preemption(adreno_dev);
	if (ret)
		goto err;

	ret = send_start_msg(adreno_dev);
	if (ret)
		goto err;

	enable_async_hfi(adreno_dev);

	set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);

	/* Request default DCVS level */
	ret = kgsl_pwrctrl_set_default_gpu_pwrlevel(device);
	if (ret)
		goto err;

	/* Request default BW vote */
	ret = kgsl_pwrctrl_axi(device, true);

err:
	if (ret)
		gen7_hwsched_hfi_stop(adreno_dev);

	return ret;
}

/*
 * submit_raw_cmds - Send a raw HFI command and wait for the GPU to idle
 * @str: Message logged via gen7_spin_idle_debug() if the GPU stays busy
 *
 * Return: 0 on success or a negative error on send/poll failure.
 */
static int submit_raw_cmds(struct adreno_device *adreno_dev, void *cmds,
	const char *str)
{
	int ret;

	ret = gen7_hfi_send_cmd_async(adreno_dev, cmds);
	if (ret)
		return ret;

	/* Poll until the CX busy status reports idle (bit 23 clear) */
	ret = gmu_core_timed_poll_check(KGSL_DEVICE(adreno_dev),
		GEN7_GPU_GMU_AO_GPU_CX_BUSY_STATUS, 0, 200, BIT(23));
	if (ret)
		gen7_spin_idle_debug(adreno_dev, str);

	return ret;
}

/* Wrap the CP INIT packet in a raw-command HFI message and submit it */
static int cp_init(struct adreno_device *adreno_dev)
{
	u32 cmds[GEN7_CP_INIT_DWORDS + 1];

	cmds[0] = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD_RAW,
		(GEN7_CP_INIT_DWORDS + 1) << 2, HFI_MSG_CMD);

	gen7_cp_init_cmds(adreno_dev, &cmds[1]);

	return submit_raw_cmds(adreno_dev, cmds,
		"CP initialization failed to idle\n");
}

static int send_switch_to_unsecure(struct adreno_device
*adreno_dev) +{ + u32 cmds[3]; + + cmds[0] = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD_RAW, sizeof(cmds), + HFI_MSG_CMD); + + cmds[1] = cp_type7_packet(CP_SET_SECURE_MODE, 1); + cmds[2] = 0; + + return submit_raw_cmds(adreno_dev, cmds, + "Switch to unsecure failed to idle\n"); +} + +int gen7_hwsched_cp_init(struct adreno_device *adreno_dev) +{ + const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev); + struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + /* Program the ucode base for CP */ + kgsl_regwrite(device, GEN7_CP_SQE_INSTR_BASE_LO, + lower_32_bits(fw->memdesc->gpuaddr)); + kgsl_regwrite(device, GEN7_CP_SQE_INSTR_BASE_HI, + upper_32_bits(fw->memdesc->gpuaddr)); + + ret = cp_init(adreno_dev); + if (ret) + return ret; + + ret = adreno_zap_shader_load(adreno_dev, gen7_core->zap_name); + if (ret) + return ret; + + if (!adreno_dev->zap_loaded) + kgsl_regwrite(KGSL_DEVICE(adreno_dev), + GEN7_RBBM_SECVID_TRUST_CNTL, 0x0); + else + ret = send_switch_to_unsecure(adreno_dev); + + return ret; +} + +static bool is_queue_empty(struct adreno_device *adreno_dev, u32 queue_idx) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem; + struct hfi_queue_table *tbl = mem_addr->hostptr; + struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx]; + + if (hdr->status == HFI_QUEUE_STATUS_DISABLED) + return true; + + if (hdr->read_index == hdr->write_index) + return true; + + return false; +} + +static int hfi_f2h_main(void *arg) +{ + struct adreno_device *adreno_dev = arg; + struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + + while (!kthread_should_stop()) { + wait_event_interruptible(hfi->f2h_wq, !kthread_should_stop() && + !(is_queue_empty(adreno_dev, HFI_MSG_ID) && + is_queue_empty(adreno_dev, HFI_DBG_ID))); + + if (kthread_should_stop()) + break; + + gen7_hwsched_process_msgq(adreno_dev); + process_dbgq_irq(adreno_dev); + 
} + + return 0; +} + +int gen7_hwsched_hfi_probe(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev); + + gmu->hfi.irq = kgsl_request_irq(gmu->pdev, "hfi", + gen7_hwsched_hfi_handler, adreno_dev); + + if (gmu->hfi.irq < 0) + return gmu->hfi.irq; + + hw_hfi->irq_mask = HFI_IRQ_MASK; + + rwlock_init(&hw_hfi->msglock); + + INIT_LIST_HEAD(&hw_hfi->msglist); + + init_waitqueue_head(&hw_hfi->f2h_wq); + + return 0; +} + +void gen7_hwsched_hfi_remove(struct adreno_device *adreno_dev) +{ + struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev); + + kthread_stop(hw_hfi->f2h_task); +} + +static void add_profile_events(struct adreno_device *adreno_dev, + struct kgsl_drawobj *drawobj, struct adreno_submit_time *time) +{ + unsigned long flags; + u64 time_in_s; + unsigned long time_in_ns; + struct kgsl_context *context = drawobj->context; + struct submission_info info = {0}; + + /* + * Here we are attempting to create a mapping between the + * GPU time domain (alwayson counter) and the CPU time domain + * (local_clock) by sampling both values as close together as + * possible. This is useful for many types of debugging and + * profiling. In order to make this mapping as accurate as + * possible, we must turn off interrupts to avoid running + * interrupt handlers between the two samples. 
+ */ + + local_irq_save(flags); + + /* Read always on registers */ + time->ticks = gen7_read_alwayson(adreno_dev); + + /* Trace the GPU time to create a mapping to ftrace time */ + trace_adreno_cmdbatch_sync(context->id, context->priority, + drawobj->timestamp, time->ticks); + + /* Get the kernel clock for time since boot */ + time->ktime = local_clock(); + + /* Get the timeofday for the wall time (for the user) */ + ktime_get_real_ts64(&time->utime); + + local_irq_restore(flags); + + /* Return kernel clock time to the client if requested */ + time_in_s = time->ktime; + time_in_ns = do_div(time_in_s, 1000000000); + + info.inflight = -1; + info.rb_id = adreno_get_level(context->priority); + info.gmu_dispatch_queue = context->gmu_dispatch_queue; + + trace_adreno_cmdbatch_submitted(drawobj, &info, time->ticks, + (unsigned long) time_in_s, time_in_ns / 1000, 0); + + log_kgsl_cmdbatch_submitted_event(context->id, drawobj->timestamp, + context->priority, drawobj->flags); +} + +static u32 get_next_dq(u32 priority) +{ + struct dq_info *info = &gen7_hfi_dqs[priority]; + u32 next = info->base_dq_id + info->offset; + + info->offset = (info->offset + 1) % info->max_dq; + + return next; +} + +static u32 get_dq_id(u32 priority) +{ + u32 level = adreno_get_level(priority); + + return get_next_dq(level); +} + +static int send_context_register(struct adreno_device *adreno_dev, + struct kgsl_context *context) +{ + struct hfi_register_ctxt_cmd cmd; + struct kgsl_pagetable *pt = context->proc_priv->pagetable; + int ret; + + ret = CMD_MSG_HDR(cmd, H2F_MSG_REGISTER_CONTEXT); + if (ret) + return ret; + + cmd.ctxt_id = context->id; + cmd.flags = HFI_CTXT_FLAG_NOTIFY | context->flags; + cmd.pt_addr = kgsl_mmu_pagetable_get_ttbr0(pt); + cmd.ctxt_idr = pid_nr(context->proc_priv->pid); + cmd.ctxt_bank = kgsl_mmu_pagetable_get_context_bank(pt); + + return gen7_hfi_send_cmd_async(adreno_dev, &cmd); +} + +static int send_context_pointers(struct adreno_device *adreno_dev, + struct kgsl_context 
*context) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct hfi_context_pointers_cmd cmd; + int ret; + + ret = CMD_MSG_HDR(cmd, H2F_MSG_CONTEXT_POINTERS); + if (ret) + return ret; + + cmd.ctxt_id = context->id; + cmd.sop_addr = MEMSTORE_ID_GPU_ADDR(device, context->id, soptimestamp); + cmd.eop_addr = MEMSTORE_ID_GPU_ADDR(device, context->id, eoptimestamp); + if (context->user_ctxt_record) + cmd.user_ctxt_record_addr = + context->user_ctxt_record->memdesc.gpuaddr; + else + cmd.user_ctxt_record_addr = 0; + + return gen7_hfi_send_cmd_async(adreno_dev, &cmd); +} + +static int hfi_context_register(struct adreno_device *adreno_dev, + struct kgsl_context *context) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + if (context->gmu_registered) + return 0; + + ret = send_context_register(adreno_dev, context); + if (ret) { + dev_err(&gmu->pdev->dev, + "Unable to register context %d: %d\n", + context->id, ret); + + if (device->gmu_fault) + adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + + return ret; + } + + ret = send_context_pointers(adreno_dev, context); + if (ret) { + dev_err(&gmu->pdev->dev, + "Unable to register context %d pointers: %d\n", + context->id, ret); + + if (device->gmu_fault) + adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + + return ret; + } + + context->gmu_registered = true; + context->gmu_dispatch_queue = get_dq_id(context->priority); + + return 0; +} + +static void populate_ibs(struct adreno_device *adreno_dev, + struct hfi_submit_cmd *cmd, struct kgsl_drawobj_cmd *cmdobj) +{ + struct hfi_issue_ib *issue_ib; + struct kgsl_memobj_node *ib; + + if (cmdobj->numibs > HWSCHED_MAX_DISPATCH_NUMIBS) { + struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + + /* + * The dispatcher ensures that there is only one big IB inflight + */ + cmd->big_ib_gmu_va = hfi->big_ib->gmuaddr; + cmd->flags |= CMDBATCH_INDIRECT; + issue_ib = 
hfi->big_ib->hostptr; + } else { + issue_ib = (struct hfi_issue_ib *)&cmd[1]; + } + + list_for_each_entry(ib, &cmdobj->cmdlist, node) { + issue_ib->addr = ib->gpuaddr; + issue_ib->size = ib->size; + issue_ib++; + } + + cmd->numibs = cmdobj->numibs; +} + +#define HFI_DSP_IRQ_BASE 2 + +#define DISPQ_IRQ_BIT(_idx) BIT((_idx) + HFI_DSP_IRQ_BASE) + +int gen7_hwsched_submit_cmdobj(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj) +{ + struct gen7_hfi *hfi = to_gen7_hfi(adreno_dev); + int ret = 0; + u32 cmd_sizebytes; + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + struct hfi_submit_cmd *cmd; + struct adreno_submit_time time = {0}; + + ret = hfi_context_register(adreno_dev, drawobj->context); + if (ret) + return ret; + + /* Add a *issue_ib struct for each IB */ + if (cmdobj->numibs > HWSCHED_MAX_DISPATCH_NUMIBS || + test_bit(CMDOBJ_SKIP, &cmdobj->priv)) + cmd_sizebytes = sizeof(*cmd); + else + cmd_sizebytes = sizeof(*cmd) + + (sizeof(struct hfi_issue_ib) * cmdobj->numibs); + + if (WARN_ON(cmd_sizebytes > HFI_MAX_MSG_SIZE)) + return -EMSGSIZE; + + cmd = kmalloc(cmd_sizebytes, GFP_KERNEL); + if (cmd == NULL) + return -ENOMEM; + + cmd->ctxt_id = drawobj->context->id; + cmd->flags = HFI_CTXT_FLAG_NOTIFY; + cmd->ts = drawobj->timestamp; + + if (test_bit(CMDOBJ_SKIP, &cmdobj->priv)) + goto skipib; + + populate_ibs(adreno_dev, cmd, cmdobj); + + if ((drawobj->flags & KGSL_DRAWOBJ_PROFILING) && + cmdobj->profiling_buf_entry) { + + time.drawobj = drawobj; + + cmd->profile_gpuaddr_lo = + lower_32_bits(cmdobj->profiling_buffer_gpuaddr); + cmd->profile_gpuaddr_hi = + upper_32_bits(cmdobj->profiling_buffer_gpuaddr); + + /* Indicate to GMU to do user profiling for this submission */ + cmd->flags |= CMDBATCH_PROFILING; + } + +skipib: + adreno_drawobj_set_constraint(KGSL_DEVICE(adreno_dev), drawobj); + + cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD, cmd_sizebytes, + HFI_MSG_CMD); + cmd->hdr = MSG_HDR_SET_SEQNUM(cmd->hdr, + atomic_inc_return(&hfi->seqnum)); + + ret = 
gen7_hfi_queue_write(adreno_dev, + HFI_DSP_ID_0 + drawobj->context->gmu_dispatch_queue, + (u32 *)cmd); + if (ret) + goto free; + + add_profile_events(adreno_dev, drawobj, &time); + + cmdobj->submit_ticks = time.ticks; + + /* Send interrupt to GMU to receive the message */ + gmu_core_regwrite(KGSL_DEVICE(adreno_dev), GEN7_GMU_HOST2GMU_INTR_SET, + DISPQ_IRQ_BIT(drawobj->context->gmu_dispatch_queue)); + + /* Put the profiling information in the user profiling buffer */ + adreno_profile_submit_time(&time); + +free: + kfree(cmd); + + return ret; +} + +static int send_context_unregister_hfi(struct adreno_device *adreno_dev, + struct kgsl_context *context, u32 ts) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + struct pending_cmd pending_ack; + struct hfi_unregister_ctxt_cmd cmd; + u32 seqnum; + int rc, ret; + + /* Only send HFI if device is not in SLUMBER */ + if (!context->gmu_registered || + !test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + return 0; + + ret = CMD_MSG_HDR(cmd, H2F_MSG_UNREGISTER_CONTEXT); + if (ret) + return ret; + + cmd.ctxt_id = context->id, + cmd.ts = ts, + + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); + + add_waiter(hfi, cmd.hdr, &pending_ack); + + /* + * Although we know device is powered on, we can still enter SLUMBER + * because the wait for ack below is done without holding the mutex. So + * take an active count before releasing the mutex so as to avoid a + * concurrent SLUMBER sequence while GMU is un-registering this context. 
+ */ + gen7_hwsched_active_count_get(adreno_dev); + + rc = gen7_hfi_cmdq_write(adreno_dev, (u32 *)&cmd); + if (rc) + goto done; + + mutex_unlock(&device->mutex); + + rc = wait_for_completion_timeout(&pending_ack.complete, + msecs_to_jiffies(30 * 1000)); + if (!rc) { + dev_err(&gmu->pdev->dev, + "Ack timeout for context unregister seq: %d ctx: %d ts: %d\n", + MSG_HDR_GET_SEQNUM(pending_ack.sent_hdr), + context->id, ts); + rc = -ETIMEDOUT; + + mutex_lock(&device->mutex); + + gmu_core_fault_snapshot(device); + + /* + * Trigger dispatcher based reset and recovery. Invalidate the + * context so that any un-finished inflight submissions are not + * replayed after recovery. + */ + adreno_drawctxt_set_guilty(device, context); + + adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + + goto done; + } + + mutex_lock(&device->mutex); + + rc = check_ack_failure(adreno_dev, &pending_ack); +done: + gen7_hwsched_active_count_put(adreno_dev); + + del_waiter(hfi, &pending_ack); + + return rc; +} + +void gen7_hwsched_context_detach(struct adreno_context *drawctxt) +{ + struct kgsl_context *context = &drawctxt->base; + struct kgsl_device *device = context->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int ret = 0; + + mutex_lock(&device->mutex); + + ret = send_context_unregister_hfi(adreno_dev, context, + drawctxt->internal_timestamp); + + if (!ret) { + kgsl_sharedmem_writel(device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, soptimestamp), + drawctxt->timestamp); + + kgsl_sharedmem_writel(device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp), + drawctxt->timestamp); + + adreno_profile_process_results(adreno_dev); + } + + context->gmu_registered = false; + + mutex_unlock(&device->mutex); +} + +u32 gen7_hwsched_preempt_count_get(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct hfi_get_value_cmd cmd; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hwsched_hfi *hfi = 
to_gen7_hwsched_hfi(adreno_dev); + u32 seqnum = atomic_inc_return(&gmu->hfi.seqnum); + struct pending_cmd pending_ack; + int rc; + + if (device->state != KGSL_STATE_ACTIVE) + return 0; + + rc = CMD_MSG_HDR(cmd, H2F_MSG_GET_VALUE); + if (rc) + return 0; + + cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); + cmd.type = HFI_VALUE_PREEMPT_COUNT; + cmd.subtype = 0; + + add_waiter(hfi, cmd.hdr, &pending_ack); + + rc = gen7_hfi_cmdq_write(adreno_dev, (u32 *)&cmd); + if (rc) + goto done; + + rc = wait_ack_completion(adreno_dev, &pending_ack); + if (rc) + goto done; + + rc = check_ack_failure(adreno_dev, &pending_ack); + +done: + del_waiter(hfi, &pending_ack); + + return rc ? 0 : pending_ack.results[2]; +} diff --git a/adreno_gen7_hwsched_hfi.h b/adreno_gen7_hwsched_hfi.h new file mode 100644 index 0000000000..a756f0ded1 --- /dev/null +++ b/adreno_gen7_hwsched_hfi.h @@ -0,0 +1,155 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
+ */ + +#ifndef _ADRENO_GEN7_HWSCHED_HFI_H_ +#define _ADRENO_GEN7_HWSCHED_HFI_H_ + +/* Maximum number of IBs in a submission */ +#define HWSCHED_MAX_NUMIBS \ + ((HFI_MAX_MSG_SIZE - offsetof(struct hfi_issue_cmd_cmd, ibs)) \ + / sizeof(struct hfi_issue_ib)) + +struct gen7_hwsched_hfi { + struct hfi_mem_alloc_entry mem_alloc_table[32]; + u32 mem_alloc_entries; + /** @irq_mask: Store the hfi interrupt mask */ + u32 irq_mask; + /** @msglock: To protect the list of un-ACKed hfi packets */ + rwlock_t msglock; + /** @msglist: List of un-ACKed hfi packets */ + struct list_head msglist; + /** @f2h_task: Task for processing gmu fw to host packets */ + struct task_struct *f2h_task; + /** @f2h_wq: Waitqueue for the f2h_task */ + wait_queue_head_t f2h_wq; + /** @big_ib: GMU buffer to hold big IBs */ + struct kgsl_memdesc *big_ib; +}; + +struct kgsl_drawobj_cmd; + +/** + * gen7_hwsched_hfi_probe - Probe hwsched hfi resources + * @adreno_dev: Pointer to adreno device structure + * + * Return: 0 on success and negative error on failure. + */ +int gen7_hwsched_hfi_probe(struct adreno_device *adreno_dev); + +/** + * gen7_hwsched_hfi_remove - Release hwsched hfi resources + * @adreno_dev: Pointer to adreno device structure + */ +void gen7_hwsched_hfi_remove(struct adreno_device *adreno_dev); + +/** + * gen7_hwsched_hfi_init - Initialize hfi resources + * @adreno_dev: Pointer to adreno device structure + * + * This function is used to initialize hfi resources + * once before the very first gmu boot + * + * Return: 0 on success and negative error on failure. + */ +int gen7_hwsched_hfi_init(struct adreno_device *adreno_dev); + +/** + * gen7_hwsched_hfi_start - Start hfi resources + * @adreno_dev: Pointer to adreno device structure + * + * Send the various hfi packets before booting the gpu + * + * Return: 0 on success and negative error on failure. 
+ */ +int gen7_hwsched_hfi_start(struct adreno_device *adreno_dev); + +/** + * gen7_hwsched_hfi_stop - Stop the hfi resources + * @adreno_dev: Pointer to the adreno device + * + * This function does the hfi cleanup when powering down the gmu + */ +void gen7_hwsched_hfi_stop(struct adreno_device *adreno_dev); + +/** + * gen7_hwched_cp_init - Send CP_INIT via HFI + * @adreno_dev: Pointer to adreno device structure + * + * This function is used to send CP INIT packet and bring + * GPU out of secure mode using hfi raw packets. + * + * Return: 0 on success and negative error on failure. + */ +int gen7_hwsched_cp_init(struct adreno_device *adreno_dev); + +/** + * gen7_hfi_send_cmd_async - Send an hfi packet + * @adreno_dev: Pointer to adreno device structure + * @data: Data to be sent in the hfi packet + * + * Send data in the form of an HFI packet to gmu and wait for + * it's ack asynchronously + * + * Return: 0 on success and negative error on failure. + */ +int gen7_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data); + +/** + * gen7_hwsched_submit_cmdobj - Dispatch IBs to dispatch queues + * @adreno_dev: Pointer to adreno device structure + * @cmdobj: The command object which needs to be submitted + * + * This function is used to register the context if needed and submit + * IBs to the hfi dispatch queues. 
+ + * Return: 0 on success and negative error on failure + */ +int gen7_hwsched_submit_cmdobj(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj); + +/** + * gen7_hwsched_context_detach - Unregister a context with GMU + * @drawctxt: Pointer to the adreno context + * + * This function sends context unregister HFI and waits for the ack + * to ensure all submissions from this context have retired + */ +void gen7_hwsched_context_detach(struct adreno_context *drawctxt); + +/* Helper function to get to gen7 hwsched hfi device from adreno device */ +struct gen7_hwsched_hfi *to_gen7_hwsched_hfi(struct adreno_device *adreno_dev); + +/** + * gen7_hwsched_preempt_count_get - Get preemption count from GMU + * @adreno_dev: Pointer to adreno device + * + * This function sends a GET_VALUE HFI packet to get the number of + * preemptions completed since last SLUMBER exit. + * + * Return: Preemption count + */ +u32 gen7_hwsched_preempt_count_get(struct adreno_device *adreno_dev); + +/** + * gen7_hwsched_parse_payload - Parse payload to look up a key + * @payload: Pointer to a payload section + * @key: The key who's value is to be looked up + * + * This function parses the payload data which is a sequence + * of key-value pairs. + * + * Return: The value of the key or 0 if key is not found + */ +u32 gen7_hwsched_parse_payload(struct payload_section *payload, u32 key); + +/** + * gen7_hwsched_process_msgq - Process hfi msg queue + * @adreno_dev: Pointer to adreno device + * + * Process any pending firmware to host packets in the message + * queue + */ +void gen7_hwsched_process_msgq(struct adreno_device *adreno_dev); +#endif diff --git a/adreno_gen7_perfcounter.c b/adreno_gen7_perfcounter.c new file mode 100644 index 0000000000..f088856da3 --- /dev/null +++ b/adreno_gen7_perfcounter.c @@ -0,0 +1,896 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
+ */ + +#include "adreno.h" +#include "adreno_gen7.h" +#include "adreno_perfcounter.h" +#include "adreno_pm4types.h" +#include "kgsl_device.h" + +/* + * For registers that do not get restored on power cycle, read the value and add + * the stored shadow value + */ +static u64 gen7_counter_read_norestore(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + u32 hi, lo; + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + return ((((u64) hi) << 32) | lo) + reg->value; +} + +static int gen7_counter_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + int ret = 0; + + if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) + ret = gen7_perfcounter_update(adreno_dev, reg, true); + else + kgsl_regwrite(device, reg->select, countable); + + if (!ret) + reg->value = 0; + + return ret; +} + +static int gen7_counter_inline_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + struct adreno_ringbuffer *rb = &adreno_dev->ringbuffers[0]; + u32 cmds[3]; + int ret; + + if (!(device->state == KGSL_STATE_ACTIVE)) + return gen7_counter_enable(adreno_dev, group, counter, + countable); + + if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) + gen7_perfcounter_update(adreno_dev, reg, false); + + cmds[0] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); + cmds[1] = cp_type4_packet(reg->select, 1); + cmds[2] = countable; + + /* submit to highest priority RB always */ + ret = 
gen7_ringbuffer_addcmds(adreno_dev, rb, NULL, + F_NOTPROTECTED, cmds, 3, 0, NULL); + if (ret) + return ret; + + /* + * schedule dispatcher to make sure rb[0] is run, because + * if the current RB is not rb[0] and gpu is idle then + * rb[0] will not get scheduled to run + */ + if (adreno_dev->cur_rb != rb) + adreno_dispatcher_schedule(device); + + /* wait for the above commands submitted to complete */ + ret = adreno_ringbuffer_waittimestamp(rb, rb->timestamp, + ADRENO_IDLE_TIMEOUT); + + if (ret) { + /* + * If we were woken up because of cancelling rb events + * either due to soft reset or adreno_stop, ignore the + * error and return 0 here. The perfcounter is already + * set up in software and it will be programmed in + * hardware when we wake up or come up after soft reset + */ + if (ret == -EAGAIN) + ret = 0; + else + dev_err(device->dev, + "Perfcounter %s/%u/%u start via commands failed %d\n", + group->name, counter, countable, ret); + } + + if (!ret) + reg->value = 0; + + return ret; +} + +static u64 gen7_counter_read(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + u32 hi, lo; + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + /* These registers are restored on power resume */ + return (((u64) hi) << 32) | lo; +} + +static int gen7_counter_gbif_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + unsigned int shift = counter << 3; + unsigned int select = BIT(counter); + + if (countable > 0xff) + return -EINVAL; + + /* + * Write 1, followed by 0 to CLR register for + * clearing the counter + */ + kgsl_regrmw(device, 
GEN7_GBIF_PERF_PWR_CNT_CLR, select, select); + kgsl_regrmw(device, GEN7_GBIF_PERF_PWR_CNT_CLR, select, 0); + + /* select the desired countable */ + kgsl_regrmw(device, reg->select, 0xff << shift, countable << shift); + + /* enable counter */ + kgsl_regrmw(device, GEN7_GBIF_PERF_PWR_CNT_EN, select, select); + + reg->value = 0; + return 0; +} + +static int gen7_counter_gbif_pwr_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + unsigned int shift = counter << 3; + unsigned int select = BIT(16 + counter); + + if (countable > 0xff) + return -EINVAL; + + /* + * Write 1, followed by 0 to CLR register for + * clearing the counter + */ + kgsl_regrmw(device, GEN7_GBIF_PERF_PWR_CNT_CLR, select, select); + kgsl_regrmw(device, GEN7_GBIF_PERF_PWR_CNT_CLR, select, 0); + + /* select the desired countable */ + kgsl_regrmw(device, reg->select, 0xff << shift, countable << shift); + + /* Enable the counter */ + kgsl_regrmw(device, GEN7_GBIF_PERF_PWR_CNT_EN, select, select); + + reg->value = 0; + return 0; +} + +static int gen7_counter_alwayson_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + return 0; +} + +static u64 gen7_counter_alwayson_read(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter) +{ + struct adreno_perfcount_register *reg = &group->regs[counter]; + + return gen7_read_alwayson(adreno_dev) + reg->value; +} + +static void gen7_write_gmu_counter_enable(struct kgsl_device *device, + struct adreno_perfcount_register *reg, u32 bit, u32 countable) +{ + kgsl_regrmw(device, reg->select, 0xff << bit, countable << bit); +} + +static int gen7_counter_gmu_xoclk_enable(struct adreno_device *adreno_dev, + const struct 
adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + + if (countable > 0xff) + return -EINVAL; + + /* + * Counters [0:3] are in select 1 bit offsets 0, 8, 16 and 24 + * Counters [4:5] are in select 2 bit offset 0, 8 + * Counters [6:9] are in select 3 bit offset 0, 8, 16 and 24 + */ + + if (counter == 4 || counter == 5) + counter -= 4; + else if (counter >= 6) + counter -= 6; + + gen7_write_gmu_counter_enable(device, reg, counter * 8, countable); + + reg->value = 0; + + kgsl_regwrite(device, GEN7_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1); + + return 0; +} + +static int gen7_counter_gmu_gmuclk_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + + if (countable > 0xff) + return -EINVAL; + + /* + * The two counters are stuck into GMU_CX_GMU_POWER_COUNTER_SELECT_1 + * at bit offset 16 and 24 + */ + gen7_write_gmu_counter_enable(device, reg, + 16 + (counter * 8), countable); + + kgsl_regwrite(device, GEN7_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1); + + reg->value = 0; + return 0; +} + +static int gen7_counter_gmu_perf_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + + if (countable > 0xff) + return -EINVAL; + + /* + * Counters [0:3] are in select 1 bit offsets 0, 8, 16 and 24 + * Counters [4:5] are in select 2 bit offset 0, 8 + */ + + if (counter >= 4) + counter -= 4; + + gen7_write_gmu_counter_enable(device, reg, counter * 8, countable); + + kgsl_regwrite(device, GEN7_GMU_CX_GMU_PERF_COUNTER_ENABLE, 1); + + reg->value = 
0; + return 0; +} + +static struct adreno_perfcount_register gen7_perfcounters_cp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_0_LO, + GEN7_RBBM_PERFCTR_CP_0_HI, -1, GEN7_CP_PERFCTR_CP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_1_LO, + GEN7_RBBM_PERFCTR_CP_1_HI, -1, GEN7_CP_PERFCTR_CP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_2_LO, + GEN7_RBBM_PERFCTR_CP_2_HI, -1, GEN7_CP_PERFCTR_CP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_3_LO, + GEN7_RBBM_PERFCTR_CP_3_HI, -1, GEN7_CP_PERFCTR_CP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_4_LO, + GEN7_RBBM_PERFCTR_CP_4_HI, -1, GEN7_CP_PERFCTR_CP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_5_LO, + GEN7_RBBM_PERFCTR_CP_5_HI, -1, GEN7_CP_PERFCTR_CP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_6_LO, + GEN7_RBBM_PERFCTR_CP_6_HI, -1, GEN7_CP_PERFCTR_CP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_7_LO, + GEN7_RBBM_PERFCTR_CP_7_HI, -1, GEN7_CP_PERFCTR_CP_SEL_7 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_8_LO, + GEN7_RBBM_PERFCTR_CP_8_HI, -1, GEN7_CP_PERFCTR_CP_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_9_LO, + GEN7_RBBM_PERFCTR_CP_9_HI, -1, GEN7_CP_PERFCTR_CP_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_10_LO, + GEN7_RBBM_PERFCTR_CP_10_HI, -1, GEN7_CP_PERFCTR_CP_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_11_LO, + GEN7_RBBM_PERFCTR_CP_11_HI, -1, GEN7_CP_PERFCTR_CP_SEL_11 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_12_LO, + GEN7_RBBM_PERFCTR_CP_12_HI, -1, GEN7_CP_PERFCTR_CP_SEL_12 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_13_LO, + GEN7_RBBM_PERFCTR_CP_13_HI, -1, GEN7_CP_PERFCTR_CP_SEL_13 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_bv_cp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_CP_0_LO, + 
GEN7_RBBM_PERFCTR2_CP_0_HI, -1, GEN7_CP_BV_PERFCTR_CP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_CP_1_LO, + GEN7_RBBM_PERFCTR2_CP_1_HI, -1, GEN7_CP_BV_PERFCTR_CP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_CP_2_LO, + GEN7_RBBM_PERFCTR2_CP_2_HI, -1, GEN7_CP_BV_PERFCTR_CP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_CP_3_LO, + GEN7_RBBM_PERFCTR2_CP_3_HI, -1, GEN7_CP_BV_PERFCTR_CP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_CP_4_LO, + GEN7_RBBM_PERFCTR2_CP_4_HI, -1, GEN7_CP_BV_PERFCTR_CP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_CP_5_LO, + GEN7_RBBM_PERFCTR2_CP_5_HI, -1, GEN7_CP_BV_PERFCTR_CP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_CP_6_LO, + GEN7_RBBM_PERFCTR2_CP_6_HI, -1, GEN7_CP_BV_PERFCTR_CP_SEL_6 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_rbbm[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RBBM_0_LO, + GEN7_RBBM_PERFCTR_RBBM_0_HI, -1, GEN7_RBBM_PERFCTR_RBBM_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RBBM_1_LO, + GEN7_RBBM_PERFCTR_RBBM_1_HI, -1, GEN7_RBBM_PERFCTR_RBBM_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RBBM_2_LO, + GEN7_RBBM_PERFCTR_RBBM_2_HI, -1, GEN7_RBBM_PERFCTR_RBBM_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RBBM_3_LO, + GEN7_RBBM_PERFCTR_RBBM_3_HI, -1, GEN7_RBBM_PERFCTR_RBBM_SEL_3 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_pc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_0_LO, + GEN7_RBBM_PERFCTR_PC_0_HI, -1, GEN7_PC_PERFCTR_PC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_1_LO, + GEN7_RBBM_PERFCTR_PC_1_HI, -1, GEN7_PC_PERFCTR_PC_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_2_LO, + GEN7_RBBM_PERFCTR_PC_2_HI, -1, GEN7_PC_PERFCTR_PC_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_3_LO, + GEN7_RBBM_PERFCTR_PC_3_HI, -1, 
GEN7_PC_PERFCTR_PC_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_4_LO, + GEN7_RBBM_PERFCTR_PC_4_HI, -1, GEN7_PC_PERFCTR_PC_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_5_LO, + GEN7_RBBM_PERFCTR_PC_5_HI, -1, GEN7_PC_PERFCTR_PC_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_6_LO, + GEN7_RBBM_PERFCTR_PC_6_HI, -1, GEN7_PC_PERFCTR_PC_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_7_LO, + GEN7_RBBM_PERFCTR_PC_7_HI, -1, GEN7_PC_PERFCTR_PC_SEL_7 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_bv_pc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_0_LO, + GEN7_RBBM_PERFCTR_BV_PC_0_HI, -1, GEN7_PC_PERFCTR_PC_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_1_LO, + GEN7_RBBM_PERFCTR_BV_PC_1_HI, -1, GEN7_PC_PERFCTR_PC_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_2_LO, + GEN7_RBBM_PERFCTR_BV_PC_2_HI, -1, GEN7_PC_PERFCTR_PC_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_3_LO, + GEN7_RBBM_PERFCTR_BV_PC_3_HI, -1, GEN7_PC_PERFCTR_PC_SEL_11 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_4_LO, + GEN7_RBBM_PERFCTR_BV_PC_4_HI, -1, GEN7_PC_PERFCTR_PC_SEL_12 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_5_LO, + GEN7_RBBM_PERFCTR_BV_PC_5_HI, -1, GEN7_PC_PERFCTR_PC_SEL_13 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_6_LO, + GEN7_RBBM_PERFCTR_BV_PC_6_HI, -1, GEN7_PC_PERFCTR_PC_SEL_14 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_7_LO, + GEN7_RBBM_PERFCTR_BV_PC_7_HI, -1, GEN7_PC_PERFCTR_PC_SEL_15 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_vfd[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_0_LO, + GEN7_RBBM_PERFCTR_VFD_0_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_1_LO, + GEN7_RBBM_PERFCTR_VFD_1_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_1 }, + { 
KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_2_LO, + GEN7_RBBM_PERFCTR_VFD_2_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_3_LO, + GEN7_RBBM_PERFCTR_VFD_3_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_4_LO, + GEN7_RBBM_PERFCTR_VFD_4_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_5_LO, + GEN7_RBBM_PERFCTR_VFD_5_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_6_LO, + GEN7_RBBM_PERFCTR_VFD_6_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_7_LO, + GEN7_RBBM_PERFCTR_VFD_7_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_7 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_bv_vfd[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_0_LO, + GEN7_RBBM_PERFCTR_BV_VFD_0_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_1_LO, + GEN7_RBBM_PERFCTR_BV_VFD_1_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_2_LO, + GEN7_RBBM_PERFCTR_BV_VFD_2_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_3_LO, + GEN7_RBBM_PERFCTR_BV_VFD_3_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_11 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_4_LO, + GEN7_RBBM_PERFCTR_BV_VFD_4_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_12 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_5_LO, + GEN7_RBBM_PERFCTR_BV_VFD_5_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_13 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_6_LO, + GEN7_RBBM_PERFCTR_BV_VFD_6_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_14 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_7_LO, + GEN7_RBBM_PERFCTR_BV_VFD_7_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_15 }, +}; + +static struct adreno_perfcount_register 
gen7_perfcounters_hlsq[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_HLSQ_0_LO, + GEN7_RBBM_PERFCTR_HLSQ_0_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_HLSQ_1_LO, + GEN7_RBBM_PERFCTR_HLSQ_1_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_HLSQ_2_LO, + GEN7_RBBM_PERFCTR_HLSQ_2_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_HLSQ_3_LO, + GEN7_RBBM_PERFCTR_HLSQ_3_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_HLSQ_4_LO, + GEN7_RBBM_PERFCTR_HLSQ_4_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_HLSQ_5_LO, + GEN7_RBBM_PERFCTR_HLSQ_5_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_5 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_vpc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VPC_0_LO, + GEN7_RBBM_PERFCTR_VPC_0_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VPC_1_LO, + GEN7_RBBM_PERFCTR_VPC_1_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VPC_2_LO, + GEN7_RBBM_PERFCTR_VPC_2_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VPC_3_LO, + GEN7_RBBM_PERFCTR_VPC_3_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VPC_4_LO, + GEN7_RBBM_PERFCTR_VPC_4_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VPC_5_LO, + GEN7_RBBM_PERFCTR_VPC_5_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_5 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_bv_vpc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VPC_0_LO, + GEN7_RBBM_PERFCTR_BV_VPC_0_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VPC_1_LO, + GEN7_RBBM_PERFCTR_BV_VPC_1_HI, -1, 
GEN7_VPC_PERFCTR_VPC_SEL_7 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VPC_2_LO, + GEN7_RBBM_PERFCTR_BV_VPC_2_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VPC_3_LO, + GEN7_RBBM_PERFCTR_BV_VPC_3_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VPC_4_LO, + GEN7_RBBM_PERFCTR_BV_VPC_4_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VPC_5_LO, + GEN7_RBBM_PERFCTR_BV_VPC_5_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_11 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_ccu[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CCU_0_LO, + GEN7_RBBM_PERFCTR_CCU_0_HI, -1, GEN7_RB_PERFCTR_CCU_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CCU_1_LO, + GEN7_RBBM_PERFCTR_CCU_1_HI, -1, GEN7_RB_PERFCTR_CCU_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CCU_2_LO, + GEN7_RBBM_PERFCTR_CCU_2_HI, -1, GEN7_RB_PERFCTR_CCU_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CCU_3_LO, + GEN7_RBBM_PERFCTR_CCU_3_HI, -1, GEN7_RB_PERFCTR_CCU_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CCU_4_LO, + GEN7_RBBM_PERFCTR_CCU_4_HI, -1, GEN7_RB_PERFCTR_CCU_SEL_4 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_tse[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TSE_0_LO, + GEN7_RBBM_PERFCTR_TSE_0_HI, -1, GEN7_GRAS_PERFCTR_TSE_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TSE_1_LO, + GEN7_RBBM_PERFCTR_TSE_1_HI, -1, GEN7_GRAS_PERFCTR_TSE_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TSE_2_LO, + GEN7_RBBM_PERFCTR_TSE_2_HI, -1, GEN7_GRAS_PERFCTR_TSE_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TSE_3_LO, + GEN7_RBBM_PERFCTR_TSE_3_HI, -1, GEN7_GRAS_PERFCTR_TSE_SEL_3 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_ras[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, 
GEN7_RBBM_PERFCTR_RAS_0_LO, + GEN7_RBBM_PERFCTR_RAS_0_HI, -1, GEN7_GRAS_PERFCTR_RAS_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RAS_1_LO, + GEN7_RBBM_PERFCTR_RAS_1_HI, -1, GEN7_GRAS_PERFCTR_RAS_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RAS_2_LO, + GEN7_RBBM_PERFCTR_RAS_2_HI, -1, GEN7_GRAS_PERFCTR_RAS_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RAS_3_LO, + GEN7_RBBM_PERFCTR_RAS_3_HI, -1, GEN7_GRAS_PERFCTR_RAS_SEL_3 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_uche[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_0_LO, + GEN7_RBBM_PERFCTR_UCHE_0_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_1_LO, + GEN7_RBBM_PERFCTR_UCHE_1_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_2_LO, + GEN7_RBBM_PERFCTR_UCHE_2_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_3_LO, + GEN7_RBBM_PERFCTR_UCHE_3_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_4_LO, + GEN7_RBBM_PERFCTR_UCHE_4_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_5_LO, + GEN7_RBBM_PERFCTR_UCHE_5_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_6_LO, + GEN7_RBBM_PERFCTR_UCHE_6_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_7_LO, + GEN7_RBBM_PERFCTR_UCHE_7_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_7 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_8_LO, + GEN7_RBBM_PERFCTR_UCHE_8_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_9_LO, + GEN7_RBBM_PERFCTR_UCHE_9_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_10_LO, + GEN7_RBBM_PERFCTR_UCHE_10_HI, -1, + 
GEN7_UCHE_PERFCTR_UCHE_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_11_LO, + GEN7_RBBM_PERFCTR_UCHE_11_HI, -1, + GEN7_UCHE_PERFCTR_UCHE_SEL_11 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_tp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_0_LO, + GEN7_RBBM_PERFCTR_TP_0_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_1_LO, + GEN7_RBBM_PERFCTR_TP_1_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_2_LO, + GEN7_RBBM_PERFCTR_TP_2_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_3_LO, + GEN7_RBBM_PERFCTR_TP_3_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_4_LO, + GEN7_RBBM_PERFCTR_TP_4_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_5_LO, + GEN7_RBBM_PERFCTR_TP_5_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_6_LO, + GEN7_RBBM_PERFCTR_TP_6_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_7_LO, + GEN7_RBBM_PERFCTR_TP_7_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_7 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_8_LO, + GEN7_RBBM_PERFCTR_TP_8_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_9_LO, + GEN7_RBBM_PERFCTR_TP_9_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_10_LO, + GEN7_RBBM_PERFCTR_TP_10_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_11_LO, + GEN7_RBBM_PERFCTR_TP_11_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_11 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_bv_tp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_TP_0_LO, + GEN7_RBBM_PERFCTR2_TP_0_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_12 }, + { 
KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_TP_1_LO, + GEN7_RBBM_PERFCTR2_TP_1_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_13 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_TP_2_LO, + GEN7_RBBM_PERFCTR2_TP_2_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_14 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_TP_3_LO, + GEN7_RBBM_PERFCTR2_TP_3_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_15 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_TP_4_LO, + GEN7_RBBM_PERFCTR2_TP_4_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_16 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_TP_5_LO, + GEN7_RBBM_PERFCTR2_TP_5_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_17 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_sp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_0_LO, + GEN7_RBBM_PERFCTR_SP_0_HI, -1, GEN7_SP_PERFCTR_SP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_1_LO, + GEN7_RBBM_PERFCTR_SP_1_HI, -1, GEN7_SP_PERFCTR_SP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_2_LO, + GEN7_RBBM_PERFCTR_SP_2_HI, -1, GEN7_SP_PERFCTR_SP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_3_LO, + GEN7_RBBM_PERFCTR_SP_3_HI, -1, GEN7_SP_PERFCTR_SP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_4_LO, + GEN7_RBBM_PERFCTR_SP_4_HI, -1, GEN7_SP_PERFCTR_SP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_5_LO, + GEN7_RBBM_PERFCTR_SP_5_HI, -1, GEN7_SP_PERFCTR_SP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_6_LO, + GEN7_RBBM_PERFCTR_SP_6_HI, -1, GEN7_SP_PERFCTR_SP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_7_LO, + GEN7_RBBM_PERFCTR_SP_7_HI, -1, GEN7_SP_PERFCTR_SP_SEL_7 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_8_LO, + GEN7_RBBM_PERFCTR_SP_8_HI, -1, GEN7_SP_PERFCTR_SP_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_9_LO, + GEN7_RBBM_PERFCTR_SP_9_HI, -1, GEN7_SP_PERFCTR_SP_SEL_9 }, + { 
KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_10_LO, + GEN7_RBBM_PERFCTR_SP_10_HI, -1, GEN7_SP_PERFCTR_SP_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_11_LO, + GEN7_RBBM_PERFCTR_SP_11_HI, -1, GEN7_SP_PERFCTR_SP_SEL_11 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_12_LO, + GEN7_RBBM_PERFCTR_SP_12_HI, -1, GEN7_SP_PERFCTR_SP_SEL_12 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_13_LO, + GEN7_RBBM_PERFCTR_SP_13_HI, -1, GEN7_SP_PERFCTR_SP_SEL_13 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_14_LO, + GEN7_RBBM_PERFCTR_SP_14_HI, -1, GEN7_SP_PERFCTR_SP_SEL_14 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_15_LO, + GEN7_RBBM_PERFCTR_SP_15_HI, -1, GEN7_SP_PERFCTR_SP_SEL_15 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_16_LO, + GEN7_RBBM_PERFCTR_SP_16_HI, -1, GEN7_SP_PERFCTR_SP_SEL_16 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_17_LO, + GEN7_RBBM_PERFCTR_SP_17_HI, -1, GEN7_SP_PERFCTR_SP_SEL_17 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_18_LO, + GEN7_RBBM_PERFCTR_SP_18_HI, -1, GEN7_SP_PERFCTR_SP_SEL_18 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_19_LO, + GEN7_RBBM_PERFCTR_SP_19_HI, -1, GEN7_SP_PERFCTR_SP_SEL_19 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_20_LO, + GEN7_RBBM_PERFCTR_SP_20_HI, -1, GEN7_SP_PERFCTR_SP_SEL_20 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_21_LO, + GEN7_RBBM_PERFCTR_SP_21_HI, -1, GEN7_SP_PERFCTR_SP_SEL_21 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_22_LO, + GEN7_RBBM_PERFCTR_SP_22_HI, -1, GEN7_SP_PERFCTR_SP_SEL_22 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_23_LO, + GEN7_RBBM_PERFCTR_SP_23_HI, -1, GEN7_SP_PERFCTR_SP_SEL_23 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_bv_sp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_0_LO, + GEN7_RBBM_PERFCTR2_SP_0_HI, -1, GEN7_SP_PERFCTR_SP_SEL_24 }, + { 
KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_1_LO, + GEN7_RBBM_PERFCTR2_SP_1_HI, -1, GEN7_SP_PERFCTR_SP_SEL_25 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_2_LO, + GEN7_RBBM_PERFCTR2_SP_2_HI, -1, GEN7_SP_PERFCTR_SP_SEL_26 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_3_LO, + GEN7_RBBM_PERFCTR2_SP_3_HI, -1, GEN7_SP_PERFCTR_SP_SEL_27 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_4_LO, + GEN7_RBBM_PERFCTR2_SP_4_HI, -1, GEN7_SP_PERFCTR_SP_SEL_28 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_5_LO, + GEN7_RBBM_PERFCTR2_SP_5_HI, -1, GEN7_SP_PERFCTR_SP_SEL_29 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_6_LO, + GEN7_RBBM_PERFCTR2_SP_6_HI, -1, GEN7_SP_PERFCTR_SP_SEL_30 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_7_LO, + GEN7_RBBM_PERFCTR2_SP_7_HI, -1, GEN7_SP_PERFCTR_SP_SEL_31 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_8_LO, + GEN7_RBBM_PERFCTR2_SP_8_HI, -1, GEN7_SP_PERFCTR_SP_SEL_32 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_9_LO, + GEN7_RBBM_PERFCTR2_SP_9_HI, -1, GEN7_SP_PERFCTR_SP_SEL_33 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_10_LO, + GEN7_RBBM_PERFCTR2_SP_10_HI, -1, GEN7_SP_PERFCTR_SP_SEL_34 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_11_LO, + GEN7_RBBM_PERFCTR2_SP_11_HI, -1, GEN7_SP_PERFCTR_SP_SEL_35 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_rb[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_0_LO, + GEN7_RBBM_PERFCTR_RB_0_HI, -1, GEN7_RB_PERFCTR_RB_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_1_LO, + GEN7_RBBM_PERFCTR_RB_1_HI, -1, GEN7_RB_PERFCTR_RB_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_2_LO, + GEN7_RBBM_PERFCTR_RB_2_HI, -1, GEN7_RB_PERFCTR_RB_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_3_LO, + GEN7_RBBM_PERFCTR_RB_3_HI, -1, GEN7_RB_PERFCTR_RB_SEL_3 }, + { 
KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_4_LO, + GEN7_RBBM_PERFCTR_RB_4_HI, -1, GEN7_RB_PERFCTR_RB_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_5_LO, + GEN7_RBBM_PERFCTR_RB_5_HI, -1, GEN7_RB_PERFCTR_RB_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_6_LO, + GEN7_RBBM_PERFCTR_RB_6_HI, -1, GEN7_RB_PERFCTR_RB_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_7_LO, + GEN7_RBBM_PERFCTR_RB_7_HI, -1, GEN7_RB_PERFCTR_RB_SEL_7 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_vsc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VSC_0_LO, + GEN7_RBBM_PERFCTR_VSC_0_HI, -1, GEN7_VSC_PERFCTR_VSC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VSC_1_LO, + GEN7_RBBM_PERFCTR_VSC_1_HI, -1, GEN7_VSC_PERFCTR_VSC_SEL_1 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_lrz[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_LRZ_0_LO, + GEN7_RBBM_PERFCTR_LRZ_0_HI, -1, GEN7_GRAS_PERFCTR_LRZ_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_LRZ_1_LO, + GEN7_RBBM_PERFCTR_LRZ_1_HI, -1, GEN7_GRAS_PERFCTR_LRZ_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_LRZ_2_LO, + GEN7_RBBM_PERFCTR_LRZ_2_HI, -1, GEN7_GRAS_PERFCTR_LRZ_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_LRZ_3_LO, + GEN7_RBBM_PERFCTR_LRZ_3_HI, -1, GEN7_GRAS_PERFCTR_LRZ_SEL_3 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_cmp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CMP_0_LO, + GEN7_RBBM_PERFCTR_CMP_0_HI, -1, GEN7_RB_PERFCTR_CMP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CMP_1_LO, + GEN7_RBBM_PERFCTR_CMP_1_HI, -1, GEN7_RB_PERFCTR_CMP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CMP_2_LO, + GEN7_RBBM_PERFCTR_CMP_2_HI, -1, GEN7_RB_PERFCTR_CMP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CMP_3_LO, + GEN7_RBBM_PERFCTR_CMP_3_HI, -1, 
GEN7_RB_PERFCTR_CMP_SEL_3 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_ufc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UFC_0_LO, + GEN7_RBBM_PERFCTR_UFC_0_HI, -1, GEN7_RB_PERFCTR_UFC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UFC_1_LO, + GEN7_RBBM_PERFCTR_UFC_1_HI, -1, GEN7_RB_PERFCTR_UFC_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UFC_2_LO, + GEN7_RBBM_PERFCTR_UFC_2_HI, -1, GEN7_RB_PERFCTR_UFC_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UFC_3_LO, + GEN7_RBBM_PERFCTR_UFC_3_HI, -1, GEN7_RB_PERFCTR_UFC_SEL_3 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_bv_ufc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_UFC_0_LO, + GEN7_RBBM_PERFCTR2_UFC_0_HI, -1, GEN7_RB_PERFCTR_UFC_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_UFC_1_LO, + GEN7_RBBM_PERFCTR2_UFC_1_HI, -1, GEN7_RB_PERFCTR_UFC_SEL_5 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_gbif[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_GBIF_PERF_CNT_LOW0, + GEN7_GBIF_PERF_CNT_HIGH0, -1, GEN7_GBIF_PERF_CNT_SEL }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_GBIF_PERF_CNT_LOW1, + GEN7_GBIF_PERF_CNT_HIGH1, -1, GEN7_GBIF_PERF_CNT_SEL }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_GBIF_PERF_CNT_LOW2, + GEN7_GBIF_PERF_CNT_HIGH2, -1, GEN7_GBIF_PERF_CNT_SEL }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_GBIF_PERF_CNT_LOW3, + GEN7_GBIF_PERF_CNT_HIGH3, -1, GEN7_GBIF_PERF_CNT_SEL }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_gbif_pwr[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_GBIF_PWR_CNT_LOW0, + GEN7_GBIF_PWR_CNT_HIGH0, -1, GEN7_GBIF_PERF_PWR_CNT_SEL }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_GBIF_PWR_CNT_LOW1, + GEN7_GBIF_PWR_CNT_HIGH1, -1, GEN7_GBIF_PERF_PWR_CNT_SEL }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_GBIF_PWR_CNT_LOW2, + GEN7_GBIF_PWR_CNT_HIGH2, -1, GEN7_GBIF_PERF_PWR_CNT_SEL }, +}; + +#define GMU_COUNTER(lo, hi, sel) \ + 
{ .countable = KGSL_PERFCOUNTER_NOT_USED, \ + .offset = lo, .offset_hi = hi, .select = sel } + +#define GMU_COUNTER_RESERVED(lo, hi, sel) \ + { .countable = KGSL_PERFCOUNTER_BROKEN, \ + .offset = lo, .offset_hi = hi, .select = sel } + +static struct adreno_perfcount_register gen7_perfcounters_gmu_xoclk[] = { + /* + * COUNTER_XOCLK_0 and COUNTER_XOCLK_4 are used for the GPU + * busy and ifpc count. Mark them as reserved to ensure they + * are not re-used. + */ + GMU_COUNTER_RESERVED(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_0), + GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_0), + GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_0), + GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_0), + GMU_COUNTER_RESERVED(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_1), + GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_5_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_5_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_1), + GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_6_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_6_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_2), + GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_2), + GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_8_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_8_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_2), + GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_9_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_9_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_2), +}; + +static struct adreno_perfcount_register gen7_perfcounters_gmu_gmuclk[] = { + 
GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_GMUCLK_0_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_GMUCLK_0_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_1), + GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_GMUCLK_1_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_GMUCLK_1_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_1), +}; + +static struct adreno_perfcount_register gen7_perfcounters_gmu_perf[] = { + GMU_COUNTER(GEN7_GMU_CX_GMU_PERF_COUNTER_0_L, + GEN7_GMU_CX_GMU_PERF_COUNTER_0_H, + GEN7_GMU_CX_GMU_PERF_COUNTER_SELECT_0), + GMU_COUNTER(GEN7_GMU_CX_GMU_PERF_COUNTER_1_L, + GEN7_GMU_CX_GMU_PERF_COUNTER_1_H, + GEN7_GMU_CX_GMU_PERF_COUNTER_SELECT_0), + GMU_COUNTER(GEN7_GMU_CX_GMU_PERF_COUNTER_2_L, + GEN7_GMU_CX_GMU_PERF_COUNTER_2_H, + GEN7_GMU_CX_GMU_PERF_COUNTER_SELECT_0), + GMU_COUNTER(GEN7_GMU_CX_GMU_PERF_COUNTER_3_L, + GEN7_GMU_CX_GMU_PERF_COUNTER_3_H, + GEN7_GMU_CX_GMU_PERF_COUNTER_SELECT_0), + GMU_COUNTER(GEN7_GMU_CX_GMU_PERF_COUNTER_4_L, + GEN7_GMU_CX_GMU_PERF_COUNTER_4_H, + GEN7_GMU_CX_GMU_PERF_COUNTER_SELECT_1), + GMU_COUNTER(GEN7_GMU_CX_GMU_PERF_COUNTER_5_L, + GEN7_GMU_CX_GMU_PERF_COUNTER_5_H, + GEN7_GMU_CX_GMU_PERF_COUNTER_SELECT_1), +}; + +static struct adreno_perfcount_register gen7_perfcounters_alwayson[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_CP_ALWAYS_ON_COUNTER_LO, + GEN7_CP_ALWAYS_ON_COUNTER_HI, -1 }, +}; + +/* + * ADRENO_PERFCOUNTER_GROUP_RESTORE flag is enabled by default + * because most of the perfcounter groups need to be restored + * as part of preemption and IFPC. 
Perfcounter groups that are + * not restored as part of preemption and IFPC should be defined + * using GEN7_PERFCOUNTER_GROUP_FLAGS macro + */ + +#define GEN7_PERFCOUNTER_GROUP_FLAGS(core, offset, name, flags, \ + enable, read) \ + [KGSL_PERFCOUNTER_GROUP_##offset] = { core##_perfcounters_##name, \ + ARRAY_SIZE(core##_perfcounters_##name), __stringify(name), flags, \ + enable, read } + +#define GEN7_PERFCOUNTER_GROUP(offset, name, enable, read) \ + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, offset, name, \ + ADRENO_PERFCOUNTER_GROUP_RESTORE, enable, read) + +#define GEN7_REGULAR_PERFCOUNTER_GROUP(offset, name) \ + GEN7_PERFCOUNTER_GROUP(offset, name, \ + gen7_counter_enable, gen7_counter_read) + +#define GEN7_BV_PERFCOUNTER_GROUP(offset, name, enable, read) \ + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, BV_##offset, bv_##name, \ + ADRENO_PERFCOUNTER_GROUP_RESTORE, enable, read) + +#define GEN7_BV_REGULAR_PERFCOUNTER_GROUP(offset, name) \ + GEN7_BV_PERFCOUNTER_GROUP(offset, name, \ + gen7_counter_enable, gen7_counter_read) + +static const struct adreno_perfcount_group gen7_perfcounter_groups + [KGSL_PERFCOUNTER_GROUP_MAX] = { + GEN7_REGULAR_PERFCOUNTER_GROUP(CP, cp), + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, RBBM, rbbm, 0, + gen7_counter_enable, gen7_counter_read), + GEN7_REGULAR_PERFCOUNTER_GROUP(PC, pc), + GEN7_REGULAR_PERFCOUNTER_GROUP(VFD, vfd), + GEN7_PERFCOUNTER_GROUP(HLSQ, hlsq, + gen7_counter_inline_enable, gen7_counter_read), + GEN7_REGULAR_PERFCOUNTER_GROUP(VPC, vpc), + GEN7_REGULAR_PERFCOUNTER_GROUP(CCU, ccu), + GEN7_REGULAR_PERFCOUNTER_GROUP(CMP, cmp), + GEN7_REGULAR_PERFCOUNTER_GROUP(TSE, tse), + GEN7_REGULAR_PERFCOUNTER_GROUP(RAS, ras), + GEN7_REGULAR_PERFCOUNTER_GROUP(LRZ, lrz), + GEN7_REGULAR_PERFCOUNTER_GROUP(UCHE, uche), + GEN7_PERFCOUNTER_GROUP(TP, tp, + gen7_counter_inline_enable, gen7_counter_read), + GEN7_PERFCOUNTER_GROUP(SP, sp, + gen7_counter_inline_enable, gen7_counter_read), + GEN7_REGULAR_PERFCOUNTER_GROUP(RB, rb), + GEN7_REGULAR_PERFCOUNTER_GROUP(VSC, vsc), 
+ GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, VBIF, gbif, 0, + gen7_counter_gbif_enable, gen7_counter_read_norestore), + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, VBIF_PWR, gbif_pwr, + ADRENO_PERFCOUNTER_GROUP_FIXED, + gen7_counter_gbif_pwr_enable, gen7_counter_read_norestore), + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, ALWAYSON, alwayson, + ADRENO_PERFCOUNTER_GROUP_FIXED, + gen7_counter_alwayson_enable, gen7_counter_alwayson_read), + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, GMU_XOCLK, gmu_xoclk, 0, + gen7_counter_gmu_xoclk_enable, gen7_counter_read_norestore), + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, GMU_GMUCLK, gmu_gmuclk, 0, + gen7_counter_gmu_gmuclk_enable, gen7_counter_read_norestore), + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, GMU_PERF, gmu_perf, 0, + gen7_counter_gmu_perf_enable, gen7_counter_read_norestore), + GEN7_REGULAR_PERFCOUNTER_GROUP(UFC, ufc), + GEN7_BV_REGULAR_PERFCOUNTER_GROUP(CP, cp), + GEN7_BV_REGULAR_PERFCOUNTER_GROUP(PC, pc), + GEN7_BV_REGULAR_PERFCOUNTER_GROUP(VFD, vfd), + GEN7_BV_REGULAR_PERFCOUNTER_GROUP(VPC, vpc), + GEN7_BV_PERFCOUNTER_GROUP(TP, tp, + gen7_counter_inline_enable, gen7_counter_read), + GEN7_BV_PERFCOUNTER_GROUP(SP, sp, + gen7_counter_inline_enable, gen7_counter_read), + GEN7_BV_REGULAR_PERFCOUNTER_GROUP(UFC, ufc), +}; + +const struct adreno_perfcounters adreno_gen7_perfcounters = { + gen7_perfcounter_groups, + ARRAY_SIZE(gen7_perfcounter_groups), +}; diff --git a/adreno_gen7_preempt.c b/adreno_gen7_preempt.c new file mode 100644 index 0000000000..4c5da6d497 --- /dev/null +++ b/adreno_gen7_preempt.c @@ -0,0 +1,746 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
+ */ + +#include "adreno.h" +#include "adreno_gen7.h" +#include "adreno_pm4types.h" +#include "adreno_trace.h" + +#define PREEMPT_RECORD(_field) \ + offsetof(struct gen7_cp_preemption_record, _field) + +#define PREEMPT_SMMU_RECORD(_field) \ + offsetof(struct gen7_cp_smmu_info, _field) + +enum { + SET_PSEUDO_REGISTER_SAVE_REGISTER_SMMU_INFO = 0, + SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_NON_SECURE_SAVE_ADDR, + SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_SECURE_SAVE_ADDR, + SET_PSEUDO_REGISTER_SAVE_REGISTER_NON_PRIV_SAVE_ADDR, + SET_PSEUDO_REGISTER_SAVE_REGISTER_COUNTER, +}; + +static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer, + bool atomic) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_ringbuffer *rb = adreno_dev->cur_rb; + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&rb->preempt_lock, flags); + + if (!atomic) { + /* + * We might have skipped updating the wptr in case we are in + * dispatcher context. Do it now. + */ + if (rb->skip_inline_wptr) { + + ret = gen7_fenced_write(adreno_dev, + GEN7_CP_RB_WPTR, rb->wptr, + FENCE_STATUS_WRITEDROPPED0_MASK); + + reset_timer = true; + rb->skip_inline_wptr = false; + } + } else { + unsigned int wptr; + + kgsl_regread(device, GEN7_CP_RB_WPTR, &wptr); + if (wptr != rb->wptr) { + kgsl_regwrite(device, GEN7_CP_RB_WPTR, rb->wptr); + reset_timer = true; + } + } + + if (reset_timer) + rb->dispatch_q.expires = jiffies + + msecs_to_jiffies(adreno_drawobj_timeout); + + spin_unlock_irqrestore(&rb->preempt_lock, flags); + + if (!atomic) { + /* If WPTR update fails, set the fault and trigger recovery */ + if (ret) { + gmu_core_fault_snapshot(device); + adreno_dispatcher_fault(adreno_dev, + ADRENO_GMU_FAULT_SKIP_SNAPSHOT); + } + } +} + +static void _power_collapse_set(struct adreno_device *adreno_dev, bool val) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + gmu_core_regwrite(device, + GEN7_GMU_PWR_COL_PREEMPT_KEEPALIVE, (val ? 
1 : 0)); +} + +static void _gen7_preemption_done(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int status; + + /* + * In the very unlikely case that the power is off, do nothing - the + * state will be reset on power up and everybody will be happy + */ + + if (!kgsl_state_is_awake(device)) + return; + + kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_CNTL, &status); + + if (status & 0x1) { + dev_err(device->dev, + "Preemption not complete: status=%X cur=%d R/W=%X/%X next=%d R/W=%X/%X\n", + status, adreno_dev->cur_rb->id, + adreno_get_rptr(adreno_dev->cur_rb), + adreno_dev->cur_rb->wptr, + adreno_dev->next_rb->id, + adreno_get_rptr(adreno_dev->next_rb), + adreno_dev->next_rb->wptr); + + /* Set a fault and restart */ + adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + + return; + } + + adreno_dev->preempt.count++; + + del_timer_sync(&adreno_dev->preempt.timer); + + kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_LEVEL_STATUS, &status); + + trace_adreno_preempt_done(adreno_dev->cur_rb, adreno_dev->next_rb, + status); + + /* Clean up all the bits */ + adreno_dev->prev_rb = adreno_dev->cur_rb; + adreno_dev->cur_rb = adreno_dev->next_rb; + adreno_dev->next_rb = NULL; + + /* Update the wptr for the new command queue */ + _update_wptr(adreno_dev, true, false); + + /* Update the dispatcher timer for the new command queue */ + mod_timer(&adreno_dev->dispatcher.timer, + adreno_dev->cur_rb->dispatch_q.expires); + + /* Clear the preempt state */ + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE); +} + +static void _gen7_preemption_fault(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int status; + + /* + * If the power is on check the preemption status one more time - if it + * was successful then just transition to the complete state + */ + if (kgsl_state_is_awake(device)) { + kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_CNTL, &status); + + if (!(status 
& 0x1)) { + adreno_set_preempt_state(adreno_dev, + ADRENO_PREEMPT_COMPLETE); + + adreno_dispatcher_schedule(device); + return; + } + } + + dev_err(device->dev, + "Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n", + adreno_dev->cur_rb->id, + adreno_get_rptr(adreno_dev->cur_rb), + adreno_dev->cur_rb->wptr, + adreno_dev->next_rb->id, + adreno_get_rptr(adreno_dev->next_rb), + adreno_dev->next_rb->wptr); + + adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT); +} + +static void _gen7_preemption_worker(struct work_struct *work) +{ + struct adreno_preemption *preempt = container_of(work, + struct adreno_preemption, work); + struct adreno_device *adreno_dev = container_of(preempt, + struct adreno_device, preempt); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* Need to take the mutex to make sure that the power stays on */ + mutex_lock(&device->mutex); + + if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_FAULTED)) + _gen7_preemption_fault(adreno_dev); + + mutex_unlock(&device->mutex); +} + +/* Find the highest priority active ringbuffer */ +static struct adreno_ringbuffer *gen7_next_ringbuffer( + struct adreno_device *adreno_dev) +{ + struct adreno_ringbuffer *rb; + unsigned long flags; + unsigned int i; + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + bool empty; + + spin_lock_irqsave(&rb->preempt_lock, flags); + empty = adreno_rb_empty(rb); + spin_unlock_irqrestore(&rb->preempt_lock, flags); + + if (!empty) + return rb; + } + + return NULL; +} + +void gen7_preemption_trigger(struct adreno_device *adreno_dev, bool atomic) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + struct adreno_ringbuffer *next; + u64 ttbr0, gpuaddr; + u32 contextidr, cntl; + unsigned long flags; + struct adreno_preemption *preempt = &adreno_dev->preempt; + + /* Put ourselves into a possible trigger state */ + if (!adreno_move_preempt_state(adreno_dev, + ADRENO_PREEMPT_NONE, ADRENO_PREEMPT_START)) + 
return; + + /* Get the next ringbuffer to preempt in */ + next = gen7_next_ringbuffer(adreno_dev); + + /* + * Nothing to do if every ringbuffer is empty or if the current + * ringbuffer is the only active one + */ + if (next == NULL || next == adreno_dev->cur_rb) { + /* + * Update any critical things that might have been skipped while + * we were looking for a new ringbuffer + */ + + if (next != NULL) { + _update_wptr(adreno_dev, false, atomic); + + mod_timer(&adreno_dev->dispatcher.timer, + adreno_dev->cur_rb->dispatch_q.expires); + } + + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE); + return; + } + + /* Turn off the dispatcher timer */ + del_timer(&adreno_dev->dispatcher.timer); + + /* + * This is the most critical section - we need to take care not to race + * until we have programmed the CP for the switch + */ + + spin_lock_irqsave(&next->preempt_lock, flags); + + /* + * Get the pagetable from the pagetable info. + * The pagetable_desc is allocated and mapped at probe time, and + * preemption_desc at init time, so no need to check if + * sharedmem accesses to these memdescs succeed. + */ + kgsl_sharedmem_readq(next->pagetable_desc, &ttbr0, + PT_INFO_OFFSET(ttbr0)); + kgsl_sharedmem_readl(next->pagetable_desc, &contextidr, + PT_INFO_OFFSET(contextidr)); + + kgsl_sharedmem_writel(next->preemption_desc, + PREEMPT_RECORD(wptr), next->wptr); + + spin_unlock_irqrestore(&next->preempt_lock, flags); + + /* And write it to the smmu info */ + if (kgsl_mmu_is_perprocess(&device->mmu)) { + kgsl_sharedmem_writeq(iommu->smmu_info, + PREEMPT_SMMU_RECORD(ttbr0), ttbr0); + kgsl_sharedmem_writel(iommu->smmu_info, + PREEMPT_SMMU_RECORD(context_idr), contextidr); + } + + kgsl_sharedmem_readq(preempt->scratch, &gpuaddr, + next->id * sizeof(u64)); + + /* + * Set a keepalive bit before the first preemption register write. 
+ * This is required since while each individual write to the context + * switch registers will wake the GPU from collapse, it will not in + * itself cause GPU activity. Thus, the GPU could technically be + * re-collapsed between subsequent register writes leading to a + * prolonged preemption sequence. The keepalive bit prevents any + * further power collapse while it is set. + * It is more efficient to use a keepalive+wake-on-fence approach here + * rather than an OOB. Both keepalive and the fence are effectively + * free when the GPU is already powered on, whereas an OOB requires an + * unconditional handshake with the GMU. + */ + _power_collapse_set(adreno_dev, true); + + /* + * Fenced writes on this path will make sure the GPU is woken up + * in case it was power collapsed by the GMU. + */ + if (gen7_fenced_write(adreno_dev, + GEN7_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO, + lower_32_bits(next->preemption_desc->gpuaddr), + FENCE_STATUS_WRITEDROPPED1_MASK)) + goto err; + + /* + * Above fence writes will make sure GMU comes out of + * IFPC state if its was in IFPC state but it doesn't + * guarantee that GMU FW actually moved to ACTIVE state + * i.e. wake-up from IFPC is complete. + * Wait for GMU to move to ACTIVE state before triggering + * preemption. This is require to make sure CP doesn't + * interrupt GMU during wake-up from IFPC. 
+ */ + if (gmu_core_dev_wait_for_active_transition(device)) + goto err; + + if (gen7_fenced_write(adreno_dev, + GEN7_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI, + upper_32_bits(next->preemption_desc->gpuaddr), + FENCE_STATUS_WRITEDROPPED1_MASK)) + goto err; + + if (gen7_fenced_write(adreno_dev, + GEN7_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO, + lower_32_bits(next->secure_preemption_desc->gpuaddr), + FENCE_STATUS_WRITEDROPPED1_MASK)) + goto err; + + if (gen7_fenced_write(adreno_dev, + GEN7_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI, + upper_32_bits(next->secure_preemption_desc->gpuaddr), + FENCE_STATUS_WRITEDROPPED1_MASK)) + goto err; + + if (gen7_fenced_write(adreno_dev, + GEN7_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO, + lower_32_bits(gpuaddr), + FENCE_STATUS_WRITEDROPPED1_MASK)) + goto err; + + if (gen7_fenced_write(adreno_dev, + GEN7_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI, + upper_32_bits(gpuaddr), + FENCE_STATUS_WRITEDROPPED1_MASK)) + goto err; + + adreno_dev->next_rb = next; + + /* Start the timer to detect a stuck preemption */ + mod_timer(&adreno_dev->preempt.timer, + jiffies + msecs_to_jiffies(ADRENO_PREEMPT_TIMEOUT)); + + cntl = (preempt->preempt_level << 6) | 0x01; + + /* Skip save/restore during L1 preemption */ + if (preempt->skipsaverestore) + cntl |= (1 << 9); + + /* Enable GMEM save/restore across preemption */ + if (preempt->usesgmem) + cntl |= (1 << 8); + + trace_adreno_preempt_trigger(adreno_dev->cur_rb, adreno_dev->next_rb, + cntl); + + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_TRIGGERED); + + /* Trigger the preemption */ + if (gen7_fenced_write(adreno_dev, GEN7_CP_CONTEXT_SWITCH_CNTL, cntl, + FENCE_STATUS_WRITEDROPPED1_MASK)) { + adreno_dev->next_rb = NULL; + del_timer(&adreno_dev->preempt.timer); + goto err; + } + + return; +err: + /* If fenced write fails, take inline snapshot and trigger recovery */ + if (!in_interrupt()) { + gmu_core_fault_snapshot(device); + adreno_dispatcher_fault(adreno_dev, + 
ADRENO_GMU_FAULT_SKIP_SNAPSHOT); + } else { + adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT); + } + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE); + /* Clear the keep alive */ + _power_collapse_set(adreno_dev, false); + +} + +void gen7_preemption_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int status; + + if (!adreno_move_preempt_state(adreno_dev, + ADRENO_PREEMPT_TRIGGERED, ADRENO_PREEMPT_PENDING)) + return; + + kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_CNTL, &status); + + if (status & 0x1) { + dev_err(KGSL_DEVICE(adreno_dev)->dev, + "preempt interrupt with non-zero status: %X\n", + status); + + /* + * Under the assumption that this is a race between the + * interrupt and the register, schedule the worker to clean up. + * If the status still hasn't resolved itself by the time we get + * there then we have to assume something bad happened + */ + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE); + adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev)); + return; + } + + adreno_dev->preempt.count++; + + /* + * We can now safely clear the preemption keepalive bit, allowing + * power collapse to resume its regular activity. 
+ */ + _power_collapse_set(adreno_dev, false); + + del_timer(&adreno_dev->preempt.timer); + + kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_LEVEL_STATUS, &status); + + trace_adreno_preempt_done(adreno_dev->cur_rb, adreno_dev->next_rb, + status); + + adreno_dev->prev_rb = adreno_dev->cur_rb; + adreno_dev->cur_rb = adreno_dev->next_rb; + adreno_dev->next_rb = NULL; + + /* Update the wptr if it changed while preemption was ongoing */ + _update_wptr(adreno_dev, true, true); + + /* Update the dispatcher timer for the new command queue */ + mod_timer(&adreno_dev->dispatcher.timer, + adreno_dev->cur_rb->dispatch_q.expires); + + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE); + + gen7_preemption_trigger(adreno_dev, true); +} + +void gen7_preemption_schedule(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (!adreno_is_preemption_enabled(adreno_dev)) + return; + + mutex_lock(&device->mutex); + + if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE)) + _gen7_preemption_done(adreno_dev); + + gen7_preemption_trigger(adreno_dev, false); + + mutex_unlock(&device->mutex); +} + +u32 gen7_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, + u32 *cmds) +{ + u32 *cmds_orig = cmds; + u64 gpuaddr = 0; + + if (!adreno_is_preemption_enabled(adreno_dev)) + return 0; + + *cmds++ = cp_type7_packet(CP_THREAD_CONTROL, 1); + *cmds++ = CP_SET_THREAD_BR; + + if (drawctxt) { + gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; + *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 15); + } else { + *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12); + } + + /* NULL SMMU_INFO buffer - we track in KMD */ + *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_SMMU_INFO; + cmds += cp_gpuaddr(adreno_dev, cmds, 0x0); + + *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_NON_SECURE_SAVE_ADDR; + cmds += cp_gpuaddr(adreno_dev, cmds, rb->preemption_desc->gpuaddr); + + 
*cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_SECURE_SAVE_ADDR; + cmds += cp_gpuaddr(adreno_dev, cmds, + rb->secure_preemption_desc->gpuaddr); + + if (drawctxt) { + *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_NON_PRIV_SAVE_ADDR; + cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr); + } + + /* + * There is no need to specify this address when we are about to + * trigger preemption. This is because CP internally stores this + * address specified here in the CP_SET_PSEUDO_REGISTER payload to + * the context record and thus knows from where to restore + * the saved perfcounters for the new ringbuffer. + */ + *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_COUNTER; + cmds += cp_gpuaddr(adreno_dev, cmds, + rb->perfcounter_save_restore_desc->gpuaddr); + + if (drawctxt) { + struct adreno_ringbuffer *rb = drawctxt->rb; + u64 dest = adreno_dev->preempt.scratch->gpuaddr + + (rb->id * sizeof(u64)); + + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 2); + cmds += cp_gpuaddr(adreno_dev, cmds, dest); + *cmds++ = lower_32_bits(gpuaddr); + *cmds++ = upper_32_bits(gpuaddr); + } + + return (unsigned int) (cmds - cmds_orig); +} + +u32 gen7_preemption_post_ibsubmit(struct adreno_device *adreno_dev, + u32 *cmds) +{ + u32 index = 0; + + if (!adreno_is_preemption_enabled(adreno_dev)) + return 0; + + if (adreno_dev->cur_rb) { + u64 dest = adreno_dev->preempt.scratch->gpuaddr + + (adreno_dev->cur_rb->id * sizeof(u64)); + + cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 4); + cmds[index++] = lower_32_bits(dest); + cmds[index++] = upper_32_bits(dest); + cmds[index++] = 0; + cmds[index++] = 0; + } + + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SET_THREAD_BOTH; + cmds[index++] = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4); + cmds[index++] = 0; + cmds[index++] = 0; + cmds[index++] = 1; + cmds[index++] = 0; + + return index; +} + +void gen7_preemption_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct 
kgsl_iommu *iommu = KGSL_IOMMU(device); + struct adreno_ringbuffer *rb; + unsigned int i; + + if (!adreno_is_preemption_enabled(adreno_dev)) + return; + + /* Force the state to be clear */ + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE); + + if (kgsl_mmu_is_perprocess(&device->mmu)) { + /* smmu_info is allocated and mapped in gen7_preemption_iommu_init */ + kgsl_sharedmem_writel(iommu->smmu_info, + PREEMPT_SMMU_RECORD(magic), GEN7_CP_SMMU_INFO_MAGIC_REF); + kgsl_sharedmem_writeq(iommu->smmu_info, + PREEMPT_SMMU_RECORD(ttbr0), MMU_DEFAULT_TTBR0(device)); + + /* The CP doesn't use the asid record, so poison it */ + kgsl_sharedmem_writel(iommu->smmu_info, + PREEMPT_SMMU_RECORD(asid), 0xdecafbad); + kgsl_sharedmem_writel(iommu->smmu_info, + PREEMPT_SMMU_RECORD(context_idr), 0); + + kgsl_regwrite(device, GEN7_CP_CONTEXT_SWITCH_SMMU_INFO_LO, + lower_32_bits(iommu->smmu_info->gpuaddr)); + + kgsl_regwrite(device, GEN7_CP_CONTEXT_SWITCH_SMMU_INFO_HI, + upper_32_bits(iommu->smmu_info->gpuaddr)); + } + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(rptr), 0); + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(wptr), 0); + + adreno_ringbuffer_set_pagetable(rb, + device->mmu.defaultpagetable); + } +} + +static void reset_rb_preempt_record(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + memset(rb->preemption_desc->hostptr, 0x0, rb->preemption_desc->size); + + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(magic), GEN7_CP_CTXRECORD_MAGIC_REF); + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(cntl), GEN7_CP_RB_CNTL_DEFAULT); + kgsl_sharedmem_writeq(rb->preemption_desc, + PREEMPT_RECORD(rptr_addr), SCRATCH_RPTR_GPU_ADDR( + KGSL_DEVICE(adreno_dev), rb->id)); + kgsl_sharedmem_writeq(rb->preemption_desc, + PREEMPT_RECORD(rbase), rb->buffer_desc->gpuaddr); + kgsl_sharedmem_writeq(rb->preemption_desc, + PREEMPT_RECORD(bv_rptr_addr), SCRATCH_BV_RPTR_GPU_ADDR( 
		KGSL_DEVICE(adreno_dev), rb->id));
}

/*
 * Reset every ringbuffer's preemption context record. Safe no-op when
 * preemption has not been enabled on this device.
 */
void gen7_reset_preempt_records(struct adreno_device *adreno_dev)
{
	int i;
	struct adreno_ringbuffer *rb;

	if (!adreno_is_preemption_enabled(adreno_dev))
		return;

	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
		reset_rb_preempt_record(adreno_dev, rb);
	}
}

/*
 * Allocate the per-ringbuffer global buffers that back CP preemption:
 * the (non-secure) context record, the secure context record and the
 * perfcounter save/restore area, then initialize the record contents.
 * All three are PRIVILEGED so userspace cannot map them.
 */
static int gen7_preemption_ringbuffer_init(struct adreno_device *adreno_dev,
	struct adreno_ringbuffer *rb)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
	u64 ctxt_record_size = GEN7_CP_CTXRECORD_SIZE_IN_BYTES;
	int ret;

	/* A target core may override the default context record size */
	if (gen7_core->ctxt_record_size)
		ctxt_record_size = gen7_core->ctxt_record_size;

	ret = adreno_allocate_global(device, &rb->preemption_desc,
		ctxt_record_size, SZ_16K, 0,
		KGSL_MEMDESC_PRIVILEGED, "preemption_desc");
	if (ret)
		return ret;

	ret = adreno_allocate_global(device, &rb->secure_preemption_desc,
		ctxt_record_size, 0,
		KGSL_MEMFLAGS_SECURE, KGSL_MEMDESC_PRIVILEGED,
		"secure_preemption_desc");
	if (ret)
		return ret;

	ret = adreno_allocate_global(device, &rb->perfcounter_save_restore_desc,
		GEN7_CP_PERFCOUNTER_SAVE_RESTORE_SIZE, 0, 0,
		KGSL_MEMDESC_PRIVILEGED,
		"perfcounter_save_restore_desc");
	if (ret)
		return ret;

	reset_rb_preempt_record(adreno_dev, rb);

	return 0;
}

/*
 * One-time preemption setup: allocate the per-ringbuffer context
 * records, the preemption scratch page and (for per-process pagetables)
 * the SMMU info record shared with the CP, then mark preemption active.
 *
 * Return: 0 on success, -ENODEV when no IOMMU is present, or the
 * allocation error.
 */
int gen7_preemption_init(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct kgsl_iommu *iommu = KGSL_IOMMU(device);
	struct adreno_preemption *preempt = &adreno_dev->preempt;
	struct adreno_ringbuffer *rb;
	int ret;
	unsigned int i;

	/* We are dependent on IOMMU to make preemption go on the CP side */
	if (kgsl_mmu_get_mmutype(device) != KGSL_MMU_TYPE_IOMMU)
		return -ENODEV;

	INIT_WORK(&preempt->work, _gen7_preemption_worker);

	/* Allocate mem for storing preemption switch record */
	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
		ret = gen7_preemption_ringbuffer_init(adreno_dev, rb);
		if (ret)
			return ret;
	}

	ret = adreno_allocate_global(device, &preempt->scratch, PAGE_SIZE,
		0, 0, 0, "preempt_scratch");
	if (ret)
		return ret;

	/* Allocate mem for storing preemption smmu record */
	if (kgsl_mmu_is_perprocess(&device->mmu)) {
		ret = adreno_allocate_global(device, &iommu->smmu_info, PAGE_SIZE, 0,
			KGSL_MEMFLAGS_GPUREADONLY, KGSL_MEMDESC_PRIVILEGED,
			"smmu_info");
		if (ret)
			return ret;
	}

	set_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv);
	return 0;
}

/*
 * Allocate the userspace-visible context record for a new context when
 * preemption is enabled. Secure contexts get a secure allocation; 32-bit
 * tasks get a 32-bit GPU address.
 *
 * Return: 0 on success (or when preemption is disabled), else the
 * gpumem_alloc_entry() error.
 */
int gen7_preemption_context_init(struct kgsl_context *context)
{
	struct kgsl_device *device = context->device;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	u64 flags = 0;

	if (!adreno_is_preemption_enabled(adreno_dev))
		return 0;

	if (context->flags & KGSL_CONTEXT_SECURE)
		flags |= KGSL_MEMFLAGS_SECURE;

	if (is_compat_task())
		flags |= KGSL_MEMFLAGS_FORCE_32BIT;

	/*
	 * gpumem_alloc_entry takes an extra refcount. Put it only when
	 * destroying the context to keep the context record valid
	 */
	context->user_ctxt_record = gpumem_alloc_entry(context->dev_priv,
		GEN7_CP_CTXRECORD_USER_RESTORE_SIZE, flags);
	if (IS_ERR(context->user_ctxt_record)) {
		int ret = PTR_ERR(context->user_ctxt_record);

		context->user_ctxt_record = NULL;
		return ret;
	}

	return 0;
}
diff --git a/adreno_gen7_ringbuffer.c b/adreno_gen7_ringbuffer.c
new file mode 100644
index 0000000000..47277fc51f
--- /dev/null
+++ b/adreno_gen7_ringbuffer.c
@@ -0,0 +1,556 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2021, The Linux Foundation. All rights reserved.
 */

#include "adreno.h"
#include "adreno_gen7.h"
#include "adreno_pm4types.h"
#include "adreno_ringbuffer.h"
#include "adreno_trace.h"
#include "kgsl_trace.h"

/*
 * Concurrent binning (BV/BR split) is used for all non-secure contexts;
 * secure contexts (and internal, NULL-context submissions) run BR only.
 */
static bool is_concurrent_binning(struct adreno_context *drawctxt)
{
	if (!drawctxt)
		return false;

	return !(drawctxt->base.flags & KGSL_CONTEXT_SECURE);
}

/*
 * Emit the CP packets that switch the GPU to a new pagetable: the SMMU
 * table update itself, a mirror write of the new TTBR0 into the
 * ringbuffer's pagetable descriptor (for fault debugging), and a thread
 * sync. Returns the number of dwords written (0 for the default
 * pagetable, which needs no switch).
 */
static int gen7_rb_pagetable_switch(struct adreno_device *adreno_dev,
	struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
	struct kgsl_pagetable *pagetable, u32 *cmds)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	u64 ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pagetable);
	int count = 0;
	u32 id = drawctxt ? drawctxt->base.id : 0;

	if (pagetable == device->mmu.defaultpagetable)
		return 0;

	/* CP switches the pagetable and flushes the Caches */
	cmds[count++] = cp_type7_packet(CP_SMMU_TABLE_UPDATE, 3);
	cmds[count++] = lower_32_bits(ttbr0);
	cmds[count++] = upper_32_bits(ttbr0);
	cmds[count++] = id;

	cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 5);
	cmds[count++] = lower_32_bits(rb->pagetable_desc->gpuaddr +
		PT_INFO_OFFSET(ttbr0));
	cmds[count++] = upper_32_bits(rb->pagetable_desc->gpuaddr +
		PT_INFO_OFFSET(ttbr0));
	cmds[count++] = lower_32_bits(ttbr0);
	cmds[count++] = upper_32_bits(ttbr0);
	cmds[count++] = id;

	/*
	 * Sync both threads after switching pagetables and enable BR only
	 * to make sure BV doesn't race ahead while BR is still switching
	 * pagetables.
	 */
	cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
	cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BR;

	return count;
}

/*
 * Build and submit the internal command stream that switches the
 * ringbuffer to a new draw context: reset CP context state, switch the
 * pagetable (or just update CONTEXTIDR), then record the new context id
 * in the memstore for the ringbuffer and the global slot.
 */
static int gen7_rb_context_switch(struct adreno_device *adreno_dev,
	struct adreno_ringbuffer *rb,
	struct adreno_context *drawctxt)
{
	struct kgsl_pagetable *pagetable =
		adreno_drawctxt_get_pagetable(drawctxt);
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	int count = 0;
	u32 cmds[42];

	/* Sync both threads */
	cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
	cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BOTH;
	/* Reset context state */
	cmds[count++] = cp_type7_packet(CP_RESET_CONTEXT_STATE, 1);
	cmds[count++] = CP_CLEAR_BV_BR_COUNTER | CP_CLEAR_RESOURCE_TABLE |
		CP_CLEAR_ON_CHIP_TS;
	/*
	 * Enable/disable concurrent binning for pagetable switch and
	 * set the thread to BR since only BR can execute the pagetable
	 * switch packets.
	 */
	/* Sync both threads and enable BR only */
	cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
	cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BR;

	if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable)
		count += gen7_rb_pagetable_switch(adreno_dev, rb,
			drawctxt, pagetable, &cmds[count]);
	else {
		struct kgsl_iommu *iommu = KGSL_IOMMU(device);
		u32 id = drawctxt ? drawctxt->base.id : 0;
		u32 offset = GEN7_SMMU_BASE + (iommu->cb0_offset >> 2) + 0x0d;

		/*
		 * Set the CONTEXTIDR register to the current context id so we
		 * can use it in pagefault debugging. Unlike TTBR0 we don't
		 * need any special sequence or locking to change it
		 */
		cmds[count++] = cp_type4_packet(offset, 1);
		cmds[count++] = id;
	}

	cmds[count++] = cp_type7_packet(CP_NOP, 1);
	cmds[count++] = CONTEXT_TO_MEM_IDENTIFIER;

	/* Record the new context id in the per-ringbuffer memstore slot */
	cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3);
	cmds[count++] = lower_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
		current_context));
	cmds[count++] = upper_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
		current_context));
	cmds[count++] = drawctxt->base.id;

	/* ... and in the global memstore slot */
	cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3);
	cmds[count++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device,
		KGSL_MEMSTORE_GLOBAL, current_context));
	cmds[count++] = upper_32_bits(MEMSTORE_ID_GPU_ADDR(device,
		KGSL_MEMSTORE_GLOBAL, current_context));
	cmds[count++] = drawctxt->base.id;

	cmds[count++] = cp_type7_packet(CP_EVENT_WRITE, 1);
	cmds[count++] = 0x31;

	return gen7_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED,
		cmds, count, 0, NULL);
}

/* GPU addresses of the start-of-pipe / end-of-pipe timestamp slots */
#define RB_SOPTIMESTAMP(device, rb) \
	MEMSTORE_RB_GPU_ADDR(device, rb, soptimestamp)
#define CTXT_SOPTIMESTAMP(device, drawctxt) \
	MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, soptimestamp)

#define RB_EOPTIMESTAMP(device, rb) \
	MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp)
#define CTXT_EOPTIMESTAMP(device, drawctxt) \
	MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, eoptimestamp)

/*
 * Kick the CP by updating the ringbuffer write pointer. The WPTR write
 * is deferred (skip_inline_wptr) while a preemption switch is in flight;
 * a failed fenced write triggers snapshot and dispatcher recovery.
 */
int gen7_ringbuffer_submit(struct adreno_ringbuffer *rb,
	struct adreno_submit_time *time)
{
	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	int ret = 0;
	unsigned long flags;

	adreno_get_submit_time(adreno_dev, rb, time);
	adreno_profile_submit_time(time);

	spin_lock_irqsave(&rb->preempt_lock, flags);
	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) {
		if (adreno_dev->cur_rb == rb) {
			kgsl_pwrscale_busy(device);
			ret = gen7_fenced_write(adreno_dev,
				GEN7_CP_RB_WPTR, rb->_wptr,
				FENCE_STATUS_WRITEDROPPED0_MASK);
			rb->skip_inline_wptr = false;
		}
	} else {
		if (adreno_dev->cur_rb == rb)
			rb->skip_inline_wptr = true;
	}

	rb->wptr = rb->_wptr;
	spin_unlock_irqrestore(&rb->preempt_lock, flags);

	if (ret) {
		/*
		 * If WPTR update fails, take inline snapshot and trigger
		 * recovery.
		 */
		gmu_core_fault_snapshot(device);
		adreno_dispatcher_fault(adreno_dev,
			ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
	}

	return ret;
}

/*
 * Allocate the scratch page and set up one ringbuffer (or all of them
 * plus the preemption machinery when the target supports preemption).
 */
int gen7_ringbuffer_init(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	int i, ret;

	ret = adreno_allocate_global(device, &device->scratch, PAGE_SIZE,
		0, 0, KGSL_MEMDESC_RANDOM | KGSL_MEMDESC_PRIVILEGED,
		"scratch");
	if (ret)
		return ret;

	adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]);

	if (!ADRENO_FEATURE(adreno_dev, ADRENO_PREEMPTION)) {
		adreno_dev->num_ringbuffers = 1;
		return adreno_ringbuffer_setup(adreno_dev,
			&adreno_dev->ringbuffers[0], 0);
	}

	adreno_dev->num_ringbuffers = ARRAY_SIZE(adreno_dev->ringbuffers);

	for (i = 0; i < adreno_dev->num_ringbuffers; i++) {
		int ret;

		ret = adreno_ringbuffer_setup(adreno_dev,
			&adreno_dev->ringbuffers[i], i);
		if (ret)
			return ret;
	}

	timer_setup(&adreno_dev->preempt.timer, adreno_preemption_timer, 0);
	gen7_preemption_init(adreno_dev);
	return 0;
}

/* Worst-case dwords of wrapper packets added around a user payload */
#define GEN7_SUBMIT_MAX 100

/*
 * Wrap a payload ("in", "dwords" long) with the full submission
 * preamble/postamble (preemption hooks, IFPC markers, SOP/EOP timestamp
 * writes, secure-mode entry/exit, profiling IBs) and submit it on the
 * ringbuffer. "timestamp" is the per-context timestamp when drawctxt is
 * non-NULL; internal submissions pass drawctxt == NULL.
 */
int gen7_ringbuffer_addcmds(struct adreno_device *adreno_dev,
	struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
	u32 flags, u32 *in, u32 dwords, u32 timestamp,
	struct adreno_submit_time *time)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	u32 size = GEN7_SUBMIT_MAX + dwords;
	u32 *cmds, index = 0;
	u64 profile_gpuaddr;
	u32 profile_dwords;

	if (adreno_drawctxt_detached(drawctxt))
		return -ENOENT;

	if (adreno_gpu_fault(adreno_dev) != 0)
		return -EPROTO;

	rb->timestamp++;

	if (drawctxt)
		drawctxt->internal_timestamp = rb->timestamp;

	/* All submissions are run with protected mode off due to APRIV */
	flags &= ~F_NOTPROTECTED;

	cmds = adreno_ringbuffer_allocspace(rb, size);
	if (IS_ERR(cmds))
		return PTR_ERR(cmds);

	/* Identify the start of a command */
	cmds[index++] = cp_type7_packet(CP_NOP, 1);
	cmds[index++] = drawctxt ? CMD_IDENTIFIER : CMD_INTERNAL_IDENTIFIER;

	/* This is 21 dwords when drawctxt is not NULL */
	index += gen7_preemption_pre_ibsubmit(adreno_dev, rb, drawctxt,
		&cmds[index]);

	cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
	cmds[index++] = CP_SET_THREAD_BOTH;

	cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
	cmds[index++] = 0x101; /* IFPC disable */

	cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
	cmds[index++] = CP_SET_THREAD_BR;

	profile_gpuaddr = adreno_profile_preib_processing(adreno_dev,
		drawctxt, &profile_dwords);

	if (profile_gpuaddr) {
		cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
		cmds[index++] = lower_32_bits(profile_gpuaddr);
		cmds[index++] = upper_32_bits(profile_gpuaddr);
		cmds[index++] = profile_dwords;
	}

	/* Start-of-pipe timestamp: per-context (if any), then ringbuffer */
	if (drawctxt) {
		cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
		cmds[index++] = lower_32_bits(CTXT_SOPTIMESTAMP(device,
			drawctxt));
		cmds[index++] = upper_32_bits(CTXT_SOPTIMESTAMP(device,
			drawctxt));
		cmds[index++] = timestamp;
	}

	cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
	cmds[index++] = lower_32_bits(RB_SOPTIMESTAMP(device, rb));
	cmds[index++] = upper_32_bits(RB_SOPTIMESTAMP(device, rb));
	cmds[index++] = rb->timestamp;

	if (IS_SECURE(flags)) {
		/* Sync BV and BR if entering secure mode */
		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
		cmds[index++] = CP_SYNC_THREADS | CP_CONCURRENT_BIN_DISABLE;
		cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
		cmds[index++] = 1;
	}

	memcpy(&cmds[index], in, dwords << 2);
	index += dwords;

	profile_gpuaddr = adreno_profile_postib_processing(adreno_dev,
		drawctxt, &dwords);

	if (profile_gpuaddr) {
		cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
		cmds[index++] = lower_32_bits(profile_gpuaddr);
		cmds[index++] = upper_32_bits(profile_gpuaddr);
		cmds[index++] = profile_dwords;
	}

	if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &device->mmu.pfpolicy))
		cmds[index++] = cp_type7_packet(CP_WAIT_MEM_WRITES, 0);

	/*
	 * If this is an internal command, just write the ringbuffer timestamp,
	 * otherwise, write both
	 */
	if (!drawctxt) {
		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
		cmds[index++] = CACHE_CLEAN | BIT(31) | BIT(27);
		cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
		cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb));
		cmds[index++] = rb->timestamp;
	} else {
		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
		cmds[index++] = CACHE_CLEAN | BIT(31) | BIT(27);
		cmds[index++] = lower_32_bits(CTXT_EOPTIMESTAMP(device,
			drawctxt));
		cmds[index++] = upper_32_bits(CTXT_EOPTIMESTAMP(device,
			drawctxt));
		cmds[index++] = timestamp;

		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
		cmds[index++] = CACHE_CLEAN | BIT(27);
		cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
		cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb));
		cmds[index++] = rb->timestamp;
	}

	if (IS_WFI(flags))
		cmds[index++] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);

	if (IS_SECURE(flags)) {
		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
		cmds[index++] = CP_CONCURRENT_BIN_DISABLE;
		cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
		cmds[index++] = 0;
		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
		cmds[index++] = CP_SYNC_THREADS;
	}

	cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
	cmds[index++] = CP_SET_THREAD_BOTH;

	cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
	cmds[index++] = 0x100; /* IFPC enable */

	cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
	cmds[index++] = CP_SET_THREAD_BR;

	/* 10 dwords */
	index += gen7_preemption_post_ibsubmit(adreno_dev, &cmds[index]);

	/* Adjust the thing for the number of bytes we actually wrote */
	rb->_wptr -= (size - index);

	return gen7_ringbuffer_submit(rb, time);
}

/* Emit a CP_REG_TO_MEM that captures the always-on counter to gpuaddr */
static u32 gen7_get_alwayson_counter(u32 *cmds, u64 gpuaddr)
{
	cmds[0] = cp_type7_packet(CP_REG_TO_MEM, 3);
	cmds[1] = GEN7_CP_ALWAYS_ON_COUNTER_LO | (1 << 30) | (2 << 18);
	cmds[2] = lower_32_bits(gpuaddr);
	cmds[3] = upper_32_bits(gpuaddr);

	return 4;
}

#define PROFILE_IB_DWORDS 4
#define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2))

/*
 * Write a tiny always-on-counter capture IB into the next slot of the
 * ringbuffer's profiling page and emit the packets that call it. The
 * slot ring wraps at PROFILE_IB_SLOTS. Returns dwords emitted (4).
 */
static u64 gen7_get_user_profiling_ib(struct adreno_ringbuffer *rb,
	struct kgsl_drawobj_cmd *cmdobj, u32 target_offset, u32 *cmds)
{
	u32 offset = rb->profile_index * (PROFILE_IB_DWORDS << 2);
	u32 *ib = rb->profile_desc->hostptr + offset;
	u32 dwords = gen7_get_alwayson_counter(ib,
		cmdobj->profiling_buffer_gpuaddr + target_offset);

	cmds[0] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
	cmds[1] = lower_32_bits(rb->profile_desc->gpuaddr + offset);
	cmds[2] = upper_32_bits(rb->profile_desc->gpuaddr + offset);
	cmds[3] = dwords;

	rb->profile_index = (rb->profile_index + 1) % PROFILE_IB_SLOTS;

	return 4;
}

/*
 * Switch the ringbuffer's active draw context, taking a reference on the
 * incoming context and releasing the outgoing one once the switch
 * timestamp retires.
 */
static int gen7_drawctxt_switch(struct adreno_device *adreno_dev,
	struct adreno_ringbuffer *rb,
	struct adreno_context *drawctxt)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	if (rb->drawctxt_active == drawctxt)
		return 0;

	if (kgsl_context_detached(&drawctxt->base))
		return -ENOENT;

	if (!_kgsl_context_get(&drawctxt->base))
		return -ENOENT;

	trace_adreno_drawctxt_switch(rb, drawctxt);

	gen7_rb_context_switch(adreno_dev, rb, drawctxt);

	/* Release the current drawctxt as soon as the new one is switched */
	adreno_put_drawctxt_on_timestamp(device, rb->drawctxt_active,
		rb, rb->timestamp);

	rb->drawctxt_active = drawctxt;
	return 0;
}


#define GEN7_USER_PROFILE_IB(rb, cmdobj, cmds, field) \
	gen7_get_user_profiling_ib((rb), (cmdobj), \
		offsetof(struct kgsl_drawobj_profiling_buffer, field), \
		(cmds))

#define GEN7_KERNEL_PROFILE(dev, cmdobj, cmds, field) \
	gen7_get_alwayson_counter((cmds), \
		(dev)->profile_buffer->gpuaddr + \
		ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \
			field))

/* Worst-case wrapper dwords around the IB list built below */
#define GEN7_COMMAND_DWORDS 38

/*
 * Build the command stream for a user command object (profiling
 * captures, IB1LIST of the object's IBs, CCU invalidates), perform any
 * needed draw context switch and hand the stream to
 * gen7_ringbuffer_addcmds().
 */
int gen7_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
	struct kgsl_drawobj_cmd *cmdobj, u32 flags,
	struct adreno_submit_time *time)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
	struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
	struct adreno_ringbuffer *rb = drawctxt->rb;
	int ret = 0, numibs = 0, index = 0;
	u32 *cmds;

	/* Count the number of IBs (if we are not skipping) */
	if (!IS_SKIP(flags)) {
		struct list_head *tmp;

		list_for_each(tmp, &cmdobj->cmdlist)
			numibs++;
	}

	cmds = kmalloc((GEN7_COMMAND_DWORDS + (numibs * 5)) << 2, GFP_KERNEL);
	if (!cmds) {
		ret = -ENOMEM;
		goto done;
	}

	cmds[index++] = cp_type7_packet(CP_NOP, 1);
	cmds[index++] = START_IB_IDENTIFIER;

	/* Kernel profiling: 4 dwords */
	if (IS_KERNEL_PROFILE(flags))
		index += GEN7_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index],
			started);

	/* User profiling: 4 dwords */
	if (IS_USER_PROFILE(flags))
		index += GEN7_USER_PROFILE_IB(rb, cmdobj, &cmds[index],
			gpu_ticks_submitted);

	if (is_concurrent_binning(drawctxt)) {
		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
		cmds[index++] = CP_SET_THREAD_BOTH;
	}
	if (numibs) {
		struct kgsl_memobj_node *ib;

		cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
		cmds[index++] = 0x00d; /* IB1LIST start */

		list_for_each_entry(ib, &cmdobj->cmdlist, node) {
			/*
			 * A skipped IB is neutralized by a CP_NOP that
			 * swallows the following 4-dword IB packet
			 */
			if (ib->priv & MEMOBJ_SKIP ||
				(ib->flags & KGSL_CMDLIST_CTXTSWITCH_PREAMBLE &&
				!IS_PREAMBLE(flags)))
				cmds[index++] = cp_type7_packet(CP_NOP, 4);

			cmds[index++] =
				cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
			cmds[index++] = lower_32_bits(ib->gpuaddr);
			cmds[index++] = upper_32_bits(ib->gpuaddr);

			/* Double check that IB_PRIV is never set */
			cmds[index++] = (ib->size >> 2) & 0xfffff;
		}

		cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
		cmds[index++] = 0x00e; /* IB1LIST end */
	}

	if (is_concurrent_binning(drawctxt)) {
		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
		cmds[index++] = CP_SET_THREAD_BR;
	}
	/* CCU invalidate depth */
	cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 1);
	cmds[index++] = 24;

	/* CCU invalidate color */
	cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 1);
	cmds[index++] = 25;

	/* 4 dwords */
	if (IS_KERNEL_PROFILE(flags))
		index += GEN7_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index],
			retired);

	/* 4 dwords */
	if (IS_USER_PROFILE(flags))
		index += GEN7_USER_PROFILE_IB(rb, cmdobj, &cmds[index],
			gpu_ticks_retired);

	cmds[index++] = cp_type7_packet(CP_NOP, 1);
	cmds[index++] = END_IB_IDENTIFIER;

	ret = gen7_drawctxt_switch(adreno_dev, rb, drawctxt);

	/*
	 * In the unlikely event of an error in the drawctxt switch,
	 * treat it like a hang
	 */
	if (ret) {
		/*
		 * It is "normal" to get a -ENOSPC or a -ENOENT. Don't log it,
		 * the upper layers know how to handle it
		 */
		if (ret != -ENOSPC && ret != -ENOENT)
			dev_err(device->dev,
				"Unable to switch draw context: %d\n", ret);
		goto done;
	}

	adreno_drawobj_set_constraint(device, drawobj);

	ret = gen7_ringbuffer_addcmds(adreno_dev, drawctxt->rb, drawctxt,
		flags, cmds, index, drawobj->timestamp, time);

done:
	trace_kgsl_issueibcmds(device, drawctxt->base.id, numibs,
		drawobj->timestamp, drawobj->flags, ret, drawctxt->type);

	kfree(cmds);
	return ret;
}
diff --git a/adreno_gen7_rpmh.c b/adreno_gen7_rpmh.c
new file mode 100644
index 0000000000..37e3fcdaf1
--- /dev/null
+++ b/adreno_gen7_rpmh.c
@@ -0,0 +1,469 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ */ + +#include +#include +#include +#include + +#include "adreno.h" +#include "adreno_gen7.h" +#include "kgsl_bus.h" +#include "kgsl_device.h" + +struct rpmh_arc_vals { + u32 num; + const u16 *val; +}; + +struct bcm { + const char *name; + u32 buswidth; + u32 channels; + u32 unit; + u16 width; + u8 vcd; + bool fixed; +}; + +struct bcm_data { + __le32 unit; + __le16 width; + u8 vcd; + u8 reserved; +}; + +struct rpmh_bw_votes { + u32 wait_bitmask; + u32 num_cmds; + u32 *addrs; + u32 num_levels; + u32 **cmds; +}; + +#define ARC_VOTE_SET(pri, sec, vlvl) \ + (FIELD_PREP(GENMASK(31, 16), vlvl) | \ + FIELD_PREP(GENMASK(15, 8), sec) | \ + FIELD_PREP(GENMASK(7, 0), pri)) + +static int rpmh_arc_cmds(struct rpmh_arc_vals *arc, const char *res_id) +{ + size_t len = 0; + + arc->val = cmd_db_read_aux_data(res_id, &len); + + /* + * cmd_db_read_aux_data() gives us a zero-padded table of + * size len that contains the arc values. To determine the + * number of arc values, we loop through the table and count + * them until we get to the end of the buffer or hit the + * zero padding. 
+ */ + for (arc->num = 1; arc->num < (len >> 1); arc->num++) { + if (arc->val[arc->num - 1] != 0 && arc->val[arc->num] == 0) + break; + } + + return 0; +} + +static int setup_volt_dependency_tbl(u32 *votes, + struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail, + u16 *vlvl, unsigned int num_entries) +{ + int i, j, k; + uint16_t cur_vlvl; + bool found_match; + + /* i tracks current KGSL GPU frequency table entry + * j tracks secondary rail voltage table entry + * k tracks primary rail voltage table entry + */ + for (i = 0; i < num_entries; i++) { + found_match = false; + + /* Look for a primary rail voltage that matches a VLVL level */ + for (k = 0; k < pri_rail->num; k++) { + if (pri_rail->val[k] >= vlvl[i]) { + cur_vlvl = pri_rail->val[k]; + found_match = true; + break; + } + } + + /* If we did not find a matching VLVL level then abort */ + if (!found_match) + return -EINVAL; + + /* + * Look for a secondary rail index whose VLVL value + * is greater than or equal to the VLVL value of the + * corresponding index of the primary rail + */ + for (j = 0; j < sec_rail->num; j++) { + if (sec_rail->val[j] >= cur_vlvl || + j + 1 == sec_rail->num) + break; + } + + if (j == sec_rail->num) + j = 0; + + votes[i] = ARC_VOTE_SET(k, j, cur_vlvl); + } + + return 0; +} + +/* Generate a set of bandwidth votes for the list of BCMs */ +static void tcs_cmd_data(struct bcm *bcms, int count, u32 ab, u32 ib, + u32 *data) +{ + int i; + + for (i = 0; i < count; i++) { + bool valid = true; + bool commit = false; + u64 avg, peak, x, y; + + if (i == count - 1 || bcms[i].vcd != bcms[i + 1].vcd) + commit = true; + + if (bcms[i].fixed) { + if (!ab && !ib) + data[i] = BCM_TCS_CMD(commit, false, 0x0, 0x0); + else + data[i] = BCM_TCS_CMD(commit, true, 0x0, 0x8); + continue; + } + + /* Multiple the bandwidth by the width of the connection */ + avg = ((u64) ab) * bcms[i].width; + + /* And then divide by the total width across channels */ + do_div(avg, bcms[i].buswidth * bcms[i].channels); + 
+ peak = ((u64) ib) * bcms[i].width; + do_div(peak, bcms[i].buswidth); + + /* Input bandwidth value is in KBps */ + x = avg * 1000ULL; + do_div(x, bcms[i].unit); + + /* Input bandwidth value is in KBps */ + y = peak * 1000ULL; + do_div(y, bcms[i].unit); + + /* + * If a bandwidth value was specified but the calculation ends + * rounding down to zero, set a minimum level + */ + if (ab && x == 0) + x = 1; + + if (ib && y == 0) + y = 1; + + x = min_t(u64, x, BCM_TCS_CMD_VOTE_MASK); + y = min_t(u64, y, BCM_TCS_CMD_VOTE_MASK); + + if (!x && !y) + valid = false; + + data[i] = BCM_TCS_CMD(commit, valid, x, y); + } +} + +static void free_rpmh_bw_votes(struct rpmh_bw_votes *votes) +{ + int i; + + if (!votes) + return; + + for (i = 0; votes->cmds && i < votes->num_levels; i++) + kfree(votes->cmds[i]); + + kfree(votes->cmds); + kfree(votes->addrs); + kfree(votes); +} + +/* Build the votes table from the specified bandwidth levels */ +static struct rpmh_bw_votes *build_rpmh_bw_votes(struct bcm *bcms, + int bcm_count, u32 *levels, int levels_count) +{ + struct rpmh_bw_votes *votes; + int i; + + votes = kzalloc(sizeof(*votes), GFP_KERNEL); + if (!votes) + return ERR_PTR(-ENOMEM); + + votes->addrs = kcalloc(bcm_count, sizeof(*votes->cmds), GFP_KERNEL); + if (!votes->addrs) { + free_rpmh_bw_votes(votes); + return ERR_PTR(-ENOMEM); + } + + votes->cmds = kcalloc(levels_count, sizeof(*votes->cmds), GFP_KERNEL); + if (!votes->cmds) { + free_rpmh_bw_votes(votes); + return ERR_PTR(-ENOMEM); + } + + votes->num_cmds = bcm_count; + votes->num_levels = levels_count; + + /* Get the cmd-db information for each BCM */ + for (i = 0; i < bcm_count; i++) { + size_t l; + const struct bcm_data *data; + + data = cmd_db_read_aux_data(bcms[i].name, &l); + + votes->addrs[i] = cmd_db_read_addr(bcms[i].name); + + bcms[i].unit = le32_to_cpu(data->unit); + bcms[i].width = le16_to_cpu(data->width); + bcms[i].vcd = data->vcd; + } + + for (i = 0; i < bcm_count; i++) { + if (i == (bcm_count - 1) || bcms[i].vcd 
!= bcms[i + 1].vcd) + votes->wait_bitmask |= (1 << i); + } + + for (i = 0; i < levels_count; i++) { + votes->cmds[i] = kcalloc(bcm_count, sizeof(u32), GFP_KERNEL); + if (!votes->cmds[i]) { + free_rpmh_bw_votes(votes); + return ERR_PTR(-ENOMEM); + } + + tcs_cmd_data(bcms, bcm_count, 0, levels[i], votes->cmds[i]); + } + + return votes; +} + +/* + * setup_gmu_arc_votes - Build the gmu voting table + * @hfi: Pointer to hfi device + * @pri_rail: Pointer to primary power rail vlvl table + * @sec_rail: Pointer to second/dependent power rail vlvl table + * + * This function initializes the cx votes for all gmu frequencies + * for gmu dcvs + */ +static int setup_cx_arc_votes(struct gen7_hfi *hfi, + struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail) +{ + /* Hardcoded values of GMU CX voltage levels */ + u16 gmu_cx_vlvl[MAX_CX_LEVELS]; + u32 cx_votes[MAX_CX_LEVELS]; + struct hfi_dcvstable_cmd *table = &hfi->dcvs_table; + int ret, i; + + gmu_cx_vlvl[0] = 0; + gmu_cx_vlvl[1] = RPMH_REGULATOR_LEVEL_LOW_SVS; + gmu_cx_vlvl[2] = RPMH_REGULATOR_LEVEL_SVS; + + table->gmu_level_num = 3; + + table->cx_votes[0].freq = 0; + table->cx_votes[1].freq = GMU_FREQ_MIN / 1000; + table->cx_votes[2].freq = GMU_FREQ_MAX / 1000; + + ret = setup_volt_dependency_tbl(cx_votes, pri_rail, + sec_rail, gmu_cx_vlvl, table->gmu_level_num); + if (!ret) { + for (i = 0; i < table->gmu_level_num; i++) + table->cx_votes[i].vote = cx_votes[i]; + } + + return ret; +} + +/* + * setup_gx_arc_votes - Build the gpu dcvs voting table + * @hfi: Pointer to hfi device + * @pri_rail: Pointer to primary power rail vlvl table + * @sec_rail: Pointer to second/dependent power rail vlvl table + * + * This function initializes the gx votes for all gpu frequencies + * for gpu dcvs + */ +static int setup_gx_arc_votes(struct adreno_device *adreno_dev, + struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = 
to_gen7_gmu(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table; + u32 index; + u16 vlvl_tbl[MAX_GX_LEVELS]; + u32 gx_votes[MAX_GX_LEVELS]; + int ret, i; + + /* Add the zero powerlevel for the perf table */ + table->gpu_level_num = device->pwrctrl.num_pwrlevels + 1; + + if (table->gpu_level_num > pri_rail->num || + table->gpu_level_num > ARRAY_SIZE(vlvl_tbl)) { + dev_err(&gmu->pdev->dev, + "Defined more GPU DCVS levels than RPMh can support\n"); + return -ERANGE; + } + + memset(vlvl_tbl, 0, sizeof(vlvl_tbl)); + + table->gx_votes[0].freq = 0; + + /* GMU power levels are in ascending order */ + for (index = 1, i = pwr->num_pwrlevels - 1; i >= 0; i--, index++) { + vlvl_tbl[index] = pwr->pwrlevels[i].voltage_level; + table->gx_votes[index].freq = pwr->pwrlevels[i].gpu_freq / 1000; + } + + ret = setup_volt_dependency_tbl(gx_votes, pri_rail, + sec_rail, vlvl_tbl, table->gpu_level_num); + if (!ret) { + for (i = 0; i < table->gpu_level_num; i++) { + table->gx_votes[i].vote = gx_votes[i]; + table->gx_votes[i].acd = 0xffffffff; + } + } + + return ret; + +} + +static int build_dcvs_table(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hfi *hfi = &gmu->hfi; + struct rpmh_arc_vals gx_arc, cx_arc, mx_arc; + int ret; + + ret = CMD_MSG_HDR(hfi->dcvs_table, H2F_MSG_PERF_TBL); + if (ret) + return ret; + + ret = rpmh_arc_cmds(&gx_arc, "gfx.lvl"); + if (ret) + return ret; + + ret = rpmh_arc_cmds(&cx_arc, "cx.lvl"); + if (ret) + return ret; + + ret = rpmh_arc_cmds(&mx_arc, "mx.lvl"); + if (ret) + return ret; + + ret = setup_cx_arc_votes(hfi, &cx_arc, &mx_arc); + if (ret) + return ret; + + return setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc); +} + +/* + * List of Bus Control Modules (BCMs) that need to be configured for the GPU + * to access DDR. 
For each bus level we will generate a vote each BC + */ +static struct bcm gen7_ddr_bcms[] = { + { .name = "SH0", .buswidth = 16 }, + { .name = "MC0", .buswidth = 4 }, + { .name = "ACV", .fixed = true }, +}; + +/* Same as above, but for the CNOC BCMs */ +static struct bcm gen7_cnoc_bcms[] = { + { .name = "CN0", .buswidth = 4 }, +}; + +static void build_bw_table_cmd(struct hfi_bwtable_cmd *cmd, + struct rpmh_bw_votes *ddr, struct rpmh_bw_votes *cnoc) +{ + u32 i, j; + + cmd->bw_level_num = ddr->num_levels; + cmd->ddr_cmds_num = ddr->num_cmds; + cmd->ddr_wait_bitmask = ddr->wait_bitmask; + + for (i = 0; i < ddr->num_cmds; i++) + cmd->ddr_cmd_addrs[i] = ddr->addrs[i]; + + for (i = 0; i < ddr->num_levels; i++) + for (j = 0; j < ddr->num_cmds; j++) + cmd->ddr_cmd_data[i][j] = (u32) ddr->cmds[i][j]; + + if (!cnoc) + return; + + cmd->cnoc_cmds_num = cnoc->num_cmds; + cmd->cnoc_wait_bitmask = cnoc->wait_bitmask; + + for (i = 0; i < cnoc->num_cmds; i++) + cmd->cnoc_cmd_addrs[i] = cnoc->addrs[i]; + + for (i = 0; i < cnoc->num_levels; i++) + for (j = 0; j < cnoc->num_cmds; j++) + cmd->cnoc_cmd_data[i][j] = (u32) cnoc->cmds[i][j]; +} + +static int build_bw_table(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct rpmh_bw_votes *ddr, *cnoc = NULL; + u32 *cnoc_table; + u32 count; + int ret; + + ddr = build_rpmh_bw_votes(gen7_ddr_bcms, ARRAY_SIZE(gen7_ddr_bcms), + pwr->ddr_table, pwr->ddr_table_count); + if (IS_ERR(ddr)) + return PTR_ERR(ddr); + + cnoc_table = kgsl_bus_get_table(device->pdev, "qcom,bus-table-cnoc", + &count); + + if (count > 0) + cnoc = build_rpmh_bw_votes(gen7_cnoc_bcms, + ARRAY_SIZE(gen7_cnoc_bcms), cnoc_table, count); + + kfree(cnoc_table); + + if (IS_ERR(cnoc)) { + free_rpmh_bw_votes(ddr); + return PTR_ERR(cnoc); + } + + ret = CMD_MSG_HDR(gmu->hfi.bw_table, H2F_MSG_BW_VOTE_TBL); + if (ret) + return ret; + + 
build_bw_table_cmd(&gmu->hfi.bw_table, ddr, cnoc); + + free_rpmh_bw_votes(ddr); + free_rpmh_bw_votes(cnoc); + + return 0; +} + +int gen7_build_rpmh_tables(struct adreno_device *adreno_dev) +{ + int ret; + + ret = build_dcvs_table(adreno_dev); + if (ret) + return ret; + + return build_bw_table(adreno_dev); +} diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c new file mode 100644 index 0000000000..71a2d37c22 --- /dev/null +++ b/adreno_gen7_snapshot.c @@ -0,0 +1,1254 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#include "adreno_gen7_snapshot.h" +#include "adreno.h" +#include "adreno_snapshot.h" + +static struct kgsl_memdesc *gen7_capturescript; +static struct kgsl_memdesc *gen7_crashdump_registers; +static u32 *gen7_cd_reg_end; + +#define GEN7_DEBUGBUS_BLOCK_SIZE 0x100 + +#define GEN7_SP_READ_SEL_VAL(_location, _pipe, _statetype, _usptp, _sptp) \ + (FIELD_PREP(GENMASK(19, 18), _location) | \ + FIELD_PREP(GENMASK(17, 16), _pipe) | \ + FIELD_PREP(GENMASK(15, 8), _statetype) | \ + FIELD_PREP(GENMASK(7, 4), _usptp) | \ + FIELD_PREP(GENMASK(3, 0), _sptp)) + +#define GEN7_CP_APERTURE_REG_VAL(_pipe, _cluster, _context) \ + (FIELD_PREP(GENMASK(13, 12), _pipe) | \ + FIELD_PREP(GENMASK(10, 8), _cluster) | \ + FIELD_PREP(GENMASK(5, 4), _context)) + +#define GEN7_DEBUGBUS_SECTION_SIZE (sizeof(struct kgsl_snapshot_debugbus) \ + + (GEN7_DEBUGBUS_BLOCK_SIZE << 3)) + +#define CD_REG_END 0xaaaaaaaa + +static int CD_WRITE(u64 *ptr, u32 offset, u64 val) +{ + ptr[0] = val; + ptr[1] = FIELD_PREP(GENMASK(63, 44), offset) | BIT(21) | BIT(0); + + return 2; +} + +static int CD_READ(u64 *ptr, u32 offset, u32 size, u64 target) +{ + ptr[0] = target; + ptr[1] = FIELD_PREP(GENMASK(63, 44), offset) | size; + + return 2; +} + +static void CD_FINISH(u64 *ptr, u32 offset) +{ + gen7_cd_reg_end = gen7_crashdump_registers->hostptr + offset; + *gen7_cd_reg_end = CD_REG_END; + ptr[0] = 
gen7_crashdump_registers->gpuaddr + offset;
	ptr[1] = FIELD_PREP(GENMASK(63, 44), GEN7_CP_CRASH_DUMP_STATUS) | BIT(0);
	ptr[2] = 0;
	ptr[3] = 0;
}

/*
 * True when the inline crashdumper must not be used: SMMU stalled,
 * crashdumper disabled, or its buffers failed to allocate.
 */
static bool CD_SCRIPT_CHECK(struct kgsl_device *device)
{
	return (gen7_is_smmu_stalled(device) || (!device->snapshot_crashdumper) ||
		IS_ERR_OR_NULL(gen7_capturescript) ||
		IS_ERR_OR_NULL(gen7_crashdump_registers));
}

/*
 * Point the CP crashdumper at the capture script, kick it off and poll
 * (up to CP_CRASH_DUMPER_TIMEOUT) for the CD_REG_END sentinel written by
 * CD_FINISH. Returns true when the dumper completed successfully.
 */
static bool _gen7_do_crashdump(struct kgsl_device *device)
{
	unsigned int reg = 0;
	ktime_t timeout;

	kgsl_regwrite(device, GEN7_CP_CRASH_SCRIPT_BASE_LO,
		lower_32_bits(gen7_capturescript->gpuaddr));
	kgsl_regwrite(device, GEN7_CP_CRASH_SCRIPT_BASE_HI,
		upper_32_bits(gen7_capturescript->gpuaddr));
	kgsl_regwrite(device, GEN7_CP_CRASH_DUMP_CNTL, 1);

	timeout = ktime_add_ms(ktime_get(), CP_CRASH_DUMPER_TIMEOUT);

	if (!device->snapshot_atomic)
		might_sleep();
	for (;;) {
		/* make sure we're reading the latest value */
		rmb();
		if ((*gen7_cd_reg_end) != CD_REG_END)
			break;
		if (ktime_compare(ktime_get(), timeout) > 0)
			break;
		/* Wait 1msec to avoid unnecessary looping */
		if (!device->snapshot_atomic)
			usleep_range(100, 1000);
	}

	kgsl_regread(device, GEN7_CP_CRASH_DUMP_STATUS, &reg);

	/*
	 * Writing to the GEN7_CP_CRASH_DUMP_CNTL also resets the
	 * GEN7_CP_CRASH_DUMP_STATUS. Make sure the read above is
	 * complete before we change the value
	 */
	rmb();

	kgsl_regwrite(device, GEN7_CP_CRASH_DUMP_CNTL, 0);

	if (WARN(!(reg & 0x2), "Crashdumper timed out\n"))
		return false;

	return true;
}

/*
 * Fallback register dump done with CPU reads (used when the crashdumper
 * cannot run). Applies the block's select-register write first.
 */
static size_t gen7_legacy_snapshot_registers(struct kgsl_device *device,
	u8 *buf, size_t remain, void *priv)
{
	struct reg_list *regs = priv;

	if (regs->sel)
		kgsl_regwrite(device, regs->sel->host_reg, regs->sel->val);

	return adreno_snapshot_registers_v2(device, buf, remain, (void *)regs->regs);
}

/*
 * Copy a register block captured by the crashdumper out of
 * gen7_crashdump_registers into the snapshot buffer, in the compact
 * (addr|count) format used by adreno snapshots.
 */
static size_t gen7_snapshot_registers(struct kgsl_device *device, u8 *buf,
	size_t remain, void *priv)
{
	struct reg_list *regs = (struct reg_list *)priv;
	const u32 *ptr = regs->regs;
	unsigned int *data = (unsigned int *)buf;
	unsigned int *src;
	unsigned int size = adreno_snapshot_regs_count(ptr) * 4;

	if (remain < size) {
		SNAPSHOT_ERR_NOMEM(device, "REGISTERS");
		return 0;
	}

	src = gen7_crashdump_registers->hostptr + regs->offset;

	for (ptr = regs->regs; ptr[0] != UINT_MAX; ptr += 2) {
		unsigned int cnt = REG_COUNT(ptr);

		/* Single registers are tagged with BIT(31) on the address */
		if (cnt == 1)
			*data++ = BIT(31) | ptr[0];
		else {
			*data++ = ptr[0];
			*data++ = cnt;
		}
		memcpy(data, src, cnt << 2);
		data += cnt;
		src += cnt;
	}

	/* Return the size of the section */
	return size;
}

/* CPU-read fallback dump of one shader memory block (continues past this chunk) */
static size_t gen7_legacy_snapshot_shader(struct kgsl_device *device,
	u8 *buf, size_t remain, void *priv)
{
	struct kgsl_snapshot_shader_v2 *header =
		(struct kgsl_snapshot_shader_v2 *) buf;
	struct gen7_shader_block_info *info = (struct gen7_shader_block_info *) priv;
	struct gen7_shader_block *block = info->block;
	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
	unsigned int read_sel;
	int i;

	if (remain < (sizeof(*header) + (block->size << 2))) {
		SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY");
		return 0;
	}

	header->type = block->statetype;
	header->index = block->sp_id;
	header->size = block->size;
	header->usptp = block->usptp;
+ header->location = block->location; + header->pipe_id = block->pipeid; + + read_sel = GEN7_SP_READ_SEL_VAL(block->location, block->pipeid, + block->statetype, block->usptp, block->sp_id); + + kgsl_regwrite(device, GEN7_SP_READ_SEL, read_sel); + + /* + * An explicit barrier is needed so that reads do not happen before + * the register write. + */ + mb(); + + for (i = 0; i < block->size; i++) + data[i] = kgsl_regmap_read(&device->regmap, GEN7_SP_AHB_READ_APERTURE + i); + + return (sizeof(*header) + (block->size << 2)); +} + +static size_t gen7_snapshot_shader_memory(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_shader_v2 *header = + (struct kgsl_snapshot_shader_v2 *) buf; + struct gen7_shader_block_info *info = (struct gen7_shader_block_info *) priv; + struct gen7_shader_block *block = info->block; + unsigned int *data = (unsigned int *) (buf + sizeof(*header)); + + if (remain < (sizeof(*header) + (block->size << 2))) { + SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY"); + return 0; + } + + header->type = block->statetype; + header->index = block->sp_id; + header->size = block->size; + header->usptp = block->usptp; + header->location = block->location; + header->pipe_id = block->pipeid; + + memcpy(data, gen7_crashdump_registers->hostptr + info->offset, + (block->size << 2)); + + return (sizeof(*header) + (block->size << 2)); +} + +static void gen7_snapshot_shader(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + unsigned int i; + struct gen7_shader_block_info info; + u64 *ptr; + u32 offset = 0; + size_t (*func)(struct kgsl_device *device, u8 *buf, size_t remain, + void *priv) = gen7_legacy_snapshot_shader; + + if (CD_SCRIPT_CHECK(device)) { + for (i = 0; i < ARRAY_SIZE(gen7_shader_blocks); i++) { + info.block = &gen7_shader_blocks[i]; + info.offset = offset; + offset += gen7_shader_blocks[i].size << 2; + + /* Shader working/shadow memory */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_SHADER_V2, 
+ snapshot, func, &info); + } + return; + } + + /* Build the crash script */ + ptr = gen7_capturescript->hostptr; + offset = 0; + + for (i = 0; i < ARRAY_SIZE(gen7_shader_blocks); i++) { + struct gen7_shader_block *block = &gen7_shader_blocks[i]; + + /* Program the aperture */ + ptr += CD_WRITE(ptr, GEN7_SP_READ_SEL, + GEN7_SP_READ_SEL_VAL(block->location, block->pipeid, + block->statetype, block->usptp, block->sp_id)); + + /* Read all the data in one chunk */ + ptr += CD_READ(ptr, GEN7_SP_AHB_READ_APERTURE, block->size, + gen7_crashdump_registers->gpuaddr + offset); + + offset += block->size << 2; + } + + /* Marker for end of script */ + CD_FINISH(ptr, offset); + + /* Try to run the crash dumper */ + if (_gen7_do_crashdump(device)) + func = gen7_snapshot_shader_memory; + + offset = 0; + + for (i = 0; i < ARRAY_SIZE(gen7_shader_blocks); i++) { + info.block = &gen7_shader_blocks[i]; + info.offset = offset; + offset += gen7_shader_blocks[i].size << 2; + + /* Shader working/shadow memory */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_SHADER_V2, + snapshot, func, &info); + } +} + +static void gen7_snapshot_mempool(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + /* set CP_CHICKEN_DBG[StabilizeMVC] to stabilize it while dumping */ + kgsl_regrmw(device, GEN7_CP_CHICKEN_DBG, 0x4, 0x4); + kgsl_regrmw(device, GEN7_CP_BV_CHICKEN_DBG, 0x4, 0x4); + + kgsl_snapshot_indexed_registers(device, snapshot, + GEN7_CP_MEM_POOL_DBG_ADDR, GEN7_CP_MEM_POOL_DBG_DATA, + 0, 0x2100); + + kgsl_snapshot_indexed_registers(device, snapshot, + GEN7_CP_BV_MEM_POOL_DBG_ADDR, GEN7_CP_BV_MEM_POOL_DBG_DATA, + 0, 0x2100); + + kgsl_regrmw(device, GEN7_CP_CHICKEN_DBG, 0x4, 0x0); + kgsl_regrmw(device, GEN7_CP_BV_CHICKEN_DBG, 0x4, 0x0); +} + +static unsigned int gen7_read_dbgahb(struct kgsl_device *device, + unsigned int regbase, unsigned int reg) +{ + unsigned int val; + + kgsl_regread(device, (GEN7_SP_AHB_READ_APERTURE + reg - regbase), &val); + return val; +} + +static 
size_t gen7_legacy_snapshot_cluster_dbgahb(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_mvc_regs_v2 *header = + (struct kgsl_snapshot_mvc_regs_v2 *)buf; + struct gen7_sptp_cluster_registers *cluster = + (struct gen7_sptp_cluster_registers *)priv; + const u32 *ptr = cluster->regs; + unsigned int read_sel; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + int j; + unsigned int size = adreno_snapshot_regs_count(ptr) * 4; + + if (remain < (sizeof(*header) + size)) { + SNAPSHOT_ERR_NOMEM(device, "MVC REGISTERS"); + return 0; + } + + header->ctxt_id = cluster->context_id; + header->cluster_id = cluster->cluster_id; + header->pipe_id = cluster->pipe_id; + header->location_id = cluster->location_id; + + read_sel = GEN7_SP_READ_SEL_VAL(cluster->location_id, cluster->pipe_id, + cluster->statetype, 0, 0); + + kgsl_regwrite(device, GEN7_SP_READ_SEL, read_sel); + + for (ptr = cluster->regs; ptr[0] != UINT_MAX; ptr += 2) { + unsigned int count = REG_COUNT(ptr); + + if (count == 1) + *data++ = ptr[0]; + else { + *data++ = ptr[0] | (1 << 31); + *data++ = ptr[1]; + } + for (j = ptr[0]; j <= ptr[1]; j++) + *data++ = gen7_read_dbgahb(device, cluster->regbase, j); + } + + return (size + sizeof(*header)); +} + +static size_t gen7_snapshot_cluster_dbgahb(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_mvc_regs_v2 *header = + (struct kgsl_snapshot_mvc_regs_v2 *)buf; + struct gen7_sptp_cluster_registers *cluster = + (struct gen7_sptp_cluster_registers *)priv; + const u32 *ptr = cluster->regs; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + unsigned int *src; + unsigned int size = adreno_snapshot_regs_count(ptr) * 4; + + if (remain < (sizeof(*header) + size)) { + SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); + return 0; + } + + header->ctxt_id = cluster->context_id; + header->cluster_id = cluster->cluster_id; + header->pipe_id = cluster->pipe_id; + header->location_id = 
cluster->location_id; + + src = gen7_crashdump_registers->hostptr + cluster->offset; + + for (ptr = cluster->regs; ptr[0] != UINT_MAX; ptr += 2) { + unsigned int cnt = REG_COUNT(ptr); + + if (cnt == 1) + *data++ = ptr[0]; + else { + *data++ = ptr[0] | (1 << 31); + *data++ = ptr[1]; + } + memcpy(data, src, cnt << 2); + data += cnt; + src += cnt; + } + + return (size + sizeof(*header)); +} + +static void gen7_snapshot_dbgahb_regs(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + int i; + u64 *ptr, offset = 0; + unsigned int count; + size_t (*func)(struct kgsl_device *device, u8 *buf, size_t remain, + void *priv) = gen7_legacy_snapshot_cluster_dbgahb; + + if (CD_SCRIPT_CHECK(device)) { + for (i = 0; i < ARRAY_SIZE(gen7_sptp_clusters); i++) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_MVC_V2, snapshot, func, + &gen7_sptp_clusters[i]); + return; + } + + /* Build the crash script */ + ptr = gen7_capturescript->hostptr; + + for (i = 0; i < ARRAY_SIZE(gen7_sptp_clusters); i++) { + struct gen7_sptp_cluster_registers *cluster = &gen7_sptp_clusters[i]; + const u32 *regs = cluster->regs; + + cluster->offset = offset; + + /* Program the aperture */ + ptr += CD_WRITE(ptr, GEN7_SP_READ_SEL, GEN7_SP_READ_SEL_VAL + (cluster->location_id, cluster->pipe_id, cluster->statetype, 0, 0)); + + for (; regs[0] != UINT_MAX; regs += 2) { + count = REG_COUNT(regs); + ptr += CD_READ(ptr, (GEN7_SP_AHB_READ_APERTURE + + regs[0] - cluster->regbase), count, + (gen7_crashdump_registers->gpuaddr + offset)); + + offset += count * sizeof(unsigned int); + } + } + /* Marker for end of script */ + CD_FINISH(ptr, offset); + + /* Try to run the crash dumper */ + if (_gen7_do_crashdump(device)) + func = gen7_snapshot_cluster_dbgahb; + + /* Capture the registers in snapshot */ + for (i = 0; i < ARRAY_SIZE(gen7_sptp_clusters); i++) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_MVC_V2, snapshot, func, &gen7_sptp_clusters[i]); +} + +static size_t 
gen7_legacy_snapshot_mvc(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_mvc_regs_v2 *header = + (struct kgsl_snapshot_mvc_regs_v2 *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + struct gen7_cluster_registers *cluster = + (struct gen7_cluster_registers *)priv; + const u32 *ptr = cluster->regs; + unsigned int j; + unsigned int size = adreno_snapshot_regs_count(ptr) * 4; + + if (remain < (sizeof(*header) + size)) { + SNAPSHOT_ERR_NOMEM(device, "MVC REGISTERS"); + return 0; + } + + header->ctxt_id = (cluster->context_id == STATE_FORCE_CTXT_1) ? 1 : 0; + header->cluster_id = cluster->cluster_id; + header->pipe_id = cluster->pipe_id; + header->location_id = UINT_MAX; + + /* + * Set the AHB control for the Host to read from the + * cluster/context for this iteration. + */ + kgsl_regwrite(device, GEN7_CP_APERTURE_CNTL_HOST, GEN7_CP_APERTURE_REG_VAL + (cluster->pipe_id, cluster->cluster_id, cluster->context_id)); + + if (cluster->sel) + kgsl_regwrite(device, cluster->sel->host_reg, cluster->sel->val); + + for (ptr = cluster->regs; ptr[0] != UINT_MAX; ptr += 2) { + unsigned int count = REG_COUNT(ptr); + + if (count == 1) + *data++ = ptr[0]; + else { + *data++ = ptr[0] | (1 << 31); + *data++ = ptr[1]; + } + for (j = ptr[0]; j <= ptr[1]; j++) { + kgsl_regread(device, j, data); + data++; + } + } + + return (size + sizeof(*header)); +} + +static size_t gen7_snapshot_mvc(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_mvc_regs_v2 *header = + (struct kgsl_snapshot_mvc_regs_v2 *)buf; + struct gen7_cluster_registers *cluster = + (struct gen7_cluster_registers *)priv; + const u32 *ptr = cluster->regs; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + unsigned int *src; + unsigned int cnt; + unsigned int size = adreno_snapshot_regs_count(ptr) * 4; + + if (remain < (sizeof(*header) + size)) { + SNAPSHOT_ERR_NOMEM(device, "MVC REGISTERS"); + return 0; + } + + 
header->ctxt_id = (cluster->context_id == STATE_FORCE_CTXT_1) ? 1 : 0; + header->cluster_id = cluster->cluster_id; + header->pipe_id = cluster->pipe_id; + header->location_id = UINT_MAX; + + src = gen7_crashdump_registers->hostptr + cluster->offset; + + for (ptr = cluster->regs; ptr[0] != UINT_MAX; ptr += 2) { + cnt = REG_COUNT(ptr); + + if (cnt == 1) + *data++ = ptr[0]; + else { + *data++ = ptr[0] | (1 << 31); + *data++ = ptr[1]; + } + memcpy(data, src, cnt << 2); + src += cnt; + data += cnt; + } + + return (size + sizeof(*header)); + +} + +static void gen7_snapshot_mvc_regs(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + int i; + u64 *ptr, offset = 0; + unsigned int count; + size_t (*func)(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) = gen7_legacy_snapshot_mvc; + + if (CD_SCRIPT_CHECK(device)) { + for (i = 0; i < ARRAY_SIZE(gen7_clusters); i++) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_MVC_V2, snapshot, func, &gen7_clusters[i]); + return; + } + + /* Build the crash script */ + ptr = gen7_capturescript->hostptr; + + for (i = 0; i < ARRAY_SIZE(gen7_clusters); i++) { + struct gen7_cluster_registers *cluster = &gen7_clusters[i]; + const u32 *regs = cluster->regs; + + cluster->offset = offset; + ptr += CD_WRITE(ptr, GEN7_CP_APERTURE_CNTL_CD, GEN7_CP_APERTURE_REG_VAL + (cluster->pipe_id, cluster->cluster_id, cluster->context_id)); + + if (cluster->sel) + ptr += CD_WRITE(ptr, cluster->sel->cd_reg, cluster->sel->val); + + for (; regs[0] != UINT_MAX; regs += 2) { + count = REG_COUNT(regs); + + ptr += CD_READ(ptr, regs[0], + count, (gen7_crashdump_registers->gpuaddr + offset)); + + offset += count * sizeof(unsigned int); + } + } + + /* Marker for end of script */ + CD_FINISH(ptr, offset); + + /* Try to run the crash dumper */ + if (_gen7_do_crashdump(device)) + func = gen7_snapshot_mvc; + + for (i = 0; i < ARRAY_SIZE(gen7_clusters); i++) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_MVC_V2, snapshot, 
func, &gen7_clusters[i]); +} + +/* gen7_dbgc_debug_bus_read() - Read data from trace bus */ +static void gen7_dbgc_debug_bus_read(struct kgsl_device *device, + unsigned int block_id, unsigned int index, unsigned int *val) +{ + unsigned int reg; + + reg = FIELD_PREP(GENMASK(7, 0), index) | + FIELD_PREP(GENMASK(24, 16), block_id); + + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_SEL_A, reg); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_SEL_B, reg); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_SEL_C, reg); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_SEL_D, reg); + + /* + * There needs to be a delay of 1 us to ensure enough time for correct + * data is funneled into the trace buffer + */ + udelay(1); + + kgsl_regread(device, GEN7_DBGC_CFG_DBGBUS_TRACE_BUF2, val); + val++; + kgsl_regread(device, GEN7_DBGC_CFG_DBGBUS_TRACE_BUF1, val); +} + +/* gen7_snapshot_dbgc_debugbus_block() - Capture debug data for a gpu block */ +static size_t gen7_snapshot_dbgc_debugbus_block(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debugbus *header = + (struct kgsl_snapshot_debugbus *)buf; + const u32 *block = priv; + int i; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + + if (remain < GEN7_DEBUGBUS_SECTION_SIZE) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + + header->id = *block; + header->count = GEN7_DEBUGBUS_BLOCK_SIZE * 2; + + for (i = 0; i < GEN7_DEBUGBUS_BLOCK_SIZE; i++) + gen7_dbgc_debug_bus_read(device, *block, i, &data[i*2]); + + return GEN7_DEBUGBUS_SECTION_SIZE; +} + +static u32 gen7_dbgc_side_debug_bus_read(struct kgsl_device *device, + unsigned int block_id, unsigned int index) +{ + u32 val; + unsigned int reg = FIELD_PREP(GENMASK(7, 0), index) | + FIELD_PREP(GENMASK(24, 16), block_id); + + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_SEL_A, reg); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_SEL_B, reg); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_SEL_C, reg); + kgsl_regwrite(device, 
GEN7_DBGC_CFG_DBGBUS_SEL_D, reg);
+
+	/*
+	 * There needs to be a delay of 1 us to ensure enough time for correct
+	 * data is funneled into the trace buffer
+	 */
+	udelay(1);
+
+	val = kgsl_regmap_read(&device->regmap, GEN7_DBGC_CFG_DBGBUS_OVER);
+
+	return FIELD_GET(GENMASK(27, 24), val);
+}
+
+static size_t gen7_snapshot_dbgc_side_debugbus_block(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_side_debugbus *header =
+		(struct kgsl_snapshot_side_debugbus *)buf;
+	const u32 *block = priv;
+	int i;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	size_t size = (GEN7_DEBUGBUS_BLOCK_SIZE * sizeof(unsigned int)) + sizeof(*header);
+
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
+		return 0;
+	}
+
+	header->id = *block;
+	header->size = GEN7_DEBUGBUS_BLOCK_SIZE;
+	header->valid_data = 0x4;
+
+	for (i = 0; i < GEN7_DEBUGBUS_BLOCK_SIZE; i++)
+		data[i] = gen7_dbgc_side_debug_bus_read(device, *block, i);
+
+	return size;
+}
+
+/* gen7_cx_debug_bus_read() - Read data from trace bus */
+static void gen7_cx_debug_bus_read(struct kgsl_device *device,
+	unsigned int block_id, unsigned int index, unsigned int *val)
+{
+	unsigned int reg;
+
+	reg = FIELD_PREP(GENMASK(7, 0), index) |
+		FIELD_PREP(GENMASK(24, 16), block_id);
+
+	adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
+	adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
+	adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
+	adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
+
+	/*
+	 * There needs to be a delay of 1 us to ensure enough time for correct
+	 * data is funneled into the trace buffer
+	 */
+	udelay(1);
+
+	adreno_cx_dbgc_regread(device, GEN7_CX_DBGC_CFG_DBGBUS_TRACE_BUF2, val);
+	val++;
+	adreno_cx_dbgc_regread(device, GEN7_CX_DBGC_CFG_DBGBUS_TRACE_BUF1, val);
+}
+
+/*
+ * gen7_snapshot_cx_dbgc_debugbus_block() - Capture debug data for a gpu
+ * block from the CX DBGC block
+ */
+static size_t gen7_snapshot_cx_dbgc_debugbus_block(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_debugbus *header =
+		(struct kgsl_snapshot_debugbus *)buf;
+	const u32 *block = priv;
+	int i;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+
+	if (remain < GEN7_DEBUGBUS_SECTION_SIZE) {
+		SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
+		return 0;
+	}
+
+	header->id = *block;
+	header->count = GEN7_DEBUGBUS_BLOCK_SIZE * 2;
+
+	for (i = 0; i < GEN7_DEBUGBUS_BLOCK_SIZE; i++)
+		gen7_cx_debug_bus_read(device, *block, i, &data[i*2]);
+
+	return GEN7_DEBUGBUS_SECTION_SIZE;
+}
+
+/* gen7_cx_side_debug_bus_read() - Read data from trace bus */
+static void gen7_cx_side_debug_bus_read(struct kgsl_device *device,
+	unsigned int block_id, unsigned int index, unsigned int *val)
+{
+	unsigned int reg = FIELD_PREP(GENMASK(7, 0), index) |
+		FIELD_PREP(GENMASK(24, 16), block_id);
+
+	adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
+	adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
+	adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
+	adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
+
+	/*
+	 * There needs to be a delay of 1 us to ensure enough time for correct
+	 * data is funneled into the trace buffer
+	 */
+	udelay(1);
+
+	adreno_cx_dbgc_regread(device, GEN7_CX_DBGC_CFG_DBGBUS_OVER, &reg);
+	*val = FIELD_GET(GENMASK(27, 24), reg);
+}
+
+/*
+ * gen7_snapshot_cx_side_dbgc_debugbus_block() - Capture side debug data for a
+ * gpu block from the CX DBGC block
+ */
+static size_t gen7_snapshot_cx_side_dbgc_debugbus_block(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_side_debugbus *header =
+		(struct kgsl_snapshot_side_debugbus *)buf;
+	const u32 *block = priv;
+	int i;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	size_t size = (GEN7_DEBUGBUS_BLOCK_SIZE * sizeof(unsigned int)) +
sizeof(*header); + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + + header->id = *block; + header->size = GEN7_DEBUGBUS_BLOCK_SIZE; + header->valid_data = 0x4; + + for (i = 0; i < GEN7_DEBUGBUS_BLOCK_SIZE; i++) + gen7_cx_side_debug_bus_read(device, *block, i, &data[i]); + + return size; +} + +/* gen7_snapshot_debugbus() - Capture debug bus data */ +static void gen7_snapshot_debugbus(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + int i; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_CNTLT, + FIELD_PREP(GENMASK(31, 28), 0xf)); + + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_CNTLM, + FIELD_PREP(GENMASK(27, 24), 0xf)); + + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_IVTL_0, 0); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_IVTL_1, 0); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_IVTL_2, 0); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_IVTL_3, 0); + + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_BYTEL_0, + FIELD_PREP(GENMASK(3, 0), 0x0) | + FIELD_PREP(GENMASK(7, 4), 0x1) | + FIELD_PREP(GENMASK(11, 8), 0x2) | + FIELD_PREP(GENMASK(15, 12), 0x3) | + FIELD_PREP(GENMASK(19, 16), 0x4) | + FIELD_PREP(GENMASK(23, 20), 0x5) | + FIELD_PREP(GENMASK(27, 24), 0x6) | + FIELD_PREP(GENMASK(31, 28), 0x7)); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_BYTEL_1, + FIELD_PREP(GENMASK(3, 0), 0x8) | + FIELD_PREP(GENMASK(7, 4), 0x9) | + FIELD_PREP(GENMASK(11, 8), 0xa) | + FIELD_PREP(GENMASK(15, 12), 0xb) | + FIELD_PREP(GENMASK(19, 16), 0xc) | + FIELD_PREP(GENMASK(23, 20), 0xd) | + FIELD_PREP(GENMASK(27, 24), 0xe) | + FIELD_PREP(GENMASK(31, 28), 0xf)); + + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_MASKL_0, 0); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_MASKL_1, 0); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_MASKL_2, 0); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_MASKL_3, 0); + + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_CNTLT, + FIELD_PREP(GENMASK(31, 28), 0xf)); 
+ + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_CNTLM, + FIELD_PREP(GENMASK(27, 24), 0xf)); + + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); + + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_BYTEL_0, + FIELD_PREP(GENMASK(3, 0), 0x0) | + FIELD_PREP(GENMASK(7, 4), 0x1) | + FIELD_PREP(GENMASK(11, 8), 0x2) | + FIELD_PREP(GENMASK(15, 12), 0x3) | + FIELD_PREP(GENMASK(19, 16), 0x4) | + FIELD_PREP(GENMASK(23, 20), 0x5) | + FIELD_PREP(GENMASK(27, 24), 0x6) | + FIELD_PREP(GENMASK(31, 28), 0x7)); + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_BYTEL_1, + FIELD_PREP(GENMASK(3, 0), 0x8) | + FIELD_PREP(GENMASK(7, 4), 0x9) | + FIELD_PREP(GENMASK(11, 8), 0xa) | + FIELD_PREP(GENMASK(15, 12), 0xb) | + FIELD_PREP(GENMASK(19, 16), 0xc) | + FIELD_PREP(GENMASK(23, 20), 0xd) | + FIELD_PREP(GENMASK(27, 24), 0xe) | + FIELD_PREP(GENMASK(31, 28), 0xf)); + + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_MASKL_2, 0); + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_MASKL_3, 0); + + for (i = 0; i < ARRAY_SIZE(gen7_debugbus_blocks); i++) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, gen7_snapshot_dbgc_debugbus_block, + (void *) &gen7_debugbus_blocks[i]); + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, + snapshot, gen7_snapshot_dbgc_side_debugbus_block, + (void *) &gen7_debugbus_blocks[i]); + } + + /* + * GBIF has same debugbus as of other GPU blocks hence fall back to + * default path if GPU uses GBIF. + * GBIF uses exactly same ID as of VBIF so use it as it is. 
+ */ + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, gen7_snapshot_dbgc_debugbus_block, + (void *) &gen7_gbif_debugbus_blocks[0]); + + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, gen7_snapshot_dbgc_debugbus_block, + (void *) &gen7_gbif_debugbus_blocks[1]); + + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, + snapshot, gen7_snapshot_dbgc_side_debugbus_block, + (void *) &gen7_gbif_debugbus_blocks[0]); + + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, + snapshot, gen7_snapshot_dbgc_side_debugbus_block, + (void *) &gen7_gbif_debugbus_blocks[1]); + + /* Dump the CX debugbus data if the block exists */ + if (adreno_is_cx_dbgc_register(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_A)) { + for (i = 0; i < ARRAY_SIZE(gen7_cx_dbgc_debugbus_blocks); i++) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, gen7_snapshot_cx_dbgc_debugbus_block, + (void *) &gen7_cx_dbgc_debugbus_blocks[i]); + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, + snapshot, gen7_snapshot_cx_side_dbgc_debugbus_block, + (void *) &gen7_cx_dbgc_debugbus_blocks[i]); + } + /* + * Get debugbus for GBIF CX part if GPU has GBIF block + * GBIF uses exactly same ID as of VBIF so use + * it as it is. 
+ */ + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, snapshot, + gen7_snapshot_cx_dbgc_debugbus_block, + (void *) &gen7_gbif_debugbus_blocks[0]); + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, snapshot, + gen7_snapshot_cx_side_dbgc_debugbus_block, + (void *) &gen7_gbif_debugbus_blocks[0]); + } +} + + + +/* gen7_snapshot_sqe() - Dump SQE data in snapshot */ +static size_t gen7_snapshot_sqe(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE); + + if (remain < DEBUG_SECTION_SZ(1)) { + SNAPSHOT_ERR_NOMEM(device, "SQE VERSION DEBUG"); + return 0; + } + + /* Dump the SQE firmware version */ + header->type = SNAPSHOT_DEBUG_SQE_VERSION; + header->size = 1; + *data = fw->version; + + return DEBUG_SECTION_SZ(1); +} + +/* Snapshot the preemption related buffers */ +static size_t snapshot_preemption_record(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_memdesc *memdesc = priv; + struct kgsl_snapshot_gpu_object_v2 *header = + (struct kgsl_snapshot_gpu_object_v2 *)buf; + u8 *ptr = buf + sizeof(*header); + const struct adreno_gen7_core *gpucore = to_gen7_core(ADRENO_DEVICE(device)); + u64 ctxt_record_size = GEN7_CP_CTXRECORD_SIZE_IN_BYTES; + + if (gpucore->ctxt_record_size) + ctxt_record_size = gpucore->ctxt_record_size; + + ctxt_record_size = min_t(u64, ctxt_record_size, device->snapshot_ctxt_record_size); + + if (remain < (ctxt_record_size + sizeof(*header))) { + SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); + return 0; + } + + header->size = ctxt_record_size >> 2; + header->gpuaddr = memdesc->gpuaddr; + header->ptbase = + kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); + header->type = 
SNAPSHOT_GPU_OBJECT_GLOBAL; + + memcpy(ptr, memdesc->hostptr, ctxt_record_size); + + return ctxt_record_size + sizeof(*header); +} + +static void gen7_reglist_snapshot(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + u64 *ptr, offset = 0; + int i; + u32 r; + size_t (*func)(struct kgsl_device *device, u8 *buf, size_t remain, + void *priv) = gen7_legacy_snapshot_registers; + + if (CD_SCRIPT_CHECK(device)) { + for (i = 0; i < ARRAY_SIZE(gen7_reg_list); i++) + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, + snapshot, func, &gen7_reg_list[i]); + return; + } + + /* Build the crash script */ + ptr = (u64 *)gen7_capturescript->hostptr; + + for (i = 0; i < ARRAY_SIZE(gen7_reg_list); i++) { + struct reg_list *regs = &gen7_reg_list[i]; + const u32 *regs_ptr = regs->regs; + + regs->offset = offset; + + /* Program the SEL_CNTL_CD register appropriately */ + if (regs->sel) + ptr += CD_WRITE(ptr, regs->sel->cd_reg, regs->sel->val); + + for (; regs_ptr[0] != UINT_MAX; regs_ptr += 2) { + r = REG_COUNT(regs_ptr); + ptr += CD_READ(ptr, regs_ptr[0], r, + (gen7_crashdump_registers->gpuaddr + offset)); + offset += r * sizeof(u32); + } + } + + /* Marker for end of script */ + CD_FINISH(ptr, offset); + + /* Try to run the crash dumper */ + if (_gen7_do_crashdump(device)) + func = gen7_snapshot_registers; + + for (i = 0; i < ARRAY_SIZE(gen7_reg_list); i++) + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, + snapshot, func, &gen7_reg_list[i]); + +} + +static void gen7_snapshot_br_roq(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + unsigned int roq_size; + + /* + * CP ROQ dump units is 4 dwords. The number of units is stored + * in CP_ROQ_THRESHOLDS_2[31:20], but it is not accessible to + * host. 
Program the GEN7_CP_SQE_UCODE_DBG_ADDR with 0x70d3 offset + * and read the value CP_ROQ_THRESHOLDS_2 from + * GEN7_CP_SQE_UCODE_DBG_DATA + */ + kgsl_regwrite(device, GEN7_CP_SQE_UCODE_DBG_ADDR, 0x70d3); + kgsl_regread(device, GEN7_CP_SQE_UCODE_DBG_DATA, &roq_size); + roq_size = roq_size >> 20; + kgsl_snapshot_indexed_registers(device, snapshot, + GEN7_CP_ROQ_DBG_ADDR, GEN7_CP_ROQ_DBG_DATA, 0, (roq_size << 2)); +} + +static void gen7_snapshot_bv_roq(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + unsigned int roq_size; + + /* + * CP ROQ dump units is 4 dwords. The number of units is stored + * in CP_BV_ROQ_THRESHOLDS_2[31:20], but it is not accessible to + * host. Program the GEN7_CP_BV_SQE_UCODE_DBG_ADDR with 0x70d3 offset + * (at which CP stores the roq values) and read the value of + * CP_BV_ROQ_THRESHOLDS_2 from GEN7_CP_BV_SQE_UCODE_DBG_DATA + */ + kgsl_regwrite(device, GEN7_CP_BV_SQE_UCODE_DBG_ADDR, 0x70d3); + kgsl_regread(device, GEN7_CP_BV_SQE_UCODE_DBG_DATA, &roq_size); + roq_size = roq_size >> 20; + kgsl_snapshot_indexed_registers(device, snapshot, + GEN7_CP_BV_ROQ_DBG_ADDR, GEN7_CP_BV_ROQ_DBG_DATA, 0, (roq_size << 2)); +} + +static void gen7_snapshot_lpac_roq(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + unsigned int roq_size; + + /* + * CP ROQ dump units is 4 dwords. The number of units is stored + * in CP_LPAC_ROQ_THRESHOLDS_2[31:20], but it is not accessible to + * host. 
	 Program the GEN7_CP_SQE_AC_UCODE_DBG_ADDR with 0x70d3 offset
	 * (at which CP stores the roq values) and read the value of
	 * CP_LPAC_ROQ_THRESHOLDS_2 from GEN7_CP_SQE_AC_UCODE_DBG_DATA
	 */
	kgsl_regwrite(device, GEN7_CP_SQE_AC_UCODE_DBG_ADDR, 0x70d3);
	kgsl_regread(device, GEN7_CP_SQE_AC_UCODE_DBG_DATA, &roq_size);
	/* Per the comment above, the ROQ size lives in the upper bits [31:20] */
	roq_size = roq_size >> 20;
	kgsl_snapshot_indexed_registers(device, snapshot,
		GEN7_CP_LPAC_ROQ_DBG_ADDR, GEN7_CP_LPAC_ROQ_DBG_DATA, 0, (roq_size << 2));
}

/*
 * gen7_snapshot() - GEN7 GPU snapshot function
 * @adreno_dev: Device being snapshotted
 * @snapshot: Pointer to the snapshot instance
 *
 * This is where all of the GEN7 specific bits and pieces are grabbed
 * into the snapshot memory
 */
void gen7_snapshot(struct adreno_device *adreno_dev,
		struct kgsl_snapshot *snapshot)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct adreno_ringbuffer *rb;
	unsigned int i;
	u32 hi, lo, cgc, cgc1, cgc2;

	/*
	 * Dump debugbus data here to capture it for both
	 * GMU and GPU snapshot. Debugbus data can be accessed
	 * even if the gx headswitch is off. If gx
	 * headswitch is off, data for gx blocks will show as
	 * 0x5c00bd00. Disable clock gating for SP and TP to capture
	 * debugbus data.
	 */
	if (device->ftbl->is_hwcg_on(device)) {
		/* Save the current clock gating state so it can be restored below */
		kgsl_regread(device, GEN7_RBBM_CLOCK_CNTL2_SP0, &cgc);
		kgsl_regread(device, GEN7_RBBM_CLOCK_CNTL_TP0, &cgc1);
		kgsl_regread(device, GEN7_RBBM_CLOCK_CNTL3_TP0, &cgc2);
		kgsl_regrmw(device, GEN7_RBBM_CLOCK_CNTL2_SP0, GENMASK(22, 20), 0);
		kgsl_regrmw(device, GEN7_RBBM_CLOCK_CNTL_TP0, GENMASK(2, 0), 0);
		kgsl_regrmw(device, GEN7_RBBM_CLOCK_CNTL3_TP0, GENMASK(14, 12), 0);
	}

	gen7_snapshot_debugbus(adreno_dev, snapshot);

	/* Restore the value of the clockgating registers */
	if (device->ftbl->is_hwcg_on(device)) {
		kgsl_regwrite(device, GEN7_RBBM_CLOCK_CNTL2_SP0, cgc);
		kgsl_regwrite(device, GEN7_RBBM_CLOCK_CNTL_TP0, cgc1);
		kgsl_regwrite(device, GEN7_RBBM_CLOCK_CNTL3_TP0, cgc2);
	}

	/* Everything below reads GX-side registers, so bail if GX is down */
	if (!gmu_core_dev_gx_is_on(device))
		return;

	/* Record the 64-bit IB1/IB2 base addresses and remaining sizes */
	kgsl_regread(device, GEN7_CP_IB1_BASE, &lo);
	kgsl_regread(device, GEN7_CP_IB1_BASE_HI, &hi);

	snapshot->ib1base = (((u64) hi) << 32) | lo;

	kgsl_regread(device, GEN7_CP_IB2_BASE, &lo);
	kgsl_regread(device, GEN7_CP_IB2_BASE_HI, &hi);

	snapshot->ib2base = (((u64) hi) << 32) | lo;

	kgsl_regread(device, GEN7_CP_IB1_REM_SIZE, &snapshot->ib1size);
	kgsl_regread(device, GEN7_CP_IB2_REM_SIZE, &snapshot->ib2size);

	/* Assert the isStatic bit before triggering snapshot */
	kgsl_regwrite(device, GEN7_RBBM_SNAPSHOT_STATUS, 0x1);

	/* Dump the registers which get affected by crash dumper trigger */
	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2,
		snapshot, adreno_snapshot_registers_v2,
		(void *)gen7_pre_crashdumper_registers);

	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2,
		snapshot, adreno_snapshot_registers_v2,
		(void *)gen7_gpucc_registers);

	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2,
		snapshot, adreno_snapshot_registers_v2,
		(void *)gen7_cpr_registers);

	gen7_reglist_snapshot(device, snapshot);

	/*
	 * Need to program and save this register before capturing resource table
	 * to workaround a CGC issue
	 */
	if (device->ftbl->is_hwcg_on(device)) {
		kgsl_regread(device, GEN7_RBBM_CLOCK_MODE_CP, &cgc);
		kgsl_regrmw(device, GEN7_RBBM_CLOCK_MODE_CP, 0x7, 0);
	}
	kgsl_snapshot_indexed_registers(device, snapshot,
		GEN7_CP_RESOURCE_TBL_DBG_ADDR, GEN7_CP_RESOURCE_TBL_DBG_DATA,
		0, 0x4100);

	/* Reprogram the register back to the original stored value */
	if (device->ftbl->is_hwcg_on(device))
		kgsl_regwrite(device, GEN7_RBBM_CLOCK_MODE_CP, cgc);

	/* Dump every CP indexed register pair in the list */
	for (i = 0; i < ARRAY_SIZE(gen7_cp_indexed_reg_list); i++)
		kgsl_snapshot_indexed_registers(device, snapshot,
			gen7_cp_indexed_reg_list[i].addr,
			gen7_cp_indexed_reg_list[i].data, 0,
			gen7_cp_indexed_reg_list[i].size);

	gen7_snapshot_br_roq(device, snapshot);

	gen7_snapshot_bv_roq(device, snapshot);

	gen7_snapshot_lpac_roq(device, snapshot);

	/* SQE Firmware */
	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
		snapshot, gen7_snapshot_sqe, NULL);

	/* Mempool debug data */
	gen7_snapshot_mempool(device, snapshot);

	/* Shader memory */
	gen7_snapshot_shader(device, snapshot);

	/* MVC register section */
	gen7_snapshot_mvc_regs(device, snapshot);

	/* registers dumped through DBG AHB */
	gen7_snapshot_dbgahb_regs(device, snapshot);

	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2,
		snapshot, adreno_snapshot_registers_v2,
		(void *)gen7_post_crashdumper_registers);

	/* Deassert the isStatic bit now that register capture is done */
	kgsl_regwrite(device, GEN7_RBBM_SNAPSHOT_STATUS, 0x0);

	/* Preemption record */
	if (adreno_is_preemption_enabled(adreno_dev)) {
		FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
			kgsl_snapshot_add_section(device,
				KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
				snapshot, snapshot_preemption_record,
				rb->preemption_desc);
		}
	}
}

/*
 * gen7_crashdump_init() - Allocate the global buffers used by the crash
 * dumper: the capture script and the register scratch area.
 * @adreno_dev: Adreno device pointer
 *
 * Failed allocations remain recorded as IS_ERR() pointers; users of the
 * crash dumper are expected to check them before use.
 */
void gen7_crashdump_init(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	if (IS_ERR_OR_NULL(gen7_capturescript))
		gen7_capturescript = kgsl_allocate_global(device,
			4 * PAGE_SIZE, 0, KGSL_MEMFLAGS_GPUREADONLY,
			KGSL_MEMDESC_PRIVILEGED, "capturescript");

	if (IS_ERR(gen7_capturescript))
		return;

	if (IS_ERR_OR_NULL(gen7_crashdump_registers))
		gen7_crashdump_registers = kgsl_allocate_global(device,
			300 * PAGE_SIZE, 0, 0, KGSL_MEMDESC_PRIVILEGED,
			"capturescript_regs");

	if (IS_ERR(gen7_crashdump_registers))
		return;
}
diff --git a/adreno_gen7_snapshot.h b/adreno_gen7_snapshot.h
new file mode 100644
index 0000000000..93c0144414
--- /dev/null
+++ b/adreno_gen7_snapshot.h
@@ -0,0 +1,1311 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
 */
#ifndef __ADRENO_GEN7_SNAPSHOT_H
#define __ADRENO_GEN7_SNAPSHOT_H

#include "adreno.h"
#include "adreno_gen7.h"

/* Pipe selectors used by the crash dump cluster tables */
#define PIPE_NONE 0
#define PIPE_BR 1
#define PIPE_BV 2
#define PIPE_LPAC 3

/* Cluster identifiers for context register groups */
#define CLUSTER_NONE 0
#define CLUSTER_FE 1
#define CLUSTER_SP_VS 2
#define CLUSTER_PC_VS 3
#define CLUSTER_GRAS 4
#define CLUSTER_SP_PS 5
#define CLUSTER_VPC_PS 6
#define CLUSTER_PS 7

/* SP/TP register "location" selectors */
#define HLSQ_State 0
#define HLSQ_DP 1
#define SP_TOP 2
#define USPTP 3

/* Context-state selection modes for crash dump reads */
#define STATE_NON_CONTEXT 0
#define STATE_TOGGLE_CTXT 1
#define STATE_FORCE_CTXT_0 2
#define STATE_FORCE_CTXT_1 3

/* Debugbus block select identifiers */
enum gen7_debugbus_ids {
	DEBUGBUS_CP_0_0 = 1,
	DEBUGBUS_CP_0_1 = 2,
	DEBUGBUS_RBBM = 3,
	DEBUGBUS_GBIF_GX = 5,
	DEBUGBUS_GBIF_CX = 6,
	DEBUGBUS_HLSQ = 7,
	DEBUGBUS_UCHE_0 = 9,
	DEBUGBUS_TESS_BR = 13,
	DEBUGBUS_TESS_BV = 14,
	DEBUGBUS_PC_BR = 17,
	DEBUGBUS_PC_BV = 18,
	DEBUGBUS_VFDP_BR = 21,
	DEBUGBUS_VFDP_BV = 22,
	DEBUGBUS_VPC_BR = 25,
	DEBUGBUS_VPC_BV = 26,
	DEBUGBUS_TSE_BR = 29,
	DEBUGBUS_TSE_BV = 30,
	DEBUGBUS_RAS_BR = 33,
	DEBUGBUS_RAS_BV = 34,
	DEBUGBUS_VSC = 37,
	DEBUGBUS_COM_0 = 39,
	DEBUGBUS_LRZ_BR = 43,
	DEBUGBUS_LRZ_BV = 44,
	DEBUGBUS_UFC_0 = 47,
	DEBUGBUS_UFC_1 = 48,
	DEBUGBUS_GMU_GX = 55,
	DEBUGBUS_DBGC = 59,
	DEBUGBUS_CX = 60,
	DEBUGBUS_GMU_CX = 61,
	DEBUGBUS_GPC_BR = 62,
	DEBUGBUS_GPC_BV = 63,
	DEBUGBUS_LARC = 66,
	DEBUGBUS_HLSQ_SPTP = 68,
	DEBUGBUS_RB_0 = 70,
	DEBUGBUS_RB_1 = 71,
	DEBUGBUS_RB_2 = 72,
	DEBUGBUS_RB_3 = 73,
	DEBUGBUS_UCHE_WRAPPER = 102,
	DEBUGBUS_CCU_0 = 106,
	DEBUGBUS_CCU_1 = 107,
	DEBUGBUS_CCU_2 = 108,
	DEBUGBUS_CCU_3 = 109,
	DEBUGBUS_VFD_BR_0 = 138,
	DEBUGBUS_VFD_BR_1 = 139,
	DEBUGBUS_VFD_BR_2 = 140,
	DEBUGBUS_VFD_BR_3 = 141,
	DEBUGBUS_VFD_BR_4 = 142,
	DEBUGBUS_VFD_BR_5 = 143,
	DEBUGBUS_VFD_BR_6 = 144,
	DEBUGBUS_VFD_BR_7 = 145,
	DEBUGBUS_VFD_BV_0 = 202,
	DEBUGBUS_VFD_BV_1 = 203,
	DEBUGBUS_VFD_BV_2 = 204,
	DEBUGBUS_VFD_BV_3 = 205,
	DEBUGBUS_USP_0 = 234,
	DEBUGBUS_USP_1 = 235,
	DEBUGBUS_USP_2 = 236,
	DEBUGBUS_USP_3 = 237,
	DEBUGBUS_TP_0 = 266,
	DEBUGBUS_TP_1 = 267,
	DEBUGBUS_TP_2 = 268,
	DEBUGBUS_TP_3 = 269,
	DEBUGBUS_TP_4 = 270,
	DEBUGBUS_TP_5 = 271,
	DEBUGBUS_TP_6 = 272,
	DEBUGBUS_TP_7 = 273,
	DEBUGBUS_USPTP_0 = 330,
	DEBUGBUS_USPTP_1 = 331,
	DEBUGBUS_USPTP_2 = 332,
	DEBUGBUS_USPTP_3 = 333,
	DEBUGBUS_USPTP_4 = 334,
	DEBUGBUS_USPTP_5 = 335,
	DEBUGBUS_USPTP_6 = 336,
	DEBUGBUS_USPTP_7 = 337,
};

/* Debugbus blocks to sample during snapshot */
static const u32 gen7_debugbus_blocks[] = {
	DEBUGBUS_CP_0_0,
	DEBUGBUS_CP_0_1,
	DEBUGBUS_RBBM,
	DEBUGBUS_HLSQ,
	DEBUGBUS_UCHE_0,
	DEBUGBUS_TESS_BR,
	DEBUGBUS_TESS_BV,
	DEBUGBUS_PC_BR,
	DEBUGBUS_PC_BV,
	DEBUGBUS_VFDP_BR,
	DEBUGBUS_VFDP_BV,
	DEBUGBUS_VPC_BR,
	DEBUGBUS_VPC_BV,
	DEBUGBUS_TSE_BR,
	DEBUGBUS_TSE_BV,
	DEBUGBUS_RAS_BR,
	DEBUGBUS_RAS_BV,
	DEBUGBUS_VSC,
	DEBUGBUS_COM_0,
	DEBUGBUS_LRZ_BR,
	DEBUGBUS_LRZ_BV,
	DEBUGBUS_UFC_0,
	DEBUGBUS_UFC_1,
	DEBUGBUS_GMU_GX,
	DEBUGBUS_DBGC,
	DEBUGBUS_GPC_BR,
	DEBUGBUS_GPC_BV,
	DEBUGBUS_LARC,
	DEBUGBUS_HLSQ_SPTP,
	DEBUGBUS_RB_0,
	DEBUGBUS_RB_1,
	DEBUGBUS_RB_2,
	DEBUGBUS_RB_3,
	DEBUGBUS_UCHE_WRAPPER,
	DEBUGBUS_CCU_0,
	DEBUGBUS_CCU_1,
	DEBUGBUS_CCU_2,
	DEBUGBUS_CCU_3,
	DEBUGBUS_VFD_BR_0,
	DEBUGBUS_VFD_BR_1,
	DEBUGBUS_VFD_BR_2,
	DEBUGBUS_VFD_BR_3,
	DEBUGBUS_VFD_BR_4,
	DEBUGBUS_VFD_BR_5,
	DEBUGBUS_VFD_BR_6,
	DEBUGBUS_VFD_BR_7,
	DEBUGBUS_VFD_BV_0,
	DEBUGBUS_VFD_BV_1,
	DEBUGBUS_VFD_BV_2,
	DEBUGBUS_VFD_BV_3,
	DEBUGBUS_USP_0,
	DEBUGBUS_USP_1,
	DEBUGBUS_USP_2,
	DEBUGBUS_USP_3,
	DEBUGBUS_TP_0,
	DEBUGBUS_TP_1,
	DEBUGBUS_TP_2,
	DEBUGBUS_TP_3,
	DEBUGBUS_TP_4,
	DEBUGBUS_TP_5,
	DEBUGBUS_TP_6,
	DEBUGBUS_TP_7,
	DEBUGBUS_USPTP_0,
	DEBUGBUS_USPTP_1,
	DEBUGBUS_USPTP_2,
	DEBUGBUS_USPTP_3,
	DEBUGBUS_USPTP_4,
	DEBUGBUS_USPTP_5,
	DEBUGBUS_USPTP_6,
	DEBUGBUS_USPTP_7,
};

/* State type selectors used when dumping SP/TP/HLSQ internal memories */
enum gen7_statetype_ids {
	TP0_NCTX_REG = 0,
	TP0_CTX0_3D_CVS_REG = 1,
	TP0_CTX0_3D_CPS_REG = 2,
	TP0_CTX1_3D_CVS_REG = 3,
	TP0_CTX1_3D_CPS_REG = 4,
	TP0_CTX2_3D_CPS_REG = 5,
	TP0_CTX3_3D_CPS_REG = 6,
	TP0_TMO_DATA = 9,
	TP0_SMO_DATA = 10,
	TP0_MIPMAP_BASE_DATA = 11,
	SP_NCTX_REG = 32,
	SP_CTX0_3D_CVS_REG = 33,
	SP_CTX0_3D_CPS_REG = 34,
	SP_CTX1_3D_CVS_REG = 35,
	SP_CTX1_3D_CPS_REG = 36,
	SP_CTX2_3D_CPS_REG = 37,
	SP_CTX3_3D_CPS_REG = 38,
	SP_INST_DATA = 39,
	SP_INST_DATA_1 = 40,
	SP_LB_0_DATA = 41,
	SP_LB_1_DATA = 42,
	SP_LB_2_DATA = 43,
	SP_LB_3_DATA = 44,
	SP_LB_4_DATA = 45,
	SP_LB_5_DATA = 46,
	SP_LB_6_DATA = 47,
	SP_LB_7_DATA = 48,
	SP_CB_RAM = 49,
	SP_INST_TAG = 52,
	SP_INST_DATA_2 = 53,
	SP_TMO_TAG = 54,
	SP_SMO_TAG = 55,
	SP_STATE_DATA = 56,
	SP_HWAVE_RAM = 57,
	SP_L0_INST_BUF = 58,
	SP_LB_8_DATA = 59,
	SP_LB_9_DATA = 60,
	SP_LB_10_DATA = 61,
	SP_LB_11_DATA = 62,
	SP_LB_12_DATA = 63,
	HLSQ_CVS_BE_CTXT_BUF_RAM_TAG = 69,
	HLSQ_CPS_BE_CTXT_BUF_RAM_TAG = 70,
	HLSQ_GFX_CVS_BE_CTXT_BUF_RAM = 71,
	HLSQ_GFX_CPS_BE_CTXT_BUF_RAM = 72,
	HLSQ_CHUNK_CVS_RAM = 73,
	HLSQ_CHUNK_CPS_RAM = 74,
	HLSQ_CHUNK_CVS_RAM_TAG = 75,
	HLSQ_CHUNK_CPS_RAM_TAG = 76,
	HLSQ_ICB_CVS_CB_BASE_TAG = 77,
	HLSQ_ICB_CPS_CB_BASE_TAG = 78,
	HLSQ_CVS_MISC_RAM = 79,
	HLSQ_CPS_MISC_RAM = 80,
	HLSQ_CPS_MISC_RAM_1 = 81,
	HLSQ_INST_RAM = 82,
	HLSQ_GFX_CVS_CONST_RAM = 83,
	HLSQ_GFX_CPS_CONST_RAM = 84,
	HLSQ_CVS_MISC_RAM_TAG = 85,
	HLSQ_CPS_MISC_RAM_TAG = 86,
	HLSQ_INST_RAM_TAG = 87,
	HLSQ_GFX_CVS_CONST_RAM_TAG = 88,
	HLSQ_GFX_CPS_CONST_RAM_TAG = 89,
	HLSQ_INST_RAM_1 = 92,
	HLSQ_STPROC_META = 93,
	HLSQ_BV_BE_META = 94,
	HLSQ_DATAPATH_META = 96,
	HLSQ_FRONTEND_META = 97,
	HLSQ_INDIRECT_META = 98,
	HLSQ_BACKEND_META = 99,
};

/*
 * Optional sub-block select: host_reg is written by the CPU and cd_reg by
 * the crash dumper before reading the ranges associated with the selector.
 */
static const struct sel_reg {
	unsigned int host_reg;
	unsigned int cd_reg;
	unsigned int val;
} gen7_0_0_rb_rac_sel = {
	.host_reg = GEN7_RB_RB_SUB_BLOCK_SEL_CNTL_HOST,
	.cd_reg = GEN7_RB_RB_SUB_BLOCK_SEL_CNTL_CD,
	.val = 0x0,
},
gen7_0_0_rb_rbp_sel = {
	.host_reg = GEN7_RB_RB_SUB_BLOCK_SEL_CNTL_HOST,
	.cd_reg = GEN7_RB_RB_SUB_BLOCK_SEL_CNTL_CD,
	.val = 0x9,
};

/*
 * All register tables below are (first, last) address pairs terminated by
 * a UINT_MAX, UINT_MAX sentinel pair.
 */

/* Registers dumped before the crash dumper is triggered */
static const u32 gen7_pre_crashdumper_registers[] = {
	0x00210, 0x00210, 0x00212, 0x00213, 0x03c00, 0x03c0b, 0x03c40, 0x03c42,
	0x03c45, 0x03c47, 0x03c49, 0x03c4a, 0x03cc0, 0x03cd1,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_pre_crashdumper_registers), 8));

/* Registers dumped after the crash dumper has run */
static const u32 gen7_post_crashdumper_registers[] = {
	0x00535, 0x00535,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_post_crashdumper_registers), 8));

/* General GPU register ranges */
static const u32 gen7_gpu_registers[] = {
	0x00000, 0x00000, 0x00002, 0x00002, 0x00011, 0x00012, 0x00016, 0x0001b,
	0x0001f, 0x00032, 0x00038, 0x0003c, 0x00042, 0x00042, 0x00044, 0x00044,
	0x00047, 0x00047, 0x00049, 0x0004a, 0x0004c, 0x0004c, 0x00050, 0x00050,
	0x00056, 0x00056, 0x00073, 0x00075, 0x000ad, 0x000ae, 0x000b0, 0x000b0,
	0x000b4, 0x000b4, 0x000b8, 0x000b8, 0x000bc, 0x000bc, 0x000c0, 0x000c0,
	0x000c4, 0x000c4, 0x000c8, 0x000c8, 0x000cc, 0x000cc, 0x000d0, 0x000d0,
	0x000d4, 0x000d4, 0x000d8, 0x000d8, 0x000dc, 0x000dc, 0x000e0, 0x000e0,
	0x000e4, 0x000e4, 0x000e8, 0x000e8, 0x000ec, 0x000ec, 0x000f0, 0x000f0,
	0x000f4, 0x000f4, 0x000f8, 0x000f8, 0x00100, 0x00100, 0x00104, 0x0010b,
	0x0010f, 0x0011d, 0x0012f, 0x0012f, 0x00200, 0x0020d, 0x00211, 0x00211,
	0x00215, 0x00243, 0x00260, 0x00268, 0x00272, 0x00274, 0x00281, 0x0028d,
	0x00300, 0x00401, 0x00410, 0x00451,
	0x00460, 0x004a3, 0x004c0, 0x004d1,
	0x00500, 0x00500, 0x00507, 0x0050b, 0x0050f, 0x0050f, 0x00511, 0x00511,
	0x00533, 0x00534, 0x00536, 0x00536, 0x00540, 0x00555, 0x00564, 0x00567,
	0x00574, 0x00577, 0x005fb, 0x005ff, 0x00800, 0x00808, 0x00810, 0x00813,
	0x00820, 0x00821, 0x00823, 0x00827, 0x00830, 0x00834, 0x0083f, 0x00841,
	0x00843, 0x00847, 0x0084f, 0x00886, 0x008a0, 0x008ab, 0x008c0, 0x008c0,
	0x008c4, 0x008c5, 0x008d0, 0x008dd, 0x008e0, 0x008e6, 0x008f0, 0x008f3,
	0x00900, 0x00903, 0x00908, 0x00911, 0x00928, 0x0093e, 0x00942, 0x0094d,
	0x00980, 0x00984, 0x0098d, 0x0098f, 0x009b0, 0x009b4, 0x009c2, 0x009c9,
	0x009ce, 0x009d7, 0x009e0, 0x009e7, 0x00a00, 0x00a00, 0x00a02, 0x00a03,
	0x00a10, 0x00a4f, 0x00a61, 0x00a9f, 0x00ad0, 0x00adb, 0x00b00, 0x00b31,
	0x00b35, 0x00b3c, 0x00b40, 0x00b40, 0x00c00, 0x00c00, 0x00c02, 0x00c04,
	0x00c06, 0x00c06, 0x00c10, 0x00cd9, 0x00ce0, 0x00d0c, 0x00df0, 0x00df4,
	0x00e01, 0x00e02, 0x00e07, 0x00e0e, 0x00e10, 0x00e13, 0x00e17, 0x00e19,
	0x00e1b, 0x00e2b, 0x00e30, 0x00e32, 0x00e38, 0x00e3c,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_gpu_registers), 8));

/* CX misc block registers */
static const u32 gen7_cx_misc_registers[] = {
	0x27800, 0x27800, 0x27810, 0x27814, 0x27820, 0x27824, 0x27832, 0x27857,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_cx_misc_registers), 8));

/* CPR block registers */
static const u32 gen7_cpr_registers[] = {
	0x26800, 0x26805, 0x26808, 0x2680c, 0x26814, 0x26814, 0x2681c, 0x2681c,
	0x26820, 0x26838, 0x26840, 0x26840, 0x26848, 0x26848, 0x26850, 0x26850,
	0x26880, 0x26898, 0x26980, 0x269b0, 0x269c0, 0x269c8, 0x269e0, 0x269ee,
	0x269fb, 0x269ff, 0x26a02, 0x26a07, 0x26a09, 0x26a0b, 0x26a10, 0x26b0f,
	0x27440, 0x27441, 0x27444, 0x27444, 0x27480, 0x274a2, 0x274ac, 0x274ac,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_cpr_registers), 8));

/* DPM block registers */
static const u32 gen7_dpm_registers[] = {
	0x1aa00, 0x1aa06, 0x1aa09, 0x1aa0a, 0x1aa0c, 0x1aa0d, 0x1aa0f, 0x1aa12,
	0x1aa14, 0x1aa47, 0x1aa50, 0x1aa51,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_dpm_registers), 8));

/* GPU clock controller registers */
static const u32 gen7_gpucc_registers[] = {
	0x24000, 0x2400e, 0x24400, 0x2440e, 0x24800, 0x24805, 0x24c00, 0x24cff,
	0x25800, 0x25804, 0x25c00, 0x25c04, 0x26000, 0x26004, 0x26400, 0x26405,
	0x26414, 0x2641d, 0x2642a, 0x26430, 0x26432, 0x26432, 0x26441, 0x26455,
	0x26466, 0x26468, 0x26478, 0x2647a, 0x26489, 0x2648a, 0x2649c, 0x2649e,
	0x264a0, 0x264a3, 0x264b3, 0x264b5, 0x264c5, 0x264c7, 0x264d6, 0x264d8,
	0x264e8, 0x264e9, 0x264f9, 0x264fc, 0x2650b, 0x2650c, 0x2651c, 0x2651e,
	0x26540, 0x26570, 0x26600, 0x26616, 0x26620, 0x2662d,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_gpucc_registers), 8));

/* Non-context registers, Pipeline: PIPE_BR */
static const u32 gen7_0_0_noncontext_pipe_br_registers[] = {
	0x00887, 0x0088c, 0x08600, 0x08600, 0x08602, 0x08602, 0x08610, 0x0861b,
	0x08620, 0x08620, 0x08630, 0x08630, 0x08637, 0x08639, 0x08640, 0x08640,
	0x09600, 0x09600, 0x09602, 0x09603, 0x0960a, 0x09616, 0x09624, 0x0963a,
	0x09640, 0x09640, 0x09e00, 0x09e00, 0x09e02, 0x09e07, 0x09e0a, 0x09e16,
	0x09e19, 0x09e19, 0x09e1c, 0x09e1c, 0x09e20, 0x09e25, 0x09e30, 0x09e31,
	0x09e40, 0x09e51, 0x09e64, 0x09e64, 0x09e70, 0x09e72, 0x09e78, 0x09e79,
	0x09e80, 0x09fff, 0x0a600, 0x0a600, 0x0a603, 0x0a603, 0x0a610, 0x0a61f,
	0x0a630, 0x0a631, 0x0a638, 0x0a638,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_noncontext_pipe_br_registers), 8));

/* Non-context registers, Pipeline: PIPE_BV */
static const u32 gen7_0_0_noncontext_pipe_bv_registers[] = {
	0x00887, 0x0088c, 0x08600, 0x08600, 0x08602, 0x08602, 0x08610, 0x0861b,
	0x08620, 0x08620, 0x08630, 0x08630, 0x08637, 0x08639, 0x08640, 0x08640,
	0x09600, 0x09600, 0x09602, 0x09603, 0x0960a, 0x09616, 0x09624, 0x0963a,
	0x09640, 0x09640, 0x09e00, 0x09e00, 0x09e02, 0x09e07, 0x09e0a, 0x09e16,
	0x09e19, 0x09e19, 0x09e1c, 0x09e1c, 0x09e20, 0x09e25, 0x09e30, 0x09e31,
	0x09e40, 0x09e51, 0x09e64, 0x09e64, 0x09e70, 0x09e72, 0x09e78, 0x09e79,
	0x09e80, 0x09fff, 0x0a600, 0x0a600, 0x0a603, 0x0a603, 0x0a610, 0x0a61f,
	0x0a630, 0x0a631, 0x0a638, 0x0a638,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_noncontext_pipe_bv_registers), 8));

/* Non-context registers, Pipeline: PIPE_LPAC */
static const u32 gen7_0_0_noncontext_pipe_lpac_registers[] = {
	0x00887, 0x0088c, 0x00f80, 0x00f80,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_noncontext_pipe_lpac_registers), 8));

/* Non-context RB RAC registers, Pipeline: PIPE_BR */
static const u32 gen7_0_0_noncontext_rb_rac_pipe_br_registers[] = {
	0x08e10, 0x08e1c, 0x08e20, 0x08e25, 0x08e51, 0x08e5a,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_noncontext_rb_rac_pipe_br_registers), 8));

/* Non-context RB RBP registers, Pipeline: PIPE_BR */
static const u32 gen7_0_0_noncontext_rb_rbp_pipe_br_registers[] = {
	0x08e01, 0x08e01, 0x08e04, 0x08e04, 0x08e06, 0x08e09, 0x08e0c, 0x08e0c,
	0x08e28, 0x08e28, 0x08e2c, 0x08e35, 0x08e3b, 0x08e3f, 0x08e50, 0x08e50,
	0x08e5b, 0x08e5d, 0x08e5f, 0x08e5f, 0x08e61, 0x08e61, 0x08e63, 0x08e65,
	0x08e68, 0x08e68, 0x08e70, 0x08e79, 0x08e80, 0x08e8f,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_noncontext_rb_rbp_pipe_br_registers), 8));

/* Block: GRAS Cluster: CLUSTER_GRAS Pipeline: PIPE_BR */
static const u32 gen7_0_0_gras_cluster_gras_pipe_br_registers[] = {
	0x08000, 0x08008, 0x08010, 0x08092, 0x08094, 0x08099, 0x0809b, 0x0809d,
	0x080a0, 0x080a7, 0x080af, 0x080f1, 0x080f4, 0x080f6, 0x080f8, 0x080fa,
	0x08100, 0x08107, 0x08109, 0x0810b, 0x08110, 0x08110, 0x08120, 0x0813f,
	0x08400, 0x08406, 0x0840a, 0x0840b,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_gras_cluster_gras_pipe_br_registers), 8));

/* Block: GRAS Cluster: CLUSTER_GRAS Pipeline: PIPE_BV */
static const u32 gen7_0_0_gras_cluster_gras_pipe_bv_registers[] = {
	0x08000, 0x08008, 0x08010, 0x08092, 0x08094, 0x08099, 0x0809b, 0x0809d,
	0x080a0, 0x080a7, 0x080af, 0x080f1, 0x080f4, 0x080f6, 0x080f8, 0x080fa,
	0x08100, 0x08107, 0x08109, 0x0810b, 0x08110, 0x08110, 0x08120, 0x0813f,
	0x08400, 0x08406, 0x0840a, 0x0840b,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_gras_cluster_gras_pipe_bv_registers), 8));

/* Block: PC Cluster: CLUSTER_FE Pipeline: PIPE_BR */
static const u32 gen7_0_0_pc_cluster_fe_pipe_br_registers[] = {
	0x09800, 0x09804, 0x09806, 0x0980a, 0x09810, 0x09811, 0x09884, 0x09886,
	0x09b00, 0x09b08,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_pc_cluster_fe_pipe_br_registers), 8));

/* Block: PC Cluster: CLUSTER_FE Pipeline: PIPE_BV */
static const u32 gen7_0_0_pc_cluster_fe_pipe_bv_registers[] = {
	0x09800, 0x09804, 0x09806, 0x0980a, 0x09810, 0x09811, 0x09884, 0x09886,
	0x09b00, 0x09b08,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_pc_cluster_fe_pipe_bv_registers), 8));

/* Block: RB_RAC Cluster: CLUSTER_PS Pipeline: PIPE_BR */
static const u32 gen7_0_0_rb_rac_cluster_ps_pipe_br_registers[] = {
	0x08802, 0x08802, 0x08804, 0x08806, 0x08809, 0x0880a, 0x0880e, 0x08811,
	0x08818, 0x0881e, 0x08821, 0x08821, 0x08823, 0x08826, 0x08829, 0x08829,
	0x0882b, 0x0882e, 0x08831, 0x08831, 0x08833, 0x08836, 0x08839, 0x08839,
	0x0883b, 0x0883e, 0x08841, 0x08841, 0x08843, 0x08846, 0x08849, 0x08849,
	0x0884b, 0x0884e, 0x08851, 0x08851, 0x08853, 0x08856, 0x08859, 0x08859,
	0x0885b, 0x0885e, 0x08860, 0x08864, 0x08870, 0x08870, 0x08873, 0x08876,
	0x08878, 0x08879, 0x08882, 0x08885, 0x08887, 0x08889, 0x08891, 0x08891,
	0x08898, 0x08898, 0x088c0, 0x088c1, 0x088e5, 0x088e5, 0x088f4, 0x088f5,
	0x08a00, 0x08a05, 0x08a10, 0x08a15, 0x08a20, 0x08a25, 0x08a30, 0x08a35,
	0x08c00, 0x08c01, 0x08c18, 0x08c1f, 0x08c26, 0x08c34,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_rb_rac_cluster_ps_pipe_br_registers), 8));

/* Block: RB_RBP Cluster: CLUSTER_PS Pipeline: PIPE_BR */
static const u32 gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers[] = {
	0x08800, 0x08801, 0x08803, 0x08803, 0x0880b, 0x0880d, 0x08812, 0x08812,
	0x08820, 0x08820, 0x08822, 0x08822, 0x08827, 0x08828, 0x0882a, 0x0882a,
	0x0882f, 0x08830,
	0x08832, 0x08832, 0x08837, 0x08838, 0x0883a, 0x0883a,
	0x0883f, 0x08840, 0x08842, 0x08842, 0x08847, 0x08848, 0x0884a, 0x0884a,
	0x0884f, 0x08850, 0x08852, 0x08852, 0x08857, 0x08858, 0x0885a, 0x0885a,
	0x0885f, 0x0885f, 0x08865, 0x08865, 0x08871, 0x08872, 0x08877, 0x08877,
	0x08880, 0x08881, 0x08886, 0x08886, 0x08890, 0x08890, 0x088d0, 0x088e4,
	0x088e8, 0x088ea, 0x088f0, 0x088f0, 0x08900, 0x0891a, 0x08927, 0x08928,
	0x08c17, 0x08c17, 0x08c20, 0x08c25,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers), 8));

/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BR Location: HLSQ_State */
static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers[] = {
	0x0a980, 0x0a980, 0x0a982, 0x0a984, 0x0a99e, 0x0a99e, 0x0a9a7, 0x0a9a7,
	0x0a9aa, 0x0a9aa, 0x0a9ae, 0x0a9b0, 0x0a9b3, 0x0a9b5, 0x0a9ba, 0x0a9ba,
	0x0a9bc, 0x0a9bc, 0x0a9c4, 0x0a9c4, 0x0a9cd, 0x0a9cd, 0x0a9e0, 0x0a9fc,
	0x0aa00, 0x0aa00, 0x0aa30, 0x0aa31, 0x0aa40, 0x0aabf, 0x0ab00, 0x0ab03,
	0x0ab05, 0x0ab05, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers), 8));

/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BR Location: HLSQ_DP */
static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers[] = {
	0x0a9b1, 0x0a9b1, 0x0a9c6, 0x0a9cb, 0x0a9d4, 0x0a9df,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers), 8));

/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BR Location: SP_TOP */
static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers[] = {
	0x0a980, 0x0a980, 0x0a982, 0x0a984, 0x0a99e, 0x0a9a2, 0x0a9a7, 0x0a9a8,
	0x0a9aa, 0x0a9aa, 0x0a9ae, 0x0a9ae, 0x0a9b0, 0x0a9b1, 0x0a9b3, 0x0a9b5,
	0x0a9ba, 0x0a9bc, 0x0a9e0, 0x0a9f9, 0x0aa00, 0x0aa00, 0x0ab00, 0x0ab00,
	0x0ab02, 0x0ab02, 0x0ab04, 0x0ab05, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers), 8));

/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BR Location: uSPTP */
static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers[] = {
	0x0a980, 0x0a982, 0x0a985, 0x0a9a6, 0x0a9a8, 0x0a9a9, 0x0a9ab, 0x0a9ae,
	0x0a9b0, 0x0a9b3, 0x0a9b6, 0x0a9b9, 0x0a9bb, 0x0a9bf, 0x0a9c2, 0x0a9c3,
	0x0a9cd, 0x0a9cd, 0x0a9d0, 0x0a9d3, 0x0aa30, 0x0aa31, 0x0aa40, 0x0aabf,
	0x0ab00, 0x0ab05, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers), 8));

/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BV Location: HLSQ_State */
static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_bv_hlsq_state_registers[] = {
	0x0ab00, 0x0ab02, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_bv_hlsq_state_registers), 8));

/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BV Location: SP_TOP */
static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_bv_sp_top_registers[] = {
	0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_bv_sp_top_registers), 8));

/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BV Location: uSPTP */
static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_bv_usptp_registers[] = {
	0x0ab00, 0x0ab02, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_bv_usptp_registers), 8));

/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_LPAC Location: HLSQ_State */
static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers[] = {
	0x0a9b0, 0x0a9b0, 0x0a9b3, 0x0a9b5, 0x0a9ba, 0x0a9ba, 0x0a9bc, 0x0a9bc,
	0x0a9c4, 0x0a9c4, 0x0a9cd, 0x0a9cd, 0x0a9e2, 0x0a9e3, 0x0a9e6, 0x0a9fc,
	0x0aa00, 0x0aa00, 0x0aa31, 0x0aa31, 0x0ab00, 0x0ab01,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers), 8));

/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_LPAC Location: HLSQ_DP */
static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_dp_registers[] = {
	0x0a9b1, 0x0a9b1, 0x0a9d4, 0x0a9df,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_dp_registers), 8));

/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_LPAC Location: SP_TOP */
static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_lpac_sp_top_registers[] = {
	0x0a9b0, 0x0a9b1, 0x0a9b3, 0x0a9b5, 0x0a9ba, 0x0a9bc, 0x0a9e2, 0x0a9e3,
	0x0a9e6, 0x0a9f9, 0x0aa00, 0x0aa00, 0x0ab00, 0x0ab00,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_lpac_sp_top_registers), 8));

/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_LPAC Location: uSPTP */
static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers[] = {
	0x0a9b0, 0x0a9b3, 0x0a9b6, 0x0a9b9, 0x0a9bb, 0x0a9be, 0x0a9c2, 0x0a9c3,
	0x0a9cd, 0x0a9cd, 0x0a9d0, 0x0a9d3, 0x0aa31, 0x0aa31, 0x0ab00, 0x0ab01,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers), 8));

/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BR Location: HLSQ_State */
static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers[] = {
	0x0a800, 0x0a800, 0x0a81b, 0x0a81d, 0x0a822, 0x0a822, 0x0a824, 0x0a824,
	0x0a827, 0x0a82a, 0x0a830, 0x0a830, 0x0a833, 0x0a835, 0x0a83a, 0x0a83a,
	0x0a83c, 0x0a83c, 0x0a83f, 0x0a840, 0x0a85b, 0x0a85d, 0x0a862, 0x0a862,
	0x0a864, 0x0a864, 0x0a867, 0x0a867, 0x0a870, 0x0a870, 0x0a88c, 0x0a88e,
	0x0a893, 0x0a893, 0x0a895, 0x0a895, 0x0a898, 0x0a898, 0x0a89a, 0x0a89d,
	0x0a8a0, 0x0a8af, 0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab03, 0x0ab05, 0x0ab05,
	0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers), 8));

/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BR Location: SP_TOP */
static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_br_sp_top_registers[] = {
	0x0a800, 0x0a800, 0x0a81c, 0x0a81d, 0x0a822, 0x0a824, 0x0a830, 0x0a831,
	0x0a834, 0x0a835, 0x0a83a, 0x0a83c, 0x0a840, 0x0a840, 0x0a85c, 0x0a85d,
	0x0a862, 0x0a864, 0x0a870, 0x0a871, 0x0a88d, 0x0a88e, 0x0a893, 0x0a895,
	0x0a8a0, 0x0a8af, 0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab04, 0x0ab05,
	0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_br_sp_top_registers), 8));

/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BR Location: uSPTP */
static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_br_usptp_registers[] = {
	0x0a800, 0x0a81b, 0x0a81e, 0x0a821, 0x0a823, 0x0a827, 0x0a830, 0x0a833,
	0x0a836, 0x0a839, 0x0a83b, 0x0a85b, 0x0a85e, 0x0a861, 0x0a863, 0x0a867,
	0x0a870, 0x0a88c, 0x0a88f, 0x0a892, 0x0a894, 0x0a898, 0x0a8c0, 0x0a8c3,
	0x0ab00, 0x0ab05, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_br_usptp_registers), 8));

/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BV Location: HLSQ_State */
static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers[] = {
	0x0a800, 0x0a800, 0x0a81b, 0x0a81d, 0x0a822, 0x0a822, 0x0a824, 0x0a824,
	0x0a827, 0x0a82a, 0x0a830, 0x0a830, 0x0a833, 0x0a835, 0x0a83a, 0x0a83a,
	0x0a83c, 0x0a83c, 0x0a83f, 0x0a840, 0x0a85b, 0x0a85d, 0x0a862, 0x0a862,
	0x0a864, 0x0a864, 0x0a867, 0x0a867, 0x0a870, 0x0a870, 0x0a88c, 0x0a88e,
	0x0a893, 0x0a893, 0x0a895, 0x0a895, 0x0a898, 0x0a898, 0x0a89a, 0x0a89d,
	0x0a8a0, 0x0a8af, 0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab02, 0x0ab0a, 0x0ab1b,
	0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers), 8));

/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BV Location: SP_TOP */
static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers[] = {
	0x0a800, 0x0a800, 0x0a81c, 0x0a81d, 0x0a822, 0x0a824, 0x0a830, 0x0a831,
	0x0a834, 0x0a835, 0x0a83a, 0x0a83c, 0x0a840, 0x0a840, 0x0a85c, 0x0a85d,
	0x0a862, 0x0a864, 0x0a870, 0x0a871, 0x0a88d, 0x0a88e, 0x0a893, 0x0a895,
	0x0a8a0, 0x0a8af, 0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab0a, 0x0ab1b,
	0x0ab20, 0x0ab20,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers), 8));

/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BV Location: uSPTP */
static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_bv_usptp_registers[] = {
	0x0a800, 0x0a81b, 0x0a81e, 0x0a821, 0x0a823, 0x0a827, 0x0a830, 0x0a833,
	0x0a836, 0x0a839, 0x0a83b, 0x0a85b, 0x0a85e, 0x0a861, 0x0a863, 0x0a867,
	0x0a870, 0x0a88c, 0x0a88f, 0x0a892, 0x0a894, 0x0a898, 0x0a8c0, 0x0a8c3,
	0x0ab00, 0x0ab02, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_bv_usptp_registers), 8));

/* Block: TPL1 Cluster: CLUSTER_SP_PS Pipeline: PIPE_BR */
static const u32 gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers[] = {
	0x0b180, 0x0b183, 0x0b190, 0x0b195, 0x0b2c0, 0x0b2d5, 0x0b300, 0x0b307,
	0x0b309, 0x0b309, 0x0b310, 0x0b310,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers), 8));

/* Block: TPL1 Cluster: CLUSTER_SP_PS Pipeline: PIPE_BV */
static const u32 gen7_0_0_tpl1_cluster_sp_ps_pipe_bv_registers[] = {
	0x0b300, 0x0b307, 0x0b309, 0x0b309, 0x0b310, 0x0b310,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_cluster_sp_ps_pipe_bv_registers), 8));

/* Block: TPL1 Cluster: CLUSTER_SP_PS Pipeline: PIPE_LPAC */
static const u32 gen7_0_0_tpl1_cluster_sp_ps_pipe_lpac_registers[] = {
	0x0b180, 0x0b181, 0x0b300, 0x0b301, 0x0b307, 0x0b307, 0x0b309, 0x0b309,
	0x0b310, 0x0b310,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_cluster_sp_ps_pipe_lpac_registers), 8));

/* Block: TPL1 Cluster: CLUSTER_SP_VS Pipeline: PIPE_BR */
static const u32 gen7_0_0_tpl1_cluster_sp_vs_pipe_br_registers[] = {
	0x0b300, 0x0b307, 0x0b309, 0x0b309, 0x0b310, 0x0b310,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_cluster_sp_vs_pipe_br_registers), 8));

/* Block: TPL1 Cluster: CLUSTER_SP_VS Pipeline: PIPE_BV */
static const u32 gen7_0_0_tpl1_cluster_sp_vs_pipe_bv_registers[] = {
	0x0b300, 0x0b307, 0x0b309, 0x0b309, 0x0b310, 0x0b310,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_cluster_sp_vs_pipe_bv_registers), 8));

/* Block: VFD Cluster: CLUSTER_FE Pipeline: PIPE_BR */
static const u32 gen7_0_0_vfd_cluster_fe_pipe_br_registers[] = {
	0x0a000, 0x0a009, 0x0a00e, 0x0a0ef,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_vfd_cluster_fe_pipe_br_registers), 8));

/* Block: VFD Cluster: CLUSTER_FE Pipeline: PIPE_BV */
static const u32 gen7_0_0_vfd_cluster_fe_pipe_bv_registers[] = {
	0x0a000, 0x0a009, 0x0a00e, 0x0a0ef,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_vfd_cluster_fe_pipe_bv_registers), 8));

/* Block: VPC Cluster: CLUSTER_FE Pipeline: PIPE_BR */
static const u32 gen7_0_0_vpc_cluster_fe_pipe_br_registers[] = {
	0x09300, 0x09307,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_fe_pipe_br_registers), 8));

/* Block: VPC Cluster: CLUSTER_FE Pipeline: PIPE_BV */
static const u32 gen7_0_0_vpc_cluster_fe_pipe_bv_registers[] = {
	0x09300, 0x09307,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_fe_pipe_bv_registers), 8));

/* Block: VPC Cluster: CLUSTER_PC_VS Pipeline: PIPE_BR */
static const u32 gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers[] = {
	0x09101, 0x0910c, 0x09300, 0x09307,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers), 8));

/* Block: VPC Cluster: CLUSTER_PC_VS Pipeline: PIPE_BV */
static const u32 gen7_0_0_vpc_cluster_pc_vs_pipe_bv_registers[] = {
	0x09101, 0x0910c, 0x09300, 0x09307,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_pc_vs_pipe_bv_registers), 8));

/* Block: VPC Cluster: CLUSTER_VPC_PS Pipeline: PIPE_BR */
static const u32 gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers[] = {
	0x09200, 0x0920f, 0x09212, 0x09216, 0x09218, 0x09236, 0x09300, 0x09307,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers), 8));

/* Block: VPC Cluster: CLUSTER_VPC_PS Pipeline: PIPE_BV */
static const u32 gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers[] = {
	0x09200, 0x0920f, 0x09212, 0x09216, 0x09218, 0x09236, 0x09300, 0x09307,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers), 8));

/* Block: SP Cluster: noncontext Pipeline: PIPE_BR Location: HLSQ_State */
static const u32 gen7_0_0_sp_noncontext_pipe_br_hlsq_state_registers[] = {
	0x0ae52, 0x0ae52, 0x0ae60, 0x0ae67, 0x0ae69, 0x0ae73,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_br_hlsq_state_registers), 8));

/* Block: SP Cluster: noncontext Pipeline: PIPE_BR Location: SP_TOP */
static const u32 gen7_0_0_sp_noncontext_pipe_br_sp_top_registers[] = {
	0x0ae00, 0x0ae00, 0x0ae02, 0x0ae04, 0x0ae06, 0x0ae09, 0x0ae0c, 0x0ae0c,
	0x0ae0f, 0x0ae0f, 0x0ae28, 0x0ae2b, 0x0ae35, 0x0ae35, 0x0ae3a, 0x0ae3f,
	0x0ae50, 0x0ae52, 0x0ae80, 0x0aea3,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_br_sp_top_registers), 8));

/* Block: SP Cluster: noncontext Pipeline: PIPE_BR Location: uSPTP */
static const u32 gen7_0_0_sp_noncontext_pipe_br_usptp_registers[] = {
	0x0ae00, 0x0ae00, 0x0ae02, 0x0ae04, 0x0ae06, 0x0ae09, 0x0ae0c, 0x0ae0c,
	0x0ae0f, 0x0ae0f, 0x0ae30, 0x0ae32, 0x0ae35, 0x0ae35, 0x0ae3a, 0x0ae3b,
	0x0ae3e, 0x0ae3f, 0x0ae50, 0x0ae52,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_br_usptp_registers), 8));

/* Block: SP Cluster: noncontext Pipeline: PIPE_LPAC Location: HLSQ_State */
static const u32 gen7_0_0_sp_noncontext_pipe_lpac_hlsq_state_registers[] = {
	0x0af88, 0x0af8a,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_lpac_hlsq_state_registers), 8));

/* Block: SP Cluster: noncontext Pipeline: PIPE_LPAC Location: SP_TOP */
static const u32 gen7_0_0_sp_noncontext_pipe_lpac_sp_top_registers[] = {
	0x0af80, 0x0af84,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_lpac_sp_top_registers), 8));

/* Block: SP Cluster: noncontext Pipeline: PIPE_LPAC Location: uSPTP */
static const u32 gen7_0_0_sp_noncontext_pipe_lpac_usptp_registers[] = {
	0x0af80, 0x0af84, 0x0af90, 0x0af92,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_lpac_usptp_registers), 8));

/* Block: TPl1 Cluster: noncontext Pipeline: PIPE_BR */
static const u32 gen7_0_0_tpl1_noncontext_pipe_br_registers[] = {
	0x0b600, 0x0b600, 0x0b602, 0x0b602, 0x0b604, 0x0b604, 0x0b608, 0x0b60c,
	0x0b60f, 0x0b621, 0x0b630, 0x0b633,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_noncontext_pipe_br_registers), 8));

/* Block: TPl1 Cluster: noncontext Pipeline: PIPE_LPAC */
static const u32 gen7_0_0_tpl1_noncontext_pipe_lpac_registers[] = {
	0x0b780, 0x0b780,
	UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_noncontext_pipe_lpac_registers), 8));

/* Describes one register cluster read by the crash dumper */
struct gen7_cluster_registers {
	/* cluster_id: Cluster identifier */
	int cluster_id;
	/* pipe_id: Pipe Identifier */
	int pipe_id;
	/*
context_id: one of STATE_ that identifies the context to dump */
	int context_id;
	/* regs: Pointer to an array of register pairs */
	const u32 *regs;
	/* sel: Pointer to a selector register to write before reading */
	const struct sel_reg *sel;
	/* offset: Internal variable to track the state of the crashdump */
	unsigned int offset;
};

static struct gen7_cluster_registers gen7_clusters[] = {
	{ CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT,
		gen7_0_0_noncontext_pipe_br_registers, },
	{ CLUSTER_NONE, PIPE_BV, STATE_NON_CONTEXT,
		gen7_0_0_noncontext_pipe_bv_registers, },
	{ CLUSTER_NONE, PIPE_LPAC, STATE_NON_CONTEXT,
		gen7_0_0_noncontext_pipe_lpac_registers, },
	{ CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT,
		gen7_0_0_noncontext_rb_rac_pipe_br_registers, &gen7_0_0_rb_rac_sel, },
	{ CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT,
		gen7_0_0_noncontext_rb_rbp_pipe_br_registers, &gen7_0_0_rb_rbp_sel, },
	{ CLUSTER_GRAS, PIPE_BR, STATE_FORCE_CTXT_0,
		gen7_0_0_gras_cluster_gras_pipe_br_registers, },
	{ CLUSTER_GRAS, PIPE_BV, STATE_FORCE_CTXT_0,
		gen7_0_0_gras_cluster_gras_pipe_bv_registers, },
	{ CLUSTER_GRAS, PIPE_BR, STATE_FORCE_CTXT_1,
		gen7_0_0_gras_cluster_gras_pipe_br_registers, },
	{ CLUSTER_GRAS, PIPE_BV, STATE_FORCE_CTXT_1,
		gen7_0_0_gras_cluster_gras_pipe_bv_registers, },
	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0,
		gen7_0_0_pc_cluster_fe_pipe_br_registers, },
	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0,
		gen7_0_0_pc_cluster_fe_pipe_bv_registers, },
	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1,
		gen7_0_0_pc_cluster_fe_pipe_br_registers, },
	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1,
		gen7_0_0_pc_cluster_fe_pipe_bv_registers, },
	{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_0,
		gen7_0_0_rb_rac_cluster_ps_pipe_br_registers, &gen7_0_0_rb_rac_sel, },
	{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_1,
		gen7_0_0_rb_rac_cluster_ps_pipe_br_registers, &gen7_0_0_rb_rac_sel, },
	{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_0,
		gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers, &gen7_0_0_rb_rbp_sel, },
	{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_1,
		gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers, &gen7_0_0_rb_rbp_sel, },
	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0,
		gen7_0_0_vfd_cluster_fe_pipe_br_registers, },
	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0,
		gen7_0_0_vfd_cluster_fe_pipe_bv_registers, },
	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1,
		gen7_0_0_vfd_cluster_fe_pipe_br_registers, },
	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1,
		gen7_0_0_vfd_cluster_fe_pipe_bv_registers, },
	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0,
		gen7_0_0_vpc_cluster_fe_pipe_br_registers, },
	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0,
		gen7_0_0_vpc_cluster_fe_pipe_bv_registers, },
	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1,
		gen7_0_0_vpc_cluster_fe_pipe_br_registers, },
	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1,
		gen7_0_0_vpc_cluster_fe_pipe_bv_registers, },
	{ CLUSTER_PC_VS, PIPE_BR, STATE_FORCE_CTXT_0,
		gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers, },
	{ CLUSTER_PC_VS, PIPE_BV, STATE_FORCE_CTXT_0,
		gen7_0_0_vpc_cluster_pc_vs_pipe_bv_registers, },
	{ CLUSTER_PC_VS, PIPE_BR, STATE_FORCE_CTXT_1,
		gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers, },
	{ CLUSTER_PC_VS, PIPE_BV, STATE_FORCE_CTXT_1,
		gen7_0_0_vpc_cluster_pc_vs_pipe_bv_registers, },
	{ CLUSTER_VPC_PS, PIPE_BR, STATE_FORCE_CTXT_0,
		gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers, },
	{ CLUSTER_VPC_PS, PIPE_BV, STATE_FORCE_CTXT_0,
		gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers, },
	{ CLUSTER_VPC_PS, PIPE_BR, STATE_FORCE_CTXT_1,
		gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers, },
	{ CLUSTER_VPC_PS, PIPE_BV, STATE_FORCE_CTXT_1,
		gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers, },
};

/* Describes one SP/TP register cluster to capture in a GPU snapshot */
struct gen7_sptp_cluster_registers {
	/* cluster_id: Cluster identifier */
	int cluster_id;
	/* statetype: SP block state type for the cluster */
	int statetype;
	/* pipe_id: Pipe identifier */
	int pipe_id;
	/* context_id: Context identifier */
	int
context_id; + /* location_id: Location identifier */ + int location_id; + /* regs: Pointer to the list of register pairs to read */ + const u32 *regs; + /* regbase: Dword offset of the register block in the GPu register space */ + unsigned int regbase; + /* offset: Internal variable used to track the crashdump state */ + unsigned int offset; +}; + +static struct gen7_sptp_cluster_registers gen7_sptp_clusters[] = { + { CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, HLSQ_State, + gen7_0_0_sp_noncontext_pipe_br_hlsq_state_registers, 0xae00 }, + { CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, SP_TOP, + gen7_0_0_sp_noncontext_pipe_br_sp_top_registers, 0xae00 }, + { CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, USPTP, + gen7_0_0_sp_noncontext_pipe_br_usptp_registers, 0xae00 }, + { CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, HLSQ_State, + gen7_0_0_sp_noncontext_pipe_lpac_hlsq_state_registers, 0xaf80 }, + { CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, SP_TOP, + gen7_0_0_sp_noncontext_pipe_lpac_sp_top_registers, 0xaf80 }, + { CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, USPTP, + gen7_0_0_sp_noncontext_pipe_lpac_usptp_registers, 0xaf80 }, + { CLUSTER_NONE, TP0_NCTX_REG, PIPE_BR, 0, USPTP, + gen7_0_0_tpl1_noncontext_pipe_br_registers, 0xb600 }, + { CLUSTER_NONE, TP0_NCTX_REG, PIPE_LPAC, 0, USPTP, + gen7_0_0_tpl1_noncontext_pipe_lpac_registers, 0xb780 }, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_State, + gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa800 }, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_DP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 }, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, SP_TOP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 }, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 }, + { CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_State, + gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa800 }, + { CLUSTER_SP_PS, 
SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_DP,
		gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 },
	{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, SP_TOP,
		gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 },
	{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP,
		gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 },
	{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, HLSQ_State,
		gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa800 },
	{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, HLSQ_DP,
		gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 },
	{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, SP_TOP,
		gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 },
	{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP,
		gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 },
	{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, HLSQ_State,
		gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa800 },
	{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, HLSQ_DP,
		gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 },
	{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, SP_TOP,
		gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 },
	{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP,
		gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 },
	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_State,
		gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers, 0xa800 },
	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_DP,
		gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_dp_registers, 0xa800 },
	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, SP_TOP,
		gen7_0_0_sp_cluster_sp_ps_pipe_lpac_sp_top_registers, 0xa800 },
	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP,
		gen7_0_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers, 0xa800 },
	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_State,
		gen7_0_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers, 0xa800 },
	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_State,
		gen7_0_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers, 0xa800 },
	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, SP_TOP,
		gen7_0_0_sp_cluster_sp_vs_pipe_br_sp_top_registers, 0xa800 },
	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, SP_TOP,
		gen7_0_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers, 0xa800 },
	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP,
		gen7_0_0_sp_cluster_sp_vs_pipe_br_usptp_registers, 0xa800 },
	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP,
		gen7_0_0_sp_cluster_sp_vs_pipe_bv_usptp_registers, 0xa800 },
	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_State,
		gen7_0_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers, 0xa800 },
	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_State,
		gen7_0_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers, 0xa800 },
	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, SP_TOP,
		gen7_0_0_sp_cluster_sp_vs_pipe_br_sp_top_registers, 0xa800 },
	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, SP_TOP,
		gen7_0_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers, 0xa800 },
	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP,
		gen7_0_0_sp_cluster_sp_vs_pipe_br_usptp_registers, 0xa800 },
	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP,
		gen7_0_0_sp_cluster_sp_vs_pipe_bv_usptp_registers, 0xa800 },
	/* TPl1 context clusters */
	{ CLUSTER_SP_PS, TP0_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP,
		gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 },
	{ CLUSTER_SP_PS, TP0_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP,
		gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 },
	{ CLUSTER_SP_PS, TP0_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP,
		gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 },
	{ CLUSTER_SP_PS, TP0_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP,
		gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 },
	{ CLUSTER_SP_PS, TP0_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP,
		gen7_0_0_tpl1_cluster_sp_ps_pipe_lpac_registers, 0xb000 },
	{ CLUSTER_SP_VS,
TP0_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP, + gen7_0_0_tpl1_cluster_sp_vs_pipe_br_registers, 0xb000 }, + { CLUSTER_SP_VS, TP0_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP, + gen7_0_0_tpl1_cluster_sp_vs_pipe_bv_registers, 0xb000 }, + { CLUSTER_SP_VS, TP0_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP, + gen7_0_0_tpl1_cluster_sp_vs_pipe_br_registers, 0xb000 }, + { CLUSTER_SP_VS, TP0_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP, + gen7_0_0_tpl1_cluster_sp_vs_pipe_bv_registers, 0xb000 }, +}; + +struct gen7_shader_block { + /* statetype: Type identifer for the block */ + u32 statetype; + /* size: Size of the block (in dwords) */ + u32 size + /* sp_id: The SP id to dump */; + u32 sp_id; + /* usptp: The usptp id to dump */; + u32 usptp; + /* pipe_id: Pipe identifier for the block data */ + u32 pipeid; + /* location: Location identifer for the block data */ + u32 location; + /* offset: The offset in the snasphot dump */ + u64 offset; +}; + +static struct gen7_shader_block gen7_shader_blocks[] = { + {TP0_TMO_DATA, 0x200, 0, 0, PIPE_BR, USPTP}, + {TP0_SMO_DATA, 0x80, 0, 0, PIPE_BR, USPTP}, + {TP0_MIPMAP_BASE_DATA, 0x3c0, 0, 0, PIPE_BR, USPTP}, + {SP_INST_DATA, 0x800, 0, 0, PIPE_BR, USPTP}, + {SP_INST_DATA_1, 0x800, 0, 0, PIPE_BR, USPTP}, + {SP_LB_0_DATA, 0x800, 0, 0, PIPE_BR, USPTP}, + {SP_LB_1_DATA, 0x800, 0, 0, PIPE_BR, USPTP}, + {SP_LB_2_DATA, 0x800, 0, 0, PIPE_BR, USPTP}, + {SP_LB_3_DATA, 0x800, 0, 0, PIPE_BR, USPTP}, + {SP_LB_4_DATA, 0x800, 0, 0, PIPE_BR, USPTP}, + {SP_LB_5_DATA, 0x800, 0, 0, PIPE_BR, USPTP}, + {SP_LB_6_DATA, 0x800, 0, 0, PIPE_BR, USPTP}, + {SP_LB_7_DATA, 0x800, 0, 0, PIPE_BR, USPTP}, + {SP_CB_RAM, 0x390, 0, 0, PIPE_BR, USPTP,}, + {SP_INST_TAG, 0x90, 0, 0, PIPE_BR, USPTP}, + {SP_INST_DATA_2, 0x200, 0, 0, PIPE_BR, USPTP}, + {SP_TMO_TAG, 0x80, 0, 0, PIPE_BR, USPTP}, + {SP_SMO_TAG, 0x80, 0, 0, PIPE_BR, USPTP}, + {SP_STATE_DATA, 0x40, 0, 0, PIPE_BR, USPTP}, + {SP_HWAVE_RAM, 0x100, 0, 0, PIPE_BR, USPTP}, + {SP_L0_INST_BUF, 0x50, 0, 0, PIPE_BR, USPTP}, + {SP_LB_8_DATA, 0x800, 0, 0, PIPE_BR, 
USPTP},
	{SP_LB_9_DATA, 0x800, 0, 0, PIPE_BR, USPTP},
	{SP_LB_10_DATA, 0x800, 0, 0, PIPE_BR, USPTP},
	{SP_LB_11_DATA, 0x800, 0, 0, PIPE_BR, USPTP},
	{SP_LB_12_DATA, 0x200, 0, 0, PIPE_BR, USPTP},
	/* HLSQ state blocks */
	{HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x10, 0, 0, PIPE_BV, HLSQ_State},
	{HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x10, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_CPS_BE_CTXT_BUF_RAM_TAG, 0x10, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x300, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x300, 0, 0, PIPE_BV, HLSQ_State},
	{HLSQ_GFX_CPS_BE_CTXT_BUF_RAM, 0x300, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_CHUNK_CVS_RAM, 0x1c0, 0, 0, PIPE_BV, HLSQ_State},
	{HLSQ_CHUNK_CVS_RAM, 0x1c0, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_CHUNK_CPS_RAM, 0x300, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_CHUNK_CPS_RAM, 0x300, 0, 0, PIPE_LPAC, HLSQ_State},
	{HLSQ_CHUNK_CVS_RAM_TAG, 0x40, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_CHUNK_CVS_RAM_TAG, 0x40, 0, 0, PIPE_BV, HLSQ_State},
	{HLSQ_CHUNK_CPS_RAM_TAG, 0x40, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_CHUNK_CPS_RAM_TAG, 0x40, 0, 0, PIPE_LPAC, HLSQ_State},
	{HLSQ_ICB_CVS_CB_BASE_TAG, 0x10, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_ICB_CVS_CB_BASE_TAG, 0x10, 0, 0, PIPE_BV, HLSQ_State},
	{HLSQ_ICB_CPS_CB_BASE_TAG, 0x10, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_ICB_CPS_CB_BASE_TAG, 0x10, 0, 0, PIPE_LPAC, HLSQ_State},
	{HLSQ_CVS_MISC_RAM, 0x280, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_CVS_MISC_RAM, 0x280, 0, 0, PIPE_BV, HLSQ_State},
	{HLSQ_CPS_MISC_RAM, 0x800, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_CPS_MISC_RAM, 0x800, 0, 0, PIPE_LPAC, HLSQ_State},
	{HLSQ_CPS_MISC_RAM_1, 0x200, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_INST_RAM, 0x800, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_INST_RAM, 0x800, 0, 0, PIPE_BV, HLSQ_State},
	{HLSQ_INST_RAM, 0x800, 0, 0, PIPE_LPAC, HLSQ_State},
	{HLSQ_GFX_CVS_CONST_RAM, 0x800, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_GFX_CVS_CONST_RAM, 0x800, 0, 0, PIPE_BV, HLSQ_State},
	{HLSQ_GFX_CPS_CONST_RAM, 0x800, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_GFX_CPS_CONST_RAM, 0x800, 0, 0, PIPE_LPAC, HLSQ_State},
	{HLSQ_CVS_MISC_RAM_TAG, 0x10, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_CVS_MISC_RAM_TAG, 0x10, 0, 0, PIPE_BV, HLSQ_State},
	{HLSQ_CPS_MISC_RAM_TAG, 0x10, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_CPS_MISC_RAM_TAG, 0x10, 0, 0, PIPE_LPAC, HLSQ_State},
	{HLSQ_INST_RAM_TAG, 0x80, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_INST_RAM_TAG, 0x80, 0, 0, PIPE_BV, HLSQ_State},
	{HLSQ_INST_RAM_TAG, 0x80, 0, 0, PIPE_LPAC, HLSQ_State},
	{HLSQ_GFX_CVS_CONST_RAM_TAG, 0x64, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_GFX_CVS_CONST_RAM_TAG, 0x64, 0, 0, PIPE_BV, HLSQ_State},
	{HLSQ_GFX_CPS_CONST_RAM_TAG, 0x64, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_GFX_CPS_CONST_RAM_TAG, 0x64, 0, 0, PIPE_LPAC, HLSQ_State},
	{HLSQ_INST_RAM_1, 0x800, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_STPROC_META, 0x10, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_BV_BE_META, 0x10, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_BV_BE_META, 0x10, 0, 0, PIPE_BV, HLSQ_State},
	{HLSQ_DATAPATH_META, 0x20, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_FRONTEND_META, 0x40, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_FRONTEND_META, 0x40, 0, 0, PIPE_BV, HLSQ_State},
	{HLSQ_FRONTEND_META, 0x40, 0, 0, PIPE_LPAC, HLSQ_State},
	{HLSQ_INDIRECT_META, 0x10, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_BACKEND_META, 0x40, 0, 0, PIPE_BR, HLSQ_State},
	{HLSQ_BACKEND_META, 0x40, 0, 0, PIPE_BV, HLSQ_State},
	{HLSQ_BACKEND_META, 0x40, 0, 0, PIPE_LPAC, HLSQ_State},
	/* SP 0 USPTP 1 */
	{TP0_TMO_DATA, 0x200, 0, 1, PIPE_BR, USPTP},
	{TP0_SMO_DATA, 0x80, 0, 1, PIPE_BR, USPTP},
	{TP0_MIPMAP_BASE_DATA, 0x3c0, 0, 1, PIPE_BR, USPTP},
	{SP_INST_DATA, 0x800, 0, 1, PIPE_BR, USPTP},
	{SP_INST_DATA_1, 0x800, 0, 1, PIPE_BR, USPTP},
	{SP_LB_0_DATA, 0x800, 0, 1, PIPE_BR, USPTP},
	{SP_LB_1_DATA, 0x800, 0, 1, PIPE_BR, USPTP},
	{SP_LB_2_DATA, 0x800, 0, 1, PIPE_BR, USPTP},
	{SP_LB_3_DATA, 0x800, 0, 1, PIPE_BR, USPTP},
	{SP_LB_4_DATA, 0x800, 0, 1, PIPE_BR, USPTP},
	{SP_LB_5_DATA, 0x800, 0, 1, PIPE_BR, USPTP},
	{SP_LB_6_DATA, 0x800, 0, 1, PIPE_BR, USPTP},
	{SP_LB_7_DATA, 0x800, 0, 1, PIPE_BR, USPTP},
	{SP_CB_RAM, 0x390, 0, 1, PIPE_BR, USPTP},
	{SP_INST_TAG, 0x90, 0, 1, PIPE_BR, USPTP},
	{SP_INST_DATA_2, 0x200, 0, 1, PIPE_BR, USPTP},
	{SP_TMO_TAG, 0x80, 0, 1, PIPE_BR, USPTP},
	{SP_SMO_TAG, 0x80, 0, 1, PIPE_BR, USPTP},
	{SP_STATE_DATA, 0x40, 0, 1, PIPE_BR, USPTP},
	{SP_HWAVE_RAM, 0x100, 0, 1, PIPE_BR, USPTP},
	{SP_L0_INST_BUF, 0x50, 0, 1, PIPE_BR, USPTP},
	{SP_LB_8_DATA, 0x800, 0, 1, PIPE_BR, USPTP},
	{SP_LB_9_DATA, 0x800, 0, 1, PIPE_BR, USPTP},
	{SP_LB_10_DATA, 0x800, 0, 1, PIPE_BR, USPTP},
	{SP_LB_11_DATA, 0x800, 0, 1, PIPE_BR, USPTP},
	{SP_LB_12_DATA, 0x200, 0, 1, PIPE_BR, USPTP},
	/* SP 1 USPTP 0 */
	{TP0_TMO_DATA, 0x200, 1, 0, PIPE_BR, USPTP},
	{TP0_SMO_DATA, 0x80, 1, 0, PIPE_BR, USPTP},
	{TP0_MIPMAP_BASE_DATA, 0x3c0, 1, 0, PIPE_BR, USPTP},
	{SP_INST_DATA, 0x800, 1, 0, PIPE_BR, USPTP},
	{SP_INST_DATA_1, 0x800, 1, 0, PIPE_BR, USPTP},
	{SP_LB_0_DATA, 0x800, 1, 0, PIPE_BR, USPTP},
	{SP_LB_1_DATA, 0x800, 1, 0, PIPE_BR, USPTP},
	{SP_LB_2_DATA, 0x800, 1, 0, PIPE_BR, USPTP},
	{SP_LB_3_DATA, 0x800, 1, 0, PIPE_BR, USPTP},
	{SP_LB_4_DATA, 0x800, 1, 0, PIPE_BR, USPTP},
	{SP_LB_5_DATA, 0x800, 1, 0, PIPE_BR, USPTP},
	{SP_LB_6_DATA, 0x800, 1, 0, PIPE_BR, USPTP},
	{SP_LB_7_DATA, 0x800, 1, 0, PIPE_BR, USPTP},
	{SP_CB_RAM, 0x390, 1, 0, PIPE_BR, USPTP},
	{SP_INST_TAG, 0x90, 1, 0, PIPE_BR, USPTP},
	{SP_INST_DATA_2, 0x200, 1, 0, PIPE_BR, USPTP},
	{SP_TMO_TAG, 0x80, 1, 0, PIPE_BR, USPTP},
	{SP_SMO_TAG, 0x80, 1, 0, PIPE_BR, USPTP},
	{SP_STATE_DATA, 0x40, 1, 0, PIPE_BR, USPTP},
	{SP_HWAVE_RAM, 0x100, 1, 0, PIPE_BR, USPTP},
	{SP_L0_INST_BUF, 0x50, 1, 0, PIPE_BR, USPTP},
	{SP_LB_8_DATA, 0x800, 1, 0, PIPE_BR, USPTP},
	{SP_LB_9_DATA, 0x800, 1, 0, PIPE_BR, USPTP},
	{SP_LB_10_DATA, 0x800, 1, 0, PIPE_BR, USPTP},
	{SP_LB_11_DATA, 0x800, 1, 0, PIPE_BR, USPTP},
	{SP_LB_12_DATA, 0x200, 1, 0, PIPE_BR, USPTP},
	/* SP 1 USPTP 1 */
	{TP0_TMO_DATA, 0x200, 1, 1, PIPE_BR, USPTP},
	{TP0_SMO_DATA, 0x80, 1, 1, PIPE_BR, USPTP},
	{TP0_MIPMAP_BASE_DATA, 0x3c0, 1, 1, PIPE_BR, USPTP},
	{SP_INST_DATA, 0x800, 1, 1, PIPE_BR, USPTP},
	{SP_INST_DATA_1, 0x800, 1, 1, PIPE_BR, USPTP},
	{SP_LB_0_DATA, 0x800, 1, 1, PIPE_BR, USPTP},
	{SP_LB_1_DATA, 0x800, 1, 1, PIPE_BR, USPTP},
	{SP_LB_2_DATA, 0x800, 1, 1, PIPE_BR, USPTP},
	{SP_LB_3_DATA, 0x800, 1, 1, PIPE_BR, USPTP},
	{SP_LB_4_DATA, 0x800, 1, 1, PIPE_BR, USPTP},
	{SP_LB_5_DATA, 0x800, 1, 1, PIPE_BR, USPTP},
	{SP_LB_6_DATA, 0x800, 1, 1, PIPE_BR, USPTP},
	{SP_LB_7_DATA, 0x800, 1, 1, PIPE_BR, USPTP},
	{SP_CB_RAM, 0x390, 1, 1, PIPE_BR, USPTP},
	{SP_INST_TAG, 0x90, 1, 1, PIPE_BR, USPTP},
	{SP_INST_DATA_2, 0x200, 1, 1, PIPE_BR, USPTP},
	{SP_TMO_TAG, 0x80, 1, 1, PIPE_BR, USPTP},
	{SP_SMO_TAG, 0x80, 1, 1, PIPE_BR, USPTP},
	{SP_STATE_DATA, 0x40, 1, 1, PIPE_BR, USPTP},
	{SP_HWAVE_RAM, 0x100, 1, 1, PIPE_BR, USPTP},
	{SP_L0_INST_BUF, 0x50, 1, 1, PIPE_BR, USPTP},
	{SP_LB_8_DATA, 0x800, 1, 1, PIPE_BR, USPTP},
	{SP_LB_9_DATA, 0x800, 1, 1, PIPE_BR, USPTP},
	{SP_LB_10_DATA, 0x800, 1, 1, PIPE_BR, USPTP},
	{SP_LB_11_DATA, 0x800, 1, 1, PIPE_BR, USPTP},
	{SP_LB_12_DATA, 0x200, 1, 1, PIPE_BR, USPTP},
	/* SP 2 USPTP 0 */
	{TP0_TMO_DATA, 0x200, 2, 0, PIPE_BR, USPTP},
	{TP0_SMO_DATA, 0x80, 2, 0, PIPE_BR, USPTP},
	{TP0_MIPMAP_BASE_DATA, 0x3c0, 2, 0, PIPE_BR, USPTP},
	{SP_INST_DATA, 0x800, 2, 0, PIPE_BR, USPTP},
	{SP_INST_DATA_1, 0x800, 2, 0, PIPE_BR, USPTP},
	{SP_LB_0_DATA, 0x800, 2, 0, PIPE_BR, USPTP},
	{SP_LB_1_DATA, 0x800, 2, 0, PIPE_BR, USPTP},
	{SP_LB_2_DATA, 0x800, 2, 0, PIPE_BR, USPTP},
	{SP_LB_3_DATA, 0x800, 2, 0, PIPE_BR, USPTP},
	{SP_LB_4_DATA, 0x800, 2, 0, PIPE_BR, USPTP},
	{SP_LB_5_DATA, 0x800, 2, 0, PIPE_BR, USPTP},
	{SP_LB_6_DATA, 0x800, 2, 0, PIPE_BR, USPTP},
	{SP_LB_7_DATA, 0x800, 2, 0, PIPE_BR, USPTP},
	{SP_CB_RAM, 0x390, 2, 0, PIPE_BR, USPTP},
	{SP_INST_TAG, 0x90, 2, 0, PIPE_BR, USPTP},
	{SP_INST_DATA_2, 0x200, 2, 0, PIPE_BR, USPTP},
	{SP_TMO_TAG, 0x80, 2, 0, PIPE_BR, USPTP},
	{SP_SMO_TAG, 0x80, 2, 0, PIPE_BR, USPTP},
	{SP_STATE_DATA, 0x40, 2, 0, PIPE_BR, USPTP},
	{SP_HWAVE_RAM, 0x100, 2, 0, PIPE_BR, USPTP},
	{SP_L0_INST_BUF, 0x50, 2, 0, PIPE_BR, USPTP},
	{SP_LB_8_DATA, 0x800, 2, 0, PIPE_BR, USPTP},
	{SP_LB_9_DATA, 0x800, 2, 0, PIPE_BR, USPTP},
	{SP_LB_10_DATA, 0x800, 2, 0, PIPE_BR, USPTP},
	{SP_LB_11_DATA, 0x800, 2, 0, PIPE_BR, USPTP},
	{SP_LB_12_DATA, 0x200, 2, 0, PIPE_BR, USPTP},
	/* SP 2 USPTP 1 */
	{TP0_TMO_DATA, 0x200, 2, 1, PIPE_BR, USPTP},
	{TP0_SMO_DATA, 0x80, 2, 1, PIPE_BR, USPTP},
	{TP0_MIPMAP_BASE_DATA, 0x3c0, 2, 1, PIPE_BR, USPTP},
	{SP_INST_DATA, 0x800, 2, 1, PIPE_BR, USPTP},
	{SP_INST_DATA_1, 0x800, 2, 1, PIPE_BR, USPTP},
	{SP_LB_0_DATA, 0x800, 2, 1, PIPE_BR, USPTP},
	{SP_LB_1_DATA, 0x800, 2, 1, PIPE_BR, USPTP},
	{SP_LB_2_DATA, 0x800, 2, 1, PIPE_BR, USPTP},
	{SP_LB_3_DATA, 0x800, 2, 1, PIPE_BR, USPTP},
	{SP_LB_4_DATA, 0x800, 2, 1, PIPE_BR, USPTP},
	{SP_LB_5_DATA, 0x800, 2, 1, PIPE_BR, USPTP},
	{SP_LB_6_DATA, 0x800, 2, 1, PIPE_BR, USPTP},
	{SP_LB_7_DATA, 0x800, 2, 1, PIPE_BR, USPTP},
	{SP_CB_RAM, 0x390, 2, 1, PIPE_BR, USPTP},
	{SP_INST_TAG, 0x90, 2, 1, PIPE_BR, USPTP},
	{SP_INST_DATA_2, 0x200, 2, 1, PIPE_BR, USPTP},
	{SP_TMO_TAG, 0x80, 2, 1, PIPE_BR, USPTP},
	{SP_SMO_TAG, 0x80, 2, 1, PIPE_BR, USPTP},
	{SP_STATE_DATA, 0x40, 2, 1, PIPE_BR, USPTP},
	{SP_HWAVE_RAM, 0x100, 2, 1, PIPE_BR, USPTP},
	{SP_L0_INST_BUF, 0x50, 2, 1, PIPE_BR, USPTP},
	{SP_LB_8_DATA, 0x800, 2, 1, PIPE_BR, USPTP},
	{SP_LB_9_DATA, 0x800, 2, 1, PIPE_BR, USPTP},
	{SP_LB_10_DATA, 0x800, 2, 1, PIPE_BR, USPTP},
	{SP_LB_11_DATA, 0x800, 2, 1, PIPE_BR, USPTP},
	{SP_LB_12_DATA, 0x200, 2, 1, PIPE_BR, USPTP},
	/* SP 3 USPTP 0 */
	{TP0_TMO_DATA, 0x200, 3, 0, PIPE_BR, USPTP},
	{TP0_SMO_DATA, 0x80, 3, 0, PIPE_BR, USPTP},
	{TP0_MIPMAP_BASE_DATA, 0x3c0, 3, 0, PIPE_BR, USPTP},
	{SP_INST_DATA, 0x800, 3, 0, PIPE_BR, USPTP},
	{SP_INST_DATA_1, 0x800, 3, 0, PIPE_BR, USPTP},
	{SP_LB_0_DATA, 0x800, 3, 0, PIPE_BR, USPTP},
	{SP_LB_1_DATA, 0x800, 3, 0, PIPE_BR, USPTP},
	{SP_LB_2_DATA, 0x800, 3, 0, PIPE_BR, USPTP},
	{SP_LB_3_DATA, 0x800, 3, 0, PIPE_BR, USPTP},
	{SP_LB_4_DATA, 0x800, 3, 0, PIPE_BR, USPTP},
	{SP_LB_5_DATA, 0x800, 3, 0, PIPE_BR, USPTP},
	{SP_LB_6_DATA, 0x800, 3, 0, PIPE_BR, USPTP},
	{SP_LB_7_DATA, 0x800, 3, 0, PIPE_BR, USPTP},
	{SP_CB_RAM, 0x390, 3, 0, PIPE_BR, USPTP},
	{SP_INST_TAG, 0x90, 3, 0, PIPE_BR, USPTP},
	{SP_INST_DATA_2, 0x200, 3, 0, PIPE_BR, USPTP},
	{SP_TMO_TAG, 0x80, 3, 0, PIPE_BR, USPTP},
	{SP_SMO_TAG, 0x80, 3, 0, PIPE_BR, USPTP},
	{SP_STATE_DATA, 0x40, 3, 0, PIPE_BR, USPTP},
	{SP_HWAVE_RAM, 0x100, 3, 0, PIPE_BR, USPTP},
	{SP_L0_INST_BUF, 0x50, 3, 0, PIPE_BR, USPTP},
	{SP_LB_8_DATA, 0x800, 3, 0, PIPE_BR, USPTP},
	{SP_LB_9_DATA, 0x800, 3, 0, PIPE_BR, USPTP},
	{SP_LB_10_DATA, 0x800, 3, 0, PIPE_BR, USPTP},
	{SP_LB_11_DATA, 0x800, 3, 0, PIPE_BR, USPTP},
	{SP_LB_12_DATA, 0x200, 3, 0, PIPE_BR, USPTP},
	/* SP 3 USPTP 1 */
	{TP0_TMO_DATA, 0x200, 3, 1, PIPE_BR, USPTP},
	{TP0_SMO_DATA, 0x80, 3, 1, PIPE_BR, USPTP},
	{TP0_MIPMAP_BASE_DATA, 0x3c0, 3, 1, PIPE_BR, USPTP},
	{SP_INST_DATA, 0x800, 3, 1, PIPE_BR, USPTP},
	{SP_INST_DATA_1, 0x800, 3, 1, PIPE_BR, USPTP},
	{SP_LB_0_DATA, 0x800, 3, 1, PIPE_BR, USPTP},
	{SP_LB_1_DATA, 0x800, 3, 1, PIPE_BR, USPTP},
	{SP_LB_2_DATA, 0x800, 3, 1, PIPE_BR, USPTP},
	{SP_LB_3_DATA, 0x800, 3, 1, PIPE_BR, USPTP},
	{SP_LB_4_DATA, 0x800, 3, 1, PIPE_BR, USPTP},
	{SP_LB_5_DATA, 0x800, 3, 1, PIPE_BR, USPTP},
	{SP_LB_6_DATA, 0x800, 3, 1, PIPE_BR, USPTP},
	{SP_LB_7_DATA, 0x800, 3, 1, PIPE_BR, USPTP},
	{SP_CB_RAM, 0x390, 3, 1, PIPE_BR, USPTP},
	{SP_INST_TAG, 0x90, 3, 1, PIPE_BR, USPTP},
	{SP_INST_DATA_2, 0x200, 3, 1, PIPE_BR, USPTP},
	{SP_TMO_TAG, 0x80, 3, 1, PIPE_BR, USPTP},
	{SP_SMO_TAG, 0x80, 3, 1, PIPE_BR, USPTP},
	{SP_STATE_DATA, 0x40, 3, 1, PIPE_BR, USPTP},
	{SP_HWAVE_RAM, 0x100, 3, 1, PIPE_BR, USPTP},
	{SP_L0_INST_BUF, 0x50, 3, 1, PIPE_BR, USPTP},
	{SP_LB_8_DATA, 0x800,
3, 1, PIPE_BR, USPTP}, + {SP_LB_9_DATA, 0x800, 3, 1, PIPE_BR, USPTP}, + {SP_LB_10_DATA, 0x800, 3, 1, PIPE_BR, USPTP}, + {SP_LB_11_DATA, 0x800, 3, 1, PIPE_BR, USPTP}, + {SP_LB_12_DATA, 0x200, 3, 1, PIPE_BR, USPTP}, +}; + +static const u32 gen7_gbif_debugbus_blocks[] = { + DEBUGBUS_GBIF_CX, + DEBUGBUS_GBIF_GX, +}; + +static const u32 gen7_cx_dbgc_debugbus_blocks[] = { + DEBUGBUS_GMU_CX, + DEBUGBUS_CX, +}; + +struct gen7_shader_block_info { + struct gen7_shader_block *block; + u32 bank; + u64 offset; +}; + +static struct reg_list { + const u32 *regs; + const struct sel_reg *sel; + u64 offset; +} gen7_reg_list[] = { + { gen7_gpu_registers, NULL }, + { gen7_cx_misc_registers, NULL }, + { gen7_dpm_registers, NULL }, +}; + +static struct cp_indexed_reg_list { + u32 addr; + u32 data; + u32 size; +} gen7_cp_indexed_reg_list[] = { + { GEN7_CP_SQE_STAT_ADDR, GEN7_CP_SQE_STAT_DATA, 0x33}, + { GEN7_CP_DRAW_STATE_ADDR, GEN7_CP_DRAW_STATE_DATA, 0x100}, + { GEN7_CP_SQE_UCODE_DBG_ADDR, GEN7_CP_SQE_UCODE_DBG_DATA, 0x8000}, + { GEN7_CP_BV_SQE_STAT_ADDR, GEN7_CP_BV_SQE_STAT_DATA, 0x33}, + { GEN7_CP_BV_DRAW_STATE_ADDR, GEN7_CP_BV_DRAW_STATE_DATA, 0x100}, + { GEN7_CP_BV_SQE_UCODE_DBG_ADDR, GEN7_CP_BV_SQE_UCODE_DBG_DATA, 0x8000}, + { GEN7_CP_SQE_AC_STAT_ADDR, GEN7_CP_SQE_AC_STAT_DATA, 0x33}, + { GEN7_CP_LPAC_DRAW_STATE_ADDR, GEN7_CP_LPAC_DRAW_STATE_DATA, 0x100}, + { GEN7_CP_SQE_AC_UCODE_DBG_ADDR, GEN7_CP_SQE_AC_UCODE_DBG_DATA, 0x8000}, + { GEN7_CP_LPAC_FIFO_DBG_ADDR, GEN7_CP_LPAC_FIFO_DBG_DATA, 0x40}, +}; +#endif /*_ADRENO_GEN7_SNAPSHOT_H */ diff --git a/adreno_hfi.h b/adreno_hfi.h new file mode 100644 index 0000000000..6b171de136 --- /dev/null +++ b/adreno_hfi.h @@ -0,0 +1,869 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. 
 */
#ifndef __ADRENO_HFI_H
#define __ADRENO_HFI_H

#define HFI_QUEUE_SIZE SZ_4K /* bytes, must be base 4dw */
#define MAX_RCVD_PAYLOAD_SIZE 16 /* dwords */
#define MAX_RCVD_SIZE (MAX_RCVD_PAYLOAD_SIZE + 3) /* dwords */
#define HFI_MAX_MSG_SIZE (SZ_1K)

/* Queue identifiers */
#define HFI_CMD_ID 0
#define HFI_MSG_ID 1
#define HFI_DBG_ID 2
#define HFI_DSP_ID_0 3

/* Queue indices in the queue table */
#define HFI_CMD_IDX 0
#define HFI_MSG_IDX 1
#define HFI_DBG_IDX 2
#define HFI_DSP_IDX_BASE 3
#define HFI_DSP_IDX_0 3

/* Queue indices used by legacy (v1) firmware */
#define HFI_CMD_IDX_LEGACY 0
#define HFI_DSP_IDX_0_LEGACY 1
#define HFI_MSG_IDX_LEGACY 4
#define HFI_DBG_IDX_LEGACY 5

#define HFI_QUEUE_STATUS_DISABLED 0
#define HFI_QUEUE_STATUS_ENABLED 1

/* HTOF queue priority, 1 is highest priority */
#define HFI_CMD_PRI 10
#define HFI_MSG_PRI 10
#define HFI_DBG_PRI 40
#define HFI_DSP_PRI_0 20

#define HFI_IRQ_SIDEMSGQ_MASK BIT(1)
#define HFI_IRQ_DBGQ_MASK BIT(2)
#define HFI_IRQ_CM3_FAULT_MASK BIT(15)
#define HFI_IRQ_OOB_MASK GENMASK(31, 16)
#define HFI_IRQ_MASK (HFI_IRQ_SIDEMSGQ_MASK |\
			HFI_IRQ_DBGQ_MASK |\
			HFI_IRQ_CM3_FAULT_MASK)

#define DCVS_ACK_NONBLOCK 0
#define DCVS_ACK_BLOCK 1

/* Feature identifiers negotiated with the GMU */
#define HFI_FEATURE_DCVS 0
#define HFI_FEATURE_HWSCHED 1
#define HFI_FEATURE_PREEMPTION 2
#define HFI_FEATURE_CLOCKS_ON 3
#define HFI_FEATURE_BUS_ON 4
#define HFI_FEATURE_RAIL_ON 5
#define HFI_FEATURE_HWCG 6
#define HFI_FEATURE_LM 7
#define HFI_FEATURE_THROTTLE 8
#define HFI_FEATURE_IFPC 9
#define HFI_FEATURE_NAP 10
#define HFI_FEATURE_BCL 11
#define HFI_FEATURE_ACD 12
#define HFI_FEATURE_DIDT 13
#define HFI_FEATURE_DEPRECATED 14
#define HFI_FEATURE_CB 15
#define HFI_FEATURE_KPROF 16
#define HFI_FEATURE_BAIL_OUT_TIMER 17
#define HFI_FEATURE_GMU_STATS 18
#define HFI_FEATURE_DBQ 19
#define HFI_FEATURE_MINBW 20
#define HFI_FEATURE_CLX 21

/* A6xx uses a different value for KPROF */
#define HFI_FEATURE_A6XX_KPROF 14

/* Tunable value identifiers for HFI get/set value messages */
#define HFI_VALUE_FT_POLICY 100
#define HFI_VALUE_RB_MAX_CMDS 101
#define HFI_VALUE_CTX_MAX_CMDS 102
#define HFI_VALUE_ADDRESS 103
#define HFI_VALUE_MAX_GPU_PERF_INDEX 104
#define HFI_VALUE_MIN_GPU_PERF_INDEX 105
#define HFI_VALUE_MAX_BW_PERF_INDEX 106
#define HFI_VALUE_MIN_BW_PERF_INDEX 107
#define HFI_VALUE_MAX_GPU_THERMAL_INDEX 108
#define HFI_VALUE_GPUCLK 109
#define HFI_VALUE_CLK_TIME 110
#define HFI_VALUE_LOG_GROUP 111
#define HFI_VALUE_LOG_EVENT_ON 112
#define HFI_VALUE_LOG_EVENT_OFF 113
#define HFI_VALUE_DCVS_OBJ 114
#define HFI_VALUE_LM_CS0 115
#define HFI_VALUE_BIN_TIME 117
#define HFI_VALUE_LOG_STREAM_ENABLE 119
#define HFI_VALUE_PREEMPT_COUNT 120

#define HFI_VALUE_GLOBAL_TOKEN 0xFFFFFFFF

/* Context flags passed in HFI context registration */
#define HFI_CTXT_FLAG_PMODE BIT(0)
#define HFI_CTXT_FLAG_SWITCH_INTERNAL BIT(1)
#define HFI_CTXT_FLAG_SWITCH BIT(3)
#define HFI_CTXT_FLAG_NOTIFY BIT(5)
#define HFI_CTXT_FLAG_NO_FAULT_TOLERANCE BIT(9)
#define HFI_CTXT_FLAG_PWR_RULE BIT(11)
#define HFI_CTXT_FLAG_PRIORITY_MASK GENMASK(15, 12)
#define HFI_CTXT_FLAG_IFH_NOP BIT(16)
#define HFI_CTXT_FLAG_SECURE BIT(17)
#define HFI_CTXT_FLAG_TYPE_MASK GENMASK(24, 20)
#define HFI_CTXT_FLAG_TYPE_ANY 0
#define HFI_CTXT_FLAG_TYPE_GL 1
#define HFI_CTXT_FLAG_TYPE_CL 2
#define HFI_CTXT_FLAG_TYPE_C2D 3
#define HFI_CTXT_FLAG_TYPE_RS 4
#define HFI_CTXT_FLAG_TYPE_VK 5
#define HFI_CTXT_FLAG_TYPE_UNKNOWN 0x1e
#define HFI_CTXT_FLAG_PREEMPT_STYLE_MASK GENMASK(27, 25)
#define HFI_CTXT_FLAG_PREEMPT_STYLE_ANY 0
#define HFI_CTXT_FLAG_PREEMPT_STYLE_RB 1
#define HFI_CTXT_FLAG_PREEMPT_STYLE_FG 2
#define CMDBATCH_INDIRECT 0x00000200

enum hfi_mem_kind {
	/** @HFI_MEMKIND_GENERIC: Used for requesting generic memory */
	HFI_MEMKIND_GENERIC = 0,
	/** @HFI_MEMKIND_RB: Used for requesting ringbuffer memory */
	HFI_MEMKIND_RB,
	/** @HFI_MEMKIND_SCRATCH: Used for requesting scratch memory */
	HFI_MEMKIND_SCRATCH,
	/**
	 * @HFI_MEMKIND_CSW_SMMU_INFO: Used for requesting SMMU record for
	 * preemption context switching
	 */
	HFI_MEMKIND_CSW_SMMU_INFO,
	/**
	 * @HFI_MEMKIND_CSW_PRIV_NON_SECURE: Used for requesting
privileged non
	 * secure preemption records
	 */
	HFI_MEMKIND_CSW_PRIV_NON_SECURE,
	/**
	 * @HFI_MEMKIND_CSW_PRIV_SECURE: Used for requesting privileged secure
	 * preemption records
	 */
	HFI_MEMKIND_CSW_PRIV_SECURE,
	/**
	 * @HFI_MEMKIND_CSW_NON_PRIV: Used for requesting non privileged per
	 * context preemption buffer
	 */
	HFI_MEMKIND_CSW_NON_PRIV,
	/**
	 * @HFI_MEMKIND_CSW_COUNTER: Used for requesting preemption performance
	 * counter save/restore buffer
	 */
	HFI_MEMKIND_CSW_COUNTER,
	/**
	 * @HFI_MEMKIND_CTXTREC_PREEMPT_CNTR: Used for requesting preemption
	 * counter buffer
	 */
	HFI_MEMKIND_CTXTREC_PREEMPT_CNTR,
	/** @HFI_MEMKIND_SYS_LOG: Used for requesting system log memory */
	HFI_MEMKIND_SYS_LOG,
	/** @HFI_MEMKIND_CRASH_DUMP: Used for requesting crash dumper memory */
	HFI_MEMKIND_CRASH_DUMP,
	/**
	 * @HFI_MEMKIND_MMIO_DPU: Used for requesting Display processing unit's
	 * register space
	 */
	HFI_MEMKIND_MMIO_DPU,
	/**
	 * @HFI_MEMKIND_MMIO_TCSR: Used for requesting Top CSR(contains SoC
	 * doorbells) register space
	 */
	HFI_MEMKIND_MMIO_TCSR,
	/**
	 * @HFI_MEMKIND_MMIO_QDSS_STM: Used for requesting QDSS STM register
	 * space
	 */
	HFI_MEMKIND_MMIO_QDSS_STM,
	/** @HFI_MEMKIND_PROFILE: Used for kernel profiling */
	HFI_MEMKIND_PROFILE,
	/** @HFI_MEMKIND_USER_PROFILE_IBS: Used for user profiling */
	HFI_MEMKIND_USER_PROFILE_IBS,
	/** @HFI_MEMKIND_CMD_BUFFER: Used for composing ringbuffer content */
	HFI_MEMKIND_CMD_BUFFER,
	HFI_MEMKIND_MAX,
};

/* Human-readable names for each hfi_mem_kind, used in logs */
static const char * const hfi_memkind_strings[] = {
	[HFI_MEMKIND_GENERIC] = "GMU GENERIC",
	[HFI_MEMKIND_RB] = "GMU RB",
	[HFI_MEMKIND_SCRATCH] = "GMU SCRATCH",
	[HFI_MEMKIND_CSW_SMMU_INFO] = "GMU SMMU INFO",
	[HFI_MEMKIND_CSW_PRIV_NON_SECURE] = "GMU CSW PRIV NON SECURE",
	[HFI_MEMKIND_CSW_PRIV_SECURE] = "GMU CSW PRIV SECURE",
	[HFI_MEMKIND_CSW_NON_PRIV] = "GMU CSW NON PRIV",
	[HFI_MEMKIND_CSW_COUNTER] = "GMU CSW COUNTER",
	[HFI_MEMKIND_CTXTREC_PREEMPT_CNTR] = "GMU PREEMPT CNTR",
	[HFI_MEMKIND_SYS_LOG] = "GMU SYS LOG",
	[HFI_MEMKIND_CRASH_DUMP] = "GMU CRASHDUMP",
	[HFI_MEMKIND_MMIO_DPU] = "GMU MMIO DPU",
	[HFI_MEMKIND_MMIO_TCSR] = "GMU MMIO TCSR",
	[HFI_MEMKIND_MMIO_QDSS_STM] = "GMU MMIO QDSS STM",
	[HFI_MEMKIND_PROFILE] = "GMU KERNEL PROFILING",
	[HFI_MEMKIND_USER_PROFILE_IBS] = "GMU USER PROFILING",
	[HFI_MEMKIND_CMD_BUFFER] = "GMU CMD BUFFER",
};

/* CP/GFX pipeline can access */
#define HFI_MEMFLAG_GFX_ACC BIT(0)

/* Buffer has APRIV protection in GFX PTEs */
#define HFI_MEMFLAG_GFX_PRIV BIT(1)

/* Buffer is read-write for GFX PTEs. A 0 indicates read-only */
#define HFI_MEMFLAG_GFX_WRITEABLE BIT(2)

/* GMU can access */
#define HFI_MEMFLAG_GMU_ACC BIT(3)

/* Buffer has APRIV protection in GMU PTEs */
#define HFI_MEMFLAG_GMU_PRIV BIT(4)

/* Buffer is read-write for GMU PTEs. A 0 indicates read-only */
#define HFI_MEMFLAG_GMU_WRITEABLE BIT(5)

/* Buffer is located in GMU's non-cached bufferable VA range */
#define HFI_MEMFLAG_GMU_BUFFERABLE BIT(6)

/* Buffer is located in GMU's cacheable VA range */
#define HFI_MEMFLAG_GMU_CACHEABLE BIT(7)

/* Host can access */
#define HFI_MEMFLAG_HOST_ACC BIT(8)

/* Host initializes the buffer */
#define HFI_MEMFLAG_HOST_INIT BIT(9)

/* Gfx buffer needs to be secure */
#define HFI_MEMFLAG_GFX_SECURE BIT(12)

/**
 * struct hfi_queue_table_header - HFI queue table structure
 * @version: HFI protocol version
 * @size: queue table size in dwords
 * @qhdr0_offset: first queue header offset (dwords) in this table
 * @qhdr_size: queue header size
 * @num_q: number of queues defined in this table
 * @num_active_q: number of active queues
 */
struct hfi_queue_table_header {
	u32 version;
	u32 size;
	u32 qhdr0_offset;
	u32 qhdr_size;
	u32 num_q;
	u32 num_active_q;
} __packed;

/**
 * struct hfi_queue_header - HFI queue header structure
 * @status: active: 1; inactive: 0
 * @start_addr: starting address of the queue in GMU VA space
 * @type:
queue type encoding the priority, ID, and send/receive types
20)) + +#define QUEUE_HDR_TYPE(id, prio, rtype, stype) \ + (((id) & 0xFF) | (((prio) & 0xFF) << 8) | \ + (((rtype) & 0xFF) << 16) | (((stype) & 0xFF) << 24)) + +#define HFI_RSP_TIMEOUT 100 /* msec */ + +#define HFI_IRQ_MSGQ_MASK BIT(0) + +#define H2F_MSG_INIT 0 +#define H2F_MSG_FW_VER 1 +#define H2F_MSG_LM_CFG 2 +#define H2F_MSG_BW_VOTE_TBL 3 +#define H2F_MSG_PERF_TBL 4 +#define H2F_MSG_TEST 5 +#define H2F_MSG_ACD_TBL 7 +#define H2F_MSG_START 10 +#define H2F_MSG_FEATURE_CTRL 11 +#define H2F_MSG_GET_VALUE 12 +#define H2F_MSG_SET_VALUE 13 +#define H2F_MSG_CORE_FW_START 14 +#define F2H_MSG_MEM_ALLOC 20 +#define H2F_MSG_GX_BW_PERF_VOTE 30 +#define H2F_MSG_FW_HALT 32 +#define H2F_MSG_PREPARE_SLUMBER 33 +#define F2H_MSG_ERR 100 +#define F2H_MSG_DEBUG 101 +#define F2H_MSG_LOG_BLOCK 102 +#define F2H_MSG_GMU_CNTR_REGISTER 110 +#define F2H_MSG_GMU_CNTR_RELEASE 111 +#define F2H_MSG_ACK 126 /* Deprecated for v2.0*/ +#define H2F_MSG_ACK 127 /* Deprecated for v2.0*/ +#define H2F_MSG_REGISTER_CONTEXT 128 +#define H2F_MSG_UNREGISTER_CONTEXT 129 +#define H2F_MSG_ISSUE_CMD 130 +#define H2F_MSG_ISSUE_CMD_RAW 131 +#define H2F_MSG_TS_NOTIFY 132 +#define F2H_MSG_TS_RETIRE 133 +#define H2F_MSG_CONTEXT_POINTERS 134 +#define H2F_MSG_CONTEXT_RULE 140 /* AKA constraint */ +#define F2H_MSG_CONTEXT_BAD 150 + +/* H2F */ +struct hfi_gmu_init_cmd { + u32 hdr; + u32 seg_id; + u32 dbg_buffer_addr; + u32 dbg_buffer_size; + u32 boot_state; +} __packed; + +/* H2F */ +struct hfi_fw_version_cmd { + u32 hdr; + u32 supported_ver; +} __packed; + +/* H2F */ +struct hfi_bwtable_cmd { + u32 hdr; + u32 bw_level_num; + u32 cnoc_cmds_num; + u32 ddr_cmds_num; + u32 cnoc_wait_bitmask; + u32 ddr_wait_bitmask; + u32 cnoc_cmd_addrs[MAX_CNOC_CMDS]; + u32 cnoc_cmd_data[MAX_CNOC_LEVELS][MAX_CNOC_CMDS]; + u32 ddr_cmd_addrs[MAX_BW_CMDS]; + u32 ddr_cmd_data[MAX_GX_LEVELS][MAX_BW_CMDS]; +} __packed; + +struct opp_gx_desc { + u32 vote; + u32 acd; + u32 freq; +} __packed; + +struct opp_desc { + u32 vote; + u32 freq; +} 
__packed; + +/* H2F */ +struct hfi_dcvstable_v1_cmd { + u32 hdr; + u32 gpu_level_num; + u32 gmu_level_num; + struct opp_desc gx_votes[MAX_GX_LEVELS]; + struct opp_desc cx_votes[MAX_CX_LEVELS]; +} __packed; + +/* H2F */ +struct hfi_dcvstable_cmd { + u32 hdr; + u32 gpu_level_num; + u32 gmu_level_num; + struct opp_gx_desc gx_votes[MAX_GX_LEVELS]; + struct opp_desc cx_votes[MAX_CX_LEVELS]; +} __packed; + +#define MAX_ACD_STRIDE 2 +#define MAX_ACD_NUM_LEVELS 6 + +/* H2F */ +struct hfi_acd_table_cmd { + u32 hdr; + u32 version; + u32 enable_by_level; + u32 stride; + u32 num_levels; + u32 data[MAX_ACD_NUM_LEVELS * MAX_ACD_STRIDE]; +} __packed; + +/* H2F */ +struct hfi_test_cmd { + u32 hdr; + u32 data; +} __packed; + +/* H2F */ +struct hfi_start_cmd { + u32 hdr; +} __packed; + +/* H2F */ +struct hfi_feature_ctrl_cmd { + u32 hdr; + u32 feature; + u32 enable; + u32 data; +} __packed; + +/* H2F */ +struct hfi_get_value_cmd { + u32 hdr; + u32 type; + u32 subtype; +} __packed; + +/* Internal */ +struct hfi_get_value_req { + struct hfi_get_value_cmd cmd; + u32 data[16]; +} __packed; + +/* F2H */ +struct hfi_get_value_reply_cmd { + u32 hdr; + u32 req_hdr; + u32 data[16]; +} __packed; + +/* H2F */ +struct hfi_set_value_cmd { + u32 hdr; + u32 type; + u32 subtype; + u32 data; +} __packed; + +/* H2F */ +struct hfi_core_fw_start_cmd { + u32 hdr; + u32 handle; +} __packed; + +struct hfi_mem_alloc_desc { + u64 gpu_addr; + u32 flags; + u32 mem_kind; + u32 host_mem_handle; + u32 gmu_mem_handle; + u32 gmu_addr; + u32 size; /* Bytes */ +} __packed; + +struct hfi_mem_alloc_entry { + struct hfi_mem_alloc_desc desc; + struct kgsl_memdesc *md; +}; + +/* F2H */ +struct hfi_mem_alloc_cmd { + u32 hdr; + u32 reserved; /* Padding to ensure alignment of 'desc' below */ + struct hfi_mem_alloc_desc desc; +} __packed; + +/* H2F */ +struct hfi_mem_alloc_reply_cmd { + u32 hdr; + u32 req_hdr; + struct hfi_mem_alloc_desc desc; +} __packed; + +/* H2F */ +struct hfi_gx_bw_perf_vote_cmd { + u32 hdr; + u32 
ack_type; + u32 freq; + u32 bw; +} __packed; + +/* H2F */ +struct hfi_fw_halt_cmd { + u32 hdr; + u32 en_halt; +} __packed; + +/* H2F */ +struct hfi_prep_slumber_cmd { + u32 hdr; + u32 bw; + u32 freq; +} __packed; + +/* F2H */ +struct hfi_err_cmd { + u32 hdr; + u32 error_code; + u32 data[16]; +} __packed; + +/* F2H */ +struct hfi_debug_cmd { + u32 hdr; + u32 type; + u32 timestamp; + u32 data; +} __packed; + +/* F2H */ +struct hfi_gmu_cntr_register_cmd { + u32 hdr; + u32 group_id; + u32 countable; +} __packed; + +/* H2F */ +struct hfi_gmu_cntr_register_reply_cmd { + u32 hdr; + u32 req_hdr; + u32 group_id; + u32 countable; + u64 counter_addr; +} __packed; + +/* F2H */ +struct hfi_gmu_cntr_release_cmd { + u32 hdr; + u32 group_id; + u32 countable; +} __packed; + +/* H2F */ +struct hfi_register_ctxt_cmd { + u32 hdr; + u32 ctxt_id; + u32 flags; + u64 pt_addr; + u32 ctxt_idr; + u32 ctxt_bank; +} __packed; + +/* H2F */ +struct hfi_unregister_ctxt_cmd { + u32 hdr; + u32 ctxt_id; + u32 ts; +} __packed; + +struct hfi_issue_ib { + u64 addr; + u32 size; +} __packed; + +/* H2F */ +struct hfi_issue_cmd_cmd { + u32 hdr; + u32 ctxt_id; + u32 flags; + u32 ts; + u32 count; + struct hfi_issue_ib *ibs[]; +} __packed; + +/* Internal */ +struct hfi_issue_cmd_req { + u32 queue; + u32 ctxt_id; + struct hfi_issue_cmd_cmd cmd; +} __packed; + +/* H2F */ +/* The length of *buf will be embedded in the hdr */ +struct hfi_issue_cmd_raw_cmd { + u32 hdr; + u32 *buf; +} __packed; + +/* Internal */ +struct hfi_issue_cmd_raw_req { + u32 queue; + u32 ctxt_id; + u32 len; + u32 *buf; +} __packed; + +/* H2F */ +struct hfi_ts_notify_cmd { + u32 hdr; + u32 ctxt_id; + u32 ts; +} __packed; + +#define CMDBATCH_SUCCESS 0 +#define CMDBATCH_RETIRED 1 +#define CMDBATCH_ERROR 2 +#define CMDBATCH_SKIP 3 + +#define CMDBATCH_PROFILING BIT(4) + +/* F2H */ +struct hfi_ts_retire_cmd { + u32 hdr; + u32 ctxt_id; + u32 ts; + u32 type; + u64 submitted_to_rb; + u64 sop; + u64 eop; + u64 retired_on_gmu; +} __packed; + +/* H2F 
*/ +struct hfi_context_pointers_cmd { + u32 hdr; + u32 ctxt_id; + u64 sop_addr; + u64 eop_addr; + u64 user_ctxt_record_addr; +} __packed; + +/* H2F */ +struct hfi_context_rule_cmd { + u32 hdr; + u32 ctxt_id; + u32 type; + u32 status; +} __packed; + +/* F2H */ +struct hfi_context_bad_cmd { + u32 hdr; + u32 ctxt_id; + u32 policy; + u32 ts; + u32 error; + u32 payload[]; +} __packed; + +/* H2F */ +struct hfi_context_bad_reply_cmd { + u32 hdr; + u32 req_hdr; +} __packed; + +/* H2F */ +struct hfi_submit_cmd { + u32 hdr; + u32 ctxt_id; + u32 flags; + u32 ts; + u32 profile_gpuaddr_lo; + u32 profile_gpuaddr_hi; + u32 numibs; + u32 big_ib_gmu_va; +} __packed; + +struct hfi_log_block { + u32 hdr; + u32 version; + u32 start_index; + u32 stop_index; +} __packed; + +/** + * struct pending_cmd - data structure to track outstanding HFI + * command messages + */ +struct pending_cmd { + /** @sent_hdr: Header of the un-ack'd hfi packet */ + u32 sent_hdr; + /** @results: Array to store the ack packet */ + u32 results[MAX_RCVD_SIZE]; + /** @complete: Completion to signal hfi ack has been received */ + struct completion complete; + /** @node: to add it to the list of hfi packets waiting for ack */ + struct list_head node; +}; + +static inline int _CMD_MSG_HDR(u32 *hdr, int id, size_t size) +{ + if (WARN_ON(size > HFI_MAX_MSG_SIZE)) + return -EMSGSIZE; + + *hdr = CREATE_MSG_HDR(id, size, HFI_MSG_CMD); + return 0; +} + +#define CMD_MSG_HDR(cmd, id) \ + _CMD_MSG_HDR(&(cmd).hdr, id, sizeof(cmd)) + +/* Maximum number of IBs in a submission */ +#define HWSCHED_MAX_DISPATCH_NUMIBS \ + ((HFI_MAX_MSG_SIZE - offsetof(struct hfi_issue_cmd_cmd, ibs)) \ + / sizeof(struct hfi_issue_ib)) + +/** + * struct payload_section - Container of keys values + * + * There may be a variable number of payload sections appended + * to the context bad HFI message. Each payload section contains + * a variable number of key-value pairs, both key and value being + * single dword each. 
+ */ +struct payload_section { + /** @type: Type of the payload */ + u16 type; + /** @dwords: Number of dwords in the data array. */ + u16 dwords; + /** @data: A sequence of key-value pairs. Each pair is 2 dwords. */ + u32 data[]; +} __packed; + +/* IDs for context bad hfi payloads */ +#define PAYLOAD_FAULT_REGS 1 +#define PAYLOAD_RB 2 +#define PAYLOAD_PREEMPT_TIMEOUT 3 + +/* Keys for PAYLOAD_FAULT_REGS type payload */ +#define KEY_CP_OPCODE_ERROR 1 +#define KEY_CP_PROTECTED_ERROR 2 +#define KEY_CP_HW_FAULT 3 +#define KEY_CP_BV_OPCODE_ERROR 4 +#define KEY_CP_BV_PROTECTED_ERROR 5 +#define KEY_CP_BV_HW_FAULT 6 + +/* Keys for PAYLOAD_RB type payload */ +#define KEY_RB_ID 1 +#define KEY_RB_RPTR 2 +#define KEY_RB_WPTR 3 +#define KEY_RB_SIZEDWORDS 4 +#define KEY_RB_QUEUED_TS 5 +#define KEY_RB_RETIRED_TS 6 +#define KEY_RB_GPUADDR_LO 7 +#define KEY_RB_GPUADDR_HI 8 + +/* Keys for PAYLOAD_PREEMPT_TIMEOUT type payload */ +#define KEY_PREEMPT_TIMEOUT_CUR_RB_ID 1 +#define KEY_PREEMPT_TIMEOUT_NEXT_RB_ID 2 + +/* Types of errors that trigger context bad HFI */ + +/* GPU encountered a CP HW error */ +#define GMU_CP_HW_ERROR 600 +/* GPU encountered a GPU Hang interrupt */ +#define GMU_GPU_HW_HANG 601 +/* Preemption didn't complete in given time */ +#define GMU_GPU_PREEMPT_TIMEOUT 602 +/* Fault due to Long IB timeout */ +#define GMU_GPU_SW_HANG 603 +/* GPU encountered a bad opcode */ +#define GMU_CP_OPCODE_ERROR 604 +/* GPU encountered protected mode error */ +#define GMU_CP_PROTECTED_ERROR 605 +/* GPU encountered an illegal instruction */ +#define GMU_CP_ILLEGAL_INST_ERROR 606 +/* GPU encountered a CP ucode error */ +#define GMU_CP_UCODE_ERROR 607 +/* GPU encountered a CP hw fault error */ +#define GMU_CP_HW_FAULT_ERROR 608 +/* GPU encountered a GPC error */ +#define GMU_CP_GPC_ERROR 609 +/* GPU BV encountered a bad opcode */ +#define GMU_CP_BV_OPCODE_ERROR 610 +/* GPU BV encountered protected mode error */ +#define GMU_CP_BV_PROTECTED_ERROR 611 +/* GPU BV encountered a CP hw fault 
error */ +#define GMU_CP_BV_HW_FAULT_ERROR 612 +/* GPU BV encountered a CP ucode error */ +#define GMU_CP_BV_UCODE_ERROR 613 +/* GPU BV encountered an illegal instruction */ +#define GMU_CP_BV_ILLEGAL_INST_ERROR 614 +/* GPU encountered an unknown CP error */ +#define GMU_CP_UNKNOWN_ERROR 700 + +/** + * hfi_update_read_idx - Update the read index of an hfi queue + * hdr: Pointer to the hfi queue header + * index: New read index + * + * This function makes sure that kgsl has consumed f2h packets + * before GMU sees the updated read index. This avoids a corner + * case where GMU might over-write f2h packets that have not yet + * been consumed by kgsl. + */ +static inline void hfi_update_read_idx(struct hfi_queue_header *hdr, u32 index) +{ + /* + * This is to make sure packets are consumed before gmu sees the updated + * read index + */ + mb(); + + hdr->read_index = index; +} + +/** + * hfi_update_write_idx - Update the write index of an hfi queue + * hdr: Pointer to the hfi queue header + * index: New write index + * + * This function makes sure that the h2f packets are written out + * to memory before GMU sees the updated write index. This avoids + * corner cases where GMU might fetch stale entries that can happen + * if write index is updated before new packets have been written + * out to memory. + */ +static inline void hfi_update_write_idx(struct hfi_queue_header *hdr, u32 index) +{ + /* + * This is to make sure packets are written out before gmu sees the + * updated write index + */ + wmb(); + + hdr->write_index = index; + + /* + * Memory barrier to make sure write index is written before an + * interrupt is raised + */ + wmb(); +} +#endif diff --git a/adreno_hwsched.c b/adreno_hwsched.c new file mode 100644 index 0000000000..84a21aec8c --- /dev/null +++ b/adreno_hwsched.c @@ -0,0 +1,1714 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. 
+ */ + +#include "adreno.h" +#include "adreno_hfi.h" +#include "adreno_snapshot.h" +#include "adreno_sysfs.h" +#include "adreno_trace.h" +#include "kgsl_timeline.h" + +/* This structure represents inflight command object */ +struct cmd_list_obj { + /** @cmdobj: Handle to the command object */ + struct kgsl_drawobj_cmd *cmdobj; + /** @node: List node to put it in the list of inflight commands */ + struct list_head node; +}; + +/* + * Number of commands that can be queued in a context before it sleeps + * + * Our code that "puts back" a command from the context is much cleaner + * if we are sure that there will always be enough room in the ringbuffer + * so restrict the size of the context queue to ADRENO_CONTEXT_DRAWQUEUE_SIZE - 1 + */ +static u32 _context_drawqueue_size = ADRENO_CONTEXT_DRAWQUEUE_SIZE - 1; + +/* Number of milliseconds to wait for the context queue to clear */ +static unsigned int _context_queue_wait = 10000; + +/* + * GFT throttle parameters. If GFT recovered more than + * X times in Y ms invalidate the context and do not attempt recovery. + * X -> _fault_throttle_burst + * Y -> _fault_throttle_time + */ +static unsigned int _fault_throttle_time = 2000; +static unsigned int _fault_throttle_burst = 3; + +/* Use a kmem cache to speed up allocations for dispatcher jobs */ +static struct kmem_cache *jobs_cache; +/* Use a kmem cache to speed up allocations for inflight command objects */ +static struct kmem_cache *obj_cache; + +static bool _check_context_queue(struct adreno_context *drawctxt, u32 count) +{ + bool ret; + + spin_lock(&drawctxt->lock); + + /* + * Wake up if there is room in the context or if the whole thing got + * invalidated while we were asleep + */ + + if (kgsl_context_invalid(&drawctxt->base)) + ret = false; + else + ret = ((drawctxt->queued + count) < _context_drawqueue_size) ? 
1 : 0; + + spin_unlock(&drawctxt->lock); + + return ret; +} + +static void _pop_drawobj(struct adreno_context *drawctxt) +{ + drawctxt->drawqueue_head = DRAWQUEUE_NEXT(drawctxt->drawqueue_head, + ADRENO_CONTEXT_DRAWQUEUE_SIZE); + drawctxt->queued--; +} + +static int _retire_syncobj(struct kgsl_drawobj_sync *syncobj, + struct adreno_context *drawctxt) +{ + if (!kgsl_drawobj_events_pending(syncobj)) { + _pop_drawobj(drawctxt); + kgsl_drawobj_destroy(DRAWOBJ(syncobj)); + return 0; + } + + /* + * If we got here, there are pending events for sync object. + * Start the canary timer if it hasnt been started already. + */ + if (!syncobj->timeout_jiffies) { + syncobj->timeout_jiffies = jiffies + msecs_to_jiffies(5000); + mod_timer(&syncobj->timer, syncobj->timeout_jiffies); + } + + return -EAGAIN; +} + +static bool _marker_expired(struct kgsl_drawobj_cmd *markerobj) +{ + struct kgsl_drawobj *drawobj = DRAWOBJ(markerobj); + + return (drawobj->flags & KGSL_DRAWOBJ_MARKER) && + kgsl_check_timestamp(drawobj->device, drawobj->context, + markerobj->marker_timestamp); +} + +static void _retire_timestamp(struct kgsl_drawobj *drawobj) +{ + struct kgsl_context *context = drawobj->context; + struct kgsl_device *device = context->device; + + /* + * Write the start and end timestamp to the memstore to keep the + * accounting sane + */ + kgsl_sharedmem_writel(device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, soptimestamp), + drawobj->timestamp); + + kgsl_sharedmem_writel(device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp), + drawobj->timestamp); + + if (drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME) + atomic64_inc(&drawobj->context->proc_priv->frame_count); + + /* Retire pending GPU events for the object */ + kgsl_process_event_group(device, &context->events); + + kgsl_drawobj_destroy(drawobj); +} + +static int _retire_markerobj(struct kgsl_drawobj_cmd *cmdobj, + struct adreno_context *drawctxt) +{ + if (_marker_expired(cmdobj)) { + _pop_drawobj(drawctxt); + 
_retire_timestamp(DRAWOBJ(cmdobj)); + return 0; + } + + /* + * If the marker isn't expired but the SKIP bit + * is set then there are real commands following + * this one in the queue. This means that we + * need to dispatch the command so that we can + * keep the timestamp accounting correct. If + * skip isn't set then we block this queue + * until the dependent timestamp expires + */ + + return test_bit(CMDOBJ_SKIP, &cmdobj->priv) ? 1 : -EAGAIN; +} + +static int _retire_timelineobj(struct kgsl_drawobj *drawobj, + struct adreno_context *drawctxt) +{ + struct kgsl_drawobj_timeline *timelineobj = TIMELINEOBJ(drawobj); + int i; + + for (i = 0; i < timelineobj->count; i++) + kgsl_timeline_signal(timelineobj->timelines[i].timeline, + timelineobj->timelines[i].seqno); + + _pop_drawobj(drawctxt); + _retire_timestamp(drawobj); + + return 0; +} + +static int drawqueue_retire_bindobj(struct kgsl_drawobj *drawobj, + struct adreno_context *drawctxt) +{ + struct kgsl_drawobj_bind *bindobj = BINDOBJ(drawobj); + + if (test_bit(KGSL_BINDOBJ_STATE_DONE, &bindobj->state)) { + _pop_drawobj(drawctxt); + _retire_timestamp(drawobj); + return 0; + } + + if (!test_and_set_bit(KGSL_BINDOBJ_STATE_START, &bindobj->state)) { + /* + * Take a reference to the drawobj and the context because both + * get referenced in the bind callback + */ + _kgsl_context_get(&drawctxt->base); + kref_get(&drawobj->refcount); + + kgsl_sharedmem_bind_ranges(bindobj->bind); + } + + return -EAGAIN; +} + +/* + * Retires all expired marker and sync objs from the context + * queue and returns one of the below + * a) next drawobj that needs to be sent to ringbuffer + * b) -EAGAIN for syncobj with syncpoints pending. + * c) -EAGAIN for markerobj whose marker timestamp has not expired yet. + * c) NULL for no commands remaining in drawqueue. 
+ */ +static struct kgsl_drawobj *_process_drawqueue_get_next_drawobj( + struct adreno_device *adreno_dev, struct adreno_context *drawctxt) +{ + struct kgsl_drawobj *drawobj; + unsigned int i = drawctxt->drawqueue_head; + struct kgsl_drawobj_cmd *cmdobj; + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + int ret = 0; + + if (drawctxt->drawqueue_head == drawctxt->drawqueue_tail) + return NULL; + + for (i = drawctxt->drawqueue_head; i != drawctxt->drawqueue_tail; + i = DRAWQUEUE_NEXT(i, ADRENO_CONTEXT_DRAWQUEUE_SIZE)) { + + drawobj = drawctxt->drawqueue[i]; + + if (!drawobj) + return NULL; + + switch (drawobj->type) { + case CMDOBJ_TYPE: + cmdobj = CMDOBJ(drawobj); + + /* We only support one big IB inflight */ + if ((cmdobj->numibs > HWSCHED_MAX_DISPATCH_NUMIBS) && + hwsched->big_cmdobj) + return ERR_PTR(-ENOSPC); + + return drawobj; + case SYNCOBJ_TYPE: + ret = _retire_syncobj(SYNCOBJ(drawobj), drawctxt); + break; + case MARKEROBJ_TYPE: + ret = _retire_markerobj(CMDOBJ(drawobj), drawctxt); + /* Special case where marker needs to be sent to GPU */ + if (ret == 1) + return drawobj; + break; + case BINDOBJ_TYPE: + ret = drawqueue_retire_bindobj(drawobj, drawctxt); + break; + case TIMELINEOBJ_TYPE: + ret = _retire_timelineobj(drawobj, drawctxt); + break; + default: + ret = -EINVAL; + break; + } + + if (ret) + return ERR_PTR(ret); + } + + return NULL; +} + +/** + * hwsched_dispatcher_requeue_cmdobj() - Put a command back on the context + * queue + * @drawctxt: Pointer to the adreno draw context + * @cmdobj: Pointer to the KGSL command object to requeue + * + * Failure to submit a command to the ringbuffer isn't the fault of the command + * being submitted so if a failure happens, push it back on the head of the + * context queue to be reconsidered again unless the context got detached. 
+ */ +static inline int hwsched_dispatcher_requeue_cmdobj( + struct adreno_context *drawctxt, + struct kgsl_drawobj_cmd *cmdobj) +{ + unsigned int prev; + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + + spin_lock(&drawctxt->lock); + + if (kgsl_context_is_bad(&drawctxt->base)) { + spin_unlock(&drawctxt->lock); + /* get rid of this drawobj since the context is bad */ + kgsl_drawobj_destroy(drawobj); + return -ENOENT; + } + + prev = drawctxt->drawqueue_head == 0 ? + (ADRENO_CONTEXT_DRAWQUEUE_SIZE - 1) : + (drawctxt->drawqueue_head - 1); + + /* + * The maximum queue size always needs to be one less then the size of + * the ringbuffer queue so there is "room" to put the drawobj back in + */ + + WARN_ON(prev == drawctxt->drawqueue_tail); + + drawctxt->drawqueue[prev] = drawobj; + drawctxt->queued++; + + /* Reset the command queue head to reflect the newly requeued change */ + drawctxt->drawqueue_head = prev; + spin_unlock(&drawctxt->lock); + return 0; +} + +/** + * hwsched_queue_context() - Queue a context in the dispatcher list of jobs + * @adreno_dev: Pointer to the adreno device structure + * @drawctxt: Pointer to the adreno draw context + * + * Add a context to the dispatcher list of jobs. 
+ */ +static int hwsched_queue_context(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct adreno_dispatch_job *job; + + /* Refuse to queue a detached context */ + if (kgsl_context_detached(&drawctxt->base)) + return 0; + + if (!_kgsl_context_get(&drawctxt->base)) + return 0; + + job = kmem_cache_alloc(jobs_cache, GFP_ATOMIC); + if (!job) { + kgsl_context_put(&drawctxt->base); + return -ENOMEM; + } + + job->drawctxt = drawctxt; + + trace_dispatch_queue_context(drawctxt); + llist_add(&job->node, &hwsched->jobs[drawctxt->base.priority]); + + return 0; +} + +void adreno_hwsched_flush(struct adreno_device *adreno_dev) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + + kthread_flush_worker(hwsched->worker); +} + +static bool hwsched_in_fault(struct adreno_hwsched *hwsched) +{ + /* make sure we're reading the latest value */ + smp_rmb(); + return atomic_read(&hwsched->fault) != 0; +} + +/** + * sendcmd() - Send a drawobj to the GPU hardware + * @dispatcher: Pointer to the adreno dispatcher struct + * @drawobj: Pointer to the KGSL drawobj being sent + * + * Send a KGSL drawobj to the GPU hardware + */ +static int hwsched_sendcmd(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + struct kgsl_context *context = drawobj->context; + struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context); + int ret; + struct cmd_list_obj *obj; + + obj = kmem_cache_alloc(obj_cache, GFP_KERNEL); + if (!obj) + return -ENOMEM; + + mutex_lock(&device->mutex); + + if (adreno_gpu_halt(adreno_dev) || hwsched_in_fault(hwsched)) { + mutex_unlock(&device->mutex); + kmem_cache_free(obj_cache, obj); + return -EBUSY; + } + + + if (kgsl_context_detached(context)) { + mutex_unlock(&device->mutex); + 
kmem_cache_free(obj_cache, obj); + return -ENOENT; + } + + hwsched->inflight++; + + if (hwsched->inflight == 1 && + !test_bit(ADRENO_HWSCHED_POWER, &hwsched->flags)) { + ret = adreno_active_count_get(adreno_dev); + if (ret) { + hwsched->inflight--; + mutex_unlock(&device->mutex); + kmem_cache_free(obj_cache, obj); + return ret; + } + set_bit(ADRENO_HWSCHED_POWER, &hwsched->flags); + } + + ret = hwsched->hwsched_ops->submit_cmdobj(adreno_dev, cmdobj); + if (ret) { + /* + * If the first submission failed, then put back the active + * count to relinquish active vote + */ + if (hwsched->inflight == 1) { + adreno_active_count_put(adreno_dev); + clear_bit(ADRENO_HWSCHED_POWER, &hwsched->flags); + } + + hwsched->inflight--; + kmem_cache_free(obj_cache, obj); + mutex_unlock(&device->mutex); + return ret; + } + + if ((hwsched->inflight == 1) && + !test_and_set_bit(ADRENO_HWSCHED_ACTIVE, &hwsched->flags)) + reinit_completion(&hwsched->idle_gate); + + if (cmdobj->numibs > HWSCHED_MAX_DISPATCH_NUMIBS) { + hwsched->big_cmdobj = cmdobj; + kref_get(&drawobj->refcount); + } + + drawctxt->internal_timestamp = drawobj->timestamp; + + obj->cmdobj = cmdobj; + list_add_tail(&obj->node, &hwsched->cmd_list); + mutex_unlock(&device->mutex); + + return 0; +} + +/** + * hwsched_sendcmds() - Send commands from a context to the GPU + * @adreno_dev: Pointer to the adreno device struct + * @drawctxt: Pointer to the adreno context to dispatch commands from + * + * Dequeue and send a burst of commands from the specified context to the GPU + * Returns postive if the context needs to be put back on the pending queue + * 0 if the context is empty or detached and negative on error + */ +static int hwsched_sendcmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + int count = 0; + int ret = 0; + unsigned int timestamp; + + while (1) { + struct kgsl_drawobj *drawobj; + struct kgsl_drawobj_cmd *cmdobj; + + spin_lock(&drawctxt->lock); + drawobj = 
_process_drawqueue_get_next_drawobj(adreno_dev, + drawctxt); + + /* + * adreno_context_get_drawobj returns -EAGAIN if the current + * drawobj has pending sync points so no more to do here. + * When the sync points are satisfied then the context will get + * reqeueued + */ + + if (IS_ERR_OR_NULL(drawobj)) { + if (IS_ERR(drawobj)) + ret = PTR_ERR(drawobj); + spin_unlock(&drawctxt->lock); + break; + } + _pop_drawobj(drawctxt); + spin_unlock(&drawctxt->lock); + + timestamp = drawobj->timestamp; + cmdobj = CMDOBJ(drawobj); + ret = hwsched_sendcmd(adreno_dev, cmdobj); + + /* + * On error from hwsched_sendcmd() try to requeue the cmdobj + * unless we got back -ENOENT which means that the context has + * been detached and there will be no more deliveries from here + */ + if (ret != 0) { + /* Destroy the cmdobj on -ENOENT */ + if (ret == -ENOENT) + kgsl_drawobj_destroy(drawobj); + else { + /* + * If we couldn't put it on dispatch queue + * then return it to the context queue + */ + int r = hwsched_dispatcher_requeue_cmdobj( + drawctxt, cmdobj); + if (r) + ret = r; + } + + break; + } + + drawctxt->submitted_timestamp = timestamp; + + count++; + } + + /* + * Wake up any snoozing threads if we have consumed any real commands + * or marker commands and we have room in the context queue. 
+ */ + + if (_check_context_queue(drawctxt, 0)) + wake_up_all(&drawctxt->wq); + + if (!ret) + ret = count; + + /* Return error or the number of commands queued */ + return ret; +} + +static void hwsched_handle_jobs_list(struct adreno_device *adreno_dev, + int id, unsigned long *map, struct llist_node *list) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct adreno_dispatch_job *job, *next; + + if (!list) + return; + + /* Reverse the list so we deal with oldest submitted contexts first */ + list = llist_reverse_order(list); + + llist_for_each_entry_safe(job, next, list, node) { + int ret; + + if (kgsl_context_is_bad(&job->drawctxt->base)) { + kgsl_context_put(&job->drawctxt->base); + kmem_cache_free(jobs_cache, job); + continue; + } + + /* + * Due to the nature of the lockless queue the same context + * might have multiple jobs on the list. We allow this so we + * don't have to query the list on the producer side but on the + * consumer side we only want each context to be considered + * once. Use a bitmap to remember which contexts we've already + * seen and quietly discard duplicate jobs + */ + if (test_and_set_bit(job->drawctxt->base.id, map)) { + kgsl_context_put(&job->drawctxt->base); + kmem_cache_free(jobs_cache, job); + continue; + } + + ret = hwsched_sendcmds(adreno_dev, job->drawctxt); + + /* + * If the context had nothing queued or the context has been + * destroyed then drop the job + */ + if (!ret || ret == -ENOENT) { + kgsl_context_put(&job->drawctxt->base); + kmem_cache_free(jobs_cache, job); + continue; + } + + /* + * If the dispatch queue is full then requeue the job to be + * considered first next time. 
Otherwise the context + * either successfully submmitted to the GPU or another error + * happened and it should go back on the regular queue + */ + if (ret == -ENOSPC) + llist_add(&job->node, &hwsched->requeue[id]); + else + llist_add(&job->node, &hwsched->jobs[id]); + } +} + +static void hwsched_handle_jobs(struct adreno_device *adreno_dev, int id) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + unsigned long map[BITS_TO_LONGS(KGSL_MEMSTORE_MAX)]; + struct llist_node *requeue, *jobs; + + memset(map, 0, sizeof(map)); + + requeue = llist_del_all(&hwsched->requeue[id]); + jobs = llist_del_all(&hwsched->jobs[id]); + + hwsched_handle_jobs_list(adreno_dev, id, map, requeue); + hwsched_handle_jobs_list(adreno_dev, id, map, jobs); +} + +/** + * hwsched_issuecmds() - Issue commmands from pending contexts + * @adreno_dev: Pointer to the adreno device struct + * + * Issue as many commands as possible (up to inflight) from the pending contexts + * This function assumes the dispatcher mutex has been locked. 
+ */ +static void hwsched_issuecmds(struct adreno_device *adreno_dev) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + int i; + + for (i = 0; i < ARRAY_SIZE(hwsched->jobs); i++) + hwsched_handle_jobs(adreno_dev, i); +} + +void adreno_hwsched_trigger(struct adreno_device *adreno_dev) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + + kthread_queue_work(hwsched->worker, &hwsched->work); +} + +/** + * adreno_hwsched_issuecmds() - Issue commmands from pending contexts + * @adreno_dev: Pointer to the adreno device struct + * + * Lock the dispatcher and call hwsched_issuecmds + */ +static void adreno_hwsched_issuecmds(struct adreno_device *adreno_dev) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + + /* If the dispatcher is busy then schedule the work for later */ + if (!mutex_trylock(&hwsched->mutex)) { + adreno_hwsched_trigger(adreno_dev); + return; + } + + if (!hwsched_in_fault(hwsched)) + hwsched_issuecmds(adreno_dev); + + mutex_unlock(&hwsched->mutex); +} + +/** + * get_timestamp() - Return the next timestamp for the context + * @drawctxt - Pointer to an adreno draw context struct + * @drawobj - Pointer to a drawobj + * @timestamp - Pointer to a timestamp value possibly passed from the user + * @user_ts - user generated timestamp + * + * Assign a timestamp based on the settings of the draw context and the command + * batch. 
+ */ +static int get_timestamp(struct adreno_context *drawctxt, + struct kgsl_drawobj *drawobj, unsigned int *timestamp, + unsigned int user_ts) +{ + + if (drawctxt->base.flags & KGSL_CONTEXT_USER_GENERATED_TS) { + /* + * User specified timestamps need to be greater than the last + * issued timestamp in the context + */ + if (timestamp_cmp(drawctxt->timestamp, user_ts) >= 0) + return -ERANGE; + + drawctxt->timestamp = user_ts; + } else + drawctxt->timestamp++; + + *timestamp = drawctxt->timestamp; + drawobj->timestamp = *timestamp; + return 0; +} + +static inline int _check_context_state(struct kgsl_context *context) +{ + if (kgsl_context_invalid(context)) + return -EDEADLK; + + if (kgsl_context_detached(context)) + return -ENOENT; + + return 0; +} + +static inline bool _verify_ib(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, struct kgsl_memobj_node *ib) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_process_private *private = dev_priv->process_priv; + + /* The maximum allowable size for an IB in the CP is 0xFFFFF dwords */ + if (ib->size == 0 || ((ib->size >> 2) > 0xFFFFF)) { + pr_context(device, context, "ctxt %d invalid ib size %lld\n", + context->id, ib->size); + return false; + } + + /* Make sure that the address is mapped */ + if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, ib->gpuaddr)) { + pr_context(device, context, "ctxt %d invalid ib gpuaddr %llX\n", + context->id, ib->gpuaddr); + return false; + } + + return true; +} + +static inline int _verify_cmdobj(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, struct kgsl_drawobj *drawobj[], + uint32_t count) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_memobj_node *ib; + unsigned int i; + + for (i = 0; i < count; i++) { + /* Verify the IBs before they get queued */ + if (drawobj[i]->type == CMDOBJ_TYPE) { + struct kgsl_drawobj_cmd *cmdobj = CMDOBJ(drawobj[i]); + + list_for_each_entry(ib, &cmdobj->cmdlist, node) + if 
(!_verify_ib(dev_priv, + &ADRENO_CONTEXT(context)->base, ib)) + return -EINVAL; + + /* + * Clear the wake on touch bit to indicate an IB has + * been submitted since the last time we set it. + * But only clear it when we have rendering commands. + */ + ADRENO_DEVICE(device)->wake_on_touch = false; + } + } + + return 0; +} + +static inline int _wait_for_room_in_context_queue( + struct adreno_context *drawctxt, u32 count) +{ + int ret = 0; + + /* + * There is always a possibility that dispatcher may end up pushing + * the last popped draw object back to the context drawqueue. Hence, + * we can only queue up to _context_drawqueue_size - 1 here to make + * sure we never let drawqueue->queued exceed _context_drawqueue_size. + */ + if ((drawctxt->queued + count) > (_context_drawqueue_size - 1)) { + trace_adreno_drawctxt_sleep(drawctxt); + spin_unlock(&drawctxt->lock); + + ret = wait_event_interruptible_timeout(drawctxt->wq, + _check_context_queue(drawctxt, count), + msecs_to_jiffies(_context_queue_wait)); + + spin_lock(&drawctxt->lock); + trace_adreno_drawctxt_wake(drawctxt); + + /* + * Account for the possibility that the context got invalidated + * while we were sleeping + */ + if (ret > 0) + ret = _check_context_state(&drawctxt->base); + else if (ret == 0) + ret = -ETIMEDOUT; + } + + return ret; +} + +static unsigned int _check_context_state_to_queue_cmds( + struct adreno_context *drawctxt, u32 count) +{ + int ret = _check_context_state(&drawctxt->base); + + if (ret) + return ret; + + return _wait_for_room_in_context_queue(drawctxt, count); +} + +static void _queue_drawobj(struct adreno_context *drawctxt, + struct kgsl_drawobj *drawobj) +{ + /* Put the command into the queue */ + drawctxt->drawqueue[drawctxt->drawqueue_tail] = drawobj; + drawctxt->drawqueue_tail = (drawctxt->drawqueue_tail + 1) % + ADRENO_CONTEXT_DRAWQUEUE_SIZE; + drawctxt->queued++; + trace_adreno_cmdbatch_queued(drawobj, drawctxt->queued); +} + +static int _queue_cmdobj(struct adreno_device 
*adreno_dev, + struct adreno_context *drawctxt, struct kgsl_drawobj_cmd *cmdobj, + uint32_t *timestamp, unsigned int user_ts) +{ + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + u32 j; + int ret; + + ret = get_timestamp(drawctxt, drawobj, timestamp, user_ts); + if (ret) + return ret; + + /* + * If this is a real command then we need to force any markers + * queued before it to dispatch to keep time linear - set the + * skip bit so the commands get NOPed. + */ + j = drawctxt->drawqueue_head; + + while (j != drawctxt->drawqueue_tail) { + if (drawctxt->drawqueue[j]->type == MARKEROBJ_TYPE) { + struct kgsl_drawobj_cmd *markerobj = + CMDOBJ(drawctxt->drawqueue[j]); + + set_bit(CMDOBJ_SKIP, &markerobj->priv); + } + + j = DRAWQUEUE_NEXT(j, ADRENO_CONTEXT_DRAWQUEUE_SIZE); + } + + drawctxt->queued_timestamp = *timestamp; + + _queue_drawobj(drawctxt, drawobj); + + return 0; +} + +static void _queue_syncobj(struct adreno_context *drawctxt, + struct kgsl_drawobj_sync *syncobj, uint32_t *timestamp) +{ + struct kgsl_drawobj *drawobj = DRAWOBJ(syncobj); + + *timestamp = 0; + drawobj->timestamp = 0; + + _queue_drawobj(drawctxt, drawobj); +} + +static int _queue_markerobj(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, struct kgsl_drawobj_cmd *markerobj, + u32 *timestamp, u32 user_ts) +{ + struct kgsl_drawobj *drawobj = DRAWOBJ(markerobj); + int ret; + + ret = get_timestamp(drawctxt, drawobj, timestamp, user_ts); + if (ret) + return ret; + + /* + * See if we can fastpath this thing - if nothing is queued + * and nothing is inflight retire without bothering the GPU + */ + if (!drawctxt->queued && kgsl_check_timestamp(drawobj->device, + drawobj->context, drawctxt->queued_timestamp)) { + _retire_timestamp(drawobj); + return 1; + } + + /* + * Remember the last queued timestamp - the marker will block + * until that timestamp is expired (unless another command + * comes along and forces the marker to execute) + */ + markerobj->marker_timestamp = 
drawctxt->queued_timestamp; + drawctxt->queued_timestamp = *timestamp; + + _queue_drawobj(drawctxt, drawobj); + + return 0; +} + +static int _queue_auxobj(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, struct kgsl_drawobj *drawobj, + u32 *timestamp, u32 user_ts) +{ + int ret; + + ret = get_timestamp(drawctxt, drawobj, timestamp, user_ts); + if (ret) + return ret; + + drawctxt->queued_timestamp = *timestamp; + _queue_drawobj(drawctxt, drawobj); + + return 0; +} + +static int adreno_hwsched_queue_cmds(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, struct kgsl_drawobj *drawobj[], + u32 count, u32 *timestamp) + +{ + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct adreno_dispatch_job *job; + int ret; + unsigned int i, user_ts; + + /* + * There is always a possibility that dispatcher may end up pushing + * the last popped draw object back to the context drawqueue. Hence, + * we can only queue up to _context_drawqueue_size - 1 here to make + * sure we never let drawqueue->queued exceed _context_drawqueue_size. 
+ */ + if (!count || count > _context_drawqueue_size - 1) + return -EINVAL; + + for (i = 0; i < count; i++) { + struct kgsl_drawobj_cmd *cmdobj; + struct kgsl_memobj_node *ib; + + if (drawobj[i]->type != CMDOBJ_TYPE) + continue; + + cmdobj = CMDOBJ(drawobj[i]); + + list_for_each_entry(ib, &cmdobj->cmdlist, node) + cmdobj->numibs++; + + if (cmdobj->numibs > HWSCHED_MAX_IBS) + return -EINVAL; + } + + ret = _check_context_state(&drawctxt->base); + if (ret) + return ret; + + ret = _verify_cmdobj(dev_priv, context, drawobj, count); + if (ret) + return ret; + + /* wait for the suspend gate */ + wait_for_completion(&device->halt_gate); + + job = kmem_cache_alloc(jobs_cache, GFP_KERNEL); + if (!job) + return -ENOMEM; + + job->drawctxt = drawctxt; + + spin_lock(&drawctxt->lock); + + ret = _check_context_state_to_queue_cmds(drawctxt, count); + if (ret) { + spin_unlock(&drawctxt->lock); + kmem_cache_free(jobs_cache, job); + return ret; + } + + user_ts = *timestamp; + + for (i = 0; i < count; i++) { + + switch (drawobj[i]->type) { + case MARKEROBJ_TYPE: + ret = _queue_markerobj(adreno_dev, drawctxt, + CMDOBJ(drawobj[i]), + timestamp, user_ts); + if (ret == 1) { + spin_unlock(&drawctxt->lock); + kmem_cache_free(jobs_cache, job); + return 0; + } else if (ret) { + spin_unlock(&drawctxt->lock); + kmem_cache_free(jobs_cache, job); + return ret; + } + break; + case CMDOBJ_TYPE: + ret = _queue_cmdobj(adreno_dev, drawctxt, + CMDOBJ(drawobj[i]), + timestamp, user_ts); + if (ret) { + spin_unlock(&drawctxt->lock); + kmem_cache_free(jobs_cache, job); + return ret; + } + break; + case SYNCOBJ_TYPE: + _queue_syncobj(drawctxt, SYNCOBJ(drawobj[i]), + timestamp); + break; + case BINDOBJ_TYPE: + case TIMELINEOBJ_TYPE: + ret = _queue_auxobj(adreno_dev, drawctxt, drawobj[i], + timestamp, user_ts); + if (ret) { + spin_unlock(&drawctxt->lock); + kmem_cache_free(jobs_cache, job); + return ret; + } + break; + default: + spin_unlock(&drawctxt->lock); + kmem_cache_free(jobs_cache, job); + return 
-EINVAL; + } + + } + + spin_unlock(&drawctxt->lock); + + /* Add the context to the dispatcher pending list */ + if (_kgsl_context_get(&drawctxt->base)) { + trace_dispatch_queue_context(drawctxt); + llist_add(&job->node, &hwsched->jobs[drawctxt->base.priority]); + adreno_hwsched_issuecmds(adreno_dev); + + } else + kmem_cache_free(jobs_cache, job); + + return 0; +} + +static void retire_cmdobj(struct adreno_hwsched *hwsched, + struct kgsl_drawobj_cmd *cmdobj) +{ + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + struct kgsl_mem_entry *entry; + struct kgsl_drawobj_profiling_buffer *profile_buffer; + + if (cmdobj != NULL) { + if (drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME) + atomic64_inc(&drawobj->context->proc_priv->frame_count); + + entry = cmdobj->profiling_buf_entry; + if (entry) { + profile_buffer = kgsl_gpuaddr_to_vaddr(&entry->memdesc, + cmdobj->profiling_buffer_gpuaddr); + + if (profile_buffer == NULL) + return; + + kgsl_memdesc_unmap(&entry->memdesc); + } + } + + if (hwsched->big_cmdobj == cmdobj) { + hwsched->big_cmdobj = NULL; + kgsl_drawobj_put(drawobj); + } + + kgsl_drawobj_destroy(drawobj); +} + +static int retire_cmd_list(struct adreno_device *adreno_dev) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int count = 0; + struct cmd_list_obj *obj, *tmp; + + list_for_each_entry_safe(obj, tmp, &hwsched->cmd_list, node) { + struct kgsl_drawobj_cmd *cmdobj = obj->cmdobj; + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + + if (!kgsl_check_timestamp(device, drawobj->context, + drawobj->timestamp)) + continue; + + retire_cmdobj(hwsched, cmdobj); + + list_del_init(&obj->node); + + kmem_cache_free(obj_cache, obj); + + hwsched->inflight--; + + count++; + } + + return count; +} + +/* Take down the dispatcher and release any power states */ +static void hwsched_power_down(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_hwsched *hwsched = 
&adreno_dev->hwsched; + + mutex_lock(&device->mutex); + + if (test_and_clear_bit(ADRENO_HWSCHED_ACTIVE, &hwsched->flags)) + complete_all(&hwsched->idle_gate); + + if (test_bit(ADRENO_HWSCHED_POWER, &hwsched->flags)) { + adreno_active_count_put(adreno_dev); + clear_bit(ADRENO_HWSCHED_POWER, &hwsched->flags); + } + + mutex_unlock(&device->mutex); +} + +static void adreno_hwsched_queue_context(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + hwsched_queue_context(adreno_dev, drawctxt); + adreno_hwsched_trigger(adreno_dev); +} + +void adreno_hwsched_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + complete_all(&device->halt_gate); + + adreno_hwsched_trigger(adreno_dev); +} + +static int _skipsaverestore_store(struct adreno_device *adreno_dev, bool val) +{ + return adreno_power_cycle_bool(adreno_dev, + &adreno_dev->preempt.skipsaverestore, val); +} + +static bool _skipsaverestore_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->preempt.skipsaverestore; +} + +static int _usesgmem_store(struct adreno_device *adreno_dev, bool val) +{ + return adreno_power_cycle_bool(adreno_dev, + &adreno_dev->preempt.usesgmem, val); +} + +static bool _usesgmem_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->preempt.usesgmem; +} + +static int _preempt_level_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + return adreno_power_cycle_u32(adreno_dev, + &adreno_dev->preempt.preempt_level, + min_t(unsigned int, val, 2)); +} + +static unsigned int _preempt_level_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->preempt.preempt_level; +} + +static void change_preemption(struct adreno_device *adreno_dev, void *priv) +{ + change_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv); +} + +static int _preemption_store(struct adreno_device *adreno_dev, bool val) +{ + if (!(ADRENO_FEATURE(adreno_dev, ADRENO_PREEMPTION)) || + (test_bit(ADRENO_DEVICE_PREEMPTION, + 
&adreno_dev->priv) == val)) + return 0; + + return adreno_power_cycle(adreno_dev, change_preemption, NULL); +} + +static bool _preemption_show(struct adreno_device *adreno_dev) +{ + return adreno_is_preemption_enabled(adreno_dev); +} + +static unsigned int _preempt_count_show(struct adreno_device *adreno_dev) +{ + const struct adreno_hwsched_ops *hwsched_ops = + adreno_dev->hwsched.hwsched_ops; + + return hwsched_ops->preempt_count(adreno_dev); +} + +static int _ft_long_ib_detect_store(struct adreno_device *adreno_dev, bool val) +{ + return adreno_power_cycle_bool(adreno_dev, &adreno_dev->long_ib_detect, + val); +} + +static bool _ft_long_ib_detect_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->long_ib_detect; +} + +static ADRENO_SYSFS_BOOL(preemption); +static ADRENO_SYSFS_U32(preempt_level); +static ADRENO_SYSFS_BOOL(usesgmem); +static ADRENO_SYSFS_BOOL(skipsaverestore); +static ADRENO_SYSFS_RO_U32(preempt_count); +static ADRENO_SYSFS_BOOL(ft_long_ib_detect); + +static const struct attribute *_hwsched_attr_list[] = { + &adreno_attr_preemption.attr.attr, + &adreno_attr_preempt_level.attr.attr, + &adreno_attr_usesgmem.attr.attr, + &adreno_attr_skipsaverestore.attr.attr, + &adreno_attr_preempt_count.attr.attr, + &adreno_attr_ft_long_ib_detect.attr.attr, + NULL, +}; + +static void adreno_hwsched_dispatcher_close(struct adreno_device *adreno_dev) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (!IS_ERR_OR_NULL(hwsched->worker)) + kthread_destroy_worker(hwsched->worker); + + adreno_set_dispatch_ops(adreno_dev, NULL); + + kmem_cache_destroy(jobs_cache); + kmem_cache_destroy(obj_cache); + + sysfs_remove_files(&device->dev->kobj, _hwsched_attr_list); + + kfree(hwsched->ctxt_bad); +} + +static void force_retire_timestamp(struct kgsl_device *device, + struct kgsl_drawobj *drawobj) +{ + kgsl_sharedmem_writel(device->memstore, + KGSL_MEMSTORE_OFFSET(drawobj->context->id, soptimestamp), 
+ drawobj->timestamp); + + kgsl_sharedmem_writel(device->memstore, + KGSL_MEMSTORE_OFFSET(drawobj->context->id, eoptimestamp), + drawobj->timestamp); +} + +static void adreno_hwsched_replay(struct adreno_device *adreno_dev) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct cmd_list_obj *obj, *tmp; + u32 retired = 0; + + list_for_each_entry_safe(obj, tmp, &hwsched->cmd_list, node) { + struct kgsl_drawobj_cmd *cmdobj = obj->cmdobj; + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + struct kgsl_context *context = drawobj->context; + + /* + * Get rid of retired objects or objects that belong to detached + * or invalidated contexts + */ + if ((kgsl_check_timestamp(device, context, drawobj->timestamp)) + || kgsl_context_is_bad(context)) { + + retire_cmdobj(hwsched, cmdobj); + retired++; + list_del_init(&obj->node); + kmem_cache_free(obj_cache, obj); + hwsched->inflight--; + + continue; + } + + hwsched->hwsched_ops->submit_cmdobj(adreno_dev, cmdobj); + } + + /* Signal fences */ + if (retired) + kgsl_process_event_groups(device); +} + +static void do_fault_header(struct adreno_device *adreno_dev, + struct kgsl_drawobj *drawobj) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context); + u32 status, rptr, wptr, ib1sz, ib2sz; + u64 ib1base, ib2base; + + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS, &status); + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, &rptr); + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_WPTR, &wptr); + adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE, + ADRENO_REG_CP_IB1_BASE_HI, &ib1base); + adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BUFSZ, &ib1sz); + adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB2_BASE, + ADRENO_REG_CP_IB2_BASE_HI, &ib2base); + adreno_readreg(adreno_dev, ADRENO_REG_CP_IB2_BUFSZ, &ib2sz); + + drawobj->context->last_faulted_cmd_ts = drawobj->timestamp; + 
drawobj->context->total_fault_count++; + + pr_context(device, drawobj->context, + "ctx %d ctx_type %s ts %d status %8.8X dispatch_queue=%d rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", + drawobj->context->id, kgsl_context_type(drawctxt->type), + drawobj->timestamp, status, + drawobj->context->gmu_dispatch_queue, rptr, wptr, + ib1base, ib1sz, ib2base, ib2sz); + + trace_adreno_gpu_fault(drawobj->context->id, drawobj->timestamp, status, + rptr, wptr, ib1base, ib1sz, ib2base, ib2sz, + adreno_get_level(drawobj->context->priority)); +} + +static struct cmd_list_obj *get_active_cmdobj( + struct adreno_device *adreno_dev) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct cmd_list_obj *obj, *tmp, *active_obj = NULL; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 consumed = 0, retired = 0, prio = UINT_MAX; + struct kgsl_drawobj *drawobj = NULL; + + list_for_each_entry_safe(obj, tmp, &hwsched->cmd_list, node) { + drawobj = DRAWOBJ(obj->cmdobj); + + kgsl_readtimestamp(device, drawobj->context, + KGSL_TIMESTAMP_CONSUMED, &consumed); + kgsl_readtimestamp(device, drawobj->context, + KGSL_TIMESTAMP_RETIRED, &retired); + + if (!consumed) + continue; + + if (consumed == retired) + continue; + + /* Find the first submission that started but didn't finish */ + if (!active_obj) { + active_obj = obj; + prio = adreno_get_level(drawobj->context->priority); + continue; + } + + /* Find the highest priority active submission */ + if (adreno_get_level(drawobj->context->priority) < prio) { + active_obj = obj; + prio = adreno_get_level(drawobj->context->priority); + } + } + + if (active_obj) { + drawobj = DRAWOBJ(active_obj->cmdobj); + + if (kref_get_unless_zero(&drawobj->refcount)) { + set_bit(CMDOBJ_FAULT, &active_obj->cmdobj->priv); + return active_obj; + } + } + + return NULL; +} + +static struct cmd_list_obj *get_fault_cmdobj(struct adreno_device *adreno_dev) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct cmd_list_obj 
*obj, *tmp; + struct hfi_context_bad_cmd *bad = hwsched->ctxt_bad; + + list_for_each_entry_safe(obj, tmp, &hwsched->cmd_list, node) { + struct kgsl_drawobj *drawobj = DRAWOBJ(obj->cmdobj); + + if ((bad->ctxt_id == drawobj->context->id) && + (bad->ts == drawobj->timestamp)) { + if (kref_get_unless_zero(&drawobj->refcount)) { + set_bit(CMDOBJ_FAULT, &obj->cmdobj->priv); + return obj; + } + } + } + + return NULL; +} + +static bool context_is_throttled(struct kgsl_device *device, + struct kgsl_context *context) +{ + if (ktime_ms_delta(ktime_get(), context->fault_time) > + _fault_throttle_time) { + context->fault_time = ktime_get(); + context->fault_count = 1; + return false; + } + + context->fault_count++; + + if (context->fault_count > _fault_throttle_burst) { + pr_context(device, context, + "gpu fault threshold exceeded %d faults in %d msecs\n", + _fault_throttle_burst, _fault_throttle_time); + return true; + } + + return false; +} +static void reset_and_snapshot(struct adreno_device *adreno_dev, int fault) +{ + struct kgsl_drawobj *drawobj = NULL; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_context *context = NULL; + struct cmd_list_obj *obj; + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; + + if (device->state != KGSL_STATE_ACTIVE) + return; + + /* + * First, try to see if the faulted command object is marked + * in case there was a context bad hfi. But, with stall-on-fault, + * we know that GMU cannot send context bad hfi. Hence, attempt + * to walk the list of active submissions to find the one that + * faulted. 
+ */
+	obj = get_fault_cmdobj(adreno_dev);
+	if (!obj && (fault & ADRENO_IOMMU_PAGE_FAULT))
+		obj = get_active_cmdobj(adreno_dev);
+
+	if (!obj) {
+		kgsl_device_snapshot(device, NULL, false);
+		goto done;
+	}
+
+	drawobj = DRAWOBJ(obj->cmdobj);
+
+	context = drawobj->context;
+
+	do_fault_header(adreno_dev, drawobj);
+
+	kgsl_device_snapshot(device, context, false);
+
+	force_retire_timestamp(device, drawobj);
+
+	if ((context->flags & KGSL_CONTEXT_INVALIDATE_ON_FAULT) ||
+		(context->flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE) ||
+		(cmd->error == GMU_GPU_SW_HANG) ||
+		context_is_throttled(device, context)) {
+		adreno_drawctxt_set_guilty(device, context);
+	}
+
+	/*
+	 * Put back the reference which we incremented while trying to find
+	 * faulted command object
+	 */
+	kgsl_drawobj_put(drawobj);
+done:
+	memset(adreno_dev->hwsched.ctxt_bad, 0x0, HFI_MAX_MSG_SIZE);
+	gpudev->reset(adreno_dev);
+}
+
+static bool adreno_hwsched_do_fault(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	int fault;
+
+	fault = atomic_xchg(&hwsched->fault, 0);
+	if (fault == 0)
+		return false;
+
+	mutex_lock(&device->mutex);
+
+	reset_and_snapshot(adreno_dev, fault);
+
+	adreno_hwsched_replay(adreno_dev);
+
+	adreno_hwsched_trigger(adreno_dev);
+
+	mutex_unlock(&device->mutex);
+
+	return true;
+}
+
+static void adreno_hwsched_work(struct kthread_work *work)
+{
+	struct adreno_hwsched *hwsched = container_of(work,
+		struct adreno_hwsched, work);
+	struct adreno_device *adreno_dev = container_of(hwsched,
+		struct adreno_device, hwsched);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int count = 0;
+
+	mutex_lock(&hwsched->mutex);
+
+	if (adreno_hwsched_do_fault(adreno_dev)) {
+		mutex_unlock(&hwsched->mutex);
+		return;
+	}
+
+	/*
+	 * As long as there are inflight commands, process retired commands from
+	 * all drawqueues
+	 */
+	count += retire_cmd_list(adreno_dev);
+
+	/* Signal fences */
+	kgsl_process_event_groups(device);
+
+	/* Run the scheduler to dispatch new commands */
+	hwsched_issuecmds(adreno_dev);
+
+	if (hwsched->inflight == 0) {
+		hwsched_power_down(adreno_dev);
+	} else {
+		mutex_lock(&device->mutex);
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+		mutex_unlock(&device->mutex);
+	}
+
+	mutex_unlock(&hwsched->mutex);
+}
+
+void adreno_hwsched_fault(struct adreno_device *adreno_dev,
+	u32 fault)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	u32 curr = atomic_read(&hwsched->fault);
+
+	atomic_set(&hwsched->fault, curr | fault);
+
+	/* make sure fault is written before triggering dispatcher */
+	smp_wmb();
+
+	adreno_hwsched_trigger(adreno_dev);
+}
+
+static const struct adreno_dispatch_ops hwsched_ops = {
+	.close = adreno_hwsched_dispatcher_close,
+	.queue_cmds = adreno_hwsched_queue_cmds,
+	.queue_context = adreno_hwsched_queue_context,
+	.fault = adreno_hwsched_fault,
+	.idle = adreno_hwsched_idle,
+};
+
+int adreno_hwsched_init(struct adreno_device *adreno_dev,
+	const struct adreno_hwsched_ops *target_hwsched_ops)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	int i;
+
+	memset(hwsched, 0, sizeof(*hwsched));
+
+	hwsched->ctxt_bad = kzalloc(HFI_MAX_MSG_SIZE, GFP_KERNEL);
+	if (!hwsched->ctxt_bad)
+		return -ENOMEM;
+
+	hwsched->worker = kthread_create_worker(0, "kgsl_hwsched");
+	if (IS_ERR(hwsched->worker)) {
+		kfree(hwsched->ctxt_bad);
+		return PTR_ERR(hwsched->worker);
+	}
+
+	mutex_init(&hwsched->mutex);
+
+	kthread_init_work(&hwsched->work, adreno_hwsched_work);
+
+	jobs_cache = KMEM_CACHE(adreno_dispatch_job, 0);
+	obj_cache = KMEM_CACHE(cmd_list_obj, 0);
+
+	INIT_LIST_HEAD(&hwsched->cmd_list);
+
+	for (i = 0; i < ARRAY_SIZE(hwsched->jobs); i++) {
+		init_llist_head(&hwsched->jobs[i]);
+		init_llist_head(&hwsched->requeue[i]);
+	}
+
+	sched_set_fifo(hwsched->worker->task);
+
+	sysfs_create_files(&device->dev->kobj, _hwsched_attr_list);
+	adreno_set_dispatch_ops(adreno_dev, &hwsched_ops);
+	hwsched->hwsched_ops = target_hwsched_ops;
+	init_completion(&hwsched->idle_gate);
+	complete_all(&hwsched->idle_gate);
+	return 0;
+}
+
+void adreno_hwsched_parse_fault_cmdobj(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct cmd_list_obj *obj, *tmp;
+
+	list_for_each_entry_safe(obj, tmp, &hwsched->cmd_list, node) {
+		struct kgsl_drawobj_cmd *cmdobj = obj->cmdobj;
+
+		if (test_bit(CMDOBJ_FAULT, &cmdobj->priv)) {
+			struct kgsl_memobj_node *ib;
+
+			list_for_each_entry(ib, &cmdobj->cmdlist, node) {
+				adreno_parse_ib(KGSL_DEVICE(adreno_dev),
+					snapshot, snapshot->process,
+					ib->gpuaddr, ib->size >> 2);
+			}
+			clear_bit(CMDOBJ_FAULT, &cmdobj->priv);
+		}
+	}
+}
+
+static int unregister_context(int id, void *ptr, void *data)
+{
+	struct kgsl_context *context = ptr;
+
+	/*
+	 * We don't need to send the unregister hfi packet because
+	 * we are anyway going to lose the gmu state of registered
+	 * contexts. So just reset the flag so that the context
+	 * registers with gmu on its first submission post slumber.
+ */ + context->gmu_registered = false; + + return 0; +} + +void adreno_hwsched_unregister_contexts(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + read_lock(&device->context_lock); + idr_for_each(&device->context_idr, unregister_context, NULL); + read_unlock(&device->context_lock); +} + +static int hwsched_idle(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + int ret; + + /* Block any new submissions from being submitted */ + adreno_get_gpu_halt(adreno_dev); + + mutex_unlock(&device->mutex); + + /* + * Flush the worker to make sure all executing + * or pending dispatcher works on worker are + * finished + */ + adreno_hwsched_flush(adreno_dev); + + ret = wait_for_completion_timeout(&hwsched->idle_gate, + msecs_to_jiffies(ADRENO_IDLE_TIMEOUT)); + if (ret == 0) { + ret = -ETIMEDOUT; + WARN(1, "hwsched halt timeout\n"); + } else if (ret < 0) { + dev_err(device->dev, "hwsched halt failed %d\n", ret); + } else { + ret = 0; + } + + mutex_lock(&device->mutex); + + /* + * This will allow the dispatcher to start submitting to + * hardware once device mutex is released + */ + adreno_put_gpu_halt(adreno_dev); + + /* + * Requeue dispatcher work to resubmit pending commands + * that may have been blocked due to this idling request + */ + adreno_hwsched_trigger(adreno_dev); + return ret; +} + +int adreno_hwsched_idle(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + unsigned long wait = jiffies + msecs_to_jiffies(ADRENO_IDLE_TIMEOUT); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int ret; + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return -EDEADLK; + + if (!kgsl_state_is_awake(device)) + return 0; + + ret = hwsched_idle(adreno_dev); + if (ret) + return ret; + + do { + if 
(hwsched_in_fault(hwsched)) + return -EDEADLK; + + if (gpudev->hw_isidle(adreno_dev)) + return 0; + } while (time_before(jiffies, wait)); + + /* + * Under rare conditions, preemption can cause the while loop to exit + * without checking if the gpu is idle. check one last time before we + * return failure. + */ + if (hwsched_in_fault(hwsched)) + return -EDEADLK; + + if (gpudev->hw_isidle(adreno_dev)) + return 0; + + return -ETIMEDOUT; +} diff --git a/adreno_hwsched.h b/adreno_hwsched.h new file mode 100644 index 0000000000..4ae34fad48 --- /dev/null +++ b/adreno_hwsched.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. + */ + +#ifndef _ADRENO_HWSCHED_H_ +#define _ADRENO_HWSCHED_H_ + +/** + * struct adreno_hwsched_ops - Function table to hook hwscheduler things + * to target specific routines + */ +struct adreno_hwsched_ops { + /** + * @submit_cmdobj - Target specific function to submit IBs to hardware + */ + int (*submit_cmdobj)(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj); + /** + * @preempt_count - Target specific function to get preemption count + */ + u32 (*preempt_count)(struct adreno_device *adreno_dev); +}; + +/** + * struct adreno_hwsched - Container for the hardware scheduler + */ +struct adreno_hwsched { + /** @mutex: Mutex needed to run dispatcher function */ + struct mutex mutex; + /** @flags: Container for the dispatcher internal flags */ + unsigned long flags; + /** @inflight: Number of active submissions to the dispatch queues */ + u32 inflight; + /** @jobs - Array of dispatch job lists for each priority level */ + struct llist_head jobs[16]; + /** @requeue - Array of lists for dispatch jobs that got requeued */ + struct llist_head requeue[16]; + /** @work: The work structure to execute dispatcher function */ + struct kthread_work work; + /** @cmd_list: List of objects submitted to dispatch queues */ + struct list_head cmd_list; + /** 
@fault: Atomic to record a fault */ + atomic_t fault; + struct kthread_worker *worker; + /** @hwsched_ops: Container for target specific hwscheduler ops */ + const struct adreno_hwsched_ops *hwsched_ops; + /** @ctxt_bad: Container for the context bad hfi packet */ + void *ctxt_bad; + /** @idle_gate: Gate to wait on for hwscheduler to idle */ + struct completion idle_gate; + /** @big_cmdobj = Points to the big IB that is inflight */ + struct kgsl_drawobj_cmd *big_cmdobj; +}; + +/* + * This value is based on maximum number of IBs that can fit + * in the ringbuffer. + */ +#define HWSCHED_MAX_IBS 2000 + +enum adreno_hwsched_flags { + ADRENO_HWSCHED_POWER = 0, + ADRENO_HWSCHED_ACTIVE, +}; + +/** + * adreno_hwsched_trigger - Function to schedule the hwsched thread + * @adreno_dev: A handle to adreno device + * + * Schedule the hw dispatcher for retiring and submitting command objects + */ +void adreno_hwsched_trigger(struct adreno_device *adreno_dev); + +/** + * adreno_hwsched_start() - activate the hwsched dispatcher + * @adreno_dev: pointer to the adreno device + * + * Enable dispatcher thread to execute + */ +void adreno_hwsched_start(struct adreno_device *adreno_dev); +/** + * adreno_hwsched_dispatcher_init() - Initialize the hwsched dispatcher + * @adreno_dev: pointer to the adreno device + * @hwsched_ops: Pointer to target specific hwsched ops + * + * Set up the dispatcher resources. + * Return: 0 on success or negative on failure. 
+ */ +int adreno_hwsched_init(struct adreno_device *adreno_dev, + const struct adreno_hwsched_ops *hwsched_ops); + +/** + * adreno_hwsched_fault - Set hwsched fault to request recovery + * @adreno_dev: A handle to adreno device + * @fault: The type of fault + */ +void adreno_hwsched_fault(struct adreno_device *adreno_dev, u32 fault); + +/** + * adreno_hwsched_parse_fault_ib - Parse the faulty submission + * @adreno_dev: pointer to the adreno device + * @snapshot: Pointer to the snapshot structure + * + * Walk the list of active submissions to find the one that faulted and + * parse it so that relevant command buffers can be added to the snapshot + */ +void adreno_hwsched_parse_fault_cmdobj(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); + +void adreno_hwsched_flush(struct adreno_device *adreno_dev); + +/** + * adreno_hwsched_unregister_contexts - Reset context gmu_registered bit + * @adreno_dev: pointer to the adreno device + * + * Walk the list of contexts and reset the gmu_registered for all + * contexts + */ +void adreno_hwsched_unregister_contexts(struct adreno_device *adreno_dev); + +/** + * adreno_hwsched_idle - Wait for dispatcher and hardware to become idle + * @adreno_dev: A handle to adreno device + * + * Return: 0 on success or negative error on failure + */ +int adreno_hwsched_idle(struct adreno_device *adreno_dev); +#endif diff --git a/adreno_ioctl.c b/adreno_ioctl.c new file mode 100644 index 0000000000..2b69d7934d --- /dev/null +++ b/adreno_ioctl.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. + */ + +#include + +#include "adreno.h" +#include "adreno_a5xx.h" + +/* + * Add a perfcounter to the per-fd list. 
+ * Call with the device mutex held + */ +static int adreno_process_perfcounter_add(struct kgsl_device_private *dev_priv, + unsigned int groupid, unsigned int countable) +{ + struct adreno_device_private *adreno_priv = container_of(dev_priv, + struct adreno_device_private, dev_priv); + struct adreno_perfcounter_list_node *perfctr; + + perfctr = kmalloc(sizeof(*perfctr), GFP_KERNEL); + if (!perfctr) + return -ENOMEM; + + perfctr->groupid = groupid; + perfctr->countable = countable; + + /* add the pair to process perfcounter list */ + list_add(&perfctr->node, &adreno_priv->perfcounter_list); + return 0; +} + +/* + * Remove a perfcounter from the per-fd list. + * Call with the device mutex held + */ +static int adreno_process_perfcounter_del(struct kgsl_device_private *dev_priv, + unsigned int groupid, unsigned int countable) +{ + struct adreno_device_private *adreno_priv = container_of(dev_priv, + struct adreno_device_private, dev_priv); + struct adreno_perfcounter_list_node *p; + + list_for_each_entry(p, &adreno_priv->perfcounter_list, node) { + if (p->groupid == groupid && p->countable == countable) { + list_del(&p->node); + kfree(p); + return 0; + } + } + return -ENODEV; +} + +long adreno_ioctl_perfcounter_get(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_perfcounter_get *get = data; + int result; + + mutex_lock(&device->mutex); + + /* + * adreno_perfcounter_get() is called by kernel clients + * during start(), so it is not safe to take an + * active count inside that function. 
+ */ + + result = adreno_perfcntr_active_oob_get(adreno_dev); + if (result) { + mutex_unlock(&device->mutex); + return (long)result; + } + + result = adreno_perfcounter_get(adreno_dev, + get->groupid, get->countable, &get->offset, + &get->offset_hi, PERFCOUNTER_FLAG_NONE); + + /* Add the perfcounter into the list */ + if (!result) { + result = adreno_process_perfcounter_add(dev_priv, get->groupid, + get->countable); + if (result) + adreno_perfcounter_put(adreno_dev, get->groupid, + get->countable, PERFCOUNTER_FLAG_NONE); + } + + adreno_perfcntr_active_oob_put(adreno_dev); + + mutex_unlock(&device->mutex); + + return (long) result; +} + +long adreno_ioctl_perfcounter_put(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_perfcounter_put *put = data; + int result; + + mutex_lock(&device->mutex); + + /* Delete the perfcounter from the process list */ + result = adreno_process_perfcounter_del(dev_priv, put->groupid, + put->countable); + + /* Put the perfcounter refcount */ + if (!result) + adreno_perfcounter_put(adreno_dev, put->groupid, + put->countable, PERFCOUNTER_FLAG_NONE); + mutex_unlock(&device->mutex); + + return (long) result; +} + +static long adreno_ioctl_perfcounter_query(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device); + struct kgsl_perfcounter_query *query = data; + + return (long) adreno_perfcounter_query_group(adreno_dev, query->groupid, + query->countables, query->count, &query->max_counters); +} + +static long adreno_ioctl_perfcounter_read(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device); + struct kgsl_perfcounter_read *read = data; + + return (long) adreno_perfcounter_read_group(adreno_dev, read->reads, + read->count); +} + 
+static long adreno_ioctl_preemption_counters_query(
+		struct kgsl_device_private *dev_priv,
+		unsigned int cmd, void *data)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device);
+	struct kgsl_preemption_counters_query *read = data;
+	int size_level = A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE;
+	int levels_to_copy;
+
+	if (!adreno_is_a5xx(adreno_dev) ||
+		!adreno_is_preemption_enabled(adreno_dev))
+		return -EOPNOTSUPP;
+
+	if (read->size_user < size_level)
+		return -EINVAL;
+
+	/* Calculate number of preemption counter levels to copy to userspace */
+	levels_to_copy = (read->size_user / size_level);
+
+	levels_to_copy = min_t(int, levels_to_copy,
+		ARRAY_SIZE(adreno_dev->ringbuffers));
+
+	if (copy_to_user(u64_to_user_ptr(read->counters),
+		adreno_dev->preempt.scratch->hostptr,
+		levels_to_copy * size_level))
+		return -EFAULT;
+
+	read->max_priority_level = levels_to_copy;
+	read->size_priority_level = size_level;
+
+	return 0;
+}
+
+long adreno_ioctl_helper(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, unsigned long arg,
+		const struct kgsl_ioctl *cmds, int len)
+{
+	unsigned char data[128] = { 0 };
+	long ret;
+	int i;
+
+	for (i = 0; i < len; i++) {
+		if (_IOC_NR(cmd) == _IOC_NR(cmds[i].cmd))
+			break;
+	}
+
+	if (i == len)
+		return -ENOIOCTLCMD;
+
+	if (_IOC_SIZE(cmds[i].cmd) > sizeof(data)) { /* compare decoded size, not _IOC_SIZE of a bool */
+		dev_err_ratelimited(dev_priv->device->dev,
+			"data too big for ioctl 0x%08x: %d/%zu\n",
+			cmd, _IOC_SIZE(cmds[i].cmd), sizeof(data));
+		return -EINVAL;
+	}
+
+	if (_IOC_SIZE(cmds[i].cmd)) {
+		ret = kgsl_ioctl_copy_in(cmds[i].cmd, cmd, arg, data);
+
+		if (ret)
+			return ret;
+	} else {
+		memset(data, 0, sizeof(data));
+	}
+
+	ret = cmds[i].func(dev_priv, cmd, data);
+
+	if (ret == 0 && _IOC_SIZE(cmds[i].cmd))
+		ret = kgsl_ioctl_copy_out(cmds[i].cmd, cmd, arg, data);
+
+	return ret;
+}
+
+static struct kgsl_ioctl adreno_ioctl_funcs[] = {
+	{ IOCTL_KGSL_PERFCOUNTER_GET, adreno_ioctl_perfcounter_get },
+	{ IOCTL_KGSL_PERFCOUNTER_PUT,
adreno_ioctl_perfcounter_put }, + { IOCTL_KGSL_PERFCOUNTER_QUERY, adreno_ioctl_perfcounter_query }, + { IOCTL_KGSL_PERFCOUNTER_READ, adreno_ioctl_perfcounter_read }, + { IOCTL_KGSL_PREEMPTIONCOUNTER_QUERY, + adreno_ioctl_preemption_counters_query }, +}; + +long adreno_ioctl(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg) +{ + return adreno_ioctl_helper(dev_priv, cmd, arg, + adreno_ioctl_funcs, ARRAY_SIZE(adreno_ioctl_funcs)); +} diff --git a/adreno_perfcounter.c b/adreno_perfcounter.c new file mode 100644 index 0000000000..c9260e9d6e --- /dev/null +++ b/adreno_perfcounter.c @@ -0,0 +1,580 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2002,2007-2020, The Linux Foundation. All rights reserved. + */ + +#include + +#include "adreno.h" +#include "adreno_perfcounter.h" + +static inline int active_countable(unsigned int countable) +{ + return ((countable != KGSL_PERFCOUNTER_NOT_USED) && + (countable != KGSL_PERFCOUNTER_BROKEN)); +} + +/** + * adreno_perfcounter_restore() - Restore performance counters + * @adreno_dev: adreno device to configure + * + * Load the physical performance counters with 64 bit value which are + * saved on GPU power collapse. 
+ */ +void adreno_perfcounter_restore(struct adreno_device *adreno_dev) +{ + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; + unsigned int counter, groupid; + + if (counters == NULL) + return; + + for (groupid = 0; groupid < counters->group_count; groupid++) { + group = &(counters->groups[groupid]); + + if (!group->load) + continue; + + /* Restore the counters for the group */ + for (counter = 0; counter < group->reg_count; counter++) { + /* If not active or broken, skip this counter */ + if (!active_countable(group->regs[counter].countable)) + continue; + + group->load(adreno_dev, &group->regs[counter]); + } + } +} + +/** + * adreno_perfcounter_save() - Save performance counters + * @adreno_dev: adreno device to configure + * + * Save the performance counter values before GPU power collapse. + * The saved values are restored on restart. + * This ensures physical counters are coherent across power-collapse. + * This function must be called with the oob_gpu set request. 
+ */ +inline void adreno_perfcounter_save(struct adreno_device *adreno_dev) +{ + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; + unsigned int counter, groupid; + + if (counters == NULL) + return; + + for (groupid = 0; groupid < counters->group_count; groupid++) { + group = &(counters->groups[groupid]); + + /* Save the counter values for the group */ + for (counter = 0; counter < group->reg_count; counter++) { + /* If not active or broken, skip this counter */ + if (!active_countable(group->regs[counter].countable)) + continue; + + /* accumulate values for non-loadable counters */ + if (group->regs[counter].load_bit >= 0) + group->regs[counter].value = 0; + + group->regs[counter].value = + group->regs[counter].value + + adreno_perfcounter_read(adreno_dev, groupid, + counter); + } + } +} + +static int adreno_perfcounter_enable(struct adreno_device *adreno_dev, + unsigned int group, unsigned int counter, unsigned int countable); + +/** + * adreno_perfcounter_start: Enable performance counters + * @adreno_dev: Adreno device to configure + * + * Ensure all performance counters are enabled that are allocated. Since + * the device was most likely stopped, we can't trust that the counters + * are still valid so make it so. + */ + +void adreno_perfcounter_start(struct adreno_device *adreno_dev) +{ + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; + unsigned int i, j; + + if (counters == NULL) + return; + /* group id iter */ + for (i = 0; i < counters->group_count; i++) { + group = &(counters->groups[i]); + + /* countable iter */ + for (j = 0; j < group->reg_count; j++) { + if (!active_countable(group->regs[j].countable)) + continue; + + /* + * The GPU has to be idle before calling the perfcounter + * enable function, but since this function is called + * during start we already know the GPU is idle. 
+ * Since the countable/counter pairs have already been + * validated, there is no way for _enable() to fail so + * no need to check the return code. + */ + adreno_perfcounter_enable(adreno_dev, i, j, + group->regs[j].countable); + } + } +} + +/** + * adreno_perfcounter_read_group() - Determine which countables are in counters + * @adreno_dev: Adreno device to configure + * @reads: List of kgsl_perfcounter_read_groups + * @count: Length of list + * + * Read the performance counters for the groupid/countable pairs and return + * the 64 bit result for each pair + */ + +int adreno_perfcounter_read_group(struct adreno_device *adreno_dev, + struct kgsl_perfcounter_read_group __user *reads, unsigned int count) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; + struct kgsl_perfcounter_read_group *list = NULL; + unsigned int i, j; + int ret = 0; + + if (counters == NULL) + return -EINVAL; + + /* sanity check params passed in */ + if (reads == NULL || count == 0 || count > 100) + return -EINVAL; + + list = kmalloc_array(count, sizeof(struct kgsl_perfcounter_read_group), + GFP_KERNEL); + if (!list) + return -ENOMEM; + + if (copy_from_user(list, reads, + sizeof(struct kgsl_perfcounter_read_group) * count)) { + ret = -EFAULT; + goto done; + } + + mutex_lock(&device->mutex); + + ret = adreno_perfcntr_active_oob_get(adreno_dev); + if (ret) { + mutex_unlock(&device->mutex); + goto done; + } + + /* list iterator */ + for (j = 0; j < count; j++) { + + list[j].value = 0; + + /* Verify that the group ID is within range */ + if (list[j].groupid >= counters->group_count) { + ret = -EINVAL; + break; + } + + group = &(counters->groups[list[j].groupid]); + + /* group/counter iterator */ + for (i = 0; i < group->reg_count; i++) { + if (group->regs[i].countable == list[j].countable) { + list[j].value = adreno_perfcounter_read( + adreno_dev, 
list[j].groupid, i); + break; + } + } + } + + adreno_perfcntr_active_oob_put(adreno_dev); + + mutex_unlock(&device->mutex); + + /* write the data */ + if (ret == 0) + if (copy_to_user(reads, list, + sizeof(struct kgsl_perfcounter_read_group) * count)) + ret = -EFAULT; + +done: + kfree(list); + return ret; +} + +/** + * adreno_perfcounter_get_groupid() - Get the performance counter ID + * @adreno_dev: Adreno device + * @name: Performance counter group name string + * + * Get the groupid based on the name and return this ID + */ + +int adreno_perfcounter_get_groupid(struct adreno_device *adreno_dev, + const char *name) +{ + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; + int i; + + if (name == NULL || counters == NULL) + return -EINVAL; + + for (i = 0; i < counters->group_count; ++i) { + group = &(counters->groups[i]); + + /* make sure there is a name for this group */ + if (group->name == NULL) + continue; + + /* verify name and length */ + if (strlen(name) == strlen(group->name) && + strcmp(group->name, name) == 0) + return i; + } + + return -EINVAL; +} + +/** + * adreno_perfcounter_get_name() - Get the group name + * @adreno_dev: Adreno device + * @groupid: Desired performance counter groupid + * + * Get the name based on the groupid and return it + */ + +const char *adreno_perfcounter_get_name(struct adreno_device *adreno_dev, + unsigned int groupid) +{ + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + + if (counters != NULL && groupid < counters->group_count) + return counters->groups[groupid].name; + + return NULL; +} + +/** + * adreno_perfcounter_query_group: Determine which countables are in counters + * @adreno_dev: Adreno device to configure + * @groupid: Desired performance counter group + * @countables: Return list of all countables in the groups counters + * @count: Max length of the array + * @max_counters: max counters for the groupid + * + 
* Query the current state of counters for the group. + */ + +int adreno_perfcounter_query_group(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int __user *countables, + unsigned int count, unsigned int *max_counters) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; + unsigned int i, t; + int ret = 0; + unsigned int *buf; + + *max_counters = 0; + + if (counters == NULL || groupid >= counters->group_count) + return -EINVAL; + + mutex_lock(&device->mutex); + + group = &(counters->groups[groupid]); + *max_counters = group->reg_count; + + /* + * if NULL countable or *count of zero, return max reg_count in + * *max_counters and return success + */ + if (countables == NULL || count == 0) { + mutex_unlock(&device->mutex); + return 0; + } + + t = min_t(unsigned int, group->reg_count, count); + + buf = kmalloc_array(t, sizeof(unsigned int), GFP_KERNEL); + if (buf == NULL) { + mutex_unlock(&device->mutex); + return -ENOMEM; + } + + for (i = 0; i < t; i++) + buf[i] = group->regs[i].countable; + + mutex_unlock(&device->mutex); + + if (copy_to_user(countables, buf, sizeof(unsigned int) * t)) + ret = -EFAULT; + + kfree(buf); + + return ret; +} + +static inline void refcount_group(const struct adreno_perfcount_group *group, + unsigned int reg, unsigned int flags, + unsigned int *lo, unsigned int *hi) +{ + if (flags & PERFCOUNTER_FLAG_KERNEL) + group->regs[reg].kernelcount++; + else + group->regs[reg].usercount++; + + if (lo) + *lo = group->regs[reg].offset; + + if (hi) + *hi = group->regs[reg].offset_hi; +} + +/** + * adreno_perfcounter_get: Try to put a countable in an available counter + * @adreno_dev: Adreno device to configure + * @groupid: Desired performance counter group + * @countable: Countable desired to be in a counter + * @offset: Return offset of the LO counter assigned + * @offset_hi: Return offset of the HI 
counter assigned + * @flags: Used to setup kernel perf counters + * + * Try to place a countable in an available counter. If the countable is + * already in a counter, reference count the counter/countable pair resource + * and return success + */ + +int adreno_perfcounter_get(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable, unsigned int *offset, + unsigned int *offset_hi, unsigned int flags) +{ + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; + unsigned int empty = -1; + int ret = 0; + + /* always clear return variables */ + if (offset) + *offset = 0; + if (offset_hi) + *offset_hi = 0; + + if (counters == NULL) + return -EINVAL; + + if (groupid >= counters->group_count) + return -EINVAL; + + group = &(counters->groups[groupid]); + + if (group->flags & ADRENO_PERFCOUNTER_GROUP_FIXED) { + /* + * In fixed groups the countable equals the fixed register the + * user wants. First make sure it is in range + */ + + if (countable >= group->reg_count) + return -EINVAL; + + /* If it is already reserved, just increase the refcounts */ + if ((group->regs[countable].kernelcount != 0) || + (group->regs[countable].usercount != 0)) { + refcount_group(group, countable, flags, + offset, offset_hi); + return 0; + } + + empty = countable; + } else { + unsigned int i; + + /* + * Check if the countable is already associated with a counter. + * Refcount and return the offset, otherwise, try and find an + * empty counter and assign the countable to it. 
+ */ + + for (i = 0; i < group->reg_count; i++) { + if (group->regs[i].countable == countable) { + refcount_group(group, i, flags, + offset, offset_hi); + return 0; + } else if (group->regs[i].countable == + KGSL_PERFCOUNTER_NOT_USED) { + /* keep track of unused counter */ + empty = i; + } + } + } + + /* no available counters, so do nothing else */ + if (empty == -1) + return -EBUSY; + + /* initialize the new counter */ + group->regs[empty].countable = countable; + + /* enable the new counter */ + ret = adreno_perfcounter_enable(adreno_dev, groupid, empty, countable); + if (ret) { + /* Put back the perfcounter */ + if (!(group->flags & ADRENO_PERFCOUNTER_GROUP_FIXED)) + group->regs[empty].countable = + KGSL_PERFCOUNTER_NOT_USED; + return ret; + } + + /* set initial kernel and user count */ + if (flags & PERFCOUNTER_FLAG_KERNEL) { + group->regs[empty].kernelcount = 1; + group->regs[empty].usercount = 0; + } else { + group->regs[empty].kernelcount = 0; + group->regs[empty].usercount = 1; + } + + if (offset) + *offset = group->regs[empty].offset; + if (offset_hi) + *offset_hi = group->regs[empty].offset_hi; + + return ret; +} + + +/** + * adreno_perfcounter_put: Release a countable from counter resource + * @adreno_dev: Adreno device to configure + * @groupid: Desired performance counter group + * @countable: Countable desired to be freed from a counter + * @flags: Flag to determine if kernel or user space request + * + * Put a performance counter/countable pair that was previously received. If + * noone else is using the countable, free up the counter for others. 
+ */ +int adreno_perfcounter_put(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable, unsigned int flags) +{ + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; + unsigned int i; + + if (counters == NULL || groupid >= counters->group_count) + return -EINVAL; + + group = &(counters->groups[groupid]); + + /* + * Find if the counter/countable pair is used currently. + * Start cycling through registers in the bank. + */ + for (i = 0; i < group->reg_count; i++) { + /* check if countable assigned is what we are looking for */ + if (group->regs[i].countable == countable) { + /* found pair, book keep count based on request type */ + if (flags & PERFCOUNTER_FLAG_KERNEL && + group->regs[i].kernelcount > 0) + group->regs[i].kernelcount--; + else if (group->regs[i].usercount > 0) + group->regs[i].usercount--; + else + break; + + /* mark available if not used anymore */ + if (group->regs[i].kernelcount == 0 && + group->regs[i].usercount == 0) + group->regs[i].countable = + KGSL_PERFCOUNTER_NOT_USED; + + return 0; + } + } + + return -EINVAL; +} + +/** + * adreno_perfcounter_enable - Configure a performance counter for a countable + * @adreno_dev - Adreno device to configure + * @group - Desired performance counter group + * @counter - Desired performance counter in the group + * @countable - Desired countable + * + * Function is used for adreno cores + * Physically set up a counter within a group with the desired countable + * Return 0 on success else error code + */ +static int adreno_perfcounter_enable(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int counter, unsigned int countable) +{ + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; + + if (counters == NULL) + return -EINVAL; + + if (groupid >= counters->group_count) + return -EINVAL; + + group = &counters->groups[groupid]; 
+ + if (counter >= group->reg_count) + return -EINVAL; + + return group->enable(adreno_dev, group, counter, countable); +} + +/** + * adreno_perfcounter_read() - Reads a performance counter + * @adreno_dev: The device on which the counter is running + * @group: The group of the counter + * @counter: The counter within the group + * + * Function is used to read the counter of adreno devices + * Returns the 64 bit counter value on success else 0. + */ +uint64_t adreno_perfcounter_read(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int counter) +{ + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; + + /* Lets hope this doesn't fail. Now subfunctions don't need to check */ + if (counters == NULL) + return 0; + + if (groupid >= counters->group_count) + return 0; + + group = &counters->groups[groupid]; + + if (counter >= group->reg_count) + return 0; + + return group->read(adreno_dev, group, counter); +} diff --git a/adreno_perfcounter.h b/adreno_perfcounter.h new file mode 100644 index 0000000000..85006b0174 --- /dev/null +++ b/adreno_perfcounter.h @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2008-2015,2017,2019-2021 The Linux Foundation. All rights reserved. 
+ */
+#ifndef __ADRENO_PERFCOUNTER_H
+#define __ADRENO_PERFCOUNTER_H
+
+struct adreno_device;
+
+/* ADRENO_PERFCOUNTERS - Given an adreno device, return the perfcounters list */
+#define ADRENO_PERFCOUNTERS(_a) ((_a)->gpucore->perfcounters)
+
+#define PERFCOUNTER_FLAG_NONE 0x0
+#define PERFCOUNTER_FLAG_KERNEL 0x1
+
+/* Structs to maintain the list of active performance counters */
+
+/**
+ * struct adreno_perfcount_register: register state
+ * @countable: countable the register holds
+ * @kernelcount: number of kernel users of the register
+ * @usercount: number of user space users of the register
+ * @offset: register hardware offset (offset_hi holds the upper-32-bit half)
+ * @load_bit: The bit number in LOAD register which corresponds to this counter
+ * @select: The countable register offset
+ * @value: The 64 bit countable register value
+ */
+struct adreno_perfcount_register {
+	unsigned int countable;
+	unsigned int kernelcount;
+	unsigned int usercount;
+	unsigned int offset;
+	unsigned int offset_hi;
+	int load_bit;
+	unsigned int select;
+	uint64_t value;
+};
+
+/**
+ * struct adreno_perfcount_group: registers for a hardware group
+ * @regs: available registers for this group
+ * @reg_count: total registers for this group
+ * @name: group name for this group
+ */
+struct adreno_perfcount_group {
+	struct adreno_perfcount_register *regs;
+	unsigned int reg_count;
+	const char *name;
+	unsigned long flags;
+	int (*enable)(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable);
+	u64 (*read)(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter);
+	void (*load)(struct adreno_device *adreno_dev,
+		struct adreno_perfcount_register *reg);
+};
+
+/*
+ * ADRENO_PERFCOUNTER_GROUP_FIXED indicates that a perfcounter group is fixed -
+ * instead of having configurable countables like the other groups, registers in
+ * fixed groups have a hardwired countable.
So when the user requests a + * countable in one of these groups, that countable should be used as the + * register offset to return + */ + +#define ADRENO_PERFCOUNTER_GROUP_FIXED BIT(0) + +/* + * ADRENO_PERFCOUNTER_GROUP_RESTORE indicates CP needs to restore the select + * registers of this perfcounter group as part of preemption and IFPC + */ +#define ADRENO_PERFCOUNTER_GROUP_RESTORE BIT(1) + + +/** + * adreno_perfcounts: all available perfcounter groups + * @groups: available groups for this device + * @group_count: total groups for this device + */ +struct adreno_perfcounters { + const struct adreno_perfcount_group *groups; + unsigned int group_count; +}; + +#define ADRENO_PERFCOUNTER_GROUP_FLAGS(core, offset, name, flags, \ + enable, read, load) \ + [KGSL_PERFCOUNTER_GROUP_##offset] = { core##_perfcounters_##name, \ + ARRAY_SIZE(core##_perfcounters_##name), __stringify(name), flags, \ + enable, read, load } + +#define ADRENO_PERFCOUNTER_GROUP(core, offset, name, enable, read, load) \ + ADRENO_PERFCOUNTER_GROUP_FLAGS(core, offset, name, 0, enable, read, \ + load) + +int adreno_perfcounter_query_group(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int __user *countables, + unsigned int count, unsigned int *max_counters); + +int adreno_perfcounter_read_group(struct adreno_device *adreno_dev, + struct kgsl_perfcounter_read_group __user *reads, unsigned int count); + +void adreno_perfcounter_restore(struct adreno_device *adreno_dev); + +void adreno_perfcounter_save(struct adreno_device *adreno_dev); + +void adreno_perfcounter_start(struct adreno_device *adreno_dev); + +int adreno_perfcounter_get_groupid(struct adreno_device *adreno_dev, + const char *name); + +uint64_t adreno_perfcounter_read(struct adreno_device *adreno_dev, + unsigned int group, unsigned int counter); + +const char *adreno_perfcounter_get_name(struct adreno_device + *adreno_dev, unsigned int groupid); + +int adreno_perfcounter_get(struct adreno_device *adreno_dev, + unsigned 
int groupid, unsigned int countable, unsigned int *offset, + unsigned int *offset_hi, unsigned int flags); + +int adreno_perfcounter_put(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable, unsigned int flags); + +static inline int adreno_perfcounter_kernel_get( + struct adreno_device *adreno_dev, + int group, int countable, u32 *lo, u32 *hi) +{ + if (*lo) + return 0; + + return adreno_perfcounter_get(adreno_dev, group, countable, lo, hi, + PERFCOUNTER_FLAG_KERNEL); +} + +#endif /* __ADRENO_PERFCOUNTER_H */ diff --git a/adreno_pm4types.h b/adreno_pm4types.h new file mode 100644 index 0000000000..1d5ab43fa9 --- /dev/null +++ b/adreno_pm4types.h @@ -0,0 +1,404 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. + */ +#ifndef __ADRENO_PM4TYPES_H +#define __ADRENO_PM4TYPES_H + +#include "adreno.h" + +#define CP_TYPE0_PKT (0 << 30) +#define CP_TYPE3_PKT (3 << 30) +#define CP_TYPE4_PKT (4 << 28) +#define CP_TYPE7_PKT (7 << 28) + +#define PM4_TYPE4_PKT_SIZE_MAX 128 + +/* type3 packets */ + +/* Enable preemption flag */ +#define CP_PREEMPT_ENABLE 0x1C +/* Preemption token command on which preemption occurs */ +#define CP_PREEMPT_TOKEN 0x1E +/* Bit to set in CP_PREEMPT_TOKEN ordinal for interrupt on preemption */ +#define CP_PREEMPT_ORDINAL_INTERRUPT 24 + +/* Wait for memory writes to complete */ +#define CP_WAIT_MEM_WRITES 0x12 + +/* initialize CP's micro-engine */ +#define CP_ME_INIT 0x48 + +/* skip N 32-bit words to get to the next packet */ +#define CP_NOP 0x10 + +/* indirect buffer dispatch. 
same as IB, but init is pipelined */ +#define CP_INDIRECT_BUFFER_PFD 0x37 + +/* wait for the IDLE state of the engine */ +#define CP_WAIT_FOR_IDLE 0x26 + +/* wait until a register or memory location is a specific value */ +#define CP_WAIT_REG_MEM 0x3c + +/* wait until a register location is equal to a specific value */ +#define CP_WAIT_REG_EQ 0x52 + +/* switches SMMU pagetable, used on a5xx only */ +#define CP_SMMU_TABLE_UPDATE 0x53 + +/* Set internal CP registers, used to indicate context save data addresses */ +#define CP_SET_PSEUDO_REGISTER 0x56 + +/* Tell CP the current operation mode, indicates save and restore procedure */ +#define CP_SET_MARKER 0x65 + +/* register read/modify/write */ +#define CP_REG_RMW 0x21 + +/* Set binning configuration registers */ +#define CP_SET_BIN_DATA 0x2f + +/* reads register in chip and writes to memory */ +#define CP_REG_TO_MEM 0x3e + +/* write N 32-bit words to memory */ +#define CP_MEM_WRITE 0x3d + +/* conditional execution of a sequence of packets */ +#define CP_COND_EXEC 0x44 + +/* conditional write to memory or register */ +#define CP_COND_WRITE 0x45 + +/* generate an event that creates a write to memory when completed */ +#define CP_EVENT_WRITE 0x46 + +/* initiate fetch of index buffer and draw */ +#define CP_DRAW_INDX 0x22 + +/* New draw packets defined for A4XX */ +#define CP_DRAW_INDX_OFFSET 0x38 +#define CP_DRAW_INDIRECT 0x28 +#define CP_DRAW_INDX_INDIRECT 0x29 +#define CP_DRAW_AUTO 0x24 + +/* load constant into chip and to memory */ +#define CP_SET_CONSTANT 0x2d + +/* selective invalidation of state pointers */ +#define CP_INVALIDATE_STATE 0x3b + +/* generate interrupt from the command stream */ +#define CP_INTERRUPT 0x40 + +/* A5XX Enable yield in RB only */ +#define CP_YIELD_ENABLE 0x1C + +#define CP_WHERE_AM_I 0x62 + +/* Enable/Disable/Defer A5x global preemption model */ +#define CP_PREEMPT_ENABLE_GLOBAL 0x69 + +/* Enable/Disable A5x local preemption model */ +#define CP_PREEMPT_ENABLE_LOCAL 0x6A + +/* Yeild token 
on a5xx similar to CP_PREEMPT on a4xx */ +#define CP_CONTEXT_SWITCH_YIELD 0x6B + +/* Inform CP about current render mode (needed for a5xx preemption) */ +#define CP_SET_RENDER_MODE 0x6C + +/* Write register, ignoring context state for context sensitive registers */ +#define CP_REG_WR_NO_CTXT 0x78 + +/* + * for A4xx + * Write to register with address that does not fit into type-0 pkt + */ +#define CP_WIDE_REG_WRITE 0x74 + + +/* PFP waits until the FIFO between the PFP and the ME is empty */ +#define CP_WAIT_FOR_ME 0x13 + +/* Stall the SQE until the CP processing pipeline is empty */ +#define CP_WAIT_FOR_CP_FLUSH 0x13 + +#define CP_SET_PROTECTED_MODE 0x5f /* sets the register protection mode */ + +/* Used to switch GPU between secure and non-secure modes */ +#define CP_SET_SECURE_MODE 0x66 + +#define CP_BOOTSTRAP_UCODE 0x6f /* bootstraps microcode */ + +/* + * for a3xx + */ + +#define CP_LOAD_STATE 0x30 /* load high level sequencer command */ + +/* Conditionally load a IB based on a flag */ +#define CP_COND_INDIRECT_BUFFER_PFE 0x3A /* prefetch enabled */ +#define CP_COND_INDIRECT_BUFFER_PFD 0x32 /* prefetch disabled */ + +/* Load a buffer with pre-fetch enabled */ +#define CP_INDIRECT_BUFFER_PFE 0x3F + +#define CP_EXEC_CL 0x31 + +/* (A4x) save PM4 stream pointers to execute upon a visible draw */ +#define CP_SET_DRAW_STATE 0x43 + +#define CP_LOADSTATE_DSTOFFSET_SHIFT 0x00000000 +#define CP_LOADSTATE_STATESRC_SHIFT 0x00000010 +#define CP_LOADSTATE_STATEBLOCKID_SHIFT 0x00000013 +#define CP_LOADSTATE_NUMOFUNITS_SHIFT 0x00000016 +#define CP_LOADSTATE_STATETYPE_SHIFT 0x00000000 +#define CP_LOADSTATE_EXTSRCADDR_SHIFT 0x00000002 + +/* This is a commonly used CP_EVENT_WRITE */ +#define CACHE_FLUSH_TS 4 +#define CACHE_CLEAN 0x31 + +/* Controls which threads execute the PM4 commands the follow this packet */ +#define CP_THREAD_CONTROL 0x17 + +#define CP_SET_THREAD_BR FIELD_PREP(GENMASK(1, 0), 1) +#define CP_SET_THREAD_BOTH FIELD_PREP(GENMASK(1, 0), 3) +#define CP_SYNC_THREADS 
BIT(31) +#define CP_CONCURRENT_BIN_DISABLE BIT(27) + +#define CP_RESET_CONTEXT_STATE 0x1F + +#define CP_CLEAR_BV_BR_COUNTER BIT(2) +#define CP_CLEAR_RESOURCE_TABLE BIT(1) +#define CP_CLEAR_ON_CHIP_TS BIT(0) + +static inline uint pm4_calc_odd_parity_bit(uint val) +{ + return (0x9669 >> (0xf & ((val) ^ + ((val) >> 4) ^ ((val) >> 8) ^ ((val) >> 12) ^ + ((val) >> 16) ^ ((val) >> 20) ^ ((val) >> 24) ^ + ((val) >> 28)))) & 1; +} + +/* + * PM4 packet header functions + * For all the packet functions the passed in count should be the size of the + * payload excluding the header + */ +static inline uint cp_type0_packet(uint regindx, uint cnt) +{ + return CP_TYPE0_PKT | ((cnt-1) << 16) | ((regindx) & 0x7FFF); +} + +static inline uint cp_type3_packet(uint opcode, uint cnt) +{ + return CP_TYPE3_PKT | ((cnt-1) << 16) | (((opcode) & 0xFF) << 8); +} + +static inline uint cp_type4_packet(uint opcode, uint cnt) +{ + return CP_TYPE4_PKT | ((cnt) << 0) | + (pm4_calc_odd_parity_bit(cnt) << 7) | + (((opcode) & 0x3FFFF) << 8) | + ((pm4_calc_odd_parity_bit(opcode) << 27)); +} + +static inline uint cp_type7_packet(uint opcode, uint cnt) +{ + return CP_TYPE7_PKT | ((cnt) << 0) | + (pm4_calc_odd_parity_bit(cnt) << 15) | + (((opcode) & 0x7F) << 16) | + ((pm4_calc_odd_parity_bit(opcode) << 23)); + +} + +#define pkt_is_type0(pkt) (((pkt) & 0XC0000000) == CP_TYPE0_PKT) + +#define type0_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1) +#define type0_pkt_offset(pkt) ((pkt) & 0x7FFF) + +/* + * Check both for the type3 opcode and make sure that the reserved bits [1:7] + * and 15 are 0 + */ + +#define pkt_is_type3(pkt) \ + ((((pkt) & 0xC0000000) == CP_TYPE3_PKT) && \ + (((pkt) & 0x80FE) == 0)) + +#define cp_type3_opcode(pkt) (((pkt) >> 8) & 0xFF) +#define type3_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1) + +#define pkt_is_type4(pkt) \ + ((((pkt) & 0xF0000000) == CP_TYPE4_PKT) && \ + ((((pkt) >> 27) & 0x1) == \ + pm4_calc_odd_parity_bit(cp_type4_base_index_one_reg_wr(pkt))) \ + && ((((pkt) >> 7) & 0x1) 
== \ + pm4_calc_odd_parity_bit(type4_pkt_size(pkt)))) + +#define cp_type4_base_index_one_reg_wr(pkt) (((pkt) >> 8) & 0x7FFFF) +#define type4_pkt_size(pkt) ((pkt) & 0x7F) + +#define pkt_is_type7(pkt) \ + ((((pkt) & 0xF0000000) == CP_TYPE7_PKT) && \ + (((pkt) & 0x0F000000) == 0) && \ + ((((pkt) >> 23) & 0x1) == \ + pm4_calc_odd_parity_bit(cp_type7_opcode(pkt))) \ + && ((((pkt) >> 15) & 0x1) == \ + pm4_calc_odd_parity_bit(type7_pkt_size(pkt)))) + +#define cp_type7_opcode(pkt) (((pkt) >> 16) & 0x7F) +#define type7_pkt_size(pkt) ((pkt) & 0x3FFF) + +/* dword base address of the GFX decode space */ +#define SUBBLOCK_OFFSET(reg) ((unsigned int)((reg) - (0x2000))) + +/* gmem command buffer length */ +#define CP_REG(reg) ((0x4 << 16) | (SUBBLOCK_OFFSET(reg))) + +/* Return true if the hardware uses the legacy (A4XX and older) PM4 format */ +#define ADRENO_LEGACY_PM4(_d) (ADRENO_GPUREV(_d) < 500) + +/** + * cp_packet - Generic CP packet to support different opcodes on + * different GPU cores. + * @adreno_dev: The adreno device + * @opcode: Operation for cp packet + * @size: size for cp packet + */ +static inline uint cp_packet(struct adreno_device *adreno_dev, + int opcode, uint size) +{ + if (ADRENO_LEGACY_PM4(adreno_dev)) + return cp_type3_packet(opcode, size); + + return cp_type7_packet(opcode, size); +} + +/** + * cp_mem_packet - Generic CP memory packet to support different + * opcodes on different GPU cores. 
+ * @adreno_dev: The adreno device + * @opcode: mem operation for cp packet + * @size: size for cp packet + * @num_mem: num of mem access + */ +static inline uint cp_mem_packet(struct adreno_device *adreno_dev, + int opcode, uint size, uint num_mem) +{ + if (ADRENO_LEGACY_PM4(adreno_dev)) + return cp_type3_packet(opcode, size); + + return cp_type7_packet(opcode, size + num_mem); +} + +/* Return 1 if the command is an indirect buffer of any kind */ +static inline int adreno_cmd_is_ib(struct adreno_device *adreno_dev, + unsigned int cmd) +{ + return cmd == cp_mem_packet(adreno_dev, + CP_INDIRECT_BUFFER_PFE, 2, 1) || + cmd == cp_mem_packet(adreno_dev, + CP_INDIRECT_BUFFER_PFD, 2, 1) || + cmd == cp_mem_packet(adreno_dev, + CP_COND_INDIRECT_BUFFER_PFE, 2, 1) || + cmd == cp_mem_packet(adreno_dev, + CP_COND_INDIRECT_BUFFER_PFD, 2, 1); +} + +/** + * cp_gpuaddr - Generic function to add 64bit and 32bit gpuaddr + * to pm4 commands + * @adreno_dev: The adreno device + * @cmds: command pointer to add gpuaddr + * @gpuaddr: gpuaddr to add + */ +static inline uint cp_gpuaddr(struct adreno_device *adreno_dev, + uint *cmds, uint64_t gpuaddr) +{ + uint *start = cmds; + + if (ADRENO_LEGACY_PM4(adreno_dev)) + *cmds++ = (uint)gpuaddr; + else { + *cmds++ = lower_32_bits(gpuaddr); + *cmds++ = upper_32_bits(gpuaddr); + } + return cmds - start; +} + +/** + * cp_register - Generic function for gpu register operation + * @adreno_dev: The adreno device + * @reg: GPU register + * @size: count for PM4 operation + */ +static inline uint cp_register(struct adreno_device *adreno_dev, + unsigned int reg, unsigned int size) +{ + if (ADRENO_LEGACY_PM4(adreno_dev)) + return cp_type0_packet(reg, size); + + return cp_type4_packet(reg, size); +} + +/** + * cp_wait_for_me - common function for WAIT_FOR_ME + * @adreno_dev: The adreno device + * @cmds: command pointer to add gpuaddr + */ +static inline uint cp_wait_for_me(struct adreno_device *adreno_dev, + uint *cmds) +{ + uint *start = cmds; + + if 
(ADRENO_LEGACY_PM4(adreno_dev)) { + *cmds++ = cp_type3_packet(CP_WAIT_FOR_ME, 1); + *cmds++ = 0; + } else + *cmds++ = cp_type7_packet(CP_WAIT_FOR_ME, 0); + + return cmds - start; +} + +/** + * cp_wait_for_idle - common function for WAIT_FOR_IDLE + * @adreno_dev: The adreno device + * @cmds: command pointer to add gpuaddr + */ +static inline uint cp_wait_for_idle(struct adreno_device *adreno_dev, + uint *cmds) +{ + uint *start = cmds; + + if (ADRENO_LEGACY_PM4(adreno_dev)) { + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0; + } else + *cmds++ = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); + + return cmds - start; +} + +static inline u32 cp_protected_mode(struct adreno_device *adreno_dev, + u32 *cmds, int on) +{ + cmds[0] = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1); + cmds[1] = on; + + return 2; +} + +static inline u32 cp_identifier(struct adreno_device *adreno_dev, + u32 *cmds, u32 id) +{ + cmds[0] = cp_packet(adreno_dev, CP_NOP, 1); + cmds[1] = id; + + return 2; +} + +#endif /* __ADRENO_PM4TYPES_H */ diff --git a/adreno_profile.c b/adreno_profile.c new file mode 100644 index 0000000000..925d9e844d --- /dev/null +++ b/adreno_profile.c @@ -0,0 +1,1130 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2013-2021, The Linux Foundation. All rights reserved. 
+ */ + +#include +#include +#include + +#include "adreno.h" +#include "adreno_hwsched.h" +#include "adreno_profile.h" +#include "adreno_pm4types.h" +#include "adreno_ringbuffer.h" + +#define ASSIGNS_STR_FORMAT "%.8s:%u " + +/* + * Raw Data for processing later: + * : 3 - timestamp, count, context id + * [per counter] - data for each counter + * : 1 - Register offset + * : 2 - Pre IB register hi/lo value + * : 2 - Post IB register hi/lo value + * [per counter end] + */ +#define SIZE_DATA(cnt) (6 + (cnt) * 5) + +/* + * Pre-IB command size (in dwords): + * : 2 - NOP start identifier + * : 4 - timestamp + * : 4 - count + * : 4 - context id + * : 4 - pid + * : 4 - tid + * : 4 - type + * [loop count start] - for each counter to watch + * : 4 - Register offset + * : 4 - Register read lo + * : 4 - Register read high + * [loop end] + * : 2 - NOP end identifier + */ +#define SIZE_PREIB(cnt) (28 + (cnt) * 12) + +/* + * Post-IB command size (in dwords): + * : 2 - NOP start identifier + * [loop count start] - for each counter to watch + * : 4 - Register read lo + * : 4 - Register read high + * [loop end] + * : 2 - NOP end identifier + */ +#define SIZE_POSTIB(cnt) (4 + (cnt) * 8) + +/* Counter data + Pre size + post size = total size */ +#define SIZE_SHARED_ENTRY(cnt) (SIZE_DATA(cnt) + SIZE_PREIB(cnt) \ + + SIZE_POSTIB(cnt)) + +/* + * Space for following string :"%u %u %u %.5s %u " + * [count iterations]: "%.8s:%u %llu %llu%c" + */ +#define SIZE_PIPE_ENTRY(cnt) (50 + (cnt) * 62) +#define SIZE_LOG_ENTRY(cnt) (6 + (cnt) * 5) + +static inline uint _ib_cmd_mem_write(struct adreno_device *adreno_dev, + uint *cmds, uint64_t gpuaddr, uint val, uint *off) +{ + unsigned int *start = cmds; + + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr); + *cmds++ = val; + + *off += sizeof(unsigned int); + return cmds - start; +} + +static inline uint _ib_cmd_reg_to_mem(struct adreno_device *adreno_dev, + uint *cmds, uint64_t gpuaddr, uint val, 
uint *off) +{ + unsigned int *start = cmds; + + *cmds++ = cp_mem_packet(adreno_dev, CP_REG_TO_MEM, 2, 1); + *cmds++ = val; + cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr); + + *off += sizeof(unsigned int); + return cmds - start; +} + +static u64 _build_pre_ib_cmds(struct adreno_device *adreno_dev, + struct adreno_profile *profile, + unsigned int head, unsigned int timestamp, + struct adreno_context *drawctxt, + u32 *dwords) +{ + struct adreno_profile_assigns_list *entry; + unsigned int *start, *ibcmds; + unsigned int count = profile->assignment_count; + uint64_t gpuaddr = profile->shared_buffer->gpuaddr; + unsigned int ib_offset = head + SIZE_DATA(count); + unsigned int data_offset = head * sizeof(unsigned int); + + ibcmds = ib_offset + ((unsigned int *) profile->shared_buffer->hostptr); + start = ibcmds; + + ibcmds += cp_identifier(adreno_dev, ibcmds, START_PROFILE_IDENTIFIER); + + /* + * Write ringbuffer commands to save the following to memory: + * timestamp, count, context_id, pid, tid, context type + */ + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + timestamp, &data_offset); + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + profile->assignment_count, &data_offset); + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + drawctxt->base.id, &data_offset); + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + pid_nr(drawctxt->base.proc_priv->pid), &data_offset); + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + drawctxt->base.tid, &data_offset); + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + drawctxt->type, &data_offset); + + /* loop for each countable assigned */ + list_for_each_entry(entry, &profile->assignments_list, list) { + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, + gpuaddr + data_offset, entry->offset, + &data_offset); + ibcmds += _ib_cmd_reg_to_mem(adreno_dev, ibcmds, + gpuaddr + data_offset, 
entry->offset, + &data_offset); + ibcmds += _ib_cmd_reg_to_mem(adreno_dev, ibcmds, + gpuaddr + data_offset, entry->offset_hi, + &data_offset); + + /* skip over post_ib counter data */ + data_offset += sizeof(unsigned int) * 2; + } + + + ibcmds += cp_identifier(adreno_dev, ibcmds, END_PROFILE_IDENTIFIER); + + *dwords = (ibcmds - start); + return profile->shared_buffer->gpuaddr + (ib_offset * sizeof(u32)); +} + +static u64 _build_post_ib_cmds(struct adreno_device *adreno_dev, + struct adreno_profile *profile, unsigned int head, + u32 *dwords) +{ + struct adreno_profile_assigns_list *entry; + unsigned int *start, *ibcmds; + unsigned int count = profile->assignment_count; + uint64_t gpuaddr = profile->shared_buffer->gpuaddr; + unsigned int ib_offset = head + SIZE_DATA(count) + SIZE_PREIB(count); + unsigned int data_offset = head * sizeof(unsigned int); + + ibcmds = ib_offset + ((unsigned int *) profile->shared_buffer->hostptr); + start = ibcmds; + + /* start of profile identifier */ + ibcmds += cp_identifier(adreno_dev, ibcmds, START_PROFILE_IDENTIFIER); + + /* skip over pre_ib preamble */ + data_offset += sizeof(unsigned int) * 6; + + /* loop for each countable assigned */ + list_for_each_entry(entry, &profile->assignments_list, list) { + /* skip over pre_ib counter data */ + data_offset += sizeof(unsigned int) * 3; + ibcmds += _ib_cmd_reg_to_mem(adreno_dev, ibcmds, + gpuaddr + data_offset, entry->offset, + &data_offset); + ibcmds += _ib_cmd_reg_to_mem(adreno_dev, ibcmds, + gpuaddr + data_offset, entry->offset_hi, + &data_offset); + } + + /* end of profile identifier */ + ibcmds += cp_identifier(adreno_dev, ibcmds, END_PROFILE_IDENTIFIER); + + *dwords = (ibcmds - start); + return profile->shared_buffer->gpuaddr + (ib_offset * sizeof(u32)); +} + +static bool shared_buf_empty(struct adreno_profile *profile) +{ + if (profile->shared_buffer->hostptr == NULL || + profile->shared_buffer->size == 0) + return true; + + if (profile->shared_head == profile->shared_tail) + 
return true; + + return false; +} + +static inline void shared_buf_inc(unsigned int max_size, + unsigned int *offset, size_t inc) +{ + *offset = (*offset + inc) % max_size; +} + +static inline void log_buf_wrapcnt(unsigned int cnt, uintptr_t *off) +{ + *off = (*off + cnt) % ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS; +} + +static inline void log_buf_wrapinc_len(unsigned int *profile_log_buffer, + unsigned int **ptr, unsigned int len) +{ + *ptr += len; + if (*ptr >= (profile_log_buffer + + ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS)) + *ptr -= ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS; +} + +static inline void log_buf_wrapinc(unsigned int *profile_log_buffer, + unsigned int **ptr) +{ + log_buf_wrapinc_len(profile_log_buffer, ptr, 1); +} + +static inline unsigned int log_buf_available(struct adreno_profile *profile, + unsigned int *head_ptr) +{ + uintptr_t tail, head; + + tail = (uintptr_t) profile->log_tail - + (uintptr_t) profile->log_buffer; + head = (uintptr_t)head_ptr - (uintptr_t) profile->log_buffer; + if (tail > head) + return (tail - head) / sizeof(uintptr_t); + else + return ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS - ((head - tail) / + sizeof(uintptr_t)); +} + +static inline unsigned int shared_buf_available(struct adreno_profile *profile) +{ + if (profile->shared_tail > profile->shared_head) + return profile->shared_tail - profile->shared_head; + else + return profile->shared_size - + (profile->shared_head - profile->shared_tail); +} + +static struct adreno_profile_assigns_list *_find_assignment_by_offset( + struct adreno_profile *profile, unsigned int offset) +{ + struct adreno_profile_assigns_list *entry; + + list_for_each_entry(entry, &profile->assignments_list, list) { + if (entry->offset == offset) + return entry; + } + + return NULL; +} + +static bool _in_assignments_list(struct adreno_profile *profile, + unsigned int groupid, unsigned int countable) +{ + struct adreno_profile_assigns_list *entry; + + list_for_each_entry(entry, &profile->assignments_list, list) { + if 
(entry->groupid == groupid && entry->countable == + countable) + return true; + } + + return false; +} + +static bool _add_to_assignments_list(struct adreno_profile *profile, + const char *str, unsigned int groupid, unsigned int countable, + unsigned int offset, unsigned int offset_hi) +{ + struct adreno_profile_assigns_list *entry; + + /* first make sure we can alloc memory */ + entry = kmalloc(sizeof(struct adreno_profile_assigns_list), GFP_KERNEL); + if (!entry) + return false; + + list_add_tail(&entry->list, &profile->assignments_list); + + entry->countable = countable; + entry->groupid = groupid; + entry->offset = offset; + entry->offset_hi = offset_hi; + + strlcpy(entry->name, str, sizeof(entry->name)); + + profile->assignment_count++; + + return true; +} + +static bool results_available(struct adreno_device *adreno_dev, + struct adreno_profile *profile, unsigned int *shared_buf_tail) +{ + unsigned int global_eop; + unsigned int off = profile->shared_tail; + unsigned int *shared_ptr = (unsigned int *) + profile->shared_buffer->hostptr; + unsigned int ts, cnt; + int ts_cmp; + + /* + * If shared_buffer empty or Memstore EOP timestamp is less than + * outstanding counter buffer timestamps then no results available + */ + if (shared_buf_empty(profile)) + return false; + + if (adreno_rb_readtimestamp(adreno_dev, + adreno_dev->cur_rb, + KGSL_TIMESTAMP_RETIRED, &global_eop)) + return false; + do { + cnt = *(shared_ptr + off + 1); + if (cnt == 0) + return false; + + ts = *(shared_ptr + off); + ts_cmp = timestamp_cmp(ts, global_eop); + if (ts_cmp >= 0) { + *shared_buf_tail = off; + if (off == profile->shared_tail) + return false; + else + return true; + } + shared_buf_inc(profile->shared_size, &off, + SIZE_SHARED_ENTRY(cnt)); + } while (off != profile->shared_head); + + *shared_buf_tail = profile->shared_head; + + return true; +} + +static void transfer_results(struct adreno_profile *profile, + unsigned int shared_buf_tail) +{ + unsigned int buf_off; + unsigned int 
		ts, cnt, ctxt_id, pid, tid, client_type;
	unsigned int *ptr = (unsigned int *) profile->shared_buffer->hostptr;
	unsigned int *log_ptr, *log_base;
	struct adreno_profile_assigns_list *assigns_list;
	int i, tmp_tail;

	log_ptr = profile->log_head;
	log_base = profile->log_buffer;
	if (log_ptr == NULL)
		return;

	/*
	 * go through counter buffers and format for write into log_buffer
	 * if log buffer doesn't have space just overwrite it circularly
	 * shared_buf is guaranteed to not wrap within an entry so can use
	 * ptr increment
	 */
	while (profile->shared_tail != shared_buf_tail) {
		buf_off = profile->shared_tail;
		/*
		 * format: timestamp, count, context_id
		 * count entries: pc_off, pc_start, pc_end
		 */
		ts = *(ptr + buf_off++);
		cnt = *(ptr + buf_off++);
		ctxt_id = *(ptr + buf_off++);
		pid = *(ptr + buf_off++);
		tid = *(ptr + buf_off++);
		client_type = *(ptr + buf_off++);

		/*
		 * if entry overwrites the tail of log_buffer then adjust tail
		 * ptr to make room for the new entry, discarding old entry
		 */
		while (log_buf_available(profile, log_ptr) <=
				SIZE_LOG_ENTRY(cnt)) {
			unsigned int size_tail;
			uintptr_t boff;

			/* size of the oldest entry is in its count word */
			size_tail = SIZE_LOG_ENTRY(0xffff &
					*(profile->log_tail));
			boff = ((uintptr_t) profile->log_tail -
					(uintptr_t) log_base) / sizeof(uintptr_t);
			log_buf_wrapcnt(size_tail, &boff);
			profile->log_tail = log_base + boff;
		}

		/* log entry header: count, type, pid, tid, context id, ts */
		*log_ptr = cnt;
		log_buf_wrapinc(log_base, &log_ptr);
		*log_ptr = client_type;
		log_buf_wrapinc(log_base, &log_ptr);
		*log_ptr = pid;
		log_buf_wrapinc(log_base, &log_ptr);
		*log_ptr = tid;
		log_buf_wrapinc(log_base, &log_ptr);
		*log_ptr = ctxt_id;
		log_buf_wrapinc(log_base, &log_ptr);
		*log_ptr = ts;
		log_buf_wrapinc(log_base, &log_ptr);

		for (i = 0; i < cnt; i++) {
			assigns_list = _find_assignment_by_offset(
					profile, *(ptr + buf_off++));
			if (assigns_list == NULL) {
				/* unknown counter: mark and drop the entry */
				*log_ptr = (unsigned int) -1;

				shared_buf_inc(profile->shared_size,
					&profile->shared_tail,
					SIZE_SHARED_ENTRY(cnt));
				goto err;
			} else {
				*log_ptr = assigns_list->groupid << 16 |
					(assigns_list->countable & 0xffff);
			}
			log_buf_wrapinc(log_base, &log_ptr);
			*log_ptr = *(ptr + buf_off++); /* perf cntr start lo */
			log_buf_wrapinc(log_base, &log_ptr);
			*log_ptr = *(ptr + buf_off++); /* perf cntr start hi */
			log_buf_wrapinc(log_base, &log_ptr);
			*log_ptr = *(ptr + buf_off++); /* perf cntr end lo */
			log_buf_wrapinc(log_base, &log_ptr);
			*log_ptr = *(ptr + buf_off++); /* perf cntr end hi */
			log_buf_wrapinc(log_base, &log_ptr);

		}

		tmp_tail = profile->shared_tail;
		shared_buf_inc(profile->shared_size,
			&profile->shared_tail,
			SIZE_SHARED_ENTRY(cnt));
		/*
		 * Possibly lost some room as we cycled around, so it's safe to
		 * reset the max size
		 */
		if (profile->shared_tail < tmp_tail)
			profile->shared_size =
				ADRENO_PROFILE_SHARED_BUF_SIZE_DWORDS;

	}
	profile->log_head = log_ptr;
	return;
err:
	/* reset head/tail to same on error in hopes we work correctly later */
	profile->log_head = profile->log_tail;
}

/* debugfs "enable" read hook: report whether profiling is enabled */
static int profile_enable_get(void *data, u64 *val)
{
	struct kgsl_device *device = data;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);

	mutex_lock(&device->mutex);
	*val = adreno_profile_enabled(&adreno_dev->profile);
	mutex_unlock(&device->mutex);

	return 0;
}

/*
 * debugfs "enable" write hook: enable/disable profiling, lazily allocating
 * the log buffer on first enable (it is never freed on disable).
 */
static int profile_enable_set(void *data, u64 val)
{
	struct kgsl_device *device = data;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	struct adreno_profile *profile = &adreno_dev->profile;

	mutex_lock(&device->mutex);

	if (val && profile->log_buffer == NULL) {
		/* allocate profile_log_buffer the first time enabled */
		profile->log_buffer = vmalloc(ADRENO_PROFILE_LOG_BUF_SIZE);
		if (profile->log_buffer == NULL) {
			mutex_unlock(&device->mutex);
			return -ENOMEM;
		}
		profile->log_tail = profile->log_buffer;
		profile->log_head = profile->log_buffer;
	}

	profile->enabled = val;

	mutex_unlock(&device->mutex);

	return 0;
}

/* debugfs read: render the current assignments list as text */
static ssize_t profile_assignments_read(struct file *filep,
	char __user *ubuf, size_t max, loff_t *ppos)
{
	struct kgsl_device *device = (struct kgsl_device *) filep->private_data;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	struct adreno_profile *profile = &adreno_dev->profile;
	struct adreno_profile_assigns_list *entry;
	int len = 0, max_size = PAGE_SIZE;
	char *buf, *pos;
	ssize_t size = 0;

	mutex_lock(&device->mutex);

	if (profile->assignment_count == 0) {
		mutex_unlock(&device->mutex);
		return 0;
	}

	buf = kzalloc(max_size, GFP_KERNEL);
	if (!buf) {
		mutex_unlock(&device->mutex);
		return -ENOMEM;
	}

	pos = buf;

	/* copy all assignments from list to str */
	list_for_each_entry(entry, &profile->assignments_list, list) {
		len = scnprintf(pos, max_size, ASSIGNS_STR_FORMAT,
			entry->name, entry->countable);

		max_size -= len;
		pos += len;
	}

	size = simple_read_from_buffer(ubuf, max, ppos, buf,
		pos - buf);

	kfree(buf);

	mutex_unlock(&device->mutex);
	return size;
}

/* Drop (@groupid, @countable) from the list and release its counter */
static void _remove_assignment(struct adreno_device *adreno_dev,
	unsigned int groupid, unsigned int countable)
{
	struct adreno_profile *profile = &adreno_dev->profile;
	struct adreno_profile_assigns_list *entry, *tmp;

	list_for_each_entry_safe(entry, tmp, &profile->assignments_list, list) {
		if (entry->groupid == groupid &&
				entry->countable == countable) {
			list_del(&entry->list);

			profile->assignment_count--;

			kfree(entry);

			/* remove from perf counter allocation */
			adreno_perfcounter_put(adreno_dev, groupid, countable,
				PERFCOUNTER_FLAG_KERNEL);
		}
	}
}

/* Reserve a counter for (@groupid, @countable) and add it to the list */
static void _add_assignment(struct adreno_device *adreno_dev,
	unsigned int groupid, unsigned int countable)
{
	struct adreno_profile *profile = &adreno_dev->profile;
	unsigned int offset, offset_hi;
	const char *name = NULL;

	name = adreno_perfcounter_get_name(adreno_dev, groupid);
	if (!name)
return; + + /* if already in assigned list skip it */ + if (_in_assignments_list(profile, groupid, countable)) + return; + + /* add to perf counter allocation, if fail skip it */ + if (adreno_perfcounter_get(adreno_dev, groupid, countable, + &offset, &offset_hi, PERFCOUNTER_FLAG_NONE)) + return; + + /* add to assignments list, put counter back if error */ + if (!_add_to_assignments_list(profile, name, groupid, + countable, offset, offset_hi)) + adreno_perfcounter_put(adreno_dev, groupid, + countable, PERFCOUNTER_FLAG_KERNEL); +} + +static char *_parse_next_assignment(struct adreno_device *adreno_dev, + char *str, int *groupid, int *countable, bool *remove) +{ + char *groupid_str, *countable_str, *next_str = NULL; + int ret; + + *groupid = -EINVAL; + *countable = -EINVAL; + *remove = false; + + /* remove spaces */ + while (*str == ' ') + str++; + + /* check if it's a remove assignment */ + if (*str == '-') { + *remove = true; + str++; + } + + /* get the groupid string */ + groupid_str = str; + while (*str != ':') { + if (*str == '\0') + return NULL; + *str = tolower(*str); + str++; + } + if (groupid_str == str) + return NULL; + + *str = '\0'; + str++; + + /* get the countable string */ + countable_str = str; + while (*str != ' ' && *str != '\0') + str++; + if (countable_str == str) + return NULL; + + /* + * If we have reached the end of the original string then make sure we + * return NULL from this function or we could accidently overrun + */ + + if (*str != '\0') { + *str = '\0'; + next_str = str + 1; + } + + /* set results */ + *groupid = adreno_perfcounter_get_groupid(adreno_dev, + groupid_str); + if (*groupid < 0) + return NULL; + ret = kstrtou32(countable_str, 10, countable); + if (ret) + return NULL; + + return next_str; +} + +static ssize_t profile_assignments_write(struct file *filep, + const char __user *user_buf, size_t len, loff_t *off) +{ + struct kgsl_device *device = (struct kgsl_device *) filep->private_data; + struct adreno_device *adreno_dev = 
ADRENO_DEVICE(device); + struct adreno_profile *profile = &adreno_dev->profile; + size_t size = 0; + char *buf, *pbuf; + bool remove_assignment = false; + int groupid, countable, ret; + + if (len >= PAGE_SIZE || len == 0) + return -EINVAL; + + buf = kmalloc(len + 1, GFP_KERNEL); + if (buf == NULL) + return -ENOMEM; + + if (copy_from_user(buf, user_buf, len)) { + size = -EFAULT; + goto error_free; + } + + mutex_lock(&device->mutex); + + if (adreno_profile_enabled(profile)) { + size = -EINVAL; + goto error_unlock; + } + + ret = adreno_perfcntr_active_oob_get(adreno_dev); + if (ret) { + size = ret; + goto error_unlock; + } + + /* + * When adding/removing assignments, ensure that the GPU is done with + * all it's work. This helps to synchronize the work flow to the + * GPU and avoid racey conditions. + */ + if (adreno_dev->dispatch_ops && adreno_dev->dispatch_ops->idle) + ret = adreno_dev->dispatch_ops->idle(adreno_dev); + else + ret = adreno_idle(device); + if (ret) { + size = -ETIMEDOUT; + goto error_put; + } + + /* clear all shared buffer results */ + adreno_profile_process_results(adreno_dev); + + pbuf = buf; + + /* clear the log buffer */ + if (profile->log_buffer != NULL) { + profile->log_head = profile->log_buffer; + profile->log_tail = profile->log_buffer; + } + + + /* for sanity and parsing, ensure it is null terminated */ + buf[len] = '\0'; + + /* parse file buf and add(remove) to(from) appropriate lists */ + while (pbuf) { + pbuf = _parse_next_assignment(adreno_dev, pbuf, &groupid, + &countable, &remove_assignment); + if (groupid < 0 || countable < 0) + break; + + if (remove_assignment) + _remove_assignment(adreno_dev, groupid, countable); + else + _add_assignment(adreno_dev, groupid, countable); + } + + size = len; + +error_put: + adreno_perfcntr_active_oob_put(adreno_dev); +error_unlock: + mutex_unlock(&device->mutex); +error_free: + kfree(buf); + return size; +} + +static int _pipe_print_pending(char __user *ubuf, size_t max) +{ + loff_t unused = 0; + 
char str[] = "Operation Would Block!"; + + return simple_read_from_buffer(ubuf, max, + &unused, str, strlen(str)); +} + +static int _pipe_print_results(struct adreno_device *adreno_dev, + char __user *ubuf, size_t max) +{ + struct adreno_profile *profile = &adreno_dev->profile; + const char *grp_name; + char __user *usr_buf = ubuf; + unsigned int *log_ptr = NULL, *tmp_log_ptr = NULL; + int len, i; + int status = 0; + ssize_t size, total_size = 0; + unsigned int cnt, api_type, ctxt_id, pid, tid, ts, cnt_reg; + unsigned long long pc_start, pc_end; + const char *api_str; + char format_space; + loff_t unused = 0; + char pipe_hdr_buf[51]; /* 4 uint32 + 5 space + 5 API type + '\0' */ + char pipe_cntr_buf[63]; /* 2 uint64 + 1 uint32 + 4 spaces + 8 group */ + + /* convert unread entries to ASCII, copy to user-space */ + log_ptr = profile->log_tail; + + do { + /* store the tmp var for error cases so we can skip */ + tmp_log_ptr = log_ptr; + + /* Too many to output to pipe, so skip this data */ + cnt = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + + if (SIZE_PIPE_ENTRY(cnt) > max) { + log_buf_wrapinc_len(profile->log_buffer, + &tmp_log_ptr, SIZE_PIPE_ENTRY(cnt)); + log_ptr = tmp_log_ptr; + goto done; + } + + /* + * Not enough space left in pipe, return without doing + * anything + */ + if ((max - (usr_buf - ubuf)) < SIZE_PIPE_ENTRY(cnt)) { + log_ptr = tmp_log_ptr; + goto done; + } + + api_type = *log_ptr; + api_str = kgsl_context_type(api_type); + log_buf_wrapinc(profile->log_buffer, &log_ptr); + pid = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + tid = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + ctxt_id = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + ts = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + len = scnprintf(pipe_hdr_buf, sizeof(pipe_hdr_buf) - 1, + "%u %u %u %.5s %u ", + pid, tid, ctxt_id, api_str, ts); + size = simple_read_from_buffer(usr_buf, + max - (usr_buf - ubuf), + &unused, 
pipe_hdr_buf, len); + + /* non-fatal error, so skip rest of entry and return */ + if (size < 0) { + log_buf_wrapinc_len(profile->log_buffer, + &tmp_log_ptr, SIZE_PIPE_ENTRY(cnt)); + log_ptr = tmp_log_ptr; + goto done; + } + + unused = 0; + usr_buf += size; + total_size += size; + + for (i = 0; i < cnt; i++) { + unsigned int start_lo, start_hi; + unsigned int end_lo, end_hi; + + grp_name = adreno_perfcounter_get_name( + adreno_dev, (*log_ptr >> 16) & 0xffff); + + /* non-fatal error, so skip rest of entry and return */ + if (grp_name == NULL) { + log_buf_wrapinc_len(profile->log_buffer, + &tmp_log_ptr, SIZE_PIPE_ENTRY(cnt)); + log_ptr = tmp_log_ptr; + goto done; + } + + if (i == cnt - 1) + format_space = '\n'; + else + format_space = ' '; + + cnt_reg = *log_ptr & 0xffff; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + start_lo = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + start_hi = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + end_lo = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + end_hi = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + + pc_start = (((uint64_t) start_hi) << 32) | start_lo; + pc_end = (((uint64_t) end_hi) << 32) | end_lo; + + len = scnprintf(pipe_cntr_buf, + sizeof(pipe_cntr_buf) - 1, + "%.8s:%u %llu %llu%c", + grp_name, cnt_reg, pc_start, + pc_end, format_space); + + size = simple_read_from_buffer(usr_buf, + max - (usr_buf - ubuf), + &unused, pipe_cntr_buf, len); + + /* non-fatal error, so skip rest of entry and return */ + if (size < 0) { + log_buf_wrapinc_len(profile->log_buffer, + &tmp_log_ptr, SIZE_PIPE_ENTRY(cnt)); + log_ptr = tmp_log_ptr; + goto done; + } + unused = 0; + usr_buf += size; + total_size += size; + } + } while (log_ptr != profile->log_head); + +done: + status = total_size; + profile->log_tail = log_ptr; + + return status; +} + +static ssize_t profile_pipe_print(struct file *filep, char __user *ubuf, + size_t max, loff_t *ppos) +{ + struct kgsl_device *device = 
(struct kgsl_device *) filep->private_data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_profile *profile = &adreno_dev->profile; + char __user *usr_buf = ubuf; + int status = 0; + + /* + * this file not seekable since it only supports streaming, ignore + * ppos <> 0 + */ + /* + * format + * for each perf counter + */ + + mutex_lock(&device->mutex); + + while (1) { + /* process any results that are available into the log_buffer */ + status = adreno_profile_process_results(adreno_dev); + if (status > 0) { + /* if we have results, print them and exit */ + status = _pipe_print_results(adreno_dev, usr_buf, max); + break; + } + + /* there are no unread results, act accordingly */ + if (filep->f_flags & O_NONBLOCK) { + if (profile->shared_tail != profile->shared_head) { + status = _pipe_print_pending(usr_buf, max); + break; + } + + status = 0; + break; + } + + mutex_unlock(&device->mutex); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(msecs_to_jiffies(100)); + mutex_lock(&device->mutex); + + if (signal_pending(current)) { + status = 0; + break; + } + } + + mutex_unlock(&device->mutex); + + return status; +} + +static int profile_groups_show(struct seq_file *s, void *unused) +{ + struct kgsl_device *device = (struct kgsl_device *) s->private; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; + int i, j, used; + + mutex_lock(&device->mutex); + + for (i = 0; i < counters->group_count; ++i) { + group = &(counters->groups[i]); + /* get number of counters used for this group */ + used = 0; + for (j = 0; j < group->reg_count; j++) { + if (group->regs[j].countable != + KGSL_PERFCOUNTER_NOT_USED) + used++; + } + + seq_printf(s, "%s %d %d\n", group->name, + group->reg_count, used); + } + + mutex_unlock(&device->mutex); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(profile_groups); + +static const struct 
file_operations profile_pipe_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = profile_pipe_print, + .llseek = noop_llseek, +}; + +static const struct file_operations profile_assignments_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = profile_assignments_read, + .write = profile_assignments_write, + .llseek = noop_llseek, +}; + +DEFINE_DEBUGFS_ATTRIBUTE(profile_enable_fops, + profile_enable_get, + profile_enable_set, "%llu\n"); + +void adreno_profile_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_profile *profile = &adreno_dev->profile; + struct dentry *profile_dir; + + profile->enabled = false; + + /* allocate shared_buffer, which includes pre_ib and post_ib */ + profile->shared_size = ADRENO_PROFILE_SHARED_BUF_SIZE_DWORDS; + profile->shared_buffer = kgsl_allocate_global(device, + profile->shared_size * sizeof(unsigned int), + 0, 0, 0, "profile"); + if (IS_ERR(profile->shared_buffer)) { + profile->shared_size = 0; + return; + } + + INIT_LIST_HEAD(&profile->assignments_list); + + /* Create perf counter debugfs */ + profile_dir = debugfs_create_dir("profiling", device->d_debugfs); + if (IS_ERR(profile_dir)) + return; + + debugfs_create_file("enable", 0644, profile_dir, device, + &profile_enable_fops); + debugfs_create_file("blocks", 0444, profile_dir, device, + &profile_groups_fops); + debugfs_create_file("pipe", 0444, profile_dir, device, + &profile_pipe_fops); + debugfs_create_file("assignments", 0644, profile_dir, device, + &profile_assignments_fops); +} + +void adreno_profile_close(struct adreno_device *adreno_dev) +{ + struct adreno_profile *profile = &adreno_dev->profile; + struct adreno_profile_assigns_list *entry, *tmp; + + profile->enabled = false; + vfree(profile->log_buffer); + profile->log_buffer = NULL; + profile->log_head = NULL; + profile->log_tail = NULL; + profile->shared_head = 0; + profile->shared_tail = 0; + profile->shared_size = 0; + + 
profile->assignment_count = 0; + + list_for_each_entry_safe(entry, tmp, &profile->assignments_list, list) { + list_del(&entry->list); + kfree(entry); + } +} + +int adreno_profile_process_results(struct adreno_device *adreno_dev) +{ + struct adreno_profile *profile = &adreno_dev->profile; + unsigned int shared_buf_tail = profile->shared_tail; + + if (!results_available(adreno_dev, profile, &shared_buf_tail)) + return 0; + + /* + * transfer retired results to log_buffer + * update shared_buffer tail ptr + */ + transfer_results(profile, shared_buf_tail); + + return 1; +} + +u64 adreno_profile_preib_processing(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, u32 *dwords) +{ + struct adreno_profile *profile = &adreno_dev->profile; + int count = profile->assignment_count; + unsigned int entry_head = profile->shared_head; + unsigned int *shared_ptr; + struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); + + if (!drawctxt || !adreno_profile_assignments_ready(profile)) + return 0; + + /* + * check if space available, include the post_ib in space available + * check so don't have to handle trying to undo the pre_ib insertion in + * ringbuffer in the case where only the post_ib fails enough space + */ + if (SIZE_SHARED_ENTRY(count) >= shared_buf_available(profile)) + return 0; + + if (entry_head + SIZE_SHARED_ENTRY(count) >= profile->shared_size) { + /* entry_head would wrap, start entry_head at 0 in buffer */ + entry_head = 0; + profile->shared_size = profile->shared_head; + profile->shared_head = 0; + + /* recheck space available */ + if (SIZE_SHARED_ENTRY(count) >= shared_buf_available(profile)) + return 0; + } + + /* zero out the counter area of shared_buffer entry_head */ + shared_ptr = entry_head + ((unsigned int *) + profile->shared_buffer->hostptr); + memset(shared_ptr, 0, SIZE_SHARED_ENTRY(count) * sizeof(unsigned int)); + + /* reserve space for the pre ib shared buffer */ + shared_buf_inc(profile->shared_size, 
&profile->shared_head, + SIZE_SHARED_ENTRY(count)); + + /* create the shared ibdesc */ + return _build_pre_ib_cmds(adreno_dev, profile, entry_head, + rb->timestamp + 1, drawctxt, dwords); +} + +u64 adreno_profile_postib_processing(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, u32 *dwords) +{ + struct adreno_profile *profile = &adreno_dev->profile; + int count = profile->assignment_count; + unsigned int entry_head = profile->shared_head - + SIZE_SHARED_ENTRY(count); + + if (!drawctxt || !adreno_profile_assignments_ready(profile)) + return 0; + + /* create the shared ibdesc */ + return _build_post_ib_cmds(adreno_dev, profile, entry_head, dwords); +} diff --git a/adreno_profile.h b/adreno_profile.h new file mode 100644 index 0000000000..1408d91a9b --- /dev/null +++ b/adreno_profile.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2013-2014,2019-2021 The Linux Foundation. All rights reserved. + */ +#ifndef __ADRENO_PROFILE_H +#define __ADRENO_PROFILE_H + +/** + * struct adreno_profile_assigns_list: linked list for assigned perf counters + * @list: linkage for nodes in list + * @name: group name or GPU name name + * @groupid: group id + * @countable: countable assigned to perfcounter + * @offset: perfcounter register address offset + */ +struct adreno_profile_assigns_list { + struct list_head list; + char name[25]; + unsigned int groupid; + unsigned int countable; + unsigned int offset; /* LO offset */ + unsigned int offset_hi; /* HI offset */ +}; + +struct adreno_profile { + struct list_head assignments_list; /* list of all assignments */ + unsigned int assignment_count; /* Number of assigned counters */ + unsigned int *log_buffer; + unsigned int *log_head; + unsigned int *log_tail; + bool enabled; + /* counter, pre_ib, and post_ib held in one large circular buffer + * shared between kgsl and GPU + * counter entry 0 + * pre_ib entry 0 + * post_ib entry 0 + * ... 
+ * counter entry N + * pre_ib entry N + * post_ib entry N + */ + struct kgsl_memdesc *shared_buffer; + unsigned int shared_head; + unsigned int shared_tail; + unsigned int shared_size; +}; + +#define ADRENO_PROFILE_SHARED_BUF_SIZE_DWORDS (48 * 4096 / sizeof(uint)) +/* sized @ 48 pages should allow for over 50 outstanding IBs minimum, 1755 max*/ + +#define ADRENO_PROFILE_LOG_BUF_SIZE (1024 * 920) +/* sized for 1024 entries of fully assigned 45 cnters in log buffer, 230 pages*/ +#define ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS (ADRENO_PROFILE_LOG_BUF_SIZE / \ + sizeof(unsigned int)) + +#ifdef CONFIG_DEBUG_FS +void adreno_profile_init(struct adreno_device *adreno_dev); +void adreno_profile_close(struct adreno_device *adreno_dev); +int adreno_profile_process_results(struct adreno_device *adreno_dev); +u64 adreno_profile_preib_processing(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, u32 *dwords); +u64 adreno_profile_postib_processing(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, u32 *dwords); +#else +static inline void adreno_profile_init(struct adreno_device *adreno_dev) { } +static inline void adreno_profile_close(struct adreno_device *adreno_dev) { } +static inline int adreno_profile_process_results( + struct adreno_device *adreno_dev) +{ + return 0; +} + +static inline u64 +adreno_profile_preib_processing(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, u32 *dwords) +{ + return 0; +} + +static inline u64 +adreno_profile_postib_processing(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, u32 *dwords) +{ + return 0; +} + +#endif + +static inline bool adreno_profile_enabled(struct adreno_profile *profile) +{ + return profile->enabled; +} + +static inline bool adreno_profile_has_assignments( + struct adreno_profile *profile) +{ + return list_empty(&profile->assignments_list) ? 
false : true; +} + +static inline bool adreno_profile_assignments_ready( + struct adreno_profile *profile) +{ + return adreno_profile_enabled(profile) && + adreno_profile_has_assignments(profile); +} + +#endif diff --git a/adreno_ringbuffer.c b/adreno_ringbuffer.c new file mode 100644 index 0000000000..5721bb4fb1 --- /dev/null +++ b/adreno_ringbuffer.c @@ -0,0 +1,435 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include + +#include "a3xx_reg.h" +#include "a5xx_reg.h" +#include "a6xx_reg.h" +#include "adreno.h" +#include "adreno_pm4types.h" +#include "adreno_ringbuffer.h" +#include "adreno_trace.h" +#include "kgsl_trace.h" + + +#define RB_HOSTPTR(_rb, _pos) \ + ((unsigned int *) ((_rb)->buffer_desc->hostptr + \ + ((_pos) * sizeof(unsigned int)))) + +#define RB_GPUADDR(_rb, _pos) \ + ((_rb)->buffer_desc->gpuaddr + ((_pos) * sizeof(unsigned int))) + +void adreno_get_submit_time(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, + struct adreno_submit_time *time) +{ + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned long flags; + struct adreno_context *drawctxt = rb->drawctxt_active; + struct kgsl_context *context = &drawctxt->base; + + if (!time) + return; + + /* + * Here we are attempting to create a mapping between the + * GPU time domain (alwayson counter) and the CPU time domain + * (local_clock) by sampling both values as close together as + * possible. This is useful for many types of debugging and + * profiling. In order to make this mapping as accurate as + * possible, we must turn off interrupts to avoid running + * interrupt handlers between the two samples. 
+ */ + + local_irq_save(flags); + + time->ticks = gpudev->read_alwayson(adreno_dev); + + /* Trace the GPU time to create a mapping to ftrace time */ + trace_adreno_cmdbatch_sync(context->id, context->priority, + drawctxt->timestamp, time->ticks); + + /* Get the kernel clock for time since boot */ + time->ktime = local_clock(); + + /* Get the timeofday for the wall time (for the user) */ + ktime_get_real_ts64(&time->utime); + + local_irq_restore(flags); +} + +unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb, + unsigned int dwords) +{ + struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb); + unsigned int rptr = adreno_get_rptr(rb); + unsigned int ret; + + if (rptr <= rb->_wptr) { + unsigned int *cmds; + + if (rb->_wptr + dwords <= (KGSL_RB_DWORDS - 2)) { + ret = rb->_wptr; + rb->_wptr = (rb->_wptr + dwords) % KGSL_RB_DWORDS; + return RB_HOSTPTR(rb, ret); + } + + /* + * There isn't enough space toward the end of ringbuffer. So + * look for space from the beginning of ringbuffer upto the + * read pointer. 
+ */ + if (dwords < rptr) { + cmds = RB_HOSTPTR(rb, rb->_wptr); + *cmds = cp_packet(adreno_dev, CP_NOP, + KGSL_RB_DWORDS - rb->_wptr - 1); + rb->_wptr = dwords; + return RB_HOSTPTR(rb, 0); + } + } + + if (rb->_wptr + dwords < rptr) { + ret = rb->_wptr; + rb->_wptr = (rb->_wptr + dwords) % KGSL_RB_DWORDS; + return RB_HOSTPTR(rb, ret); + } + + return ERR_PTR(-ENOSPC); +} + +void adreno_ringbuffer_stop(struct adreno_device *adreno_dev) +{ + struct adreno_ringbuffer *rb; + int i; + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) + kgsl_cancel_events(KGSL_DEVICE(adreno_dev), &(rb->events)); +} + +static int _rb_readtimestamp(struct kgsl_device *device, + void *priv, enum kgsl_timestamp_type type, + unsigned int *timestamp) +{ + return adreno_rb_readtimestamp(ADRENO_DEVICE(device), priv, type, + timestamp); +} + +int adreno_ringbuffer_setup(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, int id) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int priv = 0; + int ret; + + /* + * Allocate mem for storing RB pagetables and commands to + * switch pagetable + */ + ret = adreno_allocate_global(device, &rb->pagetable_desc, PAGE_SIZE, + SZ_16K, 0, KGSL_MEMDESC_PRIVILEGED, "pagetable_desc"); + if (ret) + return ret; + + /* allocate a chunk of memory to create user profiling IB1s */ + adreno_allocate_global(device, &rb->profile_desc, PAGE_SIZE, + 0, KGSL_MEMFLAGS_GPUREADONLY, 0, "profile_desc"); + + if (ADRENO_FEATURE(adreno_dev, ADRENO_APRIV)) + priv |= KGSL_MEMDESC_PRIVILEGED; + + ret = adreno_allocate_global(device, &rb->buffer_desc, KGSL_RB_SIZE, + SZ_4K, KGSL_MEMFLAGS_GPUREADONLY, priv, "ringbuffer"); + if (ret) + return ret; + + if (!list_empty(&rb->events.group)) + return 0; + + rb->id = id; + kgsl_add_event_group(device, &rb->events, NULL, _rb_readtimestamp, rb, + "rb_events-%d", id); + + rb->timestamp = 0; + init_waitqueue_head(&rb->ts_expire_waitq); + + spin_lock_init(&rb->preempt_lock); + + return 0; +} + +void 
adreno_preemption_timer(struct timer_list *t) +{ + struct adreno_preemption *preempt = from_timer(preempt, t, timer); + struct adreno_device *adreno_dev = container_of(preempt, + struct adreno_device, preempt); + + /* We should only be here from a triggered state */ + if (!adreno_move_preempt_state(adreno_dev, + ADRENO_PREEMPT_TRIGGERED, ADRENO_PREEMPT_FAULTED)) + return; + + /* Schedule the worker to take care of the details */ + queue_work(system_unbound_wq, &adreno_dev->preempt.work); +} + +void adreno_drawobj_set_constraint(struct kgsl_device *device, + struct kgsl_drawobj *drawobj) +{ + struct kgsl_context *context = drawobj->context; + unsigned long flags = drawobj->flags; + + /* + * Check if the context has a constraint and constraint flags are + * set. + */ + if (context->pwr_constraint.type && + ((context->flags & KGSL_CONTEXT_PWR_CONSTRAINT) || + (drawobj->flags & KGSL_CONTEXT_PWR_CONSTRAINT))) + kgsl_pwrctrl_set_constraint(device, &context->pwr_constraint, + context->id, drawobj->timestamp); + + if (context->l3_pwr_constraint.type && + ((context->flags & KGSL_CONTEXT_PWR_CONSTRAINT) || + (flags & KGSL_CONTEXT_PWR_CONSTRAINT))) { + + if (!device->num_l3_pwrlevels) { + dev_err_once(device->dev, + "l3 voting not available\n"); + return; + } + + switch (context->l3_pwr_constraint.type) { + case KGSL_CONSTRAINT_L3_PWRLEVEL: { + unsigned int sub_type; + unsigned int new_l3; + int ret = 0; + struct dcvs_freq freq = {0}; + + if (!device->l3_vote) + return; + + sub_type = context->l3_pwr_constraint.sub_type; + + /* + * If an L3 constraint is already set, set the new + * one only if it is higher. 
+ */ + new_l3 = max_t(unsigned int, sub_type + 1, + device->cur_l3_pwrlevel); + new_l3 = min_t(unsigned int, new_l3, + device->num_l3_pwrlevels - 1); + + if (device->cur_l3_pwrlevel == new_l3) + return; + + freq.ib = device->l3_freq[new_l3]; + freq.hw_type = DCVS_L3; + ret = qcom_dcvs_update_votes(KGSL_L3_DEVICE, &freq, 1, + DCVS_SLOW_PATH); + if (!ret) { + trace_kgsl_constraint(device, + KGSL_CONSTRAINT_L3_PWRLEVEL, new_l3, 1); + device->cur_l3_pwrlevel = new_l3; + } else { + dev_err_ratelimited(device->dev, + "Could not set l3_vote: %d\n", + ret); + } + break; + } + } + } +} + +int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj, + struct adreno_submit_time *time) +{ + struct adreno_submit_time local = { 0 }; + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context); + struct adreno_ringbuffer *rb = drawctxt->rb; + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + u32 flags = 0; + int ret; + + /* + * If SKIP CMD flag is set for current context + * a) set SKIPCMD as fault_recovery for current commandbatch + * b) store context's commandbatch fault_policy in current + * commandbatch fault_policy and clear context's commandbatch + * fault_policy + * c) force preamble for commandbatch + */ + if (test_bit(ADRENO_CONTEXT_SKIP_CMD, &drawctxt->base.priv) && + (!test_bit(CMDOBJ_SKIP, &cmdobj->priv))) { + + set_bit(KGSL_FT_SKIPCMD, &cmdobj->fault_recovery); + cmdobj->fault_policy = drawctxt->fault_policy; + set_bit(CMDOBJ_FORCE_PREAMBLE, &cmdobj->priv); + + /* if context is detached print fault recovery */ + adreno_fault_skipcmd_detached(adreno_dev, drawctxt, drawobj); + + /* clear the drawctxt flags */ + clear_bit(ADRENO_CONTEXT_SKIP_CMD, &drawctxt->base.priv); + drawctxt->fault_policy = 0; + } + + /* Check if user profiling should be enabled */ + + if ((drawobj->flags & KGSL_DRAWOBJ_PROFILING) && + cmdobj->profiling_buf_entry) { + flags |= 
F_USER_PROFILE; + + /* + * we want to use an adreno_submit_time struct to get the + * precise moment when the command is submitted to the + * ringbuffer. If an upstream caller already passed down a + * pointer piggyback on that otherwise use a local struct + */ + if (!time) + time = &local; + + time->drawobj = drawobj; + } + + flags |= F_PREAMBLE; + + /* + * When preamble is enabled, the preamble buffer with state restoration + * commands are stored in the first node of the IB chain. + * We can skip that if a context switch hasn't occurred. + */ + if ((drawctxt->base.flags & KGSL_CONTEXT_PREAMBLE) && + !test_bit(CMDOBJ_FORCE_PREAMBLE, &cmdobj->priv) && + (rb->drawctxt_active == drawctxt)) + flags &= ~F_PREAMBLE; + + /* + * In skip mode don't issue the draw IBs but keep all the other + * accoutrements of a submision (including the interrupt) to keep + * the accounting sane. Set start_index and numibs to 0 to just + * generate the start and end markers and skip everything else + */ + if (test_bit(CMDOBJ_SKIP, &cmdobj->priv)) { + flags &= ~F_PREAMBLE; + flags |= F_SKIP; + } + + /* Enable kernel profiling */ + if (test_bit(CMDOBJ_PROFILE, &cmdobj->priv)) + flags |= F_KERNEL_PROFILE; + + /* Add a WFI to the end of the submission */ + if (test_bit(CMDOBJ_WFI, &cmdobj->priv)) + flags |= F_WFI; + + /* + * For some targets, we need to execute a dummy shader operation after a + * power collapse + */ + if (test_and_clear_bit(ADRENO_DEVICE_PWRON, &adreno_dev->priv) && + test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv)) + flags |= F_PWRON_FIXUP; + + /* Check to see the submission should be secure */ + if (drawobj->context->flags & KGSL_CONTEXT_SECURE) + flags |= F_SECURE; + + /* process any profiling results that are available into the log_buf */ + adreno_profile_process_results(adreno_dev); + + ret = gpudev->ringbuffer_submitcmd(adreno_dev, cmdobj, + flags, time); + + if (!ret) { + set_bit(KGSL_CONTEXT_PRIV_SUBMITTED, &drawobj->context->priv); + cmdobj->global_ts = 
drawctxt->internal_timestamp; + } + + return ret; +} + +/** + * adreno_ringbuffer_wait_callback() - Callback function for event registered + * on a ringbuffer timestamp + * @device: Device for which the the callback is valid + * @context: The context of the event + * @priv: The private parameter of the event + * @result: Result of the event trigger + */ +static void adreno_ringbuffer_wait_callback(struct kgsl_device *device, + struct kgsl_event_group *group, + void *priv, int result) +{ + struct adreno_ringbuffer *rb = group->priv; + + wake_up_all(&rb->ts_expire_waitq); +} + +/* check if timestamp is greater than the current rb timestamp */ +static inline int adreno_ringbuffer_check_timestamp( + struct adreno_ringbuffer *rb, + unsigned int timestamp, int type) +{ + struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb); + unsigned int ts; + + adreno_rb_readtimestamp(adreno_dev, rb, type, &ts); + return (timestamp_cmp(ts, timestamp) >= 0); +} + + +/** + * adreno_ringbuffer_waittimestamp() - Wait for a RB timestamp + * @rb: The ringbuffer to wait on + * @timestamp: The timestamp to wait for + * @msecs: The wait timeout period + */ +int adreno_ringbuffer_waittimestamp(struct adreno_ringbuffer *rb, + unsigned int timestamp, + unsigned int msecs) +{ + struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + unsigned long wait_time; + + /* check immediately if timeout is 0 */ + if (msecs == 0) + return adreno_ringbuffer_check_timestamp(rb, + timestamp, KGSL_TIMESTAMP_RETIRED) ? 
0 : -EBUSY; + + ret = kgsl_add_event(device, &rb->events, timestamp, + adreno_ringbuffer_wait_callback, NULL); + if (ret) + return ret; + + mutex_unlock(&device->mutex); + + wait_time = msecs_to_jiffies(msecs); + if (wait_event_timeout(rb->ts_expire_waitq, + !kgsl_event_pending(device, &rb->events, timestamp, + adreno_ringbuffer_wait_callback, NULL), + wait_time) == 0) + ret = -ETIMEDOUT; + + mutex_lock(&device->mutex); + /* + * after wake up make sure that expected timestamp has retired + * because the wakeup could have happened due to a cancel event + */ + if (!ret && !adreno_ringbuffer_check_timestamp(rb, + timestamp, KGSL_TIMESTAMP_RETIRED)) { + ret = -EAGAIN; + } + + return ret; +} diff --git a/adreno_ringbuffer.h b/adreno_ringbuffer.h new file mode 100644 index 0000000000..447586e72d --- /dev/null +++ b/adreno_ringbuffer.h @@ -0,0 +1,247 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2002,2007-2020, The Linux Foundation. All rights reserved. + */ +#ifndef __ADRENO_RINGBUFFER_H +#define __ADRENO_RINGBUFFER_H + +/* Given a ringbuffer, return the adreno device that owns it */ + +#define _RB_OFFSET(_id) (offsetof(struct adreno_device, ringbuffers) + \ + ((_id) * sizeof(struct adreno_ringbuffer))) + +#define ADRENO_RB_DEVICE(_rb) \ + ((struct adreno_device *) (((void *) (_rb)) - _RB_OFFSET((_rb)->id))) + +/* Adreno ringbuffer size in bytes */ +#define KGSL_RB_SIZE (32 * 1024) + +/* + * A handy macro to convert the RB size to dwords since most ringbuffer + * operations happen in dword increments + */ +#define KGSL_RB_DWORDS (KGSL_RB_SIZE >> 2) + +/* Specifies that the command should be run in protected mode */ +#define F_NOTPROTECTED BIT(0) +/* Indicates that the CP should wait for idle after executing the command */ +#define F_WFI BIT(1) +/* Indicates that the poweron fixup should be executed before the command */ +#define F_PWRON_FIXUP BIT(2) +/* Indicates that the submission should be secure */ +#define F_SECURE BIT(3) +/* Indicates that 
the IBs in the submission should be skipped */ +#define F_SKIP BIT(4) +/* Indicates that user always on timer profiling is enabled */ +#define F_USER_PROFILE BIT(5) +/* Indicates that kernel always on timer profiling is enabled */ +#define F_KERNEL_PROFILE BIT(6) +/* Indicates that the submission has a preamble */ +#define F_PREAMBLE BIT(7) + +#define IS_NOTPROTECTED(flags) ((flags) & F_NOTPROTECTED) +#define IS_WFI(flags) ((flags) & F_WFI) +#define IS_PWRON_FIXUP(flags) ((flags) & F_PWRON_FIXUP) +#define IS_SECURE(flags) ((flags) & F_SECURE) +#define IS_SKIP(flags) ((flags) & F_SKIP) +#define IS_USER_PROFILE(flags) ((flags) & F_USER_PROFILE) +#define IS_KERNEL_PROFILE(flags) ((flags) & F_KERNEL_PROFILE) +#define IS_PREAMBLE(flags) ((flags) & F_PREAMBLE) + +struct kgsl_device; +struct kgsl_device_private; + +/** + * struct adreno_submit_time - utility structure to store the wall clock / GPU + * ticks at command submit time + * @ticks: GPU ticks at submit time (from the 19.2Mhz timer) + * @ktime: local clock time (in nanoseconds) + * @utime: Wall clock time + * @drawobj: the object that we want to profile + */ +struct adreno_submit_time { + uint64_t ticks; + u64 ktime; + struct timespec64 utime; + struct kgsl_drawobj *drawobj; +}; + +/** + * struct adreno_ringbuffer_pagetable_info - Contains fields used during a + * pagetable switch. + * @current_global_ptname: The current pagetable id being used by the GPU. + * Only the ringbuffers[0] current_global_ptname is used to keep track of + * the current pagetable id + * @current_rb_ptname: The current pagetable active on the given RB + * @incoming_ptname: Contains the incoming pagetable we are switching to. After + * switching of pagetable this value equals current_rb_ptname. + * @switch_pt_enable: Flag used during pagetable switch to check if pt + * switch can be skipped + * @ttbr0: value to program into TTBR0 during pagetable switch. + * @contextidr: value to program into CONTEXTIDR during pagetable switch. 
+ */ +struct adreno_ringbuffer_pagetable_info { + int current_global_ptname; + int current_rb_ptname; + int incoming_ptname; + int switch_pt_enable; + uint64_t ttbr0; + unsigned int contextidr; +}; + +#define PT_INFO_OFFSET(_field) \ + offsetof(struct adreno_ringbuffer_pagetable_info, _field) + +/** + * struct adreno_ringbuffer - Definition for an adreno ringbuffer object + * @flags: Internal control flags for the ringbuffer + * @buffer_desc: Pointer to the ringbuffer memory descriptor + * @_wptr: The next value of wptr to be written to the hardware on submit + * @wptr: Local copy of the wptr offset last written to hardware + * @last_wptr: offset of the last wptr that was written to CFF + * @rb_ctx: The context that represents a ringbuffer + * @id: Priority level of the ringbuffer, also used as an ID + * @fault_detect_ts: The last retired global timestamp read during fault detect + * @timestamp: The RB's global timestamp + * @events: A kgsl_event_group for this context - contains the list of GPU + * events + * @drawctxt_active: The last pagetable that this ringbuffer is set to + * @preemption_desc: The memory descriptor containing + * preemption info written/read by CP + * @secure_preemption_desc: The memory descriptor containing + * preemption info written/read by CP for secure contexts + * @perfcounter_save_restore_desc: Used by CP to save/restore the perfcounter + * values across preemption + * @pagetable_desc: Memory to hold information about the pagetables being used + * and the commands to switch pagetable on the RB + * @dispatch_q: The dispatcher side queue for this ringbuffer + * @ts_expire_waitq: Wait queue to wait for rb timestamp to expire + * @ts_expire_waitq: Wait q to wait for rb timestamp to expire + * @wptr_preempt_end: Used during preemption to check that preemption occurred + * at the right rptr + * @gpr11: The gpr11 value of this RB + * @preempted_midway: Indicates that the RB was preempted before rptr = wptr + * @preempt_lock: Lock to protect 
the wptr pointer while it is being updated + * @skip_inline_wptr: Used during preemption to make sure wptr is updated in + * hardware + */ +struct adreno_ringbuffer { + uint32_t flags; + struct kgsl_memdesc *buffer_desc; + unsigned int _wptr; + unsigned int wptr; + unsigned int last_wptr; + int id; + unsigned int fault_detect_ts; + unsigned int timestamp; + struct kgsl_event_group events; + struct adreno_context *drawctxt_active; + struct kgsl_memdesc *preemption_desc; + struct kgsl_memdesc *secure_preemption_desc; + struct kgsl_memdesc *perfcounter_save_restore_desc; + struct kgsl_memdesc *pagetable_desc; + struct adreno_dispatcher_drawqueue dispatch_q; + wait_queue_head_t ts_expire_waitq; + unsigned int wptr_preempt_end; + unsigned int gpr11; + int preempted_midway; + spinlock_t preempt_lock; + bool skip_inline_wptr; + /** + * @profile_desc: global memory to construct IB1s to do user side + * profiling + */ + struct kgsl_memdesc *profile_desc; + /** + * @profile_index: Pointer to the next "slot" in profile_desc for a user + * profiling IB1. This allows for PAGE_SIZE / 16 = 256 simultaneous + * commands per ringbuffer with user profiling enabled + * enough. + */ + u32 profile_index; +}; + +/* Returns the current ringbuffer */ +#define ADRENO_CURRENT_RINGBUFFER(a) ((a)->cur_rb) + +int adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, + struct kgsl_drawobj *drawobj, + uint32_t *timestamp); + +/** + * adreno_ringbuffer_setup - Do generic set up on a ringbuffer + * @adreno_dev: Pointer to an Adreno GPU handle + * @rb: Pointer to the ringbuffer struct to set up + * @id: Index of the ringbuffer + * + * Set up generic memory and other bits of a ringbuffer. + * Return: 0 on success or negative on error. 
+ */
+int adreno_ringbuffer_setup(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, int id);
+
+/* Submit a command object to the given ringbuffer's hardware queue */
+int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
+		struct kgsl_drawobj_cmd *cmdobj,
+		struct adreno_submit_time *time);
+
+
+void adreno_ringbuffer_stop(struct adreno_device *adreno_dev);
+
+/* Write the pending wptr to hardware, optionally recording submit time */
+void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb,
+		struct adreno_submit_time *time);
+
+void kgsl_cp_intrcallback(struct kgsl_device *device);
+
+/* Reserve @numcmds dwords in the ringbuffer; returns a host pointer */
+unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb,
+		unsigned int numcmds);
+
+void adreno_ringbuffer_read_pfp_ucode(struct kgsl_device *device);
+
+void adreno_ringbuffer_read_pm4_ucode(struct kgsl_device *device);
+
+int adreno_ringbuffer_waittimestamp(struct adreno_ringbuffer *rb,
+		unsigned int timestamp,
+		unsigned int msecs);
+
+int adreno_rb_readtimestamp(struct adreno_device *adreno_dev,
+		void *priv, enum kgsl_timestamp_type type,
+		unsigned int *timestamp);
+
+/*
+ * Number of dwords queued between @rptr and the local wptr copy,
+ * accounting for ringbuffer wrap-around.
+ */
+static inline int adreno_ringbuffer_count(struct adreno_ringbuffer *rb,
+	unsigned int rptr)
+{
+	if (rb->wptr >= rptr)
+		return rb->wptr - rptr;
+	return rb->wptr + KGSL_RB_DWORDS - rptr;
+}
+
+/* Increment a value by 4 bytes with wrap-around based on size */
+static inline unsigned int adreno_ringbuffer_inc_wrapped(unsigned int val,
+							unsigned int size)
+{
+	return (val + sizeof(unsigned int)) % size;
+}
+
+/* Decrement a value by 4 bytes with wrap-around based on size */
+static inline unsigned int adreno_ringbuffer_dec_wrapped(unsigned int val,
+							unsigned int size)
+{
+	return (val + size - sizeof(unsigned int)) % size;
+}
+
+/**
+ * adreno_ringbuffer_set_constraint - Set a system constraint before submission
+ * @device: A KGSL GPU device handle
+ * @drawobj: Pointer to the drawobj being submitted
+ *
+ * Check the drawobj to see if a constraint is applied and apply it.
+ */ +void adreno_ringbuffer_set_constraint(struct kgsl_device *device, + struct kgsl_drawobj *drawobj); + +void adreno_get_submit_time(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, + struct adreno_submit_time *time); + +#endif /* __ADRENO_RINGBUFFER_H */ diff --git a/adreno_snapshot.c b/adreno_snapshot.c new file mode 100644 index 0000000000..ec6defa94f --- /dev/null +++ b/adreno_snapshot.c @@ -0,0 +1,1134 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. + */ + +#include + +#include "adreno.h" +#include "adreno_cp_parser.h" +#include "adreno_pm4types.h" +#include "adreno_snapshot.h" + +/* Maintain a list of the objects we see during parsing */ + +#define SNAPSHOT_OBJ_BUFSIZE 64 + +/* Used to print error message if an IB has too many objects in it */ +static int ib_max_objs; + +struct snapshot_rb_params { + struct kgsl_snapshot *snapshot; + struct adreno_ringbuffer *rb; +}; + +/* Keep track of how many bytes are frozen after a snapshot and tell the user */ +static size_t snapshot_frozen_objsize; + +static struct kgsl_snapshot_object objbuf[SNAPSHOT_OBJ_BUFSIZE]; + +/* Pointer to the next open entry in the object list */ +static unsigned int objbufptr; + +static inline int adreno_rb_ctxtswitch(struct adreno_device *adreno_dev, + unsigned int *cmd) +{ + return cmd[0] == cp_packet(adreno_dev, CP_NOP, 1) && + cmd[1] == CONTEXT_TO_MEM_IDENTIFIER; +} + +/* Push a new buffer object onto the list */ +void kgsl_snapshot_push_object(struct kgsl_device *device, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t dwords) +{ + int index; + struct kgsl_mem_entry *entry; + + if (process == NULL || gpuaddr == 0) + return; + + /* + * Sometimes IBs can be reused in the same dump. Because we parse from + * oldest to newest, if we come across an IB that has already been used, + * assume that it has been reused and update the list with the newest + * size. 
+ */
+
+	for (index = 0; index < objbufptr; index++) {
+		if (objbuf[index].gpuaddr == gpuaddr &&
+			objbuf[index].entry->priv == process) {
+			/*
+			 * Check if newly requested size is within the
+			 * allocated range or not, otherwise continue
+			 * with previous size.
+			 */
+			if (!kgsl_gpuaddr_in_memdesc(
+				&objbuf[index].entry->memdesc,
+				gpuaddr, dwords << 2)) {
+				dev_err(device->dev,
+					"snapshot: gpuaddr 0x%016llX size is less than requested\n",
+					gpuaddr);
+				return;
+			}
+
+			/* dwords << 2 converts the dword count to bytes */
+			objbuf[index].size = max_t(uint64_t,
+					objbuf[index].size,
+					dwords << 2);
+			return;
+		}
+	}
+
+	/* The static object table is full; drop the object with a warning */
+	if (objbufptr == SNAPSHOT_OBJ_BUFSIZE) {
+		dev_err(device->dev, "snapshot: too many snapshot objects\n");
+		return;
+	}
+
+	/*
+	 * NOTE(review): kgsl_sharedmem_find presumably takes a reference on
+	 * the entry; it is released via kgsl_mem_entry_put on the error path
+	 * below and (for successfully pushed objects) when the object is
+	 * dumped - confirm against kgsl.c.
+	 */
+	entry = kgsl_sharedmem_find(process, gpuaddr);
+	if (entry == NULL) {
+		dev_err(device->dev,
+			"snapshot: Can't find entry for 0x%016llX\n", gpuaddr);
+		return;
+	}
+
+	if (!kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, dwords << 2)) {
+		dev_err(device->dev,
+			"snapshot: Mem entry 0x%016llX is too small\n",
+			gpuaddr);
+		kgsl_mem_entry_put(entry);
+		return;
+	}
+
+	/* Put it on the list of things to parse */
+	objbuf[objbufptr].gpuaddr = gpuaddr;
+	objbuf[objbufptr].size = dwords << 2;
+	objbuf[objbufptr++].entry = entry;
+}
+
+/*
+ * Return the index of the specified object if it is already on the list of
+ * buffers to be dumped, or -ENOENT if it is not present
+ */
+
+static int find_object(uint64_t gpuaddr, struct kgsl_process_private *process)
+{
+	int index;
+
+	for (index = 0; index < objbufptr; index++) {
+		if (objbuf[index].gpuaddr == gpuaddr &&
+			objbuf[index].entry->priv == process)
+			return index;
+	}
+	return -ENOENT;
+}
+
+/*
+ * snapshot_freeze_obj_list() - Take a list of ib objects and freeze their
+ * memory for snapshot
+ * @snapshot: The snapshot data.
+ * @process: The process to which the IB belongs + * @ib_obj_list: List of the IB objects + * + * Returns 0 on success else error code + */ +static int snapshot_freeze_obj_list(struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list) +{ + int ret = 0; + struct adreno_ib_object *ib_objs; + int i; + + for (i = 0; i < ib_obj_list->num_objs; i++) { + int temp_ret; + int index; + int freeze = 1; + + ib_objs = &(ib_obj_list->obj_list[i]); + /* Make sure this object is not going to be saved statically */ + for (index = 0; index < objbufptr; index++) { + if ((objbuf[index].gpuaddr <= ib_objs->gpuaddr) && + ((objbuf[index].gpuaddr + + (objbuf[index].size)) >= + (ib_objs->gpuaddr + ib_objs->size)) && + (objbuf[index].entry->priv == process)) { + freeze = 0; + objbuf[index].entry->memdesc.priv &= + ~KGSL_MEMDESC_SKIP_RECLAIM; + break; + } + } + + if (freeze) { + temp_ret = kgsl_snapshot_get_object(snapshot, + process, ib_objs->gpuaddr, + ib_objs->size, + ib_objs->snapshot_obj_type); + if (temp_ret < 0) { + if (ret >= 0) + ret = temp_ret; + } else { + snapshot_frozen_objsize += temp_ret; + } + } + } + return ret; +} + +void adreno_parse_ib(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t dwords) +{ + struct adreno_ib_object_list *ib_obj_list; + + /* + * Check the IB address - if it is either the last executed IB1 + * then push it into the static blob otherwise put it in the dynamic + * list + */ + if (kgsl_addr_range_overlap(gpuaddr, dwords, + snapshot->ib1base, snapshot->ib1size)) { + /* + * During restore after preemption, ib1base in the register + * can be updated by CP. In such scenarios, to dump complete + * IB1 in snapshot, we should consider ib1base from ringbuffer. 
+ */ + if (gpuaddr != snapshot->ib1base) { + snapshot->ib1base = gpuaddr; + snapshot->ib1size = dwords; + } + kgsl_snapshot_push_object(device, process, gpuaddr, dwords); + return; + } + + if (kgsl_snapshot_have_object(snapshot, process, + gpuaddr, dwords << 2)) + return; + + if (-E2BIG == adreno_ib_create_object_list(device, process, + gpuaddr, dwords, snapshot->ib2base, + &ib_obj_list)) + ib_max_objs = 1; + + if (ib_obj_list) + kgsl_snapshot_add_ib_obj_list(snapshot, ib_obj_list); + +} + +static void dump_all_ibs(struct kgsl_device *device, + struct adreno_ringbuffer *rb, + struct kgsl_snapshot *snapshot) +{ + int index = 0; + unsigned int *rbptr; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + + rbptr = rb->buffer_desc->hostptr; + + for (index = 0; index < KGSL_RB_DWORDS;) { + + if (adreno_cmd_is_ib(adreno_dev, rbptr[index])) { + uint64_t ibaddr; + uint64_t ibsize; + + if (ADRENO_LEGACY_PM4(adreno_dev)) { + ibaddr = rbptr[index + 1]; + ibsize = rbptr[index + 2]; + index += 3; + } else { + ibaddr = rbptr[index + 2]; + ibaddr = ibaddr << 32 | rbptr[index + 1]; + ibsize = rbptr[index + 3]; + index += 4; + } + + /* Don't parse known global IBs */ + if (kgsl_gpuaddr_in_memdesc(iommu->setstate, + ibaddr, ibsize)) + continue; + + if (kgsl_gpuaddr_in_memdesc(adreno_dev->pwron_fixup, + ibaddr, ibsize)) + continue; + + adreno_parse_ib(device, snapshot, snapshot->process, + ibaddr, ibsize); + } else + index = index + 1; + } +} + +/** + * snapshot_rb_ibs() - Dump rb data and capture the IB's in the RB as well + * @device: Pointer to a KGSL device + * @rb: The RB to dump + * @data: Pointer to memory where the RB data is to be dumped + * @snapshot: Pointer to information about the current snapshot being taken + */ +static void snapshot_rb_ibs(struct kgsl_device *device, + struct adreno_ringbuffer *rb, + struct kgsl_snapshot *snapshot) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int 
*rbptr, rptr = adreno_get_rptr(rb); + int index, i; + int parse_ibs = 0, ib_parse_start; + + /* + * Figure out the window of ringbuffer data to dump. First we need to + * find where the last processed IB ws submitted. Start walking back + * from the rptr + */ + index = rptr; + rbptr = rb->buffer_desc->hostptr; + + do { + index--; + + if (index < 0) { + if (ADRENO_LEGACY_PM4(adreno_dev)) + index = KGSL_RB_DWORDS - 3; + else + index = KGSL_RB_DWORDS - 4; + + /* We wrapped without finding what we wanted */ + if (index < rb->wptr) { + index = rb->wptr; + break; + } + } + + if (adreno_cmd_is_ib(adreno_dev, rbptr[index])) { + uint64_t ibaddr; + uint64_t ibsize; + + if (ADRENO_LEGACY_PM4(adreno_dev)) { + ibaddr = rbptr[index + 1]; + ibsize = rbptr[index + 2]; + } else { + ibaddr = rbptr[index + 2]; + ibaddr = ibaddr << 32 | rbptr[index + 1]; + ibsize = rbptr[index + 3]; + } + + if (kgsl_addr_range_overlap(ibaddr, ibsize, + snapshot->ib1base, snapshot->ib1size)) { + /* + * During restore after preemption, ib1base in + * the register can be updated by CP. In such + * scenario, to dump complete IB1 in snapshot, + * we should consider ib1base from ringbuffer. + */ + snapshot->ib1base = ibaddr; + snapshot->ib1size = ibsize; + break; + } + } + } while (index != rb->wptr); + + /* + * If the ib1 was not found, for example, if ib1base was restored + * incorrectly after preemption, then simply dump the entire + * ringbuffer along with all the IBs in the ringbuffer. + */ + + if (index == rb->wptr) { + dump_all_ibs(device, rb, snapshot); + return; + } + + /* + * index points at the last submitted IB. We can only trust that the + * memory between the context switch and the hanging IB is valid, so + * the next step is to find the context switch before the submission + */ + + while (index != rb->wptr) { + index--; + + if (index < 0) { + index = KGSL_RB_DWORDS - 2; + + /* + * Wrapped without finding the context switch. 
This is + * harmless - we should still have enough data to dump a + * valid state + */ + + if (index < rb->wptr) { + index = rb->wptr; + break; + } + } + + /* Break if the current packet is a context switch identifier */ + if ((rbptr[index] == cp_packet(adreno_dev, CP_NOP, 1)) && + (rbptr[index + 1] == CONTEXT_TO_MEM_IDENTIFIER)) + break; + } + + /* + * Index represents the start of the window of interest. We will try + * to dump all buffers between here and the rptr + */ + + ib_parse_start = index; + + /* + * Loop through the RB, looking for indirect buffers and MMU pagetable + * changes + */ + + index = rb->wptr; + for (i = 0; i < KGSL_RB_DWORDS; i++) { + /* + * Only parse IBs between the start and the rptr or the next + * context switch, whichever comes first + */ + + if (parse_ibs == 0 && index == ib_parse_start) + parse_ibs = 1; + else if (index == rptr || adreno_rb_ctxtswitch(adreno_dev, + &rbptr[index])) + parse_ibs = 0; + + if (parse_ibs && adreno_cmd_is_ib(adreno_dev, rbptr[index])) { + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + uint64_t ibaddr; + uint64_t ibsize; + + if (ADRENO_LEGACY_PM4(adreno_dev)) { + ibaddr = rbptr[index + 1]; + ibsize = rbptr[index + 2]; + } else { + ibaddr = rbptr[index + 2]; + ibaddr = ibaddr << 32 | rbptr[index + 1]; + ibsize = rbptr[index + 3]; + } + + index = (index + 1) % KGSL_RB_DWORDS; + + /* Don't parse known global IBs */ + if (kgsl_gpuaddr_in_memdesc(iommu->setstate, + ibaddr, ibsize)) + continue; + + if (kgsl_gpuaddr_in_memdesc(adreno_dev->pwron_fixup, + ibaddr, ibsize)) + continue; + + adreno_parse_ib(device, snapshot, snapshot->process, + ibaddr, ibsize); + } else + index = (index + 1) % KGSL_RB_DWORDS; + } + +} + +/* Snapshot the ringbuffer memory */ +static size_t snapshot_rb(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + struct adreno_device 
*adreno_dev = ADRENO_DEVICE(device); + struct snapshot_rb_params *snap_rb_params = priv; + struct kgsl_snapshot *snapshot = snap_rb_params->snapshot; + struct adreno_ringbuffer *rb = snap_rb_params->rb; + + /* + * Dump the entire ringbuffer - the parser can choose how much of it to + * process + */ + + if (remain < KGSL_RB_SIZE + sizeof(*header)) { + dev_err(device->dev, + "snapshot: Not enough memory for the rb section\n"); + return 0; + } + + /* Write the sub-header for the section */ + header->start = 0; + header->end = KGSL_RB_DWORDS; + header->wptr = rb->wptr; + header->rptr = adreno_get_rptr(rb); + header->rbsize = KGSL_RB_DWORDS; + header->count = KGSL_RB_DWORDS; + adreno_rb_readtimestamp(adreno_dev, rb, KGSL_TIMESTAMP_QUEUED, + &header->timestamp_queued); + adreno_rb_readtimestamp(adreno_dev, rb, KGSL_TIMESTAMP_RETIRED, + &header->timestamp_retired); + header->gpuaddr = rb->buffer_desc->gpuaddr; + header->id = rb->id; + + if (rb == adreno_dev->cur_rb) + snapshot_rb_ibs(device, rb, snapshot); + + /* Just copy the ringbuffer, there are no active IBs */ + memcpy(data, rb->buffer_desc->hostptr, KGSL_RB_SIZE); + + /* Return the size of the section */ + return KGSL_RB_SIZE + sizeof(*header); +} + +static int _count_mem_entries(int id, void *ptr, void *data) +{ + int *count = data; + *count = *count + 1; + return 0; +} + +struct mem_entry { + uint64_t gpuaddr; + uint64_t size; + unsigned int type; +} __packed; + +static int _save_mem_entries(int id, void *ptr, void *data) +{ + struct kgsl_mem_entry *entry = ptr; + struct mem_entry *m = (struct mem_entry *) data; + unsigned int index = id - 1; + + m[index].gpuaddr = entry->memdesc.gpuaddr; + m[index].size = entry->memdesc.size; + m[index].type = kgsl_memdesc_get_memtype(&entry->memdesc); + + return 0; +} + +static size_t snapshot_capture_mem_list(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_mem_list_v2 *header = + (struct kgsl_snapshot_mem_list_v2 *)buf; + int num_mem 
= 0; + int ret = 0; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + struct kgsl_process_private *process = priv; + + /* we need a process to search! */ + if (process == NULL) + return 0; + + spin_lock(&process->mem_lock); + + /* We need to know the number of memory objects that the process has */ + idr_for_each(&process->mem_idr, _count_mem_entries, &num_mem); + + if (num_mem == 0) + goto out; + + if (remain < ((num_mem * sizeof(struct mem_entry)) + sizeof(*header))) { + dev_err(device->dev, + "snapshot: Not enough memory for the mem list\n"); + goto out; + } + + header->num_entries = num_mem; + header->ptbase = kgsl_mmu_pagetable_get_ttbr0(process->pagetable); + + /* + * Walk through the memory list and store the + * tuples(gpuaddr, size, memtype) in snapshot + */ + idr_for_each(&process->mem_idr, _save_mem_entries, data); + + ret = sizeof(*header) + (num_mem * sizeof(struct mem_entry)); +out: + spin_unlock(&process->mem_lock); + return ret; +} + +struct snapshot_ib_meta { + struct kgsl_snapshot *snapshot; + struct kgsl_snapshot_object *obj; + uint64_t ib1base; + uint64_t ib1size; + uint64_t ib2base; + uint64_t ib2size; +}; + +static void kgsl_snapshot_add_active_ib_obj_list(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + struct adreno_ib_object_list *ib_obj_list; + int index = -ENOENT; + + if (!snapshot->ib1dumped) + index = find_object(snapshot->ib1base, snapshot->process); + + /* only do this for IB1 because the IB2's are part of IB1 objects */ + if ((index != -ENOENT) && + (snapshot->ib1base == objbuf[index].gpuaddr)) { + if (-E2BIG == adreno_ib_create_object_list(device, + objbuf[index].entry->priv, + objbuf[index].gpuaddr, + objbuf[index].size >> 2, + snapshot->ib2base, + &ib_obj_list)) + ib_max_objs = 1; + if (ib_obj_list) { + /* freeze the IB objects in the IB */ + snapshot_freeze_obj_list(snapshot, + objbuf[index].entry->priv, + ib_obj_list); + adreno_ib_destroy_obj_list(ib_obj_list); + } + } else { + /* Get the IB2 
index from parsed object */ + index = find_object(snapshot->ib2base, snapshot->process); + + if (index != -ENOENT) + adreno_parse_ib(device, snapshot, snapshot->process, + snapshot->ib2base, objbuf[index].size >> 2); + } +} + +/* + * active_ib_is_parsed() - Checks if active ib is already parsed + * @gpuaddr: Active IB base address at the time of fault + * @size: Active IB size + * @process: The process to which the IB belongs + * + * Function returns true if the active is already is parsed + * else false + */ +static bool active_ib_is_parsed(uint64_t gpuaddr, uint64_t size, + struct kgsl_process_private *process) +{ + int index; + /* go through the static list for gpuaddr is in list or not */ + for (index = 0; index < objbufptr; index++) { + if ((objbuf[index].gpuaddr <= gpuaddr) && + ((objbuf[index].gpuaddr + + (objbuf[index].size)) >= + (gpuaddr + size)) && + (objbuf[index].entry->priv == process)) + return true; + } + return false; +} +/* Snapshot the memory for an indirect buffer */ +static size_t snapshot_ib(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_ib_v2 *header = (struct kgsl_snapshot_ib_v2 *)buf; + struct snapshot_ib_meta *meta = priv; + unsigned int *src; + unsigned int *dst = (unsigned int *)(buf + sizeof(*header)); + struct adreno_ib_object_list *ib_obj_list; + struct kgsl_snapshot *snapshot; + struct kgsl_snapshot_object *obj; + struct kgsl_memdesc *memdesc; + + if (meta == NULL || meta->snapshot == NULL || meta->obj == NULL) { + dev_err(device->dev, "snapshot: bad metadata\n"); + return 0; + } + snapshot = meta->snapshot; + obj = meta->obj; + memdesc = &obj->entry->memdesc; + + /* If size is zero get it from the medesc size */ + if (!obj->size) + obj->size = (memdesc->size - (obj->gpuaddr - memdesc->gpuaddr)); + + if (remain < (obj->size + sizeof(*header))) { + dev_err(device->dev, "snapshot: Not enough memory for the ib\n"); + return 0; + } + + src = kgsl_gpuaddr_to_vaddr(memdesc, obj->gpuaddr); + if 
(src == NULL) { + dev_err(device->dev, + "snapshot: Unable to map GPU memory object 0x%016llX into the kernel\n", + obj->gpuaddr); + return 0; + } + + /* only do this for IB1 because the IB2's are part of IB1 objects */ + if (meta->ib1base == obj->gpuaddr) { + + snapshot->ib1dumped = active_ib_is_parsed(obj->gpuaddr, + obj->size, obj->entry->priv); + if (-E2BIG == adreno_ib_create_object_list(device, + obj->entry->priv, + obj->gpuaddr, obj->size >> 2, + snapshot->ib2base, + &ib_obj_list)) + ib_max_objs = 1; + if (ib_obj_list) { + /* freeze the IB objects in the IB */ + snapshot_freeze_obj_list(snapshot, + obj->entry->priv, + ib_obj_list); + adreno_ib_destroy_obj_list(ib_obj_list); + } + } + + + if (meta->ib2base == obj->gpuaddr) + snapshot->ib2dumped = active_ib_is_parsed(obj->gpuaddr, + obj->size, obj->entry->priv); + + /* Write the sub-header for the section */ + header->gpuaddr = obj->gpuaddr; + header->ptbase = + kgsl_mmu_pagetable_get_ttbr0(obj->entry->priv->pagetable); + header->size = obj->size >> 2; + + /* Write the contents of the ib */ + memcpy((void *)dst, (void *)src, (size_t) obj->size); + /* Write the contents of the ib */ + + return obj->size + sizeof(*header); +} + +/* Dump another item on the current pending list */ +static void dump_object(struct kgsl_device *device, int obj, + struct kgsl_snapshot *snapshot) +{ + struct snapshot_ib_meta meta; + + meta.snapshot = snapshot; + meta.obj = &objbuf[obj]; + meta.ib1base = snapshot->ib1base; + meta.ib1size = snapshot->ib1size; + meta.ib2base = snapshot->ib2base; + meta.ib2size = snapshot->ib2size; + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_IB_V2, + snapshot, snapshot_ib, &meta); + if (objbuf[obj].entry) { + kgsl_memdesc_unmap(&(objbuf[obj].entry->memdesc)); + kgsl_mem_entry_put(objbuf[obj].entry); + } +} + +/* setup_fault process - Find kgsl_process_private struct that caused the fault + * + * Find the faulting process based what the dispatcher thinks happened and + * what the hardware 
is using for the current pagetable. The process struct + * will be used to look up GPU addresses that are encountered while parsing + * the GPU state. + */ +static void setup_fault_process(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process) +{ + u64 hw_ptbase, proc_ptbase; + + if (process != NULL && !kgsl_process_private_get(process)) + process = NULL; + + /* Get the physical address of the MMU pagetable */ + hw_ptbase = kgsl_mmu_get_current_ttbr0(&device->mmu); + + /* if we have an input process, make sure the ptbases match */ + if (process) { + proc_ptbase = kgsl_mmu_pagetable_get_ttbr0(process->pagetable); + /* agreement! No need to check further */ + if (hw_ptbase == proc_ptbase) + goto done; + + kgsl_process_private_put(process); + process = NULL; + dev_err(device->dev, + "snapshot: ptbase mismatch hw %llx sw %llx\n", + hw_ptbase, proc_ptbase); + } + + /* try to find the right pagetable by walking the process list */ + if (kgsl_mmu_is_perprocess(&device->mmu)) { + struct kgsl_process_private *tmp; + + read_lock(&kgsl_driver.proclist_lock); + list_for_each_entry(tmp, &kgsl_driver.process_list, list) { + u64 pt_ttbr0; + + pt_ttbr0 = kgsl_mmu_pagetable_get_ttbr0(tmp->pagetable); + if ((pt_ttbr0 == hw_ptbase) + && kgsl_process_private_get(tmp)) { + process = tmp; + break; + } + } + read_unlock(&kgsl_driver.proclist_lock); + } +done: + snapshot->process = process; +} + +/* Snapshot a global memory buffer */ +size_t adreno_snapshot_global(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_memdesc *memdesc = priv; + + struct kgsl_snapshot_gpu_object_v2 *header = + (struct kgsl_snapshot_gpu_object_v2 *)buf; + + u8 *ptr = buf + sizeof(*header); + + if (!memdesc || memdesc->size == 0) + return 0; + + if (remain < (memdesc->size + sizeof(*header))) { + dev_err(device->dev, + "snapshot: Not enough memory for the memdesc\n"); + return 0; + } + + if (memdesc->hostptr == NULL) { + 
dev_err(device->dev, + "snapshot: no kernel mapping for global object 0x%016llX\n", + memdesc->gpuaddr); + return 0; + } + + header->size = memdesc->size >> 2; + header->gpuaddr = memdesc->gpuaddr; + header->ptbase = MMU_DEFAULT_TTBR0(device); + header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; + + memcpy(ptr, memdesc->hostptr, memdesc->size); + + return memdesc->size + sizeof(*header); +} + +/* Snapshot IOMMU specific buffers */ +static void adreno_snapshot_iommu(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, adreno_snapshot_global, iommu->setstate); + + if (ADRENO_FEATURE(adreno_dev, ADRENO_PREEMPTION)) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, adreno_snapshot_global, iommu->smmu_info); +} + +static void adreno_snapshot_ringbuffer(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct adreno_ringbuffer *rb) +{ + struct snapshot_rb_params params = { + .snapshot = snapshot, + .rb = rb, + }; + + if (rb == NULL) + return; + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_RB_V2, snapshot, + snapshot_rb, ¶ms); +} + +static void adreno_snapshot_os(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct kgsl_context *guilty, + bool dump_contexts) +{ + struct kgsl_snapshot_section_header *sect = + (struct kgsl_snapshot_section_header *) snapshot->ptr; + struct kgsl_snapshot_linux_v2 *header = (struct kgsl_snapshot_linux_v2 *) + (snapshot->ptr + sizeof(*sect)); + + if (snapshot->remain < (sizeof(*sect) + sizeof(*header))) { + SNAPSHOT_ERR_NOMEM(device, "OS"); + return; + } + + header->osid = KGSL_SNAPSHOT_OS_LINUX_V3; + + strlcpy(header->release, init_utsname()->release, sizeof(header->release)); + strlcpy(header->version, init_utsname()->version, sizeof(header->version)); + + 
header->seconds = get_seconds(); + header->power_flags = device->pwrctrl.power_flags; + header->power_level = device->pwrctrl.active_pwrlevel; + header->power_interval_timeout = device->pwrctrl.interval_timeout; + header->grpclk = clk_get_rate(device->pwrctrl.grp_clks[0]); + + /* Get the current PT base */ + header->ptbase = kgsl_mmu_get_current_ttbr0(&device->mmu); + header->ctxtcount = 0; + + /* If we know the guilty context then dump it */ + if (guilty) { + header->pid = guilty->tid; + strlcpy(header->comm, guilty->proc_priv->comm, + sizeof(header->comm)); + } + + if (dump_contexts) { + u32 remain = snapshot->remain - sizeof(*sect) + sizeof(*header); + void *mem = snapshot->ptr + sizeof(*sect) + sizeof(*header); + struct kgsl_context *context; + int id; + + read_lock(&device->context_lock); + idr_for_each_entry(&device->context_idr, context, id) { + struct kgsl_snapshot_linux_context_v2 *c = mem; + + if (remain < sizeof(*c)) + break; + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_QUEUED, + &c->timestamp_queued); + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_CONSUMED, + &c->timestamp_consumed); + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, + &c->timestamp_retired); + + header->ctxtcount++; + + mem += sizeof(*c); + remain -= sizeof(*c); + + } + read_unlock(&device->context_lock); + } + + sect->magic = SNAPSHOT_SECTION_MAGIC; + sect->id = KGSL_SNAPSHOT_SECTION_OS; + sect->size = sizeof(*sect) + sizeof(*header) + + header->ctxtcount * sizeof(struct kgsl_snapshot_linux_context_v2); + + snapshot->ptr += sect->size; + snapshot->remain -= sect->size; + snapshot->size += sect->size; +} + +/* adreno_snapshot - Snapshot the Adreno GPU state + * @device - KGSL device to snapshot + * @snapshot - Pointer to the snapshot instance + * @context - context that caused the fault, if known by the driver + * This is a hook function called by kgsl_snapshot to snapshot the + * Adreno specific information for the GPU snapshot. 
In turn, this function + * calls the GPU specific snapshot function to get core specific information. + */ +void adreno_snapshot(struct kgsl_device *device, struct kgsl_snapshot *snapshot, + struct kgsl_context *context) +{ + unsigned int i; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct kgsl_snapshot_header *header = (struct kgsl_snapshot_header *) + snapshot->ptr; + + /* Set up the master header */ + header->magic = SNAPSHOT_MAGIC; + /* gpuid is deprecated so initialize it to an obviously wrong value */ + header->gpuid = UINT_MAX; + header->chipid = adreno_dev->chipid; + + snapshot->ptr += sizeof(*header); + snapshot->remain -= sizeof(*header); + snapshot->size += sizeof(*header); + + /* Write the OS section */ + adreno_snapshot_os(device, snapshot, context, device->gmu_fault); + + ib_max_objs = 0; + /* Reset the list of objects */ + objbufptr = 0; + + snapshot_frozen_objsize = 0; + + setup_fault_process(device, snapshot, + context ? 
context->proc_priv : NULL); + + /* Add GPU specific sections - registers mainly, but other stuff too */ + if (gpudev->snapshot) + gpudev->snapshot(adreno_dev, snapshot); + + snapshot->ib1dumped = false; + snapshot->ib2dumped = false; + + adreno_snapshot_ringbuffer(device, snapshot, adreno_dev->cur_rb); + + /* Dump the prev ringbuffer */ + if (adreno_dev->prev_rb != adreno_dev->cur_rb) + adreno_snapshot_ringbuffer(device, snapshot, + adreno_dev->prev_rb); + + if ((adreno_dev->next_rb != adreno_dev->prev_rb) && + (adreno_dev->next_rb != adreno_dev->cur_rb)) + adreno_snapshot_ringbuffer(device, snapshot, + adreno_dev->next_rb); + + if (device->snapshot_atomic) + return; + + /* Dump selected global buffers */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, adreno_snapshot_global, device->memstore); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, adreno_snapshot_global, device->scratch); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, adreno_snapshot_global, + adreno_dev->pwron_fixup); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, adreno_snapshot_global, + adreno_dev->profile_buffer); + + if (kgsl_mmu_get_mmutype(device) == KGSL_MMU_TYPE_IOMMU) + adreno_snapshot_iommu(device, snapshot); + + /* + * Add a section that lists (gpuaddr, size, memtype) tuples of the + * hanging process + */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MEMLIST_V2, + snapshot, snapshot_capture_mem_list, snapshot->process); + /* + * Make sure that the last IB1 that was being executed is dumped. + * Since this was the last IB1 that was processed, we should have + * already added it to the list during the ringbuffer parse but we + * want to be double plus sure. 
+ * The problem is that IB size from the register is the unprocessed size + * of the buffer not the original size, so if we didn't catch this + * buffer being directly used in the RB, then we might not be able to + * dump the whole thing. Try to dump the maximum possible size from the + * IB1 base address till the end of memdesc size so that we dont miss + * what we are interested in. Print a warning message so we can try to + * figure how often this really happens. + */ + + if (-ENOENT == find_object(snapshot->ib1base, snapshot->process)) { + struct kgsl_mem_entry *entry; + u64 ibsize; + + entry = kgsl_sharedmem_find(snapshot->process, + snapshot->ib1base); + if (entry == NULL) { + dev_err(device->dev, + "Can't find a memory entry containing IB1BASE %16llx\n", + snapshot->ib1base); + } else { + ibsize = entry->memdesc.size - + (snapshot->ib1base - entry->memdesc.gpuaddr); + kgsl_mem_entry_put(entry); + + kgsl_snapshot_push_object(device, snapshot->process, + snapshot->ib1base, ibsize >> 2); + dev_err(device->dev, + "CP_IB1_BASE is not found in the ringbuffer. Dumping %llx dwords of the buffer\n", + ibsize >> 2); + } + } + + /* + * Add the last parsed IB2 to the list. The IB2 should be found as we + * parse the objects below, but we try to add it to the list first, so + * it too can be parsed. Don't print an error message in this case - if + * the IB2 is found during parsing, the list will be updated with the + * correct size. + */ + + if (-ENOENT == find_object(snapshot->ib2base, snapshot->process)) + kgsl_snapshot_push_object(device, snapshot->process, + snapshot->ib2base, snapshot->ib2size); + + /* + * Go through the list of found objects and dump each one. 
As the IBs + * are parsed, more objects might be found, and objbufptr will increase + */ + for (i = 0; i < objbufptr; i++) + dump_object(device, i, snapshot); + + /* + * Incase snapshot static blob is running out of memory, Add Active IB1 + * and IB2 entries to obj_list so that active ib's can be dumped to + * snapshot dynamic blob. + */ + if (!snapshot->ib1dumped || !snapshot->ib2dumped) + kgsl_snapshot_add_active_ib_obj_list(device, snapshot); + + if (ib_max_objs) + dev_err(device->dev, "Max objects found in IB\n"); + if (snapshot_frozen_objsize) + dev_err(device->dev, + "GPU snapshot froze %zdKb of GPU buffers\n", + snapshot_frozen_objsize / 1024); + +} + +void adreno_snapshot_registers(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + const unsigned int *regs, unsigned int count) +{ + struct kgsl_snapshot_registers r; + + r.regs = regs; + r.count = count; + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, snapshot, + kgsl_snapshot_dump_registers, &r); +} + +int adreno_snapshot_regs_count(const u32 *ptr) +{ + unsigned int count = 0; + unsigned int group_count; + + for ( ; ptr[0] != UINT_MAX; ptr += 2) { + group_count = REG_COUNT(ptr); + if (group_count == 1) + count += group_count + 1; + else + count += group_count + 2; + } + return count; +} + +/* + * This is a new format for dumping the registers, where we dump just the first + * address of the register along with the count of the contiguous registers + * which we going to dump. 
This helps us save memory by not dumping the + * address for each register + */ +size_t adreno_snapshot_registers_v2(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + const u32 *ptr = (const u32 *)priv; + unsigned int *data = (unsigned int *)buf; + int count = 0, k; + + /* Figure out how many registers we are going to dump */ + count = adreno_snapshot_regs_count(ptr); + + if (remain < (count * 4)) { + SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); + return 0; + } + + for (ptr = (const u32 *)priv; ptr[0] != UINT_MAX; ptr += 2) { + int cnt = REG_COUNT(ptr); + + if (cnt == 1) + *data++ = BIT(31) | ptr[0]; + else { + *data++ = ptr[0]; + *data++ = cnt; + } + for (k = ptr[0]; k <= ptr[1]; k++) { + kgsl_regread(device, k, data); + data++; + } + } + + /* Return the size of the section */ + return (count * 4); +} diff --git a/adreno_snapshot.h b/adreno_snapshot.h new file mode 100644 index 0000000000..254c0c4a5c --- /dev/null +++ b/adreno_snapshot.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2013-2015,2020-2021, The Linux Foundation. All rights reserved. 
+ */ +#ifndef __ADRENO_SNAPSHOT_H +#define __ADRENO_SNAPSHOT_H + +#include "kgsl_snapshot.h" + +#define CP_CRASH_DUMPER_TIMEOUT 500 + +#define DEBUG_SECTION_SZ(_dwords) (((_dwords) * sizeof(unsigned int)) \ + + sizeof(struct kgsl_snapshot_debug)) + +#define SHADER_SECTION_SZ(_dwords) (((_dwords) * sizeof(unsigned int)) \ + + sizeof(struct kgsl_snapshot_shader)) + +/* Macro to make it super easy to dump registers */ +#define SNAPSHOT_REGISTERS(_d, _s, _r) \ + adreno_snapshot_registers((_d), (_s), \ + (unsigned int *) _r, ARRAY_SIZE(_r) / 2) + +#define REG_COUNT(_ptr) ((_ptr[1] - _ptr[0]) + 1) + +void adreno_snapshot_registers(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + const unsigned int *regs, unsigned int count); + +/** + * adreno_snapshot_regs_count - Helper function to calculate register and + * header size + * @ptr: Pointer to the register array + * + * Return: Number of registers in the array + * + * Helper function to count the total number of registers + * in a given array plus the header space needed for each group. + */ +int adreno_snapshot_regs_count(const u32 *ptr); + +/** + * adreno_snapshot_registers_v2 - Dump a series of registers + * @device: Pointer to the kgsl device + * @buf: The snapshot buffer + * @remain: The size remaining in the snapshot buffer + * @priv: Pointer to the register array to be dumped + * + * Return: Number of bytes written to the snapshot + * + * This function dumps the registers in a way that we need to + * only dump the start address and count for each pair of registers + * in the array. This helps us save some memory in snapshot.
+ */ +size_t adreno_snapshot_registers_v2(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv); + +/** + * adreno_parse_ib - Parse the given IB + * @device: Pointer to the kgsl device + * @snapshot: Pointer to the snapshot structure + * @process: Process to which this IB belongs + * @gpuaddr: Gpu address of the IB + * @dwords: Size in dwords of the IB + * + * We want to store the last executed IB1 and IB2 in the static region to ensure + * that we get at least some information out of the snapshot even if we can't + * access the dynamic data from the sysfs file. Push all other IBs on the + * dynamic list + */ +void adreno_parse_ib(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process, + u64 gpuaddr, u64 dwords); +/** + * adreno_snapshot_global - Add global buffer to snapshot + * @device: Pointer to the kgsl device + * @buf: Where the global buffer section is to be written + * @remain: Remaining bytes in snapshot buffer + * @priv: Opaque data + * + * Return: Number of bytes written to the snapshot buffer + */ +size_t adreno_snapshot_global(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv); +#endif /*__ADRENO_SNAPSHOT_H */ diff --git a/adreno_sysfs.c b/adreno_sysfs.c new file mode 100644 index 0000000000..5fb55aad09 --- /dev/null +++ b/adreno_sysfs.c @@ -0,0 +1,337 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2014-2021, The Linux Foundation. All rights reserved. 
+ */ + +#include + +#include "adreno.h" +#include "adreno_sysfs.h" +#include "kgsl_sysfs.h" + +static ssize_t _gpu_model_show(struct kgsl_device *device, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, adreno_get_gpu_model(device)); +} + +static ssize_t gpu_model_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return _gpu_model_show(device, buf); +} + +static int _l3_vote_store(struct adreno_device *adreno_dev, bool val) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (ADRENO_FEATURE(adreno_dev, ADRENO_L3_VOTE)) + device->l3_vote = val; + + return 0; +} + +static bool _l3_vote_show(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + return device->l3_vote; +} + +static int _ft_policy_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + adreno_dev->ft_policy = val & KGSL_FT_POLICY_MASK; + return 0; +} + +static unsigned int _ft_policy_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->ft_policy; +} + +static int _ft_pagefault_policy_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret = 0; + + mutex_lock(&device->mutex); + val &= KGSL_FT_PAGEFAULT_MASK; + + if (device->state == KGSL_STATE_ACTIVE) + ret = kgsl_mmu_set_pagefault_policy(&device->mmu, + (unsigned long) val); + + if (ret == 0) + device->mmu.pfpolicy = val; + + mutex_unlock(&device->mutex); + + return 0; +} + +static unsigned int _ft_pagefault_policy_show(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + return device->mmu.pfpolicy; +} + +static int _gpu_llc_slice_enable_store(struct adreno_device *adreno_dev, + bool val) +{ + if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice)) + adreno_dev->gpu_llc_slice_enable = val; + return 0; +} + +static bool _gpu_llc_slice_enable_show(struct adreno_device *adreno_dev) +{ + return 
adreno_dev->gpu_llc_slice_enable; +} + +static int _gpuhtw_llc_slice_enable_store(struct adreno_device *adreno_dev, + bool val) +{ + if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) + adreno_dev->gpuhtw_llc_slice_enable = val; + return 0; +} + +static bool _gpuhtw_llc_slice_enable_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->gpuhtw_llc_slice_enable; +} + +static bool _ft_hang_intr_status_show(struct adreno_device *adreno_dev) +{ + /* Hang interrupt is always on on all targets */ + return true; +} + +static int _hwcg_store(struct adreno_device *adreno_dev, bool val) +{ + if (adreno_dev->hwcg_enabled == val) + return 0; + + return adreno_power_cycle_bool(adreno_dev, &adreno_dev->hwcg_enabled, + val); +} + +static bool _hwcg_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->hwcg_enabled; +} + +static int _throttling_store(struct adreno_device *adreno_dev, bool val) +{ + if (!adreno_is_a540(adreno_dev) || + adreno_dev->throttling_enabled == val) + return 0; + + return adreno_power_cycle_bool(adreno_dev, + &adreno_dev->throttling_enabled, val); +} + +static bool _throttling_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->throttling_enabled; +} + +static int _sptp_pc_store(struct adreno_device *adreno_dev, bool val) +{ + if (!ADRENO_FEATURE(adreno_dev, ADRENO_SPTP_PC) || + adreno_dev->sptp_pc_enabled == val) + return 0; + + return adreno_power_cycle_bool(adreno_dev, &adreno_dev->sptp_pc_enabled, + val); +} + +static bool _sptp_pc_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->sptp_pc_enabled; +} + +static int _lm_store(struct adreno_device *adreno_dev, bool val) +{ + if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM) || + adreno_dev->lm_enabled == val) + return 0; + + return adreno_power_cycle_bool(adreno_dev, &adreno_dev->lm_enabled, + val); +} + +static bool _lm_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->lm_enabled; +} + +static int _ifpc_store(struct adreno_device *adreno_dev, bool val) +{ + 
return gmu_core_dev_ifpc_store(KGSL_DEVICE(adreno_dev), val); +} + +static bool _ifpc_show(struct adreno_device *adreno_dev) +{ + return gmu_core_dev_ifpc_show(KGSL_DEVICE(adreno_dev)); +} + +static unsigned int _ifpc_count_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->ifpc_count; +} + +static bool _acd_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->acd_enabled; +} + +static int _acd_store(struct adreno_device *adreno_dev, bool val) +{ + return gmu_core_dev_acd_set(KGSL_DEVICE(adreno_dev), val); +} + +static bool _bcl_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->bcl_enabled; +} + +static int _bcl_store(struct adreno_device *adreno_dev, bool val) +{ + if (!ADRENO_FEATURE(adreno_dev, ADRENO_BCL) || + adreno_dev->bcl_enabled == val) + return 0; + + return adreno_power_cycle_bool(adreno_dev, &adreno_dev->bcl_enabled, + val); +} + +ssize_t adreno_sysfs_store_u32(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_get_drvdata(dev)); + const struct adreno_sysfs_attribute_u32 *_attr = + container_of(attr, struct adreno_sysfs_attribute_u32, attr); + u32 val; + int ret; + + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; + + ret = _attr->store(adreno_dev, val); + if (ret) + return ret; + + return count; +} + +ssize_t adreno_sysfs_show_u32(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_get_drvdata(dev)); + const struct adreno_sysfs_attribute_u32 *_attr = + container_of(attr, struct adreno_sysfs_attribute_u32, attr); + + return scnprintf(buf, PAGE_SIZE, "0x%X\n", _attr->show(adreno_dev)); +} + +ssize_t adreno_sysfs_store_bool(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_get_drvdata(dev)); + const struct adreno_sysfs_attribute_bool *_attr = + container_of(attr, 
struct adreno_sysfs_attribute_bool, attr); + bool val; + int ret; + + ret = kstrtobool(buf, &val); + if (ret) + return ret; + + ret = _attr->store(adreno_dev, val); + if (ret) + return ret; + + return count; +} + +ssize_t adreno_sysfs_show_bool(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_get_drvdata(dev)); + const struct adreno_sysfs_attribute_bool *_attr = + container_of(attr, struct adreno_sysfs_attribute_bool, attr); + + return scnprintf(buf, PAGE_SIZE, "%d\n", _attr->show(adreno_dev)); +} + +static ADRENO_SYSFS_U32(ft_policy); +static ADRENO_SYSFS_U32(ft_pagefault_policy); +static ADRENO_SYSFS_RO_BOOL(ft_hang_intr_status); +static ADRENO_SYSFS_BOOL(gpu_llc_slice_enable); +static ADRENO_SYSFS_BOOL(gpuhtw_llc_slice_enable); + +static DEVICE_INT_ATTR(wake_nice, 0644, adreno_wake_nice); +static DEVICE_INT_ATTR(wake_timeout, 0644, adreno_wake_timeout); + +static ADRENO_SYSFS_BOOL(sptp_pc); +static ADRENO_SYSFS_BOOL(lm); +static ADRENO_SYSFS_BOOL(hwcg); +static ADRENO_SYSFS_BOOL(throttling); +static ADRENO_SYSFS_BOOL(ifpc); +static ADRENO_SYSFS_RO_U32(ifpc_count); +static ADRENO_SYSFS_BOOL(acd); +static ADRENO_SYSFS_BOOL(bcl); +static ADRENO_SYSFS_BOOL(l3_vote); + +static DEVICE_ATTR_RO(gpu_model); + +static const struct attribute *_attr_list[] = { + &adreno_attr_ft_policy.attr.attr, + &adreno_attr_ft_pagefault_policy.attr.attr, + &adreno_attr_ft_hang_intr_status.attr.attr, + &dev_attr_wake_nice.attr.attr, + &dev_attr_wake_timeout.attr.attr, + &adreno_attr_sptp_pc.attr.attr, + &adreno_attr_lm.attr.attr, + &adreno_attr_hwcg.attr.attr, + &adreno_attr_throttling.attr.attr, + &adreno_attr_gpu_llc_slice_enable.attr.attr, + &adreno_attr_gpuhtw_llc_slice_enable.attr.attr, + &adreno_attr_ifpc.attr.attr, + &adreno_attr_ifpc_count.attr.attr, + &adreno_attr_acd.attr.attr, + &adreno_attr_bcl.attr.attr, + &dev_attr_gpu_model.attr, + &adreno_attr_l3_vote.attr.attr, + NULL, +}; + +static 
GPU_SYSFS_ATTR(gpu_model, 0444, _gpu_model_show, NULL); + +/** + * adreno_sysfs_init() - Initialize adreno sysfs files + * @adreno_dev: Pointer to the adreno device + * + * Initialize many of the adreno specific sysfs files especially for fault + * tolerance and power control + */ +int adreno_sysfs_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + ret = sysfs_create_files(&device->dev->kobj, _attr_list); + + if (!ret) + ret = sysfs_create_file(&device->gpu_sysfs_kobj, + &gpu_sysfs_attr_gpu_model.attr); + + return ret; +} + diff --git a/adreno_sysfs.h b/adreno_sysfs.h new file mode 100644 index 0000000000..a60b5ab47d --- /dev/null +++ b/adreno_sysfs.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020, The Linux Foundation. All rights reserved. + */ + +#ifndef _ADRENO_SYSFS_H_ +#define _ADRENO_SYSFS_H_ + +/* + * struct adreno_sysfs_attribute_u32 - Container for accessing and modifying + * integers in kgsl via sysfs + */ +struct adreno_sysfs_attribute_u32 { + /** #attr: The device attribute corresponding to the sysfs node */ + struct device_attribute attr; + /** @show: Function to show the value of the integer */ + u32 (*show)(struct adreno_device *adreno_dev); + /** @store: Function to store the value of the integer */ + int (*store)(struct adreno_device *adreno_dev, u32 val); +}; + +/* + * struct adreno_sysfs_attribute_bool - Container for accessing and modifying + * booleans in kgsl via sysfs + */ +struct adreno_sysfs_attribute_bool { + /** #attr: The device attribute corresponding to the sysfs node */ + struct device_attribute attr; + /** @show: Function to show the value of the boolean */ + bool (*show)(struct adreno_device *adreno_dev); + /** @store: Function to store the value of the boolean */ + int (*store)(struct adreno_device *adreno_dev, bool val); +}; + +/* Helper function to modify an integer in kgsl */ +ssize_t adreno_sysfs_store_u32(struct device *dev, 
+ struct device_attribute *attr, const char *buf, size_t count); + +/* Helper function to read an integer in kgsl */ +ssize_t adreno_sysfs_show_u32(struct device *dev, + struct device_attribute *attr, char *buf); + +/* Helper function to modify a boolean in kgsl */ +ssize_t adreno_sysfs_store_bool(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count); + +/* Helper function to read a boolean in kgsl */ +ssize_t adreno_sysfs_show_bool(struct device *dev, + struct device_attribute *attr, char *buf); + +#define ADRENO_SYSFS_BOOL(_name) \ +const struct adreno_sysfs_attribute_bool adreno_attr_##_name = { \ + .attr = __ATTR(_name, 0644, adreno_sysfs_show_bool, \ + adreno_sysfs_store_bool), \ + .show = _ ## _name ## _show, \ + .store = _ ## _name ## _store, \ +} + +#define ADRENO_SYSFS_RO_BOOL(_name) \ +const struct adreno_sysfs_attribute_bool adreno_attr_##_name = { \ + .attr = __ATTR(_name, 0444, adreno_sysfs_show_bool, NULL), \ + .show = _ ## _name ## _show, \ +} + +#define ADRENO_SYSFS_U32(_name) \ +const struct adreno_sysfs_attribute_u32 adreno_attr_##_name = { \ + .attr = __ATTR(_name, 0644, adreno_sysfs_show_u32, \ + adreno_sysfs_store_u32), \ + .show = _ ## _name ## _show, \ + .store = _ ## _name ## _store, \ +} + +#define ADRENO_SYSFS_RO_U32(_name) \ +const struct adreno_sysfs_attribute_u32 adreno_attr_##_name = { \ + .attr = __ATTR(_name, 0444, adreno_sysfs_show_u32, NULL), \ + .show = _ ## _name ## _show, \ +} +#endif diff --git a/adreno_trace.c b/adreno_trace.c new file mode 100644 index 0000000000..84577f2441 --- /dev/null +++ b/adreno_trace.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021 The Linux Foundation. All rights reserved. 
+ */ + +#include +#include "adreno.h" + +/* Instantiate tracepoints */ +#define CREATE_TRACE_POINTS +#include "adreno_trace.h" + +static const char * const kgsl_fence_trace_events[] = { + "adreno_cmdbatch_submitted", + "adreno_cmdbatch_retired", + "syncpoint_fence", + "syncpoint_fence_expire", + "kgsl_fire_event", + "kgsl_timeline_fence_alloc", + "kgsl_timeline_fence_release", +}; + +void adreno_fence_trace_array_init(struct kgsl_device *device) +{ + int i; + + device->fence_trace_array = trace_array_get_by_name("kgsl-fence"); + + if (!device->fence_trace_array) + return; + + for (i = 0; i < ARRAY_SIZE(kgsl_fence_trace_events); i++) + trace_array_set_clr_event(device->fence_trace_array, + "kgsl", kgsl_fence_trace_events[i], true); + +} diff --git a/adreno_trace.h b/adreno_trace.h new file mode 100644 index 0000000000..3890dfc501 --- /dev/null +++ b/adreno_trace.h @@ -0,0 +1,786 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2013-2021, The Linux Foundation. All rights reserved. + */ + +#if !defined(_ADRENO_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _ADRENO_TRACE_H + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kgsl +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . 
+#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE adreno_trace + +#include +#include "adreno_a3xx.h" +#include "adreno_a5xx.h" +#include "adreno_gen7.h" + +#define ADRENO_FT_TYPES \ + { BIT(KGSL_FT_OFF), "off" }, \ + { BIT(KGSL_FT_REPLAY), "replay" }, \ + { BIT(KGSL_FT_SKIPIB), "skipib" }, \ + { BIT(KGSL_FT_SKIPFRAME), "skipframe" }, \ + { BIT(KGSL_FT_DISABLE), "disable" }, \ + { BIT(KGSL_FT_TEMP_DISABLE), "temp" }, \ + { BIT(KGSL_FT_THROTTLE), "throttle"}, \ + { BIT(KGSL_FT_SKIPCMD), "skipcmd" } + +TRACE_EVENT(adreno_cmdbatch_queued, + TP_PROTO(struct kgsl_drawobj *drawobj, unsigned int queued), + TP_ARGS(drawobj, queued), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(unsigned int, queued) + __field(unsigned int, flags) + __field(unsigned int, prio) + ), + TP_fast_assign( + __entry->id = drawobj->context->id; + __entry->timestamp = drawobj->timestamp; + __entry->queued = queued; + __entry->flags = drawobj->flags; + __entry->prio = drawobj->context->priority; + ), + TP_printk( + "ctx=%u ctx_prio=%u ts=%u queued=%u flags=%s", + __entry->id, __entry->prio, + __entry->timestamp, __entry->queued, + __entry->flags ? 
__print_flags(__entry->flags, "|", + KGSL_DRAWOBJ_FLAGS) : "none" + ) +); + +TRACE_EVENT(adreno_cmdbatch_submitted, + TP_PROTO(struct kgsl_drawobj *drawobj, struct submission_info *info, + uint64_t ticks, unsigned long secs, unsigned long usecs, + int q_inflight), + TP_ARGS(drawobj, info, ticks, secs, usecs, q_inflight), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(int, inflight) + __field(unsigned int, flags) + __field(uint64_t, ticks) + __field(unsigned long, secs) + __field(unsigned long, usecs) + __field(int, prio) + __field(int, rb_id) + __field(unsigned int, rptr) + __field(unsigned int, wptr) + __field(int, q_inflight) + __field(int, dispatch_queue) + ), + TP_fast_assign( + __entry->id = drawobj->context->id; + __entry->timestamp = drawobj->timestamp; + __entry->inflight = info->inflight; + __entry->flags = drawobj->flags; + __entry->ticks = ticks; + __entry->secs = secs; + __entry->usecs = usecs; + __entry->prio = drawobj->context->priority; + __entry->rb_id = info->rb_id; + __entry->rptr = info->rptr; + __entry->wptr = info->wptr; + __entry->q_inflight = q_inflight; + __entry->dispatch_queue = info->gmu_dispatch_queue; + ), + TP_printk( + "ctx=%u ctx_prio=%d ts=%u inflight=%d flags=%s ticks=%lld time=%lu.%0lu rb_id=%d r/w=%x/%x, q_inflight=%d dq_id=%d", + __entry->id, __entry->prio, __entry->timestamp, + __entry->inflight, + __entry->flags ? 
__print_flags(__entry->flags, "|", + KGSL_DRAWOBJ_FLAGS) : "none", + __entry->ticks, __entry->secs, __entry->usecs, + __entry->rb_id, __entry->rptr, __entry->wptr, + __entry->q_inflight, __entry->dispatch_queue + ) +); + +TRACE_EVENT(adreno_cmdbatch_retired, + TP_PROTO(struct kgsl_context *context, struct retire_info *info, + unsigned int flags, int q_inflight, + unsigned long fault_recovery), + TP_ARGS(context, info, flags, q_inflight, fault_recovery), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(int, inflight) + __field(unsigned int, recovery) + __field(unsigned int, flags) + __field(uint64_t, start) + __field(uint64_t, retire) + __field(int, prio) + __field(int, rb_id) + __field(unsigned int, rptr) + __field(unsigned int, wptr) + __field(int, q_inflight) + __field(unsigned long, fault_recovery) + __field(unsigned int, dispatch_queue) + __field(uint64_t, submitted_to_rb) + __field(uint64_t, retired_on_gmu) + ), + TP_fast_assign( + __entry->id = context->id; + __entry->timestamp = info->timestamp; + __entry->inflight = info->inflight; + __entry->recovery = fault_recovery; + __entry->flags = flags; + __entry->start = info->sop; + __entry->retire = info->eop; + __entry->prio = context->priority; + __entry->rb_id = info->rb_id; + __entry->rptr = info->rptr; + __entry->wptr = info->wptr; + __entry->q_inflight = q_inflight; + __entry->dispatch_queue = info->gmu_dispatch_queue; + __entry->submitted_to_rb = info->submitted_to_rb; + __entry->retired_on_gmu = info->retired_on_gmu; + ), + + TP_printk( + "ctx=%u ctx_prio=%d ts=%u inflight=%d recovery=%s flags=%s start=%llu retire=%llu rb_id=%d, r/w=%x/%x, q_inflight=%d, dq_id=%u, submitted_to_rb=%llu retired_on_gmu=%llu", + __entry->id, __entry->prio, __entry->timestamp, + __entry->inflight, + __entry->recovery ? + __print_flags(__entry->fault_recovery, "|", + ADRENO_FT_TYPES) : "none", + __entry->flags ? 
__print_flags(__entry->flags, "|", + KGSL_DRAWOBJ_FLAGS) : "none", + __entry->start, + __entry->retire, + __entry->rb_id, __entry->rptr, __entry->wptr, + __entry->q_inflight, + __entry->dispatch_queue, + __entry->submitted_to_rb, __entry->retired_on_gmu + ) +); + +TRACE_EVENT(gmu_ao_sync, + TP_PROTO(u64 ticks), + TP_ARGS(ticks), + TP_STRUCT__entry( + __field(u64, ticks) + ), + TP_fast_assign( + __entry->ticks = ticks; + ), + TP_printk( + "ticks=%llu", __entry->ticks + ) +); + +TRACE_EVENT(gmu_event, + TP_PROTO(u32 *event_info), + TP_ARGS(event_info), + TP_STRUCT__entry( + __field(u32, event) + __field(u32, ticks) + __field(u32, data1) + __field(u32, data2) + ), + TP_fast_assign( + __entry->event = event_info[0]; + __entry->ticks = event_info[1]; + __entry->data1 = event_info[2]; + __entry->data2 = event_info[3]; + ), + TP_printk( + "event=%08u ticks=%08u data1=0x%08x data2=0x%08x", + __entry->event, __entry->ticks, __entry->data1, __entry->data2 + ) +); + +TRACE_EVENT(adreno_cmdbatch_sync, + TP_PROTO(unsigned int ctx_id, unsigned int ctx_prio, + unsigned int timestamp, uint64_t ticks), + TP_ARGS(ctx_id, ctx_prio, timestamp, ticks), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(uint64_t, ticks) + __field(int, prio) + ), + TP_fast_assign( + __entry->id = ctx_id; + __entry->timestamp = timestamp; + __entry->ticks = ticks; + __entry->prio = ctx_prio; + ), + TP_printk( + "ctx=%u ctx_prio=%d ts=%u ticks=%lld", + __entry->id, __entry->prio, __entry->timestamp, + __entry->ticks + ) +); + +TRACE_EVENT(adreno_cmdbatch_fault, + TP_PROTO(struct kgsl_drawobj_cmd *cmdobj, unsigned int fault), + TP_ARGS(cmdobj, fault), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(unsigned int, fault) + ), + TP_fast_assign( + __entry->id = cmdobj->base.context->id; + __entry->timestamp = cmdobj->base.timestamp; + __entry->fault = fault; + ), + TP_printk( + "ctx=%u ts=%u type=%s", + __entry->id, 
__entry->timestamp, + __print_symbolic(__entry->fault, + { 0, "none" }, + { ADRENO_SOFT_FAULT, "soft" }, + { ADRENO_HARD_FAULT, "hard" }, + { ADRENO_TIMEOUT_FAULT, "timeout" }) + ) +); + +TRACE_EVENT(adreno_cmdbatch_recovery, + TP_PROTO(struct kgsl_drawobj_cmd *cmdobj, unsigned int action), + TP_ARGS(cmdobj, action), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(unsigned int, action) + ), + TP_fast_assign( + __entry->id = cmdobj->base.context->id; + __entry->timestamp = cmdobj->base.timestamp; + __entry->action = action; + ), + TP_printk( + "ctx=%u ts=%u action=%s", + __entry->id, __entry->timestamp, + __print_symbolic(__entry->action, ADRENO_FT_TYPES) + ) +); + +DECLARE_EVENT_CLASS(adreno_drawctxt_template, + TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, priority) + ), + TP_fast_assign( + __entry->id = drawctxt->base.id; + __entry->priority = drawctxt->base.priority; + ), + TP_printk("ctx=%u priority=%u", __entry->id, __entry->priority) +); + +DEFINE_EVENT(adreno_drawctxt_template, adreno_drawctxt_sleep, + TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt) +); + +DEFINE_EVENT(adreno_drawctxt_template, adreno_drawctxt_wake, + TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt) +); + +DEFINE_EVENT(adreno_drawctxt_template, dispatch_queue_context, + TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt) +); + +DEFINE_EVENT(adreno_drawctxt_template, adreno_drawctxt_invalidate, + TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt) +); + +TRACE_EVENT(adreno_drawctxt_wait_start, + TP_PROTO(unsigned int rb_id, unsigned int ctx_id, unsigned int ts), + TP_ARGS(rb_id, ctx_id, ts), + TP_STRUCT__entry( + __field(unsigned int, rb_id) + __field(unsigned int, ctx_id) + __field(unsigned int, ts) + ), + TP_fast_assign( + __entry->rb_id = rb_id; + __entry->ctx_id = ctx_id; + __entry->ts = ts; + ), + 
TP_printk( + "rb=%u ctx=%u ts=%u", + __entry->rb_id, __entry->ctx_id, __entry->ts + ) +); + +TRACE_EVENT(adreno_drawctxt_wait_done, + TP_PROTO(unsigned int rb_id, unsigned int ctx_id, + unsigned int ts, int status), + TP_ARGS(rb_id, ctx_id, ts, status), + TP_STRUCT__entry( + __field(unsigned int, rb_id) + __field(unsigned int, ctx_id) + __field(unsigned int, ts) + __field(int, status) + ), + TP_fast_assign( + __entry->rb_id = rb_id; + __entry->ctx_id = ctx_id; + __entry->ts = ts; + __entry->status = status; + ), + TP_printk( + "rb=%u ctx=%u ts=%u status=%d", + __entry->rb_id, __entry->ctx_id, __entry->ts, __entry->status + ) +); + +TRACE_EVENT(adreno_drawctxt_switch, + TP_PROTO(struct adreno_ringbuffer *rb, + struct adreno_context *newctx), + TP_ARGS(rb, newctx), + TP_STRUCT__entry( + __field(int, rb_level) + __field(unsigned int, oldctx) + __field(unsigned int, newctx) + __field(unsigned int, flags) + ), + TP_fast_assign( + __entry->rb_level = rb->id; + __entry->oldctx = rb->drawctxt_active ? + rb->drawctxt_active->base.id : 0; + __entry->newctx = newctx ? 
newctx->base.id : 0; + ), + TP_printk( + "rb level=%d oldctx=%u newctx=%u", + __entry->rb_level, __entry->oldctx, __entry->newctx + ) +); + +TRACE_EVENT(adreno_gpu_fault, + TP_PROTO(unsigned int ctx, unsigned int ts, + unsigned int status, unsigned int rptr, unsigned int wptr, + unsigned int ib1base, unsigned int ib1size, + unsigned int ib2base, unsigned int ib2size, int rb_id), + TP_ARGS(ctx, ts, status, rptr, wptr, ib1base, ib1size, ib2base, + ib2size, rb_id), + TP_STRUCT__entry( + __field(unsigned int, ctx) + __field(unsigned int, ts) + __field(unsigned int, status) + __field(unsigned int, rptr) + __field(unsigned int, wptr) + __field(unsigned int, ib1base) + __field(unsigned int, ib1size) + __field(unsigned int, ib2base) + __field(unsigned int, ib2size) + __field(int, rb_id) + ), + TP_fast_assign( + __entry->ctx = ctx; + __entry->ts = ts; + __entry->status = status; + __entry->rptr = rptr; + __entry->wptr = wptr; + __entry->ib1base = ib1base; + __entry->ib1size = ib1size; + __entry->ib2base = ib2base; + __entry->ib2size = ib2size; + __entry->rb_id = rb_id; + ), + TP_printk( + "ctx=%d ts=%d rb_id=%d status=%X RB=%X/%X IB1=%X/%X IB2=%X/%X", + __entry->ctx, __entry->ts, __entry->rb_id, __entry->status, + __entry->wptr, __entry->rptr, __entry->ib1base, + __entry->ib1size, __entry->ib2base, __entry->ib2size) +); + +TRACE_EVENT(adreno_sp_tp, + + TP_PROTO(unsigned long ip), + + TP_ARGS(ip), + + TP_STRUCT__entry( + __field(unsigned long, ip) + ), + + TP_fast_assign( + __entry->ip = ip; + ), + + TP_printk( + "func=%pS", (void *) __entry->ip + ) +); + +/* + * Tracepoint for a3xx irq. 
Includes status info + */ +TRACE_EVENT(kgsl_a3xx_irq_status, + + TP_PROTO(struct adreno_device *adreno_dev, unsigned int status), + + TP_ARGS(adreno_dev, status), + + TP_STRUCT__entry( + __string(device_name, adreno_dev->dev.name) + __field(unsigned int, status) + ), + + TP_fast_assign( + __assign_str(device_name, adreno_dev->dev.name); + __entry->status = status; + ), + + TP_printk( + "d_name=%s status=%s", + __get_str(device_name), + __entry->status ? __print_flags(__entry->status, "|", + { BIT(A3XX_INT_RBBM_GPU_IDLE), "RBBM_GPU_IDLE" }, + { BIT(A3XX_INT_RBBM_AHB_ERROR), "RBBM_AHB_ERR" }, + { BIT(A3XX_INT_RBBM_REG_TIMEOUT), "RBBM_REG_TIMEOUT" }, + { BIT(A3XX_INT_RBBM_ME_MS_TIMEOUT), + "RBBM_ME_MS_TIMEOUT" }, + { BIT(A3XX_INT_RBBM_PFP_MS_TIMEOUT), + "RBBM_PFP_MS_TIMEOUT" }, + { BIT(A3XX_INT_RBBM_ATB_BUS_OVERFLOW), + "RBBM_ATB_BUS_OVERFLOW" }, + { BIT(A3XX_INT_VFD_ERROR), "RBBM_VFD_ERROR" }, + { BIT(A3XX_INT_CP_SW_INT), "CP_SW" }, + { BIT(A3XX_INT_CP_T0_PACKET_IN_IB), + "CP_T0_PACKET_IN_IB" }, + { BIT(A3XX_INT_CP_OPCODE_ERROR), "CP_OPCODE_ERROR" }, + { BIT(A3XX_INT_CP_RESERVED_BIT_ERROR), + "CP_RESERVED_BIT_ERROR" }, + { BIT(A3XX_INT_CP_HW_FAULT), "CP_HW_FAULT" }, + { BIT(A3XX_INT_CP_DMA), "CP_DMA" }, + { BIT(A3XX_INT_CP_IB2_INT), "CP_IB2_INT" }, + { BIT(A3XX_INT_CP_IB1_INT), "CP_IB1_INT" }, + { BIT(A3XX_INT_CP_RB_INT), "CP_RB_INT" }, + { BIT(A3XX_INT_CP_REG_PROTECT_FAULT), + "CP_REG_PROTECT_FAULT" }, + { BIT(A3XX_INT_CP_RB_DONE_TS), "CP_RB_DONE_TS" }, + { BIT(A3XX_INT_CP_VS_DONE_TS), "CP_VS_DONE_TS" }, + { BIT(A3XX_INT_CP_PS_DONE_TS), "CP_PS_DONE_TS" }, + { BIT(A3XX_INT_CACHE_FLUSH_TS), "CACHE_FLUSH_TS" }, + { BIT(A3XX_INT_CP_AHB_ERROR_HALT), + "CP_AHB_ERROR_HALT" }, + { BIT(A3XX_INT_MISC_HANG_DETECT), "MISC_HANG_DETECT" }, + { BIT(A3XX_INT_UCHE_OOB_ACCESS), "UCHE_OOB_ACCESS" }) + : "None" + ) +); + +/* + * Tracepoint for a5xx irq. 
Includes status info + */ +TRACE_EVENT(kgsl_a5xx_irq_status, + + TP_PROTO(struct adreno_device *adreno_dev, unsigned int status), + + TP_ARGS(adreno_dev, status), + + TP_STRUCT__entry( + __string(device_name, adreno_dev->dev.name) + __field(unsigned int, status) + ), + + TP_fast_assign( + __assign_str(device_name, adreno_dev->dev.name); + __entry->status = status; + ), + + TP_printk( + "d_name=%s status=%s", + __get_str(device_name), + __entry->status ? __print_flags(__entry->status, "|", + { BIT(A5XX_INT_RBBM_GPU_IDLE), "RBBM_GPU_IDLE" }, + { BIT(A5XX_INT_RBBM_AHB_ERROR), "RBBM_AHB_ERR" }, + { BIT(A5XX_INT_RBBM_TRANSFER_TIMEOUT), + "RBBM_TRANSFER_TIMEOUT" }, + { BIT(A5XX_INT_RBBM_ME_MS_TIMEOUT), + "RBBM_ME_MS_TIMEOUT" }, + { BIT(A5XX_INT_RBBM_PFP_MS_TIMEOUT), + "RBBM_PFP_MS_TIMEOUT" }, + { BIT(A5XX_INT_RBBM_ETS_MS_TIMEOUT), + "RBBM_ETS_MS_TIMEOUT" }, + { BIT(A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW), + "RBBM_ATB_ASYNC_OVERFLOW" }, + { BIT(A5XX_INT_RBBM_GPC_ERROR), "RBBM_GPC_ERR" }, + { BIT(A5XX_INT_CP_SW), "CP_SW" }, + { BIT(A5XX_INT_CP_HW_ERROR), "CP_OPCODE_ERROR" }, + { BIT(A5XX_INT_CP_CCU_FLUSH_DEPTH_TS), + "CP_CCU_FLUSH_DEPTH_TS" }, + { BIT(A5XX_INT_CP_CCU_FLUSH_COLOR_TS), + "CP_CCU_FLUSH_COLOR_TS" }, + { BIT(A5XX_INT_CP_CCU_RESOLVE_TS), + "CP_CCU_RESOLVE_TS" }, + { BIT(A5XX_INT_CP_IB2), "CP_IB2_INT" }, + { BIT(A5XX_INT_CP_IB1), "CP_IB1_INT" }, + { BIT(A5XX_INT_CP_RB), "CP_RB_INT" }, + { BIT(A5XX_INT_CP_UNUSED_1), "CP_UNUSED_1" }, + { BIT(A5XX_INT_CP_RB_DONE_TS), "CP_RB_DONE_TS" }, + { BIT(A5XX_INT_CP_WT_DONE_TS), "CP_WT_DONE_TS" }, + { BIT(A5XX_INT_UNKNOWN_1), "UNKNOWN_1" }, + { BIT(A5XX_INT_CP_CACHE_FLUSH_TS), + "CP_CACHE_FLUSH_TS" }, + { BIT(A5XX_INT_UNUSED_2), "UNUSED_2" }, + { BIT(A5XX_INT_RBBM_ATB_BUS_OVERFLOW), + "RBBM_ATB_BUS_OVERFLOW" }, + { BIT(A5XX_INT_MISC_HANG_DETECT), "MISC_HANG_DETECT" }, + { BIT(A5XX_INT_UCHE_OOB_ACCESS), "UCHE_OOB_ACCESS" }, + { BIT(A5XX_INT_UCHE_TRAP_INTR), "UCHE_TRAP_INTR" }, + { BIT(A5XX_INT_DEBBUS_INTR_0), "DEBBUS_INTR_0" }, + { 
BIT(A5XX_INT_DEBBUS_INTR_1), "DEBBUS_INTR_1" }, + { BIT(A5XX_INT_GPMU_VOLTAGE_DROOP), + "GPMU_VOLTAGE_DROOP" }, + { BIT(A5XX_INT_GPMU_FIRMWARE), "GPMU_FIRMWARE" }, + { BIT(A5XX_INT_ISDB_CPU_IRQ), "ISDB_CPU_IRQ" }, + { BIT(A5XX_INT_ISDB_UNDER_DEBUG), "ISDB_UNDER_DEBUG" }) + : "None" + ) +); + +/* + * Tracepoint for gen7 irq. Includes status info + */ +TRACE_EVENT(kgsl_gen7_irq_status, + + TP_PROTO(struct adreno_device *adreno_dev, unsigned int status), + + TP_ARGS(adreno_dev, status), + + TP_STRUCT__entry( + __string(device_name, adreno_dev->dev.name) + __field(unsigned int, status) + ), + + TP_fast_assign( + __assign_str(device_name, adreno_dev->dev.name); + __entry->status = status; + ), + + TP_printk( + "d_name=%s status=%s", + __get_str(device_name), + __entry->status ? __print_flags(__entry->status, "|", + { BIT(GEN7_INT_GPUIDLE), "GPUIDLE" }, + { BIT(GEN7_INT_AHBERROR), "AHBERROR" }, + { BIT(GEN7_INT_CPIPCINT0), "CPIPCINT0" }, + { BIT(GEN7_INT_CPIPCINT1), "CPIPCINT1" }, + { BIT(GEN7_INT_ATBASYNCFIFOOVERFLOW), + "ATBASYNCFIFOOVERFLOW" }, + { BIT(GEN7_INT_GPCERROR), "GPCERROR" }, + { BIT(GEN7_INT_SWINTERRUPT), "SWINTERRUPT" }, + { BIT(GEN7_INT_HWERROR), "HWERROR" }, + { BIT(GEN7_INT_CCU_CLEAN_DEPTH_TS), + "CCU_CLEAN_DEPTH_TS" }, + { BIT(GEN7_INT_CCU_CLEAN_COLOR_TS), + "CCU_CLEAN_COLOR_TS" }, + { BIT(GEN7_INT_CCU_RESOLVE_CLEAN_TS), + "CCU_RESOLVE_CLEAN_TS" }, + { BIT(GEN7_INT_PM4CPINTERRUPT), "PM4CPINTERRUPT" }, + { BIT(GEN7_INT_PM4CPINTERRUPTLPAC), + "PM4CPINTERRUPTLPAC" }, + { BIT(GEN7_INT_RB_DONE_TS), "RB_DONE_TS" }, + { BIT(GEN7_INT_CACHE_CLEAN_TS), "CACHE_CLEAN_TS" }, + { BIT(GEN7_INT_CACHE_CLEAN_TS_LPAC), + "CACHE_CLEAN_TS_LPAC" }, + { BIT(GEN7_INT_ATBBUSOVERFLOW), "ATBBUSOVERFLOW" }, + { BIT(GEN7_INT_HANGDETECTINTERRUPT), + "HANGDETECTINTERRUPT" }, + { BIT(GEN7_INT_OUTOFBOUNDACCESS), + "OUTOFBOUNDACCESS" }, + { BIT(GEN7_INT_UCHETRAPINTERRUPT), + "UCHETRAPINTERRUPT" }, + { BIT(GEN7_INT_DEBUGBUSINTERRUPT0), + "DEBUGBUSINTERRUPT0" }, + { 
BIT(GEN7_INT_DEBUGBUSINTERRUPT1), + "DEBUGBUSINTERRUPT1" }, + { BIT(GEN7_INT_TSBWRITEERROR), "TSBWRITEERROR" }, + { BIT(GEN7_INT_ISDBCPUIRQ), "ISDBCPUIRQ" }, + { BIT(GEN7_INT_ISDBUNDERDEBUG), "ISDBUNDERDEBUG" }) + : "None" + ) +); + +DECLARE_EVENT_CLASS(adreno_hw_preempt_template, + TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb, + unsigned int cur_rptr, unsigned int new_rptr), + TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr), + TP_STRUCT__entry(__field(int, cur_level) + __field(int, new_level) + __field(unsigned int, cur_rptr) + __field(unsigned int, new_rptr) + __field(unsigned int, cur_wptr) + __field(unsigned int, new_wptr) + __field(unsigned int, cur_rbbase) + __field(unsigned int, new_rbbase) + ), + TP_fast_assign(__entry->cur_level = cur_rb->id; + __entry->new_level = new_rb->id; + __entry->cur_rptr = cur_rptr; + __entry->new_rptr = new_rptr; + __entry->cur_wptr = cur_rb->wptr; + __entry->new_wptr = new_rb->wptr; + __entry->cur_rbbase = cur_rb->buffer_desc->gpuaddr; + __entry->new_rbbase = new_rb->buffer_desc->gpuaddr; + ), + TP_printk( + "cur_rb_lvl=%d rptr=%x wptr=%x rbbase=%x new_rb_lvl=%d rptr=%x wptr=%x rbbase=%x", + __entry->cur_level, __entry->cur_rptr, + __entry->cur_wptr, __entry->cur_rbbase, + __entry->new_level, __entry->new_rptr, + __entry->new_wptr, __entry->new_rbbase + ) +); + +DEFINE_EVENT(adreno_hw_preempt_template, adreno_hw_preempt_clear_to_trig, + TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb, + unsigned int cur_rptr, unsigned int new_rptr), + TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr) +); + +DEFINE_EVENT(adreno_hw_preempt_template, adreno_hw_preempt_trig_to_comp, + TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb, + unsigned int cur_rptr, unsigned int new_rptr), + TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr) +); + +DEFINE_EVENT(adreno_hw_preempt_template, adreno_hw_preempt_trig_to_comp_int, + 
TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb, + unsigned int cur_rptr, unsigned int new_rptr), + TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr) +); + +TRACE_EVENT(adreno_hw_preempt_comp_to_clear, + TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb, + unsigned int cur_rptr, unsigned int new_rptr), + TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr), + TP_STRUCT__entry(__field(int, cur_level) + __field(int, new_level) + __field(unsigned int, cur_rptr) + __field(unsigned int, new_rptr) + __field(unsigned int, cur_wptr) + __field(unsigned int, new_wptr_end) + __field(unsigned int, new_wptr) + __field(unsigned int, cur_rbbase) + __field(unsigned int, new_rbbase) + ), + TP_fast_assign(__entry->cur_level = cur_rb->id; + __entry->new_level = new_rb->id; + __entry->cur_rptr = cur_rptr; + __entry->new_rptr = new_rptr; + __entry->cur_wptr = cur_rb->wptr; + __entry->new_wptr_end = new_rb->wptr_preempt_end; + __entry->new_wptr = new_rb->wptr; + __entry->cur_rbbase = cur_rb->buffer_desc->gpuaddr; + __entry->new_rbbase = new_rb->buffer_desc->gpuaddr; + ), + TP_printk( + "cur_rb_lvl=%d rptr=%x wptr=%x rbbase=%x prev_rb_lvl=%d rptr=%x wptr_preempt_end=%x wptr=%x rbbase=%x", + __entry->cur_level, __entry->cur_rptr, + __entry->cur_wptr, __entry->cur_rbbase, + __entry->new_level, __entry->new_rptr, + __entry->new_wptr_end, __entry->new_wptr, __entry->new_rbbase + ) +); + +TRACE_EVENT(adreno_hw_preempt_token_submit, + TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb, + unsigned int cur_rptr, unsigned int new_rptr), + TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr), + TP_STRUCT__entry(__field(int, cur_level) + __field(int, new_level) + __field(unsigned int, cur_rptr) + __field(unsigned int, new_rptr) + __field(unsigned int, cur_wptr) + __field(unsigned int, cur_wptr_end) + __field(unsigned int, new_wptr) + __field(unsigned int, cur_rbbase) + __field(unsigned int, new_rbbase) + ), + 
TP_fast_assign(__entry->cur_level = cur_rb->id; + __entry->new_level = new_rb->id; + __entry->cur_rptr = cur_rptr; + __entry->new_rptr = new_rptr; + __entry->cur_wptr = cur_rb->wptr; + __entry->cur_wptr_end = cur_rb->wptr_preempt_end; + __entry->new_wptr = new_rb->wptr; + __entry->cur_rbbase = cur_rb->buffer_desc->gpuaddr; + __entry->new_rbbase = new_rb->buffer_desc->gpuaddr; + ), + TP_printk( + "cur_rb_lvl=%d rptr=%x wptr_preempt_end=%x wptr=%x rbbase=%x new_rb_lvl=%d rptr=%x wptr=%x rbbase=%x", + __entry->cur_level, __entry->cur_rptr, + __entry->cur_wptr_end, __entry->cur_wptr, + __entry->cur_rbbase, + __entry->new_level, __entry->new_rptr, + __entry->new_wptr, __entry->new_rbbase + ) +); + +TRACE_EVENT(adreno_preempt_trigger, + TP_PROTO(struct adreno_ringbuffer *cur, struct adreno_ringbuffer *next, + unsigned int cntl), + TP_ARGS(cur, next, cntl), + TP_STRUCT__entry( + __field(unsigned int, cur) + __field(unsigned int, next) + __field(unsigned int, cntl) + ), + TP_fast_assign( + __entry->cur = cur->id; + __entry->next = next->id; + __entry->cntl = cntl; + ), + TP_printk("trigger from id=%d to id=%d cntl=%x", + __entry->cur, __entry->next, __entry->cntl + ) +); + +TRACE_EVENT(adreno_preempt_done, + TP_PROTO(struct adreno_ringbuffer *cur, struct adreno_ringbuffer *next, + unsigned int level), + TP_ARGS(cur, next, level), + TP_STRUCT__entry( + __field(unsigned int, cur) + __field(unsigned int, next) + __field(unsigned int, level) + ), + TP_fast_assign( + __entry->cur = cur->id; + __entry->next = next->id; + __entry->level = level; + ), + TP_printk("done switch to id=%d from id=%d level=%x", + __entry->next, __entry->cur, __entry->level + ) +); + +TRACE_EVENT(adreno_ifpc_count, + TP_PROTO(unsigned int ifpc_count), + TP_ARGS(ifpc_count), + TP_STRUCT__entry( + __field(unsigned int, ifpc_count) + ), + TP_fast_assign( + __entry->ifpc_count = ifpc_count; + ), + TP_printk("total times GMU entered IFPC = %d", __entry->ifpc_count) +); + +#endif /* _ADRENO_TRACE_H */ + +/* 
This part must be outside protection */ +#include diff --git a/build.config.msm_kgsl b/build.config.msm_kgsl new file mode 100644 index 0000000000..f30c524b08 --- /dev/null +++ b/build.config.msm_kgsl @@ -0,0 +1 @@ +EXT_MODULES+=msm_kgsl diff --git a/config/gki_waipiodisp.conf b/config/gki_waipiodisp.conf new file mode 100644 index 0000000000..87097eadd1 --- /dev/null +++ b/config/gki_waipiodisp.conf @@ -0,0 +1,15 @@ +CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ = y +CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON = y +CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 +CONFIG_QCOM_KGSL_SORT_POOL = y +CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y +CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT = y +CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR = \"msm-adreno-tz\" + +ccflags-y += -DCONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=1 \ + -DCONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=1 \ + -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ + -DCONFIG_QCOM_KGSL_SORT_POOL=1 \ + -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ + -DCONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=1 \ + -DCONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR=\"msm-adreno-tz\" diff --git a/gen7_reg.h b/gen7_reg.h new file mode 100644 index 0000000000..e530a02854 --- /dev/null +++ b/gen7_reg.h @@ -0,0 +1,1158 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
+ */ + +#ifndef _GEN7_REG_H +#define _GEN7_REG_H + +/* GEN7 interrupt bits */ +#define GEN7_INT_GPUIDLE 0 +#define GEN7_INT_AHBERROR 1 +#define GEN7_INT_CPIPCINT0 4 +#define GEN7_INT_CPIPCINT1 5 +#define GEN7_INT_ATBASYNCFIFOOVERFLOW 6 +#define GEN7_INT_GPCERROR 7 +#define GEN7_INT_SWINTERRUPT 8 +#define GEN7_INT_HWERROR 9 +#define GEN7_INT_CCU_CLEAN_DEPTH_TS 10 +#define GEN7_INT_CCU_CLEAN_COLOR_TS 11 +#define GEN7_INT_CCU_RESOLVE_CLEAN_TS 12 +#define GEN7_INT_PM4CPINTERRUPT 15 +#define GEN7_INT_PM4CPINTERRUPTLPAC 16 +#define GEN7_INT_RB_DONE_TS 17 +#define GEN7_INT_CACHE_CLEAN_TS 20 +#define GEN7_INT_CACHE_CLEAN_TS_LPAC 21 +#define GEN7_INT_ATBBUSOVERFLOW 22 +#define GEN7_INT_HANGDETECTINTERRUPT 23 +#define GEN7_INT_OUTOFBOUNDACCESS 24 +#define GEN7_INT_UCHETRAPINTERRUPT 25 +#define GEN7_INT_DEBUGBUSINTERRUPT0 26 +#define GEN7_INT_DEBUGBUSINTERRUPT1 27 +#define GEN7_INT_TSBWRITEERROR 28 +#define GEN7_INT_ISDBCPUIRQ 30 +#define GEN7_INT_ISDBUNDERDEBUG 31 + +/* CP registers */ +#define GEN7_CP_RB_BASE 0x800 +#define GEN7_CP_RB_BASE_HI 0x801 +#define GEN7_CP_RB_CNTL 0x802 +#define GEN7_CP_RB_RPTR_ADDR_LO 0x804 +#define GEN7_CP_RB_RPTR_ADDR_HI 0x805 +#define GEN7_CP_RB_RPTR 0x806 +#define GEN7_CP_RB_WPTR 0x807 +#define GEN7_CP_SQE_CNTL 0x808 +#define GEN7_CP_CP2GMU_STATUS 0x812 +#define GEN7_CP_HW_FAULT 0x821 +#define GEN7_CP_INTERRUPT_STATUS 0x823 +#define GEN7_CP_PROTECT_STATUS 0x824 +#define GEN7_CP_STATUS_1 0x825 +#define GEN7_CP_SQE_INSTR_BASE_LO 0x830 +#define GEN7_CP_SQE_INSTR_BASE_HI 0x831 +#define GEN7_CP_MISC_CNTL 0x840 +#define GEN7_CP_CHICKEN_DBG 0x841 +#define GEN7_CP_DBG_ECO_CNTL 0x843 +#define GEN7_CP_APRIV_CNTL 0x844 +#define GEN7_CP_PROTECT_CNTL 0x84f +#define GEN7_CP_PROTECT_REG 0x850 +#define GEN7_CP_CONTEXT_SWITCH_CNTL 0x8a0 +#define GEN7_CP_CONTEXT_SWITCH_SMMU_INFO_LO 0x8a1 +#define GEN7_CP_CONTEXT_SWITCH_SMMU_INFO_HI 0x8a2 +#define GEN7_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO 0x8a3 +#define 
GEN7_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI 0x8a4 +#define GEN7_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO 0x8a5 +#define GEN7_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI 0x8a6 +#define GEN7_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO 0x8a7 +#define GEN7_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI 0x8a8 +#define GEN7_CP_CONTEXT_SWITCH_LEVEL_STATUS 0x8ab +#define GEN7_CP_PERFCTR_CP_SEL_0 0x8d0 +#define GEN7_CP_PERFCTR_CP_SEL_1 0x8d1 +#define GEN7_CP_PERFCTR_CP_SEL_2 0x8d2 +#define GEN7_CP_PERFCTR_CP_SEL_3 0x8d3 +#define GEN7_CP_PERFCTR_CP_SEL_4 0x8d4 +#define GEN7_CP_PERFCTR_CP_SEL_5 0x8d5 +#define GEN7_CP_PERFCTR_CP_SEL_6 0x8d6 +#define GEN7_CP_PERFCTR_CP_SEL_7 0x8d7 +#define GEN7_CP_PERFCTR_CP_SEL_8 0x8d8 +#define GEN7_CP_PERFCTR_CP_SEL_9 0x8d9 +#define GEN7_CP_PERFCTR_CP_SEL_10 0x8da +#define GEN7_CP_PERFCTR_CP_SEL_11 0x8db +#define GEN7_CP_PERFCTR_CP_SEL_12 0x8dc +#define GEN7_CP_PERFCTR_CP_SEL_13 0x8dd +#define GEN7_CP_BV_PERFCTR_CP_SEL_0 0x8e0 +#define GEN7_CP_BV_PERFCTR_CP_SEL_1 0x8e1 +#define GEN7_CP_BV_PERFCTR_CP_SEL_2 0x8e2 +#define GEN7_CP_BV_PERFCTR_CP_SEL_3 0x8e3 +#define GEN7_CP_BV_PERFCTR_CP_SEL_4 0x8e4 +#define GEN7_CP_BV_PERFCTR_CP_SEL_5 0x8e5 +#define GEN7_CP_BV_PERFCTR_CP_SEL_6 0x8e6 +#define GEN7_CP_CRASH_SCRIPT_BASE_LO 0x900 +#define GEN7_CP_CRASH_SCRIPT_BASE_HI 0x901 +#define GEN7_CP_CRASH_DUMP_CNTL 0x902 +#define GEN7_CP_CRASH_DUMP_STATUS 0x903 +#define GEN7_CP_SQE_STAT_ADDR 0x908 +#define GEN7_CP_SQE_STAT_DATA 0x909 +#define GEN7_CP_DRAW_STATE_ADDR 0x90a +#define GEN7_CP_DRAW_STATE_DATA 0x90b +#define GEN7_CP_ROQ_DBG_ADDR 0x90c +#define GEN7_CP_ROQ_DBG_DATA 0x90d +#define GEN7_CP_MEM_POOL_DBG_ADDR 0x90e +#define GEN7_CP_MEM_POOL_DBG_DATA 0x90f +#define GEN7_CP_SQE_UCODE_DBG_ADDR 0x910 +#define GEN7_CP_SQE_UCODE_DBG_DATA 0x911 +#define GEN7_CP_IB1_BASE 0x928 +#define GEN7_CP_IB1_BASE_HI 0x929 +#define GEN7_CP_IB1_REM_SIZE 0x92a +#define GEN7_CP_IB2_BASE 0x92b +#define GEN7_CP_IB2_BASE_HI 0x92c +#define GEN7_CP_IB2_REM_SIZE 0x92d 
+#define GEN7_CP_ALWAYS_ON_COUNTER_LO 0x980 +#define GEN7_CP_ALWAYS_ON_COUNTER_HI 0x981 +#define GEN7_CP_AHB_CNTL 0x98d +#define GEN7_CP_APERTURE_CNTL_HOST 0xa00 +#define GEN7_CP_APERTURE_CNTL_CD 0xa03 +#define GEN7_CP_BV_PROTECT_STATUS 0xa61 +#define GEN7_CP_BV_HW_FAULT 0xa64 +#define GEN7_CP_BV_DRAW_STATE_ADDR 0xa81 +#define GEN7_CP_BV_DRAW_STATE_DATA 0xa82 +#define GEN7_CP_BV_ROQ_DBG_ADDR 0xa83 +#define GEN7_CP_BV_ROQ_DBG_DATA 0xa84 +#define GEN7_CP_BV_SQE_UCODE_DBG_ADDR 0xa85 +#define GEN7_CP_BV_SQE_UCODE_DBG_DATA 0xa86 +#define GEN7_CP_BV_SQE_STAT_ADDR 0xa87 +#define GEN7_CP_BV_SQE_STAT_DATA 0xa88 +#define GEN7_CP_BV_RB_RPTR_ADDR_LO 0xa98 +#define GEN7_CP_BV_RB_RPTR_ADDR_HI 0xa99 +#define GEN7_CP_RESOURCE_TBL_DBG_ADDR 0xa9a +#define GEN7_CP_RESOURCE_TBL_DBG_DATA 0xa9b +#define GEN7_CP_BV_MEM_POOL_DBG_ADDR 0xa96 +#define GEN7_CP_BV_MEM_POOL_DBG_DATA 0xa97 +#define GEN7_CP_BV_APRIV_CNTL 0xad0 +#define GEN7_CP_BV_CHICKEN_DBG 0xada + +/* LPAC registers */ +#define GEN7_CP_LPAC_DRAW_STATE_ADDR 0xb0a +#define GEN7_CP_LPAC_DRAW_STATE_DATA 0xb0b +#define GEN7_CP_LPAC_ROQ_DBG_ADDR 0xb0c +#define GEN7_CP_SQE_AC_UCODE_DBG_ADDR 0xb27 +#define GEN7_CP_SQE_AC_UCODE_DBG_DATA 0xb28 +#define GEN7_CP_SQE_AC_STAT_ADDR 0xb29 +#define GEN7_CP_SQE_AC_STAT_DATA 0xb2a +#define GEN7_CP_LPAC_APRIV_CNTL 0xb31 +#define GEN7_CP_LPAC_ROQ_DBG_DATA 0xb35 +#define GEN7_CP_LPAC_FIFO_DBG_DATA 0xb36 +#define GEN7_CP_LPAC_FIFO_DBG_ADDR 0xb40 + +/* RBBM registers */ +#define GEN7_RBBM_INT_0_STATUS 0x201 +#define GEN7_RBBM_STATUS 0x210 +#define GEN7_RBBM_STATUS3 0x213 +#define GEN7_RBBM_PERFCTR_CP_0_LO 0x300 +#define GEN7_RBBM_PERFCTR_CP_0_HI 0x301 +#define GEN7_RBBM_PERFCTR_CP_1_LO 0x302 +#define GEN7_RBBM_PERFCTR_CP_1_HI 0x303 +#define GEN7_RBBM_PERFCTR_CP_2_LO 0x304 +#define GEN7_RBBM_PERFCTR_CP_2_HI 0x305 +#define GEN7_RBBM_PERFCTR_CP_3_LO 0x306 +#define GEN7_RBBM_PERFCTR_CP_3_HI 0x307 +#define GEN7_RBBM_PERFCTR_CP_4_LO 0x308 +#define GEN7_RBBM_PERFCTR_CP_4_HI 0x309 +#define 
GEN7_RBBM_PERFCTR_CP_5_LO 0x30a +#define GEN7_RBBM_PERFCTR_CP_5_HI 0x30b +#define GEN7_RBBM_PERFCTR_CP_6_LO 0x30c +#define GEN7_RBBM_PERFCTR_CP_6_HI 0x30d +#define GEN7_RBBM_PERFCTR_CP_7_LO 0x30e +#define GEN7_RBBM_PERFCTR_CP_7_HI 0x30f +#define GEN7_RBBM_PERFCTR_CP_8_LO 0x310 +#define GEN7_RBBM_PERFCTR_CP_8_HI 0x311 +#define GEN7_RBBM_PERFCTR_CP_9_LO 0x312 +#define GEN7_RBBM_PERFCTR_CP_9_HI 0x313 +#define GEN7_RBBM_PERFCTR_CP_10_LO 0x314 +#define GEN7_RBBM_PERFCTR_CP_10_HI 0x315 +#define GEN7_RBBM_PERFCTR_CP_11_LO 0x316 +#define GEN7_RBBM_PERFCTR_CP_11_HI 0x317 +#define GEN7_RBBM_PERFCTR_CP_12_LO 0x318 +#define GEN7_RBBM_PERFCTR_CP_12_HI 0x319 +#define GEN7_RBBM_PERFCTR_CP_13_LO 0x31a +#define GEN7_RBBM_PERFCTR_CP_13_HI 0x31b +#define GEN7_RBBM_PERFCTR_RBBM_0_LO 0x31c +#define GEN7_RBBM_PERFCTR_RBBM_0_HI 0x31d +#define GEN7_RBBM_PERFCTR_RBBM_1_LO 0x31e +#define GEN7_RBBM_PERFCTR_RBBM_1_HI 0x31f +#define GEN7_RBBM_PERFCTR_RBBM_2_LO 0x320 +#define GEN7_RBBM_PERFCTR_RBBM_2_HI 0x321 +#define GEN7_RBBM_PERFCTR_RBBM_3_LO 0x322 +#define GEN7_RBBM_PERFCTR_RBBM_3_HI 0x323 +#define GEN7_RBBM_PERFCTR_PC_0_LO 0x324 +#define GEN7_RBBM_PERFCTR_PC_0_HI 0x325 +#define GEN7_RBBM_PERFCTR_PC_1_LO 0x326 +#define GEN7_RBBM_PERFCTR_PC_1_HI 0x327 +#define GEN7_RBBM_PERFCTR_PC_2_LO 0x328 +#define GEN7_RBBM_PERFCTR_PC_2_HI 0x329 +#define GEN7_RBBM_PERFCTR_PC_3_LO 0x32a +#define GEN7_RBBM_PERFCTR_PC_3_HI 0x32b +#define GEN7_RBBM_PERFCTR_PC_4_LO 0x32c +#define GEN7_RBBM_PERFCTR_PC_4_HI 0x32d +#define GEN7_RBBM_PERFCTR_PC_5_LO 0x32e +#define GEN7_RBBM_PERFCTR_PC_5_HI 0x32f +#define GEN7_RBBM_PERFCTR_PC_6_LO 0x330 +#define GEN7_RBBM_PERFCTR_PC_6_HI 0x331 +#define GEN7_RBBM_PERFCTR_PC_7_LO 0x332 +#define GEN7_RBBM_PERFCTR_PC_7_HI 0x333 +#define GEN7_RBBM_PERFCTR_VFD_0_LO 0x334 +#define GEN7_RBBM_PERFCTR_VFD_0_HI 0x335 +#define GEN7_RBBM_PERFCTR_VFD_1_LO 0x336 +#define GEN7_RBBM_PERFCTR_VFD_1_HI 0x337 +#define GEN7_RBBM_PERFCTR_VFD_2_LO 0x338 +#define GEN7_RBBM_PERFCTR_VFD_2_HI 0x339 +#define 
GEN7_RBBM_PERFCTR_VFD_3_LO 0x33a +#define GEN7_RBBM_PERFCTR_VFD_3_HI 0x33b +#define GEN7_RBBM_PERFCTR_VFD_4_LO 0x33c +#define GEN7_RBBM_PERFCTR_VFD_4_HI 0x33d +#define GEN7_RBBM_PERFCTR_VFD_5_LO 0x33e +#define GEN7_RBBM_PERFCTR_VFD_5_HI 0x33f +#define GEN7_RBBM_PERFCTR_VFD_6_LO 0x340 +#define GEN7_RBBM_PERFCTR_VFD_6_HI 0x341 +#define GEN7_RBBM_PERFCTR_VFD_7_LO 0x342 +#define GEN7_RBBM_PERFCTR_VFD_7_HI 0x343 +#define GEN7_RBBM_PERFCTR_HLSQ_0_LO 0x344 +#define GEN7_RBBM_PERFCTR_HLSQ_0_HI 0x345 +#define GEN7_RBBM_PERFCTR_HLSQ_1_LO 0x346 +#define GEN7_RBBM_PERFCTR_HLSQ_1_HI 0x347 +#define GEN7_RBBM_PERFCTR_HLSQ_2_LO 0x348 +#define GEN7_RBBM_PERFCTR_HLSQ_2_HI 0x349 +#define GEN7_RBBM_PERFCTR_HLSQ_3_LO 0x34a +#define GEN7_RBBM_PERFCTR_HLSQ_3_HI 0x34b +#define GEN7_RBBM_PERFCTR_HLSQ_4_LO 0x34c +#define GEN7_RBBM_PERFCTR_HLSQ_4_HI 0x34d +#define GEN7_RBBM_PERFCTR_HLSQ_5_LO 0x34e +#define GEN7_RBBM_PERFCTR_HLSQ_5_HI 0x34f +#define GEN7_RBBM_PERFCTR_VPC_0_LO 0x350 +#define GEN7_RBBM_PERFCTR_VPC_0_HI 0x351 +#define GEN7_RBBM_PERFCTR_VPC_1_LO 0x352 +#define GEN7_RBBM_PERFCTR_VPC_1_HI 0x353 +#define GEN7_RBBM_PERFCTR_VPC_2_LO 0x354 +#define GEN7_RBBM_PERFCTR_VPC_2_HI 0x355 +#define GEN7_RBBM_PERFCTR_VPC_3_LO 0x356 +#define GEN7_RBBM_PERFCTR_VPC_3_HI 0x357 +#define GEN7_RBBM_PERFCTR_VPC_4_LO 0x358 +#define GEN7_RBBM_PERFCTR_VPC_4_HI 0x359 +#define GEN7_RBBM_PERFCTR_VPC_5_LO 0x35a +#define GEN7_RBBM_PERFCTR_VPC_5_HI 0x35b +#define GEN7_RBBM_PERFCTR_CCU_0_LO 0x35c +#define GEN7_RBBM_PERFCTR_CCU_0_HI 0x35d +#define GEN7_RBBM_PERFCTR_CCU_1_LO 0x35e +#define GEN7_RBBM_PERFCTR_CCU_1_HI 0x35f +#define GEN7_RBBM_PERFCTR_CCU_2_LO 0x360 +#define GEN7_RBBM_PERFCTR_CCU_2_HI 0x361 +#define GEN7_RBBM_PERFCTR_CCU_3_LO 0x362 +#define GEN7_RBBM_PERFCTR_CCU_3_HI 0x363 +#define GEN7_RBBM_PERFCTR_CCU_4_LO 0x364 +#define GEN7_RBBM_PERFCTR_CCU_4_HI 0x365 +#define GEN7_RBBM_PERFCTR_TSE_0_LO 0x366 +#define GEN7_RBBM_PERFCTR_TSE_0_HI 0x367 +#define GEN7_RBBM_PERFCTR_TSE_1_LO 0x368 +#define 
GEN7_RBBM_PERFCTR_TSE_1_HI 0x369 +#define GEN7_RBBM_PERFCTR_TSE_2_LO 0x36a +#define GEN7_RBBM_PERFCTR_TSE_2_HI 0x36b +#define GEN7_RBBM_PERFCTR_TSE_3_LO 0x36c +#define GEN7_RBBM_PERFCTR_TSE_3_HI 0x36d +#define GEN7_RBBM_PERFCTR_RAS_0_LO 0x36e +#define GEN7_RBBM_PERFCTR_RAS_0_HI 0x36f +#define GEN7_RBBM_PERFCTR_RAS_1_LO 0x370 +#define GEN7_RBBM_PERFCTR_RAS_1_HI 0x371 +#define GEN7_RBBM_PERFCTR_RAS_2_LO 0x372 +#define GEN7_RBBM_PERFCTR_RAS_2_HI 0x373 +#define GEN7_RBBM_PERFCTR_RAS_3_LO 0x374 +#define GEN7_RBBM_PERFCTR_RAS_3_HI 0x375 +#define GEN7_RBBM_PERFCTR_UCHE_0_LO 0x376 +#define GEN7_RBBM_PERFCTR_UCHE_0_HI 0x377 +#define GEN7_RBBM_PERFCTR_UCHE_1_LO 0x378 +#define GEN7_RBBM_PERFCTR_UCHE_1_HI 0x379 +#define GEN7_RBBM_PERFCTR_UCHE_2_LO 0x37a +#define GEN7_RBBM_PERFCTR_UCHE_2_HI 0x37b +#define GEN7_RBBM_PERFCTR_UCHE_3_LO 0x37c +#define GEN7_RBBM_PERFCTR_UCHE_3_HI 0x37d +#define GEN7_RBBM_PERFCTR_UCHE_4_LO 0x37e +#define GEN7_RBBM_PERFCTR_UCHE_4_HI 0x37f +#define GEN7_RBBM_PERFCTR_UCHE_5_LO 0x380 +#define GEN7_RBBM_PERFCTR_UCHE_5_HI 0x381 +#define GEN7_RBBM_PERFCTR_UCHE_6_LO 0x382 +#define GEN7_RBBM_PERFCTR_UCHE_6_HI 0x383 +#define GEN7_RBBM_PERFCTR_UCHE_7_LO 0x384 +#define GEN7_RBBM_PERFCTR_UCHE_7_HI 0x385 +#define GEN7_RBBM_PERFCTR_UCHE_8_LO 0x386 +#define GEN7_RBBM_PERFCTR_UCHE_8_HI 0x387 +#define GEN7_RBBM_PERFCTR_UCHE_9_LO 0x388 +#define GEN7_RBBM_PERFCTR_UCHE_9_HI 0x389 +#define GEN7_RBBM_PERFCTR_UCHE_10_LO 0x38a +#define GEN7_RBBM_PERFCTR_UCHE_10_HI 0x38b +#define GEN7_RBBM_PERFCTR_UCHE_11_LO 0x38c +#define GEN7_RBBM_PERFCTR_UCHE_11_HI 0x38d +#define GEN7_RBBM_PERFCTR_TP_0_LO 0x38e +#define GEN7_RBBM_PERFCTR_TP_0_HI 0x38f +#define GEN7_RBBM_PERFCTR_TP_1_LO 0x390 +#define GEN7_RBBM_PERFCTR_TP_1_HI 0x391 +#define GEN7_RBBM_PERFCTR_TP_2_LO 0x392 +#define GEN7_RBBM_PERFCTR_TP_2_HI 0x393 +#define GEN7_RBBM_PERFCTR_TP_3_LO 0x394 +#define GEN7_RBBM_PERFCTR_TP_3_HI 0x395 +#define GEN7_RBBM_PERFCTR_TP_4_LO 0x396 +#define GEN7_RBBM_PERFCTR_TP_4_HI 0x397 +#define 
GEN7_RBBM_PERFCTR_TP_5_LO 0x398 +#define GEN7_RBBM_PERFCTR_TP_5_HI 0x399 +#define GEN7_RBBM_PERFCTR_TP_6_LO 0x39a +#define GEN7_RBBM_PERFCTR_TP_6_HI 0x39b +#define GEN7_RBBM_PERFCTR_TP_7_LO 0x39c +#define GEN7_RBBM_PERFCTR_TP_7_HI 0x39d +#define GEN7_RBBM_PERFCTR_TP_8_LO 0x39e +#define GEN7_RBBM_PERFCTR_TP_8_HI 0x39f +#define GEN7_RBBM_PERFCTR_TP_9_LO 0x3a0 +#define GEN7_RBBM_PERFCTR_TP_9_HI 0x3a1 +#define GEN7_RBBM_PERFCTR_TP_10_LO 0x3a2 +#define GEN7_RBBM_PERFCTR_TP_10_HI 0x3a3 +#define GEN7_RBBM_PERFCTR_TP_11_LO 0x3a4 +#define GEN7_RBBM_PERFCTR_TP_11_HI 0x3a5 +#define GEN7_RBBM_PERFCTR_SP_0_LO 0x3a6 +#define GEN7_RBBM_PERFCTR_SP_0_HI 0x3a7 +#define GEN7_RBBM_PERFCTR_SP_1_LO 0x3a8 +#define GEN7_RBBM_PERFCTR_SP_1_HI 0x3a9 +#define GEN7_RBBM_PERFCTR_SP_2_LO 0x3aa +#define GEN7_RBBM_PERFCTR_SP_2_HI 0x3ab +#define GEN7_RBBM_PERFCTR_SP_3_LO 0x3ac +#define GEN7_RBBM_PERFCTR_SP_3_HI 0x3ad +#define GEN7_RBBM_PERFCTR_SP_4_LO 0x3ae +#define GEN7_RBBM_PERFCTR_SP_4_HI 0x3af +#define GEN7_RBBM_PERFCTR_SP_5_LO 0x3b0 +#define GEN7_RBBM_PERFCTR_SP_5_HI 0x3b1 +#define GEN7_RBBM_PERFCTR_SP_6_LO 0x3b2 +#define GEN7_RBBM_PERFCTR_SP_6_HI 0x3b3 +#define GEN7_RBBM_PERFCTR_SP_7_LO 0x3b4 +#define GEN7_RBBM_PERFCTR_SP_7_HI 0x3b5 +#define GEN7_RBBM_PERFCTR_SP_8_LO 0x3b6 +#define GEN7_RBBM_PERFCTR_SP_8_HI 0x3b7 +#define GEN7_RBBM_PERFCTR_SP_9_LO 0x3b8 +#define GEN7_RBBM_PERFCTR_SP_9_HI 0x3b9 +#define GEN7_RBBM_PERFCTR_SP_10_LO 0x3ba +#define GEN7_RBBM_PERFCTR_SP_10_HI 0x3bb +#define GEN7_RBBM_PERFCTR_SP_11_LO 0x3bc +#define GEN7_RBBM_PERFCTR_SP_11_HI 0x3bd +#define GEN7_RBBM_PERFCTR_SP_12_LO 0x3be +#define GEN7_RBBM_PERFCTR_SP_12_HI 0x3bf +#define GEN7_RBBM_PERFCTR_SP_13_LO 0x3c0 +#define GEN7_RBBM_PERFCTR_SP_13_HI 0x3c1 +#define GEN7_RBBM_PERFCTR_SP_14_LO 0x3c2 +#define GEN7_RBBM_PERFCTR_SP_14_HI 0x3c3 +#define GEN7_RBBM_PERFCTR_SP_15_LO 0x3c4 +#define GEN7_RBBM_PERFCTR_SP_15_HI 0x3c5 +#define GEN7_RBBM_PERFCTR_SP_16_LO 0x3c6 +#define GEN7_RBBM_PERFCTR_SP_16_HI 0x3c7 +#define 
GEN7_RBBM_PERFCTR_SP_17_LO 0x3c8 +#define GEN7_RBBM_PERFCTR_SP_17_HI 0x3c9 +#define GEN7_RBBM_PERFCTR_SP_18_LO 0x3ca +#define GEN7_RBBM_PERFCTR_SP_18_HI 0x3cb +#define GEN7_RBBM_PERFCTR_SP_19_LO 0x3cc +#define GEN7_RBBM_PERFCTR_SP_19_HI 0x3cd +#define GEN7_RBBM_PERFCTR_SP_20_LO 0x3ce +#define GEN7_RBBM_PERFCTR_SP_20_HI 0x3cf +#define GEN7_RBBM_PERFCTR_SP_21_LO 0x3d0 +#define GEN7_RBBM_PERFCTR_SP_21_HI 0x3d1 +#define GEN7_RBBM_PERFCTR_SP_22_LO 0x3d2 +#define GEN7_RBBM_PERFCTR_SP_22_HI 0x3d3 +#define GEN7_RBBM_PERFCTR_SP_23_LO 0x3d4 +#define GEN7_RBBM_PERFCTR_SP_23_HI 0x3d5 +#define GEN7_RBBM_PERFCTR_RB_0_LO 0x3d6 +#define GEN7_RBBM_PERFCTR_RB_0_HI 0x3d7 +#define GEN7_RBBM_PERFCTR_RB_1_LO 0x3d8 +#define GEN7_RBBM_PERFCTR_RB_1_HI 0x3d9 +#define GEN7_RBBM_PERFCTR_RB_2_LO 0x3da +#define GEN7_RBBM_PERFCTR_RB_2_HI 0x3db +#define GEN7_RBBM_PERFCTR_RB_3_LO 0x3dc +#define GEN7_RBBM_PERFCTR_RB_3_HI 0x3dd +#define GEN7_RBBM_PERFCTR_RB_4_LO 0x3de +#define GEN7_RBBM_PERFCTR_RB_4_HI 0x3df +#define GEN7_RBBM_PERFCTR_RB_5_LO 0x3e0 +#define GEN7_RBBM_PERFCTR_RB_5_HI 0x3e1 +#define GEN7_RBBM_PERFCTR_RB_6_LO 0x3e2 +#define GEN7_RBBM_PERFCTR_RB_6_HI 0x3e3 +#define GEN7_RBBM_PERFCTR_RB_7_LO 0x3e4 +#define GEN7_RBBM_PERFCTR_RB_7_HI 0x3e5 +#define GEN7_RBBM_PERFCTR_VSC_0_LO 0x3e6 +#define GEN7_RBBM_PERFCTR_VSC_0_HI 0x3e7 +#define GEN7_RBBM_PERFCTR_VSC_1_LO 0x3e8 +#define GEN7_RBBM_PERFCTR_VSC_1_HI 0x3e9 +#define GEN7_RBBM_PERFCTR_LRZ_0_LO 0x3ea +#define GEN7_RBBM_PERFCTR_LRZ_0_HI 0x3eb +#define GEN7_RBBM_PERFCTR_LRZ_1_LO 0x3ec +#define GEN7_RBBM_PERFCTR_LRZ_1_HI 0x3ed +#define GEN7_RBBM_PERFCTR_LRZ_2_LO 0x3ee +#define GEN7_RBBM_PERFCTR_LRZ_2_HI 0x3ef +#define GEN7_RBBM_PERFCTR_LRZ_3_LO 0x3f0 +#define GEN7_RBBM_PERFCTR_LRZ_3_HI 0x3f1 +#define GEN7_RBBM_PERFCTR_CMP_0_LO 0x3f2 +#define GEN7_RBBM_PERFCTR_CMP_0_HI 0x3f3 +#define GEN7_RBBM_PERFCTR_CMP_1_LO 0x3f4 +#define GEN7_RBBM_PERFCTR_CMP_1_HI 0x3f5 +#define GEN7_RBBM_PERFCTR_CMP_2_LO 0x3f6 +#define GEN7_RBBM_PERFCTR_CMP_2_HI 0x3f7 +#define 
GEN7_RBBM_PERFCTR_CMP_3_LO 0x3f8 +#define GEN7_RBBM_PERFCTR_CMP_3_HI 0x3f9 +#define GEN7_RBBM_PERFCTR_UFC_0_LO 0x3fa +#define GEN7_RBBM_PERFCTR_UFC_0_HI 0x3fb +#define GEN7_RBBM_PERFCTR_UFC_1_LO 0x3fc +#define GEN7_RBBM_PERFCTR_UFC_1_HI 0x3fd +#define GEN7_RBBM_PERFCTR_UFC_2_LO 0x3fe +#define GEN7_RBBM_PERFCTR_UFC_2_HI 0x3ff +#define GEN7_RBBM_PERFCTR_UFC_3_LO 0x400 +#define GEN7_RBBM_PERFCTR_UFC_3_HI 0x401 +#define GEN7_RBBM_PERFCTR2_HLSQ_0_LO 0x410 +#define GEN7_RBBM_PERFCTR2_HLSQ_0_HI 0x411 +#define GEN7_RBBM_PERFCTR2_HLSQ_1_LO 0x412 +#define GEN7_RBBM_PERFCTR2_HLSQ_1_HI 0x413 +#define GEN7_RBBM_PERFCTR2_HLSQ_2_LO 0x414 +#define GEN7_RBBM_PERFCTR2_HLSQ_2_HI 0x415 +#define GEN7_RBBM_PERFCTR2_HLSQ_3_LO 0x416 +#define GEN7_RBBM_PERFCTR2_HLSQ_3_HI 0x417 +#define GEN7_RBBM_PERFCTR2_HLSQ_4_LO 0x418 +#define GEN7_RBBM_PERFCTR2_HLSQ_4_HI 0x419 +#define GEN7_RBBM_PERFCTR2_HLSQ_5_LO 0x41a +#define GEN7_RBBM_PERFCTR2_HLSQ_5_HI 0x41b +#define GEN7_RBBM_PERFCTR2_CP_0_LO 0x41c +#define GEN7_RBBM_PERFCTR2_CP_0_HI 0x41d +#define GEN7_RBBM_PERFCTR2_CP_1_LO 0x41e +#define GEN7_RBBM_PERFCTR2_CP_1_HI 0x41f +#define GEN7_RBBM_PERFCTR2_CP_2_LO 0x420 +#define GEN7_RBBM_PERFCTR2_CP_2_HI 0x421 +#define GEN7_RBBM_PERFCTR2_CP_3_LO 0x422 +#define GEN7_RBBM_PERFCTR2_CP_3_HI 0x423 +#define GEN7_RBBM_PERFCTR2_CP_4_LO 0x424 +#define GEN7_RBBM_PERFCTR2_CP_4_HI 0x425 +#define GEN7_RBBM_PERFCTR2_CP_5_LO 0x426 +#define GEN7_RBBM_PERFCTR2_CP_5_HI 0x427 +#define GEN7_RBBM_PERFCTR2_CP_6_LO 0x428 +#define GEN7_RBBM_PERFCTR2_CP_6_HI 0x429 +#define GEN7_RBBM_PERFCTR2_SP_0_LO 0x42a +#define GEN7_RBBM_PERFCTR2_SP_0_HI 0x42b +#define GEN7_RBBM_PERFCTR2_SP_1_LO 0x42c +#define GEN7_RBBM_PERFCTR2_SP_1_HI 0x42d +#define GEN7_RBBM_PERFCTR2_SP_2_LO 0x42e +#define GEN7_RBBM_PERFCTR2_SP_2_HI 0x42f +#define GEN7_RBBM_PERFCTR2_SP_3_LO 0x430 +#define GEN7_RBBM_PERFCTR2_SP_3_HI 0x431 +#define GEN7_RBBM_PERFCTR2_SP_4_LO 0x432 +#define GEN7_RBBM_PERFCTR2_SP_4_HI 0x433 +#define GEN7_RBBM_PERFCTR2_SP_5_LO 0x434 +#define 
GEN7_RBBM_PERFCTR2_SP_5_HI 0x435 +#define GEN7_RBBM_PERFCTR2_SP_6_LO 0x436 +#define GEN7_RBBM_PERFCTR2_SP_6_HI 0x437 +#define GEN7_RBBM_PERFCTR2_SP_7_LO 0x438 +#define GEN7_RBBM_PERFCTR2_SP_7_HI 0x439 +#define GEN7_RBBM_PERFCTR2_SP_8_LO 0x43a +#define GEN7_RBBM_PERFCTR2_SP_8_HI 0x43b +#define GEN7_RBBM_PERFCTR2_SP_9_LO 0x43c +#define GEN7_RBBM_PERFCTR2_SP_9_HI 0x43d +#define GEN7_RBBM_PERFCTR2_SP_10_LO 0x43e +#define GEN7_RBBM_PERFCTR2_SP_10_HI 0x43f +#define GEN7_RBBM_PERFCTR2_SP_11_LO 0x440 +#define GEN7_RBBM_PERFCTR2_SP_11_HI 0x441 +#define GEN7_RBBM_PERFCTR2_TP_0_LO 0x442 +#define GEN7_RBBM_PERFCTR2_TP_0_HI 0x443 +#define GEN7_RBBM_PERFCTR2_TP_1_LO 0x444 +#define GEN7_RBBM_PERFCTR2_TP_1_HI 0x445 +#define GEN7_RBBM_PERFCTR2_TP_2_LO 0x446 +#define GEN7_RBBM_PERFCTR2_TP_2_HI 0x447 +#define GEN7_RBBM_PERFCTR2_TP_3_LO 0x448 +#define GEN7_RBBM_PERFCTR2_TP_3_HI 0x449 +#define GEN7_RBBM_PERFCTR2_TP_4_LO 0x44a +#define GEN7_RBBM_PERFCTR2_TP_4_HI 0x44b +#define GEN7_RBBM_PERFCTR2_TP_5_LO 0x44c +#define GEN7_RBBM_PERFCTR2_TP_5_HI 0x44d +#define GEN7_RBBM_PERFCTR2_UFC_0_LO 0x44e +#define GEN7_RBBM_PERFCTR2_UFC_0_HI 0x44f +#define GEN7_RBBM_PERFCTR2_UFC_1_LO 0x450 +#define GEN7_RBBM_PERFCTR2_UFC_1_HI 0x451 +#define GEN7_RBBM_PERFCTR_BV_PC_0_LO 0x460 +#define GEN7_RBBM_PERFCTR_BV_PC_0_HI 0x461 +#define GEN7_RBBM_PERFCTR_BV_PC_1_LO 0x462 +#define GEN7_RBBM_PERFCTR_BV_PC_1_HI 0x463 +#define GEN7_RBBM_PERFCTR_BV_PC_2_LO 0x464 +#define GEN7_RBBM_PERFCTR_BV_PC_2_HI 0x465 +#define GEN7_RBBM_PERFCTR_BV_PC_3_LO 0x466 +#define GEN7_RBBM_PERFCTR_BV_PC_3_HI 0x467 +#define GEN7_RBBM_PERFCTR_BV_PC_4_LO 0x468 +#define GEN7_RBBM_PERFCTR_BV_PC_4_HI 0x469 +#define GEN7_RBBM_PERFCTR_BV_PC_5_LO 0x46a +#define GEN7_RBBM_PERFCTR_BV_PC_5_HI 0x46b +#define GEN7_RBBM_PERFCTR_BV_PC_6_LO 0x46c +#define GEN7_RBBM_PERFCTR_BV_PC_6_HI 0x46d +#define GEN7_RBBM_PERFCTR_BV_PC_7_LO 0x46e +#define GEN7_RBBM_PERFCTR_BV_PC_7_HI 0x46f +#define GEN7_RBBM_PERFCTR_BV_VFD_0_LO 0x470 +#define 
GEN7_RBBM_PERFCTR_BV_VFD_0_HI 0x471 +#define GEN7_RBBM_PERFCTR_BV_VFD_1_LO 0x472 +#define GEN7_RBBM_PERFCTR_BV_VFD_1_HI 0x473 +#define GEN7_RBBM_PERFCTR_BV_VFD_2_LO 0x474 +#define GEN7_RBBM_PERFCTR_BV_VFD_2_HI 0x475 +#define GEN7_RBBM_PERFCTR_BV_VFD_3_LO 0x476 +#define GEN7_RBBM_PERFCTR_BV_VFD_3_HI 0x477 +#define GEN7_RBBM_PERFCTR_BV_VFD_4_LO 0x478 +#define GEN7_RBBM_PERFCTR_BV_VFD_4_HI 0x479 +#define GEN7_RBBM_PERFCTR_BV_VFD_5_LO 0x47a +#define GEN7_RBBM_PERFCTR_BV_VFD_5_HI 0x47b +#define GEN7_RBBM_PERFCTR_BV_VFD_6_LO 0x47c +#define GEN7_RBBM_PERFCTR_BV_VFD_6_HI 0x47d +#define GEN7_RBBM_PERFCTR_BV_VFD_7_LO 0x47e +#define GEN7_RBBM_PERFCTR_BV_VFD_7_HI 0x47f +#define GEN7_RBBM_PERFCTR_BV_VPC_0_LO 0x480 +#define GEN7_RBBM_PERFCTR_BV_VPC_0_HI 0x481 +#define GEN7_RBBM_PERFCTR_BV_VPC_1_LO 0x482 +#define GEN7_RBBM_PERFCTR_BV_VPC_1_HI 0x483 +#define GEN7_RBBM_PERFCTR_BV_VPC_2_LO 0x484 +#define GEN7_RBBM_PERFCTR_BV_VPC_2_HI 0x485 +#define GEN7_RBBM_PERFCTR_BV_VPC_3_LO 0x486 +#define GEN7_RBBM_PERFCTR_BV_VPC_3_HI 0x487 +#define GEN7_RBBM_PERFCTR_BV_VPC_4_LO 0x488 +#define GEN7_RBBM_PERFCTR_BV_VPC_4_HI 0x489 +#define GEN7_RBBM_PERFCTR_BV_VPC_5_LO 0x48a +#define GEN7_RBBM_PERFCTR_BV_VPC_5_HI 0x48b +#define GEN7_RBBM_PERFCTR_BV_TSE_0_LO 0x48c +#define GEN7_RBBM_PERFCTR_BV_TSE_0_HI 0x48d +#define GEN7_RBBM_PERFCTR_BV_TSE_1_LO 0x48e +#define GEN7_RBBM_PERFCTR_BV_TSE_1_HI 0x48f +#define GEN7_RBBM_PERFCTR_BV_TSE_2_LO 0x490 +#define GEN7_RBBM_PERFCTR_BV_TSE_2_HI 0x491 +#define GEN7_RBBM_PERFCTR_BV_TSE_3_LO 0x492 +#define GEN7_RBBM_PERFCTR_BV_TSE_3_HI 0x493 +#define GEN7_RBBM_PERFCTR_BV_RAS_0_LO 0x494 +#define GEN7_RBBM_PERFCTR_BV_RAS_0_HI 0x495 +#define GEN7_RBBM_PERFCTR_BV_RAS_1_LO 0x496 +#define GEN7_RBBM_PERFCTR_BV_RAS_1_HI 0x497 +#define GEN7_RBBM_PERFCTR_BV_RAS_2_LO 0x498 +#define GEN7_RBBM_PERFCTR_BV_RAS_2_HI 0x499 +#define GEN7_RBBM_PERFCTR_BV_RAS_3_LO 0x49a +#define GEN7_RBBM_PERFCTR_BV_RAS_3_HI 0x49b +#define GEN7_RBBM_PERFCTR_BV_LRZ_0_LO 0x49c +#define 
GEN7_RBBM_PERFCTR_BV_LRZ_0_HI 0x49d +#define GEN7_RBBM_PERFCTR_BV_LRZ_1_LO 0x49e +#define GEN7_RBBM_PERFCTR_BV_LRZ_1_HI 0x49f +#define GEN7_RBBM_PERFCTR_BV_LRZ_2_LO 0x4a0 +#define GEN7_RBBM_PERFCTR_BV_LRZ_2_HI 0x4a1 +#define GEN7_RBBM_PERFCTR_BV_LRZ_3_LO 0x4a2 +#define GEN7_RBBM_PERFCTR_BV_LRZ_3_HI 0x4a3 + +#define GEN7_RBBM_PERFCTR_CNTL 0x500 +#define GEN7_RBBM_PERFCTR_RBBM_SEL_0 0x507 +#define GEN7_RBBM_PERFCTR_RBBM_SEL_1 0x508 +#define GEN7_RBBM_PERFCTR_RBBM_SEL_2 0x509 +#define GEN7_RBBM_PERFCTR_RBBM_SEL_3 0x50a +#define GEN7_RBBM_PERFCTR_GPU_BUSY_MASKED 0x50b + +#define GEN7_RBBM_ISDB_CNT 0x533 +#define GEN7_RBBM_NC_MODE_CNTL 0x534 +#define GEN7_RBBM_SNAPSHOT_STATUS 0x535 + +#define GEN7_RBBM_SECVID_TRUST_CNTL 0xf400 +#define GEN7_RBBM_SECVID_TSB_TRUSTED_BASE_LO 0xf800 +#define GEN7_RBBM_SECVID_TSB_TRUSTED_BASE_HI 0xf801 +#define GEN7_RBBM_SECVID_TSB_TRUSTED_SIZE 0xf802 +#define GEN7_RBBM_SECVID_TSB_CNTL 0xf803 + +#define GEN7_RBBM_GBIF_CLIENT_QOS_CNTL 0x00011 +#define GEN7_RBBM_GBIF_HALT 0x00016 +#define GEN7_RBBM_GBIF_HALT_ACK 0x00017 +#define GEN7_RBBM_INTERFACE_HANG_INT_CNTL 0x0001f +#define GEN7_RBBM_INT_CLEAR_CMD 0x00037 +#define GEN7_RBBM_INT_0_MASK 0x00038 +#define GEN7_RBBM_INT_2_MASK 0x0003a +#define GEN7_RBBM_SP_HYST_CNT 0x00042 +#define GEN7_RBBM_SW_RESET_CMD 0x00043 +#define GEN7_RBBM_RAC_THRESHOLD_CNT 0x00044 +#define GEN7_RBBM_CLOCK_CNTL 0x000ae +#define GEN7_RBBM_CLOCK_CNTL_SP0 0x000b0 +#define GEN7_RBBM_CLOCK_CNTL2_SP0 0x000b4 +#define GEN7_RBBM_CLOCK_DELAY_SP0 0x000b8 +#define GEN7_RBBM_CLOCK_HYST_SP0 0x000bc +#define GEN7_RBBM_CLOCK_CNTL_TP0 0x000c0 +#define GEN7_RBBM_CLOCK_CNTL2_TP0 0x000c4 +#define GEN7_RBBM_CLOCK_CNTL3_TP0 0x000c8 +#define GEN7_RBBM_CLOCK_CNTL4_TP0 0x000cc +#define GEN7_RBBM_CLOCK_DELAY_TP0 0x000d0 +#define GEN7_RBBM_CLOCK_DELAY2_TP0 0x000d4 +#define GEN7_RBBM_CLOCK_DELAY3_TP0 0x000d8 +#define GEN7_RBBM_CLOCK_DELAY4_TP0 0x000dc +#define GEN7_RBBM_CLOCK_HYST_TP0 0x000e0 +#define GEN7_RBBM_CLOCK_HYST2_TP0 0x000e4 +#define 
GEN7_RBBM_CLOCK_HYST3_TP0 0x000e8 +#define GEN7_RBBM_CLOCK_HYST4_TP0 0x000ec +#define GEN7_RBBM_CLOCK_CNTL_RB0 0x000f0 +#define GEN7_RBBM_CLOCK_CNTL2_RB0 0x000f4 +#define GEN7_RBBM_CLOCK_CNTL_CCU0 0x000f8 +#define GEN7_RBBM_CLOCK_HYST_RB_CCU0 0x00100 +#define GEN7_RBBM_CLOCK_CNTL_RAC 0x00104 +#define GEN7_RBBM_CLOCK_CNTL2_RAC 0x00105 +#define GEN7_RBBM_CLOCK_DELAY_RAC 0x00106 +#define GEN7_RBBM_CLOCK_HYST_RAC 0x00107 +#define GEN7_RBBM_CLOCK_CNTL_TSE_RAS_RBBM 0x00108 +#define GEN7_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x00109 +#define GEN7_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x0010a +#define GEN7_RBBM_CLOCK_CNTL_UCHE 0x0010b +#define GEN7_RBBM_CLOCK_DELAY_UCHE 0x0010f +#define GEN7_RBBM_CLOCK_HYST_UCHE 0x00110 +#define GEN7_RBBM_CLOCK_MODE_VFD 0x00111 +#define GEN7_RBBM_CLOCK_DELAY_VFD 0x00112 +#define GEN7_RBBM_CLOCK_HYST_VFD 0x00113 +#define GEN7_RBBM_CLOCK_MODE_GPC 0x00114 +#define GEN7_RBBM_CLOCK_DELAY_GPC 0x00115 +#define GEN7_RBBM_CLOCK_HYST_GPC 0x00116 +#define GEN7_RBBM_CLOCK_DELAY_HLSQ_2 0x00117 +#define GEN7_RBBM_CLOCK_CNTL_GMU_GX 0x00118 +#define GEN7_RBBM_CLOCK_DELAY_GMU_GX 0x00119 +#define GEN7_RBBM_CLOCK_HYST_GMU_GX 0x0011a +#define GEN7_RBBM_CLOCK_MODE_HLSQ 0x0011b +#define GEN7_RBBM_CLOCK_DELAY_HLSQ 0x0011c +#define GEN7_RBBM_CLOCK_HYST_HLSQ 0x0011d +#define GEN7_RBBM_CLOCK_MODE_CP 0x00260 +#define GEN7_RBBM_CLOCK_MODE_BV_LRZ 0x00284 +#define GEN7_RBBM_CLOCK_MODE_BV_GRAS 0x00285 +#define GEN7_RBBM_CLOCK_MODE2_GRAS 0x00286 +#define GEN7_RBBM_CLOCK_MODE_BV_VFD 0x00287 +#define GEN7_RBBM_CLOCK_MODE_BV_GPC 0x00288 + +/* DBGC_CFG registers */ +#define GEN7_DBGC_CFG_DBGBUS_SEL_A 0x600 +#define GEN7_DBGC_CFG_DBGBUS_SEL_B 0x601 +#define GEN7_DBGC_CFG_DBGBUS_SEL_C 0x602 +#define GEN7_DBGC_CFG_DBGBUS_SEL_D 0x603 +#define GEN7_DBGC_CFG_DBGBUS_CNTLT 0x604 +#define GEN7_DBGC_CFG_DBGBUS_CNTLM 0x605 +#define GEN7_DBGC_CFG_DBGBUS_OPL 0x606 +#define GEN7_DBGC_CFG_DBGBUS_OPE 0x607 +#define GEN7_DBGC_CFG_DBGBUS_IVTL_0 0x608 +#define GEN7_DBGC_CFG_DBGBUS_IVTL_1 0x609 +#define 
GEN7_DBGC_CFG_DBGBUS_IVTL_2 0x60a +#define GEN7_DBGC_CFG_DBGBUS_IVTL_3 0x60b +#define GEN7_DBGC_CFG_DBGBUS_MASKL_0 0x60c +#define GEN7_DBGC_CFG_DBGBUS_MASKL_1 0x60d +#define GEN7_DBGC_CFG_DBGBUS_MASKL_2 0x60e +#define GEN7_DBGC_CFG_DBGBUS_MASKL_3 0x60f +#define GEN7_DBGC_CFG_DBGBUS_BYTEL_0 0x610 +#define GEN7_DBGC_CFG_DBGBUS_BYTEL_1 0x611 +#define GEN7_DBGC_CFG_DBGBUS_IVTE_0 0x612 +#define GEN7_DBGC_CFG_DBGBUS_IVTE_1 0x613 +#define GEN7_DBGC_CFG_DBGBUS_IVTE_2 0x614 +#define GEN7_DBGC_CFG_DBGBUS_IVTE_3 0x615 +#define GEN7_DBGC_CFG_DBGBUS_MASKE_0 0x616 +#define GEN7_DBGC_CFG_DBGBUS_MASKE_1 0x617 +#define GEN7_DBGC_CFG_DBGBUS_MASKE_2 0x618 +#define GEN7_DBGC_CFG_DBGBUS_MASKE_3 0x619 +#define GEN7_DBGC_CFG_DBGBUS_NIBBLEE 0x61a +#define GEN7_DBGC_CFG_DBGBUS_PTRC0 0x61b +#define GEN7_DBGC_CFG_DBGBUS_PTRC1 0x61c +#define GEN7_DBGC_CFG_DBGBUS_LOADREG 0x61d +#define GEN7_DBGC_CFG_DBGBUS_IDX 0x61e +#define GEN7_DBGC_CFG_DBGBUS_CLRC 0x61f +#define GEN7_DBGC_CFG_DBGBUS_LOADIVT 0x620 +#define GEN7_DBGC_VBIF_DBG_CNTL 0x621 +#define GEN7_DBGC_DBG_LO_HI_GPIO 0x622 +#define GEN7_DBGC_EXT_TRACE_BUS_CNTL 0x623 +#define GEN7_DBGC_READ_AHB_THROUGH_DBG 0x624 +#define GEN7_DBGC_CFG_DBGBUS_OVER 0x626 +#define GEN7_DBGC_CFG_DBGBUS_TRACE_BUF1 0x62f +#define GEN7_DBGC_CFG_DBGBUS_TRACE_BUF2 0x630 +#define GEN7_DBGC_EVT_CFG 0x640 +#define GEN7_DBGC_EVT_INTF_SEL_0 0x641 +#define GEN7_DBGC_EVT_INTF_SEL_1 0x642 +#define GEN7_DBGC_PERF_ATB_CFG 0x643 +#define GEN7_DBGC_PERF_ATB_COUNTER_SEL_0 0x644 +#define GEN7_DBGC_PERF_ATB_COUNTER_SEL_1 0x645 +#define GEN7_DBGC_PERF_ATB_COUNTER_SEL_2 0x646 +#define GEN7_DBGC_PERF_ATB_COUNTER_SEL_3 0x647 +#define GEN7_DBGC_PERF_ATB_TRIG_INTF_SEL_0 0x648 +#define GEN7_DBGC_PERF_ATB_TRIG_INTF_SEL_1 0x649 +#define GEN7_DBGC_PERF_ATB_DRAIN_CMD 0x64a +#define GEN7_DBGC_ECO_CNTL 0x650 +#define GEN7_DBGC_AHB_DBG_CNTL 0x651 + +/* VSC registers */ +#define GEN7_VSC_PERFCTR_VSC_SEL_0 0xcd8 +#define GEN7_VSC_PERFCTR_VSC_SEL_1 0xcd9 + +/* GRAS registers */ +#define 
GEN7_GRAS_NC_MODE_CNTL 0x8602 +#define GEN7_GRAS_PERFCTR_TSE_SEL_0 0x8610 +#define GEN7_GRAS_PERFCTR_TSE_SEL_1 0x8611 +#define GEN7_GRAS_PERFCTR_TSE_SEL_2 0x8612 +#define GEN7_GRAS_PERFCTR_TSE_SEL_3 0x8613 +#define GEN7_GRAS_PERFCTR_RAS_SEL_0 0x8614 +#define GEN7_GRAS_PERFCTR_RAS_SEL_1 0x8615 +#define GEN7_GRAS_PERFCTR_RAS_SEL_2 0x8616 +#define GEN7_GRAS_PERFCTR_RAS_SEL_3 0x8617 +#define GEN7_GRAS_PERFCTR_LRZ_SEL_0 0x8618 +#define GEN7_GRAS_PERFCTR_LRZ_SEL_1 0x8619 +#define GEN7_GRAS_PERFCTR_LRZ_SEL_2 0x861a +#define GEN7_GRAS_PERFCTR_LRZ_SEL_3 0x861b + +/* RB registers */ +#define GEN7_RB_NC_MODE_CNTL 0x8e08 +#define GEN7_RB_PERFCTR_RB_SEL_0 0x8e10 +#define GEN7_RB_PERFCTR_RB_SEL_1 0x8e11 +#define GEN7_RB_PERFCTR_RB_SEL_2 0x8e12 +#define GEN7_RB_PERFCTR_RB_SEL_3 0x8e13 +#define GEN7_RB_PERFCTR_RB_SEL_4 0x8e14 +#define GEN7_RB_PERFCTR_RB_SEL_5 0x8e15 +#define GEN7_RB_PERFCTR_RB_SEL_6 0x8e16 +#define GEN7_RB_PERFCTR_RB_SEL_7 0x8e17 +#define GEN7_RB_PERFCTR_CCU_SEL_0 0x8e18 +#define GEN7_RB_PERFCTR_CCU_SEL_1 0x8e19 +#define GEN7_RB_PERFCTR_CCU_SEL_2 0x8e1a +#define GEN7_RB_PERFCTR_CCU_SEL_3 0x8e1b +#define GEN7_RB_PERFCTR_CCU_SEL_4 0x8e1c +#define GEN7_RB_PERFCTR_CMP_SEL_0 0x8e2c +#define GEN7_RB_PERFCTR_CMP_SEL_1 0x8e2d +#define GEN7_RB_PERFCTR_CMP_SEL_2 0x8e2e +#define GEN7_RB_PERFCTR_CMP_SEL_3 0x8e2f +#define GEN7_RB_PERFCTR_UFC_SEL_0 0x8e30 +#define GEN7_RB_PERFCTR_UFC_SEL_1 0x8e31 +#define GEN7_RB_PERFCTR_UFC_SEL_2 0x8e32 +#define GEN7_RB_PERFCTR_UFC_SEL_3 0x8e33 +#define GEN7_RB_PERFCTR_UFC_SEL_4 0x8e34 +#define GEN7_RB_PERFCTR_UFC_SEL_5 0x8e35 +#define GEN7_RB_RB_SUB_BLOCK_SEL_CNTL_HOST 0x8e3b +#define GEN7_RB_RB_SUB_BLOCK_SEL_CNTL_CD 0x8e3d +#define GEN7_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE 0x8e50 + +/* PC registers */ +#define GEN7_PC_PERFCTR_PC_SEL_0 0x9e42 +#define GEN7_PC_PERFCTR_PC_SEL_1 0x9e43 +#define GEN7_PC_PERFCTR_PC_SEL_2 0x9e44 +#define GEN7_PC_PERFCTR_PC_SEL_3 0x9e45 +#define GEN7_PC_PERFCTR_PC_SEL_4 0x9e46 +#define GEN7_PC_PERFCTR_PC_SEL_5 0x9e47 
+#define GEN7_PC_PERFCTR_PC_SEL_6 0x9e48 +#define GEN7_PC_PERFCTR_PC_SEL_7 0x9e49 +#define GEN7_PC_PERFCTR_PC_SEL_8 0x9e4a +#define GEN7_PC_PERFCTR_PC_SEL_9 0x9e4b +#define GEN7_PC_PERFCTR_PC_SEL_10 0x9e4c +#define GEN7_PC_PERFCTR_PC_SEL_11 0x9e4d +#define GEN7_PC_PERFCTR_PC_SEL_12 0x9e4e +#define GEN7_PC_PERFCTR_PC_SEL_13 0x9e4f +#define GEN7_PC_PERFCTR_PC_SEL_14 0x9e50 +#define GEN7_PC_PERFCTR_PC_SEL_15 0x9e51 + +/* VFD registers */ +#define GEN7_VFD_PERFCTR_VFD_SEL_0 0xa610 +#define GEN7_VFD_PERFCTR_VFD_SEL_1 0xa611 +#define GEN7_VFD_PERFCTR_VFD_SEL_2 0xa612 +#define GEN7_VFD_PERFCTR_VFD_SEL_3 0xa613 +#define GEN7_VFD_PERFCTR_VFD_SEL_4 0xa614 +#define GEN7_VFD_PERFCTR_VFD_SEL_5 0xa615 +#define GEN7_VFD_PERFCTR_VFD_SEL_6 0xa616 +#define GEN7_VFD_PERFCTR_VFD_SEL_7 0xa617 +#define GEN7_VFD_PERFCTR_VFD_SEL_8 0xa618 +#define GEN7_VFD_PERFCTR_VFD_SEL_9 0xa619 +#define GEN7_VFD_PERFCTR_VFD_SEL_10 0xa61a +#define GEN7_VFD_PERFCTR_VFD_SEL_11 0xa61b +#define GEN7_VFD_PERFCTR_VFD_SEL_12 0xa61c +#define GEN7_VFD_PERFCTR_VFD_SEL_13 0xa61d +#define GEN7_VFD_PERFCTR_VFD_SEL_14 0xa61e +#define GEN7_VFD_PERFCTR_VFD_SEL_15 0xa61f +#define GEN7_SP_READ_SEL 0xae6d +#define GEN7_SP_AHB_READ_APERTURE 0xc000 + +/* VPC registers */ +#define GEN7_VPC_PERFCTR_VPC_SEL_0 0x960b +#define GEN7_VPC_PERFCTR_VPC_SEL_1 0x960c +#define GEN7_VPC_PERFCTR_VPC_SEL_2 0x960d +#define GEN7_VPC_PERFCTR_VPC_SEL_3 0x960e +#define GEN7_VPC_PERFCTR_VPC_SEL_4 0x960f +#define GEN7_VPC_PERFCTR_VPC_SEL_5 0x9610 +#define GEN7_VPC_PERFCTR_VPC_SEL_6 0x9611 +#define GEN7_VPC_PERFCTR_VPC_SEL_7 0x9612 +#define GEN7_VPC_PERFCTR_VPC_SEL_8 0x9613 +#define GEN7_VPC_PERFCTR_VPC_SEL_9 0x9614 +#define GEN7_VPC_PERFCTR_VPC_SEL_10 0x9615 +#define GEN7_VPC_PERFCTR_VPC_SEL_11 0x9616 + +/* UCHE registers */ +#define GEN7_UCHE_MODE_CNTL 0xe01 +#define GEN7_UCHE_WRITE_THRU_BASE_LO 0xe07 +#define GEN7_UCHE_WRITE_THRU_BASE_HI 0xe08 +#define GEN7_UCHE_TRAP_BASE_LO 0xe09 +#define GEN7_UCHE_TRAP_BASE_HI 0xe0a +#define 
GEN7_UCHE_GMEM_RANGE_MIN_LO 0xe0b +#define GEN7_UCHE_GMEM_RANGE_MIN_HI 0xe0c +#define GEN7_UCHE_GMEM_RANGE_MAX_LO 0xe0d +#define GEN7_UCHE_GMEM_RANGE_MAX_HI 0xe0e +#define GEN7_UCHE_CACHE_WAYS 0xe17 +#define GEN7_UCHE_CLIENT_PF 0xe19 +#define GEN7_UCHE_PERFCTR_UCHE_SEL_0 0xe1c +#define GEN7_UCHE_PERFCTR_UCHE_SEL_1 0xe1d +#define GEN7_UCHE_PERFCTR_UCHE_SEL_2 0xe1e +#define GEN7_UCHE_PERFCTR_UCHE_SEL_3 0xe1f +#define GEN7_UCHE_PERFCTR_UCHE_SEL_4 0xe20 +#define GEN7_UCHE_PERFCTR_UCHE_SEL_5 0xe21 +#define GEN7_UCHE_PERFCTR_UCHE_SEL_6 0xe22 +#define GEN7_UCHE_PERFCTR_UCHE_SEL_7 0xe23 +#define GEN7_UCHE_PERFCTR_UCHE_SEL_8 0xe24 +#define GEN7_UCHE_PERFCTR_UCHE_SEL_9 0xe25 +#define GEN7_UCHE_PERFCTR_UCHE_SEL_10 0xe26 +#define GEN7_UCHE_PERFCTR_UCHE_SEL_11 0xe27 +#define GEN7_UCHE_GBIF_GX_CONFIG 0xe3a +#define GEN7_UCHE_CMDQ_CONFIG 0xe3c + +/* SP registers */ +#define GEN7_SP_NC_MODE_CNTL 0xae02 +#define GEN7_SP_PERFCTR_HLSQ_SEL_0 0xae60 +#define GEN7_SP_PERFCTR_HLSQ_SEL_1 0xae61 +#define GEN7_SP_PERFCTR_HLSQ_SEL_2 0xae62 +#define GEN7_SP_PERFCTR_HLSQ_SEL_3 0xae63 +#define GEN7_SP_PERFCTR_HLSQ_SEL_4 0xae64 +#define GEN7_SP_PERFCTR_HLSQ_SEL_5 0xae65 +#define GEN7_SP_PERFCTR_SP_SEL_0 0xae80 +#define GEN7_SP_PERFCTR_SP_SEL_1 0xae81 +#define GEN7_SP_PERFCTR_SP_SEL_2 0xae82 +#define GEN7_SP_PERFCTR_SP_SEL_3 0xae83 +#define GEN7_SP_PERFCTR_SP_SEL_4 0xae84 +#define GEN7_SP_PERFCTR_SP_SEL_5 0xae85 +#define GEN7_SP_PERFCTR_SP_SEL_6 0xae86 +#define GEN7_SP_PERFCTR_SP_SEL_7 0xae87 +#define GEN7_SP_PERFCTR_SP_SEL_8 0xae88 +#define GEN7_SP_PERFCTR_SP_SEL_9 0xae89 +#define GEN7_SP_PERFCTR_SP_SEL_10 0xae8a +#define GEN7_SP_PERFCTR_SP_SEL_11 0xae8b +#define GEN7_SP_PERFCTR_SP_SEL_12 0xae8c +#define GEN7_SP_PERFCTR_SP_SEL_13 0xae8d +#define GEN7_SP_PERFCTR_SP_SEL_14 0xae8e +#define GEN7_SP_PERFCTR_SP_SEL_15 0xae8f +#define GEN7_SP_PERFCTR_SP_SEL_16 0xae90 +#define GEN7_SP_PERFCTR_SP_SEL_17 0xae91 +#define GEN7_SP_PERFCTR_SP_SEL_18 0xae92 +#define GEN7_SP_PERFCTR_SP_SEL_19 0xae93 +#define 
GEN7_SP_PERFCTR_SP_SEL_20 0xae94 +#define GEN7_SP_PERFCTR_SP_SEL_21 0xae95 +#define GEN7_SP_PERFCTR_SP_SEL_22 0xae96 +#define GEN7_SP_PERFCTR_SP_SEL_23 0xae97 +#define GEN7_SP_PERFCTR_SP_SEL_24 0xae98 +#define GEN7_SP_PERFCTR_SP_SEL_25 0xae99 +#define GEN7_SP_PERFCTR_SP_SEL_26 0xae9a +#define GEN7_SP_PERFCTR_SP_SEL_27 0xae9b +#define GEN7_SP_PERFCTR_SP_SEL_28 0xae9c +#define GEN7_SP_PERFCTR_SP_SEL_29 0xae9d +#define GEN7_SP_PERFCTR_SP_SEL_30 0xae9e +#define GEN7_SP_PERFCTR_SP_SEL_31 0xae9f +#define GEN7_SP_PERFCTR_SP_SEL_32 0xaea0 +#define GEN7_SP_PERFCTR_SP_SEL_33 0xaea1 +#define GEN7_SP_PERFCTR_SP_SEL_34 0xaea2 +#define GEN7_SP_PERFCTR_SP_SEL_35 0xaea3 + +/* TP registers */ +#define GEN7_TPL1_NC_MODE_CNTL 0xb604 +#define GEN7_TPL1_PERFCTR_TP_SEL_0 0xb610 +#define GEN7_TPL1_PERFCTR_TP_SEL_1 0xb611 +#define GEN7_TPL1_PERFCTR_TP_SEL_2 0xb612 +#define GEN7_TPL1_PERFCTR_TP_SEL_3 0xb613 +#define GEN7_TPL1_PERFCTR_TP_SEL_4 0xb614 +#define GEN7_TPL1_PERFCTR_TP_SEL_5 0xb615 +#define GEN7_TPL1_PERFCTR_TP_SEL_6 0xb616 +#define GEN7_TPL1_PERFCTR_TP_SEL_7 0xb617 +#define GEN7_TPL1_PERFCTR_TP_SEL_8 0xb618 +#define GEN7_TPL1_PERFCTR_TP_SEL_9 0xb619 +#define GEN7_TPL1_PERFCTR_TP_SEL_10 0xb61a +#define GEN7_TPL1_PERFCTR_TP_SEL_11 0xb61b +#define GEN7_TPL1_PERFCTR_TP_SEL_12 0xb61c +#define GEN7_TPL1_PERFCTR_TP_SEL_13 0xb61d +#define GEN7_TPL1_PERFCTR_TP_SEL_14 0xb61e +#define GEN7_TPL1_PERFCTR_TP_SEL_15 0xb61f +#define GEN7_TPL1_PERFCTR_TP_SEL_16 0xb620 +#define GEN7_TPL1_PERFCTR_TP_SEL_17 0xb621 + +/* VBIF registers */ +#define GEN7_VBIF_XIN_HALT_CTRL1 0x3081 +#define GEN7_VBIF_TEST_BUS_OUT_CTRL 0x3084 +#define GEN7_VBIF_TEST_BUS1_CTRL0 0x3085 +#define GEN7_VBIF_TEST_BUS1_CTRL1 0x3086 +#define GEN7_VBIF_TEST_BUS2_CTRL0 0x3087 +#define GEN7_VBIF_TEST_BUS2_CTRL1 0x3088 +#define GEN7_VBIF_TEST_BUS_OUT 0x308c +#define GEN7_VBIF_PERF_CNT_SEL0 0x30d0 +#define GEN7_VBIF_PERF_CNT_SEL1 0x30d1 +#define GEN7_VBIF_PERF_CNT_SEL2 0x30d2 +#define GEN7_VBIF_PERF_CNT_SEL3 0x30d3 +#define 
GEN7_VBIF_PERF_CNT_LOW0 0x30d8 +#define GEN7_VBIF_PERF_CNT_LOW1 0x30d9 +#define GEN7_VBIF_PERF_CNT_LOW2 0x30da +#define GEN7_VBIF_PERF_CNT_LOW3 0x30db +#define GEN7_VBIF_PERF_CNT_HIGH0 0x30e0 +#define GEN7_VBIF_PERF_CNT_HIGH1 0x30e1 +#define GEN7_VBIF_PERF_CNT_HIGH2 0x30e2 +#define GEN7_VBIF_PERF_CNT_HIGH3 0x30e3 +#define GEN7_VBIF_PERF_PWR_CNT_EN0 0x3100 +#define GEN7_VBIF_PERF_PWR_CNT_EN1 0x3101 +#define GEN7_VBIF_PERF_PWR_CNT_EN2 0x3102 +#define GEN7_VBIF_PERF_PWR_CNT_LOW0 0x3110 +#define GEN7_VBIF_PERF_PWR_CNT_LOW1 0x3111 +#define GEN7_VBIF_PERF_PWR_CNT_LOW2 0x3112 +#define GEN7_VBIF_PERF_PWR_CNT_HIGH0 0x3118 +#define GEN7_VBIF_PERF_PWR_CNT_HIGH1 0x3119 +#define GEN7_VBIF_PERF_PWR_CNT_HIGH2 0x311a + +/* GBIF countables */ +#define GBIF_AXI0_READ_DATA_TOTAL_BEATS 34 +#define GBIF_AXI1_READ_DATA_TOTAL_BEATS 35 +#define GBIF_AXI0_WRITE_DATA_TOTAL_BEATS 46 +#define GBIF_AXI1_WRITE_DATA_TOTAL_BEATS 47 + +/* GBIF registers */ +#define GEN7_GBIF_SCACHE_CNTL0 0x3c01 +#define GEN7_GBIF_SCACHE_CNTL1 0x3c02 +#define GEN7_GBIF_QSB_SIDE0 0x3c03 +#define GEN7_GBIF_QSB_SIDE1 0x3c04 +#define GEN7_GBIF_QSB_SIDE2 0x3c05 +#define GEN7_GBIF_QSB_SIDE3 0x3c06 +#define GEN7_GBIF_HALT 0x3c45 +#define GEN7_GBIF_HALT_ACK 0x3c46 + +#define GEN7_GBIF_CLIENT_HALT_MASK BIT(0) +#define GEN7_GBIF_ARB_HALT_MASK BIT(1) +#define GEN7_GBIF_GX_HALT_MASK BIT(0) + +#define GEN7_GBIF_PERF_PWR_CNT_EN 0x3cc0 +#define GEN7_GBIF_PERF_PWR_CNT_CLR 0x3cc1 +#define GEN7_GBIF_PERF_CNT_SEL 0x3cc2 +#define GEN7_GBIF_PERF_PWR_CNT_SEL 0x3cc3 +#define GEN7_GBIF_PERF_CNT_LOW0 0x3cc4 +#define GEN7_GBIF_PERF_CNT_LOW1 0x3cc5 +#define GEN7_GBIF_PERF_CNT_LOW2 0x3cc6 +#define GEN7_GBIF_PERF_CNT_LOW3 0x3cc7 +#define GEN7_GBIF_PERF_CNT_HIGH0 0x3cc8 +#define GEN7_GBIF_PERF_CNT_HIGH1 0x3cc9 +#define GEN7_GBIF_PERF_CNT_HIGH2 0x3cca +#define GEN7_GBIF_PERF_CNT_HIGH3 0x3ccb +#define GEN7_GBIF_PWR_CNT_LOW0 0x3ccc +#define GEN7_GBIF_PWR_CNT_LOW1 0x3ccd +#define GEN7_GBIF_PWR_CNT_LOW2 0x3cce +#define GEN7_GBIF_PWR_CNT_HIGH0 0x3ccf 
+#define GEN7_GBIF_PWR_CNT_HIGH1 0x3cd0 +#define GEN7_GBIF_PWR_CNT_HIGH2 0x3cd1 + + +/* CX_DBGC_CFG registers */ +#define GEN7_CX_DBGC_CFG_DBGBUS_SEL_A 0x18400 +#define GEN7_CX_DBGC_CFG_DBGBUS_SEL_B 0x18401 +#define GEN7_CX_DBGC_CFG_DBGBUS_SEL_C 0x18402 +#define GEN7_CX_DBGC_CFG_DBGBUS_SEL_D 0x18403 +#define GEN7_CX_DBGC_CFG_DBGBUS_CNTLT 0x18404 +#define GEN7_CX_DBGC_CFG_DBGBUS_CNTLM 0x18405 +#define GEN7_CX_DBGC_CFG_DBGBUS_OPL 0x18406 +#define GEN7_CX_DBGC_CFG_DBGBUS_OPE 0x18407 +#define GEN7_CX_DBGC_CFG_DBGBUS_IVTL_0 0x18408 +#define GEN7_CX_DBGC_CFG_DBGBUS_IVTL_1 0x18409 +#define GEN7_CX_DBGC_CFG_DBGBUS_IVTL_2 0x1840a +#define GEN7_CX_DBGC_CFG_DBGBUS_IVTL_3 0x1840b +#define GEN7_CX_DBGC_CFG_DBGBUS_MASKL_0 0x1840c +#define GEN7_CX_DBGC_CFG_DBGBUS_MASKL_1 0x1840d +#define GEN7_CX_DBGC_CFG_DBGBUS_MASKL_2 0x1840e +#define GEN7_CX_DBGC_CFG_DBGBUS_MASKL_3 0x1840f +#define GEN7_CX_DBGC_CFG_DBGBUS_BYTEL_0 0x18410 +#define GEN7_CX_DBGC_CFG_DBGBUS_BYTEL_1 0x18411 +#define GEN7_CX_DBGC_CFG_DBGBUS_IVTE_0 0x18412 +#define GEN7_CX_DBGC_CFG_DBGBUS_IVTE_1 0x18413 +#define GEN7_CX_DBGC_CFG_DBGBUS_IVTE_2 0x18414 +#define GEN7_CX_DBGC_CFG_DBGBUS_IVTE_3 0x18415 +#define GEN7_CX_DBGC_CFG_DBGBUS_MASKE_0 0x18416 +#define GEN7_CX_DBGC_CFG_DBGBUS_MASKE_1 0x18417 +#define GEN7_CX_DBGC_CFG_DBGBUS_MASKE_2 0x18418 +#define GEN7_CX_DBGC_CFG_DBGBUS_MASKE_3 0x18419 +#define GEN7_CX_DBGC_CFG_DBGBUS_NIBBLEE 0x1841a +#define GEN7_CX_DBGC_CFG_DBGBUS_PTRC0 0x1841b +#define GEN7_CX_DBGC_CFG_DBGBUS_PTRC1 0x1841c +#define GEN7_CX_DBGC_CFG_DBGBUS_LOADREG 0x1841d +#define GEN7_CX_DBGC_CFG_DBGBUS_IDX 0x1841e +#define GEN7_CX_DBGC_CFG_DBGBUS_CLRC 0x1841f +#define GEN7_CX_DBGC_CFG_DBGBUS_LOADIVT 0x18420 +#define GEN7_CX_DBGC_VBIF_DBG_CNTL 0x18421 +#define GEN7_CX_DBGC_DBG_LO_HI_GPIO 0x18422 +#define GEN7_CX_DBGC_EXT_TRACE_BUS_CNTL 0x18423 +#define GEN7_CX_DBGC_READ_AHB_THROUGH_DBG 0x18424 +#define GEN7_CX_DBGC_CFG_DBGBUS_OVER 0x18426 +#define GEN7_CX_DBGC_CFG_DBGBUS_TRACE_BUF1 0x1842f +#define 
GEN7_CX_DBGC_CFG_DBGBUS_TRACE_BUF2 0x18430 +#define GEN7_CX_DBGC_EVT_CFG 0x18440 +#define GEN7_CX_DBGC_EVT_INTF_SEL_0 0x18441 +#define GEN7_CX_DBGC_EVT_INTF_SEL_1 0x18442 +#define GEN7_CX_DBGC_PERF_ATB_CFG 0x18443 +#define GEN7_CX_DBGC_PERF_ATB_COUNTER_SEL_0 0x18444 +#define GEN7_CX_DBGC_PERF_ATB_COUNTER_SEL_1 0x18445 +#define GEN7_CX_DBGC_PERF_ATB_COUNTER_SEL_2 0x18446 +#define GEN7_CX_DBGC_PERF_ATB_COUNTER_SEL_3 0x18447 +#define GEN7_CX_DBGC_PERF_ATB_TRIG_INTF_SEL_0 0x18448 +#define GEN7_CX_DBGC_PERF_ATB_TRIG_INTF_SEL_1 0x18449 +#define GEN7_CX_DBGC_PERF_ATB_DRAIN_CMD 0x1844a +#define GEN7_CX_DBGC_ECO_CNTL 0x18450 +#define GEN7_CX_DBGC_AHB_DBG_CNTL 0x18451 + +/* GMU control registers */ +#define GEN7_GMU_CM3_ITCM_START 0x1b400 +#define GEN7_GMU_CM3_DTCM_START 0x1c400 +#define GEN7_GMU_NMI_CONTROL_STATUS 0x1cbf0 +#define GEN7_GMU_BOOT_SLUMBER_OPTION 0x1cbf8 +#define GEN7_GMU_GX_VOTE_IDX 0x1cbf9 +#define GEN7_GMU_MX_VOTE_IDX 0x1cbfa +#define GEN7_GMU_DCVS_ACK_OPTION 0x1cbfc +#define GEN7_GMU_DCVS_PERF_SETTING 0x1cbfd +#define GEN7_GMU_DCVS_BW_SETTING 0x1cbfe +#define GEN7_GMU_DCVS_RETURN 0x1cbff +#define GEN7_GMU_ICACHE_CONFIG 0x1f400 +#define GEN7_GMU_DCACHE_CONFIG 0x1f401 +#define GEN7_GMU_SYS_BUS_CONFIG 0x1f40f +#define GEN7_GMU_CM3_SYSRESET 0x1f800 +#define GEN7_GMU_CM3_BOOT_CONFIG 0x1f801 +#define GEN7_GMU_CX_GMU_WFI_CONFIG 0x1f802 +#define GEN7_GMU_CX_GMU_WDOG_CTRL 0x1f813 +#define GEN7_GMU_CM3_FW_BUSY 0x1f81a +#define GEN7_GMU_CM3_FW_INIT_RESULT 0x1f81c +#define GEN7_GMU_CM3_CFG 0x1f82d +#define GEN7_GMU_CX_GMU_POWER_COUNTER_ENABLE 0x1f840 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_0 0x1f841 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_1 0x1f842 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L 0x1f844 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H 0x1f845 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_L 0x1f846 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_H 0x1f847 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_L 0x1f848 +#define 
GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_H 0x1f849 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_L 0x1f84a +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_H 0x1f84b +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_L 0x1f84c +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_H 0x1f84d +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_5_L 0x1f84e +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_5_H 0x1f84f +#define GEN7_GMU_CX_GMU_POWER_COUNTER_GMUCLK_0_L 0x1f850 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_GMUCLK_0_H 0x1f851 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_GMUCLK_1_L 0x1f852 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_GMUCLK_1_H 0x1f853 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_2 0x1f860 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_6_L 0x1f870 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_6_H 0x1f871 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_L 0x1f872 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_H 0x1f873 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_8_L 0x1f874 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_8_H 0x1f875 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_9_L 0x1f876 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_9_H 0x1f877 +#define GEN7_GMU_CX_GMU_ALWAYS_ON_COUNTER_L 0x1f888 +#define GEN7_GMU_CX_GMU_ALWAYS_ON_COUNTER_H 0x1f889 +#define GEN7_GMU_PWR_COL_INTER_FRAME_CTRL 0x1f8c0 +#define GEN7_GMU_PWR_COL_INTER_FRAME_HYST 0x1f8c1 +#define GEN7_GMU_GFX_PWR_CLK_STATUS 0x1f8d0 +#define GEN7_GMU_CX_GMU_PERF_COUNTER_ENABLE 0x1f8a0 +#define GEN7_GMU_CX_GMU_PERF_COUNTER_SELECT_0 0x1f8a1 +#define GEN7_GMU_CX_GMU_PERF_COUNTER_SELECT_1 0x1f8a2 +#define GEN7_GMU_CX_GMU_PERF_COUNTER_0_L 0x1f8a4 +#define GEN7_GMU_CX_GMU_PERF_COUNTER_0_H 0x1f8a5 +#define GEN7_GMU_CX_GMU_PERF_COUNTER_1_L 0x1f8a6 +#define GEN7_GMU_CX_GMU_PERF_COUNTER_1_H 0x1f8a7 +#define GEN7_GMU_CX_GMU_PERF_COUNTER_2_L 0x1f8a8 +#define GEN7_GMU_CX_GMU_PERF_COUNTER_2_H 0x1f8a9 +#define GEN7_GMU_CX_GMU_PERF_COUNTER_3_L 0x1f8aa +#define GEN7_GMU_CX_GMU_PERF_COUNTER_3_H 0x1f8ab +#define GEN7_GMU_CX_GMU_PERF_COUNTER_4_L 0x1f8ac 
+#define GEN7_GMU_CX_GMU_PERF_COUNTER_4_H 0x1f8ad +#define GEN7_GMU_CX_GMU_PERF_COUNTER_5_L 0x1f8ae +#define GEN7_GMU_CX_GMU_PERF_COUNTER_5_H 0x1f8af +#define GEN7_GPU_GMU_CX_GMU_RPMH_POWER_STATE 0x1f8ec +#define GEN7_GMU_BOOT_KMD_LM_HANDSHAKE 0x1f9f0 + +/* HFI registers*/ +#define GEN7_GMU_ALWAYS_ON_COUNTER_L 0x1f888 +#define GEN7_GMU_ALWAYS_ON_COUNTER_H 0x1f889 +#define GEN7_GMU_GMU_PWR_COL_KEEPALIVE 0x1f8c3 +#define GEN7_GMU_PWR_COL_PREEMPT_KEEPALIVE 0x1f8c4 +#define GEN7_GMU_HFI_CTRL_STATUS 0x1f980 +#define GEN7_GMU_HFI_QTBL_INFO 0x1f984 +#define GEN7_GMU_HFI_QTBL_ADDR 0x1f985 +#define GEN7_GMU_HFI_CTRL_INIT 0x1f986 +#define GEN7_GMU_GMU2HOST_INTR_SET 0x1f990 +#define GEN7_GMU_GMU2HOST_INTR_CLR 0x1f991 +#define GEN7_GMU_GMU2HOST_INTR_INFO 0x1f992 +#define GEN7_GMU_GMU2HOST_INTR_MASK 0x1f993 +#define GEN7_GMU_HOST2GMU_INTR_SET 0x1f994 +#define GEN7_GMU_HOST2GMU_INTR_CLR 0x1f995 +#define GEN7_GMU_HOST2GMU_INTR_RAW_INFO 0x1f996 +#define GEN7_GMU_HOST2GMU_INTR_EN_0 0x1f997 +#define GEN7_GMU_HOST2GMU_INTR_EN_1 0x1f998 +#define GEN7_GMU_HOST2GMU_INTR_EN_2 0x1f999 +#define GEN7_GMU_HOST2GMU_INTR_EN_3 0x1f99a +#define GEN7_GMU_HOST2GMU_INTR_INFO_0 0x1f99b +#define GEN7_GMU_HOST2GMU_INTR_INFO_1 0x1f99c +#define GEN7_GMU_HOST2GMU_INTR_INFO_2 0x1f99d +#define GEN7_GMU_HOST2GMU_INTR_INFO_3 0x1f99e +#define GEN7_GMU_GENERAL_0 0x1f9c5 +#define GEN7_GMU_GENERAL_1 0x1f9c6 +#define GEN7_GMU_GENERAL_6 0x1f9cb +#define GEN7_GMU_GENERAL_7 0x1f9cc +#define GEN7_GMU_GENERAL_8 0x1f9cd +#define GEN7_GMU_GENERAL_9 0x1f9ce +#define GEN7_GMU_GENERAL_10 0x1f9cf + +/* FAL10 veto register */ +#define GEN7_GPU_GMU_CX_GMU_CX_FAL_INTF 0x1f8f0 +#define GEN7_GPU_GMU_CX_GMU_CX_FALNEXT_INTF 0x1f8f1 + +#define GEN7_GMU_AO_INTERRUPT_EN 0x23b03 +#define GEN7_GMU_AO_HOST_INTERRUPT_CLR 0x23b04 +#define GEN7_GMU_AO_HOST_INTERRUPT_STATUS 0x23b05 +#define GEN7_GMU_AO_HOST_INTERRUPT_MASK 0x23b06 +#define GEN7_GPU_GMU_AO_GMU_CGC_MODE_CNTL 0x23b09 +#define GEN7_GPU_GMU_AO_GMU_CGC_DELAY_CNTL 0x23b0a +#define 
GEN7_GPU_GMU_AO_GMU_CGC_HYST_CNTL 0x23b0b +#define GEN7_GPU_GMU_AO_GPU_CX_BUSY_STATUS 0x23b0c +#define GEN7_GPU_GMU_AO_GPU_CX_BUSY_STATUS2 0x23b0d +#define GEN7_GPU_GMU_AO_GPU_CX_BUSY_MASK 0x23b0e +#define GEN7_GMU_AO_AHB_FENCE_CTRL 0x23b10 +#define GEN7_GMU_AHB_FENCE_STATUS 0x23b13 +#define GEN7_GMU_AHB_FENCE_STATUS_CLR 0x23b14 +#define GEN7_GMU_RBBM_INT_UNMASKED_STATUS 0x23b15 +#define GEN7_GMU_AO_SPARE_CNTL 0x23b16 + +/* GMU RSC control registers */ +#define GEN7_GMU_RSCC_CONTROL_REQ 0x23b07 +#define GEN7_GMU_RSCC_CONTROL_ACK 0x23b08 + +/* FENCE control registers */ +#define GEN7_GMU_AHB_FENCE_RANGE_0 0x23b11 + +/* GPUCC registers */ +#define GEN7_GPU_CC_GX_DOMAIN_MISC3 0x26541 +#define GEN7_GPU_CC_CX_GDSCR 0x26442 + +/* GPU RSC sequencer registers */ +#define GEN7_GPU_RSCC_RSC_STATUS0_DRV0 0x00004 +#define GEN7_RSCC_PDC_SEQ_START_ADDR 0x00008 +#define GEN7_RSCC_PDC_MATCH_VALUE_LO 0x00009 +#define GEN7_RSCC_PDC_MATCH_VALUE_HI 0x0000a +#define GEN7_RSCC_PDC_SLAVE_ID_DRV0 0x0000b +#define GEN7_RSCC_HIDDEN_TCS_CMD0_ADDR 0x0000d +#define GEN7_RSCC_HIDDEN_TCS_CMD0_DATA 0x0000e +#define GEN7_RSCC_TIMESTAMP_UNIT0_TIMESTAMP_L_DRV0 0x00082 +#define GEN7_RSCC_TIMESTAMP_UNIT0_TIMESTAMP_H_DRV0 0x00083 +#define GEN7_RSCC_TIMESTAMP_UNIT1_EN_DRV0 0x00089 +#define GEN7_RSCC_TIMESTAMP_UNIT1_OUTPUT_DRV0 0x0008c +#define GEN7_RSCC_OVERRIDE_START_ADDR 0x00100 +#define GEN7_RSCC_SEQ_BUSY_DRV0 0x00101 +#define GEN7_RSCC_SEQ_MEM_0_DRV0 0x00180 +#define GEN7_RSCC_TCS0_DRV0_STATUS 0x00346 +#define GEN7_RSCC_TCS1_DRV0_STATUS 0x003ee +#define GEN7_RSCC_TCS2_DRV0_STATUS 0x00496 +#define GEN7_RSCC_TCS3_DRV0_STATUS 0x0053e + +/* GPU PDC sequencer registers in AOSS.RPMh domain */ +#define GEN7_PDC_GPU_ENABLE_PDC 0x1140 +#define GEN7_PDC_GPU_SEQ_START_ADDR 0x1148 + +#define GEN7_SMMU_BASE 0x28000 + +/* GPU CX_MISC registers */ +#define GEN7_GPU_CX_MISC_TCM_RET_CNTL 0x39 + +#endif /* _GEN7_REG_H */ diff --git a/gfx_driver_product.mk b/gfx_driver_product.mk new file mode 100644 index 
0000000000..64bee7aea5 --- /dev/null +++ b/gfx_driver_product.mk @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only + +PRODUCT_PACKAGES += msm_kgsl.ko + diff --git a/gfx_kernel_board.mk b/gfx_kernel_board.mk new file mode 100644 index 0000000000..238b8ff9ed --- /dev/null +++ b/gfx_kernel_board.mk @@ -0,0 +1,10 @@ +#SPDX-License-Identifier: GPL-2.0-only + +ifneq ($(TARGET_BOARD_AUTO),true) + ifeq ($(call is-board-platform-in-list,$(TARGET_BOARD_PLATFORM)),true) + BOARD_VENDOR_KERNEL_MODULES += $(KERNEL_MODULES_OUT)/msm_kgsl.ko + BOARD_VENDOR_RAMDISK_KERNEL_MODULES += $(KERNEL_MODULES_OUT)/msm_kgsl.ko + BOARD_VENDOR_RAMDISK_RECOVERY_KERNEL_MODULES_LOAD += $(KERNEL_MODULES_OUT)/msm_kgsl.ko + endif +endif + diff --git a/gfx_kernel_headers.py b/gfx_kernel_headers.py new file mode 100644 index 0000000000..ab0c06a2a5 --- /dev/null +++ b/gfx_kernel_headers.py @@ -0,0 +1,96 @@ +# Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 as published by +# the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see . 
+ +import argparse +import filecmp +import os +import re +import subprocess +import sys + +def run_headers_install(verbose, gen_dir, headers_install, unifdef, prefix, h): + if not h.startswith(prefix): + print('error: expected prefix [%s] on header [%s]' % (prefix, h)) + return False + + out_h = os.path.join(gen_dir, h[len(prefix):]) + (out_h_dirname, out_h_basename) = os.path.split(out_h) + env = os.environ.copy() + env["LOC_UNIFDEF"] = unifdef + cmd = ["sh", headers_install, h, out_h] + + if verbose: + print('run_headers_install: cmd is %s' % cmd) + + result = subprocess.call(cmd, env=env) + + if result != 0: + print('error: run_headers_install: cmd %s failed %d' % (cmd, result)) + return False + return True + +def gen_gfx_headers(verbose, gen_dir, headers_install, unifdef, gfx_include_uapi): + error_count = 0 + for h in gfx_include_uapi: + gfx_uapi_include_prefix = os.path.join(h.split('/include/uapi/')[0], + 'include', + 'uapi', + 'linux') + os.sep + + if not run_headers_install( + verbose, gen_dir, headers_install, unifdef, + gfx_uapi_include_prefix, h): error_count += 1 + return error_count + +def main(): + """Parse command line arguments and perform top level control.""" + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + + # Arguments that apply to every invocation of this script. 
+ parser.add_argument( + '--verbose', action='store_true', + help='Print output that describes the workings of this script.') + parser.add_argument( + '--header_arch', required=True, + help='The arch for which to generate headers.') + parser.add_argument( + '--gen_dir', required=True, + help='Where to place the generated files.') + parser.add_argument( + '--gfx_include_uapi', required=True, nargs='*', + help='The list of techpack/*/include/uapi header files.') + parser.add_argument( + '--headers_install', required=True, + help='The headers_install tool to process input headers.') + parser.add_argument( + '--unifdef', + required=True, + help='The unifdef tool used by headers_install.') + + args = parser.parse_args() + + if args.verbose: + print('header_arch [%s]' % args.header_arch) + print('gen_dir [%s]' % args.gen_dir) + print('gfx_include_uapi [%s]' % args.gfx_include_uapi) + print('headers_install [%s]' % args.headers_install) + print('unifdef [%s]' % args.unifdef) + + return gen_gfx_headers(args.verbose, args.gen_dir, + args.headers_install, args.unifdef, args.gfx_include_uapi) + +if __name__ == '__main__': + sys.exit(main()) diff --git a/governor_gpubw_mon.c b/governor_gpubw_mon.c new file mode 100644 index 0000000000..147c43511f --- /dev/null +++ b/governor_gpubw_mon.c @@ -0,0 +1,318 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2014-2021, The Linux Foundation. All rights reserved. 
+ */ + +#include <linux/devfreq.h> +#include <linux/module.h> + +#include "governor.h" +#include "msm_adreno_devfreq.h" + +#define MIN_BUSY 1000 +#define LONG_FLOOR 50000 +#define HIST 5 +#define TARGET 80 +#define CAP 75 +#define WAIT_THRESHOLD 10 +/* AB vote is in multiple of BW_STEP Mega bytes */ +#define BW_STEP 50 + +static void _update_cutoff(struct devfreq_msm_adreno_tz_data *priv, + unsigned int norm_max) +{ + int i; + + priv->bus.max = norm_max; + for (i = 0; i < priv->bus.num; i++) { + priv->bus.up[i] = priv->bus.p_up[i] * norm_max / 100; + priv->bus.down[i] = priv->bus.p_down[i] * norm_max / 100; + } +} + +static ssize_t cur_ab_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct devfreq *df = to_devfreq(dev); + struct msm_busmon_extended_profile *bus_profile = container_of( + (df->profile), + struct msm_busmon_extended_profile, + profile); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", bus_profile->ab_mbytes); +} + +static ssize_t sampling_interval_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct devfreq *df = to_devfreq(dev); + struct msm_busmon_extended_profile *bus_profile = container_of( + (df->profile), + struct msm_busmon_extended_profile, + profile); + + return scnprintf(buf, PAGE_SIZE, "%d\n", bus_profile->sampling_ms); +} + +static ssize_t sampling_interval_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct devfreq *df = to_devfreq(dev); + struct msm_busmon_extended_profile *bus_profile = container_of( + (df->profile), + struct msm_busmon_extended_profile, + profile); + u32 value; + int ret; + + ret = kstrtou32(buf, 0, &value); + if (ret) + return ret; + + bus_profile->sampling_ms = value; + + return count; +} + +static DEVICE_ATTR_RW(sampling_interval); +static DEVICE_ATTR_RO(cur_ab); + +static const struct device_attribute *gpubw_attr_list[] = { + &dev_attr_sampling_interval, + &dev_attr_cur_ab, + NULL +}; + +static int devfreq_gpubw_get_target(struct devfreq *df, 
+ unsigned long *freq) +{ + + struct devfreq_msm_adreno_tz_data *priv = df->data; + struct msm_busmon_extended_profile *bus_profile = container_of( + (df->profile), + struct msm_busmon_extended_profile, + profile); + struct devfreq_dev_status *stats = &df->last_status; + struct xstats b; + int result; + int act_level; + int norm_max_cycles; + int norm_cycles; + int wait_active_percent; + int gpu_percent; + /* + * Normalized AB should at max usage be the gpu_bimc frequency in MHz. + * Start with a reasonable value and let the system push it up to max. + */ + static int norm_ab_max = 300; + int norm_ab; + unsigned long ab_mbytes = 0; + + if (priv == NULL) + return 0; + + stats->private_data = &b; + + result = devfreq_update_stats(df); + + *freq = stats->current_frequency; + + priv->bus.total_time += stats->total_time; + priv->bus.gpu_time += stats->busy_time; + priv->bus.ram_time += b.ram_time; + priv->bus.ram_wait += b.ram_wait; + + if (priv->bus.total_time < bus_profile->sampling_ms) + return result; + + norm_max_cycles = (unsigned int)(priv->bus.ram_time) / + (unsigned int) priv->bus.total_time; + norm_cycles = (unsigned int)(priv->bus.ram_time + priv->bus.ram_wait) / + (unsigned int) priv->bus.total_time; + wait_active_percent = (100 * (unsigned int)priv->bus.ram_wait) / + (unsigned int) priv->bus.ram_time; + gpu_percent = (100 * (unsigned int)priv->bus.gpu_time) / + (unsigned int) priv->bus.total_time; + + /* + * If there's a new high watermark, update the cutoffs and send the + * FAST hint, provided that we are using a floating watermark. + * Otherwise check the current value against the current + * cutoffs. + */ + if (norm_max_cycles > priv->bus.max && priv->bus.floating) { + _update_cutoff(priv, norm_max_cycles); + bus_profile->flag = DEVFREQ_FLAG_FAST_HINT; + } else { + /* GPU votes for IB not AB so don't under vote the system */ + norm_cycles = (100 * norm_cycles) / TARGET; + act_level = b.buslevel; + act_level = (act_level < 0) ? 
0 : act_level; + act_level = (act_level >= priv->bus.num) ? + (priv->bus.num - 1) : act_level; + if ((norm_cycles > priv->bus.up[act_level] || + wait_active_percent > WAIT_THRESHOLD) && + gpu_percent > CAP) + bus_profile->flag = DEVFREQ_FLAG_FAST_HINT; + else if (norm_cycles < priv->bus.down[act_level] && b.buslevel) + bus_profile->flag = DEVFREQ_FLAG_SLOW_HINT; + } + + /* Calculate the AB vote based on bus width if defined */ + if (priv->bus.width) { + norm_ab = (unsigned int)priv->bus.ram_time / + (unsigned int) priv->bus.total_time; + /* Calculate AB in Mega Bytes and roundup in BW_STEP */ + ab_mbytes = (norm_ab * priv->bus.width * 1000000ULL) >> 20; + bus_profile->ab_mbytes = roundup(ab_mbytes, BW_STEP); + } else if (bus_profile->flag) { + /* Re-calculate the AB percentage for a new IB vote */ + norm_ab = (unsigned int)priv->bus.ram_time / + (unsigned int) priv->bus.total_time; + if (norm_ab > norm_ab_max) + norm_ab_max = norm_ab; + bus_profile->percent_ab = (100 * norm_ab) / norm_ab_max; + } + + priv->bus.total_time = 0; + priv->bus.gpu_time = 0; + priv->bus.ram_time = 0; + priv->bus.ram_wait = 0; + + return result; +} + +static int gpubw_start(struct devfreq *devfreq) +{ + struct devfreq_msm_adreno_tz_data *priv; + + struct msm_busmon_extended_profile *bus_profile = container_of( + (devfreq->profile), + struct msm_busmon_extended_profile, + profile); + unsigned int t1, t2 = 2 * HIST; + int i, bus_size; + + + devfreq->data = bus_profile->private_data; + priv = devfreq->data; + + bus_size = sizeof(u32) * priv->bus.num; + priv->bus.up = kzalloc(bus_size, GFP_KERNEL); + priv->bus.down = kzalloc(bus_size, GFP_KERNEL); + priv->bus.p_up = kzalloc(bus_size, GFP_KERNEL); + priv->bus.p_down = kzalloc(bus_size, GFP_KERNEL); + if (priv->bus.up == NULL || priv->bus.down == NULL || + priv->bus.p_up == NULL || priv->bus.p_down == NULL) + return -ENOMEM; + + /* Set up the cut-over percentages for the bus calculation. 
*/ + for (i = 0; i < priv->bus.num; i++) { + t1 = (u32)(100 * priv->bus.ib_kbps[i]) / + (u32)priv->bus.ib_kbps[priv->bus.num - 1]; + priv->bus.p_up[i] = t1 - HIST; + priv->bus.p_down[i] = t2 - 2 * HIST; + t2 = t1; + } + /* Set the upper-most and lower-most bounds correctly. */ + priv->bus.p_down[0] = 0; + + for (i = 0; i < priv->bus.num; i++) { + if (priv->bus.p_down[i] < 2 * HIST) + priv->bus.p_down[i] = 2 * HIST; + } + + if (priv->bus.num >= 1) + priv->bus.p_up[priv->bus.num - 1] = 100; + _update_cutoff(priv, priv->bus.max); + + bus_profile->sampling_ms = LONG_FLOOR; + + for (i = 0; gpubw_attr_list[i] != NULL; i++) + device_create_file(&devfreq->dev, gpubw_attr_list[i]); + + return 0; +} + +static int gpubw_stop(struct devfreq *devfreq) +{ + struct devfreq_msm_adreno_tz_data *priv = devfreq->data; + int i; + + for (i = 0; gpubw_attr_list[i] != NULL; i++) + device_remove_file(&devfreq->dev, gpubw_attr_list[i]); + + if (priv) { + kfree(priv->bus.up); + kfree(priv->bus.down); + kfree(priv->bus.p_up); + kfree(priv->bus.p_down); + } + devfreq->data = NULL; + return 0; +} + +static int devfreq_gpubw_event_handler(struct devfreq *devfreq, + unsigned int event, void *data) +{ + int result = 0; + unsigned long freq; + + if (strcmp(dev_name(devfreq->dev.parent), "kgsl-busmon")) + return -EINVAL; + + mutex_lock(&devfreq->lock); + freq = devfreq->previous_freq; + switch (event) { + case DEVFREQ_GOV_START: + result = gpubw_start(devfreq); + break; + case DEVFREQ_GOV_STOP: + result = gpubw_stop(devfreq); + break; + case DEVFREQ_GOV_RESUME: + /* TODO ..... 
*/ + /* ret = update_devfreq(devfreq); */ + break; + case DEVFREQ_GOV_SUSPEND: + { + struct devfreq_msm_adreno_tz_data *priv = devfreq->data; + + if (priv) { + priv->bus.total_time = 0; + priv->bus.gpu_time = 0; + priv->bus.ram_time = 0; + } + } + break; + default: + result = 0; + break; + } + mutex_unlock(&devfreq->lock); + return result; +} + +static struct devfreq_governor devfreq_gpubw = { + .name = "gpubw_mon", + .get_target_freq = devfreq_gpubw_get_target, + .event_handler = devfreq_gpubw_event_handler, + .immutable = 1, +}; + +int devfreq_gpubw_init(void) +{ + return devfreq_add_governor(&devfreq_gpubw); +} + +void devfreq_gpubw_exit(void) +{ + int ret; + + ret = devfreq_remove_governor(&devfreq_gpubw); + if (ret) + pr_err("%s: failed remove governor %d\n", __func__, ret); + +} diff --git a/governor_msm_adreno_tz.c b/governor_msm_adreno_tz.c new file mode 100644 index 0000000000..18f4f16ead --- /dev/null +++ b/governor_msm_adreno_tz.c @@ -0,0 +1,563 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2010-2021, The Linux Foundation. All rights reserved. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "governor.h" +#include "msm_adreno_devfreq.h" + +static DEFINE_SPINLOCK(tz_lock); +static DEFINE_SPINLOCK(sample_lock); +static DEFINE_SPINLOCK(suspend_lock); +/* + * FLOOR is 5msec to capture up to 3 re-draws + * per frame for 60fps content. + */ +#define FLOOR 5000 +/* + * MIN_BUSY is 1 msec for the sample to be sent + */ +#define MIN_BUSY 1000 +#define MAX_TZ_VERSION 0 + +/* + * CEILING is 50msec, larger than any standard + * frame length, but less than the idle timer. 
+ */ +#define CEILING 50000 +#define TZ_RESET_ID 0x3 +#define TZ_UPDATE_ID 0x4 +#define TZ_INIT_ID 0x6 + +#define TZ_RESET_ID_64 0x7 +#define TZ_UPDATE_ID_64 0x8 +#define TZ_INIT_ID_64 0x9 + +#define TZ_V2_UPDATE_ID_64 0xA +#define TZ_V2_INIT_ID_64 0xB +#define TZ_V2_INIT_CA_ID_64 0xC +#define TZ_V2_UPDATE_WITH_CA_ID_64 0xD + +#define TAG "msm_adreno_tz: " + +static u64 suspend_time; +static u64 suspend_start; +static unsigned long acc_total, acc_relative_busy; + +/* + * Returns GPU suspend time in millisecond. + */ +u64 suspend_time_ms(void) +{ + u64 suspend_sampling_time; + u64 time_diff = 0; + + if (suspend_start == 0) + return 0; + + suspend_sampling_time = (u64)ktime_to_ms(ktime_get()); + time_diff = suspend_sampling_time - suspend_start; + /* Update the suspend_start sample again */ + suspend_start = suspend_sampling_time; + return time_diff; +} + +static ssize_t gpu_load_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long sysfs_busy_perc = 0; + /* + * Average out the samples taken since last read + * This will keep the average value in sync with + * with the client sampling duration. + */ + spin_lock(&sample_lock); + if (acc_total) + sysfs_busy_perc = (acc_relative_busy * 100) / acc_total; + + /* Reset the parameters */ + acc_total = 0; + acc_relative_busy = 0; + spin_unlock(&sample_lock); + return snprintf(buf, PAGE_SIZE, "%lu\n", sysfs_busy_perc); +} + +/* + * Returns the time in ms for which gpu was in suspend state + * since last time the entry is read. + */ +static ssize_t suspend_time_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u64 time_diff = 0; + + spin_lock(&suspend_lock); + time_diff = suspend_time_ms(); + /* + * Adding the previous suspend time also as the gpu + * can go and come out of suspend states in between + * reads also and we should have the total suspend + * since last read. 
+ */ + time_diff += suspend_time; + suspend_time = 0; + spin_unlock(&suspend_lock); + + return snprintf(buf, PAGE_SIZE, "%llu\n", time_diff); +} + +static ssize_t mod_percent_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret; + unsigned int val; + struct devfreq *devfreq = to_devfreq(dev); + struct devfreq_msm_adreno_tz_data *priv = devfreq->data; + + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; + + priv->mod_percent = clamp_t(u32, val, 10, 1000); + + return count; +} + +static ssize_t mod_percent_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct devfreq *devfreq = to_devfreq(dev); + struct devfreq_msm_adreno_tz_data *priv = devfreq->data; + + return scnprintf(buf, PAGE_SIZE, "%u\n", priv->mod_percent); +} + +static DEVICE_ATTR_RO(gpu_load); + +static DEVICE_ATTR_RO(suspend_time); +static DEVICE_ATTR_RW(mod_percent); + +static const struct device_attribute *adreno_tz_attr_list[] = { + &dev_attr_gpu_load, + &dev_attr_suspend_time, + &dev_attr_mod_percent, + NULL +}; + +void compute_work_load(struct devfreq_dev_status *stats, + struct devfreq_msm_adreno_tz_data *priv, + struct devfreq *devfreq) +{ + u64 busy; + + spin_lock(&sample_lock); + /* + * Keep collecting the stats till the client + * reads it. Average of all samples and reset + * is done when the entry is read + */ + acc_total += stats->total_time; + busy = (u64)stats->busy_time * stats->current_frequency; + do_div(busy, devfreq->profile->freq_table[0]); + acc_relative_busy += busy; + + spin_unlock(&sample_lock); +} + +/* Trap into the TrustZone, and call funcs there. 
*/ +static int __secure_tz_reset_entry2(unsigned int *scm_data, u32 size_scm_data, + bool is_64) +{ + int ret; + /* sync memory before sending the commands to tz */ + __iowmb(); + + if (!is_64) { + spin_lock(&tz_lock); + ret = qcom_scm_io_reset(); + spin_unlock(&tz_lock); + } else { + ret = qcom_scm_dcvs_reset(); + } + + return ret; +} + +static int __secure_tz_update_entry3(int level, s64 total_time, s64 busy_time, + int context_count, struct devfreq_msm_adreno_tz_data *priv) +{ + int ret; + /* sync memory before sending the commands to tz */ + __iowmb(); + + if (!priv->is_64) { + spin_lock(&tz_lock); + ret = qcom_scm_dcvs_update(level, total_time, busy_time); + spin_unlock(&tz_lock); + } else if (!priv->ctxt_aware_enable) { + ret = qcom_scm_dcvs_update_v2(level, total_time, busy_time); + } else { + ret = qcom_scm_dcvs_update_ca_v2(level, total_time, busy_time, + context_count); + } + + return ret; +} + +static int tz_init_ca(struct device *dev, + struct devfreq_msm_adreno_tz_data *priv) +{ + unsigned int tz_ca_data[2]; + phys_addr_t paddr; + u8 *tz_buf; + int ret; + struct qtee_shm shm; + + /* Set data for TZ */ + tz_ca_data[0] = priv->bin.ctxt_aware_target_pwrlevel; + tz_ca_data[1] = priv->bin.ctxt_aware_busy_penalty; + + if (!qtee_shmbridge_is_enabled()) { + tz_buf = kzalloc(PAGE_ALIGN(sizeof(tz_ca_data)), GFP_KERNEL); + if (!tz_buf) + return -ENOMEM; + paddr = virt_to_phys(tz_buf); + } else { + ret = qtee_shmbridge_allocate_shm( + PAGE_ALIGN(sizeof(tz_ca_data)), &shm); + if (ret) + return -ENOMEM; + tz_buf = shm.vaddr; + paddr = shm.paddr; + } + + memcpy(tz_buf, tz_ca_data, sizeof(tz_ca_data)); + /* Ensure memcpy completes execution */ + mb(); + dma_sync_single_for_device(dev, paddr, + PAGE_ALIGN(sizeof(tz_ca_data)), DMA_BIDIRECTIONAL); + + ret = qcom_scm_dcvs_init_ca_v2(paddr, sizeof(tz_ca_data)); + + if (!qtee_shmbridge_is_enabled()) + kfree_sensitive(tz_buf); + else + qtee_shmbridge_free_shm(&shm); + + return ret; +} + +static int tz_init(struct device 
*dev, struct devfreq_msm_adreno_tz_data *priv, + unsigned int *tz_pwrlevels, u32 size_pwrlevels, + unsigned int *version, u32 size_version) +{ + int ret; + phys_addr_t paddr; + + if (qcom_scm_dcvs_core_available()) { + u8 *tz_buf; + struct qtee_shm shm; + + if (!qtee_shmbridge_is_enabled()) { + tz_buf = kzalloc(PAGE_ALIGN(size_pwrlevels), + GFP_KERNEL); + if (!tz_buf) + return -ENOMEM; + paddr = virt_to_phys(tz_buf); + } else { + ret = qtee_shmbridge_allocate_shm( + PAGE_ALIGN(size_pwrlevels), &shm); + if (ret) + return -ENOMEM; + tz_buf = shm.vaddr; + paddr = shm.paddr; + } + + memcpy(tz_buf, tz_pwrlevels, size_pwrlevels); + /* Ensure memcpy completes execution */ + mb(); + dma_sync_single_for_device(dev, paddr, + PAGE_ALIGN(size_pwrlevels), DMA_BIDIRECTIONAL); + + ret = qcom_scm_dcvs_init_v2(paddr, size_pwrlevels, version); + if (!ret) + priv->is_64 = true; + if (!qtee_shmbridge_is_enabled()) + kfree_sensitive(tz_buf); + else + qtee_shmbridge_free_shm(&shm); + } else + ret = -EINVAL; + + /* Initialize context aware feature, if enabled. */ + if (!ret && priv->ctxt_aware_enable) { + if (priv->is_64 && qcom_scm_dcvs_ca_available()) { + ret = tz_init_ca(dev, priv); + /* + * If context aware feature initialization fails, + * just print an error message and return + * success as normal DCVS will still work. 
+ */ + if (ret) { + pr_err(TAG "tz: context aware DCVS init failed\n"); + priv->ctxt_aware_enable = false; + return 0; + } + } else { + pr_warn(TAG "tz: context aware DCVS not supported\n"); + priv->ctxt_aware_enable = false; + } + } + + return ret; +} + +static inline int devfreq_get_freq_level(struct devfreq *devfreq, + unsigned long freq) +{ + int lev; + + for (lev = 0; lev < devfreq->profile->max_state; lev++) + if (freq == devfreq->profile->freq_table[lev]) + return lev; + + return -EINVAL; +} + +static int tz_get_target_freq(struct devfreq *devfreq, unsigned long *freq) +{ + int result = 0; + struct devfreq_msm_adreno_tz_data *priv = devfreq->data; + struct devfreq_dev_status *stats = &devfreq->last_status; + int val, level = 0; + int context_count = 0; + u64 busy_time; + + if (!priv) + return 0; + + /* keeps stats.private_data == NULL */ + result = devfreq_update_stats(devfreq); + if (result) { + pr_err(TAG "get_status failed %d\n", result); + return result; + } + + *freq = stats->current_frequency; + priv->bin.total_time += stats->total_time; + + /* Update gpu busy time as per mod_percent */ + busy_time = stats->busy_time * priv->mod_percent; + do_div(busy_time, 100); + + /* busy_time should not go over total_time */ + stats->busy_time = min_t(u64, busy_time, stats->total_time); + + priv->bin.busy_time += stats->busy_time; + + if (stats->private_data) + context_count = *((int *)stats->private_data); + + /* Update the GPU load statistics */ + compute_work_load(stats, priv, devfreq); + /* + * Do not waste CPU cycles running this algorithm if + * the GPU just started, or if less than FLOOR time + * has passed since the last run or the gpu hasn't been + * busier than MIN_BUSY. 
+ */ + if ((stats->total_time == 0) || + (priv->bin.total_time < FLOOR) || + (unsigned int) priv->bin.busy_time < MIN_BUSY) { + return 0; + } + + level = devfreq_get_freq_level(devfreq, stats->current_frequency); + if (level < 0) { + pr_err(TAG "bad freq %ld\n", stats->current_frequency); + return level; + } + + /* + * If there is an extended block of busy processing, + * increase frequency. Otherwise run the normal algorithm. + */ + if (!priv->disable_busy_time_burst && + priv->bin.busy_time > CEILING) { + val = -1 * level; + } else { + val = __secure_tz_update_entry3(level, priv->bin.total_time, + priv->bin.busy_time, context_count, priv); + } + + priv->bin.total_time = 0; + priv->bin.busy_time = 0; + + /* + * If the decision is to move to a different level, make sure the GPU + * frequency changes. + */ + if (val) { + level += val; + level = max(level, 0); + level = min_t(int, level, devfreq->profile->max_state - 1); + } + + *freq = devfreq->profile->freq_table[level]; + return 0; +} + +static int tz_start(struct devfreq *devfreq) +{ + struct devfreq_msm_adreno_tz_data *priv; + unsigned int tz_pwrlevels[MSM_ADRENO_MAX_PWRLEVELS + 1]; + int i, out, ret; + unsigned int version; + + struct msm_adreno_extended_profile *gpu_profile = container_of( + (devfreq->profile), + struct msm_adreno_extended_profile, + profile); + + /* + * Assuming that we have only one instance of the adreno device + * connected to this governor, + * can safely restore the pointer to the governor private data + * from the container of the device profile + */ + devfreq->data = gpu_profile->private_data; + + priv = devfreq->data; + + out = 1; + if (devfreq->profile->max_state < ARRAY_SIZE(tz_pwrlevels)) { + for (i = 0; i < devfreq->profile->max_state; i++) + tz_pwrlevels[out++] = devfreq->profile->freq_table[i]; + tz_pwrlevels[0] = i; + } else { + pr_err(TAG "tz_pwrlevels[] is too short\n"); + return -EINVAL; + } + + ret = tz_init(&devfreq->dev, priv, tz_pwrlevels, sizeof(tz_pwrlevels), + 
&version, sizeof(version)); + if (ret != 0 || version > MAX_TZ_VERSION) { + pr_err(TAG "tz_init failed\n"); + return ret; + } + + for (i = 0; adreno_tz_attr_list[i] != NULL; i++) + device_create_file(&devfreq->dev, adreno_tz_attr_list[i]); + + return 0; +} + +static int tz_stop(struct devfreq *devfreq) +{ + int i; + + for (i = 0; adreno_tz_attr_list[i] != NULL; i++) + device_remove_file(&devfreq->dev, adreno_tz_attr_list[i]); + + /* leaving the governor and cleaning the pointer to private data */ + devfreq->data = NULL; + return 0; +} + +static int tz_suspend(struct devfreq *devfreq) +{ + struct devfreq_msm_adreno_tz_data *priv = devfreq->data; + unsigned int scm_data[2] = {0, 0}; + + if (!priv) + return 0; + + __secure_tz_reset_entry2(scm_data, sizeof(scm_data), priv->is_64); + + priv->bin.total_time = 0; + priv->bin.busy_time = 0; + return 0; +} + +static int tz_handler(struct devfreq *devfreq, unsigned int event, void *data) +{ + int result; + struct device_node *node = devfreq->dev.parent->of_node; + + if (!of_device_is_compatible(node, "qcom,kgsl-3d0")) + return -EINVAL; + + switch (event) { + case DEVFREQ_GOV_START: + result = tz_start(devfreq); + break; + + case DEVFREQ_GOV_STOP: + spin_lock(&suspend_lock); + suspend_start = 0; + spin_unlock(&suspend_lock); + result = tz_stop(devfreq); + break; + + case DEVFREQ_GOV_SUSPEND: + result = tz_suspend(devfreq); + if (!result) { + spin_lock(&suspend_lock); + /* Collect the start sample for suspend time */ + suspend_start = (u64)ktime_to_ms(ktime_get()); + spin_unlock(&suspend_lock); + } + break; + + case DEVFREQ_GOV_RESUME: + spin_lock(&suspend_lock); + suspend_time += suspend_time_ms(); + /* Reset the suspend_start when gpu resumes */ + suspend_start = 0; + spin_unlock(&suspend_lock); + /* fallthrough */ + case DEVFREQ_GOV_UPDATE_INTERVAL: + /* fallthrough, this governor doesn't use polling */ + default: + result = 0; + break; + } + + return result; +} + +static struct devfreq_governor msm_adreno_tz = { + .name = 
"msm-adreno-tz", + .get_target_freq = tz_get_target_freq, + .event_handler = tz_handler, + .immutable = 1, +}; + +int msm_adreno_tz_init(void) +{ + return devfreq_add_governor(&msm_adreno_tz); +} + +void msm_adreno_tz_exit(void) +{ + int ret = devfreq_remove_governor(&msm_adreno_tz); + + if (ret) + pr_err(TAG "failed to remove governor %d\n", ret); +} diff --git a/include/linux/msm_kgsl.h b/include/linux/msm_kgsl.h new file mode 100644 index 0000000000..e5950b135d --- /dev/null +++ b/include/linux/msm_kgsl.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ +#ifndef _MSM_KGSL_H +#define _MSM_KGSL_H + +/** + * struct kgsl_gpu_freq_stat - Per GPU freq stat struct + * @freq: GPU frequency in Hz + * @active_time: GPU busy time in usecs + * @idle_time: GPU idle time in usec + */ +struct kgsl_gpu_freq_stat { + u32 freq; + u64 active_time; + u64 idle_time; +}; + +/** + * kgsl_gpu_num_freqs - Get number of available GPU frequencies + * + * Return: number of available frequencies on success or negative error + * on failure + */ +int kgsl_gpu_num_freqs(void); + +/** + * kgsl_gpu_stat - Get per GPU freq stats + * @stats: Array of struct kgsl_gpu_freq_stat to hold stats + * @numfreq: Number of entries in @stats + * + * This function will populate @stats with per freq stats. 
+ * Number of entries in @stats array must be greater or + * equal to value returned by function kgsl_gpu_num_freqs + * + * Return: 0 on success or negative error on failure + */ +int kgsl_gpu_stat(struct kgsl_gpu_freq_stat *stats, u32 numfreq); + +/** + * kgsl_gpu_frame_count - Get number of frames already processed by GPU + * @pid: pid of the process for which frame count is required + * @frame_count: pointer to a u64 to store frame count + * + * Return: zero on success and number of frames processed corresponding + * to @pid in @frame_count or negative error on failure + */ +int kgsl_gpu_frame_count(pid_t pid, u64 *frame_count); + +#endif /* _MSM_KGSL_H */ + diff --git a/include/uapi/linux/msm_kgsl.h b/include/uapi/linux/msm_kgsl.h new file mode 100644 index 0000000000..4b67887f09 --- /dev/null +++ b/include/uapi/linux/msm_kgsl.h @@ -0,0 +1,2001 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved. + */ + +#ifndef _UAPI_MSM_KGSL_H +#define _UAPI_MSM_KGSL_H + +#include +#include + +/* + * The KGSL version has proven not to be very useful in userspace if features + * are cherry picked into other trees out of order so it is frozen as of 3.14. + * It is left here for backwards compatabilty and as a reminder that + * software releases are never linear. Also, I like pie. + */ + +#define KGSL_VERSION_MAJOR 3 +#define KGSL_VERSION_MINOR 14 + +/* + * We have traditionally mixed context and issueibcmds / command batch flags + * together into a big flag stew. This worked fine until we started adding a + * lot more command batch flags and we started running out of bits. Turns out + * we have a bit of room in the context type / priority mask that we could use + * for command batches, but that means we need to split out the flags into two + * coherent sets. 
+ * + * If any future definitions are for both context and cmdbatch add both defines + * and link the cmdbatch to the context define as we do below. Otherwise feel + * free to add exclusive bits to either set. + */ + +/* --- context flags --- */ +#define KGSL_CONTEXT_SAVE_GMEM 0x00000001 +#define KGSL_CONTEXT_NO_GMEM_ALLOC 0x00000002 +/* This is a cmdbatch exclusive flag - use the CMDBATCH equivalent instead */ +#define KGSL_CONTEXT_SUBMIT_IB_LIST 0x00000004 +#define KGSL_CONTEXT_CTX_SWITCH 0x00000008 +#define KGSL_CONTEXT_PREAMBLE 0x00000010 +#define KGSL_CONTEXT_TRASH_STATE 0x00000020 +#define KGSL_CONTEXT_PER_CONTEXT_TS 0x00000040 +#define KGSL_CONTEXT_USER_GENERATED_TS 0x00000080 +/* This is a cmdbatch exclusive flag - use the CMDBATCH equivalent instead */ +#define KGSL_CONTEXT_END_OF_FRAME 0x00000100 +#define KGSL_CONTEXT_NO_FAULT_TOLERANCE 0x00000200 +/* This is a cmdbatch exclusive flag - use the CMDBATCH equivalent instead */ +#define KGSL_CONTEXT_SYNC 0x00000400 +#define KGSL_CONTEXT_PWR_CONSTRAINT 0x00000800 +#define KGSL_CONTEXT_PRIORITY_MASK 0x0000F000 +#define KGSL_CONTEXT_PRIORITY_SHIFT 12 +#define KGSL_CONTEXT_PRIORITY_UNDEF 0 + +#define KGSL_CONTEXT_IFH_NOP 0x00010000 +#define KGSL_CONTEXT_SECURE 0x00020000 +#define KGSL_CONTEXT_NO_SNAPSHOT 0x00040000 +#define KGSL_CONTEXT_SPARSE 0x00080000 + +#define KGSL_CONTEXT_PREEMPT_STYLE_MASK 0x0E000000 +#define KGSL_CONTEXT_PREEMPT_STYLE_SHIFT 25 +#define KGSL_CONTEXT_PREEMPT_STYLE_DEFAULT 0x0 +#define KGSL_CONTEXT_PREEMPT_STYLE_RINGBUFFER 0x1 +#define KGSL_CONTEXT_PREEMPT_STYLE_FINEGRAIN 0x2 + +#define KGSL_CONTEXT_TYPE_MASK 0x01F00000 +#define KGSL_CONTEXT_TYPE_SHIFT 20 +#define KGSL_CONTEXT_TYPE_ANY 0 +#define KGSL_CONTEXT_TYPE_GL 1 +#define KGSL_CONTEXT_TYPE_CL 2 +#define KGSL_CONTEXT_TYPE_C2D 3 +#define KGSL_CONTEXT_TYPE_RS 4 +#define KGSL_CONTEXT_TYPE_VK 5 +#define KGSL_CONTEXT_TYPE_UNKNOWN 0x1E + +#define KGSL_CONTEXT_INVALIDATE_ON_FAULT 0x10000000 + +#define KGSL_CONTEXT_INVALID 0xffffffff + +/* + * 
--- command batch flags --- + * The bits that are linked to a KGSL_CONTEXT equivalent are either legacy + * definitions or bits that are valid for both contexts and cmdbatches. To be + * safe the other 8 bits that are still available in the context field should be + * omitted here in case we need to share - the other bits are available for + * cmdbatch only flags as needed + */ +#define KGSL_CMDBATCH_MEMLIST 0x00000001 +#define KGSL_CMDBATCH_MARKER 0x00000002 +#define KGSL_CMDBATCH_SUBMIT_IB_LIST KGSL_CONTEXT_SUBMIT_IB_LIST /* 0x004 */ +#define KGSL_CMDBATCH_CTX_SWITCH KGSL_CONTEXT_CTX_SWITCH /* 0x008 */ +#define KGSL_CMDBATCH_PROFILING 0x00000010 +/* + * KGSL_CMDBATCH_PROFILING must also be set for KGSL_CMDBATCH_PROFILING_KTIME + * to take effect, as the latter only affects the time data returned. + */ +#define KGSL_CMDBATCH_PROFILING_KTIME 0x00000020 +#define KGSL_CMDBATCH_END_OF_FRAME KGSL_CONTEXT_END_OF_FRAME /* 0x100 */ +#define KGSL_CMDBATCH_SYNC KGSL_CONTEXT_SYNC /* 0x400 */ +#define KGSL_CMDBATCH_PWR_CONSTRAINT KGSL_CONTEXT_PWR_CONSTRAINT /* 0x800 */ +#define KGSL_CMDBATCH_SPARSE 0x1000 /* 0x1000 */ + +/* + * Reserve bits [16:19] and bits [28:31] for possible bits shared between + * contexts and command batches. Update this comment as new flags are added. 
+ */ + +/* + * gpu_command_object flags - these flags communicate the type of command or + * memory object being submitted for a GPU command + */ + +/* Flags for GPU command objects */ +#define KGSL_CMDLIST_IB 0x00000001U +#define KGSL_CMDLIST_CTXTSWITCH_PREAMBLE 0x00000002U +#define KGSL_CMDLIST_IB_PREAMBLE 0x00000004U + +/* Flags for GPU command memory objects */ +#define KGSL_OBJLIST_MEMOBJ 0x00000008U +#define KGSL_OBJLIST_PROFILE 0x00000010U + +/* Flags for GPU command sync points */ +#define KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP 0 +#define KGSL_CMD_SYNCPOINT_TYPE_FENCE 1 +#define KGSL_CMD_SYNCPOINT_TYPE_TIMELINE 2 + +/* --- Memory allocation flags --- */ + +/* General allocation hints */ +#define KGSL_MEMFLAGS_SECURE (1ULL << 3) +#define KGSL_MEMFLAGS_GPUREADONLY (1ULL << 24) +#define KGSL_MEMFLAGS_GPUWRITEONLY (1ULL << 25) +#define KGSL_MEMFLAGS_FORCE_32BIT (1ULL << 32) + +/* Flag for binding all the virt range to single phys data */ +#define KGSL_SPARSE_BIND_MULTIPLE_TO_PHYS 0x400000000ULL +#define KGSL_SPARSE_BIND 0x1ULL +#define KGSL_SPARSE_UNBIND 0x2ULL + +/* Memory caching hints */ +#define KGSL_CACHEMODE_MASK 0x0C000000U +#define KGSL_CACHEMODE_SHIFT 26 + +#define KGSL_CACHEMODE_WRITECOMBINE 0 +#define KGSL_CACHEMODE_UNCACHED 1 +#define KGSL_CACHEMODE_WRITETHROUGH 2 +#define KGSL_CACHEMODE_WRITEBACK 3 + +#define KGSL_MEMFLAGS_USE_CPU_MAP (1ULL << 28) +#define KGSL_MEMFLAGS_SPARSE_PHYS (1ULL << 29) +#define KGSL_MEMFLAGS_SPARSE_VIRT (1ULL << 30) +#define KGSL_MEMFLAGS_IOCOHERENT (1ULL << 31) +#define KGSL_MEMFLAGS_GUARD_PAGE (1ULL << 33) +#define KGSL_MEMFLAGS_VBO (1ULL << 34) + +/* Memory types for which allocations are made */ +#define KGSL_MEMTYPE_MASK 0x0000FF00 +#define KGSL_MEMTYPE_SHIFT 8 + +#define KGSL_MEMTYPE_OBJECTANY 0 +#define KGSL_MEMTYPE_FRAMEBUFFER 1 +#define KGSL_MEMTYPE_RENDERBUFFER 2 +#define KGSL_MEMTYPE_ARRAYBUFFER 3 +#define KGSL_MEMTYPE_ELEMENTARRAYBUFFER 4 +#define KGSL_MEMTYPE_VERTEXARRAYBUFFER 5 +#define KGSL_MEMTYPE_TEXTURE 6 
+#define KGSL_MEMTYPE_SURFACE 7 +#define KGSL_MEMTYPE_EGL_SURFACE 8 +#define KGSL_MEMTYPE_GL 9 +#define KGSL_MEMTYPE_CL 10 +#define KGSL_MEMTYPE_CL_BUFFER_MAP 11 +#define KGSL_MEMTYPE_CL_BUFFER_NOMAP 12 +#define KGSL_MEMTYPE_CL_IMAGE_MAP 13 +#define KGSL_MEMTYPE_CL_IMAGE_NOMAP 14 +#define KGSL_MEMTYPE_CL_KERNEL_STACK 15 +#define KGSL_MEMTYPE_COMMAND 16 +#define KGSL_MEMTYPE_2D 17 +#define KGSL_MEMTYPE_EGL_IMAGE 18 +#define KGSL_MEMTYPE_EGL_SHADOW 19 +#define KGSL_MEMTYPE_MULTISAMPLE 20 +#define KGSL_MEMTYPE_KERNEL 255 + +/* + * Alignment hint, passed as the power of 2 exponent. + * i.e 4k (2^12) would be 12, 64k (2^16)would be 16. + */ +#define KGSL_MEMALIGN_MASK 0x00FF0000 +#define KGSL_MEMALIGN_SHIFT 16 + +enum kgsl_user_mem_type { + KGSL_USER_MEM_TYPE_PMEM = 0x00000000, + KGSL_USER_MEM_TYPE_ASHMEM = 0x00000001, + KGSL_USER_MEM_TYPE_ADDR = 0x00000002, + KGSL_USER_MEM_TYPE_ION = 0x00000003, + /* + * ION type is retained for backwards compatibility but Ion buffers are + * dma-bufs so try to use that naming if we can + */ + KGSL_USER_MEM_TYPE_DMABUF = 0x00000003, + KGSL_USER_MEM_TYPE_MAX = 0x00000007, +}; +#define KGSL_MEMFLAGS_USERMEM_MASK 0x000000e0 +#define KGSL_MEMFLAGS_USERMEM_SHIFT 5 + +/* + * Unfortunately, enum kgsl_user_mem_type starts at 0 which does not + * leave a good value for allocated memory. In the flags we use + * 0 to indicate allocated memory and thus need to add 1 to the enum + * values. 
+ */ +#define KGSL_USERMEM_FLAG(x) (((x) + 1) << KGSL_MEMFLAGS_USERMEM_SHIFT) + +#define KGSL_MEMFLAGS_NOT_USERMEM 0 +#define KGSL_MEMFLAGS_USERMEM_PMEM KGSL_USERMEM_FLAG(KGSL_USER_MEM_TYPE_PMEM) +#define KGSL_MEMFLAGS_USERMEM_ASHMEM \ + KGSL_USERMEM_FLAG(KGSL_USER_MEM_TYPE_ASHMEM) +#define KGSL_MEMFLAGS_USERMEM_ADDR KGSL_USERMEM_FLAG(KGSL_USER_MEM_TYPE_ADDR) +#define KGSL_MEMFLAGS_USERMEM_ION KGSL_USERMEM_FLAG(KGSL_USER_MEM_TYPE_ION) + +/* --- generic KGSL flag values --- */ + +#define KGSL_FLAGS_NORMALMODE 0x00000000 +#define KGSL_FLAGS_SAFEMODE 0x00000001 +#define KGSL_FLAGS_INITIALIZED0 0x00000002 +#define KGSL_FLAGS_INITIALIZED 0x00000004 +#define KGSL_FLAGS_STARTED 0x00000008 +#define KGSL_FLAGS_ACTIVE 0x00000010 +#define KGSL_FLAGS_RESERVED0 0x00000020 +#define KGSL_FLAGS_RESERVED1 0x00000040 +#define KGSL_FLAGS_RESERVED2 0x00000080 +#define KGSL_FLAGS_SOFT_RESET 0x00000100 +#define KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS 0x00000200 + +/* Server Side Sync Timeout in milliseconds */ +#define KGSL_SYNCOBJ_SERVER_TIMEOUT 2000 + +/* UBWC Modes */ +#define KGSL_UBWC_NONE 0 +#define KGSL_UBWC_1_0 1 +#define KGSL_UBWC_2_0 2 +#define KGSL_UBWC_3_0 3 +#define KGSL_UBWC_4_0 4 + +/* + * Reset status values for context + */ +enum kgsl_ctx_reset_stat { + KGSL_CTX_STAT_NO_ERROR = 0x00000000, + KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT = 0x00000001, + KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT = 0x00000002, + KGSL_CTX_STAT_UNKNOWN_CONTEXT_RESET_EXT = 0x00000003 +}; + +#define KGSL_CONVERT_TO_MBPS(val) \ + (val*1000*1000U) + +struct kgsl_devinfo { + + unsigned int device_id; + /* + * chip revision id + * coreid:8 majorrev:8 minorrev:8 patch:8 + */ + unsigned int chip_id; + unsigned int mmu_enabled; + unsigned long gmem_gpubaseaddr; + /* + * This field contains the adreno revision + * number 200, 205, 220, etc... 
+ */ + unsigned int gpu_id; + __kernel_size_t gmem_sizebytes; +}; + +/* + * struct kgsl_devmemstore - this structure defines the region of memory + * that can be mmap()ed from this driver. The timestamp fields are volatile + * because they are written by the GPU + * @soptimestamp: Start of pipeline timestamp written by GPU before the + * commands in concern are processed + * @sbz: Unused, kept for 8 byte alignment + * @eoptimestamp: End of pipeline timestamp written by GPU after the + * commands in concern are processed + * @sbz2: Unused, kept for 8 byte alignment + * @preempted: Indicates if the context was preempted + * @sbz3: Unused, kept for 8 byte alignment + * @ref_wait_ts: Timestamp on which to generate interrupt, unused now. + * @sbz4: Unused, kept for 8 byte alignment + * @current_context: The current context the GPU is working on + * @sbz5: Unused, kept for 8 byte alignment + */ +struct kgsl_devmemstore { + volatile unsigned int soptimestamp; + unsigned int sbz; + volatile unsigned int eoptimestamp; + unsigned int sbz2; + volatile unsigned int preempted; + unsigned int sbz3; + volatile unsigned int ref_wait_ts; + unsigned int sbz4; + unsigned int current_context; + unsigned int sbz5; +}; + +#define KGSL_MEMSTORE_OFFSET(ctxt_id, field) \ + ((ctxt_id)*sizeof(struct kgsl_devmemstore) + \ + offsetof(struct kgsl_devmemstore, field)) + +/* timestamp id*/ +enum kgsl_timestamp_type { + KGSL_TIMESTAMP_CONSUMED = 0x00000001, /* start-of-pipeline timestamp */ + KGSL_TIMESTAMP_RETIRED = 0x00000002, /* end-of-pipeline timestamp*/ + KGSL_TIMESTAMP_QUEUED = 0x00000003, +}; + +/* property types - used with kgsl_device_getproperty */ +#define KGSL_PROP_DEVICE_INFO 0x1 +#define KGSL_PROP_DEVICE_SHADOW 0x2 +#define KGSL_PROP_DEVICE_POWER 0x3 +#define KGSL_PROP_SHMEM 0x4 +#define KGSL_PROP_SHMEM_APERTURES 0x5 +#define KGSL_PROP_MMU_ENABLE 0x6 +#define KGSL_PROP_INTERRUPT_WAITS 0x7 +#define KGSL_PROP_VERSION 0x8 +#define KGSL_PROP_GPU_RESET_STAT 0x9 +#define KGSL_PROP_PWRCTRL 
0xE +#define KGSL_PROP_PWR_CONSTRAINT 0x12 +#define KGSL_PROP_UCHE_GMEM_VADDR 0x13 +#define KGSL_PROP_SP_GENERIC_MEM 0x14 +#define KGSL_PROP_UCODE_VERSION 0x15 +#define KGSL_PROP_GPMU_VERSION 0x16 +#define KGSL_PROP_HIGHEST_BANK_BIT 0x17 +#define KGSL_PROP_DEVICE_BITNESS 0x18 +#define KGSL_PROP_DEVICE_QDSS_STM 0x19 +#define KGSL_PROP_MIN_ACCESS_LENGTH 0x1A +#define KGSL_PROP_UBWC_MODE 0x1B +#define KGSL_PROP_DEVICE_QTIMER 0x20 +#define KGSL_PROP_L3_PWR_CONSTRAINT 0x22 +#define KGSL_PROP_SECURE_BUFFER_ALIGNMENT 0x23 +#define KGSL_PROP_SECURE_CTXT_SUPPORT 0x24 +#define KGSL_PROP_SPEED_BIN 0x25 +#define KGSL_PROP_GAMING_BIN 0x26 +#define KGSL_PROP_QUERY_CAPABILITIES 0x27 +#define KGSL_PROP_CONTEXT_PROPERTY 0x28 +#define KGSL_PROP_GPU_MODEL 0x29 +#define KGSL_PROP_VK_DEVICE_ID 0x2A + +/* + * kgsl_capabilities_properties returns a list of supported properties. + * If the user passes 0 for 'count' the kernel will set it to the number of + * supported properties. The list is expected to be 'count * sizeof(__u32)' + * bytes long. The kernel will return the actual number of entries copied into + * list via 'count'. + */ +struct kgsl_capabilities_properties { + __u64 list; + __u32 count; +}; + +/* + * KGSL_QUERY_CAPS_PROPERTIES returns a list of the valid properties in the + * kernel. The subtype data should be struct kgsl_capabilities_properties + */ +#define KGSL_QUERY_CAPS_PROPERTIES 1 + +/* + * kgsl_capabilities allows the user to query kernel capabilities. The 'data' + * type should be set appropriately for the querytype (see above). Pass 0 to + * 'size' and the kernel will set it to the expected size of 'data' that is + * appropriate for querytype (in bytes). 
+ */ +struct kgsl_capabilities { + __u64 data; + __u64 size; + __u32 querytype; +}; + +struct kgsl_shadowprop { + unsigned long gpuaddr; + __kernel_size_t size; + unsigned int flags; /* contains KGSL_FLAGS_ values */ +}; + +struct kgsl_qdss_stm_prop { + __u64 gpuaddr; + __u64 size; +}; + +struct kgsl_qtimer_prop { + __u64 gpuaddr; + __u64 size; +}; + +struct kgsl_version { + unsigned int drv_major; + unsigned int drv_minor; + unsigned int dev_major; + unsigned int dev_minor; +}; + +struct kgsl_sp_generic_mem { + __u64 local; + __u64 pvt; +}; + +struct kgsl_ucode_version { + unsigned int pfp; + unsigned int pm4; +}; + +struct kgsl_gpmu_version { + unsigned int major; + unsigned int minor; + unsigned int features; +}; + +struct kgsl_context_property { + __u64 data; + __u32 size; + __u32 type; + __u32 contextid; +}; + +struct kgsl_context_property_fault { + __s32 faults; + __u32 timestamp; +}; + +struct kgsl_gpu_model { + char gpu_model[32]; +}; + +/* Context property sub types */ +#define KGSL_CONTEXT_PROP_FAULTS 1 + +/* Performance counter groups */ + +#define KGSL_PERFCOUNTER_GROUP_CP 0x0 +#define KGSL_PERFCOUNTER_GROUP_RBBM 0x1 +#define KGSL_PERFCOUNTER_GROUP_PC 0x2 +#define KGSL_PERFCOUNTER_GROUP_VFD 0x3 +#define KGSL_PERFCOUNTER_GROUP_HLSQ 0x4 +#define KGSL_PERFCOUNTER_GROUP_VPC 0x5 +#define KGSL_PERFCOUNTER_GROUP_TSE 0x6 +#define KGSL_PERFCOUNTER_GROUP_RAS 0x7 +#define KGSL_PERFCOUNTER_GROUP_UCHE 0x8 +#define KGSL_PERFCOUNTER_GROUP_TP 0x9 +#define KGSL_PERFCOUNTER_GROUP_SP 0xA +#define KGSL_PERFCOUNTER_GROUP_RB 0xB +#define KGSL_PERFCOUNTER_GROUP_PWR 0xC +#define KGSL_PERFCOUNTER_GROUP_VBIF 0xD +#define KGSL_PERFCOUNTER_GROUP_VBIF_PWR 0xE +#define KGSL_PERFCOUNTER_GROUP_MH 0xF +#define KGSL_PERFCOUNTER_GROUP_PA_SU 0x10 +#define KGSL_PERFCOUNTER_GROUP_SQ 0x11 +#define KGSL_PERFCOUNTER_GROUP_SX 0x12 +#define KGSL_PERFCOUNTER_GROUP_TCF 0x13 +#define KGSL_PERFCOUNTER_GROUP_TCM 0x14 +#define KGSL_PERFCOUNTER_GROUP_TCR 0x15 +#define KGSL_PERFCOUNTER_GROUP_L2 0x16 
+#define KGSL_PERFCOUNTER_GROUP_VSC 0x17 +#define KGSL_PERFCOUNTER_GROUP_CCU 0x18 +#define KGSL_PERFCOUNTER_GROUP_LRZ 0x19 +#define KGSL_PERFCOUNTER_GROUP_CMP 0x1A +#define KGSL_PERFCOUNTER_GROUP_ALWAYSON 0x1B +#define KGSL_PERFCOUNTER_GROUP_SP_PWR 0x1C +#define KGSL_PERFCOUNTER_GROUP_TP_PWR 0x1D +#define KGSL_PERFCOUNTER_GROUP_RB_PWR 0x1E +#define KGSL_PERFCOUNTER_GROUP_CCU_PWR 0x1F +#define KGSL_PERFCOUNTER_GROUP_UCHE_PWR 0x20 +#define KGSL_PERFCOUNTER_GROUP_CP_PWR 0x21 +#define KGSL_PERFCOUNTER_GROUP_GPMU_PWR 0x22 +#define KGSL_PERFCOUNTER_GROUP_ALWAYSON_PWR 0x23 +#define KGSL_PERFCOUNTER_GROUP_GLC 0x24 +#define KGSL_PERFCOUNTER_GROUP_FCHE 0x25 +#define KGSL_PERFCOUNTER_GROUP_MHUB 0x26 +#define KGSL_PERFCOUNTER_GROUP_GMU_XOCLK 0x27 +#define KGSL_PERFCOUNTER_GROUP_GMU_GMUCLK 0x28 +#define KGSL_PERFCOUNTER_GROUP_GMU_PERF 0x29 +#define KGSL_PERFCOUNTER_GROUP_SW 0x2a +#define KGSL_PERFCOUNTER_GROUP_UFC 0x2b +#define KGSL_PERFCOUNTER_GROUP_BV_CP 0x2c +#define KGSL_PERFCOUNTER_GROUP_BV_PC 0x2d +#define KGSL_PERFCOUNTER_GROUP_BV_VFD 0x2e +#define KGSL_PERFCOUNTER_GROUP_BV_VPC 0x2f +#define KGSL_PERFCOUNTER_GROUP_BV_TP 0x30 +#define KGSL_PERFCOUNTER_GROUP_BV_SP 0x31 +#define KGSL_PERFCOUNTER_GROUP_BV_UFC 0x32 +#define KGSL_PERFCOUNTER_GROUP_MAX 0x33 + +#define KGSL_PERFCOUNTER_NOT_USED 0xFFFFFFFF +#define KGSL_PERFCOUNTER_BROKEN 0xFFFFFFFE + +/* structure holds list of ibs */ +struct kgsl_ibdesc { + unsigned long gpuaddr; + unsigned long __pad; + __kernel_size_t sizedwords; + unsigned int ctrl; +}; + +/** + * struct kgsl_cmdbatch_profiling_buffer + * @wall_clock_s: Ringbuffer submission time (seconds). + * If KGSL_CMDBATCH_PROFILING_KTIME is set, time is provided + * in kernel clocks, otherwise wall clock time is used. + * @wall_clock_ns: Ringbuffer submission time (nanoseconds). + * If KGSL_CMDBATCH_PROFILING_KTIME is set time is provided + * in kernel clocks, otherwise wall clock time is used. 
+ * @gpu_ticks_queued: GPU ticks at ringbuffer submission + * @gpu_ticks_submitted: GPU ticks when starting cmdbatch execution + * @gpu_ticks_retired: GPU ticks when finishing cmdbatch execution + * + * This structure defines the profiling buffer used to measure cmdbatch + * execution time + */ +struct kgsl_cmdbatch_profiling_buffer { + __u64 wall_clock_s; + __u64 wall_clock_ns; + __u64 gpu_ticks_queued; + __u64 gpu_ticks_submitted; + __u64 gpu_ticks_retired; +}; + +/* ioctls */ +#define KGSL_IOC_TYPE 0x09 + +/* + * get misc info about the GPU + * type should be a value from enum kgsl_property_type + * value points to a structure that varies based on type + * sizebytes is sizeof() that structure + * for KGSL_PROP_DEVICE_INFO, use struct kgsl_devinfo + * this structure contaings hardware versioning info. + * for KGSL_PROP_DEVICE_SHADOW, use struct kgsl_shadowprop + * this is used to find mmap() offset and sizes for mapping + * struct kgsl_memstore into userspace. + */ +struct kgsl_device_getproperty { + unsigned int type; + void __user *value; + __kernel_size_t sizebytes; +}; + +#define IOCTL_KGSL_DEVICE_GETPROPERTY \ + _IOWR(KGSL_IOC_TYPE, 0x2, struct kgsl_device_getproperty) + +/* IOCTL_KGSL_DEVICE_READ (0x3) - removed 03/2012 + */ + +/* block until the GPU has executed past a given timestamp + * timeout is in milliseconds. + */ +struct kgsl_device_waittimestamp { + unsigned int timestamp; + unsigned int timeout; +}; + +#define IOCTL_KGSL_DEVICE_WAITTIMESTAMP \ + _IOW(KGSL_IOC_TYPE, 0x6, struct kgsl_device_waittimestamp) + +struct kgsl_device_waittimestamp_ctxtid { + unsigned int context_id; + unsigned int timestamp; + unsigned int timeout; +}; + +#define IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID \ + _IOW(KGSL_IOC_TYPE, 0x7, struct kgsl_device_waittimestamp_ctxtid) + +/* DEPRECATED: issue indirect commands to the GPU. 
+ * drawctxt_id must have been created with IOCTL_KGSL_DRAWCTXT_CREATE + * ibaddr and sizedwords must specify a subset of a buffer created + * with IOCTL_KGSL_SHAREDMEM_FROM_PMEM + * flags may be a mask of KGSL_CONTEXT_ values + * timestamp is a returned counter value which can be passed to + * other ioctls to determine when the commands have been executed by + * the GPU. + * + * This function is deprecated - consider using IOCTL_KGSL_SUBMIT_COMMANDS + * instead + */ +struct kgsl_ringbuffer_issueibcmds { + unsigned int drawctxt_id; + unsigned long ibdesc_addr; + unsigned int numibs; + unsigned int timestamp; /*output param */ + unsigned int flags; +}; + +#define IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS \ + _IOWR(KGSL_IOC_TYPE, 0x10, struct kgsl_ringbuffer_issueibcmds) + +/* read the most recently executed timestamp value + * type should be a value from enum kgsl_timestamp_type + */ +struct kgsl_cmdstream_readtimestamp { + unsigned int type; + unsigned int timestamp; /*output param */ +}; + +#define IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_OLD \ + _IOR(KGSL_IOC_TYPE, 0x11, struct kgsl_cmdstream_readtimestamp) + +#define IOCTL_KGSL_CMDSTREAM_READTIMESTAMP \ + _IOWR(KGSL_IOC_TYPE, 0x11, struct kgsl_cmdstream_readtimestamp) + +/* free memory when the GPU reaches a given timestamp. + * gpuaddr specify a memory region created by a + * IOCTL_KGSL_SHAREDMEM_FROM_PMEM call + * type should be a value from enum kgsl_timestamp_type + */ +struct kgsl_cmdstream_freememontimestamp { + unsigned long gpuaddr; + unsigned int type; + unsigned int timestamp; +}; + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP \ + _IOW(KGSL_IOC_TYPE, 0x12, struct kgsl_cmdstream_freememontimestamp) + +/* + * Previous versions of this header had incorrectly defined + * IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP as a read-only ioctl instead + * of a write only ioctl. 
To ensure binary compatibility, the following + * #define will be used to intercept the incorrect ioctl + */ + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_OLD \ + _IOR(KGSL_IOC_TYPE, 0x12, struct kgsl_cmdstream_freememontimestamp) + +/* create a draw context, which is used to preserve GPU state. + * The flags field may contain a mask KGSL_CONTEXT_* values + */ +struct kgsl_drawctxt_create { + unsigned int flags; + unsigned int drawctxt_id; /*output param */ +}; + +#define IOCTL_KGSL_DRAWCTXT_CREATE \ + _IOWR(KGSL_IOC_TYPE, 0x13, struct kgsl_drawctxt_create) + +/* destroy a draw context */ +struct kgsl_drawctxt_destroy { + unsigned int drawctxt_id; +}; + +#define IOCTL_KGSL_DRAWCTXT_DESTROY \ + _IOW(KGSL_IOC_TYPE, 0x14, struct kgsl_drawctxt_destroy) + +/* + * add a block of pmem, fb, ashmem or user allocated address + * into the GPU address space + */ +struct kgsl_map_user_mem { + int fd; + unsigned long gpuaddr; /*output param */ + __kernel_size_t len; + __kernel_size_t offset; + unsigned long hostptr; /*input param */ + enum kgsl_user_mem_type memtype; + unsigned int flags; +}; + +#define IOCTL_KGSL_MAP_USER_MEM \ + _IOWR(KGSL_IOC_TYPE, 0x15, struct kgsl_map_user_mem) + +struct kgsl_cmdstream_readtimestamp_ctxtid { + unsigned int context_id; + unsigned int type; + unsigned int timestamp; /*output param */ +}; + +#define IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID \ + _IOWR(KGSL_IOC_TYPE, 0x16, struct kgsl_cmdstream_readtimestamp_ctxtid) + +struct kgsl_cmdstream_freememontimestamp_ctxtid { + unsigned int context_id; + unsigned long gpuaddr; + unsigned int type; + unsigned int timestamp; +}; + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID \ + _IOW(KGSL_IOC_TYPE, 0x17, \ + struct kgsl_cmdstream_freememontimestamp_ctxtid) + +/* add a block of pmem or fb into the GPU address space */ +struct kgsl_sharedmem_from_pmem { + int pmem_fd; + unsigned long gpuaddr; /*output param */ + unsigned int len; + unsigned int offset; +}; + +#define 
IOCTL_KGSL_SHAREDMEM_FROM_PMEM \ + _IOWR(KGSL_IOC_TYPE, 0x20, struct kgsl_sharedmem_from_pmem) + +/* remove memory from the GPU's address space */ +struct kgsl_sharedmem_free { + unsigned long gpuaddr; +}; + +#define IOCTL_KGSL_SHAREDMEM_FREE \ + _IOW(KGSL_IOC_TYPE, 0x21, struct kgsl_sharedmem_free) + +struct kgsl_cff_user_event { + unsigned char cff_opcode; + unsigned int op1; + unsigned int op2; + unsigned int op3; + unsigned int op4; + unsigned int op5; + unsigned int __pad[2]; +}; + +#define IOCTL_KGSL_CFF_USER_EVENT \ + _IOW(KGSL_IOC_TYPE, 0x31, struct kgsl_cff_user_event) + +struct kgsl_gmem_desc { + unsigned int x; + unsigned int y; + unsigned int width; + unsigned int height; + unsigned int pitch; +}; + +struct kgsl_buffer_desc { + void *hostptr; + unsigned long gpuaddr; + int size; + unsigned int format; + unsigned int pitch; + unsigned int enabled; +}; + +struct kgsl_bind_gmem_shadow { + unsigned int drawctxt_id; + struct kgsl_gmem_desc gmem_desc; + unsigned int shadow_x; + unsigned int shadow_y; + struct kgsl_buffer_desc shadow_buffer; + unsigned int buffer_id; +}; + +#define IOCTL_KGSL_DRAWCTXT_BIND_GMEM_SHADOW \ + _IOW(KGSL_IOC_TYPE, 0x22, struct kgsl_bind_gmem_shadow) + +/* add a block of memory into the GPU address space */ + +/* + * IOCTL_KGSL_SHAREDMEM_FROM_VMALLOC deprecated 09/2012 + * use IOCTL_KGSL_GPUMEM_ALLOC instead + */ + +struct kgsl_sharedmem_from_vmalloc { + unsigned long gpuaddr; /*output param */ + unsigned int hostptr; + unsigned int flags; +}; + +#define IOCTL_KGSL_SHAREDMEM_FROM_VMALLOC \ + _IOWR(KGSL_IOC_TYPE, 0x23, struct kgsl_sharedmem_from_vmalloc) + +/* + * This is being deprecated in favor of IOCTL_KGSL_GPUMEM_CACHE_SYNC which + * supports both directions (flush and invalidate). This code will still + * work, but by definition it will do a flush of the cache which might not be + * what you want to have happen on a buffer following a GPU operation. 
It is + * safer to go with IOCTL_KGSL_GPUMEM_CACHE_SYNC + */ + +#define IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE \ + _IOW(KGSL_IOC_TYPE, 0x24, struct kgsl_sharedmem_free) + +struct kgsl_drawctxt_set_bin_base_offset { + unsigned int drawctxt_id; + unsigned int offset; +}; + +#define IOCTL_KGSL_DRAWCTXT_SET_BIN_BASE_OFFSET \ + _IOW(KGSL_IOC_TYPE, 0x25, struct kgsl_drawctxt_set_bin_base_offset) + +enum kgsl_cmdwindow_type { + KGSL_CMDWINDOW_MIN = 0x00000000, + KGSL_CMDWINDOW_2D = 0x00000000, + KGSL_CMDWINDOW_3D = 0x00000001, /* legacy */ + KGSL_CMDWINDOW_MMU = 0x00000002, + KGSL_CMDWINDOW_ARBITER = 0x000000FF, + KGSL_CMDWINDOW_MAX = 0x000000FF, +}; + +/* write to the command window */ +struct kgsl_cmdwindow_write { + enum kgsl_cmdwindow_type target; + unsigned int addr; + unsigned int data; +}; + +#define IOCTL_KGSL_CMDWINDOW_WRITE \ + _IOW(KGSL_IOC_TYPE, 0x2e, struct kgsl_cmdwindow_write) + +struct kgsl_gpumem_alloc { + unsigned long gpuaddr; /* output param */ + __kernel_size_t size; + unsigned int flags; +}; + +#define IOCTL_KGSL_GPUMEM_ALLOC \ + _IOWR(KGSL_IOC_TYPE, 0x2f, struct kgsl_gpumem_alloc) + +struct kgsl_cff_syncmem { + unsigned long gpuaddr; + __kernel_size_t len; + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_CFF_SYNCMEM \ + _IOW(KGSL_IOC_TYPE, 0x30, struct kgsl_cff_syncmem) + +/* + * A timestamp event allows the user space to register an action following an + * expired timestamp. Note IOCTL_KGSL_TIMESTAMP_EVENT has been redefined to + * _IOWR to support fences which need to return a fd for the priv parameter. 
+ */ + +struct kgsl_timestamp_event { + int type; /* Type of event (see list below) */ + unsigned int timestamp; /* Timestamp to trigger event on */ + unsigned int context_id; /* Context for the timestamp */ + void __user *priv; /* Pointer to the event specific blob */ + __kernel_size_t len; /* Size of the event specific blob */ +}; + +#define IOCTL_KGSL_TIMESTAMP_EVENT_OLD \ + _IOW(KGSL_IOC_TYPE, 0x31, struct kgsl_timestamp_event) + +/* A genlock timestamp event releases an existing lock on timestamp expire */ + +#define KGSL_TIMESTAMP_EVENT_GENLOCK 1 + +struct kgsl_timestamp_event_genlock { + int handle; /* Handle of the genlock lock to release */ +}; + +/* A fence timestamp event releases an existing lock on timestamp expire */ + +#define KGSL_TIMESTAMP_EVENT_FENCE 2 + +struct kgsl_timestamp_event_fence { + int fence_fd; /* Fence to signal */ +}; + +/* + * Set a property within the kernel. Uses the same structure as + * IOCTL_KGSL_GETPROPERTY + */ + +#define IOCTL_KGSL_SETPROPERTY \ + _IOW(KGSL_IOC_TYPE, 0x32, struct kgsl_device_getproperty) + +#define IOCTL_KGSL_TIMESTAMP_EVENT \ + _IOWR(KGSL_IOC_TYPE, 0x33, struct kgsl_timestamp_event) + +/** + * struct kgsl_gpumem_alloc_id - argument to IOCTL_KGSL_GPUMEM_ALLOC_ID + * @id: returned id value for this allocation. + * @flags: mask of KGSL_MEM* values requested and actual flags on return. + * @size: requested size of the allocation and actual size on return. + * @mmapsize: returned size to pass to mmap() which may be larger than 'size' + * @gpuaddr: returned GPU address for the allocation + * + * Allocate memory for access by the GPU. The flags and size fields are echoed + * back by the kernel, so that the caller can know if the request was + * adjusted. + * + * Supported flags: + * KGSL_MEMFLAGS_GPUREADONLY: the GPU will be unable to write to the buffer + * KGSL_MEMTYPE*: usage hint for debugging aid + * KGSL_MEMALIGN*: alignment hint, may be ignored or adjusted by the kernel. 
+ * KGSL_MEMFLAGS_USE_CPU_MAP: If set on call and return, the returned GPU + * address will be 0. Calling mmap() will set the GPU address. + */ +struct kgsl_gpumem_alloc_id { + unsigned int id; + unsigned int flags; + __kernel_size_t size; + __kernel_size_t mmapsize; + unsigned long gpuaddr; +/* private: reserved for future use*/ + unsigned long __pad[2]; +}; + +#define IOCTL_KGSL_GPUMEM_ALLOC_ID \ + _IOWR(KGSL_IOC_TYPE, 0x34, struct kgsl_gpumem_alloc_id) + +/** + * struct kgsl_gpumem_free_id - argument to IOCTL_KGSL_GPUMEM_FREE_ID + * @id: GPU allocation id to free + * + * Free an allocation by id, in case a GPU address has not been assigned or + * is unknown. Freeing an allocation by id with this ioctl or by GPU address + * with IOCTL_KGSL_SHAREDMEM_FREE are equivalent. + */ +struct kgsl_gpumem_free_id { + unsigned int id; +/* private: reserved for future use*/ + unsigned int __pad; +}; + +#define IOCTL_KGSL_GPUMEM_FREE_ID \ + _IOWR(KGSL_IOC_TYPE, 0x35, struct kgsl_gpumem_free_id) + +/** + * struct kgsl_gpumem_get_info - argument to IOCTL_KGSL_GPUMEM_GET_INFO + * @gpuaddr: GPU address to query. Also set on return. + * @id: GPU allocation id to query. Also set on return. + * @flags: returned mask of KGSL_MEM* values. + * @size: returned size of the allocation. + * @mmapsize: returned size to pass mmap(), which may be larger than 'size' + * @useraddr: returned address of the userspace mapping for this buffer + * + * This ioctl allows querying of all user visible attributes of an existing + * allocation, by either the GPU address or the id returned by a previous + * call to IOCTL_KGSL_GPUMEM_ALLOC_ID. Legacy allocation ioctls may not + * return all attributes so this ioctl can be used to look them up if needed. 
+ * + */ +struct kgsl_gpumem_get_info { + unsigned long gpuaddr; + unsigned int id; + unsigned int flags; + __kernel_size_t size; + __kernel_size_t mmapsize; + unsigned long useraddr; +/* private: reserved for future use*/ + unsigned long __pad[4]; +}; + +#define IOCTL_KGSL_GPUMEM_GET_INFO\ + _IOWR(KGSL_IOC_TYPE, 0x36, struct kgsl_gpumem_get_info) + +/** + * struct kgsl_gpumem_sync_cache - argument to IOCTL_KGSL_GPUMEM_SYNC_CACHE + * @gpuaddr: GPU address of the buffer to sync. + * @id: id of the buffer to sync. Either gpuaddr or id is sufficient. + * @op: a mask of KGSL_GPUMEM_CACHE_* values + * @offset: offset into the buffer + * @length: number of bytes starting from offset to perform + * the cache operation on + * + * Sync the L2 cache for memory headed to and from the GPU - this replaces + * KGSL_SHAREDMEM_FLUSH_CACHE since it can handle cache management for both + * directions + * + */ +struct kgsl_gpumem_sync_cache { + unsigned long gpuaddr; + unsigned int id; + unsigned int op; + __kernel_size_t offset; + __kernel_size_t length; +}; + +#define KGSL_GPUMEM_CACHE_CLEAN (1 << 0) +#define KGSL_GPUMEM_CACHE_TO_GPU KGSL_GPUMEM_CACHE_CLEAN + +#define KGSL_GPUMEM_CACHE_INV (1 << 1) +#define KGSL_GPUMEM_CACHE_FROM_GPU KGSL_GPUMEM_CACHE_INV + +#define KGSL_GPUMEM_CACHE_FLUSH \ + (KGSL_GPUMEM_CACHE_CLEAN | KGSL_GPUMEM_CACHE_INV) + +/* Flag to ensure backwards compatibility of kgsl_gpumem_sync_cache struct */ +#define KGSL_GPUMEM_CACHE_RANGE (1 << 31U) + +#define IOCTL_KGSL_GPUMEM_SYNC_CACHE \ + _IOW(KGSL_IOC_TYPE, 0x37, struct kgsl_gpumem_sync_cache) + +/** + * struct kgsl_perfcounter_get - argument to IOCTL_KGSL_PERFCOUNTER_GET + * @groupid: Performance counter group ID + * @countable: Countable to select within the group + * @offset: Return offset of the reserved LO counter + * @offset_hi: Return offset of the reserved HI counter + * + * Get an available performance counter from a specified groupid. 
The offset
+ * of the performance counter will be returned after successfully assigning
+ * the countable to the counter for the specified group. An error will be
+ * returned and an offset of 0 if the groupid is invalid or there are no
+ * more counters left. After successfully getting a perfcounter, the user
+ * must call kgsl_perfcounter_put(groupid, countable) when finished with
+ * the perfcounter to clear up perfcounter resources.
+ *
+ */
+struct kgsl_perfcounter_get {
+	unsigned int groupid;
+	unsigned int countable;
+	unsigned int offset;
+	unsigned int offset_hi;
+/* private: reserved for future use */
+	unsigned int __pad; /* For future binary compatibility */
+};
+
+#define IOCTL_KGSL_PERFCOUNTER_GET \
+	_IOWR(KGSL_IOC_TYPE, 0x38, struct kgsl_perfcounter_get)
+
+/**
+ * struct kgsl_perfcounter_put - argument to IOCTL_KGSL_PERFCOUNTER_PUT
+ * @groupid: Performance counter group ID
+ * @countable: Countable to release within the group
+ *
+ * Put an allocated performance counter to allow others to have access to the
+ * resource that was previously taken. This is only to be called after
+ * successfully getting a performance counter from kgsl_perfcounter_get().
+ *
+ */
+struct kgsl_perfcounter_put {
+	unsigned int groupid;
+	unsigned int countable;
+/* private: reserved for future use */
+	unsigned int __pad[2]; /* For future binary compatibility */
+};
+
+#define IOCTL_KGSL_PERFCOUNTER_PUT \
+	_IOW(KGSL_IOC_TYPE, 0x39, struct kgsl_perfcounter_put)
+
+/**
+ * struct kgsl_perfcounter_query - argument to IOCTL_KGSL_PERFCOUNTER_QUERY
+ * @groupid: Performance counter group ID
+ * @countables: Return active countables array
+ * @count: Size of active countables array
+ * @max_counters: Return total number of counters for the group ID
+ *
+ * Query the available performance counters given a groupid. The array
+ * *countables is used to return the current active countables in counters.
+ * The size of the array is passed in so the kernel will only write at most
+ * size or counter->size for the group id. The total number of available
+ * counters for the group ID is returned in max_counters.
+ * If the array or size passed in are invalid, then only the maximum number
+ * of counters will be returned, no data will be written to *countables.
+ * If the groupid is invalid an error code will be returned.
+ *
+ */
+struct kgsl_perfcounter_query {
+	unsigned int groupid;
+	/* Array to return the current countable for up to size counters */
+	unsigned int __user *countables;
+	unsigned int count;
+	unsigned int max_counters;
+/* private: reserved for future use */
+	unsigned int __pad[2]; /* For future binary compatibility */
+};
+
+#define IOCTL_KGSL_PERFCOUNTER_QUERY \
+	_IOWR(KGSL_IOC_TYPE, 0x3A, struct kgsl_perfcounter_query)
+
+/**
+ * struct kgsl_perfcounter_read - argument to IOCTL_KGSL_PERFCOUNTER_READ
+ * @groupid: Performance counter group IDs
+ * @countable: Performance counter countable IDs
+ * @value: Return performance counter reads
+ * @count: Number of groupid/countable pairs in the reads array
+ *
+ * Read in the current value of a performance counter given by the groupid
+ * and countable.
+ * + */ + +struct kgsl_perfcounter_read_group { + unsigned int groupid; + unsigned int countable; + unsigned long long value; +}; + +struct kgsl_perfcounter_read { + struct kgsl_perfcounter_read_group __user *reads; + unsigned int count; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_PERFCOUNTER_READ \ + _IOWR(KGSL_IOC_TYPE, 0x3B, struct kgsl_perfcounter_read) +/* + * struct kgsl_gpumem_sync_cache_bulk - argument to + * IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK + * @id_list: list of GPU buffer ids of the buffers to sync + * @count: number of GPU buffer ids in id_list + * @op: a mask of KGSL_GPUMEM_CACHE_* values + * + * Sync the cache for memory headed to and from the GPU. Certain + * optimizations can be made on the cache operation based on the total + * size of the working set of memory to be managed. + */ +struct kgsl_gpumem_sync_cache_bulk { + unsigned int __user *id_list; + unsigned int count; + unsigned int op; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK \ + _IOWR(KGSL_IOC_TYPE, 0x3C, struct kgsl_gpumem_sync_cache_bulk) + +/* + * struct kgsl_cmd_syncpoint_timestamp + * @context_id: ID of a KGSL context + * @timestamp: GPU timestamp + * + * This structure defines a syncpoint comprising a context/timestamp pair. A + * list of these may be passed by IOCTL_KGSL_SUBMIT_COMMANDS to define + * dependencies that must be met before the command can be submitted to the + * hardware + */ +struct kgsl_cmd_syncpoint_timestamp { + unsigned int context_id; + unsigned int timestamp; +}; + +struct kgsl_cmd_syncpoint_fence { + int fd; +}; + +/* + * struct kgsl_cmd_syncpoint_timeline + * @timelines: Address of an array of &struct kgsl_timeline_val + * @count: Number of entries in @timelines + * @timelines_size: Size of each entry in @timelines + * + * Define a syncpoint for a number of timelines. 
This syncpoint will
+ * be satisfied when all of the specified timelines are signaled.
+ */
+struct kgsl_cmd_syncpoint_timeline {
+	__u64 timelines;
+	__u32 count;
+	__u32 timelines_size;
+};
+
+/**
+ * struct kgsl_cmd_syncpoint - Define a sync point for a command batch
+ * @type: type of sync point defined here
+ * @priv: Pointer to the type specific buffer
+ * @size: Size of the type specific buffer
+ *
+ * This structure contains pointers defining a specific command sync point.
+ * The pointer and size should point to a type appropriate structure.
+ */
+struct kgsl_cmd_syncpoint {
+	int type;
+	void __user *priv;
+	__kernel_size_t size;
+};
+
+/* Flag to indicate that the cmdlist may contain memlists */
+#define KGSL_IBDESC_MEMLIST 0x1
+
+/* Flag to point out the cmdbatch profiling buffer in the memlist */
+#define KGSL_IBDESC_PROFILING_BUFFER 0x2
+
+/**
+ * struct kgsl_submit_commands - Argument to IOCTL_KGSL_SUBMIT_COMMANDS
+ * @context_id: KGSL context ID that owns the commands
+ * @flags:
+ * @cmdlist: User pointer to a list of kgsl_ibdesc structures
+ * @numcmds: Number of commands listed in cmdlist
+ * @synclist: User pointer to a list of kgsl_cmd_syncpoint structures
+ * @numsyncs: Number of sync points listed in synclist
+ * @timestamp: On entry a user defined timestamp, on exit the timestamp
+ * assigned to the command batch
+ *
+ * This structure specifies a command to send to the GPU hardware. This is
+ * similar to kgsl_issueibcmds except that it doesn't support the legacy way to
+ * submit IB lists and it adds sync points to block the IB until the
+ * dependencies are satisfied. This entry point is the new and preferred way
+ * to submit commands to the GPU. The memory list can be used to specify all
+ * memory that is referenced in the current set of commands.
+ */ + +struct kgsl_submit_commands { + unsigned int context_id; + unsigned int flags; + struct kgsl_ibdesc __user *cmdlist; + unsigned int numcmds; + struct kgsl_cmd_syncpoint __user *synclist; + unsigned int numsyncs; + unsigned int timestamp; +/* private: reserved for future use */ + unsigned int __pad[4]; +}; + +#define IOCTL_KGSL_SUBMIT_COMMANDS \ + _IOWR(KGSL_IOC_TYPE, 0x3D, struct kgsl_submit_commands) + +/** + * struct kgsl_device_constraint - device constraint argument + * @context_id: KGSL context ID + * @type: type of constraint i.e pwrlevel/none + * @data: constraint data + * @size: size of the constraint data + */ +struct kgsl_device_constraint { + unsigned int type; + unsigned int context_id; + void __user *data; + __kernel_size_t size; +}; + +/* Constraint Type*/ +#define KGSL_CONSTRAINT_NONE 0 +#define KGSL_CONSTRAINT_PWRLEVEL 1 + +/* L3 constraint Type */ +#define KGSL_CONSTRAINT_L3_NONE 2 +#define KGSL_CONSTRAINT_L3_PWRLEVEL 3 + +/* PWRLEVEL constraint level*/ +/* set to min frequency */ +#define KGSL_CONSTRAINT_PWR_MIN 0 +/* set to max frequency */ +#define KGSL_CONSTRAINT_PWR_MAX 1 + +struct kgsl_device_constraint_pwrlevel { + unsigned int level; +}; + +/** + * struct kgsl_syncsource_create - Argument to IOCTL_KGSL_SYNCSOURCE_CREATE + * @id: returned id for the syncsource that was created. + * + * This ioctl creates a userspace sync timeline. + */ + +struct kgsl_syncsource_create { + unsigned int id; +/* private: reserved for future use */ + unsigned int __pad[3]; +}; + +#define IOCTL_KGSL_SYNCSOURCE_CREATE \ + _IOWR(KGSL_IOC_TYPE, 0x40, struct kgsl_syncsource_create) + +/** + * struct kgsl_syncsource_destroy - Argument to IOCTL_KGSL_SYNCSOURCE_DESTROY + * @id: syncsource id to destroy + * + * This ioctl creates a userspace sync timeline. 
+ */ + +struct kgsl_syncsource_destroy { + unsigned int id; +/* private: reserved for future use */ + unsigned int __pad[3]; +}; + +#define IOCTL_KGSL_SYNCSOURCE_DESTROY \ + _IOWR(KGSL_IOC_TYPE, 0x41, struct kgsl_syncsource_destroy) + +/** + * struct kgsl_syncsource_create_fence - Argument to + * IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE + * @id: syncsource id + * @fence_fd: returned sync_fence fd + * + * Create a fence that may be signaled by userspace by calling + * IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE. There are no order dependencies between + * these fences. + */ +struct kgsl_syncsource_create_fence { + unsigned int id; + int fence_fd; +/* private: reserved for future use */ + unsigned int __pad[4]; +}; + +/** + * struct kgsl_syncsource_signal_fence - Argument to + * IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE + * @id: syncsource id + * @fence_fd: sync_fence fd to signal + * + * Signal a fence that was created by a IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE + * call using the same syncsource id. This allows a fence to be shared + * to other processes but only signaled by the process owning the fd + * used to create the fence. 
+ */
+#define IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE \
+	_IOWR(KGSL_IOC_TYPE, 0x42, struct kgsl_syncsource_create_fence)
+
+struct kgsl_syncsource_signal_fence {
+	unsigned int id;
+	int fence_fd;
+/* private: reserved for future use */
+	unsigned int __pad[4];
+};
+
+#define IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE \
+	_IOWR(KGSL_IOC_TYPE, 0x43, struct kgsl_syncsource_signal_fence)
+
+/**
+ * struct kgsl_cff_sync_gpuobj - Argument to IOCTL_KGSL_CFF_SYNC_GPUOBJ
+ * @offset: Offset into the GPU object to sync
+ * @length: Number of bytes to sync
+ * @id: ID of the GPU object to sync
+ */
+struct kgsl_cff_sync_gpuobj {
+	__u64 offset;
+	__u64 length;
+	unsigned int id;
+};
+
+#define IOCTL_KGSL_CFF_SYNC_GPUOBJ \
+	_IOW(KGSL_IOC_TYPE, 0x44, struct kgsl_cff_sync_gpuobj)
+
+/**
+ * struct kgsl_gpuobj_alloc - Argument to IOCTL_KGSL_GPUOBJ_ALLOC
+ * @size: Size in bytes of the object to allocate
+ * @flags: mask of KGSL_MEMFLAG_* bits
+ * @va_len: Size in bytes of the virtual region to allocate
+ * @mmapsize: Returns the mmap() size of the object
+ * @id: Returns the GPU object ID of the new object
+ * @metadata_len: Length of the metadata to copy from the user
+ * @metadata: Pointer to the user specified metadata to store for the object
+ */
+struct kgsl_gpuobj_alloc {
+	__u64 size;
+	__u64 flags;
+	__u64 va_len;
+	__u64 mmapsize;
+	unsigned int id;
+	unsigned int metadata_len;
+	__u64 metadata;
+};
+
+/* Let the user know that this header supports the gpuobj metadata */
+#define KGSL_GPUOBJ_ALLOC_METADATA_MAX 64
+
+#define IOCTL_KGSL_GPUOBJ_ALLOC \
+	_IOWR(KGSL_IOC_TYPE, 0x45, struct kgsl_gpuobj_alloc)
+
+/**
+ * struct kgsl_gpuobj_free - Argument to IOCTL_KGSL_GPUOBJ_FREE
+ * @flags: Mask of: KGSL_GPUOBJ_FREE_ON_EVENT
+ * @priv: Pointer to the private object if KGSL_GPUOBJ_FREE_ON_EVENT is
+ * specified
+ * @id: ID of the GPU object to free
+ * @type: If KGSL_GPUOBJ_FREE_ON_EVENT is specified, the type of asynchronous
+ * event to free on
+ * @len: Length of the data passed in
priv
+ */
+struct kgsl_gpuobj_free {
+	__u64 flags;
+	__u64 __user priv;
+	unsigned int id;
+	unsigned int type;
+	unsigned int len;
+};
+
+#define KGSL_GPUOBJ_FREE_ON_EVENT 1
+
+#define KGSL_GPU_EVENT_TIMESTAMP 1
+#define KGSL_GPU_EVENT_FENCE 2
+
+/**
+ * struct kgsl_gpu_event_timestamp - Specifies a timestamp event to free a GPU
+ * object on
+ * @context_id: ID of the timestamp event to wait for
+ * @timestamp: Timestamp of the timestamp event to wait for
+ */
+struct kgsl_gpu_event_timestamp {
+	unsigned int context_id;
+	unsigned int timestamp;
+};
+
+/**
+ * struct kgsl_gpu_event_fence - Specifies a fence ID to free a GPU object on
+ * @fd: File descriptor for the fence
+ */
+struct kgsl_gpu_event_fence {
+	int fd;
+};
+
+#define IOCTL_KGSL_GPUOBJ_FREE \
+	_IOW(KGSL_IOC_TYPE, 0x46, struct kgsl_gpuobj_free)
+
+/**
+ * struct kgsl_gpuobj_info - argument to IOCTL_KGSL_GPUOBJ_INFO
+ * @gpuaddr: GPU address of the object
+ * @flags: Current flags for the object
+ * @size: Size of the object
+ * @va_len: VA size of the object
+ * @va_addr: Virtual address of the object (if it is mapped)
+ * @id: GPU object ID of the object to query
+ */
+struct kgsl_gpuobj_info {
+	__u64 gpuaddr;
+	__u64 flags;
+	__u64 size;
+	__u64 va_len;
+	__u64 va_addr;
+	unsigned int id;
+};
+
+#define IOCTL_KGSL_GPUOBJ_INFO \
+	_IOWR(KGSL_IOC_TYPE, 0x47, struct kgsl_gpuobj_info)
+
+/**
+ * struct kgsl_gpuobj_import - argument to IOCTL_KGSL_GPUOBJ_IMPORT
+ * @priv: Pointer to the private data for the import type
+ * @priv_len: Length of the private data
+ * @flags: Mask of KGSL_MEMFLAG_ flags
+ * @type: Type of the import (KGSL_USER_MEM_TYPE_*)
+ * @id: Returns the ID of the new GPU object
+ */
+struct kgsl_gpuobj_import {
+	__u64 __user priv;
+	__u64 priv_len;
+	__u64 flags;
+	unsigned int type;
+	unsigned int id;
+};
+
+/**
+ * struct kgsl_gpuobj_import_dma_buf - import a dmabuf object
+ * @fd: File descriptor for the dma-buf object
+ */
+struct kgsl_gpuobj_import_dma_buf {
+	int fd;
+};
+ +/** + * struct kgsl_gpuobj_import_useraddr - import an object based on a useraddr + * @virtaddr: Virtual address of the object to import + */ +struct kgsl_gpuobj_import_useraddr { + __u64 virtaddr; +}; + +#define IOCTL_KGSL_GPUOBJ_IMPORT \ + _IOWR(KGSL_IOC_TYPE, 0x48, struct kgsl_gpuobj_import) + +/** + * struct kgsl_gpuobj_sync_obj - Individual GPU object to sync + * @offset: Offset within the GPU object to sync + * @length: Number of bytes to sync + * @id: ID of the GPU object to sync + * @op: Cache operation to execute + */ + +struct kgsl_gpuobj_sync_obj { + __u64 offset; + __u64 length; + unsigned int id; + unsigned int op; +}; + +/** + * struct kgsl_gpuobj_sync - Argument for IOCTL_KGSL_GPUOBJ_SYNC + * @objs: Pointer to an array of kgsl_gpuobj_sync_obj structs + * @obj_len: Size of each item in the array + * @count: Number of items in the array + */ + +struct kgsl_gpuobj_sync { + __u64 __user objs; + unsigned int obj_len; + unsigned int count; +}; + +#define IOCTL_KGSL_GPUOBJ_SYNC \ + _IOW(KGSL_IOC_TYPE, 0x49, struct kgsl_gpuobj_sync) + +/** + * struct kgsl_command_object - GPU command object + * @offset: GPU address offset of the object + * @gpuaddr: GPU address of the object + * @size: Size of the object + * @flags: Current flags for the object + * @id - GPU command object ID + */ +struct kgsl_command_object { + __u64 offset; + __u64 gpuaddr; + __u64 size; + unsigned int flags; + unsigned int id; +}; + +/** + * struct kgsl_command_syncpoint - GPU syncpoint object + * @priv: Pointer to the type specific buffer + * @size: Size of the type specific buffer + * @type: type of sync point defined here + */ +struct kgsl_command_syncpoint { + __u64 __user priv; + __u64 size; + unsigned int type; +}; + +/** + * struct kgsl_command_object - Argument for IOCTL_KGSL_GPU_COMMAND + * @flags: Current flags for the object + * @cmdlist: List of kgsl_command_objects for submission + * @cmd_size: Size of kgsl_command_objects structure + * @numcmds: Number of 
kgsl_command_objects in command list + * @objlist: List of kgsl_command_objects for tracking + * @objsize: Size of kgsl_command_objects structure + * @numobjs: Number of kgsl_command_objects in object list + * @synclist: List of kgsl_command_syncpoints + * @syncsize: Size of kgsl_command_syncpoint structure + * @numsyncs: Number of kgsl_command_syncpoints in syncpoint list + * @context_id: Context ID submitting the kgsl_gpu_command + * @timestamp: Timestamp for the submitted commands + */ +struct kgsl_gpu_command { + __u64 flags; + __u64 __user cmdlist; + unsigned int cmdsize; + unsigned int numcmds; + __u64 __user objlist; + unsigned int objsize; + unsigned int numobjs; + __u64 __user synclist; + unsigned int syncsize; + unsigned int numsyncs; + unsigned int context_id; + unsigned int timestamp; +}; + +#define IOCTL_KGSL_GPU_COMMAND \ + _IOWR(KGSL_IOC_TYPE, 0x4A, struct kgsl_gpu_command) + +/** + * struct kgsl_preemption_counters_query - argument to + * IOCTL_KGSL_PREEMPTIONCOUNTER_QUERY + * @counters: Return preemption counters array + * @size_user: Size allocated by userspace + * @size_priority_level: Size of preemption counters for each + * priority level + * @max_priority_level: Return max number of priority levels + * + * Query the available preemption counters. The array counters + * is used to return preemption counters. The size of the array + * is passed in so the kernel will only write at most size_user + * or max available preemption counters. The total number of + * preemption counters is returned in max_priority_level. If the + * array or size passed in are invalid, then an error is + * returned back. 
+ */ +struct kgsl_preemption_counters_query { + __u64 __user counters; + unsigned int size_user; + unsigned int size_priority_level; + unsigned int max_priority_level; +}; + +#define IOCTL_KGSL_PREEMPTIONCOUNTER_QUERY \ + _IOWR(KGSL_IOC_TYPE, 0x4B, struct kgsl_preemption_counters_query) + +/** + * struct kgsl_gpuobj_set_info - argument for IOCTL_KGSL_GPUOBJ_SET_INFO + * @flags: Flags to indicate which parameters to change + * @metadata: If KGSL_GPUOBJ_SET_INFO_METADATA is set, a pointer to the new + * metadata + * @id: GPU memory object ID to change + * @metadata_len: If KGSL_GPUOBJ_SET_INFO_METADATA is set, the length of the + * new metadata string + * @type: If KGSL_GPUOBJ_SET_INFO_TYPE is set, the new type of the memory object + */ + +#define KGSL_GPUOBJ_SET_INFO_METADATA (1 << 0) +#define KGSL_GPUOBJ_SET_INFO_TYPE (1 << 1) + +struct kgsl_gpuobj_set_info { + __u64 flags; + __u64 metadata; + unsigned int id; + unsigned int metadata_len; + unsigned int type; +}; + +#define IOCTL_KGSL_GPUOBJ_SET_INFO \ + _IOW(KGSL_IOC_TYPE, 0x4C, struct kgsl_gpuobj_set_info) + +/** + * struct kgsl_sparse_phys_alloc - Argument for IOCTL_KGSL_SPARSE_PHYS_ALLOC + * @size: Size in bytes to back + * @pagesize: Pagesize alignment required + * @flags: Flags for this allocation + * @id: Returned ID for this allocation + */ +struct kgsl_sparse_phys_alloc { + __u64 size; + __u64 pagesize; + __u64 flags; + unsigned int id; +}; + +#define IOCTL_KGSL_SPARSE_PHYS_ALLOC \ + _IOWR(KGSL_IOC_TYPE, 0x50, struct kgsl_sparse_phys_alloc) + +/** + * struct kgsl_sparse_phys_free - Argument for IOCTL_KGSL_SPARSE_PHYS_FREE + * @id: ID to free + */ +struct kgsl_sparse_phys_free { + unsigned int id; +}; + +#define IOCTL_KGSL_SPARSE_PHYS_FREE \ + _IOW(KGSL_IOC_TYPE, 0x51, struct kgsl_sparse_phys_free) + +/** + * struct kgsl_sparse_virt_alloc - Argument for IOCTL_KGSL_SPARSE_VIRT_ALLOC + * @size: Size in bytes to reserve + * @pagesize: Pagesize alignment required + * @flags: Flags for this allocation + * @id: 
Returned ID for this allocation + * @gpuaddr: Returned GPU address for this allocation + */ +struct kgsl_sparse_virt_alloc { + __u64 size; + __u64 pagesize; + __u64 flags; + __u64 gpuaddr; + unsigned int id; +}; + +#define IOCTL_KGSL_SPARSE_VIRT_ALLOC \ + _IOWR(KGSL_IOC_TYPE, 0x52, struct kgsl_sparse_virt_alloc) + +/** + * struct kgsl_sparse_virt_free - Argument for IOCTL_KGSL_SPARSE_VIRT_FREE + * @id: ID to free + */ +struct kgsl_sparse_virt_free { + unsigned int id; +}; + +#define IOCTL_KGSL_SPARSE_VIRT_FREE \ + _IOW(KGSL_IOC_TYPE, 0x53, struct kgsl_sparse_virt_free) + +/** + * struct kgsl_sparse_binding_object - Argument for kgsl_sparse_bind + * @virtoffset: Offset into the virtual ID + * @physoffset: Offset into the physical ID (bind only) + * @size: Size in bytes to reserve + * @flags: Flags for this kgsl_sparse_binding_object + * @id: Physical ID to bind (bind only) + */ +struct kgsl_sparse_binding_object { + __u64 virtoffset; + __u64 physoffset; + __u64 size; + __u64 flags; + unsigned int id; +}; + +/** + * struct kgsl_sparse_bind - Argument for IOCTL_KGSL_SPARSE_BIND + * @list: List of kgsl_sparse_bind_objects to bind/unbind + * @id: Virtual ID to bind/unbind + * @size: Size of kgsl_sparse_bind_object + * @count: Number of elements in list + * + */ +struct kgsl_sparse_bind { + __u64 __user list; + unsigned int id; + unsigned int size; + unsigned int count; +}; + +#define IOCTL_KGSL_SPARSE_BIND \ + _IOW(KGSL_IOC_TYPE, 0x54, struct kgsl_sparse_bind) + +/** + * struct kgsl_gpu_sparse_command - Argument for + * IOCTL_KGSL_GPU_SPARSE_COMMAND + * @flags: Current flags for the object + * @sparselist: List of kgsl_sparse_binding_object to bind/unbind + * @synclist: List of kgsl_command_syncpoints + * @sparsesize: Size of kgsl_sparse_binding_object + * @numsparse: Number of elements in list + * @sync_size: Size of kgsl_command_syncpoint structure + * @numsyncs: Number of kgsl_command_syncpoints in syncpoint list + * @context_id: Context ID submitting the 
kgsl_gpu_command + * @timestamp: Timestamp for the submitted commands + * @id: Virtual ID to bind/unbind + */ +struct kgsl_gpu_sparse_command { + __u64 flags; + __u64 __user sparselist; + __u64 __user synclist; + unsigned int sparsesize; + unsigned int numsparse; + unsigned int syncsize; + unsigned int numsyncs; + unsigned int context_id; + unsigned int timestamp; + unsigned int id; +}; + +#define IOCTL_KGSL_GPU_SPARSE_COMMAND \ + _IOWR(KGSL_IOC_TYPE, 0x55, struct kgsl_gpu_sparse_command) + +#define KGSL_GPUMEM_RANGE_OP_BIND 1 +#define KGSL_GPUMEM_RANGE_OP_UNBIND 2 + +/** + * struct kgsl_gpumem_bind_range - specifies a bind operation for a virtual + * buffer object + * @child_offset: Offset to the start of memory within the child buffer object + * (not used for KGSL_GPUMEM_RANGE_OP_UNBIND operations) + * @target_offset: GPU address offset within the target VBO + * @length: Amount of memory to map/unmap (in bytes) + * @child_id: The GPU buffer ID for the child object to map/unmap in the VBO + * @op: One of KGSL_GPUMEM_RANGE_OP_BIND or KGSL_GPUMEM_RANGE_OP_UNBIND + * + * This defines a specific bind operation to a virtual buffer object specified + * in &struct kgsl_gpumem_bind_ranges. When @op is KGSL_GPUMEM_RANGE_OP_BIND the + * physical memory starting at @child_offset in the memory object identified by + * @child_id will be mapped into the target virtual buffer object starting at + * @offset for @length bytes. + * + * When @op is KGSL_GPUMEM_RANGE_OP_UNBIND any entries in the target virtual + * buffer object between @offset and @length that belong to @child_id will be + * removed. + */ +struct kgsl_gpumem_bind_range { + __u64 child_offset; + __u64 target_offset; + __u64 length; + __u32 child_id; + __u32 op; +}; + +#define KGSL_GPUMEM_BIND_ASYNC (1UL << 0) +#define KGSL_GPUMEM_BIND_FENCE_OUT (1UL << 1) + +/** + * struct kgsl_gpumem_bind_ranges - Argument to IOCTL_KGSL_GPUMEM_BIND_RANGES to + * either map or unmap a child buffer object into a virtual buffer object. 
+ * @ranges: User memory pointer to an array of range operations of type &struct + * kgsl_gpumem_bind_range + * @ranges_nents: Number of entries in @ranges + * @ranges_size: Size of each entry in @ranges in bytes + * @id: GPU buffer object identifier for the target virtual buffer object + * @flags: Bitmap of KGSL_GPUMEM_BIND_ASYNC and KGSL_GPUMEM_BIND_FENCE_OUT + * @fence_id: If KGSL_GPUMEM_BIND_FENCE_OUT is set in @flags contains the + * identifier for the sync fence that will be signaled after the operation + * completes + * + * Describes a number of range operations to perform on a virtual buffer object + * identified by @id. Ranges should be a __u64 representation of an array of + * &struct kgsl_gpumem_bind_range entries. @ranges_nents will contain the number + * of entries in the array, and @ranges_size will contain the size of each entry + * in the array. If KGSL_GPUMEM_BIND_ASYNC is set the operation will be + * performed asynchronously and the operation will immediately return to the + * user. Otherwise the calling context will block until the operation has + * completed. + * + * If KGSL_GPUMEM_BIND_ASYNC and KGSL_GPUMEM_BIND_FENCE_OUT are both set a sync + * fence will be created and returned in @fence_id. The fence will be signaled + * when the bind operation has completed. 
 + */ +struct kgsl_gpumem_bind_ranges { + __u64 ranges; + __u32 ranges_nents; + __u32 ranges_size; + __u32 id; + __u32 flags; + int fence_id; + /* private: 64 bit compatibility */ + __u32 padding; +}; + +#define IOCTL_KGSL_GPUMEM_BIND_RANGES \ + _IOWR(KGSL_IOC_TYPE, 0x56, struct kgsl_gpumem_bind_ranges) + +#define KGSL_GPU_AUX_COMMAND_BIND (1 << 0) +#define KGSL_GPU_AUX_COMMAND_TIMELINE (1 << 1) +/* Reuse the same flag that GPU COMMAND uses */ +#define KGSL_GPU_AUX_COMMAND_SYNC KGSL_CMDBATCH_SYNC + +/** + * struct kgsl_gpu_aux_command_bind - Descriptor for a GPU AUX bind command + * @rangeslist: Pointer to a list of &struct kgsl_gpumem_bind_range items + * @numranges: Number of entries in @rangeslist + * @rangesize: Size of each entry in @rangeslist + * @target: The GPU memory ID for the target virtual buffer object + * + * Describe a GPU AUX command to bind ranges in a virtual buffer object. + * @rangeslist points to a &struct kgsl_gpumem_bind_ranges which is the same + * struct that is used by IOCTL_KGSL_GPUMEM_BIND_RANGES. @numranges is the size + * of the array in @rangeslist and @rangesize is the size of each entity in + * @rangeslist. @target points to the GPU ID for the target VBO object. + */ +struct kgsl_gpu_aux_command_bind { + __u64 rangeslist; + __u64 numranges; + __u64 rangesize; + __u32 target; +/* private: Padding for 64 bit compatibility */ + __u32 padding; +}; + +/** + * struct kgsl_gpu_aux_command_generic - Container for an AUX command + * @priv: Pointer to the type specific buffer + * @size: Size of the type specific buffer + * @type: type of sync point defined here + * + * Describes a generic container for GPU aux commands. @priv is a user pointer + * to the command struct matching @type of size @size. 
 + */ +struct kgsl_gpu_aux_command_generic { + __u64 priv; + __u64 size; + __u32 type; +/* private: Padding for 64 bit compatibility */ + __u32 padding; +}; + +/** + * struct kgsl_gpu_aux_command - Argument for IOCTL_KGSL_GPU_AUX_COMMAND + * @flags: flags for the object + * @cmdlist: List of &struct kgsl_gpu_aux_command_generic objects + * @cmdsize: Size of each entry in @cmdlist + * @numcmds: Number of entries in @cmdlist + * @synclist: List of &struct kgsl_command_syncpoint objects + * @syncsize: Size of each entry in @synclist + * @numsyncs: Number of entries in @synclist + * @context_id: ID of the context submitting the aux command + * @timestamp: Timestamp for the command submission + * + * Describe a GPU auxiliary command. Auxiliary commands are tasks that are not + * performed on hardware but can be queued like normal GPU commands. Like GPU + * commands AUX commands are assigned a timestamp and processed in order in the + * queue. They can also have standard sync objects attached. The only + * difference is that AUX commands usually perform some sort of administrative + * task in the CPU and are retired in the dispatcher. + * + * For bind operations flags must have one of the KGSL_GPU_AUX_COMMAND_* flags + * set. If sync objects are attached KGSL_GPU_AUX_COMMAND_SYNC must be set. + * @cmdlist points to an array of &struct kgsl_gpu_aux_command_generic structs + * which in turn will have a pointer to a specific command type. + * @numcmds is the number of commands in the list and @cmdsize is the size + * of each entity in @cmdlist. + * + * If KGSL_GPU_AUX_COMMAND_SYNC is specified @synclist will point to an array of + * &struct kgsl_command_syncpoint items in the same fashion as a GPU hardware + * command. @numsyncs and @syncsize describe the list. + * + * @context_id is the context that is submitting the command and @timestamp + * contains the timestamp for the operation. 
+ */ +struct kgsl_gpu_aux_command { + __u64 flags; + __u64 cmdlist; + __u32 cmdsize; + __u32 numcmds; + __u64 synclist; + __u32 syncsize; + __u32 numsyncs; + __u32 context_id; + __u32 timestamp; +}; + +#define IOCTL_KGSL_GPU_AUX_COMMAND \ + _IOWR(KGSL_IOC_TYPE, 0x57, struct kgsl_gpu_aux_command) + +/** + * struct kgsl_timeline_create - Argument for IOCTL_KGSL_TIMELINE_CREATE + * @seqno: Initial sequence number for the timeline + * @id: Timeline identifier [out] + * + * Create a new semaphore timeline and return the identifier in @id. + * The identifier is global for the device and can be used to + * identify the timeline in all subsequent commands. + */ +struct kgsl_timeline_create { + __u64 seqno; + __u32 id; +/* private: padding for 64 bit compatibility */ + __u32 padding; +}; + +#define IOCTL_KGSL_TIMELINE_CREATE \ + _IOWR(KGSL_IOC_TYPE, 0x58, struct kgsl_timeline_create) + +/** + * struct kgsl_timeline_val - A container to store a timeline/sequence number + * pair. + * @seqno: Sequence number to signal/query + * @timeline: The timeline identifier to signal/query + * + * A container to store a timeline/seqno pair used by the query and signal + * ioctls. + */ +struct kgsl_timeline_val { + __u64 seqno; + __u32 timeline; +/* private: padding for 64 bit compatibility */ + __u32 padding; +}; + +#define KGSL_TIMELINE_WAIT_ALL 1 +#define KGSL_TIMELINE_WAIT_ANY 2 + +/** + * struct kgsl_timeline_wait - Argument for IOCTL_KGSL_TIMELINE_WAIT + * @tv_sec: Number of seconds to wait for the signal + * @tv_nsec: Number of nanoseconds to wait for the signal + * @timelines: Address of an array of &struct kgsl_timeline_val entries + * @count: Number of entries in @timeline + * @timelines_size: Size of each entry in @timelines + * @flags: One of KGSL_TIMELINE_WAIT_ALL or KGSL_TIMELINE_WAIT_ANY + * + * Wait for the timelines listed in @timelines to be signaled. 
If @flags is + * equal to KGSL_TIMELINE_WAIT_ALL then wait for all timelines or if + * KGSL_TIMELINE_WAIT_ANY is specified then wait for any of the timelines to + * signal. @tv_sec and @tv_nsec indicates the number of seconds and nanoseconds + * that the process should be blocked waiting for the signal. + */ +struct kgsl_timeline_wait { + __s64 tv_sec; + __s64 tv_nsec; + __u64 timelines; + __u32 count; + __u32 timelines_size; + __u32 flags; +/* private: padding for 64 bit compatibility */ + __u32 padding; +}; + +#define IOCTL_KGSL_TIMELINE_WAIT \ + _IOW(KGSL_IOC_TYPE, 0x59, struct kgsl_timeline_wait) + +#define IOCTL_KGSL_TIMELINE_QUERY \ + _IOWR(KGSL_IOC_TYPE, 0x5A, struct kgsl_timeline_val) + +/** + * struct kgsl_timeline_signal - argument for IOCTL_KGSL_TIMELINE_SIGNAL + * @timelines: Address of an array of &struct kgsl_timeline_val entries + * @count: Number of entries in @timelines + * @timelines_size: Size of each entry in @timelines + * + * Signal an array of timelines of type @struct kgsl_timeline_val. + */ +struct kgsl_timeline_signal { + __u64 timelines; + __u32 count; + __u32 timelines_size; +}; + +#define IOCTL_KGSL_TIMELINE_SIGNAL \ + _IOW(KGSL_IOC_TYPE, 0x5B, struct kgsl_timeline_signal) + +/** + * struct kgsl_timeline_fence_get - argument for IOCTL_KGSL_TIMELINE_FENCE_GET + * @seqno: Sequence number for the fence + * @timeline: Timeline to create the fence on + * @handle: Contains the fence fd for a successful operation [out] + * + * Create a sync file descriptor for the seqnum on the timeline and return it in + * @handle. 
Can be polled and queried just like any other sync file descriptor + */ +struct kgsl_timeline_fence_get { + __u64 seqno; + __u32 timeline; + int handle; +}; + +#define IOCTL_KGSL_TIMELINE_FENCE_GET \ + _IOWR(KGSL_IOC_TYPE, 0x5C, struct kgsl_timeline_fence_get) +/** + * IOCTL_KGSL_TIMELINE_DESTROY takes a u32 identifier for the timeline to + * destroy + */ +#define IOCTL_KGSL_TIMELINE_DESTROY _IOW(KGSL_IOC_TYPE, 0x5D, __u32) + +/** + * struct kgsl_gpu_aux_command_timeline - An aux command for timeline signals + * @timelines: An array of &struct kgsl_timeline_val elements + * @count: The number of entries in @timelines + * @timelines_size: The size of each element in @timelines + * + * An aux command for timeline signals that can be pointed to by + * &struct kgsl_gpu_aux_command_generic when the type is + * KGSL_GPU_AUX_COMMAND_TIMELINE. + */ +struct kgsl_gpu_aux_command_timeline { + __u64 timelines; + __u32 count; + __u32 timelines_size; +}; + +#endif /* _UAPI_MSM_KGSL_H */ diff --git a/kgsl.c b/kgsl.c new file mode 100644 index 0000000000..c59ccff831 --- /dev/null +++ b/kgsl.c @@ -0,0 +1,4809 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kgsl_compat.h" +#include "kgsl_debugfs.h" +#include "kgsl_device.h" +#include "kgsl_eventlog.h" +#include "kgsl_mmu.h" +#include "kgsl_pool.h" +#include "kgsl_reclaim.h" +#include "kgsl_sync.h" +#include "kgsl_sysfs.h" +#include "kgsl_trace.h" + +#ifndef arch_mmap_check +#define arch_mmap_check(addr, len, flags) (0) +#endif + +#ifndef pgprot_writebackcache +#define pgprot_writebackcache(_prot) (_prot) +#endif + +#ifndef pgprot_writethroughcache +#define pgprot_writethroughcache(_prot) (_prot) +#endif + +#if defined(CONFIG_ARM64) || defined(CONFIG_ARM_LPAE) +#define KGSL_DMA_BIT_MASK DMA_BIT_MASK(64) +#else +#define KGSL_DMA_BIT_MASK DMA_BIT_MASK(32) +#endif + +/* List of dmabufs mapped */ +static LIST_HEAD(kgsl_dmabuf_list); +static DEFINE_SPINLOCK(kgsl_dmabuf_lock); + +struct dmabuf_list_entry { + struct page *firstpage; + struct list_head node; + struct list_head dmabuf_list; +}; + +struct kgsl_dma_buf_meta { + struct kgsl_mem_entry *entry; + struct dma_buf_attachment *attach; + struct dma_buf *dmabuf; + struct sg_table *table; + struct dmabuf_list_entry *dle; + struct list_head node; +}; + +static inline struct kgsl_pagetable *_get_memdesc_pagetable( + struct kgsl_pagetable *pt, struct kgsl_mem_entry *entry) +{ + /* if a secured buffer, map it to secure global pagetable */ + if (kgsl_memdesc_is_secured(&entry->memdesc)) + return pt->mmu->securepagetable; + + return pt; +} + +static void kgsl_mem_entry_detach_process(struct kgsl_mem_entry *entry); + +static const struct vm_operations_struct kgsl_gpumem_vm_ops; + +/* + * The memfree list contains the last N blocks of memory that have been freed. 
+ * On a GPU fault we walk the list to see if the faulting address had been + * recently freed and print out a message to that effect + */ + +#define MEMFREE_ENTRIES 512 + +static DEFINE_SPINLOCK(memfree_lock); + +struct memfree_entry { + pid_t ptname; + uint64_t gpuaddr; + uint64_t size; + pid_t pid; + uint64_t flags; +}; + +static struct { + struct memfree_entry *list; + int head; + int tail; +} memfree; + +static inline bool match_memfree_addr(struct memfree_entry *entry, + pid_t ptname, uint64_t gpuaddr) +{ + return ((entry->ptname == ptname) && + (entry->size > 0) && + (gpuaddr >= entry->gpuaddr && + gpuaddr < (entry->gpuaddr + entry->size))); +} +int kgsl_memfree_find_entry(pid_t ptname, uint64_t *gpuaddr, + uint64_t *size, uint64_t *flags, pid_t *pid) +{ + int ptr; + + if (memfree.list == NULL) + return 0; + + spin_lock(&memfree_lock); + + ptr = memfree.head - 1; + if (ptr < 0) + ptr = MEMFREE_ENTRIES - 1; + + /* Walk backwards through the list looking for the last match */ + while (ptr != memfree.tail) { + struct memfree_entry *entry = &memfree.list[ptr]; + + if (match_memfree_addr(entry, ptname, *gpuaddr)) { + *gpuaddr = entry->gpuaddr; + *flags = entry->flags; + *size = entry->size; + *pid = entry->pid; + + spin_unlock(&memfree_lock); + return 1; + } + + ptr = ptr - 1; + + if (ptr < 0) + ptr = MEMFREE_ENTRIES - 1; + } + + spin_unlock(&memfree_lock); + return 0; +} + +static void kgsl_memfree_purge(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr, uint64_t size) +{ + pid_t ptname = pagetable ? 
pagetable->name : 0; + int i; + + if (memfree.list == NULL) + return; + + spin_lock(&memfree_lock); + + for (i = 0; i < MEMFREE_ENTRIES; i++) { + struct memfree_entry *entry = &memfree.list[i]; + + if (entry->ptname != ptname || entry->size == 0) + continue; + + if (gpuaddr > entry->gpuaddr && + gpuaddr < entry->gpuaddr + entry->size) { + /* truncate the end of the entry */ + entry->size = gpuaddr - entry->gpuaddr; + } else if (gpuaddr <= entry->gpuaddr) { + if (gpuaddr + size > entry->gpuaddr && + gpuaddr + size < entry->gpuaddr + entry->size) + /* Truncate the beginning of the entry */ + entry->gpuaddr = gpuaddr + size; + else if (gpuaddr + size >= entry->gpuaddr + entry->size) + /* Remove the entire entry */ + entry->size = 0; + } + } + spin_unlock(&memfree_lock); +} + +static void kgsl_memfree_add(pid_t pid, pid_t ptname, uint64_t gpuaddr, + uint64_t size, uint64_t flags) + +{ + struct memfree_entry *entry; + + if (memfree.list == NULL) + return; + + spin_lock(&memfree_lock); + + entry = &memfree.list[memfree.head]; + + entry->pid = pid; + entry->ptname = ptname; + entry->gpuaddr = gpuaddr; + entry->size = size; + entry->flags = flags; + + memfree.head = (memfree.head + 1) % MEMFREE_ENTRIES; + + if (memfree.head == memfree.tail) + memfree.tail = (memfree.tail + 1) % MEMFREE_ENTRIES; + + spin_unlock(&memfree_lock); +} + +int kgsl_readtimestamp(struct kgsl_device *device, void *priv, + enum kgsl_timestamp_type type, unsigned int *timestamp) +{ + if (device) + return device->ftbl->readtimestamp(device, priv, type, + timestamp); + return -EINVAL; + +} + +const char *kgsl_context_type(int type) +{ + if (type == KGSL_CONTEXT_TYPE_GL) + return "GL"; + else if (type == KGSL_CONTEXT_TYPE_CL) + return "CL"; + else if (type == KGSL_CONTEXT_TYPE_C2D) + return "C2D"; + else if (type == KGSL_CONTEXT_TYPE_RS) + return "RS"; + else if (type == KGSL_CONTEXT_TYPE_VK) + return "VK"; + + return "ANY"; +} + +/* Scheduled by kgsl_mem_entry_put_deferred() */ +static void 
_deferred_put(struct work_struct *work) +{ + struct kgsl_mem_entry *entry = + container_of(work, struct kgsl_mem_entry, work); + + kgsl_mem_entry_put(entry); +} + +static struct kgsl_mem_entry *kgsl_mem_entry_create(void) +{ + struct kgsl_mem_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); + + if (entry != NULL) { + kref_init(&entry->refcount); + /* put this ref in userspace memory alloc and map ioctls */ + kref_get(&entry->refcount); + atomic_set(&entry->map_count, 0); + } + + return entry; +} + +static void add_dmabuf_list(struct kgsl_dma_buf_meta *meta) +{ + struct kgsl_device *device = dev_get_drvdata(meta->attach->dev); + struct dmabuf_list_entry *dle; + struct page *page; + + /* + * Get the first page. We will use it to identify the imported + * buffer, since the same buffer can be mapped as different + * mem entries. + */ + page = sg_page(meta->table->sgl); + + spin_lock(&kgsl_dmabuf_lock); + + /* Go through the list to see if we imported this buffer before */ + list_for_each_entry(dle, &kgsl_dmabuf_list, node) { + if (dle->firstpage == page) { + /* Add the dmabuf meta to the list for this dle */ + meta->dle = dle; + list_add(&meta->node, &dle->dmabuf_list); + spin_unlock(&kgsl_dmabuf_lock); + return; + } + } + + /* This is a new buffer. 
Add a new entry for it */ + dle = kzalloc(sizeof(*dle), GFP_ATOMIC); + if (dle) { + dle->firstpage = page; + INIT_LIST_HEAD(&dle->dmabuf_list); + list_add(&dle->node, &kgsl_dmabuf_list); + meta->dle = dle; + list_add(&meta->node, &dle->dmabuf_list); + kgsl_trace_gpu_mem_total(device, + meta->entry->memdesc.size); + } + spin_unlock(&kgsl_dmabuf_lock); +} + +static void remove_dmabuf_list(struct kgsl_dma_buf_meta *meta) +{ + struct kgsl_device *device = dev_get_drvdata(meta->attach->dev); + struct dmabuf_list_entry *dle = meta->dle; + + if (!dle) + return; + + spin_lock(&kgsl_dmabuf_lock); + list_del(&meta->node); + if (list_empty(&dle->dmabuf_list)) { + list_del(&dle->node); + kfree(dle); + kgsl_trace_gpu_mem_total(device, + -(meta->entry->memdesc.size)); + } + spin_unlock(&kgsl_dmabuf_lock); +} + +#ifdef CONFIG_DMA_SHARED_BUFFER +static void kgsl_destroy_ion(struct kgsl_memdesc *memdesc) +{ + struct kgsl_mem_entry *entry = container_of(memdesc, + struct kgsl_mem_entry, memdesc); + struct kgsl_dma_buf_meta *meta = entry->priv_data; + + if (meta != NULL) { + remove_dmabuf_list(meta); + dma_buf_detach(meta->dmabuf, meta->attach); + dma_buf_put(meta->dmabuf); + kfree(meta); + } + + memdesc->sgt = NULL; +} + +static const struct kgsl_memdesc_ops kgsl_dmabuf_ops = { + .free = kgsl_destroy_ion, + .put_gpuaddr = kgsl_unmap_and_put_gpuaddr, +}; +#endif + +static void kgsl_destroy_anon(struct kgsl_memdesc *memdesc) +{ + int i = 0, j; + struct scatterlist *sg; + struct page *page; + + for_each_sg(memdesc->sgt->sgl, sg, memdesc->sgt->nents, i) { + page = sg_page(sg); + for (j = 0; j < (sg->length >> PAGE_SHIFT); j++) { + + /* + * Mark the page in the scatterlist as dirty if they + * were writable by the GPU. + */ + if (!(memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY)) + set_page_dirty_lock(nth_page(page, j)); + + /* + * Put the page reference taken using get_user_pages + * during memdesc_sg_virt. 
+ */ + put_page(nth_page(page, j)); + } + } + + sg_free_table(memdesc->sgt); + kfree(memdesc->sgt); + memdesc->sgt = NULL; +} + +void +kgsl_mem_entry_destroy(struct kref *kref) +{ + struct kgsl_mem_entry *entry = container_of(kref, + struct kgsl_mem_entry, + refcount); + unsigned int memtype; + + if (entry == NULL) + return; + + /* pull out the memtype before the flags get cleared */ + memtype = kgsl_memdesc_usermem_type(&entry->memdesc); + + /* + * VBO allocations at gpumem_alloc_vbo_entry are not added into stats + * (using kgsl_process_add_stats) so do not subtract here. For all other + * allocations subtract before freeing memdesc + */ + if (!(entry->memdesc.flags & KGSL_MEMFLAGS_VBO)) + atomic64_sub(entry->memdesc.size, &entry->priv->stats[memtype].cur); + + /* Detach from process list */ + kgsl_mem_entry_detach_process(entry); + + if (memtype != KGSL_MEM_ENTRY_KERNEL) + atomic_long_sub(entry->memdesc.size, + &kgsl_driver.stats.mapped); + + kgsl_sharedmem_free(&entry->memdesc); + + kfree(entry); +} + +/* Commit the entry to the process so it can be accessed by other operations */ +static void kgsl_mem_entry_commit_process(struct kgsl_mem_entry *entry) +{ + if (!entry) + return; + + spin_lock(&entry->priv->mem_lock); + idr_replace(&entry->priv->mem_idr, entry, entry->id); + spin_unlock(&entry->priv->mem_lock); +} + +static int kgsl_mem_entry_attach_to_process(struct kgsl_device *device, + struct kgsl_process_private *process, + struct kgsl_mem_entry *entry) +{ + struct kgsl_memdesc *memdesc = &entry->memdesc; + int ret, id; + + ret = kgsl_process_private_get(process); + if (!ret) + return -EBADF; + + /* Assign a gpu address */ + if (!kgsl_memdesc_use_cpu_map(memdesc) && + kgsl_mmu_get_mmutype(device) != KGSL_MMU_TYPE_NONE) { + struct kgsl_pagetable *pagetable; + + pagetable = kgsl_memdesc_is_secured(memdesc) ? 
+ device->mmu.securepagetable : process->pagetable; + + ret = kgsl_mmu_get_gpuaddr(pagetable, memdesc); + if (ret) { + kgsl_process_private_put(process); + return ret; + } + } + + idr_preload(GFP_KERNEL); + spin_lock(&process->mem_lock); + /* Allocate the ID but don't attach the pointer just yet */ + id = idr_alloc(&process->mem_idr, NULL, 1, 0, GFP_NOWAIT); + spin_unlock(&process->mem_lock); + idr_preload_end(); + + if (id < 0) { + if (!kgsl_memdesc_use_cpu_map(memdesc)) + kgsl_mmu_put_gpuaddr(memdesc->pagetable, memdesc); + kgsl_process_private_put(process); + return id; + } + + entry->id = id; + entry->priv = process; + + return 0; +} + +/* + * Attach the memory object to a process by (possibly) getting a GPU address and + * (possibly) mapping it + */ +static int kgsl_mem_entry_attach_and_map(struct kgsl_device *device, + struct kgsl_process_private *process, + struct kgsl_mem_entry *entry) +{ + struct kgsl_memdesc *memdesc = &entry->memdesc; + int ret; + + ret = kgsl_mem_entry_attach_to_process(device, process, entry); + if (ret) + return ret; + + if (memdesc->gpuaddr) { + /* + * Map the memory if a GPU address is already assigned, either + * through kgsl_mem_entry_attach_to_process() or via some other + * SVM process + */ + ret = kgsl_mmu_map(memdesc->pagetable, memdesc); + + if (ret) { + kgsl_mem_entry_detach_process(entry); + return ret; + } + } + + kgsl_memfree_purge(memdesc->pagetable, memdesc->gpuaddr, + memdesc->size); + + return ret; +} + +/* Detach a memory entry from a process and unmap it from the MMU */ +static void kgsl_mem_entry_detach_process(struct kgsl_mem_entry *entry) +{ + if (entry == NULL) + return; + + /* + * First remove the entry from mem_idr list + * so that no one can operate on obsolete values + */ + spin_lock(&entry->priv->mem_lock); + if (entry->id != 0) + idr_remove(&entry->priv->mem_idr, entry->id); + entry->id = 0; + + spin_unlock(&entry->priv->mem_lock); + + kgsl_sharedmem_put_gpuaddr(&entry->memdesc); + + if 
(entry->memdesc.priv & KGSL_MEMDESC_RECLAIMED) + atomic_sub(entry->memdesc.page_count, + &entry->priv->unpinned_page_count); + + kgsl_process_private_put(entry->priv); + + entry->priv = NULL; +} + +#ifdef CONFIG_QCOM_KGSL_CONTEXT_DEBUG +static void kgsl_context_debug_info(struct kgsl_device *device) +{ + struct kgsl_context *context; + struct kgsl_process_private *p; + int next; + /* + * Keep an interval between consecutive logging to avoid + * flooding the kernel log + */ + static DEFINE_RATELIMIT_STATE(_rs, 10 * HZ, 1); + + if (!__ratelimit(&_rs)) + return; + + dev_info(device->dev, "KGSL active contexts:\n"); + dev_info(device->dev, "pid process total attached detached\n"); + + read_lock(&kgsl_driver.proclist_lock); + read_lock(&device->context_lock); + + list_for_each_entry(p, &kgsl_driver.process_list, list) { + int total_contexts = 0, num_detached = 0; + + idr_for_each_entry(&device->context_idr, context, next) { + if (context->proc_priv == p) { + total_contexts++; + if (kgsl_context_detached(context)) + num_detached++; + } + } + + dev_info(device->dev, "%-8u %-15.15s %-8d %-10d %-10d\n", + pid_nr(p->pid), p->comm, total_contexts, + total_contexts - num_detached, num_detached); + } + + read_unlock(&device->context_lock); + read_unlock(&kgsl_driver.proclist_lock); +} +#else +static void kgsl_context_debug_info(struct kgsl_device *device) +{ +} +#endif + +/** + * kgsl_context_dump() - dump information about a draw context + * @device: KGSL device that owns the context + * @context: KGSL context to dump information about + * + * Dump specific information about the context to the kernel log. 
Used for + * fence timeout callbacks + */ +void kgsl_context_dump(struct kgsl_context *context) +{ + struct kgsl_device *device; + + if (_kgsl_context_get(context) == 0) + return; + + device = context->device; + + if (kgsl_context_detached(context)) { + dev_err(device->dev, " context[%u]: context detached\n", + context->id); + } else if (device->ftbl->drawctxt_dump != NULL) + device->ftbl->drawctxt_dump(device, context); + + kgsl_context_put(context); +} + +/* Allocate a new context ID */ +static int _kgsl_get_context_id(struct kgsl_device *device) +{ + int id; + + idr_preload(GFP_KERNEL); + write_lock(&device->context_lock); + /* Allocate the slot but don't put a pointer in it yet */ + id = idr_alloc(&device->context_idr, NULL, 1, + KGSL_MEMSTORE_MAX, GFP_NOWAIT); + write_unlock(&device->context_lock); + idr_preload_end(); + + return id; +} + +/** + * kgsl_context_init() - helper to initialize kgsl_context members + * @dev_priv: the owner of the context + * @context: the newly created context struct, should be allocated by + * the device specific drawctxt_create function. + * + * This is a helper function for the device specific drawctxt_create + * function to initialize the common members of its context struct. + * If this function succeeds, reference counting is active in the context + * struct and the caller should kgsl_context_put() it on error. + * If it fails, the caller should just free the context structure + * it passed in. + */ +int kgsl_context_init(struct kgsl_device_private *dev_priv, + struct kgsl_context *context) +{ + struct kgsl_device *device = dev_priv->device; + int ret = 0, id; + struct kgsl_process_private *proc_priv = dev_priv->process_priv; + + /* + * Read and increment the context count under lock to make sure + * no process goes beyond the specified context limit. 
+ */ + spin_lock(&proc_priv->ctxt_count_lock); + if (atomic_read(&proc_priv->ctxt_count) > KGSL_MAX_CONTEXTS_PER_PROC) { + dev_err(device->dev, + "Per process context limit reached for pid %u\n", + pid_nr(dev_priv->process_priv->pid)); + spin_unlock(&proc_priv->ctxt_count_lock); + kgsl_context_debug_info(device); + return -ENOSPC; + } + + atomic_inc(&proc_priv->ctxt_count); + spin_unlock(&proc_priv->ctxt_count_lock); + + id = _kgsl_get_context_id(device); + if (id == -ENOSPC) { + /* + * Before declaring that there are no contexts left try + * flushing the event workqueue just in case there are + * detached contexts waiting to finish + */ + + flush_workqueue(device->events_wq); + id = _kgsl_get_context_id(device); + } + + if (id < 0) { + if (id == -ENOSPC) { + dev_warn(device->dev, + "cannot have more than %zu contexts due to memstore limitation\n", + KGSL_MEMSTORE_MAX); + kgsl_context_debug_info(device); + } + atomic_dec(&proc_priv->ctxt_count); + return id; + } + + context->id = id; + + kref_init(&context->refcount); + /* + * Get a reference to the process private so it's not destroyed, until + * the context is destroyed.
This will also prevent the pagetable + * from being destroyed + */ + if (!kgsl_process_private_get(dev_priv->process_priv)) { + ret = -EBADF; + goto out; + } + context->device = dev_priv->device; + context->dev_priv = dev_priv; + context->proc_priv = dev_priv->process_priv; + context->tid = task_pid_nr(current); + + ret = kgsl_sync_timeline_create(context); + if (ret) { + kgsl_process_private_put(dev_priv->process_priv); + goto out; + } + + kgsl_add_event_group(device, &context->events, context, + kgsl_readtimestamp, context, "context-%d", id); + +out: + if (ret) { + atomic_dec(&proc_priv->ctxt_count); + write_lock(&device->context_lock); + idr_remove(&dev_priv->device->context_idr, id); + write_unlock(&device->context_lock); + } + + return ret; +} + +/** + * kgsl_context_detach() - Release the "master" context reference + * @context: The context that will be detached + * + * This is called when a context becomes unusable, because userspace + * has requested for it to be destroyed. The context itself may + * exist a bit longer until its reference count goes to zero. + * Other code referencing the context can detect that it has been + * detached by checking the KGSL_CONTEXT_PRIV_DETACHED bit in + * context->priv. + */ +void kgsl_context_detach(struct kgsl_context *context) +{ + struct kgsl_device *device; + + if (context == NULL) + return; + + /* + * Mark the context as detached to keep others from using + * the context before it gets fully removed, and to make sure + * we don't try to detach twice. + */ + if (test_and_set_bit(KGSL_CONTEXT_PRIV_DETACHED, &context->priv)) + return; + + device = context->device; + + trace_kgsl_context_detach(device, context); + + context->device->ftbl->drawctxt_detach(context); + + /* + * Cancel all pending events after the device-specific context is + * detached, to avoid possibly freeing memory while it is still + * in use by the GPU. 
+ */ + kgsl_cancel_events(device, &context->events); + + /* Remove the event group from the list */ + kgsl_del_event_group(device, &context->events); + + kgsl_sync_timeline_detach(context->ktimeline); + kgsl_context_put(context); +} + +void +kgsl_context_destroy(struct kref *kref) +{ + struct kgsl_context *context = container_of(kref, struct kgsl_context, + refcount); + struct kgsl_device *device = context->device; + + trace_kgsl_context_destroy(device, context); + + /* + * It's not safe to destroy the context if it's not detached as GPU + * may still be executing commands + */ + BUG_ON(!kgsl_context_detached(context)); + + kgsl_sync_timeline_put(context->ktimeline); + + write_lock(&device->context_lock); + if (context->id != KGSL_CONTEXT_INVALID) { + + /* Clear the timestamps in the memstore during destroy */ + kgsl_sharedmem_writel(device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, soptimestamp), 0); + kgsl_sharedmem_writel(device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp), 0); + + /* clear device power constraint */ + if (context->id == device->pwrctrl.constraint.owner_id) { + trace_kgsl_constraint(device, + device->pwrctrl.constraint.type, + device->pwrctrl.active_pwrlevel, + 0); + device->pwrctrl.constraint.type = KGSL_CONSTRAINT_NONE; + } + + atomic_dec(&context->proc_priv->ctxt_count); + idr_remove(&device->context_idr, context->id); + context->id = KGSL_CONTEXT_INVALID; + } + write_unlock(&device->context_lock); + kgsl_process_private_put(context->proc_priv); + + device->ftbl->drawctxt_destroy(context); +} + +struct kgsl_device *kgsl_get_device(int dev_idx) +{ + int i; + struct kgsl_device *ret = NULL; + + mutex_lock(&kgsl_driver.devlock); + + for (i = 0; i < ARRAY_SIZE(kgsl_driver.devp); i++) { + if (kgsl_driver.devp[i] && kgsl_driver.devp[i]->id == dev_idx) { + ret = kgsl_driver.devp[i]; + break; + } + } + + mutex_unlock(&kgsl_driver.devlock); + return ret; +} + +static struct kgsl_device *kgsl_get_minor(int minor) +{ + struct 
kgsl_device *ret = NULL; + + if (minor < 0 || minor >= ARRAY_SIZE(kgsl_driver.devp)) + return NULL; + + mutex_lock(&kgsl_driver.devlock); + ret = kgsl_driver.devp[minor]; + mutex_unlock(&kgsl_driver.devlock); + + return ret; +} + +/** + * kgsl_check_timestamp() - return true if the specified timestamp is retired + * @device: Pointer to the KGSL device to check + * @context: Pointer to the context for the timestamp + * @timestamp: The timestamp to compare + */ +bool kgsl_check_timestamp(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp) +{ + unsigned int ts_processed; + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, + &ts_processed); + + return (timestamp_cmp(ts_processed, timestamp) >= 0); +} + +/** + * kgsl_destroy_process_private() - Cleanup function to free process private + * @kref: - Pointer to object being destroyed's kref struct + * Free struct object and all other resources attached to it. + * Since the function can be used when not all resources inside process + * private have been allocated, there is a check to (before each resource + * cleanup) see if the struct member being cleaned is in fact allocated or not. + * If the value is not NULL, resource is freed. 
+ */ +static void kgsl_destroy_process_private(struct kref *kref) +{ + struct kgsl_process_private *private = container_of(kref, + struct kgsl_process_private, refcount); + + mutex_lock(&kgsl_driver.process_mutex); + debugfs_remove_recursive(private->debug_root); + kobject_put(&private->kobj); + + /* When using global pagetables, do not detach global pagetable */ + if (private->pagetable->name != KGSL_MMU_GLOBAL_PT) + kgsl_mmu_detach_pagetable(private->pagetable); + + /* Remove the process struct from the master list */ + write_lock(&kgsl_driver.proclist_lock); + list_del(&private->list); + write_unlock(&kgsl_driver.proclist_lock); + mutex_unlock(&kgsl_driver.process_mutex); + + put_pid(private->pid); + idr_destroy(&private->mem_idr); + idr_destroy(&private->syncsource_idr); + + /* When using global pagetables, do not put global pagetable */ + if (private->pagetable->name != KGSL_MMU_GLOBAL_PT) + kgsl_mmu_putpagetable(private->pagetable); + + kfree(private); +} + +void +kgsl_process_private_put(struct kgsl_process_private *private) +{ + if (private) + kref_put(&private->refcount, kgsl_destroy_process_private); +} + +/** + * kgsl_process_private_find() - Find the process associated with the specified + * pid + * @pid: pid_t of the process to search for + * Return the process struct for the given ID.
+ */ +struct kgsl_process_private *kgsl_process_private_find(pid_t pid) +{ + struct kgsl_process_private *p, *private = NULL; + + read_lock(&kgsl_driver.proclist_lock); + list_for_each_entry(p, &kgsl_driver.process_list, list) { + if (pid_nr(p->pid) == pid) { + if (kgsl_process_private_get(p)) + private = p; + break; + } + } + read_unlock(&kgsl_driver.proclist_lock); + + return private; +} + +static struct kgsl_process_private *kgsl_process_private_new( + struct kgsl_device *device) +{ + struct kgsl_process_private *private; + struct pid *cur_pid = get_task_pid(current->group_leader, PIDTYPE_PID); + + /* Search in the process list */ + list_for_each_entry(private, &kgsl_driver.process_list, list) { + if (private->pid == cur_pid) { + if (!kgsl_process_private_get(private)) + /* + * This will happen only if refcount is zero + * i.e. destroy is triggered but didn't complete + * yet. Return -EEXIST to indicate caller that + * destroy is pending to allow caller to take + * appropriate action. + */ + private = ERR_PTR(-EEXIST); + /* + * We need to hold only one reference to the PID for + * each process struct to avoid overflowing the + * reference counter which can lead to use-after-free. 
+ */ + put_pid(cur_pid); + return private; + } + } + + /* Create a new object */ + private = kzalloc(sizeof(struct kgsl_process_private), GFP_KERNEL); + if (private == NULL) { + put_pid(cur_pid); + return ERR_PTR(-ENOMEM); + } + + kref_init(&private->refcount); + + private->pid = cur_pid; + get_task_comm(private->comm, current->group_leader); + + spin_lock_init(&private->mem_lock); + spin_lock_init(&private->syncsource_lock); + spin_lock_init(&private->ctxt_count_lock); + + idr_init(&private->mem_idr); + idr_init(&private->syncsource_idr); + + kgsl_reclaim_proc_private_init(private); + + /* Allocate a pagetable for the new process object */ + private->pagetable = kgsl_mmu_getpagetable(&device->mmu, pid_nr(cur_pid)); + if (IS_ERR(private->pagetable)) { + int err = PTR_ERR(private->pagetable); + + idr_destroy(&private->mem_idr); + idr_destroy(&private->syncsource_idr); + put_pid(private->pid); + + kfree(private); + private = ERR_PTR(err); + return private; + } + + kgsl_process_init_sysfs(device, private); + kgsl_process_init_debugfs(private); + write_lock(&kgsl_driver.proclist_lock); + list_add(&private->list, &kgsl_driver.process_list); + write_unlock(&kgsl_driver.proclist_lock); + + return private; +} + +static void process_release_memory(struct kgsl_process_private *private) +{ + struct kgsl_mem_entry *entry; + int next = 0; + + while (1) { + spin_lock(&private->mem_lock); + entry = idr_get_next(&private->mem_idr, &next); + if (entry == NULL) { + spin_unlock(&private->mem_lock); + break; + } + /* + * If the free pending flag is not set it means that user space + * did not free its reference to this entry, in that case + * free a reference to this entry, other references are from + * within kgsl so they will be freed eventually by kgsl + */ + if (!entry->pending_free) { + entry->pending_free = 1; + spin_unlock(&private->mem_lock); + kgsl_mem_entry_put(entry); + } else { + spin_unlock(&private->mem_lock); + } + next = next + 1; + } +} + +static void
kgsl_process_private_close(struct kgsl_device_private *dev_priv, + struct kgsl_process_private *private) +{ + mutex_lock(&kgsl_driver.process_mutex); + + if (--private->fd_count > 0) { + mutex_unlock(&kgsl_driver.process_mutex); + kgsl_process_private_put(private); + return; + } + + /* + * If this is the last file on the process garbage collect + * any outstanding resources + */ + process_release_memory(private); + + /* Release all syncsource objects from process private */ + kgsl_syncsource_process_release_syncsources(private); + + mutex_unlock(&kgsl_driver.process_mutex); + + kgsl_process_private_put(private); +} + +static struct kgsl_process_private *_process_private_open( + struct kgsl_device *device) +{ + struct kgsl_process_private *private; + + mutex_lock(&kgsl_driver.process_mutex); + private = kgsl_process_private_new(device); + + if (IS_ERR(private)) + goto done; + + private->fd_count++; + +done: + mutex_unlock(&kgsl_driver.process_mutex); + return private; +} + +static struct kgsl_process_private *kgsl_process_private_open( + struct kgsl_device *device) +{ + struct kgsl_process_private *private; + int i; + + private = _process_private_open(device); + + /* + * If we get error and error is -EEXIST that means previous process + * private destroy is triggered but didn't complete. Retry creating + * process private after sometime to allow previous destroy to complete. 
+ */ + for (i = 0; (PTR_ERR_OR_ZERO(private) == -EEXIST) && (i < 5); i++) { + usleep_range(10, 100); + private = _process_private_open(device); + } + + return private; +} + +int kgsl_gpu_frame_count(pid_t pid, u64 *frame_count) +{ + struct kgsl_process_private *p; + + if (!frame_count) + return -EINVAL; + + p = kgsl_process_private_find(pid); + if (!p) + return -ENOENT; + + *frame_count = atomic64_read(&p->frame_count); + kgsl_process_private_put(p); + + return 0; +} +EXPORT_SYMBOL(kgsl_gpu_frame_count); + +static int kgsl_close_device(struct kgsl_device *device) +{ + int result = 0; + + mutex_lock(&device->mutex); + if (device->open_count == 1) + result = device->ftbl->last_close(device); + + /* + * We must decrement the open_count after last_close() has finished. + * This is because last_close() relinquishes device mutex while + * waiting for active count to become 0. This opens up a window + * where a new process can come in, see that open_count is 0, and + * initiate a first_open(). This can potentially mess up the power + * state machine. To avoid a first_open() from happening before + * last_close() has finished, decrement the open_count after + * last_close(). 
+ */ + device->open_count--; + mutex_unlock(&device->mutex); + return result; + +} + +static void device_release_contexts(struct kgsl_device_private *dev_priv) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_context *context; + int next = 0; + int result = 0; + + while (1) { + read_lock(&device->context_lock); + context = idr_get_next(&device->context_idr, &next); + + if (context == NULL) { + read_unlock(&device->context_lock); + break; + } else if (context->dev_priv == dev_priv) { + /* + * Hold a reference to the context in case somebody + * tries to put it while we are detaching + */ + result = _kgsl_context_get(context); + } + read_unlock(&device->context_lock); + + if (result) { + kgsl_context_detach(context); + kgsl_context_put(context); + result = 0; + } + + next = next + 1; + } +} + +static int kgsl_release(struct inode *inodep, struct file *filep) +{ + struct kgsl_device_private *dev_priv = filep->private_data; + struct kgsl_device *device = dev_priv->device; + int result; + + filep->private_data = NULL; + + /* Release the contexts for the file */ + device_release_contexts(dev_priv); + + /* Close down the process wide resources for the file */ + kgsl_process_private_close(dev_priv, dev_priv->process_priv); + + /* Destroy the device-specific structure */ + device->ftbl->device_private_destroy(dev_priv); + + result = kgsl_close_device(device); + pm_runtime_put(&device->pdev->dev); + + return result; +} + +static int kgsl_open_device(struct kgsl_device *device) +{ + int result = 0; + + mutex_lock(&device->mutex); + if (device->open_count == 0) { + result = device->ftbl->first_open(device); + if (result) + goto out; + } + device->open_count++; +out: + mutex_unlock(&device->mutex); + return result; +} + +static int kgsl_open(struct inode *inodep, struct file *filep) +{ + int result; + struct kgsl_device_private *dev_priv; + struct kgsl_device *device; + unsigned int minor = iminor(inodep); + + device = kgsl_get_minor(minor); + if (device == 
NULL) { + pr_err("kgsl: No device found\n"); + return -ENODEV; + } + + result = pm_runtime_get_sync(&device->pdev->dev); + if (result < 0) { + dev_err(device->dev, + "Runtime PM: Unable to wake up the device, rc = %d\n", + result); + return result; + } + result = 0; + + dev_priv = device->ftbl->device_private_create(); + if (dev_priv == NULL) { + result = -ENOMEM; + goto err; + } + + dev_priv->device = device; + filep->private_data = dev_priv; + + result = kgsl_open_device(device); + if (result) + goto err; + + /* + * Get file (per process) private struct. This must be done + * after the first start so that the global pagetable mappings + * are set up before we create the per-process pagetable. + */ + dev_priv->process_priv = kgsl_process_private_open(device); + if (IS_ERR(dev_priv->process_priv)) { + result = PTR_ERR(dev_priv->process_priv); + kgsl_close_device(device); + goto err; + } + +err: + if (result) { + filep->private_data = NULL; + kfree(dev_priv); + pm_runtime_put(&device->pdev->dev); + } + return result; +} + +#define GPUADDR_IN_MEMDESC(_val, _memdesc) \ + (((_val) >= (_memdesc)->gpuaddr) && \ + ((_val) < ((_memdesc)->gpuaddr + (_memdesc)->size))) + +/** + * kgsl_sharedmem_find() - Find a gpu memory allocation + * + * @private: private data for the process to check. + * @gpuaddr: start address of the region + * + * Find a gpu allocation. Caller must kgsl_mem_entry_put() + * the returned entry when finished using it. 
+ */ +struct kgsl_mem_entry * __must_check +kgsl_sharedmem_find(struct kgsl_process_private *private, uint64_t gpuaddr) +{ + int id; + struct kgsl_mem_entry *entry, *ret = NULL; + + if (!private) + return NULL; + + if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, gpuaddr) && + !kgsl_mmu_gpuaddr_in_range( + private->pagetable->mmu->securepagetable, gpuaddr)) + return NULL; + + spin_lock(&private->mem_lock); + idr_for_each_entry(&private->mem_idr, entry, id) { + if (GPUADDR_IN_MEMDESC(gpuaddr, &entry->memdesc)) { + if (!entry->pending_free) + ret = kgsl_mem_entry_get(entry); + break; + } + } + spin_unlock(&private->mem_lock); + + return ret; +} + +static struct kgsl_mem_entry * __must_check +kgsl_sharedmem_find_id_flags(struct kgsl_process_private *process, + unsigned int id, uint64_t flags) +{ + struct kgsl_mem_entry *entry, *ret = NULL; + + spin_lock(&process->mem_lock); + entry = idr_find(&process->mem_idr, id); + if (entry) + if (!entry->pending_free && + (flags & entry->memdesc.flags) == flags) + ret = kgsl_mem_entry_get(entry); + spin_unlock(&process->mem_lock); + + return ret; +} + +/** + * kgsl_sharedmem_find_id() - find a memory entry by id + * @process: the owning process + * @id: id to find + * + * @returns - the mem_entry or NULL + * + * Caller must kgsl_mem_entry_put() the returned entry, when finished using + * it. 
+ */ +struct kgsl_mem_entry * __must_check +kgsl_sharedmem_find_id(struct kgsl_process_private *process, unsigned int id) +{ + return kgsl_sharedmem_find_id_flags(process, id, 0); +} + +/** + * kgsl_mem_entry_unset_pend() - Unset the pending free flag of an entry + * @entry - The memory entry + */ +static inline void kgsl_mem_entry_unset_pend(struct kgsl_mem_entry *entry) +{ + if (entry == NULL) + return; + spin_lock(&entry->priv->mem_lock); + entry->pending_free = 0; + spin_unlock(&entry->priv->mem_lock); +} + +/** + * kgsl_mem_entry_set_pend() - Set the pending free flag of a memory entry + * @entry - The memory entry + * + * @returns - true if pending flag was 0 else false + * + * This function will set the pending free flag if it is previously unset. Used + * to prevent race condition between ioctls calling free/freememontimestamp + * on the same entry. Whichever thread set's the flag first will do the free. + */ +static inline bool kgsl_mem_entry_set_pend(struct kgsl_mem_entry *entry) +{ + bool ret = false; + + if (entry == NULL) + return false; + + spin_lock(&entry->priv->mem_lock); + if (!entry->pending_free) { + entry->pending_free = 1; + ret = true; + } + spin_unlock(&entry->priv->mem_lock); + return ret; +} + +static int kgsl_get_ctxt_fault_stats(struct kgsl_context *context, + struct kgsl_context_property *ctxt_property) +{ + struct kgsl_context_property_fault fault_stats; + size_t copy; + + /* Return the size of the subtype struct */ + if (ctxt_property->size == 0) { + ctxt_property->size = sizeof(fault_stats); + return 0; + } + + memset(&fault_stats, 0, sizeof(fault_stats)); + + copy = min_t(size_t, ctxt_property->size, sizeof(fault_stats)); + + fault_stats.faults = context->total_fault_count; + fault_stats.timestamp = context->last_faulted_cmd_ts; + + /* + * Copy the context fault stats to data which also serves as + * the out parameter. 
+ */ + if (copy_to_user(u64_to_user_ptr(ctxt_property->data), + &fault_stats, copy)) + return -EFAULT; + + return 0; +} + +static long kgsl_get_ctxt_properties(struct kgsl_device_private *dev_priv, + struct kgsl_device_getproperty *param) +{ + /* Return fault stats of given context */ + struct kgsl_context_property ctxt_property; + struct kgsl_context *context; + size_t copy; + long ret; + + /* + * If sizebytes is zero, tell the user how big the + * ctxt_property struct should be. + */ + if (param->sizebytes == 0) { + param->sizebytes = sizeof(ctxt_property); + return 0; + } + + memset(&ctxt_property, 0, sizeof(ctxt_property)); + + copy = min_t(size_t, param->sizebytes, sizeof(ctxt_property)); + + /* We expect the value passed in to contain the context id */ + if (copy_from_user(&ctxt_property, param->value, copy)) + return -EFAULT; + + /* ctxt type zero is not valid, as we consider it as uninitialized. */ + if (ctxt_property.type == 0) + return -EINVAL; + + context = kgsl_context_get_owner(dev_priv, + ctxt_property.contextid); + if (!context) + return -EINVAL; + + if (ctxt_property.type == KGSL_CONTEXT_PROP_FAULTS) + ret = kgsl_get_ctxt_fault_stats(context, &ctxt_property); + else + ret = -EOPNOTSUPP; + + kgsl_context_put(context); + + return ret; +} + +static long kgsl_prop_version(struct kgsl_device_private *dev_priv, + struct kgsl_device_getproperty *param) +{ + struct kgsl_version version = { + .drv_major = KGSL_VERSION_MAJOR, + .drv_minor = KGSL_VERSION_MINOR, + .dev_major = 3, + .dev_minor = 1, + }; + + if (param->sizebytes != sizeof(version)) + return -EINVAL; + + if (copy_to_user(param->value, &version, sizeof(version))) + return -EFAULT; + + return 0; +} + +/* Return reset status of given context and clear it */ +static long kgsl_prop_gpu_reset_stat(struct kgsl_device_private *dev_priv, + struct kgsl_device_getproperty *param) +{ + u32 id; + struct kgsl_context *context; + + if (param->sizebytes != sizeof(id)) + return -EINVAL; + + /* We expect the value 
passed in to contain the context id */ + if (copy_from_user(&id, param->value, sizeof(id))) + return -EFAULT; + + context = kgsl_context_get_owner(dev_priv, id); + if (!context) + return -EINVAL; + + /* + * Copy the reset status to value which also serves as + * the out parameter + */ + id = context->reset_status; + + context->reset_status = KGSL_CTX_STAT_NO_ERROR; + kgsl_context_put(context); + + if (copy_to_user(param->value, &id, sizeof(id))) + return -EFAULT; + + return 0; +} + +static long kgsl_prop_secure_buf_alignment(struct kgsl_device_private *dev_priv, + struct kgsl_device_getproperty *param) +{ + u32 align = PAGE_SIZE; + + if (param->sizebytes != sizeof(align)) + return -EINVAL; + + if (copy_to_user(param->value, &align, sizeof(align))) + return -EFAULT; + + return 0; +} + +static long kgsl_prop_secure_ctxt_support(struct kgsl_device_private *dev_priv, + struct kgsl_device_getproperty *param) +{ + u32 secure; + + if (param->sizebytes != sizeof(secure)) + return -EINVAL; + + secure = dev_priv->device->mmu.secured ? 1 : 0; + + if (copy_to_user(param->value, &secure, sizeof(secure))) + return -EFAULT; + + return 0; +} + +static int kgsl_query_caps_properties(struct kgsl_device *device, + struct kgsl_capabilities *caps) +{ + struct kgsl_capabilities_properties props; + size_t copy; + u32 count, *local; + int ret; + + /* Return the size of the subtype struct */ + if (caps->size == 0) { + caps->size = sizeof(props); + return 0; + } + + memset(&props, 0, sizeof(props)); + + copy = min_t(size_t, caps->size, sizeof(props)); + + if (copy_from_user(&props, u64_to_user_ptr(caps->data), copy)) + return -EFAULT; + + /* Get the number of properties */ + count = kgsl_query_property_list(device, NULL, 0); + + /* + * If the incoming user count is zero, they are querying the number of + * available properties. Set it and return. 
+ */ + if (props.count == 0) { + props.count = count; + goto done; + } + + /* Copy the lesser of the user or kernel property count */ + if (props.count < count) + count = props.count; + + /* Create a local buffer to store the property list */ + local = kcalloc(count, sizeof(u32), GFP_KERNEL); + if (!local) + return -ENOMEM; + + /* Get the properties */ + props.count = kgsl_query_property_list(device, local, count); + + ret = copy_to_user(u64_to_user_ptr(props.list), local, + props.count * sizeof(u32)); + + kfree(local); + + if (ret) + return -EFAULT; + +done: + if (copy_to_user(u64_to_user_ptr(caps->data), &props, copy)) + return -EFAULT; + + return 0; +} + +static long kgsl_prop_query_capabilities(struct kgsl_device_private *dev_priv, + struct kgsl_device_getproperty *param) +{ + struct kgsl_capabilities caps; + long ret; + size_t copy; + + /* + * If sizebytes is zero, tell the user how big the capabilities struct + * should be + */ + if (param->sizebytes == 0) { + param->sizebytes = sizeof(caps); + return 0; + } + + memset(&caps, 0, sizeof(caps)); + + copy = min_t(size_t, param->sizebytes, sizeof(caps)); + + if (copy_from_user(&caps, param->value, copy)) + return -EFAULT; + + /* querytype must be non zero */ + if (caps.querytype == 0) + return -EINVAL; + + if (caps.querytype == KGSL_QUERY_CAPS_PROPERTIES) + ret = kgsl_query_caps_properties(dev_priv->device, &caps); + else { + /* Unsupported querytypes should return a unique return value */ + return -EOPNOTSUPP; + } + + if (copy_to_user(param->value, &caps, copy)) + return -EFAULT; + + return ret; +} + +static const struct { + int type; + long (*func)(struct kgsl_device_private *dev_priv, + struct kgsl_device_getproperty *param); +} kgsl_property_funcs[] = { + { KGSL_PROP_VERSION, kgsl_prop_version }, + { KGSL_PROP_GPU_RESET_STAT, kgsl_prop_gpu_reset_stat}, + { KGSL_PROP_SECURE_BUFFER_ALIGNMENT, kgsl_prop_secure_buf_alignment }, + { KGSL_PROP_SECURE_CTXT_SUPPORT, kgsl_prop_secure_ctxt_support }, + { 
KGSL_PROP_QUERY_CAPABILITIES, kgsl_prop_query_capabilities }, + { KGSL_PROP_CONTEXT_PROPERTY, kgsl_get_ctxt_properties }, +}; + +/*call all ioctl sub functions with driver locked*/ +long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_device_getproperty *param = data; + int i; + + for (i = 0; i < ARRAY_SIZE(kgsl_property_funcs); i++) { + if (param->type == kgsl_property_funcs[i].type) + return kgsl_property_funcs[i].func(dev_priv, param); + } + + if (is_compat_task()) + return device->ftbl->getproperty_compat(device, param); + + return device->ftbl->getproperty(device, param); +} + +int kgsl_query_property_list(struct kgsl_device *device, u32 *list, u32 count) +{ + int num = 0; + + if (!list) { + num = ARRAY_SIZE(kgsl_property_funcs); + + if (device->ftbl->query_property_list) + num += device->ftbl->query_property_list(device, list, + count); + + return num; + } + + for (; num < count && num < ARRAY_SIZE(kgsl_property_funcs); num++) + list[num] = kgsl_property_funcs[num].type; + + if (device->ftbl->query_property_list) + num += device->ftbl->query_property_list(device, &list[num], + count - num); + + return num; +} + +long kgsl_ioctl_device_setproperty(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + /* The getproperty struct is reused for setproperty too */ + struct kgsl_device_getproperty *param = data; + + /* Reroute to compat version if coming from compat_ioctl */ + if (is_compat_task()) + result = dev_priv->device->ftbl->setproperty_compat( + dev_priv, param->type, param->value, + param->sizebytes); + else if (dev_priv->device->ftbl->setproperty) + result = dev_priv->device->ftbl->setproperty( + dev_priv, param->type, param->value, + param->sizebytes); + + return result; +} + +long kgsl_ioctl_device_waittimestamp_ctxtid( + struct kgsl_device_private *dev_priv, unsigned int cmd, + void *data) +{ + 
struct kgsl_device_waittimestamp_ctxtid *param = data; + struct kgsl_device *device = dev_priv->device; + long result = -EINVAL; + unsigned int temp_cur_ts = 0; + struct kgsl_context *context; + + context = kgsl_context_get_owner(dev_priv, param->context_id); + if (context == NULL) + return result; + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, + &temp_cur_ts); + + trace_kgsl_waittimestamp_entry(device, context->id, temp_cur_ts, + param->timestamp, param->timeout); + + result = device->ftbl->waittimestamp(device, context, param->timestamp, + param->timeout); + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, + &temp_cur_ts); + trace_kgsl_waittimestamp_exit(device, temp_cur_ts, result); + + kgsl_context_put(context); + + return result; +} + +long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_ringbuffer_issueibcmds *param = data; + struct kgsl_device *device = dev_priv->device; + struct kgsl_context *context; + struct kgsl_drawobj *drawobj; + struct kgsl_drawobj_cmd *cmdobj; + long result = -EINVAL; + + /* The legacy functions don't support synchronization commands */ + if ((param->flags & (KGSL_DRAWOBJ_SYNC | KGSL_DRAWOBJ_MARKER))) + return -EINVAL; + + /* Sanity check the number of IBs */ + if (param->flags & KGSL_DRAWOBJ_SUBMIT_IB_LIST && + (param->numibs == 0 || param->numibs > KGSL_MAX_NUMIBS)) + return -EINVAL; + + /* Get the context */ + context = kgsl_context_get_owner(dev_priv, param->drawctxt_id); + if (context == NULL) + return -EINVAL; + + cmdobj = kgsl_drawobj_cmd_create(device, context, param->flags, + CMDOBJ_TYPE); + if (IS_ERR(cmdobj)) { + kgsl_context_put(context); + return PTR_ERR(cmdobj); + } + + drawobj = DRAWOBJ(cmdobj); + + if (param->flags & KGSL_DRAWOBJ_SUBMIT_IB_LIST) + result = kgsl_drawobj_cmd_add_ibdesc_list(device, cmdobj, + (void __user *) param->ibdesc_addr, + param->numibs); + else { + struct kgsl_ibdesc ibdesc; + /* Ultra legacy path */ + 
+ ibdesc.gpuaddr = param->ibdesc_addr; + ibdesc.sizedwords = param->numibs; + ibdesc.ctrl = 0; + + result = kgsl_drawobj_cmd_add_ibdesc(device, cmdobj, &ibdesc); + } + + if (result == 0) + result = kgsl_reclaim_to_pinned_state(dev_priv->process_priv); + + if (result == 0) + result = dev_priv->device->ftbl->queue_cmds(dev_priv, context, + &drawobj, 1, ¶m->timestamp); + + /* + * -EPROTO is a "success" error - it just tells the user that the + * context had previously faulted + */ + if (result && result != -EPROTO) + kgsl_drawobj_destroy(drawobj); + + kgsl_context_put(context); + return result; +} + +/* Returns 0 on failure. Returns command type(s) on success */ +static unsigned int _process_command_input(struct kgsl_device *device, + unsigned int flags, unsigned int numcmds, + unsigned int numobjs, unsigned int numsyncs) +{ + if (numcmds > KGSL_MAX_NUMIBS || + numobjs > KGSL_MAX_NUMIBS || + numsyncs > KGSL_MAX_SYNCPOINTS) + return 0; + + /* + * The SYNC bit is supposed to identify a dummy sync object + * so warn the user if they specified any IBs with it. + * A MARKER command can either have IBs or not but if the + * command has 0 IBs it is automatically assumed to be a marker. 
+ */ + + /* If they specify the flag, go with what they say */ + if (flags & KGSL_DRAWOBJ_MARKER) + return MARKEROBJ_TYPE; + else if (flags & KGSL_DRAWOBJ_SYNC) + return SYNCOBJ_TYPE; + + /* If not, deduce what they meant */ + if (numsyncs && numcmds) + return SYNCOBJ_TYPE | CMDOBJ_TYPE; + else if (numsyncs) + return SYNCOBJ_TYPE; + else if (numcmds) + return CMDOBJ_TYPE; + else if (numcmds == 0) + return MARKEROBJ_TYPE; + + return 0; +} + +long kgsl_ioctl_submit_commands(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_submit_commands *param = data; + struct kgsl_device *device = dev_priv->device; + struct kgsl_context *context; + struct kgsl_drawobj *drawobj[2]; + unsigned int type; + long result; + unsigned int i = 0; + + type = _process_command_input(device, param->flags, param->numcmds, 0, + param->numsyncs); + if (!type) + return -EINVAL; + + context = kgsl_context_get_owner(dev_priv, param->context_id); + if (context == NULL) + return -EINVAL; + + if (type & SYNCOBJ_TYPE) { + struct kgsl_drawobj_sync *syncobj = + kgsl_drawobj_sync_create(device, context); + if (IS_ERR(syncobj)) { + result = PTR_ERR(syncobj); + goto done; + } + + drawobj[i++] = DRAWOBJ(syncobj); + + result = kgsl_drawobj_sync_add_syncpoints(device, syncobj, + param->synclist, param->numsyncs); + if (result) + goto done; + } + + if (type & (CMDOBJ_TYPE | MARKEROBJ_TYPE)) { + struct kgsl_drawobj_cmd *cmdobj = + kgsl_drawobj_cmd_create(device, + context, param->flags, type); + if (IS_ERR(cmdobj)) { + result = PTR_ERR(cmdobj); + goto done; + } + + drawobj[i++] = DRAWOBJ(cmdobj); + + result = kgsl_drawobj_cmd_add_ibdesc_list(device, cmdobj, + param->cmdlist, param->numcmds); + if (result) + goto done; + + /* If no profiling buffer was specified, clear the flag */ + if (cmdobj->profiling_buf_entry == NULL) + DRAWOBJ(cmdobj)->flags &= + ~(unsigned long)KGSL_DRAWOBJ_PROFILING; + + if (type & CMDOBJ_TYPE) { + result = kgsl_reclaim_to_pinned_state( + 
dev_priv->process_priv); + if (result) + goto done; + } + } + + result = device->ftbl->queue_cmds(dev_priv, context, drawobj, + i, ¶m->timestamp); + +done: + /* + * -EPROTO is a "success" error - it just tells the user that the + * context had previously faulted + */ + if (result && result != -EPROTO) + while (i--) + kgsl_drawobj_destroy(drawobj[i]); + + + kgsl_context_put(context); + return result; +} + +long kgsl_ioctl_gpu_command(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpu_command *param = data; + struct kgsl_device *device = dev_priv->device; + struct kgsl_context *context; + struct kgsl_drawobj *drawobj[2]; + unsigned int type; + long result; + unsigned int i = 0; + + type = _process_command_input(device, param->flags, param->numcmds, + param->numobjs, param->numsyncs); + if (!type) + return -EINVAL; + + context = kgsl_context_get_owner(dev_priv, param->context_id); + if (context == NULL) + return -EINVAL; + + if (type & SYNCOBJ_TYPE) { + struct kgsl_drawobj_sync *syncobj = + kgsl_drawobj_sync_create(device, context); + + if (IS_ERR(syncobj)) { + result = PTR_ERR(syncobj); + goto done; + } + + drawobj[i++] = DRAWOBJ(syncobj); + + result = kgsl_drawobj_sync_add_synclist(device, syncobj, + u64_to_user_ptr(param->synclist), + param->syncsize, param->numsyncs); + if (result) + goto done; + } + + if (type & (CMDOBJ_TYPE | MARKEROBJ_TYPE)) { + struct kgsl_drawobj_cmd *cmdobj = + kgsl_drawobj_cmd_create(device, + context, param->flags, type); + + if (IS_ERR(cmdobj)) { + result = PTR_ERR(cmdobj); + goto done; + } + + drawobj[i++] = DRAWOBJ(cmdobj); + + result = kgsl_drawobj_cmd_add_cmdlist(device, cmdobj, + u64_to_user_ptr(param->cmdlist), + param->cmdsize, param->numcmds); + if (result) + goto done; + + result = kgsl_drawobj_cmd_add_memlist(device, cmdobj, + u64_to_user_ptr(param->objlist), + param->objsize, param->numobjs); + if (result) + goto done; + + /* If no profiling buffer was specified, clear the flag */ + if 
(cmdobj->profiling_buf_entry == NULL) + DRAWOBJ(cmdobj)->flags &= + ~(unsigned long)KGSL_DRAWOBJ_PROFILING; + + if (type & CMDOBJ_TYPE) { + result = kgsl_reclaim_to_pinned_state( + dev_priv->process_priv); + if (result) + goto done; + } + } + + result = device->ftbl->queue_cmds(dev_priv, context, drawobj, + i, ¶m->timestamp); + +done: + /* + * -EPROTO is a "success" error - it just tells the user that the + * context had previously faulted + */ + if (result && result != -EPROTO) + while (i--) + kgsl_drawobj_destroy(drawobj[i]); + + kgsl_context_put(context); + return result; +} + +long kgsl_ioctl_gpu_aux_command(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpu_aux_command *param = data; + struct kgsl_device *device = dev_priv->device; + struct kgsl_context *context; + struct kgsl_drawobj **drawobjs; + struct kgsl_drawobj_sync *tsobj; + void __user *cmdlist; + u32 queued, count; + int i, index = 0; + long ret; + struct kgsl_gpu_aux_command_generic generic; + + /* We support only one aux command */ + if (param->numcmds != 1) + return -EINVAL; + + if (!(param->flags & + (KGSL_GPU_AUX_COMMAND_BIND | KGSL_GPU_AUX_COMMAND_TIMELINE))) + return -EINVAL; + + context = kgsl_context_get_owner(dev_priv, param->context_id); + if (!context) + return -EINVAL; + + /* + * param->numcmds is always one and we have one additional drawobj + * for the timestamp sync if KGSL_GPU_AUX_COMMAND_SYNC flag is passed. + * On top of that we make an implicit sync object for the last queued + * timestamp on this context. + */ + count = (param->flags & KGSL_GPU_AUX_COMMAND_SYNC) ? 
3 : 2; + + drawobjs = kvcalloc(count, sizeof(*drawobjs), + GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); + + if (!drawobjs) { + kgsl_context_put(context); + return -ENOMEM; + } + + trace_kgsl_aux_command(context->id, param->numcmds, param->flags, + param->timestamp); + + if (param->flags & KGSL_GPU_AUX_COMMAND_SYNC) { + struct kgsl_drawobj_sync *syncobj = + kgsl_drawobj_sync_create(device, context); + + if (IS_ERR(syncobj)) { + ret = PTR_ERR(syncobj); + goto err; + } + + drawobjs[index++] = DRAWOBJ(syncobj); + + ret = kgsl_drawobj_sync_add_synclist(device, syncobj, + u64_to_user_ptr(param->synclist), + param->syncsize, param->numsyncs); + if (ret) + goto err; + } + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_QUEUED, &queued); + + /* + * Make an implicit sync object for the last queued timestamp on this + * context + */ + tsobj = kgsl_drawobj_create_timestamp_syncobj(device, + context, queued); + + if (IS_ERR(tsobj)) { + ret = PTR_ERR(tsobj); + goto err; + } + + drawobjs[index++] = DRAWOBJ(tsobj); + + cmdlist = u64_to_user_ptr(param->cmdlist); + + /* + * Create a draw object for KGSL_GPU_AUX_COMMAND_BIND or + * KGSL_GPU_AUX_COMMAND_TIMELINE. 
+ */ + if (copy_struct_from_user(&generic, sizeof(generic), + cmdlist, param->cmdsize)) { + ret = -EFAULT; + goto err; + } + + if (generic.type == KGSL_GPU_AUX_COMMAND_BIND) { + struct kgsl_drawobj_bind *bindobj; + + bindobj = kgsl_drawobj_bind_create(device, context); + + if (IS_ERR(bindobj)) { + ret = PTR_ERR(bindobj); + goto err; + } + + drawobjs[index++] = DRAWOBJ(bindobj); + + ret = kgsl_drawobj_add_bind(dev_priv, bindobj, + cmdlist, param->cmdsize); + if (ret) + goto err; + } else if (generic.type == KGSL_GPU_AUX_COMMAND_TIMELINE) { + struct kgsl_drawobj_timeline *timelineobj; + + timelineobj = kgsl_drawobj_timeline_create(device, + context); + + if (IS_ERR(timelineobj)) { + ret = PTR_ERR(timelineobj); + goto err; + } + + drawobjs[index++] = DRAWOBJ(timelineobj); + + ret = kgsl_drawobj_add_timeline(dev_priv, timelineobj, + cmdlist, param->cmdsize); + if (ret) + goto err; + + } else { + ret = -EINVAL; + goto err; + } + + ret = device->ftbl->queue_cmds(dev_priv, context, + drawobjs, index, ¶m->timestamp); + +err: + kgsl_context_put(context); + + if (ret && ret != -EPROTO) { + for (i = 0; i < count; i++) + kgsl_drawobj_destroy(drawobjs[i]); + } + + kvfree(drawobjs); + return ret; +} + +long kgsl_ioctl_cmdstream_readtimestamp_ctxtid(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_cmdstream_readtimestamp_ctxtid *param = data; + struct kgsl_device *device = dev_priv->device; + struct kgsl_context *context; + long result = -EINVAL; + + mutex_lock(&device->mutex); + context = kgsl_context_get_owner(dev_priv, param->context_id); + + if (context) { + result = kgsl_readtimestamp(device, context, + param->type, ¶m->timestamp); + + trace_kgsl_readtimestamp(device, context->id, + param->type, param->timestamp); + } + + kgsl_context_put(context); + mutex_unlock(&device->mutex); + return result; +} + +long kgsl_ioctl_drawctxt_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + struct 
kgsl_drawctxt_create *param = data; + struct kgsl_context *context = NULL; + struct kgsl_device *device = dev_priv->device; + + context = device->ftbl->drawctxt_create(dev_priv, ¶m->flags); + if (IS_ERR(context)) { + result = PTR_ERR(context); + goto done; + } + trace_kgsl_context_create(dev_priv->device, context, param->flags); + + /* Commit the pointer to the context in context_idr */ + write_lock(&device->context_lock); + idr_replace(&device->context_idr, context, context->id); + param->drawctxt_id = context->id; + write_unlock(&device->context_lock); + +done: + return result; +} + +long kgsl_ioctl_drawctxt_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_drawctxt_destroy *param = data; + struct kgsl_context *context; + + context = kgsl_context_get_owner(dev_priv, param->drawctxt_id); + if (context == NULL) + return -EINVAL; + + kgsl_context_detach(context); + kgsl_context_put(context); + + return 0; +} + +long gpumem_free_entry(struct kgsl_mem_entry *entry) +{ + if (!kgsl_mem_entry_set_pend(entry)) + return -EBUSY; + + trace_kgsl_mem_free(entry); + kgsl_memfree_add(pid_nr(entry->priv->pid), + entry->memdesc.pagetable ? + entry->memdesc.pagetable->name : 0, + entry->memdesc.gpuaddr, entry->memdesc.size, + entry->memdesc.flags); + + kgsl_mem_entry_put(entry); + + return 0; +} + +static void gpumem_free_func(struct kgsl_device *device, + struct kgsl_event_group *group, void *priv, int ret) +{ + struct kgsl_context *context = group->context; + struct kgsl_mem_entry *entry = priv; + unsigned int timestamp; + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, ×tamp); + + /* Free the memory for all event types */ + trace_kgsl_mem_timestamp_free(device, entry, KGSL_CONTEXT_ID(context), + timestamp, 0); + kgsl_memfree_add(pid_nr(entry->priv->pid), + entry->memdesc.pagetable ? 
+ entry->memdesc.pagetable->name : 0, + entry->memdesc.gpuaddr, entry->memdesc.size, + entry->memdesc.flags); + + kgsl_mem_entry_put(entry); +} + +static long gpumem_free_entry_on_timestamp(struct kgsl_device *device, + struct kgsl_mem_entry *entry, + struct kgsl_context *context, unsigned int timestamp) +{ + int ret; + unsigned int temp; + + if (!kgsl_mem_entry_set_pend(entry)) + return -EBUSY; + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, &temp); + trace_kgsl_mem_timestamp_queue(device, entry, context->id, temp, + timestamp); + ret = kgsl_add_event(device, &context->events, + timestamp, gpumem_free_func, entry); + + if (ret) + kgsl_mem_entry_unset_pend(entry); + + return ret; +} + +long kgsl_ioctl_sharedmem_free(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_sharedmem_free *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry; + long ret; + + entry = kgsl_sharedmem_find(private, (uint64_t) param->gpuaddr); + if (entry == NULL) + return -EINVAL; + + ret = gpumem_free_entry(entry); + kgsl_mem_entry_put(entry); + + return ret; +} + +long kgsl_ioctl_gpumem_free_id(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpumem_free_id *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry; + long ret; + + entry = kgsl_sharedmem_find_id(private, param->id); + if (entry == NULL) + return -EINVAL; + + ret = gpumem_free_entry(entry); + kgsl_mem_entry_put(entry); + + return ret; +} + +static long gpuobj_free_on_timestamp(struct kgsl_device_private *dev_priv, + struct kgsl_mem_entry *entry, struct kgsl_gpuobj_free *param) +{ + struct kgsl_gpu_event_timestamp event; + struct kgsl_context *context; + long ret; + + if (copy_struct_from_user(&event, sizeof(event), + u64_to_user_ptr(param->priv), param->len)) + return -EFAULT; + + if (event.context_id == 0) + return -EINVAL; + + 
context = kgsl_context_get_owner(dev_priv, event.context_id); + if (context == NULL) + return -EINVAL; + + ret = gpumem_free_entry_on_timestamp(dev_priv->device, entry, context, + event.timestamp); + + kgsl_context_put(context); + return ret; +} + +static bool gpuobj_free_fence_func(void *priv) +{ + struct kgsl_mem_entry *entry = priv; + + trace_kgsl_mem_free(entry); + kgsl_memfree_add(pid_nr(entry->priv->pid), + entry->memdesc.pagetable ? + entry->memdesc.pagetable->name : 0, + entry->memdesc.gpuaddr, entry->memdesc.size, + entry->memdesc.flags); + + INIT_WORK(&entry->work, _deferred_put); + queue_work(kgsl_driver.mem_workqueue, &entry->work); + return true; +} + +static long gpuobj_free_on_fence(struct kgsl_device_private *dev_priv, + struct kgsl_mem_entry *entry, struct kgsl_gpuobj_free *param) +{ + struct kgsl_sync_fence_cb *handle; + struct kgsl_gpu_event_fence event; + + if (!kgsl_mem_entry_set_pend(entry)) + return -EBUSY; + + if (copy_struct_from_user(&event, sizeof(event), + u64_to_user_ptr(param->priv), param->len)) { + kgsl_mem_entry_unset_pend(entry); + return -EFAULT; + } + + if (event.fd < 0) { + kgsl_mem_entry_unset_pend(entry); + return -EINVAL; + } + + handle = kgsl_sync_fence_async_wait(event.fd, + gpuobj_free_fence_func, entry, NULL); + + if (IS_ERR(handle)) { + kgsl_mem_entry_unset_pend(entry); + return PTR_ERR(handle); + } + + /* if handle is NULL the fence has already signaled */ + if (handle == NULL) + gpuobj_free_fence_func(entry); + + return 0; +} + +long kgsl_ioctl_gpuobj_free(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpuobj_free *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry; + long ret; + + entry = kgsl_sharedmem_find_id(private, param->id); + if (entry == NULL) + return -EINVAL; + + /* If no event is specified then free immediately */ + if (!(param->flags & KGSL_GPUOBJ_FREE_ON_EVENT)) + ret = gpumem_free_entry(entry); + else if 
(param->type == KGSL_GPU_EVENT_TIMESTAMP) + ret = gpuobj_free_on_timestamp(dev_priv, entry, param); + else if (param->type == KGSL_GPU_EVENT_FENCE) + ret = gpuobj_free_on_fence(dev_priv, entry, param); + else + ret = -EINVAL; + + kgsl_mem_entry_put(entry); + return ret; +} + +long kgsl_ioctl_cmdstream_freememontimestamp_ctxtid( + struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_cmdstream_freememontimestamp_ctxtid *param = data; + struct kgsl_context *context = NULL; + struct kgsl_mem_entry *entry; + long ret = -EINVAL; + + if (param->type != KGSL_TIMESTAMP_RETIRED) + return -EINVAL; + + context = kgsl_context_get_owner(dev_priv, param->context_id); + if (context == NULL) + return -EINVAL; + + entry = kgsl_sharedmem_find(dev_priv->process_priv, + (uint64_t) param->gpuaddr); + if (entry == NULL) { + kgsl_context_put(context); + return -EINVAL; + } + + ret = gpumem_free_entry_on_timestamp(dev_priv->device, entry, + context, param->timestamp); + + kgsl_mem_entry_put(entry); + kgsl_context_put(context); + + return ret; +} + +static int check_vma_flags(struct vm_area_struct *vma, + unsigned int flags) +{ + unsigned long flags_requested = (VM_READ | VM_WRITE); + + if (flags & KGSL_MEMFLAGS_GPUREADONLY) + flags_requested &= ~(unsigned long)VM_WRITE; + + if ((vma->vm_flags & flags_requested) == flags_requested) + return 0; + + return -EFAULT; +} + +static int check_vma(unsigned long hostptr, u64 size) +{ + struct vm_area_struct *vma; + unsigned long cur = hostptr; + + while (cur < (hostptr + size)) { + vma = find_vma(current->mm, cur); + if (!vma) + return false; + + /* Don't remap memory that we already own */ + if (vma->vm_file && vma->vm_ops == &kgsl_gpumem_vm_ops) + return false; + + cur = vma->vm_end; + } + + return true; +} + +static int memdesc_sg_virt(struct kgsl_memdesc *memdesc, unsigned long useraddr) +{ + int ret = 0; + long npages = 0, i; + size_t sglen = (size_t) (memdesc->size / PAGE_SIZE); + struct page **pages = NULL; + 
int write = ((memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY) ? 0 : + FOLL_WRITE); + + if (sglen == 0 || sglen >= LONG_MAX) + return -EINVAL; + + pages = kvcalloc(sglen, sizeof(*pages), GFP_KERNEL); + if (pages == NULL) + return -ENOMEM; + + memdesc->sgt = kmalloc(sizeof(*memdesc->sgt), GFP_KERNEL); + if (memdesc->sgt == NULL) { + ret = -ENOMEM; + goto out; + } + + mmap_read_lock(current->mm); + if (!check_vma(useraddr, memdesc->size)) { + mmap_read_unlock(current->mm); + ret = -EFAULT; + goto out; + } + + npages = get_user_pages(useraddr, sglen, write, pages, NULL); + mmap_read_unlock(current->mm); + + ret = (npages < 0) ? (int)npages : 0; + if (ret) + goto out; + + if ((unsigned long) npages != sglen) { + ret = -EINVAL; + goto out; + } + + ret = sg_alloc_table_from_pages(memdesc->sgt, pages, npages, + 0, memdesc->size, GFP_KERNEL); +out: + if (ret) { + for (i = 0; i < npages; i++) + put_page(pages[i]); + + kfree(memdesc->sgt); + memdesc->sgt = NULL; + } + kvfree(pages); + return ret; +} + +static const struct kgsl_memdesc_ops kgsl_usermem_ops = { + .free = kgsl_destroy_anon, + .put_gpuaddr = kgsl_unmap_and_put_gpuaddr, +}; + +static int kgsl_setup_anon_useraddr(struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, unsigned long hostptr, + size_t offset, size_t size) +{ + /* Map an anonymous memory chunk */ + + int ret; + + if (size == 0 || offset != 0 || + !IS_ALIGNED(size, PAGE_SIZE)) + return -EINVAL; + + entry->memdesc.pagetable = pagetable; + entry->memdesc.size = (uint64_t) size; + entry->memdesc.flags |= (uint64_t)KGSL_MEMFLAGS_USERMEM_ADDR; + entry->memdesc.ops = &kgsl_usermem_ops; + + if (kgsl_memdesc_use_cpu_map(&entry->memdesc)) { + + /* Register the address in the database */ + ret = kgsl_mmu_set_svm_region(pagetable, + (uint64_t) hostptr, (uint64_t) size); + + if (ret) + return ret; + + entry->memdesc.gpuaddr = (uint64_t) hostptr; + } + + ret = memdesc_sg_virt(&entry->memdesc, hostptr); + + if (ret && kgsl_memdesc_use_cpu_map(&entry->memdesc)) 
		/* Undo the SVM region registration on failure */
		kgsl_mmu_put_gpuaddr(pagetable, &entry->memdesc);

	return ret;
}

#ifdef CONFIG_DMA_SHARED_BUFFER
/*
 * Derive the KGSL cache mode from the VMA page protection: noncached or
 * writecombined VMAs map to WRITECOMBINE, everything else to WRITEBACK.
 */
static void _setup_cache_mode(struct kgsl_mem_entry *entry,
		struct vm_area_struct *vma)
{
	uint64_t mode;
	pgprot_t pgprot = vma->vm_page_prot;

	if ((pgprot_val(pgprot) == pgprot_val(pgprot_noncached(pgprot))) ||
		(pgprot_val(pgprot) == pgprot_val(pgprot_writecombine(pgprot))))
		mode = KGSL_CACHEMODE_WRITECOMBINE;
	else
		mode = KGSL_CACHEMODE_WRITEBACK;

	entry->memdesc.flags |= FIELD_PREP(KGSL_CACHEMODE_MASK, mode);
}

/* Forward declaration - defined later in this file */
static int kgsl_setup_dma_buf(struct kgsl_device *device,
		struct kgsl_pagetable *pagetable,
		struct kgsl_mem_entry *entry,
		struct dma_buf *dmabuf);

/*
 * Try to import a user address as a dma-buf mapping. Returns -ENODEV if
 * the address is not backed by a dma-buf file so the caller can fall
 * back to the anonymous path.
 */
static int kgsl_setup_dmabuf_useraddr(struct kgsl_device *device,
		struct kgsl_pagetable *pagetable,
		struct kgsl_mem_entry *entry, unsigned long hostptr)
{
	struct vm_area_struct *vma;
	struct dma_buf *dmabuf = NULL;
	int ret;

	/*
	 * Find the VMA containing this pointer and figure out if it
	 * is a dma-buf.
	 */
	mmap_read_lock(current->mm);
	vma = find_vma(current->mm, hostptr);

	if (vma && vma->vm_file) {
		ret = check_vma_flags(vma, entry->memdesc.flags);
		if (ret) {
			mmap_read_unlock(current->mm);
			return ret;
		}

		/*
		 * Check to see that this isn't our own memory that we have
		 * already mapped
		 */
		if (vma->vm_ops == &kgsl_gpumem_vm_ops) {
			mmap_read_unlock(current->mm);
			return -EFAULT;
		}

		if (!is_dma_buf_file(vma->vm_file)) {
			mmap_read_unlock(current->mm);
			return -ENODEV;
		}

		/* Take a refcount because dma_buf_put() decrements the refcount */
		get_file(vma->vm_file);

		dmabuf = vma->vm_file->private_data;
	}

	if (!dmabuf) {
		mmap_read_unlock(current->mm);
		return -ENODEV;
	}

	ret = kgsl_setup_dma_buf(device, pagetable, entry, dmabuf);
	if (ret) {
		dma_buf_put(dmabuf);
		mmap_read_unlock(current->mm);
		return ret;
	}

	/* Setup the cache mode for cache operations */
	_setup_cache_mode(entry, vma);

	/* I/O coherency only applies to cached mappings on capable MMUs */
	if (kgsl_mmu_has_feature(device, KGSL_MMU_IO_COHERENT) &&
		(IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT) &&
		 kgsl_cachemode_is_cached(entry->memdesc.flags)))
		entry->memdesc.flags |= KGSL_MEMFLAGS_IOCOHERENT;
	else
		entry->memdesc.flags &= ~((u64) KGSL_MEMFLAGS_IOCOHERENT);

	mmap_read_unlock(current->mm);
	return 0;
}
#else
/* Stub when dma-buf support is compiled out: always fall back */
static int kgsl_setup_dmabuf_useraddr(struct kgsl_device *device,
		struct kgsl_pagetable *pagetable,
		struct kgsl_mem_entry *entry, unsigned long hostptr)
{
	return -ENODEV;
}
#endif

/*
 * Map a page-aligned user address: prefer the dma-buf path and fall
 * back to pinning anonymous memory when no dma-buf backs the address.
 */
static int kgsl_setup_useraddr(struct kgsl_device *device,
		struct kgsl_pagetable *pagetable,
		struct kgsl_mem_entry *entry,
		unsigned long hostptr, size_t offset, size_t size)
{
	int ret;

	if (hostptr == 0 || !IS_ALIGNED(hostptr, PAGE_SIZE))
		return -EINVAL;

	/* Try to set up a dmabuf - if it returns -ENODEV assume anonymous */
	ret = kgsl_setup_dmabuf_useraddr(device, pagetable, entry, hostptr);
	if (ret != -ENODEV)
		return ret;

	/* Okay - lets go legacy */
	return kgsl_setup_anon_useraddr(pagetable, entry,
		hostptr, offset, size);
}

/* GPUOBJ_IMPORT helper for KGSL_USER_MEM_TYPE_ADDR imports */
static long _gpuobj_map_useraddr(struct kgsl_device *device,
		struct kgsl_pagetable *pagetable,
		struct kgsl_mem_entry *entry,
		struct kgsl_gpuobj_import *param)
{
	struct kgsl_gpuobj_import_useraddr useraddr;

	/* Mask off everything the user is not allowed to set */
	param->flags &= KGSL_MEMFLAGS_GPUREADONLY
		| KGSL_CACHEMODE_MASK
		| KGSL_MEMFLAGS_USE_CPU_MAP
		| KGSL_MEMTYPE_MASK
		| KGSL_MEMFLAGS_FORCE_32BIT
		| KGSL_MEMFLAGS_IOCOHERENT;

	/* Specifying SECURE is an explicit error */
	if (param->flags & KGSL_MEMFLAGS_SECURE)
		return -ENOTSUPP;

	kgsl_memdesc_init(device, &entry->memdesc, param->flags);

	if (copy_from_user(&useraddr,
		u64_to_user_ptr(param->priv), sizeof(useraddr)))
		return -EINVAL;

	/* Verify that the virtaddr and len are within bounds */
	if (useraddr.virtaddr > ULONG_MAX)
		return -EINVAL;

	return kgsl_setup_useraddr(device, pagetable, entry,
		(unsigned long) useraddr.virtaddr, 0, param->priv_len);
}

/* Return true if the MMU supports secure buffers; WARN once otherwise */
static bool check_and_warn_secured(struct kgsl_device *device)
{
	if (kgsl_mmu_is_secured(&device->mmu))
		return true;

	dev_WARN_ONCE(device->dev, 1, "Secure buffers are not supported\n");
	return false;
}

#ifdef CONFIG_DMA_SHARED_BUFFER
/* GPUOBJ_IMPORT helper for KGSL_USER_MEM_TYPE_DMABUF imports */
static long _gpuobj_map_dma_buf(struct kgsl_device *device,
		struct kgsl_pagetable *pagetable,
		struct kgsl_mem_entry *entry,
		struct kgsl_gpuobj_import *param,
		int *fd)
{
	bool iocoherent = (param->flags & KGSL_MEMFLAGS_IOCOHERENT);
	struct kgsl_gpuobj_import_dma_buf buf;
	struct dma_buf *dmabuf;
	int ret;

	/* Mask off everything the user is not allowed to set */
	param->flags &= KGSL_MEMFLAGS_GPUREADONLY |
			KGSL_MEMTYPE_MASK |
			KGSL_MEMALIGN_MASK |
			KGSL_MEMFLAGS_SECURE |
			KGSL_MEMFLAGS_FORCE_32BIT |
			KGSL_MEMFLAGS_GUARD_PAGE;

	kgsl_memdesc_init(device, &entry->memdesc, param->flags);

	/*
	 * If content protection is not enabled and secure buffer
	 * is requested to be mapped return error.
	 */
	if (entry->memdesc.flags & KGSL_MEMFLAGS_SECURE) {
		if (!check_and_warn_secured(device))
			return -ENOTSUPP;

		entry->memdesc.priv |= KGSL_MEMDESC_SECURE;
	}

	if (copy_struct_from_user(&buf, sizeof(buf),
		u64_to_user_ptr(param->priv), param->priv_len))
		return -EFAULT;

	if (buf.fd < 0)
		return -EINVAL;

	/* Report the fd back to the caller for tracing */
	*fd = buf.fd;
	dmabuf = dma_buf_get(buf.fd);

	if (IS_ERR(dmabuf))
		return PTR_ERR(dmabuf);

	/*
	 * DMA BUFS are always cached so make sure that is reflected in
	 * the memdesc.
	 */
	entry->memdesc.flags |=
		FIELD_PREP(KGSL_CACHEMODE_MASK, KGSL_CACHEMODE_WRITEBACK);

	/*
	 * Enable I/O coherency if it is 1) a thing, and either
	 * 2) enabled by default or 3) enabled by the caller
	 */
	if (kgsl_mmu_has_feature(device, KGSL_MMU_IO_COHERENT) &&
		(IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT) ||
		 iocoherent))
		entry->memdesc.flags |= KGSL_MEMFLAGS_IOCOHERENT;

	ret = kgsl_setup_dma_buf(device, pagetable, entry, dmabuf);
	if (ret)
		dma_buf_put(dmabuf);

	return ret;
}
#else
/* Stub when dma-buf support is compiled out */
static long _gpuobj_map_dma_buf(struct kgsl_device *device,
		struct kgsl_pagetable *pagetable,
		struct kgsl_mem_entry *entry,
		struct kgsl_gpuobj_import *param,
		int *fd)
{
	return -EINVAL;
}
#endif

/* Add @size to the per-process stat @type and track the high watermark */
static void kgsl_process_add_stats(struct kgsl_process_private *priv,
		unsigned int type, uint64_t size)
{
	u64 ret = atomic64_add_return(size, &priv->stats[type].cur);

	if (ret > priv->stats[type].max)
		priv->stats[type].max = ret;
}



/*
 * Import an external buffer (user address or dma-buf fd) and map it
 * into the process pagetable as a new memory entry.
 */
long kgsl_ioctl_gpuobj_import(struct kgsl_device_private *dev_priv,
		unsigned int cmd, void *data)
{
	struct kgsl_process_private *private = dev_priv->process_priv;
	struct kgsl_device *device = dev_priv->device;
	struct kgsl_gpuobj_import *param = data;
	struct kgsl_mem_entry *entry;
	int ret, fd = -1;

	if (param->type != KGSL_USER_MEM_TYPE_ADDR &&
		param->type != KGSL_USER_MEM_TYPE_DMABUF)
		return -ENOTSUPP;

	/* Imported buffers cannot be virtual buffer objects */
	if (param->flags & KGSL_MEMFLAGS_VBO)
		return -EINVAL;

	entry = kgsl_mem_entry_create();
	if (entry == NULL)
		return -ENOMEM;

	if (param->type == KGSL_USER_MEM_TYPE_ADDR)
		ret = _gpuobj_map_useraddr(device, private->pagetable,
			entry, param);
	else
		ret = _gpuobj_map_dma_buf(device, private->pagetable,
			entry, param, &fd);

	if (ret)
		goto out;

	/* Bump up the alignment hint for larger buffers */
	if (entry->memdesc.size >= SZ_1M)
		kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_1M));
	else if (entry->memdesc.size >= SZ_64K)
		kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_64K));

	/* Let the user see the flags that were actually applied */
	param->flags = entry->memdesc.flags;

	ret = kgsl_mem_entry_attach_and_map(device, private, entry);
	if (ret)
		goto unmap;

	param->id = entry->id;

	KGSL_STATS_ADD(entry->memdesc.size, &kgsl_driver.stats.mapped,
		&kgsl_driver.stats.mapped_max);

	kgsl_process_add_stats(private,
			kgsl_memdesc_usermem_type(&entry->memdesc),
			entry->memdesc.size);

	trace_kgsl_mem_map(entry, fd);

	kgsl_mem_entry_commit_process(entry);

	/* Put the extra ref from kgsl_mem_entry_create() */
	kgsl_mem_entry_put(entry);

	return 0;

unmap:
	kgsl_sharedmem_free(&entry->memdesc);

out:
	kfree(entry);
	return ret;
}

/* MAP_USER_MEM helper for KGSL_USER_MEM_TYPE_ADDR mappings */
static long _map_usermem_addr(struct kgsl_device *device,
		struct kgsl_pagetable *pagetable, struct kgsl_mem_entry *entry,
		unsigned long hostptr, size_t offset, size_t size)
{
	if (!kgsl_mmu_has_feature(device, KGSL_MMU_PAGED))
		return -EINVAL;

	/* No CPU mapped buffer could ever be secure */
	if (entry->memdesc.flags & KGSL_MEMFLAGS_SECURE)
		return -EINVAL;

	return kgsl_setup_useraddr(device, pagetable, entry, hostptr,
		offset, size);
}

#ifdef CONFIG_DMA_SHARED_BUFFER
/* MAP_USER_MEM helper for dma-buf fd mappings */
static int _map_usermem_dma_buf(struct kgsl_device *device,
		struct kgsl_pagetable *pagetable,
		struct kgsl_mem_entry *entry,
		unsigned int fd)
{
	int ret;
	struct dma_buf *dmabuf;

	/*
	 * If content protection is not enabled and secure buffer
	 * is requested to be mapped return error.
	 */

	if (entry->memdesc.flags & KGSL_MEMFLAGS_SECURE) {
		if (!check_and_warn_secured(device))
			return -EOPNOTSUPP;

		entry->memdesc.priv |= KGSL_MEMDESC_SECURE;
	}

	dmabuf = dma_buf_get(fd);
	if (IS_ERR_OR_NULL(dmabuf)) {
		ret = PTR_ERR(dmabuf);
		return ret ? ret : -EINVAL;
	}
	ret = kgsl_setup_dma_buf(device, pagetable, entry, dmabuf);
	if (ret)
		/* Drop the reference taken by dma_buf_get() on failure */
		dma_buf_put(dmabuf);
	return ret;
}
#else
/* Stub when dma-buf support is compiled out */
static int _map_usermem_dma_buf(struct kgsl_device *device,
		struct kgsl_pagetable *pagetable,
		struct kgsl_mem_entry *entry,
		unsigned int fd)
{
	return -EINVAL;
}
#endif

#ifdef CONFIG_DMA_SHARED_BUFFER
/*
 * Attach and map a dma-buf into @entry: builds the meta bookkeeping,
 * validates secure buffer access and computes the memdesc size from
 * the scatter-gather list. On error the attachment and meta are undone.
 */
static int kgsl_setup_dma_buf(struct kgsl_device *device,
		struct kgsl_pagetable *pagetable,
		struct kgsl_mem_entry *entry,
		struct dma_buf *dmabuf)
{
	int ret = 0;
	struct scatterlist *s;
	struct sg_table *sg_table;
	struct dma_buf_attachment *attach = NULL;
	struct kgsl_dma_buf_meta *meta;

	meta = kzalloc(sizeof(*meta), GFP_KERNEL);
	if (!meta)
		return -ENOMEM;

	attach = dma_buf_attach(dmabuf, device->dev);

	if (IS_ERR(attach)) {
		ret = PTR_ERR(attach);
		goto out;
	}

	/*
	 * If dma buffer is marked IO coherent, skip sync at attach,
	 * which involves flushing the buffer on CPU.
	 * HW manages coherency for IO coherent buffers.
	 */
	if (entry->memdesc.flags & KGSL_MEMFLAGS_IOCOHERENT)
		attach->dma_map_attrs |= DMA_ATTR_SKIP_CPU_SYNC;

	meta->dmabuf = dmabuf;
	meta->attach = attach;
	meta->entry = entry;

	entry->priv_data = meta;
	entry->memdesc.pagetable = pagetable;
	entry->memdesc.size = 0;
	entry->memdesc.ops = &kgsl_dmabuf_ops;
	/* USE_CPU_MAP is not implemented for ION. */
	entry->memdesc.flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP);
	entry->memdesc.flags |= (uint64_t)KGSL_MEMFLAGS_USERMEM_ION;

	sg_table = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);

	if (IS_ERR_OR_NULL(sg_table)) {
		ret = PTR_ERR(sg_table);
		goto out;
	}

	/*
	 * Unmap immediately - the sg_table itself is kept for sizing and
	 * later mapping by the MMU code.
	 */
	dma_buf_unmap_attachment(attach, sg_table, DMA_BIDIRECTIONAL);

	meta->table = sg_table;
	entry->priv_data = meta;
	entry->memdesc.sgt = sg_table;

	if (entry->memdesc.priv & KGSL_MEMDESC_SECURE) {
		uint32_t *vmid_list = NULL, *perms_list = NULL;
		uint32_t nelems = 0;
		int i;

		/* An exclusively owned buffer cannot be shared with the GPU */
		if (mem_buf_dma_buf_exclusive_owner(dmabuf)) {
			ret = -EPERM;
			goto out;
		}

		ret = mem_buf_dma_buf_copy_vmperm(dmabuf, (int **)&vmid_list,
				(int **)&perms_list, (int *)&nelems);
		if (ret) {
			/* Best effort: proceed without the access check */
			ret = 0;
			dev_info(device->dev, "Skipped access check\n");
			goto skip_access_check;
		}

		/* Check if secure buffer is accessible to CP_PIXEL */
		for (i = 0; i < nelems; i++) {
			if (vmid_list[i] == QCOM_DMA_HEAP_FLAG_CP_PIXEL)
				break;
		}

		kfree(vmid_list);
		kfree(perms_list);

		if (i == nelems) {
			/*
			 * Secure buffer is not accessible to CP_PIXEL, there is no point
			 * in importing this buffer.
			 */
			ret = -EPERM;
			goto out;
		}
	}

skip_access_check:
	/* Calculate the size of the memdesc from the sglist */
	for (s = entry->memdesc.sgt->sgl; s != NULL; s = sg_next(s))
		entry->memdesc.size += (uint64_t) s->length;

	if (!entry->memdesc.size) {
		ret = -EINVAL;
		goto out;
	}

	add_dmabuf_list(meta);
	entry->memdesc.size = PAGE_ALIGN(entry->memdesc.size);

out:
	if (ret) {
		if (!IS_ERR_OR_NULL(attach))
			dma_buf_detach(dmabuf, attach);

		kfree(meta);
	}

	return ret;
}
#endif

#ifdef CONFIG_DMA_SHARED_BUFFER
/*
 * Count how many entries sharing this dma-buf are tagged as EGL
 * surfaces or EGL images. The counters are incremented, not reset -
 * callers are expected to zero them first.
 */
void kgsl_get_egl_counts(struct kgsl_mem_entry *entry,
		int *egl_surface_count, int *egl_image_count)
{
	struct kgsl_dma_buf_meta *meta = entry->priv_data;
	struct dmabuf_list_entry *dle = meta->dle;
	struct kgsl_dma_buf_meta *scan_meta;
	struct kgsl_mem_entry *scan_mem_entry;

	if (!dle)
		return;

	spin_lock(&kgsl_dmabuf_lock);
	list_for_each_entry(scan_meta, &dle->dmabuf_list, node) {
		scan_mem_entry = scan_meta->entry;

		switch (kgsl_memdesc_get_memtype(&scan_mem_entry->memdesc)) {
		case KGSL_MEMTYPE_EGL_SURFACE:
			(*egl_surface_count)++;
			break;
		case KGSL_MEMTYPE_EGL_IMAGE:
			(*egl_image_count)++;
			break;
		}
	}
	spin_unlock(&kgsl_dmabuf_lock);
}

unsigned long kgsl_get_dmabuf_inode_number(struct kgsl_mem_entry *entry)
{
	struct kgsl_dma_buf_meta *meta = entry->priv_data;

	return meta ?
file_inode(meta->dmabuf->file)->i_ino : 0; +} +#else +void kgsl_get_egl_counts(struct kgsl_mem_entry *entry, + int *egl_surface_count, int *egl_image_count) +{ +} + +unsigned long kgsl_get_dmabuf_inode_number(struct kgsl_mem_entry *entry) +{ +} +#endif + +long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = -EINVAL; + struct kgsl_map_user_mem *param = data; + struct kgsl_mem_entry *entry = NULL; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_device *device = dev_priv->device; + unsigned int memtype; + uint64_t flags; + + /* + * If content protection is not enabled and secure buffer + * is requested to be mapped return error. + */ + + if (param->flags & KGSL_MEMFLAGS_SECURE) { + if (!check_and_warn_secured(device)) + return -EOPNOTSUPP; + + /* + * On 64 bit kernel, secure memory region is expanded and + * moved to 64 bit address, 32 bit apps can not access it from + * this IOCTL. + */ + if (is_compat_task() && + test_bit(KGSL_MMU_64BIT, &device->mmu.features)) + return -EOPNOTSUPP; + + /* Can't use CPU map with secure buffers */ + if (param->flags & KGSL_MEMFLAGS_USE_CPU_MAP) + return -EINVAL; + } + + entry = kgsl_mem_entry_create(); + + if (entry == NULL) + return -ENOMEM; + + /* + * Convert from enum value to KGSL_MEM_ENTRY value, so that + * we can use the latter consistently everywhere. + */ + memtype = param->memtype + 1; + + /* + * Mask off unknown flags from userspace. This way the caller can + * check if a flag is supported by looking at the returned flags. + * Note: CACHEMODE is ignored for this call. Caching should be + * determined by type of allocation being mapped. 
+ */ + flags = param->flags & (KGSL_MEMFLAGS_GPUREADONLY + | KGSL_MEMTYPE_MASK + | KGSL_MEMALIGN_MASK + | KGSL_MEMFLAGS_USE_CPU_MAP + | KGSL_MEMFLAGS_SECURE + | KGSL_MEMFLAGS_IOCOHERENT); + + if (is_compat_task()) + flags |= KGSL_MEMFLAGS_FORCE_32BIT; + + kgsl_memdesc_init(device, &entry->memdesc, flags); + + switch (memtype) { + case KGSL_MEM_ENTRY_USER: + result = _map_usermem_addr(device, private->pagetable, + entry, param->hostptr, param->offset, param->len); + break; + case KGSL_MEM_ENTRY_ION: + if (param->offset != 0) + result = -EINVAL; + else + result = _map_usermem_dma_buf(device, + private->pagetable, entry, param->fd); + break; + default: + result = -EOPNOTSUPP; + break; + } + + if (result) + goto error; + + if (entry->memdesc.size >= SZ_2M) + kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_2M)); + else if (entry->memdesc.size >= SZ_1M) + kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_1M)); + else if (entry->memdesc.size >= SZ_64K) + kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_64)); + + /* echo back flags */ + param->flags = (unsigned int) entry->memdesc.flags; + + result = kgsl_mem_entry_attach_and_map(device, private, + entry); + if (result) + goto error_attach; + + /* Adjust the returned value for a non 4k aligned offset */ + param->gpuaddr = (unsigned long) + entry->memdesc.gpuaddr + (param->offset & PAGE_MASK); + + KGSL_STATS_ADD(param->len, &kgsl_driver.stats.mapped, + &kgsl_driver.stats.mapped_max); + + kgsl_process_add_stats(private, + kgsl_memdesc_usermem_type(&entry->memdesc), param->len); + + trace_kgsl_mem_map(entry, param->fd); + + kgsl_mem_entry_commit_process(entry); + + /* Put the extra ref from kgsl_mem_entry_create() */ + kgsl_mem_entry_put(entry); + + return result; + +error_attach: + kgsl_sharedmem_free(&entry->memdesc); +error: + /* Clear gpuaddr here so userspace doesn't get any wrong ideas */ + param->gpuaddr = 0; + + kfree(entry); + return result; +} + +static int _kgsl_gpumem_sync_cache(struct kgsl_mem_entry *entry, + 
uint64_t offset, uint64_t length, unsigned int op) +{ + int ret = 0; + int cacheop; + + if (!entry) + return 0; + + /* Cache ops are not allowed on secure memory */ + if (entry->memdesc.flags & KGSL_MEMFLAGS_SECURE) + return 0; + + /* + * Flush is defined as (clean | invalidate). If both bits are set, then + * do a flush, otherwise check for the individual bits and clean or inv + * as requested + */ + + if ((op & KGSL_GPUMEM_CACHE_FLUSH) == KGSL_GPUMEM_CACHE_FLUSH) + cacheop = KGSL_CACHE_OP_FLUSH; + else if (op & KGSL_GPUMEM_CACHE_CLEAN) + cacheop = KGSL_CACHE_OP_CLEAN; + else if (op & KGSL_GPUMEM_CACHE_INV) + cacheop = KGSL_CACHE_OP_INV; + else { + ret = -EINVAL; + goto done; + } + + if (!(op & KGSL_GPUMEM_CACHE_RANGE)) { + offset = 0; + length = entry->memdesc.size; + } + + if (kgsl_cachemode_is_cached(entry->memdesc.flags)) { + trace_kgsl_mem_sync_cache(entry, offset, length, op); + ret = kgsl_cache_range_op(&entry->memdesc, offset, + length, cacheop); + } + +done: + return ret; +} + +/* New cache sync function - supports both directions (clean and invalidate) */ + +long kgsl_ioctl_gpumem_sync_cache(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpumem_sync_cache *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry = NULL; + long ret; + + if (param->id != 0) + entry = kgsl_sharedmem_find_id(private, param->id); + else if (param->gpuaddr != 0) + entry = kgsl_sharedmem_find(private, (uint64_t) param->gpuaddr); + + if (entry == NULL) + return -EINVAL; + + ret = _kgsl_gpumem_sync_cache(entry, (uint64_t) param->offset, + (uint64_t) param->length, param->op); + kgsl_mem_entry_put(entry); + return ret; +} + +static int mem_id_cmp(const void *_a, const void *_b) +{ + const unsigned int *a = _a, *b = _b; + + if (*a == *b) + return 0; + return (*a > *b) ? 
1 : -1; +} + +#ifdef CONFIG_ARM64 +/* Do not support full flush on ARM64 targets */ +static inline bool check_full_flush(size_t size, int op) +{ + return false; +} +#else +/* Support full flush if the size is bigger than the threshold */ +static inline bool check_full_flush(size_t size, int op) +{ + /* If we exceed the breakeven point, flush the entire cache */ + bool ret = (kgsl_driver.full_cache_threshold != 0) && + (size >= kgsl_driver.full_cache_threshold) && + (op == KGSL_GPUMEM_CACHE_FLUSH); + if (ret) + flush_cache_all(); + return ret; +} +#endif + +long kgsl_ioctl_gpumem_sync_cache_bulk(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int i; + struct kgsl_gpumem_sync_cache_bulk *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + unsigned int id, last_id = 0, *id_list = NULL, actual_count = 0; + struct kgsl_mem_entry **entries = NULL; + long ret = 0; + uint64_t op_size = 0; + bool full_flush = false; + + if (param->id_list == NULL || param->count == 0 + || param->count > (PAGE_SIZE / sizeof(unsigned int))) + return -EINVAL; + + id_list = kcalloc(param->count, sizeof(unsigned int), GFP_KERNEL); + if (id_list == NULL) + return -ENOMEM; + + entries = kcalloc(param->count, sizeof(*entries), GFP_KERNEL); + if (entries == NULL) { + ret = -ENOMEM; + goto end; + } + + if (copy_from_user(id_list, param->id_list, + param->count * sizeof(unsigned int))) { + ret = -EFAULT; + goto end; + } + /* sort the ids so we can weed out duplicates */ + sort(id_list, param->count, sizeof(*id_list), mem_id_cmp, NULL); + + for (i = 0; i < param->count; i++) { + unsigned int cachemode; + struct kgsl_mem_entry *entry = NULL; + + id = id_list[i]; + /* skip 0 ids or duplicates */ + if (id == last_id) + continue; + + entry = kgsl_sharedmem_find_id(private, id); + if (entry == NULL) + continue; + + /* skip uncached memory */ + cachemode = kgsl_memdesc_get_cachemode(&entry->memdesc); + if (cachemode != KGSL_CACHEMODE_WRITETHROUGH && + 
cachemode != KGSL_CACHEMODE_WRITEBACK) { + kgsl_mem_entry_put(entry); + continue; + } + + op_size += entry->memdesc.size; + entries[actual_count++] = entry; + + full_flush = check_full_flush(op_size, param->op); + if (full_flush) { + trace_kgsl_mem_sync_full_cache(actual_count, op_size); + break; + } + + last_id = id; + } + + param->op &= ~KGSL_GPUMEM_CACHE_RANGE; + + for (i = 0; i < actual_count; i++) { + if (!full_flush) + _kgsl_gpumem_sync_cache(entries[i], 0, + entries[i]->memdesc.size, + param->op); + kgsl_mem_entry_put(entries[i]); + } +end: + kfree(entries); + kfree(id_list); + return ret; +} + +/* Legacy cache function, does a flush (clean + invalidate) */ + +long kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_sharedmem_free *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry = NULL; + long ret; + + entry = kgsl_sharedmem_find(private, (uint64_t) param->gpuaddr); + if (entry == NULL) + return -EINVAL; + + ret = _kgsl_gpumem_sync_cache(entry, 0, entry->memdesc.size, + KGSL_GPUMEM_CACHE_FLUSH); + kgsl_mem_entry_put(entry); + return ret; +} + +long kgsl_ioctl_gpuobj_sync(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_gpuobj_sync *param = data; + struct kgsl_gpuobj_sync_obj *objs; + struct kgsl_mem_entry **entries; + long ret = 0; + uint64_t size = 0; + int i; + void __user *ptr; + + if (param->count == 0 || param->count > 128) + return -EINVAL; + + objs = kcalloc(param->count, sizeof(*objs), GFP_KERNEL); + if (objs == NULL) + return -ENOMEM; + + entries = kcalloc(param->count, sizeof(*entries), GFP_KERNEL); + if (entries == NULL) { + kfree(objs); + return -ENOMEM; + } + + ptr = u64_to_user_ptr(param->objs); + + for (i = 0; i < param->count; i++) { + ret = copy_struct_from_user(&objs[i], sizeof(*objs), ptr, + param->obj_len); + 
if (ret) + goto out; + + entries[i] = kgsl_sharedmem_find_id(private, objs[i].id); + + /* Not finding the ID is not a fatal failure - just skip it */ + if (entries[i] == NULL) + continue; + + if (!(objs[i].op & KGSL_GPUMEM_CACHE_RANGE)) + size += entries[i]->memdesc.size; + else if (objs[i].offset < entries[i]->memdesc.size) + size += (entries[i]->memdesc.size - objs[i].offset); + + if (check_full_flush(size, objs[i].op)) { + trace_kgsl_mem_sync_full_cache(i, size); + goto out; + } + + ptr += sizeof(*objs); + } + + for (i = 0; !ret && i < param->count; i++) + ret = _kgsl_gpumem_sync_cache(entries[i], + objs[i].offset, objs[i].length, objs[i].op); + +out: + for (i = 0; i < param->count; i++) + kgsl_mem_entry_put(entries[i]); + + kfree(entries); + kfree(objs); + + return ret; +} + +#ifdef CONFIG_ARM64 +static uint64_t kgsl_filter_cachemode(uint64_t flags) +{ + /* + * WRITETHROUGH is not supported in arm64, so we tell the user that we + * use WRITEBACK which is the default caching policy. + */ + if (FIELD_GET(KGSL_CACHEMODE_MASK, flags) == KGSL_CACHEMODE_WRITETHROUGH) { + flags &= ~((uint64_t) KGSL_CACHEMODE_MASK); + flags |= FIELD_PREP(KGSL_CACHEMODE_MASK, KGSL_CACHEMODE_WRITEBACK); + } + return flags; +} +#else +static uint64_t kgsl_filter_cachemode(uint64_t flags) +{ + return flags; +} +#endif + +/* The largest allowable alignment for a GPU object is 32MB */ +#define KGSL_MAX_ALIGN (32 * SZ_1M) + +static u64 cap_alignment(struct kgsl_device *device, u64 flags) +{ + u32 align = FIELD_GET(KGSL_MEMALIGN_MASK, flags); + + if (align >= ilog2(KGSL_MAX_ALIGN)) { + /* Cap the alignment bits to the highest number we can handle */ + dev_err(device->dev, + "Alignment too large; restricting to %dK\n", + KGSL_MAX_ALIGN >> 10); + align = ilog2(KGSL_MAX_ALIGN); + } + + flags &= ~((u64) KGSL_MEMALIGN_MASK); + return flags | FIELD_PREP(KGSL_MEMALIGN_MASK, align); +} + +static struct kgsl_mem_entry * +gpumem_alloc_vbo_entry(struct kgsl_device_private *dev_priv, + u64 size, u64 
flags) +{ + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_device *device = dev_priv->device; + struct kgsl_memdesc *memdesc; + struct kgsl_mem_entry *entry; + int ret; + + /* Disallow specific flags */ + if (flags & (KGSL_MEMFLAGS_GPUREADONLY | KGSL_CACHEMODE_MASK)) + return ERR_PTR(-EINVAL); + + if (flags & (KGSL_MEMFLAGS_USE_CPU_MAP | KGSL_MEMFLAGS_IOCOHERENT)) + return ERR_PTR(-EINVAL); + + /* Quietly ignore the other flags that aren't this list */ + flags &= KGSL_MEMFLAGS_SECURE | + KGSL_MEMFLAGS_VBO | + KGSL_MEMTYPE_MASK | + KGSL_MEMALIGN_MASK | + KGSL_MEMFLAGS_FORCE_32BIT; + + if ((flags & KGSL_MEMFLAGS_SECURE) && !check_and_warn_secured(device)) + return ERR_PTR(-EOPNOTSUPP); + + flags = cap_alignment(device, flags); + + entry = kgsl_mem_entry_create(); + if (!entry) + return ERR_PTR(-ENOMEM); + + memdesc = &entry->memdesc; + + ret = kgsl_sharedmem_allocate_vbo(device, memdesc, size, flags); + if (ret) { + kfree(entry); + return ERR_PTR(ret); + } + + if (flags & KGSL_MEMFLAGS_SECURE) + entry->memdesc.priv |= KGSL_MEMDESC_SECURE; + + ret = kgsl_mem_entry_attach_to_process(device, private, entry); + if (ret) + goto out; + + ret = kgsl_mmu_map_zero_page_to_range(memdesc->pagetable, + memdesc, 0, memdesc->size); + if (!ret) { + trace_kgsl_mem_alloc(entry); + kgsl_mem_entry_commit_process(entry); + return entry; + } + +out: + kgsl_sharedmem_free(memdesc); + kfree(entry); + return ERR_PTR(ret); +} + +struct kgsl_mem_entry *gpumem_alloc_entry( + struct kgsl_device_private *dev_priv, + uint64_t size, uint64_t flags) +{ + int ret; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry; + struct kgsl_device *device = dev_priv->device; + u32 cachemode; + + /* For 32-bit kernel world nothing to do with this flag */ + if (BITS_PER_LONG == 32) + flags &= ~((uint64_t) KGSL_MEMFLAGS_FORCE_32BIT); + + if (flags & KGSL_MEMFLAGS_VBO) + return gpumem_alloc_vbo_entry(dev_priv, size, flags); + + flags &= 
KGSL_MEMFLAGS_GPUREADONLY + | KGSL_CACHEMODE_MASK + | KGSL_MEMTYPE_MASK + | KGSL_MEMALIGN_MASK + | KGSL_MEMFLAGS_USE_CPU_MAP + | KGSL_MEMFLAGS_SECURE + | KGSL_MEMFLAGS_FORCE_32BIT + | KGSL_MEMFLAGS_IOCOHERENT + | KGSL_MEMFLAGS_GUARD_PAGE; + + /* Return not supported error if secure memory isn't enabled */ + if ((flags & KGSL_MEMFLAGS_SECURE) && !check_and_warn_secured(device)) + return ERR_PTR(-EOPNOTSUPP); + + flags = cap_alignment(device, flags); + + /* For now only allow allocations up to 4G */ + if (size == 0 || size > UINT_MAX) + return ERR_PTR(-EINVAL); + + flags = kgsl_filter_cachemode(flags); + + entry = kgsl_mem_entry_create(); + if (entry == NULL) + return ERR_PTR(-ENOMEM); + + if (IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT) && + kgsl_cachemode_is_cached(flags)) + flags |= KGSL_MEMFLAGS_IOCOHERENT; + + ret = kgsl_allocate_user(device, &entry->memdesc, + size, flags, 0); + if (ret != 0) + goto err; + + ret = kgsl_mem_entry_attach_and_map(device, private, entry); + if (ret != 0) { + kgsl_sharedmem_free(&entry->memdesc); + goto err; + } + + cachemode = kgsl_memdesc_get_cachemode(&entry->memdesc); + /* + * Secure buffers cannot be reclaimed. Avoid reclaim of cached buffers + * as we could get request for cache operations on these buffers when + * they are reclaimed. 
+ */ + if (!(flags & KGSL_MEMFLAGS_SECURE) && + !(cachemode == KGSL_CACHEMODE_WRITEBACK) && + !(cachemode == KGSL_CACHEMODE_WRITETHROUGH)) + entry->memdesc.priv |= KGSL_MEMDESC_CAN_RECLAIM; + + kgsl_process_add_stats(private, + kgsl_memdesc_usermem_type(&entry->memdesc), + entry->memdesc.size); + trace_kgsl_mem_alloc(entry); + + kgsl_mem_entry_commit_process(entry); + return entry; +err: + kfree(entry); + return ERR_PTR(ret); +} + +static void copy_metadata(struct kgsl_mem_entry *entry, uint64_t metadata, + unsigned int len) +{ + unsigned int i, size; + + if (len == 0) + return; + + size = min_t(unsigned int, len, sizeof(entry->metadata) - 1); + + if (copy_from_user(entry->metadata, u64_to_user_ptr(metadata), size)) { + memset(entry->metadata, 0, sizeof(entry->metadata)); + return; + } + + /* Clean up non printable characters in the string */ + for (i = 0; i < size && entry->metadata[i] != 0; i++) { + if (!isprint(entry->metadata[i])) + entry->metadata[i] = '?'; + } +} + +long kgsl_ioctl_gpuobj_alloc(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpuobj_alloc *param = data; + struct kgsl_mem_entry *entry; + + entry = gpumem_alloc_entry(dev_priv, param->size, param->flags); + + if (IS_ERR(entry)) + return PTR_ERR(entry); + + copy_metadata(entry, param->metadata, param->metadata_len); + + param->size = entry->memdesc.size; + param->flags = entry->memdesc.flags; + param->mmapsize = kgsl_memdesc_footprint(&entry->memdesc); + param->id = entry->id; + + /* Put the extra ref from kgsl_mem_entry_create() */ + kgsl_mem_entry_put(entry); + + return 0; +} + +long kgsl_ioctl_gpumem_alloc(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_gpumem_alloc *param = data; + struct kgsl_mem_entry *entry; + uint64_t flags = param->flags; + + /* + * On 64 bit kernel, secure memory region is expanded and + * moved to 64 bit address, 32 bit apps can not access it 
from + * this IOCTL. + */ + if ((param->flags & KGSL_MEMFLAGS_SECURE) && is_compat_task() + && test_bit(KGSL_MMU_64BIT, &device->mmu.features)) + return -EOPNOTSUPP; + + /* Legacy functions doesn't support these advanced features */ + flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP); + + if (is_compat_task()) + flags |= KGSL_MEMFLAGS_FORCE_32BIT; + + entry = gpumem_alloc_entry(dev_priv, (uint64_t) param->size, flags); + + if (IS_ERR(entry)) + return PTR_ERR(entry); + + param->gpuaddr = (unsigned long) entry->memdesc.gpuaddr; + param->size = (size_t) entry->memdesc.size; + param->flags = (unsigned int) entry->memdesc.flags; + + /* Put the extra ref from kgsl_mem_entry_create() */ + kgsl_mem_entry_put(entry); + + return 0; +} + +long kgsl_ioctl_gpumem_alloc_id(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_gpumem_alloc_id *param = data; + struct kgsl_mem_entry *entry; + uint64_t flags = param->flags; + + /* + * On 64 bit kernel, secure memory region is expanded and + * moved to 64 bit address, 32 bit apps can not access it from + * this IOCTL. 
+ */ + if ((param->flags & KGSL_MEMFLAGS_SECURE) && is_compat_task() + && test_bit(KGSL_MMU_64BIT, &device->mmu.features)) + return -EOPNOTSUPP; + + if (is_compat_task()) + flags |= KGSL_MEMFLAGS_FORCE_32BIT; + + entry = gpumem_alloc_entry(dev_priv, (uint64_t) param->size, flags); + + if (IS_ERR(entry)) + return PTR_ERR(entry); + + param->id = entry->id; + param->flags = (unsigned int) entry->memdesc.flags; + param->size = (size_t) entry->memdesc.size; + param->mmapsize = (size_t) kgsl_memdesc_footprint(&entry->memdesc); + param->gpuaddr = (unsigned long) entry->memdesc.gpuaddr; + + /* Put the extra ref from kgsl_mem_entry_create() */ + kgsl_mem_entry_put(entry); + + return 0; +} + +long kgsl_ioctl_gpumem_get_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_gpumem_get_info *param = data; + struct kgsl_mem_entry *entry = NULL; + int result = 0; + + if (param->id != 0) + entry = kgsl_sharedmem_find_id(private, param->id); + else if (param->gpuaddr != 0) + entry = kgsl_sharedmem_find(private, (uint64_t) param->gpuaddr); + + if (entry == NULL) + return -EINVAL; + + /* + * If any of the 64 bit address / sizes would end up being + * truncated, return -ERANGE. That will signal the user that they + * should use a more modern API + */ + if (entry->memdesc.gpuaddr > ULONG_MAX) + result = -ERANGE; + + param->gpuaddr = (unsigned long) entry->memdesc.gpuaddr; + param->id = entry->id; + param->flags = (unsigned int) entry->memdesc.flags; + param->size = (size_t) entry->memdesc.size; + param->mmapsize = (size_t) kgsl_memdesc_footprint(&entry->memdesc); + /* + * Entries can have multiple user mappings so thre isn't any one address + * we can report. Plus, the user should already know their mappings, so + * there isn't any value in reporting it back to them. 
+ */ + param->useraddr = 0; + + kgsl_mem_entry_put(entry); + return result; +} + +long kgsl_ioctl_gpuobj_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_gpuobj_info *param = data; + struct kgsl_mem_entry *entry; + + if (param->id == 0) + return -EINVAL; + + entry = kgsl_sharedmem_find_id(private, param->id); + if (entry == NULL) + return -EINVAL; + + param->id = entry->id; + param->gpuaddr = entry->memdesc.gpuaddr; + param->flags = entry->memdesc.flags; + param->size = entry->memdesc.size; + + /* VBOs cannot be mapped, so don't report a va_len */ + if (entry->memdesc.flags & KGSL_MEMFLAGS_VBO) + param->va_len = 0; + else + param->va_len = kgsl_memdesc_footprint(&entry->memdesc); + + /* + * Entries can have multiple user mappings so thre isn't any one address + * we can report. Plus, the user should already know their mappings, so + * there isn't any value in reporting it back to them. 
+ */ + param->va_addr = 0; + + kgsl_mem_entry_put(entry); + return 0; +} + +long kgsl_ioctl_gpuobj_set_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_gpuobj_set_info *param = data; + struct kgsl_mem_entry *entry; + int ret = 0; + + if (param->id == 0) + return -EINVAL; + + entry = kgsl_sharedmem_find_id(private, param->id); + if (entry == NULL) + return -EINVAL; + + if (param->flags & KGSL_GPUOBJ_SET_INFO_METADATA) + copy_metadata(entry, param->metadata, param->metadata_len); + + if (param->flags & KGSL_GPUOBJ_SET_INFO_TYPE) { + if (FIELD_FIT(KGSL_MEMTYPE_MASK, param->type)) { + entry->memdesc.flags &= ~((uint64_t) KGSL_MEMTYPE_MASK); + entry->memdesc.flags |= + FIELD_PREP(KGSL_MEMTYPE_MASK, param->type); + } else + ret = -EINVAL; + } + + kgsl_mem_entry_put(entry); + return ret; +} + +/** + * kgsl_ioctl_timestamp_event - Register a new timestamp event from userspace + * @dev_priv - pointer to the private device structure + * @cmd - the ioctl cmd passed from kgsl_ioctl + * @data - the user data buffer from kgsl_ioctl + * @returns 0 on success or error code on failure + */ + +long kgsl_ioctl_timestamp_event(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_timestamp_event *param = data; + int ret; + + switch (param->type) { + case KGSL_TIMESTAMP_EVENT_FENCE: + ret = kgsl_add_fence_event(dev_priv->device, + param->context_id, param->timestamp, param->priv, + param->len, dev_priv); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static vm_fault_t +kgsl_memstore_vm_fault(struct vm_fault *vmf) +{ + struct kgsl_memdesc *memdesc = vmf->vma->vm_private_data; + + return memdesc->ops->vmfault(memdesc, vmf->vma, vmf); +} + +static const struct vm_operations_struct kgsl_memstore_vm_ops = { + .fault = kgsl_memstore_vm_fault, +}; + +static int +kgsl_mmap_memstore(struct file *file, struct kgsl_device *device, + struct 
vm_area_struct *vma) +{ + struct kgsl_memdesc *memdesc = device->memstore; + unsigned int vma_size = vma->vm_end - vma->vm_start; + + /* The memstore can only be mapped as read only */ + + if (vma->vm_flags & VM_WRITE) + return -EPERM; + + vma->vm_flags &= ~VM_MAYWRITE; + + if (memdesc->size != vma_size) { + dev_err(device->dev, "Cannot partially map the memstore\n"); + return -EINVAL; + } + + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + vma->vm_private_data = memdesc; + vma->vm_flags |= memdesc->ops->vmflags; + vma->vm_ops = &kgsl_memstore_vm_ops; + vma->vm_file = file; + + return 0; +} + +/* + * kgsl_gpumem_vm_open is called whenever a vma region is copied or split. + * Increase the refcount to make sure that the accounting stays correct + */ + +static void kgsl_gpumem_vm_open(struct vm_area_struct *vma) +{ + struct kgsl_mem_entry *entry = vma->vm_private_data; + + if (!kgsl_mem_entry_get(entry)) + vma->vm_private_data = NULL; + + atomic_inc(&entry->map_count); +} + +static vm_fault_t +kgsl_gpumem_vm_fault(struct vm_fault *vmf) +{ + struct kgsl_mem_entry *entry = vmf->vma->vm_private_data; + + if (!entry) + return VM_FAULT_SIGBUS; + if (!entry->memdesc.ops || !entry->memdesc.ops->vmfault) + return VM_FAULT_SIGBUS; + + return entry->memdesc.ops->vmfault(&entry->memdesc, vmf->vma, vmf); +} + +static void +kgsl_gpumem_vm_close(struct vm_area_struct *vma) +{ + struct kgsl_mem_entry *entry = vma->vm_private_data; + + if (!entry) + return; + + /* + * Remove the memdesc from the mapped stat once all the mappings have + * gone away + */ + if (!atomic_dec_return(&entry->map_count)) + atomic64_sub(entry->memdesc.size, &entry->priv->gpumem_mapped); + + kgsl_mem_entry_put(entry); +} + +static const struct vm_operations_struct kgsl_gpumem_vm_ops = { + .open = kgsl_gpumem_vm_open, + .fault = kgsl_gpumem_vm_fault, + .close = kgsl_gpumem_vm_close, +}; + +static int +get_mmap_entry(struct kgsl_process_private *private, + struct kgsl_mem_entry **out_entry, 
		unsigned long pgoff,
		unsigned long len)
{
	int ret = 0;
	struct kgsl_mem_entry *entry;

	/* pgoff is either the entry id or a page-shifted GPU address */
	entry = kgsl_sharedmem_find_id(private, pgoff);
	if (entry == NULL)
		entry = kgsl_sharedmem_find(private, pgoff << PAGE_SHIFT);

	if (!entry)
		return -EINVAL;

	/* The entry must be mappable (has vm flags and a fault handler) */
	if (!entry->memdesc.ops ||
		!entry->memdesc.ops->vmflags ||
		!entry->memdesc.ops->vmfault) {
		ret = -EINVAL;
		goto err_put;
	}

	/* Don't allow ourselves to remap user memory */
	if (entry->memdesc.flags & KGSL_MEMFLAGS_USERMEM_ADDR) {
		ret = -EBUSY;
		goto err_put;
	}

	if (kgsl_memdesc_use_cpu_map(&entry->memdesc)) {
		if (len != kgsl_memdesc_footprint(&entry->memdesc)) {
			ret = -ERANGE;
			goto err_put;
		}
	} else if (len != kgsl_memdesc_footprint(&entry->memdesc) &&
		len != entry->memdesc.size) {
		/*
		 * If cpu_map != gpumap then user can map either the
		 * footprint or the entry size
		 */
		ret = -ERANGE;
		goto err_put;
	}

	/* On success the caller holds the entry reference */
	*out_entry = entry;
	return 0;
err_put:
	kgsl_mem_entry_put(entry);
	return ret;
}

/*
 * Claim @addr for the entry in the process pagetable and map it. Returns the
 * address on success or a negative errno cast to unsigned long on failure.
 */
static unsigned long _gpu_set_svm_region(struct kgsl_process_private *private,
		struct kgsl_mem_entry *entry, unsigned long addr,
		unsigned long size)
{
	int ret;

	/*
	 * Protect access to the gpuaddr here to prevent multiple vmas from
	 * trying to map a SVM region at the same time
	 */
	spin_lock(&entry->memdesc.lock);

	if (entry->memdesc.gpuaddr) {
		spin_unlock(&entry->memdesc.lock);
		return (unsigned long) -EBUSY;
	}

	ret = kgsl_mmu_set_svm_region(private->pagetable, (uint64_t) addr,
		(uint64_t) size);

	if (ret != 0) {
		spin_unlock(&entry->memdesc.lock);
		return (unsigned long) ret;
	}

	entry->memdesc.gpuaddr = (uint64_t) addr;
	spin_unlock(&entry->memdesc.lock);

	entry->memdesc.pagetable = private->pagetable;

	ret = kgsl_mmu_map(private->pagetable, &entry->memdesc);
	if (ret) {
		/* Undo the region claim if the map failed */
		kgsl_mmu_put_gpuaddr(private->pagetable, &entry->memdesc);
		return (unsigned long) ret;
	}

	kgsl_memfree_purge(private->pagetable, entry->memdesc.gpuaddr,
		entry->memdesc.size);

	return addr;
}

/* Pick the largest standard alignment implied by the memdesc align bits */
static unsigned long get_align(struct kgsl_mem_entry *entry)
{
	int bit = kgsl_memdesc_get_align(&entry->memdesc);

	if (bit >= ilog2(SZ_2M))
		return SZ_2M;
	else if (bit >= ilog2(SZ_1M))
		return SZ_1M;
	else if (bit >= ilog2(SZ_64K))
		return SZ_64K;

	return SZ_4K;
}

/*
 * Validate @addr against the CPU vma rules, then claim it on the GPU side
 * so the CPU and GPU mappings share the same address.
 */
static unsigned long set_svm_area(struct file *file,
		struct kgsl_mem_entry *entry,
		unsigned long addr, unsigned long len,
		unsigned long flags)
{
	struct kgsl_device_private *dev_priv = file->private_data;
	struct kgsl_process_private *private = dev_priv->process_priv;
	unsigned long ret;

	/*
	 * Do additional constraints checking on the address. Passing MAP_FIXED
	 * ensures that the address we want gets checked
	 */
	ret = current->mm->get_unmapped_area(file, addr, len, 0,
		flags & MAP_FIXED);

	/* If it passes, attempt to set the region in the SVM */
	if (!IS_ERR_VALUE(ret))
		return _gpu_set_svm_region(private, entry, addr, len);

	return ret;
}

/*
 * Find an address usable by both CPU and GPU for a shared virtual memory
 * mapping: honor MAP_FIXED/hints first, then walk the SVM range downward
 * looking for a gap free on both sides.
 */
static unsigned long get_svm_unmapped_area(struct file *file,
		struct kgsl_mem_entry *entry,
		unsigned long addr, unsigned long len,
		unsigned long flags)
{
	struct kgsl_device_private *dev_priv = file->private_data;
	struct kgsl_process_private *private = dev_priv->process_priv;
	unsigned long align = get_align(entry);
	unsigned long ret, iova;
	u64 start = 0, end = 0;
	struct vm_area_struct *vma;

	if (flags & MAP_FIXED) {
		/* Even fixed addresses need to obey alignment */
		if (!IS_ALIGNED(addr, align))
			return -EINVAL;

		return set_svm_area(file, entry, addr, len, flags);
	}

	/* If a hint was provided, try to use that first */
	if (addr) {
		if (IS_ALIGNED(addr, align)) {
			ret = set_svm_area(file, entry, addr, len, flags);
			if (!IS_ERR_VALUE(ret))
				return ret;
		}
	}

	/* Get the SVM range for the current process */
	if (kgsl_mmu_svm_range(private->pagetable, &start, &end,
				entry->memdesc.flags))
		return -ERANGE;

	/* Find the first gap in the iova map */
	iova = kgsl_mmu_find_svm_region(private->pagetable, start, end,
		len, align);

	while (!IS_ERR_VALUE(iova)) {
		/* Skip candidates that collide with an existing CPU vma */
		vma = find_vma_intersection(current->mm, iova, iova + len - 1);
		if (vma) {
			iova = vma->vm_start;
		} else {
			ret = set_svm_area(file, entry, iova, len, flags);
			if (!IS_ERR_VALUE(ret))
				return ret;

			/*
			 * set_svm_area will return -EBUSY if we tried to set up
			 * SVM on an object that already has a GPU address. If
			 * that happens don't bother walking the rest of the
			 * region
			 */
			if ((long) ret == -EBUSY)
				return -EBUSY;

		}

		/* Continue searching below the last candidate */
		iova = kgsl_mmu_find_svm_region(private->pagetable,
			start, iova - 1, len, align);
	}

	return -ENOMEM;
}

/*
 * get_unmapped_area fop: route memstore tokens to the generic allocator,
 * non-SVM entries to the mm allocator and SVM entries to the shared
 * CPU/GPU address search above.
 */
static unsigned long
kgsl_get_unmapped_area(struct file *file, unsigned long addr,
			unsigned long len, unsigned long pgoff,
			unsigned long flags)
{
	unsigned long val;
	unsigned long vma_offset = pgoff << PAGE_SHIFT;
	struct kgsl_device_private *dev_priv = file->private_data;
	struct kgsl_process_private *private = dev_priv->process_priv;
	struct kgsl_device *device = dev_priv->device;
	struct kgsl_mem_entry *entry = NULL;

	if (vma_offset == (unsigned long) KGSL_MEMSTORE_TOKEN_ADDRESS)
		return get_unmapped_area(NULL, addr, len, pgoff, flags);

	val = get_mmap_entry(private, &entry, pgoff, len);
	if (val)
		return val;

	/* Do not allow CPU mappings for secure buffers */
	if (kgsl_memdesc_is_secured(&entry->memdesc)) {
		kgsl_mem_entry_put(entry);
		return (unsigned long) -EPERM;
	}

	if (!kgsl_memdesc_use_cpu_map(&entry->memdesc)) {
		val = current->mm->get_unmapped_area(file, addr, len, 0, flags);
		if (IS_ERR_VALUE(val))
			dev_err_ratelimited(device->dev,
				"get_unmapped_area: pid %d addr %lx pgoff %lx len %ld failed error %d\n",
				pid_nr(private->pid), addr, pgoff, len,
				(int) val);
	} else {
		val = get_svm_unmapped_area(file, entry, addr, len, flags);
		if (IS_ERR_VALUE(val))
			dev_err_ratelimited(device->dev,
				"_get_svm_area: pid %d addr %lx pgoff %lx len %ld failed error %d\n",
				pid_nr(private->pid), addr, pgoff, len,
				(int) val);
	}

	kgsl_mem_entry_put(entry);
	return val;
}

/*
 * mmap fop: map a GPU memory entry (or the memstore) into userspace with the
 * cache attributes recorded in the entry's flags.
 */
static int kgsl_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned int cache;
	unsigned long vma_offset = vma->vm_pgoff << PAGE_SHIFT;
	struct kgsl_device_private *dev_priv = file->private_data;
	struct kgsl_process_private *private = dev_priv->process_priv;
	struct kgsl_mem_entry *entry = NULL;
	struct kgsl_device *device = dev_priv->device;
	int ret;

	/* Handle legacy behavior for memstore */

	if (vma_offset == (unsigned long) KGSL_MEMSTORE_TOKEN_ADDRESS)
		return kgsl_mmap_memstore(file, device, vma);

	/*
	 * The reference count on the entry that we get from
	 * get_mmap_entry() will be held until kgsl_gpumem_vm_close().
	 */
	ret = get_mmap_entry(private, &entry, vma->vm_pgoff,
			vma->vm_end - vma->vm_start);
	if (ret)
		return ret;

	vma->vm_flags |= entry->memdesc.ops->vmflags;

	vma->vm_private_data = entry;

	/* Determine user-side caching policy */

	cache = kgsl_memdesc_get_cachemode(&entry->memdesc);

	switch (cache) {
	case KGSL_CACHEMODE_WRITETHROUGH:
		vma->vm_page_prot = pgprot_writethroughcache(vma->vm_page_prot);
		/* Warn if writethrough silently degraded to writeback */
		if (pgprot_val(vma->vm_page_prot) ==
			pgprot_val(pgprot_writebackcache(vma->vm_page_prot)))
			WARN_ONCE(1, "WRITETHROUGH is deprecated for arm64");
		break;
	case KGSL_CACHEMODE_WRITEBACK:
		vma->vm_page_prot = pgprot_writebackcache(vma->vm_page_prot);
		break;
	case KGSL_CACHEMODE_UNCACHED:
	case KGSL_CACHEMODE_WRITECOMBINE:
	default:
		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
		break;
	}

	vma->vm_ops = &kgsl_gpumem_vm_ops;

	/* Pre-populate cached mappings instead of faulting page by page */
	if (cache == KGSL_CACHEMODE_WRITEBACK
		|| cache == KGSL_CACHEMODE_WRITETHROUGH) {
		int i;
		unsigned long addr = vma->vm_start;
		struct kgsl_memdesc *m = &entry->memdesc;

		/*
		 * NOTE(review): vm_insert_page() return value is ignored here;
		 * presumably a later fault retries the insert - confirm.
		 */
		for (i = 0; i < m->page_count; i++) {
			struct page *page = m->pages[i];

			vm_insert_page(vma, addr, page);
			addr += PAGE_SIZE;
		}
	}

	/* Shmem-backed entries must expose the shmem file on the vma */
	if (entry->memdesc.shmem_filp) {
		fput(vma->vm_file);
		vma->vm_file = get_file(entry->memdesc.shmem_filp);
	}

	atomic64_add(entry->memdesc.size, &entry->priv->gpumem_mapped);

	atomic_inc(&entry->map_count);

	/*
	 * kgsl gets the entry id or the gpu address through vm_pgoff.
	 * It is used during mmap and never needed again. But this vm_pgoff
	 * has different meaning at other parts of kernel. Not setting to
	 * zero will let way for wrong assumption when tried to unmap a page
	 * from this vma.
	 */
	vma->vm_pgoff = 0;

	trace_kgsl_mem_mmap(entry, vma->vm_start);
	return 0;
}

#define KGSL_READ_MESSAGE "OH HAI GPU\n"

/* read fop: return a fixed greeting (includes the NUL terminator) */
static ssize_t kgsl_read(struct file *filep, char __user *buf, size_t count,
		loff_t *pos)
{
	return simple_read_from_buffer(buf, count, pos,
		KGSL_READ_MESSAGE, strlen(KGSL_READ_MESSAGE) + 1);
}

static const struct file_operations kgsl_fops = {
	.owner = THIS_MODULE,
	.release = kgsl_release,
	.open = kgsl_open,
	.mmap = kgsl_mmap,
	.read = kgsl_read,
	.get_unmapped_area = kgsl_get_unmapped_area,
	.unlocked_ioctl = kgsl_ioctl,
	.compat_ioctl = kgsl_compat_ioctl,
};

struct kgsl_driver kgsl_driver = {
	.process_mutex = __MUTEX_INITIALIZER(kgsl_driver.process_mutex),
	.proclist_lock = __RW_LOCK_UNLOCKED(kgsl_driver.proclist_lock),
	.ptlock = __SPIN_LOCK_UNLOCKED(kgsl_driver.ptlock),
	.devlock = __MUTEX_INITIALIZER(kgsl_driver.devlock),
	/*
	 * Full cache flushes are faster than line by line on at least
	 * 8064 and 8974 once the region to be flushed is > 16mb.
+ */ + .full_cache_threshold = SZ_16M, + + .stats.vmalloc = ATOMIC_LONG_INIT(0), + .stats.vmalloc_max = ATOMIC_LONG_INIT(0), + .stats.page_alloc = ATOMIC_LONG_INIT(0), + .stats.page_alloc_max = ATOMIC_LONG_INIT(0), + .stats.coherent = ATOMIC_LONG_INIT(0), + .stats.coherent_max = ATOMIC_LONG_INIT(0), + .stats.secure = ATOMIC_LONG_INIT(0), + .stats.secure_max = ATOMIC_LONG_INIT(0), + .stats.mapped = ATOMIC_LONG_INIT(0), + .stats.mapped_max = ATOMIC_LONG_INIT(0), +}; + +static void _unregister_device(struct kgsl_device *device) +{ + int minor; + + mutex_lock(&kgsl_driver.devlock); + for (minor = 0; minor < ARRAY_SIZE(kgsl_driver.devp); minor++) { + if (device == kgsl_driver.devp[minor]) { + device_destroy(kgsl_driver.class, + MKDEV(MAJOR(kgsl_driver.major), minor)); + kgsl_driver.devp[minor] = NULL; + break; + } + } + mutex_unlock(&kgsl_driver.devlock); +} + +/* sysfs_ops for the /sys/kernel/gpu kobject */ +static ssize_t kgsl_gpu_sysfs_attr_show(struct kobject *kobj, + struct attribute *__attr, char *buf) +{ + struct kgsl_gpu_sysfs_attr *attr = container_of(__attr, + struct kgsl_gpu_sysfs_attr, attr); + struct kgsl_device *device = container_of(kobj, + struct kgsl_device, gpu_sysfs_kobj); + + if (attr->show) + return attr->show(device, buf); + + return -EIO; +} + +static ssize_t kgsl_gpu_sysfs_attr_store(struct kobject *kobj, + struct attribute *__attr, const char *buf, size_t count) +{ + struct kgsl_gpu_sysfs_attr *attr = container_of(__attr, + struct kgsl_gpu_sysfs_attr, attr); + struct kgsl_device *device = container_of(kobj, + struct kgsl_device, gpu_sysfs_kobj); + + if (attr->store) + return attr->store(device, buf, count); + + return -EIO; +} + +/* Dummy release function - we have nothing to do here */ +static void kgsl_gpu_sysfs_release(struct kobject *kobj) +{ +} + +static const struct sysfs_ops kgsl_gpu_sysfs_ops = { + .show = kgsl_gpu_sysfs_attr_show, + .store = kgsl_gpu_sysfs_attr_store, +}; + +static struct kobj_type kgsl_gpu_sysfs_ktype = { + .sysfs_ops 
= &kgsl_gpu_sysfs_ops, + .release = kgsl_gpu_sysfs_release, +}; + +static int _register_device(struct kgsl_device *device) +{ + static u64 dma_mask = DMA_BIT_MASK(64); + static struct device_dma_parameters dma_parms; + int minor, ret; + dev_t dev; + + /* Find a minor for the device */ + + mutex_lock(&kgsl_driver.devlock); + for (minor = 0; minor < ARRAY_SIZE(kgsl_driver.devp); minor++) { + if (kgsl_driver.devp[minor] == NULL) { + kgsl_driver.devp[minor] = device; + break; + } + } + mutex_unlock(&kgsl_driver.devlock); + + if (minor == ARRAY_SIZE(kgsl_driver.devp)) { + pr_err("kgsl: minor devices exhausted\n"); + return -ENODEV; + } + + /* Create the device */ + dev = MKDEV(MAJOR(kgsl_driver.major), minor); + device->dev = device_create(kgsl_driver.class, + &device->pdev->dev, + dev, device, + device->name); + + if (IS_ERR(device->dev)) { + mutex_lock(&kgsl_driver.devlock); + kgsl_driver.devp[minor] = NULL; + mutex_unlock(&kgsl_driver.devlock); + ret = PTR_ERR(device->dev); + pr_err("kgsl: device_create(%s): %d\n", device->name, ret); + return ret; + } + + device->dev->dma_mask = &dma_mask; + device->dev->dma_parms = &dma_parms; + + dma_set_max_seg_size(device->dev, DMA_BIT_MASK(32)); + + set_dma_ops(device->dev, NULL); + + kobject_init_and_add(&device->gpu_sysfs_kobj, &kgsl_gpu_sysfs_ktype, + kernel_kobj, "gpu"); + + return 0; +} + +int kgsl_request_irq(struct platform_device *pdev, const char *name, + irq_handler_t handler, void *data) +{ + int ret, num = platform_get_irq_byname(pdev, name); + + if (num < 0) + return num; + + ret = devm_request_irq(&pdev->dev, num, handler, IRQF_TRIGGER_HIGH, + name, data); + + if (ret) { + dev_err(&pdev->dev, "Unable to get interrupt %s: %d\n", + name, ret); + return ret; + } + + disable_irq(num); + return num; +} + +int kgsl_of_property_read_ddrtype(struct device_node *node, const char *base, + u32 *ptr) +{ + char str[32]; + int ddr = of_fdt_get_ddrtype(); + + /* of_fdt_get_ddrtype returns error if the DDR type isn't determined 
*/ + if (ddr >= 0) { + int ret; + + /* Construct expanded string for the DDR type */ + ret = snprintf(str, sizeof(str), "%s-ddr%d", base, ddr); + + /* WARN_ON() if the array size was too small for the string */ + if (WARN_ON(ret > sizeof(str))) + return -ENOMEM; + + /* Read the expanded string */ + if (!of_property_read_u32(node, str, ptr)) + return 0; + } + + /* Read the default string */ + return of_property_read_u32(node, base, ptr); +} + +int kgsl_device_platform_probe(struct kgsl_device *device) +{ + struct platform_device *pdev = device->pdev; + int status = -EINVAL; + + status = _register_device(device); + if (status) + return status; + + /* Can return -EPROBE_DEFER */ + status = kgsl_pwrctrl_init(device); + if (status) + goto error; + + device->events_wq = alloc_workqueue("kgsl-events", + WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS | WQ_HIGHPRI, 0); + + if (!device->events_wq) { + dev_err(device->dev, "Failed to allocate events workqueue\n"); + status = -ENOMEM; + goto error_pwrctrl_close; + } + + /* This can return -EPROBE_DEFER */ + status = kgsl_mmu_probe(device); + if (status != 0) + goto error_pwrctrl_close; + + status = kgsl_reclaim_init(); + if (status) + goto error_pwrctrl_close; + + rwlock_init(&device->context_lock); + spin_lock_init(&device->submit_lock); + + idr_init(&device->timelines); + spin_lock_init(&device->timelines_lock); + + kgsl_device_debugfs_init(device); + + dma_set_coherent_mask(&pdev->dev, KGSL_DMA_BIT_MASK); + + /* Set up the GPU events for the device */ + kgsl_device_events_probe(device); + + /* Initialize common sysfs entries */ + kgsl_pwrctrl_init_sysfs(device); + + return 0; + +error_pwrctrl_close: + if (device->events_wq) { + destroy_workqueue(device->events_wq); + device->events_wq = NULL; + } + + kgsl_pwrctrl_close(device); +error: + _unregister_device(device); + return status; +} + +void kgsl_device_platform_remove(struct kgsl_device *device) +{ + if (device->events_wq) { + destroy_workqueue(device->events_wq); + 
		device->events_wq = NULL;
	}

	kgsl_device_snapshot_close(device);

	if (device->gpu_sysfs_kobj.state_initialized)
		kobject_del(&device->gpu_sysfs_kobj);

	idr_destroy(&device->context_idr);
	idr_destroy(&device->timelines);

	kgsl_device_events_remove(device);

	kgsl_mmu_close(device);

	/*
	 * This needs to come after the MMU close so we can be sure all the
	 * pagetables have been freed
	 */
	kgsl_free_globals(device);

	kgsl_pwrctrl_close(device);

	kgsl_device_debugfs_close(device);
	_unregister_device(device);
}

/*
 * Undo everything kgsl_core_init() set up. Also used as the error path of
 * kgsl_core_init(), so every step tolerates a partially initialized driver.
 */
void kgsl_core_exit(void)
{
	kgsl_exit_page_pools();
	kgsl_eventlog_exit();

	if (kgsl_driver.workqueue) {
		destroy_workqueue(kgsl_driver.workqueue);
		kgsl_driver.workqueue = NULL;
	}

	if (kgsl_driver.mem_workqueue) {
		destroy_workqueue(kgsl_driver.mem_workqueue);
		kgsl_driver.mem_workqueue = NULL;
	}

	kgsl_events_exit();
	kgsl_core_debugfs_close();

	kgsl_reclaim_close();

	/*
	 * We call device_unregister()
	 * only if kgsl_driver.virtdev has been populated.
	 * We check at least one member of kgsl_driver.virtdev to
	 * see if it is not NULL (and thus, has been populated).
	 */
	if (kgsl_driver.virtdev.class)
		device_unregister(&kgsl_driver.virtdev);

	if (kgsl_driver.class) {
		class_destroy(kgsl_driver.class);
		kgsl_driver.class = NULL;
	}

	kgsl_drawobjs_cache_exit();

	kfree(memfree.list);
	memset(&memfree, 0, sizeof(memfree));

	unregister_chrdev_region(kgsl_driver.major,
		ARRAY_SIZE(kgsl_driver.devp));
}

/*
 * One-time, device independent core init: char device region, device class,
 * virtual sysfs device, debugfs, page pools, workqueues and caches.
 */
int __init kgsl_core_init(void)
{
	int result = 0;

	/* alloc major and minor device numbers */
	result = alloc_chrdev_region(&kgsl_driver.major, 0,
		ARRAY_SIZE(kgsl_driver.devp), "kgsl");

	if (result < 0) {

		pr_err("kgsl: alloc_chrdev_region failed err = %d\n", result);
		goto err;
	}

	cdev_init(&kgsl_driver.cdev, &kgsl_fops);
	kgsl_driver.cdev.owner = THIS_MODULE;
	kgsl_driver.cdev.ops = &kgsl_fops;
	result = cdev_add(&kgsl_driver.cdev, MKDEV(MAJOR(kgsl_driver.major), 0),
		ARRAY_SIZE(kgsl_driver.devp));

	if (result) {
		pr_err("kgsl: cdev_add() failed, dev_num= %d,result= %d\n",
			kgsl_driver.major, result);
		goto err;
	}

	kgsl_driver.class = class_create(THIS_MODULE, "kgsl");

	if (IS_ERR(kgsl_driver.class)) {
		result = PTR_ERR(kgsl_driver.class);
		pr_err("kgsl: failed to create class for kgsl\n");
		goto err;
	}

	/*
	 * Make a virtual device for managing core related things
	 * in sysfs
	 */
	kgsl_driver.virtdev.class = kgsl_driver.class;
	dev_set_name(&kgsl_driver.virtdev, "kgsl");
	result = device_register(&kgsl_driver.virtdev);
	if (result) {
		/* NOTE(review): message says driver_register but the call
		 * above is device_register() - worth correcting upstream
		 */
		pr_err("kgsl: driver_register failed\n");
		goto err;
	}

	/* Make kobjects in the virtual device for storing statistics */

	kgsl_driver.ptkobj =
		kobject_create_and_add("pagetables",
			&kgsl_driver.virtdev.kobj);

	kgsl_driver.prockobj =
		kobject_create_and_add("proc",
			&kgsl_driver.virtdev.kobj);

	kgsl_core_debugfs_init();

	kgsl_sharedmem_init_sysfs();

	/* Initialize the memory pools */
	kgsl_probe_page_pools();

	INIT_LIST_HEAD(&kgsl_driver.process_list);
INIT_LIST_HEAD(&kgsl_driver.pagetable_list); + + kgsl_driver.workqueue = alloc_workqueue("kgsl-workqueue", + WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); + + if (!kgsl_driver.workqueue) { + pr_err("kgsl: Failed to allocate kgsl workqueue\n"); + result = -ENOMEM; + goto err; + } + + kgsl_driver.mem_workqueue = alloc_workqueue("kgsl-mementry", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0); + + if (!kgsl_driver.mem_workqueue) { + pr_err("kgsl: Failed to allocate mem workqueue\n"); + result = -ENOMEM; + goto err; + } + + kgsl_eventlog_init(); + + kgsl_events_init(); + + result = kgsl_drawobjs_cache_init(); + if (result) + goto err; + + memfree.list = kcalloc(MEMFREE_ENTRIES, sizeof(struct memfree_entry), + GFP_KERNEL); + + return 0; + +err: + kgsl_core_exit(); + return result; +} diff --git a/kgsl.h b/kgsl.h new file mode 100644 index 0000000000..98b25cb300 --- /dev/null +++ b/kgsl.h @@ -0,0 +1,606 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved. + */ +#ifndef __KGSL_H +#define __KGSL_H + +#include +#include +#include +#include +#include +#include +#include + +#include "kgsl_gmu_core.h" +#include "kgsl_pwrscale.h" + +#define KGSL_L3_DEVICE "kgsl-l3" + +/* + * --- kgsl drawobj flags --- + * These flags are same as --- drawobj flags --- + * but renamed to reflect that cmdbatch is renamed to drawobj. 
 */
#define KGSL_DRAWOBJ_MEMLIST KGSL_CMDBATCH_MEMLIST
#define KGSL_DRAWOBJ_MARKER KGSL_CMDBATCH_MARKER
#define KGSL_DRAWOBJ_SUBMIT_IB_LIST KGSL_CMDBATCH_SUBMIT_IB_LIST
#define KGSL_DRAWOBJ_CTX_SWITCH KGSL_CMDBATCH_CTX_SWITCH
#define KGSL_DRAWOBJ_PROFILING KGSL_CMDBATCH_PROFILING
#define KGSL_DRAWOBJ_PROFILING_KTIME KGSL_CMDBATCH_PROFILING_KTIME
#define KGSL_DRAWOBJ_END_OF_FRAME KGSL_CMDBATCH_END_OF_FRAME
#define KGSL_DRAWOBJ_SYNC KGSL_CMDBATCH_SYNC
#define KGSL_DRAWOBJ_PWR_CONSTRAINT KGSL_CMDBATCH_PWR_CONSTRAINT

#define kgsl_drawobj_profiling_buffer kgsl_cmdbatch_profiling_buffer


/* The number of memstore arrays limits the number of contexts allowed.
 * If more contexts are needed, update multiple for MEMSTORE_SIZE
 */
#define KGSL_MEMSTORE_SIZE ((int)(PAGE_SIZE * 8))
#define KGSL_MEMSTORE_GLOBAL (0)
#define KGSL_PRIORITY_MAX_RB_LEVELS 4
/* Last slots are reserved for the per-ringbuffer memstores */
#define KGSL_MEMSTORE_MAX (KGSL_MEMSTORE_SIZE / \
	sizeof(struct kgsl_devmemstore) - 1 - KGSL_PRIORITY_MAX_RB_LEVELS)
#define KGSL_MAX_CONTEXTS_PER_PROC 200

#define MEMSTORE_RB_OFFSET(rb, field) \
	KGSL_MEMSTORE_OFFSET(((rb)->id + KGSL_MEMSTORE_MAX), field)

#define MEMSTORE_ID_GPU_ADDR(dev, iter, field) \
	((dev)->memstore->gpuaddr + KGSL_MEMSTORE_OFFSET(iter, field))

#define MEMSTORE_RB_GPU_ADDR(dev, rb, field) \
	((dev)->memstore->gpuaddr + \
	KGSL_MEMSTORE_OFFSET(((rb)->id + KGSL_MEMSTORE_MAX), field))

/*
 * SCRATCH MEMORY: The scratch memory is one page worth of data that
 * is mapped into the GPU. This allows for some 'shared' data between
 * the GPU and CPU. For example, it will be used by the GPU to write
 * each updated RPTR for each RB.
 *
 * Used Data:
 * Offset: Length(bytes): What
 * 0x0: 4 * KGSL_PRIORITY_MAX_RB_LEVELS: RB0 RPTR
 */

/* Shadow global helpers */
#define SCRATCH_RPTR_OFFSET(id) ((id) * sizeof(unsigned int))
#define SCRATCH_RPTR_GPU_ADDR(dev, id) \
	((dev)->scratch->gpuaddr + SCRATCH_RPTR_OFFSET(id))
#define SCRATCH_BV_RPTR_OFFSET(id) (0x40 + (id) * sizeof(unsigned int))
#define SCRATCH_BV_RPTR_GPU_ADDR(dev, id) \
	((dev)->scratch->gpuaddr + SCRATCH_BV_RPTR_OFFSET(id))

/* Timestamp window used to detect rollovers (half of integer range) */
#define KGSL_TIMESTAMP_WINDOW 0x80000000

/*
 * A helper for memory statistics - add the new size to the stat and if
 * the statistic is greater than _max, set _max
 */
static inline void KGSL_STATS_ADD(uint64_t size, atomic_long_t *stat,
		atomic_long_t *max)
{
	uint64_t ret = atomic_long_add_return(size, stat);

	if (ret > atomic_long_read(max))
		atomic_long_set(max, ret);
}

#define KGSL_MAX_NUMIBS 100000
#define KGSL_MAX_SYNCPOINTS 32

struct kgsl_device;
struct kgsl_context;

/**
 * struct kgsl_driver - main container for global KGSL things
 * @cdev: Character device struct
 * @major: Major ID for the KGSL device
 * @class: Pointer to the class struct for the core KGSL sysfs entries
 * @virtdev: Virtual device for managing the core
 * @ptkobj: kobject for storing the pagetable statistics
 * @prockobj: kobject for storing the process statistics
 * @devp: Array of pointers to the individual KGSL device structs
 * @process_list: List of open processes
 * @pagetable_list: List of open pagetables
 * @ptlock: Lock for accessing the pagetable list
 * @process_mutex: Mutex for accessing the process list
 * @proclist_lock: Lock for accessing the process list
 * @devlock: Mutex protecting the device list
 * @stats: Struct containing atomic memory statistics
 * @full_cache_threshold: the threshold that triggers a full cache flush
 * @workqueue: Pointer to a single threaded workqueue
 * @mem_workqueue: Pointer to a workqueue for deferring memory entries
 */
struct kgsl_driver {
	struct cdev cdev;
	dev_t major;
	struct class *class;
	struct device virtdev;
	struct kobject *ptkobj;
	struct kobject *prockobj;
	struct kgsl_device *devp[1];
	struct list_head process_list;
	struct list_head pagetable_list;
	spinlock_t ptlock;
	struct mutex process_mutex;
	rwlock_t proclist_lock;
	struct mutex devlock;
	struct {
		atomic_long_t vmalloc;
		atomic_long_t vmalloc_max;
		atomic_long_t page_alloc;
		atomic_long_t page_alloc_max;
		atomic_long_t coherent;
		atomic_long_t coherent_max;
		atomic_long_t secure;
		atomic_long_t secure_max;
		atomic_long_t mapped;
		atomic_long_t mapped_max;
	} stats;
	unsigned int full_cache_threshold;
	struct workqueue_struct *workqueue;
	struct workqueue_struct *mem_workqueue;
};

extern struct kgsl_driver kgsl_driver;

struct kgsl_pagetable;
struct kgsl_memdesc;

/* Per-allocation-type operations for a memory descriptor */
struct kgsl_memdesc_ops {
	unsigned int vmflags;
	vm_fault_t (*vmfault)(struct kgsl_memdesc *memdesc,
		struct vm_area_struct *vma, struct vm_fault *vmf);
	void (*free)(struct kgsl_memdesc *memdesc);
	int (*map_kernel)(struct kgsl_memdesc *memdesc);
	void (*unmap_kernel)(struct kgsl_memdesc *memdesc);
	/**
	 * @put_gpuaddr: Put away the GPU address and unmap the memory
	 * descriptor
	 */
	void (*put_gpuaddr)(struct kgsl_memdesc *memdesc);
};

/* Internal definitions for memdesc->priv */
#define KGSL_MEMDESC_GUARD_PAGE BIT(0)
/* Set if the memdesc is mapped into all pagetables */
#define KGSL_MEMDESC_GLOBAL BIT(1)
/* The memdesc is frozen during a snapshot */
#define KGSL_MEMDESC_FROZEN BIT(2)
/* The memdesc is mapped into a pagetable */
#define KGSL_MEMDESC_MAPPED BIT(3)
/* The memdesc is secured for content protection */
#define KGSL_MEMDESC_SECURE BIT(4)
/* Memory is accessible in privileged mode */
/* NOTE(review): BIT(5) is unassigned here - presumably reserved; confirm */
#define KGSL_MEMDESC_PRIVILEGED BIT(6)
/* This is an instruction buffer */
#define KGSL_MEMDESC_UCODE BIT(7)
/* For global buffers, randomly assign an address from the region */
#define KGSL_MEMDESC_RANDOM BIT(8)
/* Allocate memory from the system instead of the pools */
#define KGSL_MEMDESC_SYSMEM BIT(9)
/* The memdesc pages can be reclaimed */
#define KGSL_MEMDESC_CAN_RECLAIM BIT(10)
/* The memdesc pages were reclaimed */
#define KGSL_MEMDESC_RECLAIMED BIT(11)
/* Skip reclaim of the memdesc pages */
#define KGSL_MEMDESC_SKIP_RECLAIM BIT(12)

/**
 * struct kgsl_memdesc - GPU memory object descriptor
 * @pagetable: Pointer to the pagetable that the object is mapped in
 * @hostptr: Kernel virtual address
 * @hostptr_count: Number of threads using hostptr
 * @gpuaddr: GPU virtual address
 * @physaddr: Physical address of the memory object
 * @size: Size of the memory object
 * @priv: Internal flags and settings
 * @sgt: Scatter gather table for allocated pages
 * @ops: Function hooks for the memdesc memory type
 * @flags: Flags set from userspace
 * @dev: Pointer to the struct device that owns this memory
 * @attrs: dma attributes for this memory
 * @pages: An array of pointers to allocated pages
 * @page_count: Total number of pages allocated
 */
struct kgsl_memdesc {
	struct kgsl_pagetable *pagetable;
	void *hostptr;
	unsigned int hostptr_count;
	uint64_t gpuaddr;
	phys_addr_t physaddr;
	uint64_t size;
	unsigned int priv;
	struct sg_table *sgt;
	const struct kgsl_memdesc_ops *ops;
	uint64_t flags;
	struct device *dev;
	unsigned long attrs;
	struct page **pages;
	unsigned int page_count;
	/*
	 * @lock: Spinlock to protect the gpuaddr from being accessed by
	 * multiple entities trying to map the same SVM region at once
	 */
	spinlock_t lock;
	/** @shmem_filp: Pointer to the shmem file backing this memdesc */
	struct file *shmem_filp;
	/** @ranges: rbtree base for the interval list of vbo ranges */
	struct rb_root_cached ranges;
	/** @ranges_lock: Mutex to protect the range database */
	struct mutex ranges_lock;
	/** @gmuaddr: GMU VA if this is mapped in GMU */
	u32 gmuaddr;
};

/**
 * struct kgsl_global_memdesc - wrapper for global memory objects
 */
struct kgsl_global_memdesc {
	/** @memdesc: Container for the GPU memory descriptor for the object */
	struct kgsl_memdesc memdesc;
	/** @name: Name of the object for the debugfs list */
	const char *name;
	/** @node: List node for the list of global objects */
	struct list_head node;
};

/*
 * List of different memory entry types. The usermem enum
 * starts at 0, which we use for allocated memory, so 1 is
 * added to the enum values.
 */
#define KGSL_MEM_ENTRY_KERNEL 0
#define KGSL_MEM_ENTRY_USER (KGSL_USER_MEM_TYPE_ADDR + 1)
#define KGSL_MEM_ENTRY_ION (KGSL_USER_MEM_TYPE_ION + 1)
#define KGSL_MEM_ENTRY_MAX (KGSL_USER_MEM_TYPE_MAX + 1)

/* symbolic table for trace and debugfs */
/*
 * struct kgsl_mem_entry - a userspace memory allocation
 * @refcount: reference count. Currently userspace can only
 * hold a single reference count, but the kernel may hold more.
 * @memdesc: description of the memory
 * @priv_data: type-specific data, such as the dma-buf attachment pointer.
 * @node: rb_node for the gpu address lookup rb tree
 * @id: idr index for this entry, can be used to find memory that does not have
 * a valid GPU address.
 * @priv: back pointer to the process that owns this memory
 * @pending_free: if !0, userspace requested that this memory be freed, but
 * there are still references to it.
 * @dev_priv: back pointer to the device file that created this entry.
 * @metadata: String containing user specified metadata for the entry
 * @work: Work struct used to schedule a kgsl_mem_entry_put in atomic contexts
 */
struct kgsl_mem_entry {
	struct kref refcount;
	struct kgsl_memdesc memdesc;
	void *priv_data;
	struct rb_node node;
	unsigned int id;
	struct kgsl_process_private *priv;
	int pending_free;
	char metadata[KGSL_GPUOBJ_ALLOC_METADATA_MAX + 1];
	struct work_struct work;
	/*
	 * @map_count: Count how many vmas this object is mapped in - used for
	 * debugfs accounting
	 */
	atomic_t map_count;
};

struct kgsl_device_private;
struct kgsl_event_group;

typedef void (*kgsl_event_func)(struct kgsl_device *, struct kgsl_event_group *,
		void *, int);

/**
 * struct kgsl_event - KGSL GPU timestamp event
 * @device: Pointer to the KGSL device that owns the event
 * @context: Pointer to the context that owns the event
 * @timestamp: Timestamp for the event to expire
 * @func: Callback function for the event when it expires
 * @priv: Private data passed to the callback function
 * @node: List node for the kgsl_event_group list
 * @created: Jiffies when the event was created
 * @work: Work struct for dispatching the callback
 * @result: KGSL event result type to pass to the callback
 * group: The event group this event belongs to
 */
struct kgsl_event {
	struct kgsl_device *device;
	struct kgsl_context *context;
	unsigned int timestamp;
	kgsl_event_func func;
	void *priv;
	struct list_head node;
	unsigned int created;
	struct work_struct work;
	int result;
	struct kgsl_event_group *group;
};

typedef int (*readtimestamp_func)(struct kgsl_device *, void *,
	enum kgsl_timestamp_type, unsigned int *);

/**
 * struct kgsl_event_group - A list of GPU events
 * @context: Pointer to the active context for the events
 * @lock: Spinlock for protecting the list
 * @events: List of active GPU events
 * @group: Node for the master group list
 * @processed: Last processed timestamp
 * @name: String name for the group (for the debugfs file)
 * @readtimestamp: Function pointer to read a timestamp
 * @priv: Priv member to pass to the readtimestamp function
 */
struct kgsl_event_group {
	struct kgsl_context *context;
	spinlock_t lock;
	struct list_head events;
	struct list_head group;
	unsigned int processed;
	char name[64];
	readtimestamp_func readtimestamp;
	void *priv;
};

/**
 * struct submission_info - Container for submission statistics
 * @inflight: Number of commands that are inflight
 * @rb_id: id of the ringbuffer to which this submission is made
 * @rptr: Read pointer of the ringbuffer
 * @wptr: Write pointer of the ringbuffer
 * @gmu_dispatch_queue: GMU dispatch queue to which this submission is made
 */
struct submission_info {
	int inflight;
	u32 rb_id;
	u32 rptr;
	u32 wptr;
	u32 gmu_dispatch_queue;
};

/**
 * struct retire_info - Container for retire statistics
 * @inflight: Number of commands that are inflight
 * @rb_id: id of the ringbuffer to which this submission is made
 * @rptr: Read pointer of the ringbuffer
 * @wptr: Write pointer of the ringbuffer
 * @gmu_dispatch_queue: GMU dispatch queue to which this submission is made
 * @timestamp: Timestamp of submission that retired
 * @submitted_to_rb: AO ticks when GMU put this submission on ringbuffer
 * @sop: AO ticks when GPU started processing this submission
 * @eop: AO ticks when GPU finished this submission
 * @retired_on_gmu: AO ticks when GMU retired this submission
 */
struct retire_info {
	int inflight;
	int rb_id;
	u32 rptr;
	u32 wptr;
	u32 gmu_dispatch_queue;
	u32 timestamp;
	u64 submitted_to_rb;
	u64 sop;
	u64 eop;
	u64 retired_on_gmu;
};

long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv,
	unsigned int cmd, void *data);
long kgsl_ioctl_device_setproperty(struct kgsl_device_private *dev_priv,
	unsigned int cmd, void *data);
long kgsl_ioctl_device_waittimestamp_ctxtid(struct
kgsl_device_private + *dev_priv, unsigned int cmd, void *data); +long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_submit_commands(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_cmdstream_readtimestamp_ctxtid(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data); +long kgsl_ioctl_cmdstream_freememontimestamp_ctxtid( + struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data); +long kgsl_ioctl_drawctxt_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_drawctxt_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_sharedmem_free(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_free_id(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_sync_cache(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_sync_cache_bulk(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_alloc(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_alloc_id(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_get_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_timestamp_event(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpuobj_alloc(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpuobj_free(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long 
kgsl_ioctl_gpuobj_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpuobj_import(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpuobj_sync(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpu_command(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpuobj_set_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_bind_ranges(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpu_aux_command(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_timeline_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_timeline_wait(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_timeline_query(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_timeline_fence_get(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_timeline_signal(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_timeline_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); + +void kgsl_mem_entry_destroy(struct kref *kref); + +void kgsl_get_egl_counts(struct kgsl_mem_entry *entry, + int *egl_surface_count, int *egl_image_count); + +unsigned long kgsl_get_dmabuf_inode_number(struct kgsl_mem_entry *entry); + +struct kgsl_mem_entry * __must_check +kgsl_sharedmem_find(struct kgsl_process_private *private, uint64_t gpuaddr); + +struct kgsl_mem_entry * __must_check +kgsl_sharedmem_find_id(struct kgsl_process_private *process, unsigned int id); + +struct kgsl_mem_entry *gpumem_alloc_entry(struct kgsl_device_private *dev_priv, + uint64_t size, uint64_t flags); +long gpumem_free_entry(struct kgsl_mem_entry *entry); + +enum 
kgsl_mmutype kgsl_mmu_get_mmutype(struct kgsl_device *device); +void kgsl_mmu_add_global(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, const char *name); +void kgsl_mmu_remove_global(struct kgsl_device *device, + struct kgsl_memdesc *memdesc); + +/* Helper functions */ +int kgsl_request_irq(struct platform_device *pdev, const char *name, + irq_handler_t handler, void *data); + +int __init kgsl_core_init(void); +void kgsl_core_exit(void); + +static inline bool kgsl_gpuaddr_in_memdesc(const struct kgsl_memdesc *memdesc, + uint64_t gpuaddr, uint64_t size) +{ + if (!memdesc) + return false; + + /* set a minimum size to search for */ + if (!size) + size = 1; + + /* don't overflow */ + if (size > U64_MAX - gpuaddr) + return false; + + return (gpuaddr >= memdesc->gpuaddr && + ((gpuaddr + size) <= (memdesc->gpuaddr + memdesc->size))); +} + +static inline void *kgsl_memdesc_map(struct kgsl_memdesc *memdesc) +{ + if (memdesc->ops && memdesc->ops->map_kernel) + memdesc->ops->map_kernel(memdesc); + + return memdesc->hostptr; +} + +static inline void kgsl_memdesc_unmap(struct kgsl_memdesc *memdesc) +{ + if (memdesc->ops && memdesc->ops->unmap_kernel) + memdesc->ops->unmap_kernel(memdesc); +} + +static inline void *kgsl_gpuaddr_to_vaddr(struct kgsl_memdesc *memdesc, + uint64_t gpuaddr) +{ + void *hostptr = NULL; + + if ((gpuaddr >= memdesc->gpuaddr) && + (gpuaddr < (memdesc->gpuaddr + memdesc->size))) + hostptr = kgsl_memdesc_map(memdesc); + + return hostptr != NULL ? 
hostptr + (gpuaddr - memdesc->gpuaddr) : NULL; +} + +static inline int timestamp_cmp(unsigned int a, unsigned int b) +{ + /* check for equal */ + if (a == b) + return 0; + + /* check for greater-than for non-rollover case */ + if ((a > b) && (a - b < KGSL_TIMESTAMP_WINDOW)) + return 1; + + /* check for greater-than for rollover case + * note that <= is required to ensure that consistent + * results are returned for values whose difference is + * equal to the window size + */ + a += KGSL_TIMESTAMP_WINDOW; + b += KGSL_TIMESTAMP_WINDOW; + return ((a > b) && (a - b <= KGSL_TIMESTAMP_WINDOW)) ? 1 : -1; +} + +/** + * kgsl_schedule_work() - Schedule a work item on the KGSL workqueue + * @work: work item to schedule + */ +static inline void kgsl_schedule_work(struct work_struct *work) +{ + queue_work(kgsl_driver.workqueue, work); +} + +static inline struct kgsl_mem_entry * +kgsl_mem_entry_get(struct kgsl_mem_entry *entry) +{ + if (!IS_ERR_OR_NULL(entry) && kref_get_unless_zero(&entry->refcount)) + return entry; + + return NULL; +} + +static inline void +kgsl_mem_entry_put(struct kgsl_mem_entry *entry) +{ + if (!IS_ERR_OR_NULL(entry)) + kref_put(&entry->refcount, kgsl_mem_entry_destroy); +} + +/* + * kgsl_addr_range_overlap() - Checks if 2 ranges overlap + * @gpuaddr1: Start of first address range + * @size1: Size of first address range + * @gpuaddr2: Start of second address range + * @size2: Size of second address range + * + * Function returns true if the 2 given address ranges overlap + * else false + */ +static inline bool kgsl_addr_range_overlap(uint64_t gpuaddr1, + uint64_t size1, uint64_t gpuaddr2, uint64_t size2) +{ + if ((size1 > (U64_MAX - gpuaddr1)) || (size2 > (U64_MAX - gpuaddr2))) + return false; + return !(((gpuaddr1 + size1) <= gpuaddr2) || + (gpuaddr1 >= (gpuaddr2 + size2))); +} +#endif /* __KGSL_H */ diff --git a/kgsl_bus.c b/kgsl_bus.c new file mode 100644 index 0000000000..2279ce5d2e --- /dev/null +++ b/kgsl_bus.c @@ -0,0 +1,179 @@ +// 
SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include + +#include "kgsl_bus.h" +#include "kgsl_device.h" +#include "kgsl_trace.h" + + +static u32 _ab_buslevel_update(struct kgsl_pwrctrl *pwr, + u32 ib) +{ + if (!ib) + return 0; + + /* + * In the absence of any other settings, make ab 25% of ib + * where the ib vote is in kbps + */ + if ((!pwr->bus_percent_ab) && (!pwr->bus_ab_mbytes)) + return 25 * ib / 100000; + + if (pwr->bus_width) + return pwr->bus_ab_mbytes; + + return (pwr->bus_percent_ab * pwr->bus_max) / 100; +} + + +int kgsl_bus_update(struct kgsl_device *device, + enum kgsl_bus_vote vote_state) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + /* FIXME: this might be wrong? */ + int cur = pwr->pwrlevels[pwr->active_pwrlevel].bus_freq; + int buslevel = 0; + u32 ab; + + /* the bus should be ON to update the active frequency */ + if ((vote_state != KGSL_BUS_VOTE_OFF) && + !(test_bit(KGSL_PWRFLAGS_AXI_ON, &pwr->power_flags))) + return 0; + /* + * If the bus should remain on calculate our request and submit it, + * otherwise request bus level 0, off. 
+ */ + if (vote_state == KGSL_BUS_VOTE_ON) { + buslevel = min_t(int, pwr->pwrlevels[0].bus_max, + cur + pwr->bus_mod); + buslevel = max_t(int, buslevel, 1); + } else if (vote_state == KGSL_BUS_VOTE_MINIMUM) { + /* Request bus level 1, minimum non-zero value */ + buslevel = 1; + pwr->bus_mod = 0; + pwr->bus_percent_ab = 0; + pwr->bus_ab_mbytes = 0; + } else if (vote_state == KGSL_BUS_VOTE_OFF) { + /* If the bus is being turned off, reset to default level */ + pwr->bus_mod = 0; + pwr->bus_percent_ab = 0; + pwr->bus_ab_mbytes = 0; + } + + /* buslevel is the IB vote, update the AB */ + ab = _ab_buslevel_update(pwr, pwr->ddr_table[buslevel]); + + return device->ftbl->gpu_bus_set(device, buslevel, ab); +} + +static void validate_pwrlevels(struct kgsl_device *device, u32 *ibs, + int count) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int i; + + for (i = 0; i < pwr->num_pwrlevels - 1; i++) { + struct kgsl_pwrlevel *pwrlevel = &pwr->pwrlevels[i]; + + if (pwrlevel->bus_freq >= count) { + dev_err(device->dev, "Bus setting for GPU freq %d is out of bounds\n", + pwrlevel->gpu_freq); + pwrlevel->bus_freq = count - 1; + } + + if (pwrlevel->bus_max >= count) { + dev_err(device->dev, "Bus max for GPU freq %d is out of bounds\n", + pwrlevel->gpu_freq); + pwrlevel->bus_max = count - 1; + } + + if (pwrlevel->bus_min >= count) { + dev_err(device->dev, "Bus min for GPU freq %d is out of bounds\n", + pwrlevel->gpu_freq); + pwrlevel->bus_min = count - 1; + } + + if (pwrlevel->bus_min > pwrlevel->bus_max) { + dev_err(device->dev, "Bus min is bigger than bus max for GPU freq %d\n", + pwrlevel->gpu_freq); + pwrlevel->bus_min = pwrlevel->bus_max; + } + } +} + +u32 *kgsl_bus_get_table(struct platform_device *pdev, + const char *name, int *count) +{ + u32 *levels; + int i, num = of_property_count_elems_of_size(pdev->dev.of_node, + name, sizeof(u32)); + + /* If the bus wasn't specified, then build a static table */ + if (num <= 0) + return ERR_PTR(-EINVAL); + + levels = kcalloc(num, 
sizeof(*levels), GFP_KERNEL); + if (!levels) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < num; i++) + of_property_read_u32_index(pdev->dev.of_node, + name, i, &levels[i]); + + *count = num; + return levels; +} + +int kgsl_bus_init(struct kgsl_device *device, struct platform_device *pdev) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int count; + int ddr = of_fdt_get_ddrtype(); + + if (ddr >= 0) { + char str[32]; + + snprintf(str, sizeof(str), "qcom,bus-table-ddr%d", ddr); + + pwr->ddr_table = kgsl_bus_get_table(pdev, str, &count); + if (!IS_ERR(pwr->ddr_table)) + goto done; + } + + /* Look if a generic table is present */ + pwr->ddr_table = kgsl_bus_get_table(pdev, "qcom,bus-table-ddr", &count); + if (IS_ERR(pwr->ddr_table)) { + int ret = PTR_ERR(pwr->ddr_table); + + pwr->ddr_table = NULL; + return ret; + } +done: + pwr->ddr_table_count = count; + + validate_pwrlevels(device, pwr->ddr_table, pwr->ddr_table_count); + + pwr->icc_path = of_icc_get(&pdev->dev, "gpu_icc_path"); + if (IS_ERR(pwr->icc_path) && !gmu_core_scales_bandwidth(device)) { + WARN(1, "The CPU has no way to set the GPU bus levels\n"); + + kfree(pwr->ddr_table); + pwr->ddr_table = NULL; + return PTR_ERR(pwr->icc_path); + } + + return 0; +} + +void kgsl_bus_close(struct kgsl_device *device) +{ + kfree(device->pwrctrl.ddr_table); + device->pwrctrl.ddr_table = NULL; + icc_put(device->pwrctrl.icc_path); +} diff --git a/kgsl_bus.h b/kgsl_bus.h new file mode 100644 index 0000000000..1814233658 --- /dev/null +++ b/kgsl_bus.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2019-2020, The Linux Foundation. All rights reserved. 
+ */ + +#ifndef _KGSL_BUS_H +#define _KGSL_BUS_H + +enum kgsl_bus_vote { + KGSL_BUS_VOTE_OFF = 0, + KGSL_BUS_VOTE_ON, + KGSL_BUS_VOTE_MINIMUM, +}; + +struct kgsl_device; +struct platform_device; + +int kgsl_bus_init(struct kgsl_device *device, struct platform_device *pdev); +void kgsl_bus_close(struct kgsl_device *device); +int kgsl_bus_update(struct kgsl_device *device, enum kgsl_bus_vote vote_state); + +u32 *kgsl_bus_get_table(struct platform_device *pdev, + const char *name, int *count); + +#endif diff --git a/kgsl_compat.c b/kgsl_compat.c new file mode 100644 index 0000000000..a5e8deaf78 --- /dev/null +++ b/kgsl_compat.c @@ -0,0 +1,392 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2013-2021, The Linux Foundation. All rights reserved. + */ + +#include "kgsl_device.h" +#include "kgsl_compat.h" +#include "kgsl_sync.h" + +static long +kgsl_ioctl_device_getproperty_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device_getproperty_compat *param32 = data; + struct kgsl_device_getproperty param; + + param.type = param32->type; + param.value = compat_ptr(param32->value); + param.sizebytes = (size_t)param32->sizebytes; + + return kgsl_ioctl_device_getproperty(dev_priv, cmd, ¶m); +} + +static long +kgsl_ioctl_device_setproperty_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device_getproperty_compat *param32 = data; + struct kgsl_device_getproperty param; + + param.type = param32->type; + param.value = compat_ptr(param32->value); + param.sizebytes = (size_t)param32->sizebytes; + + return kgsl_ioctl_device_setproperty(dev_priv, cmd, ¶m); +} + +static long +kgsl_ioctl_submit_commands_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result; + struct kgsl_submit_commands_compat *param32 = data; + struct kgsl_submit_commands param; + + param.context_id = param32->context_id; + param.flags = param32->flags; + param.cmdlist = 
compat_ptr(param32->cmdlist); + param.numcmds = param32->numcmds; + param.synclist = compat_ptr(param32->synclist); + param.numsyncs = param32->numsyncs; + param.timestamp = param32->timestamp; + + result = kgsl_ioctl_submit_commands(dev_priv, cmd, ¶m); + + param32->timestamp = param.timestamp; + + return result; +} + +static long +kgsl_ioctl_rb_issueibcmds_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result; + struct kgsl_ringbuffer_issueibcmds_compat *param32 = data; + struct kgsl_ringbuffer_issueibcmds param; + + param.drawctxt_id = param32->drawctxt_id; + param.flags = param32->flags; + param.ibdesc_addr = (unsigned long)param32->ibdesc_addr; + param.numibs = param32->numibs; + param.timestamp = param32->timestamp; + + result = kgsl_ioctl_rb_issueibcmds(dev_priv, cmd, ¶m); + + param32->timestamp = param.timestamp; + + return result; +} + +static long kgsl_ioctl_cmdstream_freememontimestamp_ctxtid_compat( + struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_cmdstream_freememontimestamp_ctxtid_compat *param32 = data; + struct kgsl_cmdstream_freememontimestamp_ctxtid param; + + param.context_id = param32->context_id; + param.gpuaddr = (unsigned long)param32->gpuaddr; + param.type = param32->type; + param.timestamp = param32->timestamp; + + return kgsl_ioctl_cmdstream_freememontimestamp_ctxtid(dev_priv, cmd, + ¶m); +} + +static long kgsl_ioctl_sharedmem_free_compat(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_sharedmem_free_compat *param32 = data; + struct kgsl_sharedmem_free param; + + param.gpuaddr = (unsigned long)param32->gpuaddr; + + return kgsl_ioctl_sharedmem_free(dev_priv, cmd, ¶m); +} + +static long kgsl_ioctl_map_user_mem_compat(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + int result = 0; + struct kgsl_map_user_mem_compat *param32 = data; + struct kgsl_map_user_mem param; + + param.fd = param32->fd; + 
param.gpuaddr = (unsigned long)param32->gpuaddr; + param.len = (size_t)param32->len; + param.offset = (size_t)param32->offset; + param.hostptr = (unsigned long)param32->hostptr; + param.memtype = param32->memtype; + param.flags = param32->flags; + + result = kgsl_ioctl_map_user_mem(dev_priv, cmd, ¶m); + + param32->gpuaddr = gpuaddr_to_compat(param.gpuaddr); + param32->flags = param.flags; + return result; +} + +static long +kgsl_ioctl_gpumem_sync_cache_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpumem_sync_cache_compat *param32 = data; + struct kgsl_gpumem_sync_cache param; + + param.gpuaddr = (unsigned long)param32->gpuaddr; + param.id = param32->id; + param.op = param32->op; + param.offset = (size_t)param32->offset; + param.length = (size_t)param32->length; + + return kgsl_ioctl_gpumem_sync_cache(dev_priv, cmd, ¶m); +} + +static long +kgsl_ioctl_gpumem_sync_cache_bulk_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpumem_sync_cache_bulk_compat *param32 = data; + struct kgsl_gpumem_sync_cache_bulk param; + + param.id_list = compat_ptr(param32->id_list); + param.count = param32->count; + param.op = param32->op; + + return kgsl_ioctl_gpumem_sync_cache_bulk(dev_priv, cmd, ¶m); +} + +static long +kgsl_ioctl_sharedmem_flush_cache_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_sharedmem_free_compat *param32 = data; + struct kgsl_sharedmem_free param; + + param.gpuaddr = (unsigned long)param32->gpuaddr; + + return kgsl_ioctl_sharedmem_flush_cache(dev_priv, cmd, ¶m); +} + +static long +kgsl_ioctl_gpumem_alloc_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + struct kgsl_gpumem_alloc_compat *param32 = data; + struct kgsl_gpumem_alloc param; + + param.gpuaddr = (unsigned long)param32->gpuaddr; + param.size = (size_t)param32->size; + param.flags = param32->flags; + + /* + * Since 
this is a 32 bit application the page aligned size is expected + * to fit inside of 32 bits - check for overflow and return error if so + */ + if (PAGE_ALIGN(param.size) >= UINT_MAX) + return -EINVAL; + + result = kgsl_ioctl_gpumem_alloc(dev_priv, cmd, ¶m); + + param32->gpuaddr = gpuaddr_to_compat(param.gpuaddr); + param32->size = sizet_to_compat(param.size); + param32->flags = param.flags; + + return result; +} + +static long +kgsl_ioctl_gpumem_alloc_id_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + struct kgsl_gpumem_alloc_id_compat *param32 = data; + struct kgsl_gpumem_alloc_id param; + + param.id = param32->id; + param.flags = param32->flags; + param.size = (size_t)param32->size; + param.mmapsize = (size_t)param32->mmapsize; + param.gpuaddr = (unsigned long)param32->gpuaddr; + + /* + * Since this is a 32 bit application the page aligned size is expected + * to fit inside of 32 bits - check for overflow and return error if so + */ + if (PAGE_ALIGN(param.size) >= UINT_MAX) + return -EINVAL; + + result = kgsl_ioctl_gpumem_alloc_id(dev_priv, cmd, ¶m); + + param32->id = param.id; + param32->flags = param.flags; + param32->size = sizet_to_compat(param.size); + param32->mmapsize = sizet_to_compat(param.mmapsize); + param32->gpuaddr = gpuaddr_to_compat(param.gpuaddr); + + return result; +} + +static long +kgsl_ioctl_gpumem_get_info_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + struct kgsl_gpumem_get_info_compat *param32 = data; + struct kgsl_gpumem_get_info param; + + param.gpuaddr = (unsigned long)param32->gpuaddr; + param.id = param32->id; + param.flags = param32->flags; + param.size = (size_t)param32->size; + param.mmapsize = (size_t)param32->mmapsize; + param.useraddr = (unsigned long)param32->useraddr; + + result = kgsl_ioctl_gpumem_get_info(dev_priv, cmd, ¶m); + + param32->gpuaddr = gpuaddr_to_compat(param.gpuaddr); + param32->id = param.id; + param32->flags 
= param.flags; + param32->size = sizet_to_compat(param.size); + param32->mmapsize = sizet_to_compat(param.mmapsize); + param32->useraddr = (compat_ulong_t)param.useraddr; + + return result; +} + +static long kgsl_ioctl_timestamp_event_compat(struct kgsl_device_private + *dev_priv, unsigned int cmd, void *data) +{ + struct kgsl_timestamp_event_compat *param32 = data; + struct kgsl_timestamp_event param; + + param.type = param32->type; + param.timestamp = param32->timestamp; + param.context_id = param32->context_id; + param.priv = compat_ptr(param32->priv); + param.len = (size_t)param32->len; + + return kgsl_ioctl_timestamp_event(dev_priv, cmd, ¶m); +} + + +static const struct kgsl_ioctl kgsl_compat_ioctl_funcs[] = { + KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_GETPROPERTY_COMPAT, + kgsl_ioctl_device_getproperty_compat), + /* IOCTL_KGSL_DEVICE_WAITTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, + kgsl_ioctl_device_waittimestamp_ctxtid), + KGSL_IOCTL_FUNC(IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS_COMPAT, + kgsl_ioctl_rb_issueibcmds_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SUBMIT_COMMANDS_COMPAT, + kgsl_ioctl_submit_commands_compat), + /* IOCTL_KGSL_CMDSTREAM_READTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID, + kgsl_ioctl_cmdstream_readtimestamp_ctxtid), + /* IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID_COMPAT, + kgsl_ioctl_cmdstream_freememontimestamp_ctxtid_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_CREATE, + kgsl_ioctl_drawctxt_create), + KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_DESTROY, + kgsl_ioctl_drawctxt_destroy), + KGSL_IOCTL_FUNC(IOCTL_KGSL_MAP_USER_MEM_COMPAT, + kgsl_ioctl_map_user_mem_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FREE_COMPAT, + kgsl_ioctl_sharedmem_free_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE_COMPAT, + kgsl_ioctl_sharedmem_flush_cache_compat), + 
KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC_COMPAT, + kgsl_ioctl_gpumem_alloc_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMESTAMP_EVENT_COMPAT, + kgsl_ioctl_timestamp_event_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SETPROPERTY_COMPAT, + kgsl_ioctl_device_setproperty_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC_ID_COMPAT, + kgsl_ioctl_gpumem_alloc_id_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_FREE_ID, + kgsl_ioctl_gpumem_free_id), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_GET_INFO_COMPAT, + kgsl_ioctl_gpumem_get_info_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE_COMPAT, + kgsl_ioctl_gpumem_sync_cache_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK_COMPAT, + kgsl_ioctl_gpumem_sync_cache_bulk_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_CREATE, + kgsl_ioctl_syncsource_create), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_DESTROY, + kgsl_ioctl_syncsource_destroy), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE, + kgsl_ioctl_syncsource_create_fence), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE, + kgsl_ioctl_syncsource_signal_fence), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_ALLOC, + kgsl_ioctl_gpuobj_alloc), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_FREE, + kgsl_ioctl_gpuobj_free), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_INFO, + kgsl_ioctl_gpuobj_info), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_IMPORT, + kgsl_ioctl_gpuobj_import), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_SYNC, + kgsl_ioctl_gpuobj_sync), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPU_COMMAND, + kgsl_ioctl_gpu_command), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_SET_INFO, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_PHYS_ALLOC, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_PHYS_FREE, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_VIRT_ALLOC, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_VIRT_FREE, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_BIND, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPU_SPARSE_COMMAND, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_BIND_RANGES, + kgsl_ioctl_gpumem_bind_ranges), + 
KGSL_IOCTL_FUNC(IOCTL_KGSL_GPU_AUX_COMMAND, + kgsl_ioctl_gpu_aux_command), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_CREATE, + kgsl_ioctl_timeline_create), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_WAIT, + kgsl_ioctl_timeline_wait), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_FENCE_GET, + kgsl_ioctl_timeline_fence_get), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_QUERY, + kgsl_ioctl_timeline_query), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_SIGNAL, + kgsl_ioctl_timeline_signal), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_DESTROY, + kgsl_ioctl_timeline_destroy), +}; + +long kgsl_compat_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) +{ + struct kgsl_device_private *dev_priv = filep->private_data; + struct kgsl_device *device = dev_priv->device; + + long ret = kgsl_ioctl_helper(filep, cmd, arg, kgsl_compat_ioctl_funcs, + ARRAY_SIZE(kgsl_compat_ioctl_funcs)); + + /* + * If the command was unrecognized in the generic core, try the device + * specific function + */ + + if (ret == -ENOIOCTLCMD) { + if (device->ftbl->compat_ioctl != NULL) + return device->ftbl->compat_ioctl(dev_priv, cmd, arg); + } + + return ret; +} diff --git a/kgsl_compat.h b/kgsl_compat.h new file mode 100644 index 0000000000..a8081dd08f --- /dev/null +++ b/kgsl_compat.h @@ -0,0 +1,243 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2013-2017,2019,2021 The Linux Foundation. All rights reserved. 
+ */ +#ifndef __KGSL_COMPAT_H +#define __KGSL_COMPAT_H + +#include +#include + +#ifdef CONFIG_COMPAT + +struct kgsl_ibdesc_compat { + compat_ulong_t gpuaddr; + unsigned int __pad; + compat_size_t sizedwords; + unsigned int ctrl; +}; + +struct kgsl_cmd_syncpoint_compat { + int type; + compat_uptr_t priv; + compat_size_t size; +}; + +struct kgsl_devinfo_compat { + unsigned int device_id; + unsigned int chip_id; + unsigned int mmu_enabled; + compat_ulong_t gmem_gpubaseaddr; + unsigned int gpu_id; + compat_size_t gmem_sizebytes; +}; + +struct kgsl_shadowprop_compat { + compat_ulong_t gpuaddr; + compat_size_t size; + unsigned int flags; +}; + +struct kgsl_device_constraint_compat { + unsigned int type; + unsigned int context_id; + compat_uptr_t data; + compat_size_t size; +}; + +struct kgsl_device_getproperty_compat { + unsigned int type; + compat_uptr_t value; + compat_size_t sizebytes; +}; + +#define IOCTL_KGSL_DEVICE_GETPROPERTY_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x2, struct kgsl_device_getproperty_compat) + +#define IOCTL_KGSL_SETPROPERTY_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x32, struct kgsl_device_getproperty_compat) + + +struct kgsl_submit_commands_compat { + unsigned int context_id; + unsigned int flags; + compat_uptr_t cmdlist; + unsigned int numcmds; + compat_uptr_t synclist; + unsigned int numsyncs; + unsigned int timestamp; +/* private: reserved for future use */ + unsigned int __pad[4]; +}; + +#define IOCTL_KGSL_SUBMIT_COMMANDS_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x3D, struct kgsl_submit_commands_compat) + +struct kgsl_ringbuffer_issueibcmds_compat { + unsigned int drawctxt_id; + compat_ulong_t ibdesc_addr; + unsigned int numibs; + unsigned int timestamp; /* output param */ + unsigned int flags; +}; + +#define IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x10, struct kgsl_ringbuffer_issueibcmds_compat) + +struct kgsl_cmdstream_freememontimestamp_ctxtid_compat { + unsigned int context_id; + compat_ulong_t gpuaddr; + unsigned int type; + unsigned int 
timestamp; +}; + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x17, \ + struct kgsl_cmdstream_freememontimestamp_ctxtid_compat) + +struct kgsl_map_user_mem_compat { + int fd; + compat_ulong_t gpuaddr; + compat_size_t len; + compat_size_t offset; + compat_ulong_t hostptr; + enum kgsl_user_mem_type memtype; + unsigned int flags; +}; + +#define IOCTL_KGSL_MAP_USER_MEM_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x15, struct kgsl_map_user_mem_compat) + +struct kgsl_sharedmem_free_compat { + compat_ulong_t gpuaddr; +}; + +#define IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x24, struct kgsl_sharedmem_free_compat) + +#define IOCTL_KGSL_SHAREDMEM_FREE_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x21, struct kgsl_sharedmem_free_compat) + +struct kgsl_gpumem_alloc_compat { + compat_ulong_t gpuaddr; /* output param */ + compat_size_t size; + unsigned int flags; +}; + +#define IOCTL_KGSL_GPUMEM_ALLOC_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x2f, struct kgsl_gpumem_alloc_compat) + +struct kgsl_cff_syncmem_compat { + compat_ulong_t gpuaddr; + compat_size_t len; + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_CFF_SYNCMEM_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x30, struct kgsl_cff_syncmem_compat) + +struct kgsl_timestamp_event_compat { + int type; /* Type of event (see list below) */ + unsigned int timestamp; /* Timestamp to trigger event on */ + unsigned int context_id; /* Context for the timestamp */ + compat_uptr_t priv; /* Pointer to the event specific blob */ + compat_size_t len; /* Size of the event specific blob */ +}; + +#define IOCTL_KGSL_TIMESTAMP_EVENT_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x33, struct kgsl_timestamp_event_compat) + +struct kgsl_gpumem_alloc_id_compat { + unsigned int id; + unsigned int flags; + compat_size_t size; + compat_size_t mmapsize; + compat_ulong_t gpuaddr; +/* private: reserved for future use*/ + unsigned int __pad[2]; +}; + +#define IOCTL_KGSL_GPUMEM_ALLOC_ID_COMPAT \ + 
_IOWR(KGSL_IOC_TYPE, 0x34, struct kgsl_gpumem_alloc_id_compat) + +struct kgsl_gpumem_get_info_compat { + compat_ulong_t gpuaddr; + unsigned int id; + unsigned int flags; + compat_size_t size; + compat_size_t mmapsize; + compat_ulong_t useraddr; +/* private: reserved for future use*/ + unsigned int __pad[4]; +}; + +#define IOCTL_KGSL_GPUMEM_GET_INFO_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x36, struct kgsl_gpumem_get_info_compat) + +struct kgsl_gpumem_sync_cache_compat { + compat_ulong_t gpuaddr; + unsigned int id; + unsigned int op; + compat_size_t offset; + compat_size_t length; +}; + +#define IOCTL_KGSL_GPUMEM_SYNC_CACHE_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x37, struct kgsl_gpumem_sync_cache_compat) + +struct kgsl_gpumem_sync_cache_bulk_compat { + compat_uptr_t id_list; + unsigned int count; + unsigned int op; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x3C, struct kgsl_gpumem_sync_cache_bulk_compat) + +struct kgsl_perfcounter_query_compat { + unsigned int groupid; + compat_uptr_t countables; + unsigned int count; + unsigned int max_counters; + unsigned int __pad[2]; +}; + +#define IOCTL_KGSL_PERFCOUNTER_QUERY_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x3A, struct kgsl_perfcounter_query_compat) + +struct kgsl_perfcounter_read_compat { + compat_uptr_t reads; + unsigned int count; + unsigned int __pad[2]; +}; + +#define IOCTL_KGSL_PERFCOUNTER_READ_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x3B, struct kgsl_perfcounter_read_compat) + +static inline compat_ulong_t gpuaddr_to_compat(unsigned long gpuaddr) +{ + WARN(gpuaddr >> 32, "Top 32 bits of gpuaddr have been set\n"); + return (compat_ulong_t)gpuaddr; +} + +static inline compat_size_t sizet_to_compat(size_t size) +{ + WARN(size >> 32, "Size greater than 4G\n"); + return (compat_size_t)size; +} + +long kgsl_compat_ioctl(struct file *filep, unsigned int cmd, + unsigned long arg); + +#else + +static inline long 
kgsl_compat_ioctl(struct file *filep, unsigned int cmd, + unsigned long arg) +{ + return -EINVAL; +} + +#endif /* CONFIG_COMPAT */ +#endif /* __KGSL_COMPAT_H */ diff --git a/kgsl_debugfs.c b/kgsl_debugfs.c new file mode 100644 index 0000000000..0a9356010f --- /dev/null +++ b/kgsl_debugfs.c @@ -0,0 +1,406 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2002,2008-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include + +#include "kgsl_debugfs.h" +#include "kgsl_device.h" +#include "kgsl_sharedmem.h" + +struct dentry *kgsl_debugfs_dir; +static struct dentry *proc_d_debugfs; + +static void kgsl_qdss_gfx_register_probe(struct kgsl_device *device) +{ + struct resource *res; + + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, + "qdss_gfx"); + + if (res == NULL) + return; + + device->qdss_gfx_virt = devm_ioremap(&device->pdev->dev, res->start, + resource_size(res)); + + if (device->qdss_gfx_virt == NULL) + dev_warn(device->dev, "qdss_gfx ioremap failed\n"); +} + +static int _isdb_set(void *data, u64 val) +{ + struct kgsl_device *device = data; + + if (device->qdss_gfx_virt == NULL) + kgsl_qdss_gfx_register_probe(device); + + device->set_isdb_breakpoint = val ? true : false; + return 0; +} + +static int _isdb_get(void *data, u64 *val) +{ + struct kgsl_device *device = data; + + *val = device->set_isdb_breakpoint ? 1 : 0; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(_isdb_fops, _isdb_get, _isdb_set, "%llu\n"); + +static int globals_show(struct seq_file *s, void *unused) +{ + struct kgsl_device *device = s->private; + struct kgsl_global_memdesc *md; + + list_for_each_entry(md, &device->globals, node) { + struct kgsl_memdesc *memdesc = &md->memdesc; + char flags[6]; + + flags[0] = memdesc->priv & KGSL_MEMDESC_PRIVILEGED ? 'p' : '-'; + flags[1] = !(memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY) ? 'w' : '-'; + flags[2] = kgsl_memdesc_is_secured(memdesc) ? 's' : '-'; + flags[3] = memdesc->priv & KGSL_MEMDESC_RANDOM ? 
'r' : '-'; + flags[4] = memdesc->priv & KGSL_MEMDESC_UCODE ? 'u' : '-'; + flags[5] = '\0'; + + seq_printf(s, "0x%pK-0x%pK %16llu %5s %s\n", + (u64 *)(uintptr_t) memdesc->gpuaddr, + (u64 *)(uintptr_t) (memdesc->gpuaddr + + memdesc->size - 1), memdesc->size, flags, + md->name); + } + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(globals); + +void kgsl_device_debugfs_init(struct kgsl_device *device) +{ + struct dentry *snapshot_dir; + + if (IS_ERR_OR_NULL(kgsl_debugfs_dir)) + return; + + device->d_debugfs = debugfs_create_dir(device->name, + kgsl_debugfs_dir); + + debugfs_create_file("globals", 0444, device->d_debugfs, device, + &globals_fops); + + snapshot_dir = debugfs_create_dir("snapshot", kgsl_debugfs_dir); + debugfs_create_file("break_isdb", 0644, snapshot_dir, device, + &_isdb_fops); +} + +void kgsl_device_debugfs_close(struct kgsl_device *device) +{ + debugfs_remove_recursive(device->d_debugfs); +} + +static const char *memtype_str(int memtype) +{ + if (memtype == KGSL_MEM_ENTRY_KERNEL) + return "gpumem"; + else if (memtype == KGSL_MEM_ENTRY_USER) + return "usermem"; + else if (memtype == KGSL_MEM_ENTRY_ION) + return "ion"; + + return "unknown"; +} + +static char get_alignflag(const struct kgsl_memdesc *m) +{ + int align = kgsl_memdesc_get_align(m); + + if (align >= ilog2(SZ_1M)) + return 'L'; + else if (align >= ilog2(SZ_64K)) + return 'l'; + return '-'; +} + +static char get_cacheflag(const struct kgsl_memdesc *m) +{ + static const char table[] = { + [KGSL_CACHEMODE_WRITECOMBINE] = '-', + [KGSL_CACHEMODE_UNCACHED] = 'u', + [KGSL_CACHEMODE_WRITEBACK] = 'b', + [KGSL_CACHEMODE_WRITETHROUGH] = 't', + }; + + return table[kgsl_memdesc_get_cachemode(m)]; +} + + +static int print_mem_entry(void *data, void *ptr) +{ + struct seq_file *s = data; + struct kgsl_mem_entry *entry = ptr; + char flags[11]; + char usage[16]; + struct kgsl_memdesc *m = &entry->memdesc; + unsigned int usermem_type = kgsl_memdesc_usermem_type(m); + int egl_surface_count = 0, egl_image_count = 0; + 
unsigned long inode_number = 0; + u32 map_count = atomic_read(&entry->map_count); + + flags[0] = kgsl_memdesc_is_global(m) ? 'g' : '-'; + flags[1] = '-'; + flags[2] = !(m->flags & KGSL_MEMFLAGS_GPUREADONLY) ? 'w' : '-'; + flags[3] = get_alignflag(m); + flags[4] = get_cacheflag(m); + flags[5] = kgsl_memdesc_use_cpu_map(m) ? 'p' : '-'; + /* Show Y if at least one vma has this entry mapped (could be multiple) */ + flags[6] = map_count ? 'Y' : 'N'; + flags[7] = kgsl_memdesc_is_secured(m) ? 's' : '-'; + flags[8] = '-'; + flags[9] = m->flags & KGSL_MEMFLAGS_VBO ? 'v' : '-'; + flags[10] = '\0'; + + kgsl_get_memory_usage(usage, sizeof(usage), m->flags); + + if (usermem_type == KGSL_MEM_ENTRY_ION) { + kgsl_get_egl_counts(entry, &egl_surface_count, + &egl_image_count); + inode_number = kgsl_get_dmabuf_inode_number(entry); + } + + seq_printf(s, "%pK %pK %16llu %5d %10s %10s %16s %5d %10d %6d %6d %10lu", + (uint64_t *)(uintptr_t) m->gpuaddr, + /* + * Show zero for the useraddr - we can't reliably track + * that value for multiple vmas anyway + */ + NULL, m->size, entry->id, flags, + memtype_str(usermem_type), + usage, (m->sgt ? 
m->sgt->nents : 0), map_count, + egl_surface_count, egl_image_count, inode_number); + + if (entry->metadata[0] != 0) + seq_printf(s, " %s", entry->metadata); + + seq_putc(s, '\n'); + + return 0; +} + +static struct kgsl_mem_entry *process_mem_seq_find(struct seq_file *s, + void *ptr, loff_t pos) +{ + struct kgsl_mem_entry *entry = ptr; + struct kgsl_process_private *private = s->private; + int id = 0; + + loff_t temp_pos = 1; + + if (entry != SEQ_START_TOKEN) + id = entry->id + 1; + + spin_lock(&private->mem_lock); + for (entry = idr_get_next(&private->mem_idr, &id); entry; + id++, entry = idr_get_next(&private->mem_idr, &id), + temp_pos++) { + if (temp_pos == pos && kgsl_mem_entry_get(entry)) { + spin_unlock(&private->mem_lock); + goto found; + } + } + spin_unlock(&private->mem_lock); + + entry = NULL; +found: + if (ptr != SEQ_START_TOKEN) + kgsl_mem_entry_put(ptr); + + return entry; +} + +static void *process_mem_seq_start(struct seq_file *s, loff_t *pos) +{ + loff_t seq_file_offset = *pos; + + if (seq_file_offset == 0) + return SEQ_START_TOKEN; + else + return process_mem_seq_find(s, SEQ_START_TOKEN, + seq_file_offset); +} + +static void process_mem_seq_stop(struct seq_file *s, void *ptr) +{ + if (ptr && ptr != SEQ_START_TOKEN) + kgsl_mem_entry_put(ptr); +} + +static void *process_mem_seq_next(struct seq_file *s, void *ptr, + loff_t *pos) +{ + ++*pos; + return process_mem_seq_find(s, ptr, 1); +} + +static int process_mem_seq_show(struct seq_file *s, void *ptr) +{ + if (ptr == SEQ_START_TOKEN) { + seq_printf(s, "%16s %16s %16s %5s %10s %10s %16s %5s %10s %6s %6s %10s\n", + "gpuaddr", "useraddr", "size", "id", "flags", "type", + "usage", "sglen", "mapcnt", "eglsrf", "eglimg", "inode"); + return 0; + } else + return print_mem_entry(s, ptr); +} + +static const struct seq_operations process_mem_seq_fops = { + .start = process_mem_seq_start, + .stop = process_mem_seq_stop, + .next = process_mem_seq_next, + .show = process_mem_seq_show, +}; + +static int 
process_mem_open(struct inode *inode, struct file *file) +{ + int ret; + pid_t pid = (pid_t) (unsigned long) inode->i_private; + struct seq_file *s = NULL; + struct kgsl_process_private *private = NULL; + + private = kgsl_process_private_find(pid); + + if (!private) + return -ENODEV; + + ret = seq_open(file, &process_mem_seq_fops); + if (ret) + kgsl_process_private_put(private); + else { + s = file->private_data; + s->private = private; + } + + return ret; +} + +static int process_mem_release(struct inode *inode, struct file *file) +{ + struct kgsl_process_private *private = + ((struct seq_file *)file->private_data)->private; + + if (private) + kgsl_process_private_put(private); + + return seq_release(inode, file); +} + +static const struct file_operations process_mem_fops = { + .open = process_mem_open, + .read = seq_read, + .llseek = seq_lseek, + .release = process_mem_release, +}; + + +static int print_vbo_ranges(int id, void *ptr, void *data) +{ + kgsl_memdesc_print_vbo_ranges(ptr, data); + return 0; +} + +static int vbo_print(struct seq_file *s, void *unused) +{ + struct kgsl_process_private *private = s->private; + + seq_puts(s, "id child range\n"); + + spin_lock(&private->mem_lock); + idr_for_each(&private->mem_idr, print_vbo_ranges, s); + spin_unlock(&private->mem_lock); + + return 0; +} + +static int vbo_open(struct inode *inode, struct file *file) +{ + pid_t pid = (pid_t) (unsigned long) inode->i_private; + struct kgsl_process_private *private; + int ret; + + private = kgsl_process_private_find(pid); + + if (!private) + return -ENODEV; + + ret = single_open(file, vbo_print, private); + if (ret) + kgsl_process_private_put(private); + + return ret; +} + +static const struct file_operations vbo_fops = { + .open = vbo_open, + .read = seq_read, + .llseek = seq_lseek, + /* Reuse the same release function */ + .release = process_mem_release, +}; + +/** + * kgsl_process_init_debugfs() - Initialize debugfs for a process + * @private: Pointer to process private 
structure created for the process + * + * kgsl_process_init_debugfs() is called at the time of creating the + * process struct when a process opens kgsl device for the first time. + * This function is not fatal - all we do is print a warning message if + * the files can't be created + */ +void kgsl_process_init_debugfs(struct kgsl_process_private *private) +{ + unsigned char name[16]; + struct dentry *dentry; + + snprintf(name, sizeof(name), "%d", pid_nr(private->pid)); + + private->debug_root = debugfs_create_dir(name, proc_d_debugfs); + + if (IS_ERR(private->debug_root)) { + WARN_ONCE("Unable to create debugfs dir for %s\n", name); + private->debug_root = NULL; + return; + } + + dentry = debugfs_create_file("mem", 0444, private->debug_root, + (void *) ((unsigned long) pid_nr(private->pid)), &process_mem_fops); + + if (IS_ERR(dentry)) + WARN_ONCE("Unable to create 'mem' file for %s\n", name); + + debugfs_create_file("vbos", 0444, private->debug_root, + (void *) ((unsigned long) pid_nr(private->pid)), &vbo_fops); +} + +void kgsl_core_debugfs_init(void) +{ + struct dentry *debug_dir; + + kgsl_debugfs_dir = debugfs_create_dir("kgsl", NULL); + if (IS_ERR_OR_NULL(kgsl_debugfs_dir)) + return; + + debug_dir = debugfs_create_dir("debug", kgsl_debugfs_dir); + + proc_d_debugfs = debugfs_create_dir("proc", kgsl_debugfs_dir); + + debugfs_create_bool("strict_memory", 0644, debug_dir, + &kgsl_sharedmem_noretry_flag); +} + +void kgsl_core_debugfs_close(void) +{ + debugfs_remove_recursive(kgsl_debugfs_dir); +} diff --git a/kgsl_debugfs.h b/kgsl_debugfs.h new file mode 100644 index 0000000000..16799f2900 --- /dev/null +++ b/kgsl_debugfs.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2002,2008-2011,2013,2015,2017,2019, The Linux Foundation. All rights reserved. 
+ */ +#ifndef _KGSL_DEBUGFS_H +#define _KGSL_DEBUGFS_H + +struct kgsl_device; +struct kgsl_process_private; + +#ifdef CONFIG_DEBUG_FS +void kgsl_core_debugfs_init(void); +void kgsl_core_debugfs_close(void); + +void kgsl_device_debugfs_init(struct kgsl_device *device); +void kgsl_device_debugfs_close(struct kgsl_device *device); + +extern struct dentry *kgsl_debugfs_dir; +static inline struct dentry *kgsl_get_debugfs_dir(void) +{ + return kgsl_debugfs_dir; +} + +void kgsl_process_init_debugfs(struct kgsl_process_private *priv); +#else +static inline void kgsl_core_debugfs_init(void) { } +static inline void kgsl_device_debugfs_init(struct kgsl_device *device) { } +static inline void kgsl_device_debugfs_close(struct kgsl_device *device) { } +static inline void kgsl_core_debugfs_close(void) { } +static inline struct dentry *kgsl_get_debugfs_dir(void) { return NULL; } +static inline void kgsl_process_init_debugfs(struct kgsl_process_private *priv) +{ +} +#endif + +#endif diff --git a/kgsl_device.h b/kgsl_device.h new file mode 100644 index 0000000000..ba704d3cb1 --- /dev/null +++ b/kgsl_device.h @@ -0,0 +1,962 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. + */ +#ifndef __KGSL_DEVICE_H +#define __KGSL_DEVICE_H + +#include +#include +#include + +#include "kgsl.h" +#include "kgsl_drawobj.h" +#include "kgsl_mmu.h" +#include "kgsl_regmap.h" + +#define KGSL_IOCTL_FUNC(_cmd, _func) \ + [_IOC_NR((_cmd))] = \ + { .cmd = (_cmd), .func = (_func) } + +/* + * KGSL device state is initialized to INIT when platform_probe * + * successfully initialized the device. Once a device has been opened * + * (started) it becomes active. NAP implies that only low latency * + * resources (for now clocks on some platforms) are off. SLEEP implies * + * that the KGSL module believes a device is idle (has been inactive * + * past its timer) and all system resources are released. 
SUSPEND is * + requested by the kernel and will be enforced upon all open devices. * + RESET indicates that GPU or GMU hang happens. KGSL is handling * + snapshot or recover GPU from hang. MINBW implies that DDR BW vote is * + set to non-zero minimum value. + */ + +#define KGSL_STATE_NONE 0x00000000 +#define KGSL_STATE_INIT 0x00000001 +#define KGSL_STATE_ACTIVE 0x00000002 +#define KGSL_STATE_NAP 0x00000004 +#define KGSL_STATE_SUSPEND 0x00000010 +#define KGSL_STATE_AWARE 0x00000020 +#define KGSL_STATE_SLUMBER 0x00000080 +#define KGSL_STATE_MINBW 0x00000100 + +/** + * enum kgsl_event_results - result codes passed to an event callback when the + * event is retired or cancelled + * @KGSL_EVENT_RETIRED: The timestamp associated with the event retired + * successfully + * @KGSL_EVENT_CANCELLED: The event was cancelled before the event was fired + */ +enum kgsl_event_results { + KGSL_EVENT_RETIRED = 1, + KGSL_EVENT_CANCELLED = 2, +}; + +/* + * "list" of event types for ftrace symbolic magic + */ + +#define KGSL_CONTEXT_FLAGS \ + { KGSL_CONTEXT_NO_GMEM_ALLOC, "NO_GMEM_ALLOC" }, \ + { KGSL_CONTEXT_PREAMBLE, "PREAMBLE" }, \ + { KGSL_CONTEXT_TRASH_STATE, "TRASH_STATE" }, \ + { KGSL_CONTEXT_CTX_SWITCH, "CTX_SWITCH" }, \ + { KGSL_CONTEXT_PER_CONTEXT_TS, "PER_CONTEXT_TS" }, \ + { KGSL_CONTEXT_USER_GENERATED_TS, "USER_TS" }, \ + { KGSL_CONTEXT_NO_FAULT_TOLERANCE, "NO_FT" }, \ + { KGSL_CONTEXT_INVALIDATE_ON_FAULT, "INVALIDATE_ON_FAULT" }, \ + { KGSL_CONTEXT_PWR_CONSTRAINT, "PWR" }, \ + { KGSL_CONTEXT_SAVE_GMEM, "SAVE_GMEM" }, \ + { KGSL_CONTEXT_IFH_NOP, "IFH_NOP" }, \ + { KGSL_CONTEXT_SECURE, "SECURE" }, \ + { KGSL_CONTEXT_NO_SNAPSHOT, "NO_SNAPSHOT" } + +#define KGSL_CONTEXT_ID(_context) \ + ((_context != NULL) ?
(_context)->id : KGSL_MEMSTORE_GLOBAL) + +struct kgsl_device; +struct platform_device; +struct kgsl_device_private; +struct kgsl_context; +struct kgsl_power_stats; +struct kgsl_event; +struct kgsl_snapshot; + +struct kgsl_functable { + /* Mandatory functions - these functions must be implemented + * by the client device. The driver will not check for a NULL + * pointer before calling the hook. + */ + int (*suspend_context)(struct kgsl_device *device); + int (*first_open)(struct kgsl_device *device); + int (*last_close)(struct kgsl_device *device); + int (*start)(struct kgsl_device *device, int priority); + int (*stop)(struct kgsl_device *device); + int (*getproperty)(struct kgsl_device *device, + struct kgsl_device_getproperty *param); + int (*getproperty_compat)(struct kgsl_device *device, + struct kgsl_device_getproperty *param); + int (*waittimestamp)(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp, + unsigned int msecs); + int (*readtimestamp)(struct kgsl_device *device, void *priv, + enum kgsl_timestamp_type type, unsigned int *timestamp); + int (*queue_cmds)(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, struct kgsl_drawobj *drawobj[], + uint32_t count, uint32_t *timestamp); + void (*power_stats)(struct kgsl_device *device, + struct kgsl_power_stats *stats); + void (*snapshot)(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct kgsl_context *context); + /** @drain_and_idle: Drain the GPU and wait for it to idle */ + int (*drain_and_idle)(struct kgsl_device *device); + struct kgsl_device_private * (*device_private_create)(void); + void (*device_private_destroy)(struct kgsl_device_private *dev_priv); + /* + * Optional functions - these functions are not mandatory. 
The + * driver will check that the function pointer is not NULL before + * calling the hook + */ + struct kgsl_context *(*drawctxt_create) + (struct kgsl_device_private *dev_priv, + uint32_t *flags); + void (*drawctxt_detach)(struct kgsl_context *context); + void (*drawctxt_destroy)(struct kgsl_context *context); + void (*drawctxt_dump)(struct kgsl_device *device, + struct kgsl_context *context); + long (*ioctl)(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg); + long (*compat_ioctl)(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg); + int (*setproperty)(struct kgsl_device_private *dev_priv, + unsigned int type, void __user *value, + unsigned int sizebytes); + int (*setproperty_compat)(struct kgsl_device_private *dev_priv, + unsigned int type, void __user *value, + unsigned int sizebytes); + void (*drawctxt_sched)(struct kgsl_device *device, + struct kgsl_context *context); + void (*resume)(struct kgsl_device *device); + int (*regulator_enable)(struct kgsl_device *device); + bool (*is_hw_collapsible)(struct kgsl_device *device); + void (*regulator_disable)(struct kgsl_device *device); + void (*pwrlevel_change_settings)(struct kgsl_device *device, + unsigned int prelevel, unsigned int postlevel, bool post); + void (*clk_set_options)(struct kgsl_device *device, + const char *name, struct clk *clk, bool on); + /** + * @query_property_list: query the list of properties + * supported by the device. If 'list' is NULL just return the total + * number of properties available otherwise copy up to 'count' items + * into the list and return the total number of items copied. 
+ */ + int (*query_property_list)(struct kgsl_device *device, u32 *list, + u32 count); + bool (*is_hwcg_on)(struct kgsl_device *device); + /** @gpu_clock_set: Target specific function to set gpu frequency */ + int (*gpu_clock_set)(struct kgsl_device *device, u32 pwrlevel); + /** @gpu_bus_set: Target specific function to set gpu bandwidth */ + int (*gpu_bus_set)(struct kgsl_device *device, int bus_level, u32 ab); + void (*deassert_gbif_halt)(struct kgsl_device *device); +}; + +struct kgsl_ioctl { + unsigned int cmd; + long (*func)(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +}; + +long kgsl_ioctl_helper(struct file *filep, unsigned int cmd, unsigned long arg, + const struct kgsl_ioctl *cmds, int len); + +/* Flag to mark that the memobj_node should not go to the hardware */ +#define MEMOBJ_SKIP BIT(1) + +/** + * struct kgsl_memobj_node - Memory object descriptor + * @node: Local list node for the object + * @id: GPU memory ID for the object + * @offset: Offset within the object + * @gpuaddr: GPU address for the object + * @flags: External flags passed by the user + * @priv: Internal flags set by the driver + */ +struct kgsl_memobj_node { + struct list_head node; + unsigned int id; + uint64_t offset; + uint64_t gpuaddr; + uint64_t size; + unsigned long flags; + unsigned long priv; +}; + +struct kgsl_device { + struct device *dev; + const char *name; + u32 id; + + /* Kernel virtual address for GPU shader memory */ + void __iomem *shader_mem_virt; + + /* Starting kernel virtual address for QDSS GFX DBG register block */ + void __iomem *qdss_gfx_virt; + + struct kgsl_memdesc *memstore; + struct kgsl_memdesc *scratch; + + struct kgsl_mmu mmu; + struct gmu_core_device gmu_core; + struct completion hwaccess_gate; + struct completion halt_gate; + const struct kgsl_functable *ftbl; + struct work_struct idle_check_ws; + struct timer_list idle_timer; + struct kgsl_pwrctrl pwrctrl; + int open_count; + + /* For GPU inline submission */ + uint32_t
submit_now; + spinlock_t submit_lock; + bool slumber; + + struct mutex mutex; + uint32_t state; + uint32_t requested_state; + + atomic_t active_cnt; + /** @total_mapped: To trace overall gpu memory usage */ + atomic64_t total_mapped; + + wait_queue_head_t active_cnt_wq; + struct platform_device *pdev; + struct dentry *d_debugfs; + struct idr context_idr; + rwlock_t context_lock; + + struct { + void *ptr; + dma_addr_t dma_handle; + u32 size; + } snapshot_memory; + + struct kgsl_snapshot *snapshot; + /** @panic_nb: notifier block to capture GPU snapshot on kernel panic */ + struct notifier_block panic_nb; + struct { + void *ptr; + u32 size; + } snapshot_memory_atomic; + + u32 snapshot_faultcount; /* Total number of faults since boot */ + bool force_panic; /* Force panic after snapshot dump */ + bool skip_ib_capture; /* Skip IB capture after snapshot */ + bool prioritize_unrecoverable; /* Overwrite with new GMU snapshots */ + bool set_isdb_breakpoint; /* Set isdb registers before snapshot */ + bool snapshot_atomic; /* To capture snapshot in atomic context*/ + /* Use CP Crash dumper to get GPU snapshot*/ + bool snapshot_crashdumper; + /* Use HOST side register reads to get GPU snapshot*/ + bool snapshot_legacy; + /* Use to dump the context record in bytes */ + u64 snapshot_ctxt_record_size; + + struct kobject snapshot_kobj; + + struct kgsl_pwrscale pwrscale; + + int reset_counter; /* Track how many GPU core resets have occurred */ + struct workqueue_struct *events_wq; + + /* Number of active contexts seen globally for this device */ + int active_context_count; + struct kobject gpu_sysfs_kobj; + unsigned int l3_freq[3]; + unsigned int num_l3_pwrlevels; + /* store current L3 vote to determine if we should change our vote */ + unsigned int cur_l3_pwrlevel; + /** @globals: List of global memory objects */ + struct list_head globals; + /** @globlal_map: bitmap for global memory allocations */ + unsigned long *global_map; + /* @qdss_desc: Memory descriptor for the QDSS 
region if applicable */ + struct kgsl_memdesc *qdss_desc; + /* @qtimer_desc: Memory descriptor for the QDSS region if applicable */ + struct kgsl_memdesc *qtimer_desc; + /** @event_groups: List of event groups for this device */ + struct list_head event_groups; + /** @event_groups_lock: A R/W lock for the events group list */ + rwlock_t event_groups_lock; + /** @speed_bin: Speed bin for the GPU device if applicable */ + u32 speed_bin; + /** @gmu_fault: Set when a gmu or rgmu fault is encountered */ + bool gmu_fault; + /** @regmap: GPU register map */ + struct kgsl_regmap regmap; + /** @timelines: Iterator for assigning IDs to timelines */ + struct idr timelines; + /** @timelines_lock: Spinlock to protect the timelines idr */ + spinlock_t timelines_lock; + /** @fence_trace_array: A local trace array for fence debugging */ + struct trace_array *fence_trace_array; + /** @l3_vote: Enable/Disable l3 voting */ + bool l3_vote; + /** @pdev_loaded: Flag to test if platform driver is probed */ + bool pdev_loaded; +}; + +#define KGSL_MMU_DEVICE(_mmu) \ + container_of((_mmu), struct kgsl_device, mmu) + +/** + * enum bits for struct kgsl_context.priv + * @KGSL_CONTEXT_PRIV_SUBMITTED - The context has submitted commands to gpu. + * @KGSL_CONTEXT_PRIV_DETACHED - The context has been destroyed by userspace + * and is no longer using the gpu. + * @KGSL_CONTEXT_PRIV_INVALID - The context has been destroyed by the kernel + * because it caused a GPU fault. + * @KGSL_CONTEXT_PRIV_PAGEFAULT - The context has caused a page fault. + * @KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC - this value and higher values are + * reserved for devices specific use. 
+ */ +enum kgsl_context_priv { + KGSL_CONTEXT_PRIV_SUBMITTED = 0, + KGSL_CONTEXT_PRIV_DETACHED, + KGSL_CONTEXT_PRIV_INVALID, + KGSL_CONTEXT_PRIV_PAGEFAULT, + KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC = 16, +}; + +struct kgsl_process_private; + +/** + * struct kgsl_context - The context fields that are valid for a user defined + * context + * @refcount: kref object for reference counting the context + * @id: integer identifier for the context + * @priority; The context's priority to submit commands to GPU + * @tid: task that created this context. + * @dev_priv: pointer to the owning device instance + * @proc_priv: pointer to process private, the process that allocated the + * context + * @priv: in-kernel context flags, use KGSL_CONTEXT_* values + * @reset_status: status indication whether a gpu reset occurred and whether + * this context was responsible for causing it + * @timeline: sync timeline used to create fences that can be signaled when a + * sync_pt timestamp expires + * @events: A kgsl_event_group for this context - contains the list of GPU + * events + * @flags: flags from userspace controlling the behavior of this context + * @pwr_constraint: power constraint from userspace for this context + * @fault_count: number of times gpu hanged in last _context_throttle_time ms + * @fault_time: time of the first gpu hang in last _context_throttle_time ms + * @user_ctxt_record: memory descriptor used by CP to save/restore VPC data + * across preemption + * @total_fault_count: number of times gpu faulted in this context + * @last_faulted_cmd_ts: last faulted command batch timestamp + * @gmu_registered: whether context is registered with gmu or not + */ +struct kgsl_context { + struct kref refcount; + uint32_t id; + uint32_t priority; + pid_t tid; + struct kgsl_device_private *dev_priv; + struct kgsl_process_private *proc_priv; + unsigned long priv; + struct kgsl_device *device; + unsigned int reset_status; + struct kgsl_sync_timeline *ktimeline; + struct kgsl_event_group 
events; + unsigned int flags; + struct kgsl_pwr_constraint pwr_constraint; + struct kgsl_pwr_constraint l3_pwr_constraint; + unsigned int fault_count; + ktime_t fault_time; + struct kgsl_mem_entry *user_ctxt_record; + unsigned int total_fault_count; + unsigned int last_faulted_cmd_ts; + bool gmu_registered; + /** + * @gmu_dispatch_queue: dispatch queue id to which this context will be + * submitted + */ + u32 gmu_dispatch_queue; +}; + +#define _context_comm(_c) \ + (((_c) && (_c)->proc_priv) ? (_c)->proc_priv->comm : "unknown") + +/* + * Print log messages with the context process name/pid: + * [...] kgsl kgsl-3d0: kgsl-api-test[22182]: + */ + +#define pr_context(_d, _c, fmt, args...) \ + dev_err((_d)->dev, "%s[%d]: " fmt, \ + _context_comm((_c)), \ + pid_nr((_c)->proc_priv->pid), ##args) + +/** + * struct kgsl_process_private - Private structure for a KGSL process (across + * all devices) + * @priv: Internal flags, use KGSL_PROCESS_* values + * @pid: Identification structure for the task owner of the process + * @comm: task name of the process + * @mem_lock: Spinlock to protect the process memory lists + * @refcount: kref object for reference counting the process + * @idr: Iterator for assigning IDs to memory allocations + * @pagetable: Pointer to the pagetable owned by this process + * @kobj: Pointer to a kobj for the sysfs directory for this process + * @debug_root: Pointer to the debugfs root for this process + * @stats: Memory allocation statistics for this process + * @gpumem_mapped: KGSL memory mapped in the process address space + * @syncsource_idr: sync sources created by this process + * @syncsource_lock: Spinlock to protect the syncsource idr + * @fd_count: Counter for the number of FDs for this process + * @ctxt_count: Count for the number of contexts for this process + * @ctxt_count_lock: Spinlock to protect ctxt_count + * @frame_count: Count for the number of frames processed + */ +struct kgsl_process_private { + unsigned long priv; + struct pid *pid; 
+ char comm[TASK_COMM_LEN]; + spinlock_t mem_lock; + struct kref refcount; + struct idr mem_idr; + struct kgsl_pagetable *pagetable; + struct list_head list; + struct list_head reclaim_list; + struct kobject kobj; + struct dentry *debug_root; + struct { + atomic64_t cur; + uint64_t max; + } stats[KGSL_MEM_ENTRY_MAX]; + atomic64_t gpumem_mapped; + struct idr syncsource_idr; + spinlock_t syncsource_lock; + int fd_count; + atomic_t ctxt_count; + spinlock_t ctxt_count_lock; + atomic64_t frame_count; + /** + * @state: state consisting KGSL_PROC_STATE and KGSL_PROC_PINNED_STATE + */ + unsigned long state; + /** + * @unpinned_page_count: The number of pages unpinned for reclaim + */ + atomic_t unpinned_page_count; + /** + * @fg_work: Work struct to schedule foreground work + */ + struct work_struct fg_work; + /** + * @reclaim_lock: Mutex lock to protect KGSL_PROC_PINNED_STATE + */ + struct mutex reclaim_lock; + /** + * @cmd_count: The number of cmds that are active for the process + */ + atomic_t cmd_count; +}; + +struct kgsl_device_private { + struct kgsl_device *device; + struct kgsl_process_private *process_priv; +}; + +/** + * struct kgsl_snapshot - details for a specific snapshot instance + * @ib1base: Active IB1 base address at the time of fault + * @ib2base: Active IB2 base address at the time of fault + * @ib1size: Number of DWORDS pending in IB1 at the time of fault + * @ib2size: Number of DWORDS pending in IB2 at the time of fault + * @ib1dumped: Active IB1 dump status to snapshot binary + * @ib2dumped: Active IB2 dump status to snapshot binary + * @start: Pointer to the start of the static snapshot region + * @size: Size of the current snapshot instance + * @ptr: Pointer to the next block of memory to write to during snapshotting + * @remain: Bytes left in the snapshot region + * @timestamp: Timestamp of the snapshot instance (in seconds since boot) + * @mempool: Pointer to the memory pool for storing memory objects + * @mempool_size: Size of the memory pool + 
* @obj_list: List of frozen GPU buffers that are waiting to be dumped. + * @cp_list: List of IB's to be dumped. + * @work: worker to dump the frozen memory + * @dump_gate: completion gate signaled by worker when it is finished. + * @process: the process that caused the hang, if known. + * @sysfs_read: Count of current reads via sysfs + * @first_read: True until the snapshot read is started + * @recovered: True if GPU was recovered after previous snapshot + */ +struct kgsl_snapshot { + uint64_t ib1base; + uint64_t ib2base; + unsigned int ib1size; + unsigned int ib2size; + bool ib1dumped; + bool ib2dumped; + u8 *start; + size_t size; + u8 *ptr; + size_t remain; + unsigned long timestamp; + u8 *mempool; + size_t mempool_size; + struct list_head obj_list; + struct list_head cp_list; + struct work_struct work; + struct completion dump_gate; + struct kgsl_process_private *process; + unsigned int sysfs_read; + bool first_read; + bool recovered; + struct kgsl_device *device; +}; + +/** + * struct kgsl_snapshot_object - GPU memory in the snapshot + * @gpuaddr: The GPU address identified during snapshot + * @size: The buffer size identified during snapshot + * @offset: offset from start of the allocated kgsl_mem_entry + * @type: SNAPSHOT_OBJ_TYPE_* identifier. 
+ * @entry: the reference counted memory entry for this buffer + * @node: node for kgsl_snapshot.obj_list + */ +struct kgsl_snapshot_object { + uint64_t gpuaddr; + uint64_t size; + uint64_t offset; + int type; + struct kgsl_mem_entry *entry; + struct list_head node; +}; + +struct kgsl_device *kgsl_get_device(int dev_idx); + +static inline void kgsl_regread(struct kgsl_device *device, + unsigned int offsetwords, + unsigned int *value) +{ + *value = kgsl_regmap_read(&device->regmap, offsetwords); +} + +static inline void kgsl_regwrite(struct kgsl_device *device, + unsigned int offsetwords, + unsigned int value) +{ + kgsl_regmap_write(&device->regmap, value, offsetwords); +} + +static inline void kgsl_regrmw(struct kgsl_device *device, + unsigned int offsetwords, + unsigned int mask, unsigned int bits) +{ + kgsl_regmap_rmw(&device->regmap, offsetwords, mask, bits); +} + +static inline bool kgsl_state_is_awake(struct kgsl_device *device) +{ + return (device->state == KGSL_STATE_ACTIVE || + device->state == KGSL_STATE_AWARE); +} + +static inline bool kgsl_state_is_nap_or_minbw(struct kgsl_device *device) +{ + return (device->state == KGSL_STATE_NAP || + device->state == KGSL_STATE_MINBW); +} + +/** + * kgsl_start_idle_timer - Start the idle timer + * @device: A KGSL device handle + * + * Start the idle timer to expire in 'interval_timeout' milliseconds + */ +static inline void kgsl_start_idle_timer(struct kgsl_device *device) +{ + mod_timer(&device->idle_timer, + jiffies + msecs_to_jiffies(device->pwrctrl.interval_timeout)); +} + +int kgsl_readtimestamp(struct kgsl_device *device, void *priv, + enum kgsl_timestamp_type type, unsigned int *timestamp); + +bool kgsl_check_timestamp(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp); + +int kgsl_device_platform_probe(struct kgsl_device *device); + +void kgsl_device_platform_remove(struct kgsl_device *device); + +const char *kgsl_pwrstate_to_str(unsigned int state); + +/** + * 
kgsl_device_snapshot_probe - add resources for the device GPU snapshot + * @device: The device to initialize + * @size: The size of the static region to allocate + * + * Allocate memory for a GPU snapshot for the specified device, + * and create the sysfs files to manage it + */ +void kgsl_device_snapshot_probe(struct kgsl_device *device, u32 size); + +void kgsl_device_snapshot(struct kgsl_device *device, + struct kgsl_context *context, bool gmu_fault); +void kgsl_device_snapshot_close(struct kgsl_device *device); + +void kgsl_events_init(void); +void kgsl_events_exit(void); + +/** + * kgsl_device_events_probe - Set up events for the KGSL device + * @device: A KGSL GPU device handle + * + * Set up the list and lock for GPU events for this device + */ +void kgsl_device_events_probe(struct kgsl_device *device); + +/** + * kgsl_device_events_remove - Remove all event groups from the KGSL device + * @device: A KGSL GPU device handle + * + * Remove all of the GPU event groups from the device and warn if any of them + * still have events pending + */ +void kgsl_device_events_remove(struct kgsl_device *device); + +void kgsl_context_detach(struct kgsl_context *context); + +/** + * kgsl_del_event_group - Remove a GPU event group from a device + * @device: A KGSL GPU device handle + * @group: Event group to be removed + * + * Remove the specified group from the list of event groups on @device. 
+ */ +void kgsl_del_event_group(struct kgsl_device *device, + struct kgsl_event_group *group); + +/** + * kgsl_add_event_group - Add a new GPU event group + * @device: A KGSL GPU device handle + * @group: Pointer to the new group to add to the list + * @context: Context that owns the group (or NULL for global) + * @readtimestamp: Function pointer to the readtimestamp function to call when + * processing events + * @priv: Priv member to pass to the readtimestamp function + * @fmt: The format string to use to build the event name + * @...: Arguments for the format string + */ +void kgsl_add_event_group(struct kgsl_device *device, + struct kgsl_event_group *group, + struct kgsl_context *context, readtimestamp_func readtimestamp, + void *priv, const char *fmt, ...); + +void kgsl_cancel_events_timestamp(struct kgsl_device *device, + struct kgsl_event_group *group, unsigned int timestamp); +void kgsl_cancel_events(struct kgsl_device *device, + struct kgsl_event_group *group); +void kgsl_cancel_event(struct kgsl_device *device, + struct kgsl_event_group *group, unsigned int timestamp, + kgsl_event_func func, void *priv); +bool kgsl_event_pending(struct kgsl_device *device, + struct kgsl_event_group *group, unsigned int timestamp, + kgsl_event_func func, void *priv); +int kgsl_add_event(struct kgsl_device *device, struct kgsl_event_group *group, + unsigned int timestamp, kgsl_event_func func, void *priv); +void kgsl_process_event_group(struct kgsl_device *device, + struct kgsl_event_group *group); +void kgsl_flush_event_group(struct kgsl_device *device, + struct kgsl_event_group *group); +void kgsl_process_event_groups(struct kgsl_device *device); + +void kgsl_context_destroy(struct kref *kref); + +int kgsl_context_init(struct kgsl_device_private *dev_priv, + struct kgsl_context *context); + +void kgsl_context_dump(struct kgsl_context *context); + +int kgsl_memfree_find_entry(pid_t ptname, uint64_t *gpuaddr, + uint64_t *size, uint64_t *flags, pid_t *pid); + +long 
kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); + +long kgsl_ioctl_copy_in(unsigned int kernel_cmd, unsigned int user_cmd, + unsigned long arg, unsigned char *ptr); + +long kgsl_ioctl_copy_out(unsigned int kernel_cmd, unsigned int user_cmd, + unsigned long arg, unsigned char *ptr); + +/** + * kgsl_context_type - Return a symbolic string for the context type + * @type: Context type + * + * Return: Symbolic string representing the context type + */ +const char *kgsl_context_type(int type); + +/** + * kgsl_context_put() - Release context reference count + * @context: Pointer to the KGSL context to be released + * + * Reduce the reference count on a KGSL context and destroy it if it is no + * longer needed + */ +static inline void +kgsl_context_put(struct kgsl_context *context) +{ + if (context) + kref_put(&context->refcount, kgsl_context_destroy); +} + +/** + * kgsl_context_detached() - check if a context is detached + * @context: the context + * + * Check if a context has been destroyed by userspace and is only waiting + * for reference counts to go away. This check is used to weed out + * contexts that shouldn't use the gpu so NULL is considered detached. + */ +static inline bool kgsl_context_detached(struct kgsl_context *context) +{ + return (context == NULL || test_bit(KGSL_CONTEXT_PRIV_DETACHED, + &context->priv)); +} + +/** + * kgsl_context_invalid() - check if a context is invalid + * @context: the context + * + * Check if a context has been invalidated by the kernel and may no + * longer use the GPU. 
+ */ +static inline bool kgsl_context_invalid(struct kgsl_context *context) +{ + return (context == NULL || test_bit(KGSL_CONTEXT_PRIV_INVALID, + &context->priv)); +} + +/** kgsl_context_is_bad - Check if a context is detached or invalid + * @context: Pointer to a KGSL context handle + * + * Return: True if the context has been detached or is invalid + */ +static inline bool kgsl_context_is_bad(struct kgsl_context *context) +{ + return (kgsl_context_detached(context) || + kgsl_context_invalid(context)); +} + +/** + * kgsl_context_get() - get a pointer to a KGSL context + * @device: Pointer to the KGSL device that owns the context + * @id: Context ID + * + * Find the context associated with the given ID number, increase the reference + * count on it and return it. The caller must make sure that this call is + * paired with a kgsl_context_put. This function is for internal use because it + * doesn't validate the ownership of the context with the calling process - use + * kgsl_context_get_owner for that + */ +static inline struct kgsl_context *kgsl_context_get(struct kgsl_device *device, + uint32_t id) +{ + int result = 0; + struct kgsl_context *context = NULL; + + read_lock(&device->context_lock); + + context = idr_find(&device->context_idr, id); + + /* Don't return a context that has been detached */ + if (kgsl_context_detached(context)) + context = NULL; + else + result = kref_get_unless_zero(&context->refcount); + + read_unlock(&device->context_lock); + + if (!result) + return NULL; + return context; +} + +/** + * _kgsl_context_get() - lightweight function to just increment the ref count + * @context: Pointer to the KGSL context + * + * Get a reference to the specified KGSL context structure. 
This is a + * lightweight way to just increase the refcount on a known context rather than + * walking through kgsl_context_get and searching the iterator + */ +static inline int _kgsl_context_get(struct kgsl_context *context) +{ + int ret = 0; + + if (context) + ret = kref_get_unless_zero(&context->refcount); + + return ret; +} + +/** + * kgsl_context_get_owner() - get a pointer to a KGSL context in a specific + * process + * @dev_priv: Pointer to the process struct + * @id: Context ID to return + * + * Find the context associated with the given ID number, increase the reference + * count on it and return it. The caller must make sure that this call is + * paired with a kgsl_context_put. This function validates that the context id + * given is owned by the dev_priv instance that is passed in. See + * kgsl_context_get for the internal version that doesn't do the check + */ +static inline struct kgsl_context *kgsl_context_get_owner( + struct kgsl_device_private *dev_priv, uint32_t id) +{ + struct kgsl_context *context; + + context = kgsl_context_get(dev_priv->device, id); + + /* Verify that the context belongs to current calling fd. */ + if (context != NULL && context->dev_priv != dev_priv) { + kgsl_context_put(context); + return NULL; + } + + return context; +} + +/** + * kgsl_process_private_get() - increment the refcount on a + * kgsl_process_private struct + * @process: Pointer to the KGSL process_private + * + * Returns 0 if the structure is invalid and a reference count could not be + * obtained, nonzero otherwise.
+ */ +static inline int kgsl_process_private_get(struct kgsl_process_private *process) +{ + if (process != NULL) + return kref_get_unless_zero(&process->refcount); + return 0; +} + +void kgsl_process_private_put(struct kgsl_process_private *private); + + +struct kgsl_process_private *kgsl_process_private_find(pid_t pid); + +/* + * A helper macro to print out "not enough memory functions" - this + * makes it easy to standardize the messages as well as cut down on + * the number of strings in the binary + */ +#define SNAPSHOT_ERR_NOMEM(_d, _s) \ + dev_err((_d)->dev, \ + "snapshot: not enough snapshot memory for section %s\n", (_s)) + +/** + * struct kgsl_snapshot_registers - list of registers to snapshot + * @regs: Pointer to an array of register ranges + * @count: Number of entries in the array + */ +struct kgsl_snapshot_registers { + const unsigned int *regs; + unsigned int count; +}; + +size_t kgsl_snapshot_dump_registers(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv); + +void kgsl_snapshot_indexed_registers(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, unsigned int index, + unsigned int data, unsigned int start, unsigned int count); + +int kgsl_snapshot_get_object(struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process, uint64_t gpuaddr, + uint64_t size, unsigned int type); + +int kgsl_snapshot_have_object(struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t size); + +struct adreno_ib_object_list; + +int kgsl_snapshot_add_ib_obj_list(struct kgsl_snapshot *snapshot, + struct adreno_ib_object_list *ib_obj_list); + +void kgsl_snapshot_add_section(struct kgsl_device *device, u16 id, + struct kgsl_snapshot *snapshot, + size_t (*func)(struct kgsl_device *, u8 *, size_t, void *), + void *priv); + +/** + * kgsl_of_property_read_ddrtype - Get property from devicetree based on + * the type of DDR. 
+ * @node: Devicetree node + * @base: prefix string of the property + * @ptr: Pointer to store the value of the property + * + * First look up the devicetree property based on the prefix string and DDR + * type. If property is not specified per DDR type, then look for the property + * based on prefix string only. + * + * Return: 0 on success or error code on failure. + */ +int kgsl_of_property_read_ddrtype(struct device_node *node, const char *base, + u32 *ptr); + +/** + * kgsl_query_property_list - Get a list of valid properties + * @device: A KGSL device handle + * @list: Pointer to a list of u32s + * @count: Number of items in @list + * + * Populate a list with the IDs for supported properties. If @list is NULL, + * just return the number of properties available, otherwise fill up to @count + * items in the list with property identifiers. + * + * Returns the number of total properties if @list is NULL or the number of + * properties copied to @list. + */ +int kgsl_query_property_list(struct kgsl_device *device, u32 *list, u32 count); + +static inline bool kgsl_mmu_has_feature(struct kgsl_device *device, + enum kgsl_mmu_feature feature) +{ + return test_bit(feature, &device->mmu.features); +} + +static inline void kgsl_mmu_set_feature(struct kgsl_device *device, + enum kgsl_mmu_feature feature) +{ + set_bit(feature, &device->mmu.features); +} + +/** + * kgsl_trace_gpu_mem_total - Overall gpu memory usage tracking which includes + * process allocations, imported dmabufs and kgsl globals + * @device: A KGSL device handle + * @delta: delta of total mapped memory size + */ +#ifdef CONFIG_TRACE_GPU_MEM +static inline void kgsl_trace_gpu_mem_total(struct kgsl_device *device, + s64 delta) +{ + u64 total_size; + + total_size = atomic64_add_return(delta, &device->total_mapped); + trace_gpu_mem_total(0, 0, total_size); +} +#else +static inline void kgsl_trace_gpu_mem_total(struct kgsl_device *device, + s64 delta) {} +#endif + +#endif /* __KGSL_DEVICE_H */ diff --git 
a/kgsl_drawobj.c b/kgsl_drawobj.c new file mode 100644 index 0000000000..75807d6350 --- /dev/null +++ b/kgsl_drawobj.c @@ -0,0 +1,1489 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2016-2021, The Linux Foundation. All rights reserved. + */ + +/* + * KGSL drawobj management + * A drawobj is a single submission from userland. The drawobj + * encapsulates everything about the submission : command buffers, flags and + * sync points. + * + * Sync points are events that need to expire before the + * drawobj can be queued to the hardware. All syncpoints are contained in an + * array of kgsl_drawobj_sync_event structs in the drawobj. There can be + * multiple types of events both internal ones (GPU events) and external + * triggers. As the events expire bits are cleared in a pending bitmap stored + * in the drawobj. The GPU will submit the command as soon as the bitmap + * goes to zero indicating no more pending events. + */ + +#include +#include + +#include "adreno_drawctxt.h" +#include "kgsl_compat.h" +#include "kgsl_device.h" +#include "kgsl_drawobj.h" +#include "kgsl_eventlog.h" +#include "kgsl_sync.h" +#include "kgsl_timeline.h" +#include "kgsl_trace.h" + +/* + * Define a kmem cache for the memobj structures since we + * allocate and free them so frequently + */ +static struct kmem_cache *memobjs_cache; + +static void syncobj_destroy_object(struct kgsl_drawobj *drawobj) +{ + struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); + int i; + + for (i = 0; i < syncobj->numsyncs; i++) { + struct kgsl_drawobj_sync_event *event = &syncobj->synclist[i]; + + if (event->type == KGSL_CMD_SYNCPOINT_TYPE_FENCE) { + struct event_fence_info *priv = event->priv; + + if (priv) { + kfree(priv->fences); + kfree(priv); + } + } else if (event->type == KGSL_CMD_SYNCPOINT_TYPE_TIMELINE) { + kfree(event->priv); + } + } + + kfree(syncobj->synclist); + kfree(syncobj); +} + +static void cmdobj_destroy_object(struct kgsl_drawobj *drawobj) +{ + kfree(CMDOBJ(drawobj)); +} + 
+static void bindobj_destroy_object(struct kgsl_drawobj *drawobj) +{ + kfree(BINDOBJ(drawobj)); +} + +static void timelineobj_destroy_object(struct kgsl_drawobj *drawobj) +{ + kfree(TIMELINEOBJ(drawobj)); +} + +void kgsl_drawobj_destroy_object(struct kref *kref) +{ + struct kgsl_drawobj *drawobj = container_of(kref, + struct kgsl_drawobj, refcount); + + kgsl_context_put(drawobj->context); + drawobj->destroy_object(drawobj); +} + +void kgsl_dump_syncpoints(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj) +{ + struct kgsl_drawobj_sync_event *event; + unsigned int i; + + for (i = 0; i < syncobj->numsyncs; i++) { + event = &syncobj->synclist[i]; + + if (!kgsl_drawobj_event_pending(syncobj, i)) + continue; + + switch (event->type) { + case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP: { + unsigned int retired; + + kgsl_readtimestamp(event->device, + event->context, KGSL_TIMESTAMP_RETIRED, + &retired); + + dev_err(device->dev, + " [timestamp] context %u timestamp %u (retired %u)\n", + event->context->id, event->timestamp, + retired); + break; + } + case KGSL_CMD_SYNCPOINT_TYPE_FENCE: { + int j; + struct event_fence_info *info = event->priv; + + for (j = 0; info && j < info->num_fences; j++) + dev_err(device->dev, "[%d] fence: %s\n", + i, info->fences[j].name); + break; + } + case KGSL_CMD_SYNCPOINT_TYPE_TIMELINE: { + int j; + struct event_timeline_info *info = event->priv; + + for (j = 0; info && info[j].timeline; j++) + dev_err(device->dev, "[%d] timeline: %d seqno %lld\n", + i, info[j].timeline, info[j].seqno); + break; + } + } + } +} + +static void syncobj_timer(struct timer_list *t) +{ + struct kgsl_device *device; + struct kgsl_drawobj_sync *syncobj = from_timer(syncobj, t, timer); + struct kgsl_drawobj *drawobj; + struct kgsl_drawobj_sync_event *event; + unsigned int i; + + if (syncobj == NULL) + return; + + drawobj = DRAWOBJ(syncobj); + + if (!kref_get_unless_zero(&drawobj->refcount)) + return; + + if (drawobj->context == NULL) { + kgsl_drawobj_put(drawobj); 
+ return; + } + + device = drawobj->context->device; + + dev_err(device->dev, + "kgsl: possible gpu syncpoint deadlock for context %u timestamp %u\n", + drawobj->context->id, drawobj->timestamp); + + set_bit(ADRENO_CONTEXT_FENCE_LOG, &drawobj->context->priv); + kgsl_context_dump(drawobj->context); + clear_bit(ADRENO_CONTEXT_FENCE_LOG, &drawobj->context->priv); + + dev_err(device->dev, " pending events:\n"); + + for (i = 0; i < syncobj->numsyncs; i++) { + event = &syncobj->synclist[i]; + + if (!kgsl_drawobj_event_pending(syncobj, i)) + continue; + + switch (event->type) { + case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP: + dev_err(device->dev, " [%u] TIMESTAMP %u:%u\n", + i, event->context->id, event->timestamp); + break; + case KGSL_CMD_SYNCPOINT_TYPE_FENCE: { + int j; + struct event_fence_info *info = event->priv; + + for (j = 0; info && j < info->num_fences; j++) + dev_err(device->dev, " [%u] FENCE %s\n", + i, info->fences[j].name); + break; + } + case KGSL_CMD_SYNCPOINT_TYPE_TIMELINE: { + int j; + struct event_timeline_info *info = event->priv; + + dev_err(device->dev, " [%u] FENCE %s\n", + i, dma_fence_is_signaled(event->fence) ? + "signaled" : "not signaled"); + + for (j = 0; info && info[j].timeline; j++) + dev_err(device->dev, " TIMELINE %d SEQNO %lld\n", + info[j].timeline, info[j].seqno); + break; + } + } + } + + kgsl_drawobj_put(drawobj); + dev_err(device->dev, "--gpu syncpoint deadlock print end--\n"); +} + +/* + * a generic function to retire a pending sync event and (possibly) kick the + * dispatcher. + * Returns false if the event was already marked for cancellation in another + * thread. This function should return true if this thread is responsible for + * freeing up the memory, and the event will not be cancelled. 
+ */ +static bool drawobj_sync_expire(struct kgsl_device *device, + struct kgsl_drawobj_sync_event *event) +{ + struct kgsl_drawobj_sync *syncobj = event->syncobj; + /* + * Clear the event from the pending mask - if it is already clear, then + * leave without doing anything useful + */ + if (!test_and_clear_bit(event->id, &syncobj->pending)) + return false; + + /* + * If no more pending events, delete the timer and schedule the command + * for dispatch + */ + if (!kgsl_drawobj_events_pending(event->syncobj)) { + del_timer(&syncobj->timer); + + if (device->ftbl->drawctxt_sched) + device->ftbl->drawctxt_sched(device, + event->syncobj->base.context); + } + return true; +} + +/* + * This function is called by the GPU event when the sync event timestamp + * expires + */ +static void drawobj_sync_func(struct kgsl_device *device, + struct kgsl_event_group *group, void *priv, int result) +{ + struct kgsl_drawobj_sync_event *event = priv; + + trace_syncpoint_timestamp_expire(event->syncobj, + event->context, event->timestamp); + + /* + * Put down the context ref count only if + * this thread successfully clears the pending bit mask. 
+ */ + if (drawobj_sync_expire(device, event)) + kgsl_context_put(event->context); + + kgsl_drawobj_put(&event->syncobj->base); +} + +static void drawobj_sync_timeline_fence_work(struct irq_work *work) +{ + struct kgsl_drawobj_sync_event *event = container_of(work, + struct kgsl_drawobj_sync_event, work); + + dma_fence_put(event->fence); + kgsl_drawobj_put(&event->syncobj->base); +} + +static void trace_syncpoint_timeline_fence(struct kgsl_drawobj_sync *syncobj, + struct dma_fence *f, bool expire) +{ + struct dma_fence_array *array = to_dma_fence_array(f); + struct dma_fence **fences = &f; + u32 num_fences = 1; + int i; + + if (array) { + num_fences = array->num_fences; + fences = array->fences; + } + + for (i = 0; i < num_fences; i++) { + char fence_name[KGSL_FENCE_NAME_LEN]; + + snprintf(fence_name, sizeof(fence_name), "%s:%llu", + fences[i]->ops->get_timeline_name(fences[i]), + fences[i]->seqno); + if (expire) { + trace_syncpoint_fence_expire(syncobj, fence_name); + log_kgsl_syncpoint_fence_expire_event( + syncobj->base.context->id, fence_name); + } else { + trace_syncpoint_fence(syncobj, fence_name); + log_kgsl_syncpoint_fence_event( + syncobj->base.context->id, fence_name); + } + } +} + +static void drawobj_sync_timeline_fence_callback(struct dma_fence *f, + struct dma_fence_cb *cb) +{ + struct kgsl_drawobj_sync_event *event = container_of(cb, + struct kgsl_drawobj_sync_event, cb); + + trace_syncpoint_timeline_fence(event->syncobj, f, true); + + /* + * Mark the event as synced and then fire off a worker to handle + * removing the fence + */ + if (drawobj_sync_expire(event->device, event)) + irq_work_queue(&event->work); +} + +static void syncobj_destroy(struct kgsl_drawobj *drawobj) +{ + struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); + unsigned int i; + + /* Zap the canary timer */ + del_timer_sync(&syncobj->timer); + + /* + * Clear all pending events - this will render any subsequent async + * callbacks harmless + */ + for (i = 0; i < 
syncobj->numsyncs; i++) { + struct kgsl_drawobj_sync_event *event = &syncobj->synclist[i]; + + /* + * Don't do anything if the event has already expired. + * If this thread clears the pending bit mask then it is + * responsible for doing context put. + */ + if (!test_and_clear_bit(i, &syncobj->pending)) + continue; + + switch (event->type) { + case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP: + kgsl_cancel_event(drawobj->device, + &event->context->events, event->timestamp, + drawobj_sync_func, event); + /* + * Do context put here to make sure the context is alive + * till this thread cancels kgsl event. + */ + kgsl_context_put(event->context); + break; + case KGSL_CMD_SYNCPOINT_TYPE_FENCE: + kgsl_sync_fence_async_cancel(event->handle); + kgsl_drawobj_put(drawobj); + break; + case KGSL_CMD_SYNCPOINT_TYPE_TIMELINE: + dma_fence_remove_callback(event->fence, &event->cb); + dma_fence_put(event->fence); + kgsl_drawobj_put(drawobj); + break; + } + } + + /* + * If we cancelled an event, there's a good chance that the context is + * on a dispatcher queue, so schedule to get it removed. 
+ */ + if (!bitmap_empty(&syncobj->pending, KGSL_MAX_SYNCPOINTS) && + drawobj->device->ftbl->drawctxt_sched) + drawobj->device->ftbl->drawctxt_sched(drawobj->device, + drawobj->context); + +} + +static void timelineobj_destroy(struct kgsl_drawobj *drawobj) +{ + struct kgsl_drawobj_timeline *timelineobj = TIMELINEOBJ(drawobj); + int i; + + for (i = 0; i < timelineobj->count; i++) + kgsl_timeline_put(timelineobj->timelines[i].timeline); + + kvfree(timelineobj->timelines); + timelineobj->timelines = NULL; + timelineobj->count = 0; +} + +static void bindobj_destroy(struct kgsl_drawobj *drawobj) +{ + struct kgsl_drawobj_bind *bindobj = BINDOBJ(drawobj); + + kgsl_sharedmem_put_bind_op(bindobj->bind); +} + +static void cmdobj_destroy(struct kgsl_drawobj *drawobj) +{ + struct kgsl_drawobj_cmd *cmdobj = CMDOBJ(drawobj); + struct kgsl_memobj_node *mem, *tmpmem; + + /* + * Release the refcount on the mem entry associated with the + * ib profiling buffer + */ + if (cmdobj->base.flags & KGSL_DRAWOBJ_PROFILING) + kgsl_mem_entry_put(cmdobj->profiling_buf_entry); + + /* Destroy the command list */ + list_for_each_entry_safe(mem, tmpmem, &cmdobj->cmdlist, node) { + list_del_init(&mem->node); + kmem_cache_free(memobjs_cache, mem); + } + + /* Destroy the memory list */ + list_for_each_entry_safe(mem, tmpmem, &cmdobj->memlist, node) { + list_del_init(&mem->node); + kmem_cache_free(memobjs_cache, mem); + } + + if (drawobj->type & CMDOBJ_TYPE) + atomic_dec(&drawobj->context->proc_priv->cmd_count); +} + +/** + * kgsl_drawobj_destroy() - Destroy a kgsl object structure + * @obj: Pointer to the kgsl object to destroy + * + * Start the process of destroying a command batch. Cancel any pending events + * and decrement the refcount. Asynchronous events can still signal after + * kgsl_drawobj_destroy has returned. 
+ */ +void kgsl_drawobj_destroy(struct kgsl_drawobj *drawobj) +{ + if (IS_ERR_OR_NULL(drawobj)) + return; + + drawobj->destroy(drawobj); + + kgsl_drawobj_put(drawobj); +} + +static bool drawobj_sync_fence_func(void *priv) +{ + struct kgsl_drawobj_sync_event *event = priv; + struct event_fence_info *info = event->priv; + int i; + + for (i = 0; info && i < info->num_fences; i++) { + trace_syncpoint_fence_expire(event->syncobj, + info->fences[i].name); + log_kgsl_syncpoint_fence_expire_event( + event->syncobj->base.context->id, info->fences[i].name); + } + + /* + * Only call kgsl_drawobj_put() if it's not marked for cancellation + * in another thread. + */ + if (drawobj_sync_expire(event->device, event)) { + kgsl_drawobj_put(&event->syncobj->base); + return true; + } + return false; +} + +static struct event_timeline_info * +drawobj_get_sync_timeline_priv(void __user *uptr, u64 usize, u32 count) +{ + int i; + struct event_timeline_info *priv; + + /* Make sure we don't accidentally overflow count */ + if (count == UINT_MAX) + return NULL; + + priv = kcalloc(count + 1, sizeof(*priv), GFP_KERNEL); + if (!priv) + return NULL; + + for (i = 0; i < count; i++, uptr += usize) { + struct kgsl_timeline_val val; + + if (copy_struct_from_user(&val, sizeof(val), uptr, usize)) + continue; + + priv[i].timeline = val.timeline; + priv[i].seqno = val.seqno; + } + + priv[i].timeline = 0; + return priv; +} + +static int drawobj_add_sync_timeline(struct kgsl_device *device, + + struct kgsl_drawobj_sync *syncobj, void __user *uptr, + u64 usize) +{ + struct kgsl_drawobj *drawobj = DRAWOBJ(syncobj); + struct kgsl_cmd_syncpoint_timeline sync; + struct kgsl_drawobj_sync_event *event; + struct dma_fence *fence; + unsigned int id; + int ret; + + if (copy_struct_from_user(&sync, sizeof(sync), uptr, usize)) + return -EFAULT; + + fence = kgsl_timelines_to_fence_array(device, sync.timelines, + sync.count, sync.timelines_size, false); + if (IS_ERR(fence)) + return PTR_ERR(fence); + + 
kref_get(&drawobj->refcount); + + id = syncobj->numsyncs++; + + event = &syncobj->synclist[id]; + + event->id = id; + event->type = KGSL_CMD_SYNCPOINT_TYPE_TIMELINE; + event->syncobj = syncobj; + event->device = device; + event->context = NULL; + event->fence = fence; + init_irq_work(&event->work, drawobj_sync_timeline_fence_work); + + INIT_LIST_HEAD(&event->cb.node); + + event->priv = + drawobj_get_sync_timeline_priv(u64_to_user_ptr(sync.timelines), + sync.timelines_size, sync.count); + + ret = dma_fence_add_callback(event->fence, + &event->cb, drawobj_sync_timeline_fence_callback); + + set_bit(event->id, &syncobj->pending); + + if (ret) { + clear_bit(event->id, &syncobj->pending); + + if (dma_fence_is_signaled(event->fence)) { + trace_syncpoint_fence_expire(syncobj, "signaled"); + log_kgsl_syncpoint_fence_expire_event( + syncobj->base.context->id, "signaled"); + dma_fence_put(event->fence); + ret = 0; + } + + kgsl_drawobj_put(drawobj); + return ret; + } + + trace_syncpoint_timeline_fence(event->syncobj, event->fence, false); + return 0; +} + +static int drawobj_add_sync_fence(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj, void __user *data, + u64 datasize) +{ + struct kgsl_cmd_syncpoint_fence sync; + struct kgsl_drawobj *drawobj = DRAWOBJ(syncobj); + struct kgsl_drawobj_sync_event *event; + struct event_fence_info *priv; + unsigned int id, i; + + if (copy_struct_from_user(&sync, sizeof(sync), data, datasize)) + return -EFAULT; + + kref_get(&drawobj->refcount); + + id = syncobj->numsyncs++; + + event = &syncobj->synclist[id]; + + event->id = id; + event->type = KGSL_CMD_SYNCPOINT_TYPE_FENCE; + event->syncobj = syncobj; + event->device = device; + event->context = NULL; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + + set_bit(event->id, &syncobj->pending); + + event->handle = kgsl_sync_fence_async_wait(sync.fd, + drawobj_sync_fence_func, event, priv); + + event->priv = priv; + + if (IS_ERR_OR_NULL(event->handle)) { + int ret = 
PTR_ERR(event->handle); + + clear_bit(event->id, &syncobj->pending); + event->handle = NULL; + + kgsl_drawobj_put(drawobj); + + /* + * If ret == 0 the fence was already signaled - print a trace + * message so we can track that + */ + if (ret == 0) { + trace_syncpoint_fence_expire(syncobj, "signaled"); + log_kgsl_syncpoint_fence_expire_event( + syncobj->base.context->id, "signaled"); + } + + return ret; + } + + for (i = 0; priv && i < priv->num_fences; i++) { + trace_syncpoint_fence(syncobj, priv->fences[i].name); + log_kgsl_syncpoint_fence_event(syncobj->base.context->id, + priv->fences[i].name); + } + + return 0; +} + +/* drawobj_add_sync_timestamp() - Add a new sync point for a sync obj + * @device: KGSL device + * @syncobj: KGSL sync obj to add the sync point to + * @priv: Private structure passed by the user + * + * Add a new sync point timestamp event to the sync obj. + */ +static int drawobj_add_sync_timestamp(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj, + struct kgsl_cmd_syncpoint_timestamp *timestamp) + +{ + struct kgsl_drawobj *drawobj = DRAWOBJ(syncobj); + struct kgsl_context *context = kgsl_context_get(device, + timestamp->context_id); + struct kgsl_drawobj_sync_event *event; + int ret = -EINVAL; + unsigned int id; + + if (context == NULL) + return -EINVAL; + + /* + * We allow somebody to create a sync point on their own context. + * This has the effect of delaying a command from submitting until the + * dependent command has cleared. That said we obviously can't let them + * create a sync point on a future timestamp. 
+ */ + + if (context == drawobj->context) { + unsigned int queued; + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_QUEUED, + &queued); + + if (timestamp_cmp(timestamp->timestamp, queued) > 0) { + dev_err(device->dev, + "Cannot create syncpoint for future timestamp %d (current %d)\n", + timestamp->timestamp, queued); + goto done; + } + } + + kref_get(&drawobj->refcount); + + id = syncobj->numsyncs++; + + event = &syncobj->synclist[id]; + event->id = id; + + event->type = KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP; + event->syncobj = syncobj; + event->context = context; + event->timestamp = timestamp->timestamp; + event->device = device; + + set_bit(event->id, &syncobj->pending); + + ret = kgsl_add_event(device, &context->events, timestamp->timestamp, + drawobj_sync_func, event); + + if (ret) { + clear_bit(event->id, &syncobj->pending); + kgsl_drawobj_put(drawobj); + } else { + trace_syncpoint_timestamp(syncobj, context, + timestamp->timestamp); + } + +done: + if (ret) + kgsl_context_put(context); + + return ret; +} + +static int drawobj_add_sync_timestamp_from_user(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj, void __user *data, + u64 datasize) +{ + struct kgsl_cmd_syncpoint_timestamp timestamp; + + if (copy_struct_from_user(×tamp, sizeof(timestamp), + data, datasize)) + return -EFAULT; + + return drawobj_add_sync_timestamp(device, syncobj, ×tamp); +} + +/** + * kgsl_drawobj_sync_add_sync() - Add a sync point to a command + * batch + * @device: Pointer to the KGSL device struct for the GPU + * @syncobj: Pointer to the sync obj + * @sync: Pointer to the user-specified struct defining the syncpoint + * + * Create a new sync point in the sync obj based on the + * user specified parameters + */ +int kgsl_drawobj_sync_add_sync(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj, + struct kgsl_cmd_syncpoint *sync) +{ + struct kgsl_drawobj *drawobj = DRAWOBJ(syncobj); + + if (sync->type == KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP) + return 
drawobj_add_sync_timestamp_from_user(device, + syncobj, sync->priv, sync->size); + else if (sync->type == KGSL_CMD_SYNCPOINT_TYPE_FENCE) + return drawobj_add_sync_fence(device, + syncobj, sync->priv, sync->size); + else if (sync->type == KGSL_CMD_SYNCPOINT_TYPE_TIMELINE) + return drawobj_add_sync_timeline(device, + syncobj, sync->priv, sync->size); + + dev_err(device->dev, "bad syncpoint type %d for ctxt %d\n", + sync->type, drawobj->context->id); + + return -EINVAL; +} + +static void add_profiling_buffer(struct kgsl_device *device, + struct kgsl_drawobj_cmd *cmdobj, + uint64_t gpuaddr, uint64_t size, + unsigned int id, uint64_t offset) +{ + struct kgsl_mem_entry *entry; + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + + if (!(drawobj->flags & KGSL_DRAWOBJ_PROFILING)) + return; + + /* Only the first buffer entry counts - ignore the rest */ + if (cmdobj->profiling_buf_entry != NULL) + return; + + if (id != 0) + entry = kgsl_sharedmem_find_id(drawobj->context->proc_priv, + id); + else + entry = kgsl_sharedmem_find(drawobj->context->proc_priv, + gpuaddr); + + if (entry != NULL) { + if (!kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, size)) { + kgsl_mem_entry_put(entry); + entry = NULL; + } + } + + if (entry == NULL) { + dev_err(device->dev, + "ignore bad profile buffer ctxt %d id %d offset %lld gpuaddr %llx size %lld\n", + drawobj->context->id, id, offset, gpuaddr, size); + return; + } + + + if (!id) { + cmdobj->profiling_buffer_gpuaddr = gpuaddr; + } else { + u64 off = offset + sizeof(struct kgsl_drawobj_profiling_buffer); + + /* + * Make sure there is enough room in the object to store the + * entire profiling buffer object + */ + if (off < offset || off >= entry->memdesc.size) { + dev_err(device->dev, + "ignore invalid profile offset ctxt %d id %d offset %lld gpuaddr %llx size %lld\n", + drawobj->context->id, id, offset, gpuaddr, size); + kgsl_mem_entry_put(entry); + return; + } + + cmdobj->profiling_buffer_gpuaddr = + entry->memdesc.gpuaddr + offset; + } + + 
cmdobj->profiling_buf_entry = entry; +} + +/** + * kgsl_drawobj_cmd_add_ibdesc() - Add a legacy ibdesc to a command + * batch + * @cmdobj: Pointer to the ib + * @ibdesc: Pointer to the user-specified struct defining the memory or IB + * + * Create a new memory entry in the ib based on the + * user specified parameters + */ +int kgsl_drawobj_cmd_add_ibdesc(struct kgsl_device *device, + struct kgsl_drawobj_cmd *cmdobj, struct kgsl_ibdesc *ibdesc) +{ + uint64_t gpuaddr = (uint64_t) ibdesc->gpuaddr; + uint64_t size = (uint64_t) ibdesc->sizedwords << 2; + struct kgsl_memobj_node *mem; + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + + /* sanitize the ibdesc ctrl flags */ + ibdesc->ctrl &= KGSL_IBDESC_MEMLIST | KGSL_IBDESC_PROFILING_BUFFER; + + if (drawobj->flags & KGSL_DRAWOBJ_MEMLIST && + ibdesc->ctrl & KGSL_IBDESC_MEMLIST) { + if (ibdesc->ctrl & KGSL_IBDESC_PROFILING_BUFFER) { + add_profiling_buffer(device, cmdobj, + gpuaddr, size, 0, 0); + return 0; + } + } + + /* Ignore if SYNC or MARKER is specified */ + if (drawobj->type & (SYNCOBJ_TYPE | MARKEROBJ_TYPE)) + return 0; + + mem = kmem_cache_alloc(memobjs_cache, GFP_KERNEL); + if (mem == NULL) + return -ENOMEM; + + mem->gpuaddr = gpuaddr; + mem->size = size; + mem->priv = 0; + mem->id = 0; + mem->offset = 0; + mem->flags = 0; + + if (drawobj->flags & KGSL_DRAWOBJ_MEMLIST && + ibdesc->ctrl & KGSL_IBDESC_MEMLIST) + /* add to the memlist */ + list_add_tail(&mem->node, &cmdobj->memlist); + else { + /* set the preamble flag if directed to */ + if (drawobj->context->flags & KGSL_CONTEXT_PREAMBLE && + list_empty(&cmdobj->cmdlist)) + mem->flags = KGSL_CMDLIST_CTXTSWITCH_PREAMBLE; + + /* add to the cmd list */ + list_add_tail(&mem->node, &cmdobj->cmdlist); + } + + return 0; +} + +static int drawobj_init(struct kgsl_device *device, + struct kgsl_context *context, struct kgsl_drawobj *drawobj, + int type) +{ + /* + * Increase the reference count on the context so it doesn't disappear + * during the lifetime of this object + 
*/ + if (!_kgsl_context_get(context)) + return -ENOENT; + + kref_init(&drawobj->refcount); + + drawobj->device = device; + drawobj->context = context; + drawobj->type = type; + + return 0; +} + +static int get_aux_command(void __user *ptr, u64 generic_size, + int type, void *auxcmd, size_t auxcmd_size) +{ + struct kgsl_gpu_aux_command_generic generic; + u64 size; + + if (copy_struct_from_user(&generic, sizeof(generic), ptr, generic_size)) + return -EFAULT; + + if (generic.type != type) + return -EINVAL; + + size = min_t(u64, auxcmd_size, generic.size); + if (copy_from_user(auxcmd, u64_to_user_ptr(generic.priv), size)) + return -EFAULT; + + return 0; +} + +struct kgsl_drawobj_timeline * +kgsl_drawobj_timeline_create(struct kgsl_device *device, + struct kgsl_context *context) +{ + int ret; + struct kgsl_drawobj_timeline *timelineobj = + kzalloc(sizeof(*timelineobj), GFP_KERNEL); + + if (!timelineobj) + return ERR_PTR(-ENOMEM); + + ret = drawobj_init(device, context, &timelineobj->base, + TIMELINEOBJ_TYPE); + if (ret) { + kfree(timelineobj); + return ERR_PTR(ret); + } + + timelineobj->base.destroy = timelineobj_destroy; + timelineobj->base.destroy_object = timelineobj_destroy_object; + + return timelineobj; +} + +int kgsl_drawobj_add_timeline(struct kgsl_device_private *dev_priv, + struct kgsl_drawobj_timeline *timelineobj, + void __user *src, u64 cmdsize) +{ + struct kgsl_gpu_aux_command_timeline cmd; + int i, ret; + + memset(&cmd, 0, sizeof(cmd)); + + ret = get_aux_command(src, cmdsize, + KGSL_GPU_AUX_COMMAND_TIMELINE, &cmd, sizeof(cmd)); + if (ret) + return ret; + + if (!cmd.count) + return -EINVAL; + + timelineobj->timelines = kvcalloc(cmd.count, + sizeof(*timelineobj->timelines), + GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); + if (!timelineobj->timelines) + return -ENOMEM; + + src = u64_to_user_ptr(cmd.timelines); + + for (i = 0; i < cmd.count; i++) { + struct kgsl_timeline_val val; + + if (copy_struct_from_user(&val, sizeof(val), src, + cmd.timelines_size)) { + 
ret = -EFAULT; + goto err; + } + + if (val.padding) { + ret = -EINVAL; + goto err; + } + + timelineobj->timelines[i].timeline = + kgsl_timeline_by_id(dev_priv->device, + val.timeline); + + if (!timelineobj->timelines[i].timeline) { + ret = -ENODEV; + goto err; + } + + trace_kgsl_drawobj_timeline(val.timeline, val.seqno); + timelineobj->timelines[i].seqno = val.seqno; + + src += cmd.timelines_size; + } + + timelineobj->count = cmd.count; + return 0; +err: + for (i = 0; i < cmd.count; i++) + kgsl_timeline_put(timelineobj->timelines[i].timeline); + + kvfree(timelineobj->timelines); + timelineobj->timelines = NULL; + return ret; +} + +static void kgsl_drawobj_bind_callback(struct kgsl_sharedmem_bind_op *op) +{ + struct kgsl_drawobj_bind *bindobj = op->data; + struct kgsl_drawobj *drawobj = DRAWOBJ(bindobj); + struct kgsl_device *device = drawobj->device; + + set_bit(KGSL_BINDOBJ_STATE_DONE, &bindobj->state); + + /* Re-schedule the context */ + if (device->ftbl->drawctxt_sched) + device->ftbl->drawctxt_sched(device, + drawobj->context); + + /* Put back the reference we took when we started the operation */ + kgsl_context_put(drawobj->context); + kgsl_drawobj_put(drawobj); +} + +int kgsl_drawobj_add_bind(struct kgsl_device_private *dev_priv, + struct kgsl_drawobj_bind *bindobj, + void __user *src, u64 cmdsize) +{ + struct kgsl_gpu_aux_command_bind cmd; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_sharedmem_bind_op *op; + int ret; + + ret = get_aux_command(src, cmdsize, + KGSL_GPU_AUX_COMMAND_BIND, &cmd, sizeof(cmd)); + if (ret) + return ret; + + op = kgsl_sharedmem_create_bind_op(private, cmd.target, + u64_to_user_ptr(cmd.rangeslist), cmd.numranges, + cmd.rangesize); + + if (IS_ERR(op)) + return PTR_ERR(op); + + op->callback = kgsl_drawobj_bind_callback; + op->data = bindobj; + + bindobj->bind = op; + return 0; +} + +struct kgsl_drawobj_bind *kgsl_drawobj_bind_create(struct kgsl_device *device, + struct kgsl_context *context) +{ + int 
ret; + struct kgsl_drawobj_bind *bindobj = + kzalloc(sizeof(*bindobj), GFP_KERNEL); + + if (!bindobj) + return ERR_PTR(-ENOMEM); + + ret = drawobj_init(device, context, &bindobj->base, BINDOBJ_TYPE); + if (ret) { + kfree(bindobj); + return ERR_PTR(ret); + } + + bindobj->base.destroy = bindobj_destroy; + bindobj->base.destroy_object = bindobj_destroy_object; + + return bindobj; +} + +/** + * kgsl_drawobj_sync_create() - Create a new sync obj + * structure + * @device: Pointer to a KGSL device struct + * @context: Pointer to a KGSL context struct + * + * Allocate an new kgsl_drawobj_sync structure + */ +struct kgsl_drawobj_sync *kgsl_drawobj_sync_create(struct kgsl_device *device, + struct kgsl_context *context) +{ + struct kgsl_drawobj_sync *syncobj = + kzalloc(sizeof(*syncobj), GFP_KERNEL); + int ret; + + if (!syncobj) + return ERR_PTR(-ENOMEM); + + ret = drawobj_init(device, context, &syncobj->base, SYNCOBJ_TYPE); + if (ret) { + kfree(syncobj); + return ERR_PTR(ret); + } + + syncobj->base.destroy = syncobj_destroy; + syncobj->base.destroy_object = syncobj_destroy_object; + + timer_setup(&syncobj->timer, syncobj_timer, 0); + + return syncobj; +} + +/** + * kgsl_drawobj_cmd_create() - Create a new command obj + * structure + * @device: Pointer to a KGSL device struct + * @context: Pointer to a KGSL context struct + * @flags: Flags for the command obj + * @type: type of cmdobj MARKER/CMD + * + * Allocate a new kgsl_drawobj_cmd structure + */ +struct kgsl_drawobj_cmd *kgsl_drawobj_cmd_create(struct kgsl_device *device, + struct kgsl_context *context, unsigned int flags, + unsigned int type) +{ + struct kgsl_drawobj_cmd *cmdobj = kzalloc(sizeof(*cmdobj), GFP_KERNEL); + int ret; + + if (!cmdobj) + return ERR_PTR(-ENOMEM); + + ret = drawobj_init(device, context, &cmdobj->base, + (type & (CMDOBJ_TYPE | MARKEROBJ_TYPE))); + if (ret) { + kfree(cmdobj); + return ERR_PTR(ret); + } + + cmdobj->base.destroy = cmdobj_destroy; + cmdobj->base.destroy_object = 
cmdobj_destroy_object; + + /* sanitize our flags for drawobjs */ + cmdobj->base.flags = flags & (KGSL_DRAWOBJ_CTX_SWITCH + | KGSL_DRAWOBJ_MARKER + | KGSL_DRAWOBJ_END_OF_FRAME + | KGSL_DRAWOBJ_PWR_CONSTRAINT + | KGSL_DRAWOBJ_MEMLIST + | KGSL_DRAWOBJ_PROFILING + | KGSL_DRAWOBJ_PROFILING_KTIME); + + INIT_LIST_HEAD(&cmdobj->cmdlist); + INIT_LIST_HEAD(&cmdobj->memlist); + + if (type & CMDOBJ_TYPE) + atomic_inc(&context->proc_priv->cmd_count); + + return cmdobj; +} + +#ifdef CONFIG_COMPAT +static int add_ibdesc_list_compat(struct kgsl_device *device, + struct kgsl_drawobj_cmd *cmdobj, void __user *ptr, int count) +{ + int i, ret = 0; + struct kgsl_ibdesc_compat ibdesc32; + struct kgsl_ibdesc ibdesc; + + for (i = 0; i < count; i++) { + memset(&ibdesc32, 0, sizeof(ibdesc32)); + + if (copy_from_user(&ibdesc32, ptr, sizeof(ibdesc32))) { + ret = -EFAULT; + break; + } + + ibdesc.gpuaddr = (unsigned long) ibdesc32.gpuaddr; + ibdesc.sizedwords = (size_t) ibdesc32.sizedwords; + ibdesc.ctrl = (unsigned int) ibdesc32.ctrl; + + ret = kgsl_drawobj_cmd_add_ibdesc(device, cmdobj, &ibdesc); + if (ret) + break; + + ptr += sizeof(ibdesc32); + } + + return ret; +} + +static int add_syncpoints_compat(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj, void __user *ptr, int count) +{ + struct kgsl_cmd_syncpoint_compat sync32; + struct kgsl_cmd_syncpoint sync; + int i, ret = 0; + + for (i = 0; i < count; i++) { + memset(&sync32, 0, sizeof(sync32)); + + if (copy_from_user(&sync32, ptr, sizeof(sync32))) { + ret = -EFAULT; + break; + } + + sync.type = sync32.type; + sync.priv = compat_ptr(sync32.priv); + sync.size = (size_t) sync32.size; + + ret = kgsl_drawobj_sync_add_sync(device, syncobj, &sync); + if (ret) + break; + + ptr += sizeof(sync32); + } + + return ret; +} +#else +static int add_ibdesc_list_compat(struct kgsl_device *device, + struct kgsl_drawobj_cmd *cmdobj, void __user *ptr, int count) +{ + return -EINVAL; +} + +static int add_syncpoints_compat(struct kgsl_device 
*device, + struct kgsl_drawobj_sync *syncobj, void __user *ptr, int count) +{ + return -EINVAL; +} +#endif + +/* Returns: + * -EINVAL: Bad data + * 0: All data fields are empty (nothing to do) + * 1: All list information is valid + */ +static int _verify_input_list(unsigned int count, void __user *ptr, + unsigned int size) +{ + /* Return early if nothing going on */ + if (count == 0 && ptr == NULL && size == 0) + return 0; + + /* Sanity check inputs */ + if (count == 0 || ptr == NULL || size == 0) + return -EINVAL; + + return 1; +} + +int kgsl_drawobj_cmd_add_ibdesc_list(struct kgsl_device *device, + struct kgsl_drawobj_cmd *cmdobj, void __user *ptr, int count) +{ + struct kgsl_ibdesc ibdesc; + struct kgsl_drawobj *baseobj = DRAWOBJ(cmdobj); + int i, ret; + + /* Ignore everything if this is a MARKER */ + if (baseobj->type & MARKEROBJ_TYPE) + return 0; + + ret = _verify_input_list(count, ptr, sizeof(ibdesc)); + if (ret <= 0) + return -EINVAL; + + if (is_compat_task()) + return add_ibdesc_list_compat(device, cmdobj, ptr, count); + + for (i = 0; i < count; i++) { + memset(&ibdesc, 0, sizeof(ibdesc)); + + if (copy_from_user(&ibdesc, ptr, sizeof(ibdesc))) + return -EFAULT; + + ret = kgsl_drawobj_cmd_add_ibdesc(device, cmdobj, &ibdesc); + if (ret) + return ret; + + ptr += sizeof(ibdesc); + } + + return 0; +} + +int kgsl_drawobj_sync_add_syncpoints(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj, void __user *ptr, int count) +{ + struct kgsl_cmd_syncpoint sync; + int i, ret; + + if (count == 0) + return 0; + + syncobj->synclist = kcalloc(count, + sizeof(struct kgsl_drawobj_sync_event), GFP_KERNEL); + + if (syncobj->synclist == NULL) + return -ENOMEM; + + if (is_compat_task()) + return add_syncpoints_compat(device, syncobj, ptr, count); + + for (i = 0; i < count; i++) { + memset(&sync, 0, sizeof(sync)); + + if (copy_from_user(&sync, ptr, sizeof(sync))) + return -EFAULT; + + ret = kgsl_drawobj_sync_add_sync(device, syncobj, &sync); + if (ret) + return ret; + 
+ ptr += sizeof(sync); + } + + return 0; +} + +static int kgsl_drawobj_add_memobject(struct list_head *head, + struct kgsl_command_object *obj) +{ + struct kgsl_memobj_node *mem; + + mem = kmem_cache_alloc(memobjs_cache, GFP_KERNEL); + if (mem == NULL) + return -ENOMEM; + + mem->gpuaddr = obj->gpuaddr; + mem->size = obj->size; + mem->id = obj->id; + mem->offset = obj->offset; + mem->flags = obj->flags; + mem->priv = 0; + + list_add_tail(&mem->node, head); + return 0; +} + +#define CMDLIST_FLAGS \ + (KGSL_CMDLIST_IB | \ + KGSL_CMDLIST_CTXTSWITCH_PREAMBLE | \ + KGSL_CMDLIST_IB_PREAMBLE) + +/* This can only accept MARKEROBJ_TYPE and CMDOBJ_TYPE */ +int kgsl_drawobj_cmd_add_cmdlist(struct kgsl_device *device, + struct kgsl_drawobj_cmd *cmdobj, void __user *ptr, + unsigned int size, unsigned int count) +{ + struct kgsl_command_object obj; + struct kgsl_drawobj *baseobj = DRAWOBJ(cmdobj); + int i, ret; + + /* Ignore everything if this is a MARKER */ + if (baseobj->type & MARKEROBJ_TYPE) + return 0; + + ret = _verify_input_list(count, ptr, size); + if (ret <= 0) + return ret; + + for (i = 0; i < count; i++) { + if (copy_struct_from_user(&obj, sizeof(obj), ptr, size)) + return -EFAULT; + + /* Sanity check the flags */ + if (!(obj.flags & CMDLIST_FLAGS)) { + dev_err(device->dev, + "invalid cmdobj ctxt %d flags %d id %d offset %llu addr %llx size %llu\n", + baseobj->context->id, obj.flags, obj.id, + obj.offset, obj.gpuaddr, obj.size); + return -EINVAL; + } + + ret = kgsl_drawobj_add_memobject(&cmdobj->cmdlist, &obj); + if (ret) + return ret; + + ptr += sizeof(obj); + } + + return 0; +} + +int kgsl_drawobj_cmd_add_memlist(struct kgsl_device *device, + struct kgsl_drawobj_cmd *cmdobj, void __user *ptr, + unsigned int size, unsigned int count) +{ + struct kgsl_command_object obj; + struct kgsl_drawobj *baseobj = DRAWOBJ(cmdobj); + int i, ret; + + /* Ignore everything if this is a MARKER */ + if (baseobj->type & MARKEROBJ_TYPE) + return 0; + + ret = _verify_input_list(count, 
ptr, size); + if (ret <= 0) + return ret; + + for (i = 0; i < count; i++) { + if (copy_struct_from_user(&obj, sizeof(obj), ptr, size)) + return -EFAULT; + + if (!(obj.flags & KGSL_OBJLIST_MEMOBJ)) { + dev_err(device->dev, + "invalid memobj ctxt %d flags %d id %d offset %lld addr %lld size %lld\n", + DRAWOBJ(cmdobj)->context->id, obj.flags, + obj.id, obj.offset, obj.gpuaddr, + obj.size); + return -EINVAL; + } + + if (obj.flags & KGSL_OBJLIST_PROFILE) + add_profiling_buffer(device, cmdobj, obj.gpuaddr, + obj.size, obj.id, obj.offset); + else { + ret = kgsl_drawobj_add_memobject(&cmdobj->memlist, + &obj); + if (ret) + return ret; + } + + ptr += sizeof(obj); + } + + return 0; +} + +struct kgsl_drawobj_sync * +kgsl_drawobj_create_timestamp_syncobj(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp) +{ + struct kgsl_drawobj_sync *syncobj; + struct kgsl_cmd_syncpoint_timestamp priv; + int ret; + + syncobj = kgsl_drawobj_sync_create(device, context); + if (IS_ERR(syncobj)) + return syncobj; + + syncobj->synclist = kzalloc(sizeof(*syncobj->synclist), GFP_KERNEL); + if (!syncobj->synclist) { + kgsl_drawobj_destroy(DRAWOBJ(syncobj)); + return ERR_PTR(-ENOMEM); + } + + priv.timestamp = timestamp; + priv.context_id = context->id; + + ret = drawobj_add_sync_timestamp(device, syncobj, &priv); + if (ret) { + kgsl_drawobj_destroy(DRAWOBJ(syncobj)); + return ERR_PTR(ret); + } + + return syncobj; +} + +int kgsl_drawobj_sync_add_synclist(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj, void __user *ptr, + unsigned int size, unsigned int count) +{ + struct kgsl_command_syncpoint syncpoint; + struct kgsl_cmd_syncpoint sync; + int i, ret; + + /* If creating a sync and the data is not there or wrong then error */ + ret = _verify_input_list(count, ptr, size); + if (ret <= 0) + return -EINVAL; + + syncobj->synclist = kcalloc(count, + sizeof(struct kgsl_drawobj_sync_event), GFP_KERNEL); + + if (syncobj->synclist == NULL) + return -ENOMEM; + + 
for (i = 0; i < count; i++) { + if (copy_struct_from_user(&syncpoint, sizeof(syncpoint), ptr, size)) + return -EFAULT; + + sync.type = syncpoint.type; + sync.priv = u64_to_user_ptr(syncpoint.priv); + sync.size = syncpoint.size; + + ret = kgsl_drawobj_sync_add_sync(device, syncobj, &sync); + if (ret) + return ret; + + ptr += sizeof(syncpoint); + } + + return 0; +} + +void kgsl_drawobjs_cache_exit(void) +{ + kmem_cache_destroy(memobjs_cache); +} + +int kgsl_drawobjs_cache_init(void) +{ + memobjs_cache = KMEM_CACHE(kgsl_memobj_node, 0); + + if (!memobjs_cache) + return -ENOMEM; + + return 0; +} diff --git a/kgsl_drawobj.h b/kgsl_drawobj.h new file mode 100644 index 0000000000..faf396ba74 --- /dev/null +++ b/kgsl_drawobj.h @@ -0,0 +1,332 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2016-2021, The Linux Foundation. All rights reserved. + */ + +#ifndef __KGSL_DRAWOBJ_H +#define __KGSL_DRAWOBJ_H + +#include +#include +#include + +#define DRAWOBJ(obj) (&obj->base) +#define SYNCOBJ(obj) \ + container_of(obj, struct kgsl_drawobj_sync, base) +#define CMDOBJ(obj) \ + container_of(obj, struct kgsl_drawobj_cmd, base) + +#define CMDOBJ_TYPE BIT(0) +#define MARKEROBJ_TYPE BIT(1) +#define SYNCOBJ_TYPE BIT(2) +#define BINDOBJ_TYPE BIT(3) +#define TIMELINEOBJ_TYPE BIT(4) + +/** + * struct kgsl_drawobj - KGSL drawobj descriptor + * @device: KGSL GPU device that the command was created for + * @context: KGSL context that created the command + * @type: Object type + * @timestamp: Timestamp assigned to the command + * @flags: flags + * @refcount: kref structure to maintain the reference count + */ +struct kgsl_drawobj { + struct kgsl_device *device; + struct kgsl_context *context; + uint32_t type; + uint32_t timestamp; + unsigned long flags; + struct kref refcount; + /** @destroy: Callbak function to take down the object */ + void (*destroy)(struct kgsl_drawobj *drawobj); + /** @destroy_object: Callback function to free the object memory */ + void 
(*destroy_object)(struct kgsl_drawobj *drawobj); +}; + +/** + * struct kgsl_drawobj_cmd - KGSL command obj, This covers marker + * cmds also since markers are special form of cmds that do not + * need their cmds to be executed. + * @base: Base kgsl_drawobj, this needs to be the first entry + * @priv: Internal flags + * @global_ts: The ringbuffer timestamp corresponding to this + * command obj + * @fault_policy: Internal policy describing how to handle this command in case + * of a fault + * @fault_recovery: recovery actions actually tried for this batch + * be hung + * @refcount: kref structure to maintain the reference count + * @cmdlist: List of IBs to issue + * @memlist: List of all memory used in this command batch + * @marker_timestamp: For markers, the timestamp of the last "real" command that + * was queued + * @profiling_buf_entry: Mem entry containing the profiling buffer + * @profiling_buffer_gpuaddr: GPU virt address of the profile buffer added here + * for easy access + * @profile_index: Index to store the start/stop ticks in the kernel profiling + * buffer + * @submit_ticks: Variable to hold ticks at the time of + * command obj submit. 
+ + */ +struct kgsl_drawobj_cmd { + struct kgsl_drawobj base; + unsigned long priv; + unsigned int global_ts; + unsigned long fault_policy; + unsigned long fault_recovery; + struct list_head cmdlist; + struct list_head memlist; + unsigned int marker_timestamp; + struct kgsl_mem_entry *profiling_buf_entry; + uint64_t profiling_buffer_gpuaddr; + unsigned int profile_index; + uint64_t submit_ticks; + /* @numibs: Number of ibs in this cmdobj */ + u32 numibs; +}; + +/** + * struct kgsl_drawobj_sync - KGSL sync object + * @base: Base kgsl_drawobj, this needs to be the first entry + * @synclist: Array of context/timestamp tuples to wait for before issuing + * @numsyncs: Number of sync entries in the array + * @pending: Bitmask of sync events that are active + * @timer: a timer used to track possible sync timeouts for this + * sync obj + * @timeout_jiffies: For a sync obj the jiffies at + * which the timer will expire + */ +struct kgsl_drawobj_sync { + struct kgsl_drawobj base; + struct kgsl_drawobj_sync_event *synclist; + unsigned int numsyncs; + unsigned long pending; + struct timer_list timer; + unsigned long timeout_jiffies; +}; + +#define KGSL_BINDOBJ_STATE_START 0 +#define KGSL_BINDOBJ_STATE_DONE 1 + +/** + * struct kgsl_drawobj_bind - KGSL virtual buffer object bind operation + * @base: &struct kgsl_drawobj container + * @state: Current state of the draw operation + * @bind: Pointer to the VBO bind operation struct + */ +struct kgsl_drawobj_bind { + struct kgsl_drawobj base; + unsigned long state; + struct kgsl_sharedmem_bind_op *bind; +}; + +static inline struct kgsl_drawobj_bind *BINDOBJ(struct kgsl_drawobj *obj) +{ + return container_of(obj, struct kgsl_drawobj_bind, base); +} + +/** + * struct kgsl_drawobj_timeline - KGSL timeline signal operation + */ +struct kgsl_drawobj_timeline { + /** @base: &struct kgsl_drawobj container */ + struct kgsl_drawobj base; + struct { + /** @timeline: Pointer to a &struct kgsl_timeline */ + struct kgsl_timeline *timeline; + /** 
@seqno: Sequence number to signal */ + u64 seqno; + } *timelines; + /** @count: Number of items in timelines */ + int count; +}; + +static inline struct kgsl_drawobj_timeline * +TIMELINEOBJ(struct kgsl_drawobj *obj) +{ + return container_of(obj, struct kgsl_drawobj_timeline, base); +} + +#define KGSL_FENCE_NAME_LEN 74 + +struct fence_info { + char name[KGSL_FENCE_NAME_LEN]; +}; + +struct event_fence_info { + struct fence_info *fences; + int num_fences; +}; + +struct event_timeline_info { + u64 seqno; + u32 timeline; +}; + +/** + * struct kgsl_drawobj_sync_event + * @id: identifier (position within the pending bitmap) + * @type: Syncpoint type + * @syncobj: Pointer to the syncobj that owns the sync event + * @context: KGSL context for whose timestamp we want to + * register this event + * @timestamp: Pending timestamp for the event + * @handle: Pointer to a sync fence handle + * @device: Pointer to the KGSL device + */ +struct kgsl_drawobj_sync_event { + unsigned int id; + int type; + struct kgsl_drawobj_sync *syncobj; + struct kgsl_context *context; + unsigned int timestamp; + struct kgsl_sync_fence_cb *handle; + struct kgsl_device *device; + /** @priv: Type specific private information */ + void *priv; + /** + * @fence: Pointer to a dma fence for KGSL_CMD_SYNCPOINT_TYPE_TIMELINE + * events + */ + struct dma_fence *fence; + /** @cb: Callback struct for KGSL_CMD_SYNCPOINT_TYPE_TIMELINE */ + struct dma_fence_cb cb; + /** @work : irq worker for KGSL_CMD_SYNCPOINT_TYPE_TIMELINE */ + struct irq_work work; +}; + +#define KGSL_DRAWOBJ_FLAGS \ + { KGSL_DRAWOBJ_MARKER, "MARKER" }, \ + { KGSL_DRAWOBJ_CTX_SWITCH, "CTX_SWITCH" }, \ + { KGSL_DRAWOBJ_SYNC, "SYNC" }, \ + { KGSL_DRAWOBJ_END_OF_FRAME, "EOF" }, \ + { KGSL_DRAWOBJ_PWR_CONSTRAINT, "PWR_CONSTRAINT" }, \ + { KGSL_DRAWOBJ_SUBMIT_IB_LIST, "IB_LIST" } + +/** + * enum kgsl_drawobj_cmd_priv - Internal command obj flags + * @CMDOBJ_SKIP - skip the entire command obj + * @CMDOBJ_FORCE_PREAMBLE - Force the preamble on for + * 
command obj + * @CMDOBJ_WFI - Force wait-for-idle for the submission + * @CMDOBJ_PROFILE - store the start / retire ticks for + * @CMDOBJ_FAULT - Mark the command object as faulted + * the command obj in the profiling buffer + */ +enum kgsl_drawobj_cmd_priv { + CMDOBJ_SKIP = 0, + CMDOBJ_FORCE_PREAMBLE, + CMDOBJ_WFI, + CMDOBJ_PROFILE, + CMDOBJ_FAULT, +}; + +struct kgsl_ibdesc; +struct kgsl_cmd_syncpoint; + +struct kgsl_drawobj_cmd *kgsl_drawobj_cmd_create(struct kgsl_device *device, + struct kgsl_context *context, unsigned int flags, + unsigned int type); +int kgsl_drawobj_cmd_add_ibdesc(struct kgsl_device *device, + struct kgsl_drawobj_cmd *cmdobj, struct kgsl_ibdesc *ibdesc); +int kgsl_drawobj_cmd_add_ibdesc_list(struct kgsl_device *device, + struct kgsl_drawobj_cmd *cmdobj, void __user *ptr, int count); +int kgsl_drawobj_cmd_add_cmdlist(struct kgsl_device *device, + struct kgsl_drawobj_cmd *cmdobj, void __user *ptr, + unsigned int size, unsigned int count); +int kgsl_drawobj_cmd_add_memlist(struct kgsl_device *device, + struct kgsl_drawobj_cmd *cmdobj, void __user *ptr, + unsigned int size, unsigned int count); + +struct kgsl_drawobj_sync *kgsl_drawobj_sync_create(struct kgsl_device *device, + struct kgsl_context *context); +int kgsl_drawobj_sync_add_syncpoints(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj, void __user *ptr, + int count); +int kgsl_drawobj_sync_add_synclist(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj, void __user *ptr, + unsigned int size, unsigned int count); +int kgsl_drawobj_sync_add_sync(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj, + struct kgsl_cmd_syncpoint *sync); + +int kgsl_drawobjs_cache_init(void); +void kgsl_drawobjs_cache_exit(void); + +void kgsl_dump_syncpoints(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj); + +void kgsl_drawobj_destroy(struct kgsl_drawobj *drawobj); + +void kgsl_drawobj_destroy_object(struct kref *kref); + +static inline bool 
kgsl_drawobj_events_pending( + struct kgsl_drawobj_sync *syncobj) +{ + return !bitmap_empty(&syncobj->pending, KGSL_MAX_SYNCPOINTS); +} + +static inline bool kgsl_drawobj_event_pending( + struct kgsl_drawobj_sync *syncobj, unsigned int bit) +{ + if (bit >= KGSL_MAX_SYNCPOINTS) + return false; + + return test_bit(bit, &syncobj->pending); +} + +static inline void kgsl_drawobj_put(struct kgsl_drawobj *drawobj) +{ + if (drawobj) + kref_put(&drawobj->refcount, kgsl_drawobj_destroy_object); +} + +/** + * kgsl_drawobj_create_timestamp_syncobj - Create a syncobj for a timestamp + * @device: A GPU device handle + * @context: Draw context for the syncobj + * @timestamp: Timestamp to sync on + * + * Create a sync object for @timestamp on @context. + * Return: A pointer to the sync object + */ +struct kgsl_drawobj_sync * +kgsl_drawobj_create_timestamp_syncobj(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp); + +struct kgsl_drawobj_bind *kgsl_drawobj_bind_create(struct kgsl_device *device, + struct kgsl_context *context); + +int kgsl_drawobj_add_bind(struct kgsl_device_private *dev_priv, + struct kgsl_drawobj_bind *bindobj, + void __user *src, u64 cmdsize); + +/** + * kgsl_drawobj_timeline_create - Create a timeline draw object + * @device: A GPU device handle + * @context: Draw context for the drawobj + * + * Create a timeline draw object on @context. + * Return: A pointer to the draw object + */ +struct kgsl_drawobj_timeline * +kgsl_drawobj_timeline_create(struct kgsl_device *device, + struct kgsl_context *context); + +/** + * kgsl_drawobj_add_timeline - Add a timeline to a timeline drawobj + * @dev_priv: Pointer to the process private data + * @timelineobj: Pointer to a timeline drawobject + * @src: Pointer to the &struct kgsl_timeline_val from userspace + * @cmdsize: size of the object in @src + * + * Add a timeline to a draw object. 
+ * Return: 0 on success or negative on failure + */ +int kgsl_drawobj_add_timeline(struct kgsl_device_private *dev_priv, + struct kgsl_drawobj_timeline *timelineobj, + void __user *src, u64 cmdsize); + +#endif /* __KGSL_DRAWOBJ_H */ diff --git a/kgsl_eventlog.c b/kgsl_eventlog.c new file mode 100644 index 0000000000..71987cea1f --- /dev/null +++ b/kgsl_eventlog.c @@ -0,0 +1,232 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include + +#include "kgsl_device.h" +#include "kgsl_eventlog.h" +#include "kgsl_util.h" + +#define EVENTLOG_SIZE SZ_8K +#define MAGIC 0xabbaabba +#define LOG_FENCE_NAME_LEN 74 + +/* + * This an internal event used to skip empty space at the bottom of the + * ringbuffer + */ + +#define LOG_SKIP 1 +#define LOG_FIRE_EVENT 2 +#define LOG_CMDBATCH_SUBMITTED_EVENT 3 +#define LOG_CMDBATCH_RETIRED_EVENT 4 +#define LOG_SYNCPOINT_FENCE_EVENT 5 +#define LOG_SYNCPOINT_FENCE_EXPIRE_EVENT 6 +#define LOG_TIMELINE_FENCE_ALLOC_EVENT 7 +#define LOG_TIMELINE_FENCE_RELEASE_EVENT 8 + +static spinlock_t lock; +static void *kgsl_eventlog; +static int eventlog_wptr; + +struct kgsl_log_header { + u32 magic; + int pid; + u64 time; + u32 eventid; + u32 size; +}; + +/* Add a marker to skip the rest of the eventlog and start over fresh */ +static void add_skip_header(u32 offset) +{ + struct kgsl_log_header *header = kgsl_eventlog + offset; + + header->magic = MAGIC; + header->time = local_clock(); + header->pid = 0; + header->eventid = LOG_SKIP; + header->size = EVENTLOG_SIZE - sizeof(*header) - offset; +} + +static void *kgsl_eventlog_alloc(u32 eventid, u32 size) +{ + struct kgsl_log_header *header; + u32 datasize = size + sizeof(*header); + unsigned long flags; + void *data; + + if (!kgsl_eventlog) + return NULL; + + spin_lock_irqsave(&lock, flags); + if (eventlog_wptr + datasize > (EVENTLOG_SIZE - sizeof(*header))) { + add_skip_header(eventlog_wptr); + 
eventlog_wptr = datasize; + data = kgsl_eventlog; + } else { + data = kgsl_eventlog + eventlog_wptr; + eventlog_wptr += datasize; + } + spin_unlock_irqrestore(&lock, flags); + + header = data; + + header->magic = MAGIC; + header->time = local_clock(); + header->pid = current->pid; + header->eventid = eventid; + header->size = size; + + return data + sizeof(*header); +} + +void kgsl_eventlog_init(void) +{ + kgsl_eventlog = kzalloc(EVENTLOG_SIZE, GFP_KERNEL); + eventlog_wptr = 0; + + spin_lock_init(&lock); + + kgsl_add_to_minidump("KGSL_EVENTLOG", (u64) kgsl_eventlog, + __pa(kgsl_eventlog), EVENTLOG_SIZE); +} + +void kgsl_eventlog_exit(void) +{ + kgsl_remove_from_minidump("KGSL_EVENTLOG", (u64) kgsl_eventlog, + __pa(kgsl_eventlog), EVENTLOG_SIZE); + + kfree(kgsl_eventlog); + kgsl_eventlog = NULL; + eventlog_wptr = 0; +} + +void log_kgsl_fire_event(u32 id, u32 ts, u32 type, u32 age) +{ + struct { + u32 id; + u32 ts; + u32 type; + u32 age; + } *entry; + + entry = kgsl_eventlog_alloc(LOG_FIRE_EVENT, sizeof(*entry)); + if (!entry) + return; + + entry->id = id; + entry->ts = ts; + entry->type = type; + entry->age = age; +} + +void log_kgsl_cmdbatch_submitted_event(u32 id, u32 ts, u32 prio, u64 flags) +{ + struct { + u32 id; + u32 ts; + u32 prio; + u64 flags; + } *entry; + + entry = kgsl_eventlog_alloc(LOG_CMDBATCH_SUBMITTED_EVENT, sizeof(*entry)); + if (!entry) + return; + + entry->id = id; + entry->ts = ts; + entry->prio = prio; + entry->flags = flags; +} + +void log_kgsl_cmdbatch_retired_event(u32 id, u32 ts, u32 prio, u64 flags, + u64 start, u64 retire) +{ + struct { + u32 id; + u32 ts; + u32 prio; + u64 flags; + u64 start; + u64 retire; + } *entry; + + entry = kgsl_eventlog_alloc(LOG_CMDBATCH_RETIRED_EVENT, sizeof(*entry)); + if (!entry) + return; + + entry->id = id; + entry->ts = ts; + entry->prio = prio; + entry->flags = flags; + entry->start = start; + entry->retire = retire; +} + +void log_kgsl_syncpoint_fence_event(u32 id, char *fence_name) +{ + struct { + u32 
id; + char name[LOG_FENCE_NAME_LEN]; + } *entry; + + entry = kgsl_eventlog_alloc(LOG_SYNCPOINT_FENCE_EVENT, sizeof(*entry)); + if (!entry) + return; + + entry->id = id; + memset(entry->name, 0, sizeof(entry->name)); + strlcpy(entry->name, fence_name, sizeof(entry->name)); +} + +void log_kgsl_syncpoint_fence_expire_event(u32 id, char *fence_name) +{ + struct { + u32 id; + char name[LOG_FENCE_NAME_LEN]; + } *entry; + + entry = kgsl_eventlog_alloc(LOG_SYNCPOINT_FENCE_EXPIRE_EVENT, sizeof(*entry)); + if (!entry) + return; + + entry->id = id; + memset(entry->name, 0, sizeof(entry->name)); + strlcpy(entry->name, fence_name, sizeof(entry->name)); +} + +void log_kgsl_timeline_fence_alloc_event(u32 id, u64 seqno) +{ + struct { + u32 id; + u64 seqno; + } *entry; + + entry = kgsl_eventlog_alloc(LOG_TIMELINE_FENCE_ALLOC_EVENT, sizeof(*entry)); + if (!entry) + return; + + entry->id = id; + entry->seqno = seqno; +} + +void log_kgsl_timeline_fence_release_event(u32 id, u64 seqno) +{ + struct { + u32 id; + u64 seqno; + } *entry; + + entry = kgsl_eventlog_alloc(LOG_TIMELINE_FENCE_RELEASE_EVENT, sizeof(*entry)); + if (!entry) + return; + + entry->id = id; + entry->seqno = seqno; +} diff --git a/kgsl_eventlog.h b/kgsl_eventlog.h new file mode 100644 index 0000000000..5e6e0176be --- /dev/null +++ b/kgsl_eventlog.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
+ */ + +#ifndef _KGSL_EVENTLOG_H +#define _KGSL_EVENTLOG_H + +void kgsl_eventlog_init(void); +void kgsl_eventlog_exit(void); + +void log_kgsl_fire_event(u32 id, u32 ts, u32 type, u32 age); +void log_kgsl_cmdbatch_submitted_event(u32 id, u32 ts, u32 prio, u64 flags); +void log_kgsl_cmdbatch_retired_event(u32 id, u32 ts, u32 prio, u64 flags, + u64 start, u64 retire); +void log_kgsl_syncpoint_fence_event(u32 id, char *fence_name); +void log_kgsl_syncpoint_fence_expire_event(u32 id, char *fence_name); +void log_kgsl_timeline_fence_alloc_event(u32 id, u64 seqno); +void log_kgsl_timeline_fence_release_event(u32 id, u64 seqno); +#endif diff --git a/kgsl_events.c b/kgsl_events.c new file mode 100644 index 0000000000..644ddfd183 --- /dev/null +++ b/kgsl_events.c @@ -0,0 +1,434 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2011-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include + +#include "kgsl_debugfs.h" +#include "kgsl_device.h" +#include "kgsl_eventlog.h" +#include "kgsl_trace.h" + +/* + * Define an kmem cache for the event structures since we allocate and free them + * so frequently + */ +static struct kmem_cache *events_cache; + +static inline void signal_event(struct kgsl_device *device, + struct kgsl_event *event, int result) +{ + list_del(&event->node); + event->result = result; + queue_work(device->events_wq, &event->work); +} + +/** + * _kgsl_event_worker() - Work handler for processing GPU event callbacks + * @work: Pointer to the work_struct for the event + * + * Each event callback has its own work struct and is run on a event specific + * workqeuue. This is the worker that queues up the event callback function. 
+ */ +static void _kgsl_event_worker(struct work_struct *work) +{ + struct kgsl_event *event = container_of(work, struct kgsl_event, work); + int id = KGSL_CONTEXT_ID(event->context); + + trace_kgsl_fire_event(id, event->timestamp, event->result, + jiffies - event->created, event->func); + + log_kgsl_fire_event(id, event->timestamp, event->result, + jiffies - event->created); + + event->func(event->device, event->group, event->priv, event->result); + + kgsl_context_put(event->context); + kmem_cache_free(events_cache, event); +} + +/* return true if the group needs to be processed */ +static bool _do_process_group(unsigned int processed, unsigned int cur) +{ + if (processed == cur) + return false; + + /* + * This ensures that the timestamp didn't slip back accidently, maybe + * due to a memory barrier issue. This is highly unlikely but we've + * been burned here in the past. + */ + if ((cur < processed) && ((processed - cur) < KGSL_TIMESTAMP_WINDOW)) + return false; + + return true; +} + +static void _process_event_group(struct kgsl_device *device, + struct kgsl_event_group *group, bool flush) +{ + struct kgsl_event *event, *tmp; + unsigned int timestamp; + struct kgsl_context *context; + + if (group == NULL) + return; + + context = group->context; + + /* + * Sanity check to be sure that we we aren't racing with the context + * getting destroyed + */ + if (WARN_ON(context != NULL && !_kgsl_context_get(context))) + return; + + spin_lock(&group->lock); + + group->readtimestamp(device, group->priv, KGSL_TIMESTAMP_RETIRED, + ×tamp); + + if (!flush && !_do_process_group(group->processed, timestamp)) + goto out; + + list_for_each_entry_safe(event, tmp, &group->events, node) { + if (timestamp_cmp(event->timestamp, timestamp) <= 0) + signal_event(device, event, KGSL_EVENT_RETIRED); + else if (flush) + signal_event(device, event, KGSL_EVENT_CANCELLED); + + } + + group->processed = timestamp; + +out: + spin_unlock(&group->lock); + kgsl_context_put(context); +} + +/** + * 
kgsl_process_event_group() - Handle all the retired events in a group + * @device: Pointer to a KGSL device + * @group: Pointer to a GPU events group to process + */ + +void kgsl_process_event_group(struct kgsl_device *device, + struct kgsl_event_group *group) +{ + _process_event_group(device, group, false); +} + +/** + * kgsl_flush_event_group() - flush all the events in a group by retiring the + * ones can be retired and cancelling the ones that are pending + * @device: Pointer to a KGSL device + * @group: Pointer to a GPU events group to process + */ +void kgsl_flush_event_group(struct kgsl_device *device, + struct kgsl_event_group *group) +{ + _process_event_group(device, group, true); +} + +/** + * kgsl_cancel_events_timestamp() - Cancel pending events for a given timestamp + * @device: Pointer to a KGSL device + * @group: Ponter to the GPU event group that owns the event + * @timestamp: Registered expiry timestamp for the event + */ +void kgsl_cancel_events_timestamp(struct kgsl_device *device, + struct kgsl_event_group *group, unsigned int timestamp) +{ + struct kgsl_event *event, *tmp; + + spin_lock(&group->lock); + + list_for_each_entry_safe(event, tmp, &group->events, node) { + if (timestamp_cmp(timestamp, event->timestamp) == 0) + signal_event(device, event, KGSL_EVENT_CANCELLED); + } + + spin_unlock(&group->lock); +} + +/** + * kgsl_cancel_events() - Cancel all pending events in the group + * @device: Pointer to a KGSL device + * @group: Pointer to a kgsl_events_group + */ +void kgsl_cancel_events(struct kgsl_device *device, + struct kgsl_event_group *group) +{ + struct kgsl_event *event, *tmp; + + spin_lock(&group->lock); + + list_for_each_entry_safe(event, tmp, &group->events, node) + signal_event(device, event, KGSL_EVENT_CANCELLED); + + spin_unlock(&group->lock); +} + +/** + * kgsl_cancel_event() - Cancel a specific event from a group + * @device: Pointer to a KGSL device + * @group: Pointer to the group that contains the events + * @timestamp: 
Registered expiry timestamp for the event + * @func: Registered callback for the function + * @priv: Registered priv data for the function + */ +void kgsl_cancel_event(struct kgsl_device *device, + struct kgsl_event_group *group, unsigned int timestamp, + kgsl_event_func func, void *priv) +{ + struct kgsl_event *event, *tmp; + + spin_lock(&group->lock); + + list_for_each_entry_safe(event, tmp, &group->events, node) { + if (timestamp == event->timestamp && func == event->func && + event->priv == priv) + signal_event(device, event, KGSL_EVENT_CANCELLED); + } + + spin_unlock(&group->lock); +} + +/** + * kgsl_event_pending() - Searches for an event in an event group + * @device: Pointer to a KGSL device + * @group: Pointer to the group that contains the events + * @timestamp: Registered expiry timestamp for the event + * @func: Registered callback for the function + * @priv: Registered priv data for the function + */ +bool kgsl_event_pending(struct kgsl_device *device, + struct kgsl_event_group *group, + unsigned int timestamp, kgsl_event_func func, void *priv) +{ + struct kgsl_event *event; + bool result = false; + + spin_lock(&group->lock); + list_for_each_entry(event, &group->events, node) { + if (timestamp == event->timestamp && func == event->func && + event->priv == priv) { + result = true; + break; + } + } + spin_unlock(&group->lock); + return result; +} +/** + * kgsl_add_event() - Add a new GPU event to a group + * @device: Pointer to a KGSL device + * @group: Pointer to the group to add the event to + * @timestamp: Timestamp that the event will expire on + * @func: Callback function for the event + * @priv: Private data to send to the callback function + */ +int kgsl_add_event(struct kgsl_device *device, struct kgsl_event_group *group, + unsigned int timestamp, kgsl_event_func func, void *priv) +{ + unsigned int queued; + struct kgsl_context *context = group->context; + struct kgsl_event *event; + unsigned int retired; + + if (!func) + return -EINVAL; + + /* + 
* If the caller is creating their own timestamps, let them schedule + * events in the future. Otherwise only allow timestamps that have been + * queued. + */ + if (!context || !(context->flags & KGSL_CONTEXT_USER_GENERATED_TS)) { + group->readtimestamp(device, group->priv, KGSL_TIMESTAMP_QUEUED, + &queued); + + if (timestamp_cmp(timestamp, queued) > 0) + return -EINVAL; + } + + event = kmem_cache_alloc(events_cache, GFP_KERNEL); + if (event == NULL) + return -ENOMEM; + + /* Get a reference to the context while the event is active */ + if (context != NULL && !_kgsl_context_get(context)) { + kmem_cache_free(events_cache, event); + return -ENOENT; + } + + event->device = device; + event->context = context; + event->timestamp = timestamp; + event->priv = priv; + event->func = func; + event->created = jiffies; + event->group = group; + + INIT_WORK(&event->work, _kgsl_event_worker); + + trace_kgsl_register_event(KGSL_CONTEXT_ID(context), timestamp, func); + + spin_lock(&group->lock); + + /* + * Check to see if the requested timestamp has already retired. 
If so, + * schedule the callback right away + */ + group->readtimestamp(device, group->priv, KGSL_TIMESTAMP_RETIRED, + &retired); + + if (timestamp_cmp(retired, timestamp) >= 0) { + event->result = KGSL_EVENT_RETIRED; + queue_work(device->events_wq, &event->work); + spin_unlock(&group->lock); + return 0; + } + + /* Add the event to the group list */ + list_add_tail(&event->node, &group->events); + + spin_unlock(&group->lock); + + return 0; +} + +void kgsl_process_event_groups(struct kgsl_device *device) +{ + struct kgsl_event_group *group; + + read_lock(&device->event_groups_lock); + list_for_each_entry(group, &device->event_groups, group) + _process_event_group(device, group, false); + read_unlock(&device->event_groups_lock); +} + +void kgsl_del_event_group(struct kgsl_device *device, + struct kgsl_event_group *group) +{ + /* Check if the group is uninintalized */ + if (!group->context) + return; + + /* Make sure that all the events have been deleted from the list */ + WARN_ON(!list_empty(&group->events)); + + write_lock(&device->event_groups_lock); + list_del(&group->group); + write_unlock(&device->event_groups_lock); +} + +void kgsl_add_event_group(struct kgsl_device *device, + struct kgsl_event_group *group, struct kgsl_context *context, + readtimestamp_func readtimestamp, + void *priv, const char *fmt, ...) 
+{ + va_list args; + + WARN_ON(readtimestamp == NULL); + + spin_lock_init(&group->lock); + INIT_LIST_HEAD(&group->events); + + group->context = context; + group->readtimestamp = readtimestamp; + group->priv = priv; + + if (fmt) { + va_start(args, fmt); + vsnprintf(group->name, sizeof(group->name), fmt, args); + va_end(args); + } + + write_lock(&device->event_groups_lock); + list_add_tail(&group->group, &device->event_groups); + write_unlock(&device->event_groups_lock); +} + +static void events_debugfs_print_group(struct seq_file *s, + struct kgsl_event_group *group) +{ + struct kgsl_event *event; + unsigned int retired; + + spin_lock(&group->lock); + + seq_printf(s, "%s: last=%d\n", group->name, group->processed); + + list_for_each_entry(event, &group->events, node) { + + group->readtimestamp(event->device, group->priv, + KGSL_TIMESTAMP_RETIRED, &retired); + + seq_printf(s, "\t%u:%u age=%lu func=%ps [retired=%u]\n", + group->context ? group->context->id : + KGSL_MEMSTORE_GLOBAL, + event->timestamp, jiffies - event->created, + event->func, retired); + } + spin_unlock(&group->lock); +} + +static int events_show(struct seq_file *s, void *unused) +{ + struct kgsl_device *device = s->private; + struct kgsl_event_group *group; + + seq_puts(s, "event groups:\n"); + seq_puts(s, "--------------\n"); + + read_lock(&device->event_groups_lock); + list_for_each_entry(group, &device->event_groups, group) { + events_debugfs_print_group(s, group); + seq_puts(s, "\n"); + } + read_unlock(&device->event_groups_lock); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(events); + +void kgsl_device_events_remove(struct kgsl_device *device) +{ + struct kgsl_event_group *group, *tmp; + + write_lock(&device->event_groups_lock); + list_for_each_entry_safe(group, tmp, &device->event_groups, group) { + WARN_ON(!list_empty(&group->events)); + list_del(&group->group); + } + write_unlock(&device->event_groups_lock); +} + +void kgsl_device_events_probe(struct kgsl_device *device) +{ + 
INIT_LIST_HEAD(&device->event_groups); + rwlock_init(&device->event_groups_lock); + + debugfs_create_file("events", 0444, device->d_debugfs, device, + &events_fops); +} + +/** + * kgsl_events_exit() - Destroy the event kmem cache on module exit + */ +void kgsl_events_exit(void) +{ + kmem_cache_destroy(events_cache); +} + +/** + * kgsl_events_init() - Create the event kmem cache on module start + */ +void __init kgsl_events_init(void) +{ + events_cache = KMEM_CACHE(kgsl_event, 0); +} diff --git a/kgsl_gmu_core.c b/kgsl_gmu_core.c new file mode 100644 index 0000000000..83c3d53936 --- /dev/null +++ b/kgsl_gmu_core.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include + +#include "adreno.h" +#include "kgsl_device.h" +#include "kgsl_gmu_core.h" +#include "kgsl_trace.h" + +static const struct of_device_id gmu_match_table[] = { + { .compatible = "qcom,gpu-gmu", .data = &a6xx_gmu_driver }, + { .compatible = "qcom,gpu-rgmu", .data = &a6xx_rgmu_driver }, + { .compatible = "qcom,gen7-gmu", .data = &gen7_gmu_driver }, + {}, +}; + +void __init gmu_core_register(void) +{ + const struct of_device_id *match; + struct device_node *node; + + node = of_find_matching_node_and_match(NULL, gmu_match_table, + &match); + if (!node) + return; + + platform_driver_register((struct platform_driver *) match->data); + of_node_put(node); +} + +void __exit gmu_core_unregister(void) +{ + const struct of_device_id *match; + struct device_node *node; + + node = of_find_matching_node_and_match(NULL, gmu_match_table, + &match); + if (!node) + return; + + platform_driver_unregister((struct platform_driver *) match->data); + of_node_put(node); +} + +bool gmu_core_isenabled(struct kgsl_device *device) +{ + return test_bit(GMU_ENABLED, &device->gmu_core.flags); +} + +bool gmu_core_gpmu_isenabled(struct kgsl_device *device) +{ + return (device->gmu_core.dev_ops != NULL); +} + +bool 
gmu_core_scales_bandwidth(struct kgsl_device *device) +{ + const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device); + + if (ops && ops->scales_bandwidth) + return ops->scales_bandwidth(device); + + return false; +} + +int gmu_core_dev_acd_set(struct kgsl_device *device, bool val) +{ + const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device); + + if (ops && ops->acd_set) + return ops->acd_set(device, val); + + return -EINVAL; +} + +void gmu_core_regread(struct kgsl_device *device, unsigned int offsetwords, + unsigned int *value) +{ + u32 val = kgsl_regmap_read(&device->regmap, offsetwords); + *value = val; +} + +void gmu_core_regwrite(struct kgsl_device *device, unsigned int offsetwords, + unsigned int value) +{ + kgsl_regmap_write(&device->regmap, value, offsetwords); +} + +void gmu_core_blkwrite(struct kgsl_device *device, unsigned int offsetwords, + const void *buffer, size_t size) +{ + kgsl_regmap_bulk_write(&device->regmap, offsetwords, + buffer, size >> 2); +} + +void gmu_core_regrmw(struct kgsl_device *device, + unsigned int offsetwords, + unsigned int mask, unsigned int bits) +{ + kgsl_regmap_rmw(&device->regmap, offsetwords, mask, bits); +} + +int gmu_core_dev_oob_set(struct kgsl_device *device, enum oob_request req) +{ + const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device); + + if (ops && ops->oob_set) + return ops->oob_set(device, req); + + return 0; +} + +void gmu_core_dev_oob_clear(struct kgsl_device *device, enum oob_request req) +{ + const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device); + + if (ops && ops->oob_clear) + ops->oob_clear(device, req); +} + +void gmu_core_dev_cooperative_reset(struct kgsl_device *device) +{ + + const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device); + + if (ops && ops->cooperative_reset) + ops->cooperative_reset(device); +} + +bool gmu_core_dev_gx_is_on(struct kgsl_device *device) +{ + const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device); + + if (ops && ops->gx_is_on) + return ops->gx_is_on(device); + + return true; +} 
+ +int gmu_core_dev_ifpc_show(struct kgsl_device *device) +{ + const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device); + + if (ops && ops->ifpc_show) + return ops->ifpc_show(device); + + return 0; +} + +int gmu_core_dev_ifpc_store(struct kgsl_device *device, unsigned int val) +{ + const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device); + + if (ops && ops->ifpc_store) + return ops->ifpc_store(device, val); + + return -EINVAL; +} + +int gmu_core_dev_wait_for_active_transition(struct kgsl_device *device) +{ + const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device); + + if (ops && ops->wait_for_active_transition) + return ops->wait_for_active_transition(device); + + return 0; +} + +void gmu_core_fault_snapshot(struct kgsl_device *device) +{ + device->gmu_fault = true; + kgsl_device_snapshot(device, NULL, true); +} + +int gmu_core_timed_poll_check(struct kgsl_device *device, + unsigned int offset, unsigned int expected_ret, + unsigned int timeout_ms, unsigned int mask) +{ + u32 val; + + return kgsl_regmap_read_poll_timeout(&device->regmap, offset, + val, (val & mask) == expected_ret, 100, timeout_ms * 1000); +} + +int gmu_core_map_memdesc(struct iommu_domain *domain, struct kgsl_memdesc *memdesc, + u64 gmuaddr, int attrs) +{ + size_t mapped; + + if (!memdesc->pages) { + mapped = iommu_map_sg(domain, gmuaddr, memdesc->sgt->sgl, + memdesc->sgt->nents, attrs); + } else { + struct sg_table sgt = { 0 }; + int ret; + + ret = sg_alloc_table_from_pages(&sgt, memdesc->pages, + memdesc->page_count, 0, memdesc->size, GFP_KERNEL); + + if (ret) + return ret; + + mapped = iommu_map_sg(domain, gmuaddr, sgt.sgl, sgt.nents, attrs); + sg_free_table(&sgt); + } + + return mapped == 0 ? -ENOMEM : 0; +} diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h new file mode 100644 index 0000000000..0ae12a8e04 --- /dev/null +++ b/kgsl_gmu_core.h @@ -0,0 +1,321 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved. 
+ */ +#ifndef __KGSL_GMU_CORE_H +#define __KGSL_GMU_CORE_H + +#include + +/* GMU_DEVICE - Given an KGSL device return the GMU specific struct */ +#define GMU_DEVICE_OPS(_a) ((_a)->gmu_core.dev_ops) + +#define MAX_GX_LEVELS 16 +#define MAX_CX_LEVELS 4 +#define MAX_CNOC_LEVELS 2 +#define MAX_CNOC_CMDS 6 +#define MAX_BW_CMDS 8 +#define INVALID_DCVS_IDX 0xFF + +#if MAX_CNOC_LEVELS > MAX_GX_LEVELS +#error "CNOC levels cannot exceed GX levels" +#endif + +/* + * These are the different ways the GMU can boot. GMU_WARM_BOOT is waking up + * from slumber. GMU_COLD_BOOT is booting for the first time. GMU_RESET + * is a soft reset of the GMU. + */ +enum gmu_core_boot { + GMU_WARM_BOOT = 0, + GMU_COLD_BOOT = 1, + GMU_RESET = 2 +}; + +/* Bits for the flags field in the gmu structure */ +enum gmu_core_flags { + GMU_BOOT_INIT_DONE = 0, + GMU_HFI_ON, + GMU_FAULT, + GMU_DCVS_REPLAY, + GMU_ENABLED, + GMU_RSCC_SLEEP_SEQ_DONE, + GMU_DISABLE_SLUMBER, +}; + +/* + * OOB requests values. These range from 0 to 7 and then + * the BIT() offset into the actual value is calculated + * later based on the request. This keeps the math clean + * and easy to ensure not reaching over/under the range + * of 8 bits. + */ +enum oob_request { + oob_gpu = 0, + oob_perfcntr = 1, + oob_boot_slumber = 6, /* reserved special case */ + oob_dcvs = 7, /* reserved special case */ + oob_max, +}; + +enum gmu_pwrctrl_mode { + GMU_FW_START, + GMU_FW_STOP, + GMU_SUSPEND, + GMU_DCVS_NOHFI, + GMU_NOTIFY_SLUMBER, + INVALID_POWER_CTRL +}; + +#define GPU_HW_ACTIVE 0x00 +#define GPU_HW_IFPC 0x03 +#define GPU_HW_SLUMBER 0x0f + +/* + * Wait time before trying to write the register again. + * Hopefully the GMU has finished waking up during this delay. + * This delay must be less than the IFPC main hysteresis or + * the GMU will start shutting down before we try again. + */ +#define GMU_CORE_WAKEUP_DELAY_US 10 + +/* Max amount of tries to wake up the GMU. The short retry + * limit is half of the long retry limit. 
After the short + * number of retries, we print an informational message to say + * exiting IFPC is taking longer than expected. We continue + * to retry after this until the long retry limit. + */ +#define GMU_CORE_SHORT_WAKEUP_RETRY_LIMIT 100 +#define GMU_CORE_LONG_WAKEUP_RETRY_LIMIT 200 + +#define FENCE_STATUS_WRITEDROPPED0_MASK 0x1 +#define FENCE_STATUS_WRITEDROPPED1_MASK 0x2 + +#define GMU_FREQ_MIN 200000000 +#define GMU_FREQ_MAX 500000000 + +#define GMU_VER_MAJOR(ver) (((ver) >> 28) & 0xF) +#define GMU_VER_MINOR(ver) (((ver) >> 16) & 0xFFF) +#define GMU_VER_STEP(ver) ((ver) & 0xFFFF) +#define GMU_VERSION(major, minor) \ + ((((major) & 0xF) << 28) | (((minor) & 0xFFF) << 16)) + +#define GMU_INT_WDOG_BITE BIT(0) +#define GMU_INT_RSCC_COMP BIT(1) +#define GMU_INT_FENCE_ERR BIT(3) +#define GMU_INT_DBD_WAKEUP BIT(4) +#define GMU_INT_HOST_AHB_BUS_ERR BIT(5) +#define GMU_AO_INT_MASK \ + (GMU_INT_WDOG_BITE | \ + GMU_INT_FENCE_ERR | \ + GMU_INT_HOST_AHB_BUS_ERR) + +/* Bitmask for GPU low power mode enabling and hysterisis*/ +#define SPTP_ENABLE_MASK (BIT(2) | BIT(0)) +#define IFPC_ENABLE_MASK (BIT(1) | BIT(0)) + +/* Bitmask for RPMH capability enabling */ +#define RPMH_INTERFACE_ENABLE BIT(0) +#define LLC_VOTE_ENABLE BIT(4) +#define DDR_VOTE_ENABLE BIT(8) +#define MX_VOTE_ENABLE BIT(9) +#define CX_VOTE_ENABLE BIT(10) +#define GFX_VOTE_ENABLE BIT(11) +#define RPMH_ENABLE_MASK (RPMH_INTERFACE_ENABLE | \ + LLC_VOTE_ENABLE | \ + DDR_VOTE_ENABLE | \ + MX_VOTE_ENABLE | \ + CX_VOTE_ENABLE | \ + GFX_VOTE_ENABLE) + +/* Constants for GMU OOBs */ +#define OOB_BOOT_OPTION 0 +#define OOB_SLUMBER_OPTION 1 + +/* Gmu FW block header format */ +struct gmu_block_header { + u32 addr; + u32 size; + u32 type; + u32 value; +}; + +/* GMU Block types */ +#define GMU_BLK_TYPE_DATA 0 +#define GMU_BLK_TYPE_PREALLOC_REQ 1 +#define GMU_BLK_TYPE_CORE_VER 2 +#define GMU_BLK_TYPE_CORE_DEV_VER 3 +#define GMU_BLK_TYPE_PWR_VER 4 +#define GMU_BLK_TYPE_PWR_DEV_VER 5 +#define GMU_BLK_TYPE_HFI_VER 6 
+#define GMU_BLK_TYPE_PREALLOC_PERSIST_REQ 7 + +/* For GMU Logs*/ +#define GMU_LOG_SIZE SZ_16K + +/* GMU memdesc entries */ +#define GMU_KERNEL_ENTRIES 16 + +enum gmu_mem_type { + GMU_ITCM = 0, + GMU_ICACHE, + GMU_CACHE = GMU_ICACHE, + GMU_DTCM, + GMU_DCACHE, + GMU_NONCACHED_KERNEL, + GMU_NONCACHED_USER, + GMU_MEM_TYPE_MAX, +}; + +/** + * struct gmu_memdesc - Gmu shared memory object descriptor + * @hostptr: Kernel virtual address + * @gmuaddr: GPU virtual address + * @physaddr: Physical address of the memory object + * @size: Size of the memory object + */ +struct gmu_memdesc { + void *hostptr; + u32 gmuaddr; + phys_addr_t physaddr; + u32 size; +}; + +struct kgsl_mailbox { + struct mbox_client client; + struct mbox_chan *channel; +}; + +struct icc_path; + +struct gmu_vma_entry { + /** @start: Starting virtual address of the vma */ + u32 start; + /** @size: Size of this vma */ + u32 size; + /** @next_va: Next available virtual address in this vma */ + u32 next_va; +}; + +enum { + GMU_PRIV_FIRST_BOOT_DONE = 0, + GMU_PRIV_GPU_STARTED, + GMU_PRIV_HFI_STARTED, + GMU_PRIV_RSCC_SLEEP_DONE, + GMU_PRIV_PM_SUSPEND, + GMU_PRIV_PDC_RSC_LOADED, +}; + +struct device_node; +struct kgsl_device; +struct kgsl_snapshot; + +struct gmu_dev_ops { + int (*oob_set)(struct kgsl_device *device, enum oob_request req); + void (*oob_clear)(struct kgsl_device *device, enum oob_request req); + bool (*gx_is_on)(struct kgsl_device *device); + int (*ifpc_store)(struct kgsl_device *device, unsigned int val); + unsigned int (*ifpc_show)(struct kgsl_device *device); + void (*cooperative_reset)(struct kgsl_device *device); + void (*halt_execution)(struct kgsl_device *device); + int (*wait_for_active_transition)(struct kgsl_device *device); + bool (*scales_bandwidth)(struct kgsl_device *device); + int (*acd_set)(struct kgsl_device *device, bool val); +}; + +/** + * struct gmu_core_device - GMU Core device structure + * @ptr: Pointer to GMU device structure + * @dev_ops: Pointer to gmu device operations 
+ * @flags: GMU flags + */ +struct gmu_core_device { + void *ptr; + const struct gmu_dev_ops *dev_ops; + unsigned long flags; +}; + +extern struct platform_driver a6xx_gmu_driver; +extern struct platform_driver a6xx_rgmu_driver; +extern struct platform_driver a6xx_hwsched_driver; +extern struct platform_driver gen7_gmu_driver; +extern struct platform_driver gen7_hwsched_driver; + +/* GMU core functions */ + +void __init gmu_core_register(void); +void __exit gmu_core_unregister(void); + +bool gmu_core_gpmu_isenabled(struct kgsl_device *device); +bool gmu_core_scales_bandwidth(struct kgsl_device *device); +bool gmu_core_isenabled(struct kgsl_device *device); +int gmu_core_dev_acd_set(struct kgsl_device *device, bool val); +void gmu_core_regread(struct kgsl_device *device, unsigned int offsetwords, + unsigned int *value); +void gmu_core_regwrite(struct kgsl_device *device, unsigned int offsetwords, + unsigned int value); + +/** + * gmu_core_blkwrite - Do a bulk I/O write to GMU + * @device: Pointer to the kgsl device + * @offsetwords: Destination dword offset + * @buffer: Pointer to the source buffer + * @size: Number of bytes to copy + * + * Write a series of GMU registers quickly without bothering to spend time + * logging the register writes. The logging of these writes causes extra + * delays that could allow IRQs arrive and be serviced before finishing + * all the writes. 
+ */ +void gmu_core_blkwrite(struct kgsl_device *device, unsigned int offsetwords, + const void *buffer, size_t size); +void gmu_core_regrmw(struct kgsl_device *device, unsigned int offsetwords, + unsigned int mask, unsigned int bits); +int gmu_core_dev_oob_set(struct kgsl_device *device, enum oob_request req); +void gmu_core_dev_oob_clear(struct kgsl_device *device, enum oob_request req); +bool gmu_core_dev_gx_is_on(struct kgsl_device *device); +int gmu_core_dev_ifpc_show(struct kgsl_device *device); +int gmu_core_dev_ifpc_store(struct kgsl_device *device, unsigned int val); +int gmu_core_dev_wait_for_active_transition(struct kgsl_device *device); +void gmu_core_dev_cooperative_reset(struct kgsl_device *device); + +/** + * gmu_core_fault_snapshot - Set gmu fault and trigger snapshot + * @device: Pointer to the kgsl device + * + * Set the gmu fault and take snapshot when we hit a gmu fault + */ +void gmu_core_fault_snapshot(struct kgsl_device *device); + +/** + * gmu_core_timed_poll_check() - polling *gmu* register at given offset until + * its value changed to match expected value. The function times + * out and returns after given duration if register is not updated + * as expected. 
+ * + * @device: Pointer to KGSL device + * @offset: Register offset in dwords + * @expected_ret: expected register value that stops polling + * @timeout_ms: time in milliseconds to poll the register + * @mask: bitmask to filter register value to match expected_ret + */ +int gmu_core_timed_poll_check(struct kgsl_device *device, + unsigned int offset, unsigned int expected_ret, + unsigned int timeout_ms, unsigned int mask); + +struct kgsl_memdesc; +struct iommu_domain; + +/** + * gmu_core_map_memdesc - Map the memdesc into the GMU IOMMU domain + * @domain: Domain to map the memory into + * @memdesc: Memory descriptor to map + * @gmuaddr: Virtual GMU address to map the memory into + * @attrs: Attributes for the mapping + * + * Return: 0 on success or -ENOMEM on failure + */ +int gmu_core_map_memdesc(struct iommu_domain *domain, struct kgsl_memdesc *memdesc, + u64 gmuaddr, int attrs); + +#endif /* __KGSL_GMU_CORE_H */ diff --git a/kgsl_ioctl.c b/kgsl_ioctl.c new file mode 100644 index 0000000000..c6b55641a8 --- /dev/null +++ b/kgsl_ioctl.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved. 
+ */ + +#include "kgsl_device.h" +#include "kgsl_sync.h" + +static const struct kgsl_ioctl kgsl_ioctl_funcs[] = { + KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_GETPROPERTY, + kgsl_ioctl_device_getproperty), + /* IOCTL_KGSL_DEVICE_WAITTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, + kgsl_ioctl_device_waittimestamp_ctxtid), + KGSL_IOCTL_FUNC(IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS, + kgsl_ioctl_rb_issueibcmds), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SUBMIT_COMMANDS, + kgsl_ioctl_submit_commands), + /* IOCTL_KGSL_CMDSTREAM_READTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID, + kgsl_ioctl_cmdstream_readtimestamp_ctxtid), + /* IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID, + kgsl_ioctl_cmdstream_freememontimestamp_ctxtid), + KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_CREATE, + kgsl_ioctl_drawctxt_create), + KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_DESTROY, + kgsl_ioctl_drawctxt_destroy), + KGSL_IOCTL_FUNC(IOCTL_KGSL_MAP_USER_MEM, + kgsl_ioctl_map_user_mem), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FROM_PMEM, + kgsl_ioctl_map_user_mem), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FREE, + kgsl_ioctl_sharedmem_free), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE, + kgsl_ioctl_sharedmem_flush_cache), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC, + kgsl_ioctl_gpumem_alloc), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMESTAMP_EVENT, + kgsl_ioctl_timestamp_event), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SETPROPERTY, + kgsl_ioctl_device_setproperty), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC_ID, + kgsl_ioctl_gpumem_alloc_id), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_FREE_ID, + kgsl_ioctl_gpumem_free_id), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_GET_INFO, + kgsl_ioctl_gpumem_get_info), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE, + kgsl_ioctl_gpumem_sync_cache), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK, + kgsl_ioctl_gpumem_sync_cache_bulk), + 
KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_CREATE, + kgsl_ioctl_syncsource_create), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_DESTROY, + kgsl_ioctl_syncsource_destroy), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE, + kgsl_ioctl_syncsource_create_fence), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE, + kgsl_ioctl_syncsource_signal_fence), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_ALLOC, + kgsl_ioctl_gpuobj_alloc), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_FREE, + kgsl_ioctl_gpuobj_free), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_INFO, + kgsl_ioctl_gpuobj_info), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_IMPORT, + kgsl_ioctl_gpuobj_import), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_SYNC, + kgsl_ioctl_gpuobj_sync), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPU_COMMAND, + kgsl_ioctl_gpu_command), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_SET_INFO, + kgsl_ioctl_gpuobj_set_info), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_PHYS_ALLOC, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_PHYS_FREE, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_VIRT_ALLOC, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_VIRT_FREE, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_BIND, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPU_SPARSE_COMMAND, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_BIND_RANGES, + kgsl_ioctl_gpumem_bind_ranges), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPU_AUX_COMMAND, + kgsl_ioctl_gpu_aux_command), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_CREATE, + kgsl_ioctl_timeline_create), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_WAIT, + kgsl_ioctl_timeline_wait), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_FENCE_GET, + kgsl_ioctl_timeline_fence_get), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_QUERY, + kgsl_ioctl_timeline_query), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_SIGNAL, + kgsl_ioctl_timeline_signal), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_DESTROY, + kgsl_ioctl_timeline_destroy), +}; + +long kgsl_ioctl_copy_in(unsigned int kernel_cmd, unsigned int user_cmd, + unsigned long arg, unsigned char *ptr) +{ + unsigned int usize = _IOC_SIZE(user_cmd); + unsigned int ksize = 
_IOC_SIZE(kernel_cmd); + unsigned int copy = ksize < usize ? ksize : usize; + + if ((kernel_cmd & IOC_IN) && (user_cmd & IOC_IN)) { + if (copy > 0 && copy_from_user(ptr, (void __user *) arg, copy)) + return -EFAULT; + } + + return 0; +} + +long kgsl_ioctl_copy_out(unsigned int kernel_cmd, unsigned int user_cmd, + unsigned long arg, unsigned char *ptr) +{ + unsigned int usize = _IOC_SIZE(user_cmd); + unsigned int ksize = _IOC_SIZE(kernel_cmd); + unsigned int copy = ksize < usize ? ksize : usize; + + if ((kernel_cmd & IOC_OUT) && (user_cmd & IOC_OUT)) { + if (copy > 0 && copy_to_user((void __user *) arg, ptr, copy)) + return -EFAULT; + } + + return 0; +} + +long kgsl_ioctl_helper(struct file *filep, unsigned int cmd, unsigned long arg, + const struct kgsl_ioctl *cmds, int len) +{ + struct kgsl_device_private *dev_priv = filep->private_data; + unsigned char data[128] = { 0 }; + unsigned int nr = _IOC_NR(cmd); + long ret; + + if (nr >= len || cmds[nr].func == NULL) + return -ENOIOCTLCMD; + + if (_IOC_SIZE(cmds[nr].cmd) > sizeof(data)) { + dev_err_ratelimited(dev_priv->device->dev, + "data too big for ioctl 0x%08x: %d/%zu\n", + cmd, _IOC_SIZE(cmds[nr].cmd), sizeof(data)); + return -EINVAL; + } + + if (_IOC_SIZE(cmds[nr].cmd)) { + ret = kgsl_ioctl_copy_in(cmds[nr].cmd, cmd, arg, data); + if (ret) + return ret; + } + + ret = cmds[nr].func(dev_priv, cmd, data); + + if (ret == 0 && _IOC_SIZE(cmds[nr].cmd)) + ret = kgsl_ioctl_copy_out(cmds[nr].cmd, cmd, arg, data); + + return ret; +} + +long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) +{ + struct kgsl_device_private *dev_priv = filep->private_data; + struct kgsl_device *device = dev_priv->device; + long ret; + + ret = kgsl_ioctl_helper(filep, cmd, arg, kgsl_ioctl_funcs, + ARRAY_SIZE(kgsl_ioctl_funcs)); + + /* + * If the command was unrecognized in the generic core, try the device + * specific function + */ + + if (ret == -ENOIOCTLCMD) { + if (is_compat_task()) + return 
device->ftbl->compat_ioctl(dev_priv, cmd, arg); + + return device->ftbl->ioctl(dev_priv, cmd, arg); + } + + return ret; +} diff --git a/kgsl_iommu.c b/kgsl_iommu.c new file mode 100644 index 0000000000..282a231b90 --- /dev/null +++ b/kgsl_iommu.c @@ -0,0 +1,2419 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2011-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "adreno.h" +#include "kgsl_device.h" +#include "kgsl_iommu.h" +#include "kgsl_mmu.h" +#include "kgsl_pwrctrl.h" +#include "kgsl_sharedmem.h" +#include "kgsl_trace.h" + +#define KGSL_IOMMU_SPLIT_TABLE_BASE 0x0001ff8000000000ULL + +#define KGSL_IOMMU_IDR1_OFFSET 0x24 +#define IDR1_NUMPAGENDXB GENMASK(30, 28) +#define IDR1_PAGESIZE BIT(31) + +static const struct kgsl_mmu_pt_ops secure_pt_ops; +static const struct kgsl_mmu_pt_ops default_pt_ops; +static const struct kgsl_mmu_pt_ops iopgtbl_pt_ops; + +/* Zero page for non-secure VBOs */ +static struct page *kgsl_vbo_zero_page; + +/* + * struct kgsl_iommu_addr_entry - entry in the kgsl_pagetable rbtree. 
+ * @base: starting virtual address of the entry + * @size: size of the entry + * @node: the rbtree node + */ +struct kgsl_iommu_addr_entry { + uint64_t base; + uint64_t size; + struct rb_node node; +}; + +static struct kmem_cache *addr_entry_cache; + +/* These are dummy TLB ops for the io-pgtable instances */ + +static void _tlb_flush_all(void *cookie) +{ +} + +static void _tlb_flush_walk(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ +} + +static void _tlb_add_page(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, void *cookie) +{ +} + +static const struct iommu_flush_ops kgsl_iopgtbl_tlb_ops = { + .tlb_flush_all = _tlb_flush_all, + .tlb_flush_walk = _tlb_flush_walk, + .tlb_add_page = _tlb_add_page, +}; + +static bool _iommu_domain_check_bool(struct iommu_domain *domain, int attr) +{ + u32 val; + int ret = iommu_domain_get_attr(domain, attr, &val); + + return (!ret && val); +} + +static int _iommu_domain_context_bank(struct iommu_domain *domain) +{ + int val, ret; + + ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_CONTEXT_BANK, &val); + + return ret ? 
ret : val; +} + +static struct kgsl_iommu_pt *to_iommu_pt(struct kgsl_pagetable *pagetable) +{ + return container_of(pagetable, struct kgsl_iommu_pt, base); +} + +static u32 get_llcc_flags(struct iommu_domain *domain) +{ + if (_iommu_domain_check_bool(domain, DOMAIN_ATTR_USE_LLC_NWA)) + return IOMMU_USE_LLC_NWA; + + if (_iommu_domain_check_bool(domain, DOMAIN_ATTR_USE_UPSTREAM_HINT)) + return IOMMU_USE_UPSTREAM_HINT; + + return 0; +} + + +static int _iommu_get_protection_flags(struct iommu_domain *domain, + struct kgsl_memdesc *memdesc) +{ + int flags = IOMMU_READ | IOMMU_WRITE | IOMMU_NOEXEC; + + flags |= get_llcc_flags(domain); + + if (memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY) + flags &= ~IOMMU_WRITE; + + if (memdesc->priv & KGSL_MEMDESC_PRIVILEGED) + flags |= IOMMU_PRIV; + + if (memdesc->flags & KGSL_MEMFLAGS_IOCOHERENT) + flags |= IOMMU_CACHE; + + if (memdesc->priv & KGSL_MEMDESC_UCODE) + flags &= ~IOMMU_NOEXEC; + + return flags; +} + +/* Get a scatterlist for the subrange in the child memdesc */ +static int get_sg_from_child(struct sg_table *sgt, struct kgsl_memdesc *child, + u64 offset, u64 length) +{ + int npages = (length >> PAGE_SHIFT); + int pgoffset = (offset >> PAGE_SHIFT); + struct scatterlist *target_sg; + struct sg_page_iter iter; + int ret; + + if (child->pages) + return sg_alloc_table_from_pages(sgt, + child->pages + pgoffset, npages, 0, + length, GFP_KERNEL); + + ret = sg_alloc_table(sgt, npages, GFP_KERNEL); + if (ret) + return ret; + + target_sg = sgt->sgl; + + for_each_sg_page(child->sgt->sgl, &iter, npages, pgoffset) { + sg_set_page(target_sg, sg_page_iter_page(&iter), PAGE_SIZE, 0); + target_sg = sg_next(target_sg); + } + + return 0; +} + +static struct iommu_domain *to_iommu_domain(struct kgsl_iommu_context *context) +{ + return context->domain; +} + +static struct kgsl_iommu *to_kgsl_iommu(struct kgsl_pagetable *pt) +{ + return &pt->mmu->iommu; +} + +/* + * One page allocation for a guard region to protect against over-zealous + * GPU
pre-fetch + */ +static struct page *kgsl_guard_page; +static struct page *kgsl_secure_guard_page; + +static struct page *iommu_get_guard_page(struct kgsl_memdesc *memdesc) +{ + if (kgsl_memdesc_is_secured(memdesc)) { + if (!kgsl_secure_guard_page) + kgsl_secure_guard_page = kgsl_alloc_secure_page(); + + return kgsl_secure_guard_page; + } + + if (!kgsl_guard_page) + kgsl_guard_page = alloc_page(GFP_KERNEL | __GFP_ZERO | + __GFP_NORETRY | __GFP_HIGHMEM); + + return kgsl_guard_page; +} + +static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) +{ + struct kgsl_iommu *iommu = &pt->base.mmu->iommu; + struct io_pgtable_ops *ops = pt->pgtbl_ops; + + while (size) { + if ((ops->unmap(ops, gpuaddr, PAGE_SIZE, NULL)) != PAGE_SIZE) + return -EINVAL; + + gpuaddr += PAGE_SIZE; + size -= PAGE_SIZE; + } + + iommu_flush_iotlb_all(to_iommu_domain(&iommu->user_context)); + iommu_flush_iotlb_all(to_iommu_domain(&iommu->lpac_context)); + + return 0; +} + +static size_t _iopgtbl_map_pages(struct kgsl_iommu_pt *pt, u64 gpuaddr, + struct page **pages, int npages, int prot) +{ + struct io_pgtable_ops *ops = pt->pgtbl_ops; + size_t mapped = 0; + u64 addr = gpuaddr; + int ret, i; + + for (i = 0; i < npages; i++) { + ret = ops->map(ops, addr, page_to_phys(pages[i]), PAGE_SIZE, + prot, GFP_KERNEL); + if (ret) { + _iopgtbl_unmap(pt, gpuaddr, mapped); + return 0; + } + + mapped += PAGE_SIZE; + addr += PAGE_SIZE; + } + + return mapped; +} + +static int _iopgtbl_map_sg(struct kgsl_iommu_pt *pt, u64 gpuaddr, + struct sg_table *sgt, int prot) +{ + struct io_pgtable_ops *ops = pt->pgtbl_ops; + struct scatterlist *sg; + size_t mapped = 0; + u64 addr = gpuaddr; + int ret, i; + + for_each_sg(sgt->sgl, sg, sgt->nents, i) { + size_t size = sg->length; + phys_addr_t phys = sg_phys(sg); + + while (size) { + ret = ops->map(ops, addr, phys, PAGE_SIZE, prot, GFP_KERNEL); + + if (ret) { + _iopgtbl_unmap(pt, gpuaddr, mapped); + return 0; + } + + phys += PAGE_SIZE; + mapped += PAGE_SIZE; + 
addr += PAGE_SIZE; + size -= PAGE_SIZE; + } + } + + return mapped; +} + + +static int +kgsl_iopgtbl_map_child(struct kgsl_pagetable *pt, struct kgsl_memdesc *memdesc, + u64 offset, struct kgsl_memdesc *child, u64 child_offset, u64 length) +{ + struct kgsl_iommu *iommu = &pt->mmu->iommu; + struct iommu_domain *domain = to_iommu_domain(&iommu->user_context); + struct kgsl_iommu_pt *iommu_pt = to_iommu_pt(pt); + struct sg_table sgt; + u32 flags; + int ret; + + ret = get_sg_from_child(&sgt, child, child_offset, length); + if (ret) + return ret; + + /* Inherit the flags from the child for this mapping */ + flags = _iommu_get_protection_flags(domain, child); + + ret = _iopgtbl_map_sg(iommu_pt, memdesc->gpuaddr + offset, &sgt, flags); + + sg_free_table(&sgt); + + return ret ? 0 : -ENOMEM; +} + + +static int +kgsl_iopgtbl_unmap_range(struct kgsl_pagetable *pt, struct kgsl_memdesc *memdesc, + u64 offset, u64 length) +{ + if (WARN_ON(offset >= memdesc->size || + (offset + length) > memdesc->size)) + return -ERANGE; + + return _iopgtbl_unmap(to_iommu_pt(pt), memdesc->gpuaddr + offset, + length); +} + +static size_t _iopgtbl_map_page_to_range(struct kgsl_iommu_pt *pt, + struct page *page, u64 gpuaddr, size_t range, int prot) +{ + struct io_pgtable_ops *ops = pt->pgtbl_ops; + size_t mapped = 0; + u64 addr = gpuaddr; + int ret; + + while (range) { + ret = ops->map(ops, addr, page_to_phys(page), PAGE_SIZE, + prot, GFP_KERNEL); + if (ret) { + _iopgtbl_unmap(pt, gpuaddr, mapped); + return 0; + } + + mapped += PAGE_SIZE; + addr += PAGE_SIZE; + range -= PAGE_SIZE; + } + + return mapped; +} + +static int kgsl_iopgtbl_map_zero_page_to_range(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc, u64 offset, u64 length) +{ + struct kgsl_iommu *iommu = &pt->mmu->iommu; + struct iommu_domain *domain = to_iommu_domain(&iommu->user_context); + /* + * The SMMU only does the PRT compare at the bottom level of the page table, because + * there is not an easy way for the hardware to perform 
this check at earlier levels. + * Mark this page writable to avoid page faults while writing to it. Since the address + * of this zero page is programmed in PRR register, MMU will intercept any accesses to + * the page before they go to DDR and will terminate the transaction. + */ + u32 flags = IOMMU_READ | IOMMU_WRITE | IOMMU_NOEXEC | get_llcc_flags(domain); + struct kgsl_iommu_pt *iommu_pt = to_iommu_pt(pt); + struct page *page = kgsl_vbo_zero_page; + + if (WARN_ON(!page)) + return -ENODEV; + + if (WARN_ON((offset >= memdesc->size) || + (offset + length) > memdesc->size)) + return -ERANGE; + + if (!_iopgtbl_map_page_to_range(iommu_pt, page, memdesc->gpuaddr + offset, + length, flags)) + return -ENOMEM; + + return 0; +} + +static int kgsl_iopgtbl_map(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + struct kgsl_iommu_pt *pt = to_iommu_pt(pagetable); + struct kgsl_iommu *iommu = &pagetable->mmu->iommu; + struct iommu_domain *domain = to_iommu_domain(&iommu->user_context); + size_t mapped, padding; + int prot; + + /* Get the protection flags for the user context */ + prot = _iommu_get_protection_flags(domain, memdesc); + + if (memdesc->sgt) + mapped = _iopgtbl_map_sg(pt, memdesc->gpuaddr, + memdesc->sgt, prot); + else + mapped = _iopgtbl_map_pages(pt, memdesc->gpuaddr, + memdesc->pages, memdesc->page_count, prot); + + if (mapped == 0) + return -ENOMEM; + + padding = kgsl_memdesc_footprint(memdesc) - mapped; + + if (padding) { + struct page *page = iommu_get_guard_page(memdesc); + size_t ret; + + if (page) + ret = _iopgtbl_map_page_to_range(pt, page, + memdesc->gpuaddr + mapped, padding, + prot & ~IOMMU_WRITE); + + if (!page || !ret) { + _iopgtbl_unmap(pt, memdesc->gpuaddr, mapped); + return -ENOMEM; + } + } + + return 0; +} + +static int kgsl_iopgtbl_unmap(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + return _iopgtbl_unmap(to_iommu_pt(pagetable), memdesc->gpuaddr, + kgsl_memdesc_footprint(memdesc)); +} + +static int 
_iommu_unmap(struct iommu_domain *domain, u64 addr, size_t size) +{ + size_t unmapped = 0; + + if (!domain) + return 0; + + /* Sign extend TTBR1 addresses all the way to avoid warning */ + if (addr & (1ULL << 48)) + addr |= 0xffff000000000000; + + unmapped = iommu_unmap(domain, addr, size); + + return (unmapped == size) ? 0 : -ENOMEM; +} + + +static size_t _iommu_map_page_to_range(struct iommu_domain *domain, + struct page *page, u64 gpuaddr, size_t range, int prot) +{ + size_t mapped = 0; + u64 addr = gpuaddr; + + if (!page) + return 0; + + /* Sign extend TTBR1 addresses all the way to avoid warning */ + if (gpuaddr & (1ULL << 48)) + gpuaddr |= 0xffff000000000000; + + + while (range) { + int ret = iommu_map(domain, addr, page_to_phys(page), + PAGE_SIZE, prot); + if (ret) { + iommu_unmap(domain, gpuaddr, mapped); + return 0; + } + + addr += PAGE_SIZE; + mapped += PAGE_SIZE; + range -= PAGE_SIZE; + } + + return mapped; +} + +static size_t _iommu_map_sg(struct iommu_domain *domain, u64 gpuaddr, + struct sg_table *sgt, int prot) +{ + /* Sign extend TTBR1 addresses all the way to avoid warning */ + if (gpuaddr & (1ULL << 48)) + gpuaddr |= 0xffff000000000000; + + return iommu_map_sg(domain, gpuaddr, sgt->sgl, sgt->orig_nents, prot); +} + +static int +_kgsl_iommu_map(struct iommu_domain *domain, struct kgsl_memdesc *memdesc) +{ + int prot = _iommu_get_protection_flags(domain, memdesc); + size_t mapped, padding; + int ret = 0; + + /* + * For paged memory allocated through kgsl, memdesc->pages is not NULL. + * Allocate sgt here just for its map operation. Contiguous memory + * already has its sgt, so no need to allocate it here. 
+ */ + if (!memdesc->pages) { + mapped = _iommu_map_sg(domain, memdesc->gpuaddr, + memdesc->sgt, prot); + } else { + struct sg_table sgt; + + ret = sg_alloc_table_from_pages(&sgt, memdesc->pages, + memdesc->page_count, 0, memdesc->size, GFP_KERNEL); + if (ret) + return ret; + + mapped = _iommu_map_sg(domain, memdesc->gpuaddr, &sgt, prot); + sg_free_table(&sgt); + } + + if (!mapped) + return -ENOMEM; + + padding = kgsl_memdesc_footprint(memdesc) - mapped; + + if (padding) { + struct page *page = iommu_get_guard_page(memdesc); + size_t guard_mapped; + + if (page) + guard_mapped = _iommu_map_page_to_range(domain, page, + memdesc->gpuaddr + mapped, padding, prot & ~IOMMU_WRITE); + + if (!page || !guard_mapped) { + _iommu_unmap(domain, memdesc->gpuaddr, mapped); + ret = -ENOMEM; + } + } + + return ret; +} + +static int kgsl_iommu_secure_map(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + struct kgsl_iommu *iommu = &pagetable->mmu->iommu; + struct iommu_domain *domain = to_iommu_domain(&iommu->secure_context); + + return _kgsl_iommu_map(domain, memdesc); +} + +/* + * Return true if the address is in the TTBR0 region. 
This is used for cases + * when the "default" pagetable is used for both TTBR0 and TTBR1 + */ +static bool is_lower_address(struct kgsl_mmu *mmu, u64 addr) +{ + return (test_bit(KGSL_MMU_IOPGTABLE, &mmu->features) && + addr < KGSL_IOMMU_SPLIT_TABLE_BASE); +} + +static int _kgsl_iommu_unmap(struct iommu_domain *domain, + struct kgsl_memdesc *memdesc) +{ + if (memdesc->size == 0 || memdesc->gpuaddr == 0) + return -EINVAL; + + return _iommu_unmap(domain, memdesc->gpuaddr, + kgsl_memdesc_footprint(memdesc)); +} + +/* Map on the default pagetable and the LPAC pagetable if it exists */ +static int kgsl_iommu_default_map(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + struct kgsl_mmu *mmu = pagetable->mmu; + struct kgsl_iommu *iommu = &mmu->iommu; + struct iommu_domain *domain, *lpac; + int ret; + + if (is_lower_address(mmu, memdesc->gpuaddr)) + return kgsl_iopgtbl_map(pagetable, memdesc); + + domain = to_iommu_domain(&iommu->user_context); + + /* Map the object to the default GPU domain */ + ret = _kgsl_iommu_map(domain, memdesc); + + /* Also map the object to the LPAC domain if it exists */ + lpac = to_iommu_domain(&iommu->lpac_context); + + if (!ret && lpac) { + ret = _kgsl_iommu_map(lpac, memdesc); + + /* On failure, also unmap from the default domain */ + if (ret) + _kgsl_iommu_unmap(domain, memdesc); + + } + + return ret; +} + +static int kgsl_iommu_secure_unmap(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + struct kgsl_iommu *iommu = &pagetable->mmu->iommu; + + if (memdesc->size == 0 || memdesc->gpuaddr == 0) + return -EINVAL; + + return _kgsl_iommu_unmap(to_iommu_domain(&iommu->secure_context), + memdesc); +} + +static int kgsl_iommu_default_unmap(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + struct kgsl_mmu *mmu = pagetable->mmu; + struct kgsl_iommu *iommu = &mmu->iommu; + int ret; + + if (memdesc->size == 0 || memdesc->gpuaddr == 0) + return -EINVAL; + + if (is_lower_address(mmu, 
memdesc->gpuaddr)) + return kgsl_iopgtbl_unmap(pagetable, memdesc); + + /* Unmap from the default domain */ + ret = _kgsl_iommu_unmap(to_iommu_domain(&iommu->user_context), memdesc); + + /* Unmap from the LPAC domain if it exists */ + ret |= _kgsl_iommu_unmap(to_iommu_domain(&iommu->lpac_context), memdesc); + return ret; +} + +static bool kgsl_iommu_addr_is_global(struct kgsl_mmu *mmu, u64 addr) +{ + if (test_bit(KGSL_MMU_IOPGTABLE, &mmu->features)) + return (addr >= KGSL_IOMMU_SPLIT_TABLE_BASE); + + return ((addr >= KGSL_IOMMU_GLOBAL_MEM_BASE(mmu)) && + (addr < KGSL_IOMMU_GLOBAL_MEM_BASE(mmu) + + KGSL_IOMMU_GLOBAL_MEM_SIZE)); +} + +static void __iomem *kgsl_iommu_reg(struct kgsl_iommu_context *ctx, + u32 offset) +{ + struct kgsl_iommu *iommu = KGSL_IOMMU(ctx->kgsldev); + + if (!iommu->cb0_offset) { + u32 reg = + readl_relaxed(iommu->regbase + KGSL_IOMMU_IDR1_OFFSET); + + iommu->pagesize = + FIELD_GET(IDR1_PAGESIZE, reg) ? SZ_64K : SZ_4K; + + /* + * The number of pages in the global address space or + * translation bank address space is 2^(NUMPAGENDXB + 1). 
+ */ + iommu->cb0_offset = iommu->pagesize * + (1 << (FIELD_GET(IDR1_NUMPAGENDXB, reg) + 1)); + } + + return (void __iomem *) (iommu->regbase + iommu->cb0_offset + + (ctx->cb_num * iommu->pagesize) + offset); +} + +static u64 KGSL_IOMMU_GET_CTX_REG_Q(struct kgsl_iommu_context *ctx, u32 offset) +{ + void __iomem *addr = kgsl_iommu_reg(ctx, offset); + + return readq_relaxed(addr); +} + +static void KGSL_IOMMU_SET_CTX_REG(struct kgsl_iommu_context *ctx, u32 offset, + u32 val) +{ + void __iomem *addr = kgsl_iommu_reg(ctx, offset); + + writel_relaxed(val, addr); +} + +static u32 KGSL_IOMMU_GET_CTX_REG(struct kgsl_iommu_context *ctx, u32 offset) +{ + void __iomem *addr = kgsl_iommu_reg(ctx, offset); + + return readl_relaxed(addr); +} + +static int kgsl_iommu_get_gpuaddr(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc); + +static void kgsl_iommu_map_secure_global(struct kgsl_mmu *mmu, + struct kgsl_memdesc *memdesc) +{ + if (IS_ERR_OR_NULL(mmu->securepagetable)) + return; + + if (!memdesc->gpuaddr) { + int ret = kgsl_iommu_get_gpuaddr(mmu->securepagetable, + memdesc); + + if (WARN_ON(ret)) + return; + } + + kgsl_iommu_secure_map(mmu->securepagetable, memdesc); +} + +#define KGSL_GLOBAL_MEM_PAGES (KGSL_IOMMU_GLOBAL_MEM_SIZE >> PAGE_SHIFT) + +static u64 global_get_offset(struct kgsl_device *device, u64 size, + unsigned long priv) +{ + int start = 0, bit; + + if (!device->global_map) { + device->global_map = + kcalloc(BITS_TO_LONGS(KGSL_GLOBAL_MEM_PAGES), + sizeof(unsigned long), GFP_KERNEL); + if (!device->global_map) + return (unsigned long) -ENOMEM; + } + + if (priv & KGSL_MEMDESC_RANDOM) { + u32 offset = KGSL_GLOBAL_MEM_PAGES - (size >> PAGE_SHIFT); + + start = get_random_int() % offset; + } + + while (start >= 0) { + bit = bitmap_find_next_zero_area(device->global_map, + KGSL_GLOBAL_MEM_PAGES, start, size >> PAGE_SHIFT, 0); + + if (bit < KGSL_GLOBAL_MEM_PAGES) + break; + + /* + * Later implementations might want to randomize this to reduce + * 
predictability + */ + start--; + } + + if (WARN_ON(start < 0)) + return (unsigned long) -ENOMEM; + + bitmap_set(device->global_map, bit, size >> PAGE_SHIFT); + + return bit << PAGE_SHIFT; +} + +static void kgsl_iommu_map_global(struct kgsl_mmu *mmu, + struct kgsl_memdesc *memdesc, u32 padding) +{ + struct kgsl_device *device = KGSL_MMU_DEVICE(mmu); + + if (memdesc->flags & KGSL_MEMFLAGS_SECURE) { + kgsl_iommu_map_secure_global(mmu, memdesc); + return; + } + + if (!memdesc->gpuaddr) { + u64 offset; + + offset = global_get_offset(device, memdesc->size + padding, + memdesc->priv); + + if (IS_ERR_VALUE(offset)) + return; + + memdesc->gpuaddr = mmu->defaultpagetable->global_base + offset; + } + + kgsl_iommu_default_map(mmu->defaultpagetable, memdesc); +} + +/* Print the mem entry for the pagefault debugging */ +static void print_entry(struct device *dev, struct kgsl_mem_entry *entry, + pid_t pid) +{ + char name[32]; + + if (!entry) { + dev_crit(dev, "**EMPTY**\n"); + return; + } + + kgsl_get_memory_usage(name, sizeof(name), entry->memdesc.flags); + + dev_err(dev, "[%016llX - %016llX] %s %s (pid = %d) (%s)\n", + entry->memdesc.gpuaddr, + entry->memdesc.gpuaddr + entry->memdesc.size - 1, + entry->memdesc.priv & KGSL_MEMDESC_GUARD_PAGE ? "(+guard)" : "", + entry->pending_free ? 
"(pending free)" : "", + pid, name); +} + +/* Check if the address in the list of recently freed memory */ +static void kgsl_iommu_check_if_freed(struct device *dev, + struct kgsl_iommu_context *context, u64 addr, u32 ptname) +{ + uint64_t gpuaddr = addr; + uint64_t size = 0; + uint64_t flags = 0; + char name[32]; + pid_t pid; + + if (!kgsl_memfree_find_entry(ptname, &gpuaddr, &size, &flags, &pid)) + return; + + kgsl_get_memory_usage(name, sizeof(name), flags); + + dev_err(dev, "---- premature free ----\n"); + dev_err(dev, "[%8.8llX-%8.8llX] (%s) was already freed by pid %d\n", + gpuaddr, gpuaddr + size, name, pid); +} + +static struct kgsl_process_private *kgsl_iommu_get_process(u64 ptbase) +{ + struct kgsl_process_private *p; + struct kgsl_iommu_pt *iommu_pt; + + read_lock(&kgsl_driver.proclist_lock); + + list_for_each_entry(p, &kgsl_driver.process_list, list) { + iommu_pt = to_iommu_pt(p->pagetable); + if (iommu_pt->ttbr0 == ptbase) { + if (!kgsl_process_private_get(p)) + p = NULL; + + read_unlock(&kgsl_driver.proclist_lock); + return p; + } + } + + read_unlock(&kgsl_driver.proclist_lock); + + return NULL; +} + +static void kgsl_iommu_print_fault(struct kgsl_mmu *mmu, + struct kgsl_iommu_context *ctxt, unsigned long addr, + u64 ptbase, u32 contextid, + int flags, struct kgsl_process_private *private, + struct kgsl_context *context) +{ + struct kgsl_device *device = KGSL_MMU_DEVICE(mmu); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct kgsl_mem_entry *prev = NULL, *next = NULL, *entry; + const char *fault_type; + const char *comm = NULL; + u32 ptname = KGSL_MMU_GLOBAL_PT; + int id; + + if (private) { + comm = private->comm; + ptname = pid_nr(private->pid); + } + + trace_kgsl_mmu_pagefault(device, addr, + ptname, comm, + (flags & IOMMU_FAULT_WRITE) ? 
"write" : "read"); + + if (flags & IOMMU_FAULT_TRANSLATION) + fault_type = "translation"; + else if (flags & IOMMU_FAULT_PERMISSION) + fault_type = "permission"; + else if (flags & IOMMU_FAULT_EXTERNAL) + fault_type = "external"; + else if (flags & IOMMU_FAULT_TRANSACTION_STALLED) + fault_type = "transaction stalled"; + + /* FIXME: This seems buggy */ + if (test_bit(KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE, &mmu->pfpolicy)) + if (!kgsl_mmu_log_fault_addr(mmu, ptbase, addr)) + return; + + if (!__ratelimit(&ctxt->ratelimit)) + return; + + dev_crit(device->dev, + "GPU PAGE FAULT: addr = %lX pid= %d name=%s drawctxt=%d context pid = %d\n", addr, + ptname, comm, contextid, context ? context->tid : 0); + + dev_crit(device->dev, + "context=%s TTBR0=0x%llx (%s %s fault)\n", + ctxt->name, ptbase, + (flags & IOMMU_FAULT_WRITE) ? "write" : "read", fault_type); + + if (gpudev->iommu_fault_block) { + u32 fsynr1 = KGSL_IOMMU_GET_CTX_REG(ctxt, + KGSL_IOMMU_CTX_FSYNR1); + + dev_crit(device->dev, + "FAULTING BLOCK: %s\n", + gpudev->iommu_fault_block(device, fsynr1)); + } + + /* Don't print the debug if this is a permissions fault */ + if ((flags & IOMMU_FAULT_PERMISSION)) + return; + + kgsl_iommu_check_if_freed(device->dev, ctxt, addr, ptname); + + /* + * Don't print any debug information if the address is + * in the global region. 
These are rare and nobody needs + * to know the addresses that are in here + */ + if (kgsl_iommu_addr_is_global(mmu, addr)) { + dev_crit(device->dev, "Fault in global memory\n"); + return; + } + + if (!private) + return; + + dev_crit(device->dev, "---- nearby memory ----\n"); + + spin_lock(&private->mem_lock); + idr_for_each_entry(&private->mem_idr, entry, id) { + u64 cur = entry->memdesc.gpuaddr; + + if (cur < addr) { + if (!prev || prev->memdesc.gpuaddr < cur) + prev = entry; + } + + if (cur > addr) { + if (!next || next->memdesc.gpuaddr > cur) + next = entry; + } + } + + print_entry(device->dev, prev, pid_nr(private->pid)); + dev_crit(device->dev, "<- fault @ %16.16lx\n", addr); + print_entry(device->dev, next, pid_nr(private->pid)); + + spin_unlock(&private->mem_lock); +} + +/* + * Return true if the IOMMU should stall and trigger a snapshot on a pagefault + */ +static bool kgsl_iommu_check_stall_on_fault(struct kgsl_iommu_context *ctx, + struct kgsl_mmu *mmu, int flags) +{ + struct kgsl_device *device = KGSL_MMU_DEVICE(mmu); + + if (!(flags & IOMMU_FAULT_TRANSACTION_STALLED)) + return false; + + if (!test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &mmu->pfpolicy)) + return false; + + /* + * Sometimes, there can be multiple invocations of the fault handler. + * Make sure we trigger reset/recovery only once. + */ + if (ctx->stalled_on_fault) + return false; + + if (!mutex_trylock(&device->mutex)) + return true; + + /* + * Turn off GPU IRQ so we don't get faults from it too.
+ * The device mutex must be held to change power state + */ + if (gmu_core_isenabled(device)) + kgsl_pwrctrl_irq(device, false); + else + kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE); + + mutex_unlock(&device->mutex); + return true; +} + +static int kgsl_iommu_fault_handler(struct kgsl_mmu *mmu, + struct kgsl_iommu_context *ctx, unsigned long addr, int flags) +{ + struct kgsl_device *device = KGSL_MMU_DEVICE(mmu); + u64 ptbase; + u32 contextidr; + bool stall; + struct kgsl_process_private *private; + struct kgsl_context *context; + + ptbase = KGSL_IOMMU_GET_CTX_REG_Q(ctx, KGSL_IOMMU_CTX_TTBR0); + contextidr = KGSL_IOMMU_GET_CTX_REG(ctx, KGSL_IOMMU_CTX_CONTEXTIDR); + + private = kgsl_iommu_get_process(ptbase); + context = kgsl_context_get(device, contextidr); + + stall = kgsl_iommu_check_stall_on_fault(ctx, mmu, flags); + + kgsl_iommu_print_fault(mmu, ctx, addr, ptbase, contextidr, flags, private, + context); + + if (stall) { + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + u32 sctlr; + + /* + * Disable context fault interrupts as we do not clear FSR in + * the ISR. Will be re-enabled after FSR is cleared. + */ + sctlr = KGSL_IOMMU_GET_CTX_REG(ctx, KGSL_IOMMU_CTX_SCTLR); + sctlr &= ~(0x1 << KGSL_IOMMU_SCTLR_CFIE_SHIFT); + KGSL_IOMMU_SET_CTX_REG(ctx, KGSL_IOMMU_CTX_SCTLR, sctlr); + + /* This is used by reset/recovery path */ + ctx->stalled_on_fault = true; + + /* Go ahead with recovery*/ + if (adreno_dev->dispatch_ops && adreno_dev->dispatch_ops->fault) + adreno_dev->dispatch_ops->fault(adreno_dev, + ADRENO_IOMMU_PAGE_FAULT); + } + + kgsl_context_put(context); + kgsl_process_private_put(private); + + /* Return -EBUSY to keep the IOMMU driver from resuming on a stall */ + return stall ? 
-EBUSY : 0; +} + +static int kgsl_iommu_default_fault_handler(struct iommu_domain *domain, + struct device *dev, unsigned long addr, int flags, void *token) +{ + struct kgsl_mmu *mmu = token; + struct kgsl_iommu *iommu = &mmu->iommu; + + return kgsl_iommu_fault_handler(mmu, &iommu->user_context, + addr, flags); +} + +static int kgsl_iommu_lpac_fault_handler(struct iommu_domain *domain, + struct device *dev, unsigned long addr, int flags, void *token) +{ + struct kgsl_mmu *mmu = token; + struct kgsl_iommu *iommu = &mmu->iommu; + struct kgsl_iommu_context *ctx = &iommu->lpac_context; + u32 fsynr0, fsynr1; + + fsynr0 = KGSL_IOMMU_GET_CTX_REG(ctx, KGSL_IOMMU_CTX_FSYNR0); + fsynr1 = KGSL_IOMMU_GET_CTX_REG(ctx, KGSL_IOMMU_CTX_FSYNR1); + + dev_crit(KGSL_MMU_DEVICE(mmu)->dev, + "LPAC PAGE FAULT iova=0x%16lx, fsynr0=0x%x, fsynr1=0x%x\n", + addr, fsynr0, fsynr1); + + return 0; +} + +static int kgsl_iommu_secure_fault_handler(struct iommu_domain *domain, + struct device *dev, unsigned long addr, int flags, void *token) +{ + struct kgsl_mmu *mmu = token; + struct kgsl_iommu *iommu = &mmu->iommu; + + return kgsl_iommu_fault_handler(mmu, &iommu->secure_context, + addr, flags); +} + +/* + * kgsl_iommu_disable_clk() - Disable iommu clocks + * Disable IOMMU clocks + */ +static void kgsl_iommu_disable_clk(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu *iommu = &mmu->iommu; + + atomic_dec(&iommu->clk_enable_count); + + /* + * Make sure the clk refcounts are good. An unbalance may + * cause the clocks to be off when we need them on. 
+ */ + WARN_ON(atomic_read(&iommu->clk_enable_count) < 0); + + clk_bulk_disable_unprepare(iommu->num_clks, iommu->clks); + + if (!IS_ERR_OR_NULL(iommu->cx_gdsc)) + regulator_disable(iommu->cx_gdsc); +} + +/* + * kgsl_iommu_enable_clk - Enable iommu clocks + * Enable all the IOMMU clocks + */ +static void kgsl_iommu_enable_clk(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu *iommu = &mmu->iommu; + + if (!IS_ERR_OR_NULL(iommu->cx_gdsc)) + WARN_ON(regulator_enable(iommu->cx_gdsc)); + + clk_bulk_prepare_enable(iommu->num_clks, iommu->clks); + + atomic_inc(&iommu->clk_enable_count); +} + +/* kgsl_iommu_get_ttbr0 - Get TTBR0 setting for a pagetable */ +static u64 kgsl_iommu_get_ttbr0(struct kgsl_pagetable *pagetable) +{ + struct kgsl_iommu_pt *pt = to_iommu_pt(pagetable); + + /* This will be zero if KGSL_MMU_IOPGTABLE is not enabled */ + return pt->ttbr0; +} + +/* FIXME: This is broken for LPAC. For now return the default context bank */ +static int kgsl_iommu_get_context_bank(struct kgsl_pagetable *pt) +{ + struct kgsl_iommu *iommu = to_kgsl_iommu(pt); + struct iommu_domain *domain = to_iommu_domain(&iommu->user_context); + + return _iommu_domain_context_bank(domain); +} + +static void kgsl_iommu_destroy_default_pagetable(struct kgsl_pagetable *pagetable) +{ + struct kgsl_device *device = KGSL_MMU_DEVICE(pagetable->mmu); + struct kgsl_iommu *iommu = to_kgsl_iommu(pagetable); + struct kgsl_iommu_context *context = &iommu->user_context; + struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(&context->pdev->dev); + struct kgsl_iommu_pt *pt = to_iommu_pt(pagetable); + struct kgsl_global_memdesc *md; + + list_for_each_entry(md, &device->globals, node) { + if (md->memdesc.flags & KGSL_MEMFLAGS_SECURE) + continue; + + kgsl_iommu_default_unmap(pagetable, &md->memdesc); + } + + adreno_smmu->set_ttbr0_cfg(adreno_smmu->cookie, NULL); + + kfree(pt); +} + +static void kgsl_iommu_destroy_pagetable(struct kgsl_pagetable *pagetable) +{ + struct kgsl_iommu_pt *pt = 
to_iommu_pt(pagetable); + + kfree(pt); +} + +static void _enable_gpuhtw_llc(struct kgsl_mmu *mmu, struct iommu_domain *domain) +{ + int val = 1; + + if (!test_bit(KGSL_MMU_LLCC_ENABLE, &mmu->features)) + return; + + if (mmu->subtype == KGSL_IOMMU_SMMU_V500) + iommu_domain_set_attr(domain, DOMAIN_ATTR_USE_LLC_NWA, &val); + else + iommu_domain_set_attr(domain, DOMAIN_ATTR_USE_UPSTREAM_HINT, &val); +} + +static int set_smmu_aperture(struct kgsl_device *device, + struct kgsl_iommu_context *context) +{ + int ret; + + if (!test_bit(KGSL_MMU_SMMU_APERTURE, &device->mmu.features)) + return 0; + + ret = qcom_scm_kgsl_set_smmu_aperture(context->cb_num); + if (ret == -EBUSY) + ret = qcom_scm_kgsl_set_smmu_aperture(context->cb_num); + + if (ret) + dev_err(device->dev, "Unable to set the SMMU aperture: %d. The aperture needs to be set to use per-process pagetables\n", + ret); + + return ret; +} + +/* FIXME: better name for this function */ +static int kgsl_iopgtbl_alloc(struct kgsl_iommu_context *ctx, struct kgsl_iommu_pt *pt) +{ + struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(&ctx->pdev->dev); + const struct io_pgtable_cfg *cfg = NULL; + + if (adreno_smmu->cookie) + cfg = adreno_smmu->get_ttbr1_cfg(adreno_smmu->cookie); + if (!cfg) + return -ENODEV; + + pt->cfg = *cfg; + pt->cfg.quirks &= ~IO_PGTABLE_QUIRK_ARM_TTBR1; + pt->cfg.tlb = &kgsl_iopgtbl_tlb_ops; + + pt->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, &pt->cfg, NULL); + + if (!pt->pgtbl_ops) + return -ENOMEM; + + pt->ttbr0 = pt->cfg.arm_lpae_s1_cfg.ttbr; + + return 0; +} + +/* Enable TTBR0 for the given context with the specific configuration */ +static void kgsl_iommu_enable_ttbr0(struct kgsl_iommu_context *context, + struct kgsl_iommu_pt *pt) +{ + struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(&context->pdev->dev); + struct kgsl_mmu *mmu = pt->base.mmu; + + /* Quietly return if the context doesn't have a domain */ + if (!context->domain) + return; + + /* Enable CX and clocks before we call into
SMMU to setup registers */ + kgsl_iommu_enable_clk(mmu); + adreno_smmu->set_ttbr0_cfg(adreno_smmu->cookie, &pt->cfg); + kgsl_iommu_disable_clk(mmu); +} + +static struct kgsl_pagetable *kgsl_iommu_default_pagetable(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu *iommu = &mmu->iommu; + struct kgsl_iommu_pt *iommu_pt; + int ret; + + iommu_pt = kzalloc(sizeof(*iommu_pt), GFP_KERNEL); + if (!iommu_pt) + return ERR_PTR(-ENOMEM); + + kgsl_mmu_pagetable_init(mmu, &iommu_pt->base, KGSL_MMU_GLOBAL_PT); + + iommu_pt->base.fault_addr = U64_MAX; + iommu_pt->base.rbtree = RB_ROOT; + iommu_pt->base.pt_ops = &default_pt_ops; + + if (test_bit(KGSL_MMU_64BIT, &mmu->features)) { + iommu_pt->base.compat_va_start = KGSL_IOMMU_SVM_BASE32; + iommu_pt->base.compat_va_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); + iommu_pt->base.va_start = KGSL_IOMMU_VA_BASE64; + iommu_pt->base.va_end = KGSL_IOMMU_VA_END64; + + } else { + iommu_pt->base.va_start = KGSL_IOMMU_SVM_BASE32; + + if (mmu->secured) + iommu_pt->base.va_end = KGSL_IOMMU_SECURE_BASE(mmu); + else + iommu_pt->base.va_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); + + iommu_pt->base.compat_va_start = iommu_pt->base.va_start; + iommu_pt->base.compat_va_end = iommu_pt->base.va_end; + } + + if (!test_bit(KGSL_MMU_IOPGTABLE, &mmu->features)) { + iommu_pt->base.global_base = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); + + kgsl_mmu_pagetable_add(mmu, &iommu_pt->base); + return &iommu_pt->base; + } + + iommu_pt->base.global_base = KGSL_IOMMU_SPLIT_TABLE_BASE; + + /* + * Set up a "default' TTBR0 for the pagetable - this would only be used + * in cases when the per-process pagetable allocation failed for some + * reason + */ + ret = kgsl_iopgtbl_alloc(&iommu->user_context, iommu_pt); + if (ret) { + kfree(iommu_pt); + return ERR_PTR(ret); + } + + kgsl_mmu_pagetable_add(mmu, &iommu_pt->base); + return &iommu_pt->base; + +} + +#if IS_ENABLED(CONFIG_QCOM_SECURE_BUFFER) +static struct kgsl_pagetable *kgsl_iommu_secure_pagetable(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu_pt 
*iommu_pt; + + if (!mmu->secured) + return ERR_PTR(-EPERM); + + iommu_pt = kzalloc(sizeof(*iommu_pt), GFP_KERNEL); + if (!iommu_pt) + return ERR_PTR(-ENOMEM); + + kgsl_mmu_pagetable_init(mmu, &iommu_pt->base, KGSL_MMU_SECURE_PT); + iommu_pt->base.fault_addr = U64_MAX; + iommu_pt->base.rbtree = RB_ROOT; + iommu_pt->base.pt_ops = &secure_pt_ops; + + iommu_pt->base.compat_va_start = KGSL_IOMMU_SECURE_BASE(mmu); + iommu_pt->base.compat_va_end = KGSL_IOMMU_SECURE_END(mmu); + iommu_pt->base.va_start = KGSL_IOMMU_SECURE_BASE(mmu); + iommu_pt->base.va_end = KGSL_IOMMU_SECURE_END(mmu); + + kgsl_mmu_pagetable_add(mmu, &iommu_pt->base); + return &iommu_pt->base; +} +#else +static struct kgsl_pagetable *kgsl_iommu_secure_pagetable(struct kgsl_mmu *mmu) +{ + return ERR_PTR(-EPERM); +} +#endif + +static struct kgsl_pagetable *kgsl_iopgtbl_pagetable(struct kgsl_mmu *mmu, u32 name) +{ + struct kgsl_iommu *iommu = &mmu->iommu; + struct kgsl_iommu_pt *pt; + int ret; + + pt = kzalloc(sizeof(*pt), GFP_KERNEL); + if (!pt) + return ERR_PTR(-ENOMEM); + + kgsl_mmu_pagetable_init(mmu, &pt->base, name); + + pt->base.fault_addr = U64_MAX; + pt->base.rbtree = RB_ROOT; + pt->base.pt_ops = &iopgtbl_pt_ops; + + if (test_bit(KGSL_MMU_64BIT, &mmu->features)) { + pt->base.compat_va_start = KGSL_IOMMU_SVM_BASE32; + pt->base.compat_va_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); + pt->base.va_start = KGSL_IOMMU_VA_BASE64; + pt->base.va_end = KGSL_IOMMU_VA_END64; + + if (is_compat_task()) { + pt->base.svm_start = KGSL_IOMMU_SVM_BASE32; + pt->base.svm_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); + } else { + pt->base.svm_start = KGSL_IOMMU_SVM_BASE64; + pt->base.svm_end = KGSL_IOMMU_SVM_END64; + } + + } else { + pt->base.va_start = KGSL_IOMMU_SVM_BASE32; + + if (mmu->secured) + pt->base.va_end = KGSL_IOMMU_SECURE_BASE(mmu); + else + pt->base.va_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); + + pt->base.compat_va_start = pt->base.va_start; + pt->base.compat_va_end = pt->base.va_end; + pt->base.svm_start = 
KGSL_IOMMU_SVM_BASE32; + pt->base.svm_end = KGSL_IOMMU_SVM_END32; + } + + ret = kgsl_iopgtbl_alloc(&iommu->user_context, pt); + if (ret) { + kfree(pt); + return ERR_PTR(ret); + } + + kgsl_mmu_pagetable_add(mmu, &pt->base); + return &pt->base; +} + +/* Look up the pagetable for @name, creating an io-pgtable backed one on demand */ +static struct kgsl_pagetable *kgsl_iommu_getpagetable(struct kgsl_mmu *mmu, + unsigned long name) +{ + struct kgsl_pagetable *pt; + + /* If we already know the pagetable, return it */ + pt = kgsl_get_pagetable(name); + if (pt) + return pt; + + /* If io-pgtables are not in effect, just use the default pagetable */ + if (!test_bit(KGSL_MMU_IOPGTABLE, &mmu->features)) + return mmu->defaultpagetable; + + pt = kgsl_iopgtbl_pagetable(mmu, name); + + /* + * If the io-pgtable allocation didn't work then fall back to the + * default pagetable for this cycle + */ + if (IS_ERR(pt)) /* kgsl_iopgtbl_pagetable() returns ERR_PTR() on failure, never NULL */ + return mmu->defaultpagetable; + + return pt; +} + +/* Detach and free a context bank's domain and drop its device reference */ +static void kgsl_iommu_detach_context(struct kgsl_iommu_context *context) +{ + if (!context->domain) + return; /* context was never attached (optional banks like LPAC/secure) */ + + iommu_detach_device(context->domain, &context->pdev->dev); + iommu_domain_free(context->domain); + + context->domain = NULL; + + platform_device_put(context->pdev); + + context->pdev = NULL; +} + +/* Tear down pagetables, context banks and cached guard pages on driver close */ +static void kgsl_iommu_close(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu *iommu = &mmu->iommu; + + /* First put away the default pagetables */ + kgsl_mmu_putpagetable(mmu->defaultpagetable); + mmu->defaultpagetable = NULL; + + kgsl_mmu_putpagetable(mmu->securepagetable); + mmu->securepagetable = NULL; + + /* Next, detach the context banks */ + kgsl_iommu_detach_context(&iommu->user_context); + kgsl_iommu_detach_context(&iommu->lpac_context); + kgsl_iommu_detach_context(&iommu->secure_context); + + kgsl_free_secure_page(kgsl_secure_guard_page); + kgsl_secure_guard_page = NULL; + + if (kgsl_guard_page != NULL) { + __free_page(kgsl_guard_page); + kgsl_guard_page = NULL; + } + + of_platform_depopulate(&iommu->pdev->dev); + platform_device_put(iommu->pdev); + + kmem_cache_destroy(addr_entry_cache); + 
addr_entry_cache = NULL; +} + +/* Program the PRR marker and enable it in the ACTLR register */ +static void _iommu_context_set_prr(struct kgsl_mmu *mmu, + struct kgsl_iommu_context *ctx) +{ + struct kgsl_iommu *iommu = &mmu->iommu; + struct page *page = kgsl_vbo_zero_page; + u32 val; + + if (ctx->cb_num < 0) /* context bank was never set up */ + return; + + if (!page) /* no VBO zero page allocated, so nothing to program */ + return; + + writel_relaxed(lower_32_bits(page_to_phys(page)), + iommu->regbase + KGSL_IOMMU_PRR_CFG_LADDR); + + writel_relaxed(upper_32_bits(page_to_phys(page)), + iommu->regbase + KGSL_IOMMU_PRR_CFG_UADDR); + + val = KGSL_IOMMU_GET_CTX_REG(ctx, KGSL_IOMMU_CTX_ACTLR); + val |= FIELD_PREP(KGSL_IOMMU_ACTLR_PRR_ENABLE, 1); + KGSL_IOMMU_SET_CTX_REG(ctx, KGSL_IOMMU_CTX_ACTLR, val); + + /* Make sure all of the preceding writes have posted */ + wmb(); +} + +/* Apply the current pagefault policy to the user context bank's SCTLR */ +static void _setup_user_context(struct kgsl_mmu *mmu) +{ + unsigned int sctlr_val; + struct kgsl_iommu_context *ctx = &mmu->iommu.user_context; + + sctlr_val = KGSL_IOMMU_GET_CTX_REG(ctx, KGSL_IOMMU_CTX_SCTLR); + + /* + * If pagefault policy is GPUHALT_ENABLE, + * 1) Program CFCFG to 1 to enable STALL mode + * 2) Program HUPCF to 0 (Stall or terminate subsequent + * transactions in the presence of an outstanding fault) + * else + * 1) Program CFCFG to 0 to disable STALL mode (0=Terminate) + * 2) Program HUPCF to 1 (Process subsequent transactions + * independently of any outstanding fault) + */ + + if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &mmu->pfpolicy)) { + sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT); + sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT); + } else { + sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT); + sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT); + } + KGSL_IOMMU_SET_CTX_REG(ctx, KGSL_IOMMU_CTX_SCTLR, sctlr_val); +} + +/* Start the MMU: program QSMMU-specific registers, fault policy and PRR */ +static int kgsl_iommu_start(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu *iommu = &mmu->iommu; + + kgsl_iommu_enable_clk(mmu); + + /* Set the following registers only when the MMU type is QSMMU */ + if (mmu->subtype != 
KGSL_IOMMU_SMMU_V500) { + /* Enable hazard check from GPU_SMMU_HUM_CFG */ + writel_relaxed(0x02, iommu->regbase + 0x6800); + + /* Write to GPU_SMMU_DORA_ORDERING to disable reordering */ + writel_relaxed(0x01, iommu->regbase + 0x64a0); + + /* make sure register write committed */ + wmb(); + } + + /* FIXME: We would need to program stall on fault for LPAC too */ + _setup_user_context(mmu); + + _iommu_context_set_prr(mmu, &iommu->user_context); + if (mmu->secured) + _iommu_context_set_prr(mmu, &iommu->secure_context); + _iommu_context_set_prr(mmu, &iommu->lpac_context); + + kgsl_iommu_disable_clk(mmu); + return 0; +} + +static void kgsl_iommu_clear_fsr(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu *iommu = &mmu->iommu; + struct kgsl_iommu_context *ctx = &iommu->user_context; + unsigned int sctlr_val; + + if (ctx->stalled_on_fault) { + kgsl_iommu_enable_clk(mmu); + KGSL_IOMMU_SET_CTX_REG(ctx, KGSL_IOMMU_CTX_FSR, 0xffffffff); + /* + * Re-enable context fault interrupts after clearing + * FSR to prevent the interrupt from firing repeatedly + */ + sctlr_val = KGSL_IOMMU_GET_CTX_REG(ctx, KGSL_IOMMU_CTX_SCTLR); + sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_CFIE_SHIFT); + KGSL_IOMMU_SET_CTX_REG(ctx, KGSL_IOMMU_CTX_SCTLR, sctlr_val); + /* + * Make sure the above register writes + * are not reordered across the barrier + * as we use writel_relaxed to write them + */ + wmb(); + kgsl_iommu_disable_clk(mmu); + ctx->stalled_on_fault = false; + } +} + +static void kgsl_iommu_pagefault_resume(struct kgsl_mmu *mmu, bool terminate) +{ + struct kgsl_iommu *iommu = &mmu->iommu; + struct kgsl_iommu_context *ctx = &iommu->user_context; + u32 sctlr_val = 0; + + if (!ctx->stalled_on_fault) + return; + + if (!terminate) + goto clear_fsr; + + sctlr_val = KGSL_IOMMU_GET_CTX_REG(ctx, KGSL_IOMMU_CTX_SCTLR); + /* + * As part of recovery, GBIF halt sequence should be performed. 
+ * In a worst case scenario, if any GPU block is generating a + * stream of un-ending faulting transactions, SMMU would enter + * stall-on-fault mode again after resuming and not let GBIF + * halt succeed. In order to avoid that situation and terminate + * those faulty transactions, set CFCFG and HUPCF to 0. + */ + sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT); + sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT); + KGSL_IOMMU_SET_CTX_REG(ctx, KGSL_IOMMU_CTX_SCTLR, sctlr_val); + /* + * Make sure the above register write is not reordered across + * the barrier as we use writel_relaxed to write it. + */ + wmb(); + +clear_fsr: + /* + * This will only clear fault bits in FSR. FSR.SS will still + * be set. Writing to RESUME (below) is the only way to clear + * FSR.SS bit. + */ + KGSL_IOMMU_SET_CTX_REG(ctx, KGSL_IOMMU_CTX_FSR, 0xffffffff); + /* + * Make sure the above register write is not reordered across + * the barrier as we use writel_relaxed to write it. + */ + wmb(); + + /* + * Write 1 to RESUME.TnR to terminate the stalled transaction. + * This will also allow the SMMU to process new transactions. + */ + KGSL_IOMMU_SET_CTX_REG(ctx, KGSL_IOMMU_CTX_RESUME, 1); + /* + * Make sure the above register writes are not reordered across + * the barrier as we use writel_relaxed to write them. 
+ */ + wmb(); +} + +/* Read the live TTBR0 from the user context bank (0 if unavailable) */ +static u64 +kgsl_iommu_get_current_ttbr0(struct kgsl_mmu *mmu) +{ + u64 val; + struct kgsl_iommu *iommu = &mmu->iommu; + struct kgsl_iommu_context *ctx = &iommu->user_context; + + /* + * We cannot enable or disable the clocks in interrupt context, this + * function is called from interrupt context if there is an axi error + */ + if (in_interrupt()) + return 0; + + if (ctx->cb_num < 0) /* context bank not yet initialized */ + return 0; + + kgsl_iommu_enable_clk(mmu); + val = KGSL_IOMMU_GET_CTX_REG_Q(ctx, KGSL_IOMMU_CTX_TTBR0); + kgsl_iommu_disable_clk(mmu); + return val; +} + +/* + * kgsl_iommu_set_pf_policy() - Set the pagefault policy for IOMMU + * @mmu: Pointer to mmu structure + * @pf_policy: The pagefault policy to set + * + * Check if the new policy indicated by pf_policy is same as current + * policy, if same then return else set the policy + */ +static int kgsl_iommu_set_pf_policy(struct kgsl_mmu *mmu, + unsigned long pf_policy) +{ + struct kgsl_iommu *iommu = &mmu->iommu; + struct kgsl_iommu_context *ctx = &iommu->user_context; + unsigned int sctlr_val; + int cur, new; + + cur = test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &mmu->pfpolicy); + new = test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &pf_policy); + + if (cur == new) /* policy unchanged; skip the register access */ + return 0; + + kgsl_iommu_enable_clk(mmu); + + sctlr_val = KGSL_IOMMU_GET_CTX_REG(ctx, KGSL_IOMMU_CTX_SCTLR); + + if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &pf_policy)) { + sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT); + sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT); + } else { + sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT); + sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT); + } + + KGSL_IOMMU_SET_CTX_REG(ctx, KGSL_IOMMU_CTX_SCTLR, sctlr_val); + + kgsl_iommu_disable_clk(mmu); + return 0; +} + +/* Find the rbtree entry whose base exactly matches @gpuaddr, or NULL */ +static struct kgsl_iommu_addr_entry *_find_gpuaddr( + struct kgsl_pagetable *pagetable, uint64_t gpuaddr) +{ + struct rb_node *node = pagetable->rbtree.rb_node; + + while (node != NULL) { + struct kgsl_iommu_addr_entry *entry = rb_entry(node, 
+ struct kgsl_iommu_addr_entry, node); + + if (gpuaddr < entry->base) + node = node->rb_left; + else if (gpuaddr > entry->base) + node = node->rb_right; + else + return entry; + } + + return NULL; +} + +static int _remove_gpuaddr(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr) +{ + struct kgsl_iommu_addr_entry *entry; + + entry = _find_gpuaddr(pagetable, gpuaddr); + + if (WARN(!entry, "GPU address %llx doesn't exist\n", gpuaddr)) + return -ENOMEM; + + rb_erase(&entry->node, &pagetable->rbtree); + kmem_cache_free(addr_entry_cache, entry); + return 0; +} + +static int _insert_gpuaddr(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr, uint64_t size) +{ + struct rb_node **node, *parent = NULL; + struct kgsl_iommu_addr_entry *new = + kmem_cache_alloc(addr_entry_cache, GFP_ATOMIC); + + if (new == NULL) + return -ENOMEM; + + new->base = gpuaddr; + new->size = size; + + node = &pagetable->rbtree.rb_node; + + while (*node != NULL) { + struct kgsl_iommu_addr_entry *this; + + parent = *node; + this = rb_entry(parent, struct kgsl_iommu_addr_entry, node); + + if (new->base < this->base) + node = &parent->rb_left; + else if (new->base > this->base) + node = &parent->rb_right; + else { + /* Duplicate entry */ + WARN(1, "duplicate gpuaddr: 0x%llx\n", gpuaddr); + kmem_cache_free(addr_entry_cache, new); + return -EEXIST; + } + } + + rb_link_node(&new->node, parent, node); + rb_insert_color(&new->node, &pagetable->rbtree); + + return 0; +} + +static uint64_t _get_unmapped_area(struct kgsl_pagetable *pagetable, + uint64_t bottom, uint64_t top, uint64_t size, + uint64_t align) +{ + struct rb_node *node = rb_first(&pagetable->rbtree); + uint64_t start; + + bottom = ALIGN(bottom, align); + start = bottom; + + while (node != NULL) { + uint64_t gap; + struct kgsl_iommu_addr_entry *entry = rb_entry(node, + struct kgsl_iommu_addr_entry, node); + + /* + * Skip any entries that are outside of the range, but make sure + * to account for some that might straddle the lower bound + */ + 
if (entry->base < bottom) { + if (entry->base + entry->size > bottom) + start = ALIGN(entry->base + entry->size, align); + node = rb_next(node); + continue; + } + + /* Stop if we went over the top */ + if (entry->base >= top) + break; + + /* Make sure there is a gap to consider */ + if (start < entry->base) { + gap = entry->base - start; + + if (gap >= size) + return start; + } + + /* Stop if there is no more room in the region */ + if (entry->base + entry->size >= top) + return (uint64_t) -ENOMEM; + + /* Start the next cycle at the end of the current entry */ + start = ALIGN(entry->base + entry->size, align); + node = rb_next(node); + } + + if (start + size <= top) + return start; + + return (uint64_t) -ENOMEM; +} + +static uint64_t _get_unmapped_area_topdown(struct kgsl_pagetable *pagetable, + uint64_t bottom, uint64_t top, uint64_t size, + uint64_t align) +{ + struct rb_node *node = rb_last(&pagetable->rbtree); + uint64_t end = top; + uint64_t mask = ~(align - 1); + struct kgsl_iommu_addr_entry *entry; + + /* Make sure that the bottom is correctly aligned */ + bottom = ALIGN(bottom, align); + + /* Make sure the requested size will fit in the range */ + if (size > (top - bottom)) + return -ENOMEM; + + /* Walk back through the list to find the highest entry in the range */ + for (node = rb_last(&pagetable->rbtree); node != NULL; node = rb_prev(node)) { + entry = rb_entry(node, struct kgsl_iommu_addr_entry, node); + if (entry->base < top) + break; + } + + while (node != NULL) { + uint64_t offset; + + entry = rb_entry(node, struct kgsl_iommu_addr_entry, node); + + /* If the entire entry is below the range the search is over */ + if ((entry->base + entry->size) < bottom) + break; + + /* Get the top of the entry properly aligned */ + offset = ALIGN(entry->base + entry->size, align); + + /* + * Try to allocate the memory from the top of the gap, + * making sure that it fits between the top of this entry and + * the bottom of the previous one + */ + + if ((end > size) 
&& (offset < end)) { + uint64_t chunk = (end - size) & mask; + + if (chunk >= offset) + return chunk; + } + + /* + * If we get here and the current entry is outside of the range + * then we are officially out of room + */ + + if (entry->base < bottom) + return (uint64_t) -ENOMEM; + + /* Set the top of the gap to the current entry->base */ + end = entry->base; + + /* And move on to the next lower entry */ + node = rb_prev(node); + } + + /* If we get here then there are no more entries in the region */ + if ((end > size) && (((end - size) & mask) >= bottom)) + return (end - size) & mask; + + return (uint64_t) -ENOMEM; +} + +static uint64_t kgsl_iommu_find_svm_region(struct kgsl_pagetable *pagetable, + uint64_t start, uint64_t end, uint64_t size, + uint64_t alignment) +{ + uint64_t addr; + + /* Avoid black holes */ + if (WARN(end <= start, "Bad search range: 0x%llx-0x%llx", start, end)) + return (uint64_t) -EINVAL; + + spin_lock(&pagetable->lock); + addr = _get_unmapped_area_topdown(pagetable, + start, end, size, alignment); + spin_unlock(&pagetable->lock); + return addr; +} + +static bool iommu_addr_in_svm_ranges(struct kgsl_pagetable *pagetable, + u64 gpuaddr, u64 size) +{ + if ((gpuaddr >= pagetable->compat_va_start && gpuaddr < pagetable->compat_va_end) && + ((gpuaddr + size) > pagetable->compat_va_start && + (gpuaddr + size) <= pagetable->compat_va_end)) + return true; + + if ((gpuaddr >= pagetable->svm_start && gpuaddr < pagetable->svm_end) && + ((gpuaddr + size) > pagetable->svm_start && + (gpuaddr + size) <= pagetable->svm_end)) + return true; + + return false; +} + +static int kgsl_iommu_set_svm_region(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr, uint64_t size) +{ + int ret = -ENOMEM; + struct rb_node *node; + + /* Make sure the requested address doesn't fall out of SVM range */ + if (!iommu_addr_in_svm_ranges(pagetable, gpuaddr, size)) + return -ENOMEM; + + spin_lock(&pagetable->lock); + node = pagetable->rbtree.rb_node; + + while (node != NULL) { + 
uint64_t start, end; + struct kgsl_iommu_addr_entry *entry = rb_entry(node, + struct kgsl_iommu_addr_entry, node); + + start = entry->base; + end = entry->base + entry->size; + + if (gpuaddr + size <= start) + node = node->rb_left; + else if (end <= gpuaddr) + node = node->rb_right; + else + goto out; + } + + ret = _insert_gpuaddr(pagetable, gpuaddr, size); +out: + spin_unlock(&pagetable->lock); + return ret; +} + + +static int kgsl_iommu_get_gpuaddr(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + int ret = 0; + uint64_t addr, start, end, size; + unsigned int align; + + if (WARN_ON(kgsl_memdesc_use_cpu_map(memdesc))) + return -EINVAL; + + if (memdesc->flags & KGSL_MEMFLAGS_SECURE && + pagetable->name != KGSL_MMU_SECURE_PT) + return -EINVAL; + + size = kgsl_memdesc_footprint(memdesc); + + align = max_t(uint64_t, 1 << kgsl_memdesc_get_align(memdesc), + PAGE_SIZE); + + if (memdesc->flags & KGSL_MEMFLAGS_FORCE_32BIT) { + start = pagetable->compat_va_start; + end = pagetable->compat_va_end; + } else { + start = pagetable->va_start; + end = pagetable->va_end; + } + + spin_lock(&pagetable->lock); + + addr = _get_unmapped_area(pagetable, start, end, size, align); + + if (addr == (uint64_t) -ENOMEM) { + ret = -ENOMEM; + goto out; + } + + /* + * This path is only called in a non-SVM path with locks so we can be + * sure we aren't racing with anybody so we don't need to worry about + * taking the lock + */ + ret = _insert_gpuaddr(pagetable, addr, size); + if (ret == 0) { + memdesc->gpuaddr = addr; + memdesc->pagetable = pagetable; + } + +out: + spin_unlock(&pagetable->lock); + return ret; +} + +static void kgsl_iommu_put_gpuaddr(struct kgsl_memdesc *memdesc) +{ + if (memdesc->pagetable == NULL) + return; + + spin_lock(&memdesc->pagetable->lock); + + _remove_gpuaddr(memdesc->pagetable, memdesc->gpuaddr); + + spin_unlock(&memdesc->pagetable->lock); +} + +static int kgsl_iommu_svm_range(struct kgsl_pagetable *pagetable, + uint64_t *lo, uint64_t *hi, 
 uint64_t memflags) +{ + bool gpu_compat = (memflags & KGSL_MEMFLAGS_FORCE_32BIT) != 0; + + if (lo != NULL) + *lo = gpu_compat ? pagetable->compat_va_start : pagetable->svm_start; + if (hi != NULL) + *hi = gpu_compat ? pagetable->compat_va_end : pagetable->svm_end; + + return 0; +} + +/* Return true if @gpuaddr falls inside any of the pagetable's VA regions */ +static bool kgsl_iommu_addr_in_range(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr) +{ + if (gpuaddr == 0) /* 0 is never a valid GPU address */ + return false; + + if (gpuaddr >= pagetable->va_start && gpuaddr < pagetable->va_end) + return true; + + if (gpuaddr >= pagetable->compat_va_start && gpuaddr < pagetable->compat_va_end) + return true; + + if (gpuaddr >= pagetable->svm_start && gpuaddr < pagetable->svm_end) + return true; + + return false; +} + +/* Find the named DT child, attach an IOMMU domain to it and record the context bank */ +static int kgsl_iommu_setup_context(struct kgsl_mmu *mmu, + struct device_node *parent, + struct kgsl_iommu_context *context, const char *name, + iommu_fault_handler_t handler) +{ + struct device_node *node = of_find_node_by_name(parent, name); + struct platform_device *pdev; + int ret; + + if (!node) + return -ENOENT; + + pdev = of_find_device_by_node(node); /* NOTE(review): can return NULL and is dereferenced unchecked below — confirm the DT always provides a device for this node */ + ret = of_dma_configure(&pdev->dev, node, true); + of_node_put(node); + + if (ret) + return ret; + + context->cb_num = -1; /* -1 means "no context bank assigned yet" */ + context->name = name; + context->kgsldev = KGSL_MMU_DEVICE(mmu); + context->pdev = pdev; + ratelimit_default_init(&context->ratelimit); + + /* Set the adreno_smmu priv data for the device */ + dev_set_drvdata(&pdev->dev, &context->adreno_smmu); + + /* Create a new context */ + context->domain = iommu_domain_alloc(&platform_bus_type); + if (!context->domain) { + /*FIXME: Put back the pdev here? */ + return -ENODEV; + } + + _enable_gpuhtw_llc(mmu, context->domain); + + ret = iommu_attach_device(context->domain, &context->pdev->dev); + if (ret) { + /* FIXME: put back the device here? 
*/ + iommu_domain_free(context->domain); + context->domain = NULL; + return ret; + } + + iommu_set_fault_handler(context->domain, handler, mmu); + + context->cb_num = _iommu_domain_context_bank(context->domain); + + if (context->cb_num >= 0) + return 0; + + dev_err(KGSL_MMU_DEVICE(mmu)->dev, "Couldn't get the context bank for %s: %d\n", + context->name, context->cb_num); + + iommu_detach_device(context->domain, &context->pdev->dev); + iommu_domain_free(context->domain); + + /* FIXME: put back the device here? */ + context->domain = NULL; + + return context->cb_num; +} + +static int iommu_probe_user_context(struct kgsl_device *device, + struct device_node *node) +{ + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + struct kgsl_mmu *mmu = &device->mmu; + int ret; + + ret = kgsl_iommu_setup_context(mmu, node, &iommu->user_context, + "gfx3d_user", kgsl_iommu_default_fault_handler); + if (ret) + return ret; + + /* LPAC is optional so don't worry if it returns error */ + kgsl_iommu_setup_context(mmu, node, &iommu->lpac_context, + "gfx3d_lpac", kgsl_iommu_lpac_fault_handler); + + /* + * FIXME: If adreno_smmu->cookie wasn't initialized then we can't do + * IOPGTABLE + */ + + /* Make the default pagetable */ + mmu->defaultpagetable = kgsl_iommu_default_pagetable(mmu); + if (IS_ERR(mmu->defaultpagetable)) + return PTR_ERR(mmu->defaultpagetable); + + /* If IOPGTABLE isn't enabled then we are done */ + if (!test_bit(KGSL_MMU_IOPGTABLE, &mmu->features)) + return 0; + + /* Enable TTBR0 on the default and LPAC contexts */ + kgsl_iommu_enable_ttbr0(&iommu->user_context, + to_iommu_pt(mmu->defaultpagetable)); + + set_smmu_aperture(device, &iommu->user_context); + + kgsl_iommu_enable_ttbr0(&iommu->lpac_context, + to_iommu_pt(mmu->defaultpagetable)); + + /* FIXME: set LPAC SMMU aperture */ + return 0; +} + +static int iommu_probe_secure_context(struct kgsl_device *device, + struct device_node *parent) +{ + struct device_node *node; + struct platform_device *pdev; + int ret; + struct 
kgsl_iommu *iommu = KGSL_IOMMU(device); + struct kgsl_mmu *mmu = &device->mmu; + struct kgsl_iommu_context *context = &iommu->secure_context; + int secure_vmid = VMID_CP_PIXEL; + + if (!mmu->secured) + return -EPERM; + + node = of_find_node_by_name(parent, "gfx3d_secure"); + if (!node) + return -ENOENT; + + pdev = of_find_device_by_node(node); + ret = of_dma_configure(&pdev->dev, node, true); + of_node_put(node); + + if (ret) + return ret; + + context->cb_num = -1; + context->name = "gfx3d_secure"; + context->kgsldev = device; + context->pdev = pdev; + ratelimit_default_init(&context->ratelimit); + + context->domain = iommu_domain_alloc(&platform_bus_type); + if (!context->domain) { + /* FIXME: put away the device */ + return -ENODEV; + } + + ret = iommu_domain_set_attr(context->domain, DOMAIN_ATTR_SECURE_VMID, + &secure_vmid); + if (ret) { + dev_err(device->dev, "Unable to set the secure VMID: %d\n", ret); + iommu_domain_free(context->domain); + context->domain = NULL; + + /* FIXME: put away the device */ + return ret; + } + + _enable_gpuhtw_llc(mmu, context->domain); + + ret = iommu_attach_device(context->domain, &context->pdev->dev); + if (ret) { + iommu_domain_free(context->domain); + /* FIXME: Put way the device */ + context->domain = NULL; + return ret; + } + + iommu_set_fault_handler(context->domain, + kgsl_iommu_secure_fault_handler, mmu); + + context->cb_num = _iommu_domain_context_bank(context->domain); + + if (context->cb_num < 0) { + iommu_detach_device(context->domain, &context->pdev->dev); + iommu_domain_free(context->domain); + context->domain = NULL; + return context->cb_num; + } + + mmu->securepagetable = kgsl_iommu_secure_pagetable(mmu); + + if (IS_ERR(mmu->securepagetable)) + mmu->secured = false; + + return 0; +} + +static const char * const kgsl_iommu_clocks[] = { + "gcc_gpu_memnoc_gfx", + "gcc_gpu_snoc_dvm_gfx", + "gpu_cc_ahb", + "gpu_cc_cx_gmu", + "gpu_cc_hlos1_vote_gpu_smmu", + "gpu_cc_hub_aon", + "gpu_cc_hub_cx_int", + "gcc_bimc_gpu_axi", + 
"gcc_gpu_ahb", + "gcc_gpu_axi_clk", +}; + +static const struct kgsl_mmu_ops kgsl_iommu_ops; + +static void kgsl_iommu_check_config(struct kgsl_mmu *mmu, + struct device_node *parent) +{ + struct device_node *node = of_find_node_by_name(parent, "gfx3d_user"); + struct device_node *phandle; + + if (!node) + return; + + phandle = of_parse_phandle(node, "iommus", 0); + + if (phandle) { + if (of_device_is_compatible(phandle, "qcom,qsmmu-v500")) + mmu->subtype = KGSL_IOMMU_SMMU_V500; + if (of_device_is_compatible(phandle, "qcom,adreno-smmu")) + set_bit(KGSL_MMU_IOPGTABLE, &mmu->features); + + of_node_put(phandle); + } + + of_node_put(node); +} + +int kgsl_iommu_probe(struct kgsl_device *device) +{ + u32 val[2]; + int ret, i; + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + struct platform_device *pdev; + struct kgsl_mmu *mmu = &device->mmu; + struct device_node *node; + struct kgsl_global_memdesc *md; + + node = of_find_compatible_node(NULL, NULL, "qcom,kgsl-smmu-v2"); + if (!node) + return -ENODEV; + + /* Create a kmem cache for the pagetable address objects */ + if (!addr_entry_cache) { + addr_entry_cache = KMEM_CACHE(kgsl_iommu_addr_entry, 0); + if (!addr_entry_cache) { + ret = -ENOMEM; + goto err; + } + } + + ret = of_property_read_u32_array(node, "reg", val, 2); + if (ret) { + dev_err(device->dev, + "%pOF: Unable to read KGSL IOMMU register range\n", + node); + goto err; + } + + iommu->regbase = devm_ioremap(&device->pdev->dev, val[0], val[1]); + if (!iommu->regbase) { + dev_err(&device->pdev->dev, "Couldn't map IOMMU registers\n"); + ret = -ENOMEM; + goto err; + } + + pdev = of_find_device_by_node(node); + iommu->pdev = pdev; + iommu->num_clks = 0; + + iommu->clks = devm_kcalloc(&pdev->dev, ARRAY_SIZE(kgsl_iommu_clocks), + sizeof(*iommu->clks), GFP_KERNEL); + if (!iommu->clks) { + platform_device_put(pdev); + ret = -ENOMEM; + goto err; + } + + for (i = 0; i < ARRAY_SIZE(kgsl_iommu_clocks); i++) { + struct clk *c; + + c = devm_clk_get(&device->pdev->dev, 
kgsl_iommu_clocks[i]); + if (IS_ERR(c)) + continue; + + iommu->clks[iommu->num_clks].id = kgsl_iommu_clocks[i]; + iommu->clks[iommu->num_clks++].clk = c; + } + + /* Get the CX regulator if it is available */ + iommu->cx_gdsc = devm_regulator_get(&pdev->dev, "vddcx"); + + set_bit(KGSL_MMU_PAGED, &mmu->features); + + mmu->type = KGSL_MMU_TYPE_IOMMU; + mmu->mmu_ops = &kgsl_iommu_ops; + + /* Fill out the rest of the devices in the node */ + of_platform_populate(node, NULL, NULL, &pdev->dev); + + /* Peek at the phandle to set up configuration */ + kgsl_iommu_check_config(mmu, node); + + /* Probe the default pagetable */ + ret = iommu_probe_user_context(device, node); + if (ret) { + of_platform_depopulate(&pdev->dev); + platform_device_put(pdev); + goto err; + } + + /* Probe the secure pagetable (this is optional) */ + iommu_probe_secure_context(device, node); + of_node_put(node); + + /* Map any globals that might have been created early */ + list_for_each_entry(md, &device->globals, node) { + + if (md->memdesc.flags & KGSL_MEMFLAGS_SECURE) { + if (IS_ERR_OR_NULL(mmu->securepagetable)) + continue; + + kgsl_iommu_secure_map(mmu->securepagetable, + &md->memdesc); + } else + kgsl_iommu_default_map(mmu->defaultpagetable, + &md->memdesc); + } + + /* QDSS is supported only when QCOM_KGSL_QDSS_STM is enabled */ + if (IS_ENABLED(CONFIG_QCOM_KGSL_QDSS_STM)) + device->qdss_desc = kgsl_allocate_global_fixed(device, + "qcom,gpu-qdss-stm", "gpu-qdss"); + + device->qtimer_desc = kgsl_allocate_global_fixed(device, + "qcom,gpu-timer", "gpu-qtimer"); + + /* + * Only support VBOs on MMU500 hardware that supports the PRR + * marker register to ignore writes to the zero page + */ + if (mmu->subtype == KGSL_IOMMU_SMMU_V500) { + /* + * We need to allocate a page because we need a known physical + * address to program in the PRR register but the hardware + * should intercept accesses to the page before they go to DDR + * so this should be mostly just a placeholder + */ + kgsl_vbo_zero_page = 
alloc_page(GFP_KERNEL | __GFP_ZERO | + __GFP_NORETRY | __GFP_HIGHMEM); + if (kgsl_vbo_zero_page) + set_bit(KGSL_MMU_SUPPORT_VBO, &mmu->features); + } + + return 0; + +err: + kmem_cache_destroy(addr_entry_cache); + addr_entry_cache = NULL; + + of_node_put(node); + return ret; +} + +static const struct kgsl_mmu_ops kgsl_iommu_ops = { + .mmu_close = kgsl_iommu_close, + .mmu_start = kgsl_iommu_start, + .mmu_clear_fsr = kgsl_iommu_clear_fsr, + .mmu_get_current_ttbr0 = kgsl_iommu_get_current_ttbr0, + .mmu_enable_clk = kgsl_iommu_enable_clk, + .mmu_disable_clk = kgsl_iommu_disable_clk, + .mmu_set_pf_policy = kgsl_iommu_set_pf_policy, + .mmu_pagefault_resume = kgsl_iommu_pagefault_resume, + .mmu_getpagetable = kgsl_iommu_getpagetable, + .mmu_map_global = kgsl_iommu_map_global, +}; + +static const struct kgsl_mmu_pt_ops iopgtbl_pt_ops = { + .mmu_map = kgsl_iopgtbl_map, + .mmu_map_child = kgsl_iopgtbl_map_child, + .mmu_map_zero_page_to_range = kgsl_iopgtbl_map_zero_page_to_range, + .mmu_unmap = kgsl_iopgtbl_unmap, + .mmu_unmap_range = kgsl_iopgtbl_unmap_range, + .mmu_destroy_pagetable = kgsl_iommu_destroy_pagetable, + .get_ttbr0 = kgsl_iommu_get_ttbr0, + .get_context_bank = kgsl_iommu_get_context_bank, + .get_gpuaddr = kgsl_iommu_get_gpuaddr, + .put_gpuaddr = kgsl_iommu_put_gpuaddr, + .set_svm_region = kgsl_iommu_set_svm_region, + .find_svm_region = kgsl_iommu_find_svm_region, + .svm_range = kgsl_iommu_svm_range, + .addr_in_range = kgsl_iommu_addr_in_range, +}; + +static const struct kgsl_mmu_pt_ops secure_pt_ops = { + .mmu_map = kgsl_iommu_secure_map, + .mmu_unmap = kgsl_iommu_secure_unmap, + .mmu_destroy_pagetable = kgsl_iommu_destroy_pagetable, + .get_context_bank = kgsl_iommu_get_context_bank, + .get_gpuaddr = kgsl_iommu_get_gpuaddr, + .put_gpuaddr = kgsl_iommu_put_gpuaddr, + .addr_in_range = kgsl_iommu_addr_in_range, +}; + +static const struct kgsl_mmu_pt_ops default_pt_ops = { + .mmu_map = kgsl_iommu_default_map, + .mmu_unmap = kgsl_iommu_default_unmap, + 
.mmu_destroy_pagetable = kgsl_iommu_destroy_default_pagetable, + .get_ttbr0 = kgsl_iommu_get_ttbr0, + .get_context_bank = kgsl_iommu_get_context_bank, + .get_gpuaddr = kgsl_iommu_get_gpuaddr, + .put_gpuaddr = kgsl_iommu_put_gpuaddr, + .addr_in_range = kgsl_iommu_addr_in_range, +}; diff --git a/kgsl_iommu.h b/kgsl_iommu.h new file mode 100644 index 0000000000..4632992831 --- /dev/null +++ b/kgsl_iommu.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. + */ +#ifndef __KGSL_IOMMU_H +#define __KGSL_IOMMU_H + +#include +#include +/* + * These defines control the address range for allocations that + * are mapped into all pagetables. + */ +#define KGSL_IOMMU_GLOBAL_MEM_SIZE (20 * SZ_1M) +#define KGSL_IOMMU_GLOBAL_MEM_BASE32 0xf8000000 +#define KGSL_IOMMU_GLOBAL_MEM_BASE64 0xfc000000 + +/* + * This is a dummy token address that we use to identify memstore when the user + * wants to map it. mmap() uses a unsigned long for the offset so we need a 32 + * bit value that works with all sized apps. We chose a value that was purposely + * unmapped so if you increase the global memory size make sure it doesn't + * conflict + */ + +#define KGSL_MEMSTORE_TOKEN_ADDRESS 0xfff00000 + +#define KGSL_IOMMU_GLOBAL_MEM_BASE(__mmu) \ + (test_bit(KGSL_MMU_64BIT, &(__mmu)->features) ? \ + KGSL_IOMMU_GLOBAL_MEM_BASE64 : KGSL_IOMMU_GLOBAL_MEM_BASE32) + +#define KGSL_IOMMU_SVM_BASE32 0x300000 +#define KGSL_IOMMU_SVM_END32 (0xC0000000 - SZ_16M) + +/* + * Limit secure size to 256MB for 32bit kernels. + */ +#define KGSL_IOMMU_SECURE_SIZE32 SZ_256M +#define KGSL_IOMMU_SECURE_END32(_mmu) KGSL_IOMMU_GLOBAL_MEM_BASE(_mmu) +#define KGSL_IOMMU_SECURE_BASE32(_mmu) \ + (KGSL_IOMMU_GLOBAL_MEM_BASE(_mmu) - KGSL_IOMMU_SECURE_SIZE32) + +/* + * Try to use maximum allowed secure size i.e 0xFFFFF000 + * for both 32bit and 64bit secure apps when using 64bit kernel. 
+ */ +#define KGSL_IOMMU_SECURE_BASE64 0x0100000000ULL +#define KGSL_IOMMU_SECURE_END64 0x01FFFFF000ULL +#define KGSL_IOMMU_SECURE_SIZE64 \ + (KGSL_IOMMU_SECURE_END64 - KGSL_IOMMU_SECURE_BASE64) + +#define KGSL_IOMMU_SECURE_BASE(_mmu) (test_bit(KGSL_MMU_64BIT, \ + &(_mmu)->features) ? KGSL_IOMMU_SECURE_BASE64 : \ + KGSL_IOMMU_SECURE_BASE32(_mmu)) +#define KGSL_IOMMU_SECURE_END(_mmu) (test_bit(KGSL_MMU_64BIT, \ + &(_mmu)->features) ? KGSL_IOMMU_SECURE_END64 : \ + KGSL_IOMMU_SECURE_END32(_mmu)) +#define KGSL_IOMMU_SECURE_SIZE(_mmu) (test_bit(KGSL_MMU_64BIT, \ + &(_mmu)->features) ? KGSL_IOMMU_SECURE_SIZE64 : \ + KGSL_IOMMU_SECURE_SIZE32) + +/* The CPU supports 39 bit addresses */ +#define KGSL_IOMMU_SVM_BASE64 0x1000000000ULL +#define KGSL_IOMMU_SVM_END64 0x4000000000ULL +#define KGSL_IOMMU_VA_BASE64 0x4000000000ULL +#define KGSL_IOMMU_VA_END64 0x8000000000ULL + +#define CP_APERTURE_REG 0 +#define CP_SMMU_APERTURE_ID 0x1B + +/* Global SMMU register offsets */ +#define KGSL_IOMMU_PRR_CFG_LADDR 0x6008 +#define KGSL_IOMMU_PRR_CFG_UADDR 0x600c + +/* Register offsets */ +#define KGSL_IOMMU_CTX_SCTLR 0x0000 +#define KGSL_IOMMU_CTX_ACTLR 0x0004 +#define KGSL_IOMMU_CTX_TTBR0 0x0020 +#define KGSL_IOMMU_CTX_CONTEXTIDR 0x0034 +#define KGSL_IOMMU_CTX_FSR 0x0058 +#define KGSL_IOMMU_CTX_TLBIALL 0x0618 +#define KGSL_IOMMU_CTX_RESUME 0x0008 +#define KGSL_IOMMU_CTX_FSYNR0 0x0068 +#define KGSL_IOMMU_CTX_FSYNR1 0x006c +#define KGSL_IOMMU_CTX_TLBSYNC 0x07f0 +#define KGSL_IOMMU_CTX_TLBSTATUS 0x07f4 + +/* TLBSTATUS register fields */ +#define KGSL_IOMMU_CTX_TLBSTATUS_SACTIVE BIT(0) + +/* SCTLR fields */ +#define KGSL_IOMMU_SCTLR_HUPCF_SHIFT 8 +#define KGSL_IOMMU_SCTLR_CFCFG_SHIFT 7 +#define KGSL_IOMMU_SCTLR_CFIE_SHIFT 6 + +#define KGSL_IOMMU_ACTLR_PRR_ENABLE BIT(5) + +/* FSR fields */ +#define KGSL_IOMMU_FSR_SS_SHIFT 30 + +/* offset at which a nop command is placed in setstate */ +#define KGSL_IOMMU_SETSTATE_NOP_OFFSET 1024 + +/* + * struct kgsl_iommu_context - Structure holding data 
about an iommu context + * bank + * @pdev: pointer to the iommu context's platform device + * @name: context name + * @id: The id of the context, used for deciding how it is used. + * @cb_num: The hardware context bank number, used for calculating register + * offsets. + * @kgsldev: The kgsl device that uses this context. + * @stalled_on_fault: Flag when set indicates that this iommu device is stalled + * on a page fault + */ +struct kgsl_iommu_context { + struct platform_device *pdev; + const char *name; + int cb_num; + struct kgsl_device *kgsldev; + bool stalled_on_fault; + /** ratelimit: Ratelimit state for the context */ + struct ratelimit_state ratelimit; + struct iommu_domain *domain; + struct adreno_smmu_priv adreno_smmu; +}; + +/* + * struct kgsl_iommu - Structure holding iommu data for kgsl driver + * @regbase: Virtual address of the IOMMU register base + * @regstart: Physical address of the iommu registers + * @regsize: Length of the iommu register region. + * @setstate: Scratch GPU memory for IOMMU operations + * @clk_enable_count: The ref count of clock enable calls + * @clks: Array of pointers to IOMMU clocks + * @smmu_info: smmu info used in a5xx preemption + */ +struct kgsl_iommu { + /** @user_context: Container for the user iommu context */ + struct kgsl_iommu_context user_context; + /** @secure_context: Container for the secure iommu context */ + struct kgsl_iommu_context secure_context; + /** @lpac_context: Container for the LPAC iommu context */ + struct kgsl_iommu_context lpac_context; + void __iomem *regbase; + struct kgsl_memdesc *setstate; + atomic_t clk_enable_count; + struct clk_bulk_data *clks; + int num_clks; + struct kgsl_memdesc *smmu_info; + /** @pdev: Pointer to the platform device for the IOMMU device */ + struct platform_device *pdev; + /** + * @ppt_active: Set when the first per process pagetable is created. 
+ * This is used to warn when global buffers are created that might not + * be mapped in all contexts + */ + bool ppt_active; + /** @cb0_offset: Offset of context bank 0 from iommu register base */ + u32 cb0_offset; + /** @pagesize: Size of each context bank register space */ + u32 pagesize; + /** @cx_gdsc: CX GDSC handle in case the IOMMU needs it */ + struct regulator *cx_gdsc; +}; + +/* + * struct kgsl_iommu_pt - Iommu pagetable structure private to kgsl driver + * @domain: Pointer to the iommu domain that contains the iommu pagetable + * @ttbr0: register value to set when using this pagetable + */ +struct kgsl_iommu_pt { + struct kgsl_pagetable base; + u64 ttbr0; + + struct io_pgtable_ops *pgtbl_ops; + struct io_pgtable_cfg cfg; +}; + +#endif diff --git a/kgsl_mmu.c b/kgsl_mmu.c new file mode 100644 index 0000000000..c0cc54f202 --- /dev/null +++ b/kgsl_mmu.c @@ -0,0 +1,618 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. 
+ */ + +#include + +#include "kgsl_device.h" +#include "kgsl_mmu.h" +#include "kgsl_sharedmem.h" + +static void pagetable_remove_sysfs_objects(struct kgsl_pagetable *pagetable); + +static void _deferred_destroy(struct work_struct *ws) +{ + struct kgsl_pagetable *pagetable = container_of(ws, + struct kgsl_pagetable, destroy_ws); + + WARN_ON(!list_empty(&pagetable->list)); + + pagetable->pt_ops->mmu_destroy_pagetable(pagetable); +} + +static void kgsl_destroy_pagetable(struct kref *kref) +{ + struct kgsl_pagetable *pagetable = container_of(kref, + struct kgsl_pagetable, refcount); + + kgsl_mmu_detach_pagetable(pagetable); + + kgsl_schedule_work(&pagetable->destroy_ws); +} + +struct kgsl_pagetable * +kgsl_get_pagetable(unsigned long name) +{ + struct kgsl_pagetable *pt, *ret = NULL; + unsigned long flags; + + spin_lock_irqsave(&kgsl_driver.ptlock, flags); + list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) { + if (name == pt->name && kref_get_unless_zero(&pt->refcount)) { + ret = pt; + break; + } + } + + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); + return ret; +} + +static struct kgsl_pagetable * +_get_pt_from_kobj(struct kobject *kobj) +{ + unsigned int ptname; + + if (!kobj) + return NULL; + + if (kstrtou32(kobj->name, 0, &ptname)) + return NULL; + + return kgsl_get_pagetable(ptname); +} + +static ssize_t +sysfs_show_entries(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_pagetable *pt; + int ret = 0; + + pt = _get_pt_from_kobj(kobj); + + if (pt) { + unsigned int val = atomic_read(&pt->stats.entries); + + ret += scnprintf(buf, PAGE_SIZE, "%d\n", val); + } + + kref_put(&pt->refcount, kgsl_destroy_pagetable); + return ret; +} + +static ssize_t +sysfs_show_mapped(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_pagetable *pt; + int ret = 0; + + pt = _get_pt_from_kobj(kobj); + + if (pt) { + uint64_t val = atomic_long_read(&pt->stats.mapped); + + ret += scnprintf(buf, PAGE_SIZE, 
"%llu\n", val); + } + + kref_put(&pt->refcount, kgsl_destroy_pagetable); + return ret; +} + +static ssize_t +sysfs_show_max_mapped(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_pagetable *pt; + int ret = 0; + + pt = _get_pt_from_kobj(kobj); + + if (pt) { + uint64_t val = atomic_long_read(&pt->stats.max_mapped); + + ret += scnprintf(buf, PAGE_SIZE, "%llu\n", val); + } + + kref_put(&pt->refcount, kgsl_destroy_pagetable); + return ret; +} + +static struct kobj_attribute attr_entries = { + .attr = { .name = "entries", .mode = 0444 }, + .show = sysfs_show_entries, + .store = NULL, +}; + +static struct kobj_attribute attr_mapped = { + .attr = { .name = "mapped", .mode = 0444 }, + .show = sysfs_show_mapped, + .store = NULL, +}; + +static struct kobj_attribute attr_max_mapped = { + .attr = { .name = "max_mapped", .mode = 0444 }, + .show = sysfs_show_max_mapped, + .store = NULL, +}; + +static struct attribute *pagetable_attrs[] = { + &attr_entries.attr, + &attr_mapped.attr, + &attr_max_mapped.attr, + NULL, +}; + +static struct attribute_group pagetable_attr_group = { + .attrs = pagetable_attrs, +}; + +static void +pagetable_remove_sysfs_objects(struct kgsl_pagetable *pagetable) +{ + if (pagetable->kobj) + sysfs_remove_group(pagetable->kobj, + &pagetable_attr_group); + + kobject_put(pagetable->kobj); + pagetable->kobj = NULL; +} + +static int +pagetable_add_sysfs_objects(struct kgsl_pagetable *pagetable) +{ + char ptname[16]; + int ret = -ENOMEM; + + snprintf(ptname, sizeof(ptname), "%d", pagetable->name); + pagetable->kobj = kobject_create_and_add(ptname, + kgsl_driver.ptkobj); + if (pagetable->kobj == NULL) + goto err; + + ret = sysfs_create_group(pagetable->kobj, &pagetable_attr_group); + +err: + if (ret) { + if (pagetable->kobj) + kobject_put(pagetable->kobj); + + pagetable->kobj = NULL; + } + + return ret; +} + +#ifdef CONFIG_TRACE_GPU_MEM +static void kgsl_mmu_trace_gpu_mem_pagetable(struct kgsl_pagetable *pagetable) +{ + if 
(pagetable->name == KGSL_MMU_GLOBAL_PT || + pagetable->name == KGSL_MMU_SECURE_PT) + return; + + trace_gpu_mem_total(0, pagetable->name, + (u64)atomic_long_read(&pagetable->stats.mapped)); +} +#else +static void kgsl_mmu_trace_gpu_mem_pagetable(struct kgsl_pagetable *pagetable) +{ +} +#endif + +void +kgsl_mmu_detach_pagetable(struct kgsl_pagetable *pagetable) +{ + unsigned long flags; + + spin_lock_irqsave(&kgsl_driver.ptlock, flags); + + if (!list_empty(&pagetable->list)) + list_del_init(&pagetable->list); + + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); + + pagetable_remove_sysfs_objects(pagetable); +} + +unsigned int +kgsl_mmu_log_fault_addr(struct kgsl_mmu *mmu, u64 pt_base, + uint64_t addr) +{ + struct kgsl_pagetable *pt; + unsigned int ret = 0; + + spin_lock(&kgsl_driver.ptlock); + list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) { + if (kgsl_mmu_pagetable_get_ttbr0(pt) == pt_base) { + if ((addr & ~(PAGE_SIZE-1)) == pt->fault_addr) { + ret = 1; + break; + } + pt->fault_addr = (addr & ~(PAGE_SIZE-1)); + ret = 0; + break; + } + } + spin_unlock(&kgsl_driver.ptlock); + + return ret; +} + +int kgsl_mmu_start(struct kgsl_device *device) +{ + struct kgsl_mmu *mmu = &device->mmu; + + if (MMU_OP_VALID(mmu, mmu_start)) + return mmu->mmu_ops->mmu_start(mmu); + + return 0; +} + +void kgsl_mmu_pagetable_init(struct kgsl_mmu *mmu, + struct kgsl_pagetable *pagetable, u32 name) +{ + kref_init(&pagetable->refcount); + + spin_lock_init(&pagetable->lock); + INIT_WORK(&pagetable->destroy_ws, _deferred_destroy); + + pagetable->mmu = mmu; + pagetable->name = name; + + atomic_set(&pagetable->stats.entries, 0); + atomic_long_set(&pagetable->stats.mapped, 0); + atomic_long_set(&pagetable->stats.max_mapped, 0); +} + +void kgsl_mmu_pagetable_add(struct kgsl_mmu *mmu, struct kgsl_pagetable *pagetable) +{ + unsigned long flags; + + spin_lock_irqsave(&kgsl_driver.ptlock, flags); + list_add(&pagetable->list, &kgsl_driver.pagetable_list); + 
spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); + + /* Create the sysfs entries */ + pagetable_add_sysfs_objects(pagetable); +} + +void kgsl_mmu_putpagetable(struct kgsl_pagetable *pagetable) +{ + if (!IS_ERR_OR_NULL(pagetable)) + kref_put(&pagetable->refcount, kgsl_destroy_pagetable); +} + +/** + * kgsl_mmu_find_svm_region() - Find a empty spot in the SVM region + * @pagetable: KGSL pagetable to search + * @start: start of search range, must be within kgsl_mmu_svm_range() + * @end: end of search range, must be within kgsl_mmu_svm_range() + * @size: Size of the region to find + * @align: Desired alignment of the address + */ +uint64_t kgsl_mmu_find_svm_region(struct kgsl_pagetable *pagetable, + uint64_t start, uint64_t end, uint64_t size, + uint64_t align) +{ + if (PT_OP_VALID(pagetable, find_svm_region)) + return pagetable->pt_ops->find_svm_region(pagetable, start, + end, size, align); + return -ENOMEM; +} + +/** + * kgsl_mmu_set_svm_region() - Check if a region is empty and reserve it if so + * @pagetable: KGSL pagetable to search + * @gpuaddr: GPU address to check/reserve + * @size: Size of the region to check/reserve + */ +int kgsl_mmu_set_svm_region(struct kgsl_pagetable *pagetable, uint64_t gpuaddr, + uint64_t size) +{ + if (PT_OP_VALID(pagetable, set_svm_region)) + return pagetable->pt_ops->set_svm_region(pagetable, gpuaddr, + size); + return -ENOMEM; +} + +int +kgsl_mmu_map(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + int size; + struct kgsl_device *device = KGSL_MMU_DEVICE(pagetable->mmu); + + if (!memdesc->gpuaddr) + return -EINVAL; + /* Only global mappings should be mapped multiple times */ + if (!kgsl_memdesc_is_global(memdesc) && + (KGSL_MEMDESC_MAPPED & memdesc->priv)) + return -EINVAL; + + if (memdesc->flags & KGSL_MEMFLAGS_VBO) + return -EINVAL; + + size = kgsl_memdesc_footprint(memdesc); + + if (PT_OP_VALID(pagetable, mmu_map)) { + int ret; + + ret = pagetable->pt_ops->mmu_map(pagetable, memdesc); + if (ret) + return 
ret; + + atomic_inc(&pagetable->stats.entries); + KGSL_STATS_ADD(size, &pagetable->stats.mapped, + &pagetable->stats.max_mapped); + kgsl_mmu_trace_gpu_mem_pagetable(pagetable); + + if (!kgsl_memdesc_is_global(memdesc) + && !(memdesc->flags & KGSL_MEMFLAGS_USERMEM_ION)) { + kgsl_trace_gpu_mem_total(device, size); + } + + memdesc->priv |= KGSL_MEMDESC_MAPPED; + } + + return 0; +} + +int kgsl_mmu_map_child(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc, u64 offset, + struct kgsl_memdesc *child, u64 child_offset, + u64 length) +{ + /* This only makes sense for virtual buffer objects */ + if (!(memdesc->flags & KGSL_MEMFLAGS_VBO)) + return -EINVAL; + + if (!memdesc->gpuaddr) + return -EINVAL; + + if (PT_OP_VALID(pt, mmu_map_child)) { + int ret; + + ret = pt->pt_ops->mmu_map_child(pt, memdesc, + offset, child, child_offset, length); + if (ret) + return ret; + + KGSL_STATS_ADD(length, &pt->stats.mapped, + &pt->stats.max_mapped); + } + + return 0; +} + +int kgsl_mmu_map_zero_page_to_range(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc, u64 start, u64 length) +{ + int ret = -EINVAL; + + /* This only makes sense for virtual buffer objects */ + if (!(memdesc->flags & KGSL_MEMFLAGS_VBO)) + return -EINVAL; + + if (!memdesc->gpuaddr) + return -EINVAL; + + if (PT_OP_VALID(pt, mmu_map_zero_page_to_range)) { + ret = pt->pt_ops->mmu_map_zero_page_to_range(pt, + memdesc, start, length); + if (ret) + return ret; + + KGSL_STATS_ADD(length, &pt->stats.mapped, + &pt->stats.max_mapped); + } + + return 0; +} + +/** + * kgsl_mmu_svm_range() - Return the range for SVM (if applicable) + * @pagetable: Pagetable to query the range from + * @lo: Pointer to store the start of the SVM range + * @hi: Pointer to store the end of the SVM range + * @memflags: Flags from the buffer we are mapping + */ +int kgsl_mmu_svm_range(struct kgsl_pagetable *pagetable, + uint64_t *lo, uint64_t *hi, uint64_t memflags) +{ + if (PT_OP_VALID(pagetable, svm_range)) + return 
pagetable->pt_ops->svm_range(pagetable, lo, hi, + memflags); + + return -ENODEV; +} + +int +kgsl_mmu_unmap(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + int ret = 0; + struct kgsl_device *device = KGSL_MMU_DEVICE(pagetable->mmu); + + if (memdesc->size == 0) + return -EINVAL; + + if ((memdesc->flags & KGSL_MEMFLAGS_VBO)) + return -EINVAL; + + /* Only global mappings should be mapped multiple times */ + if (!(KGSL_MEMDESC_MAPPED & memdesc->priv)) + return -EINVAL; + + if (PT_OP_VALID(pagetable, mmu_unmap)) { + uint64_t size; + + size = kgsl_memdesc_footprint(memdesc); + + ret = pagetable->pt_ops->mmu_unmap(pagetable, memdesc); + + atomic_dec(&pagetable->stats.entries); + atomic_long_sub(size, &pagetable->stats.mapped); + kgsl_mmu_trace_gpu_mem_pagetable(pagetable); + + if (!kgsl_memdesc_is_global(memdesc)) { + memdesc->priv &= ~KGSL_MEMDESC_MAPPED; + if (!(memdesc->flags & KGSL_MEMFLAGS_USERMEM_ION)) + kgsl_trace_gpu_mem_total(device, -(size)); + } + } + + return ret; +} + +int +kgsl_mmu_unmap_range(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc, u64 offset, u64 length) +{ + int ret = 0; + + /* Only allow virtual buffer objects to use this function */ + if (!(memdesc->flags & KGSL_MEMFLAGS_VBO)) + return -EINVAL; + + if (PT_OP_VALID(pagetable, mmu_unmap_range)) { + ret = pagetable->pt_ops->mmu_unmap_range(pagetable, memdesc, + offset, length); + + atomic_long_sub(length, &pagetable->stats.mapped); + } + + return ret; +} + +void kgsl_mmu_map_global(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, u32 padding) +{ + struct kgsl_mmu *mmu = &(device->mmu); + + if (MMU_OP_VALID(mmu, mmu_map_global)) + mmu->mmu_ops->mmu_map_global(mmu, memdesc, padding); +} + +void kgsl_mmu_close(struct kgsl_device *device) +{ + struct kgsl_mmu *mmu = &(device->mmu); + + if (MMU_OP_VALID(mmu, mmu_close)) + mmu->mmu_ops->mmu_close(mmu); +} + +int kgsl_mmu_pagetable_get_context_bank(struct kgsl_pagetable *pagetable) +{ + if 
(PT_OP_VALID(pagetable, get_context_bank)) + return pagetable->pt_ops->get_context_bank(pagetable); + + return -ENOENT; +} + +enum kgsl_mmutype kgsl_mmu_get_mmutype(struct kgsl_device *device) +{ + return device ? device->mmu.type : KGSL_MMU_TYPE_NONE; +} + +bool kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr) +{ + if (PT_OP_VALID(pagetable, addr_in_range)) + return pagetable->pt_ops->addr_in_range(pagetable, gpuaddr); + + return false; +} + +/* + * NOMMU definitions - NOMMU really just means that the MMU is kept in pass + * through and the GPU directly accesses physical memory. Used in debug mode + * and when a real MMU isn't up and running yet. + */ + +static bool nommu_gpuaddr_in_range(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr) +{ + return (gpuaddr != 0) ? true : false; +} + +static int nommu_get_gpuaddr(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + if (WARN_ONCE(memdesc->sgt->nents > 1, + "Attempt to map non-contiguous memory with NOMMU\n")) + return -EINVAL; + + memdesc->gpuaddr = (uint64_t) sg_phys(memdesc->sgt->sgl); + + if (memdesc->gpuaddr) { + memdesc->pagetable = pagetable; + return 0; + } + + return -ENOMEM; +} + +static void nommu_destroy_pagetable(struct kgsl_pagetable *pt) +{ + kfree(pt); +} + +static const struct kgsl_mmu_pt_ops nommu_pt_ops = { + .get_gpuaddr = nommu_get_gpuaddr, + .addr_in_range = nommu_gpuaddr_in_range, + .mmu_destroy_pagetable = nommu_destroy_pagetable, +}; + +static struct kgsl_pagetable *nommu_getpagetable(struct kgsl_mmu *mmu, + unsigned long name) +{ + struct kgsl_device *device = KGSL_MMU_DEVICE(mmu); + struct kgsl_pagetable *pagetable; + struct kgsl_global_memdesc *md; + + pagetable = kgsl_get_pagetable(KGSL_MMU_GLOBAL_PT); + + if (pagetable == NULL) { + pagetable = kzalloc(sizeof(*pagetable), GFP_KERNEL); + if (!pagetable) + return ERR_PTR(-ENOMEM); + + kgsl_mmu_pagetable_init(mmu, pagetable, KGSL_MMU_GLOBAL_PT); + pagetable->pt_ops = &nommu_pt_ops; + + 
list_for_each_entry(md, &device->globals, node) + md->memdesc.gpuaddr = + (uint64_t) sg_phys(md->memdesc.sgt->sgl); + + kgsl_mmu_pagetable_add(mmu, pagetable); + } + + return pagetable; +} + +static struct kgsl_mmu_ops kgsl_nommu_ops = { + .mmu_getpagetable = nommu_getpagetable, +}; + +int kgsl_mmu_probe(struct kgsl_device *device) +{ + struct kgsl_mmu *mmu = &device->mmu; + int ret; + + /* + * Try to probe for the IOMMU and if it doesn't exist for some reason + * go for the NOMMU option instead + */ + ret = kgsl_iommu_probe(device); + if (!ret || ret == -EPROBE_DEFER) + return ret; + + mmu->mmu_ops = &kgsl_nommu_ops; + mmu->type = KGSL_MMU_TYPE_NONE; + return 0; +} diff --git a/kgsl_mmu.h b/kgsl_mmu.h new file mode 100644 index 0000000000..0852ca7097 --- /dev/null +++ b/kgsl_mmu.h @@ -0,0 +1,393 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. + */ +#ifndef __KGSL_MMU_H +#define __KGSL_MMU_H + +#include + +/* Identifier for the global page table */ +/* + * Per process page tables will probably pass in the thread group + * as an identifier + */ +#define KGSL_MMU_GLOBAL_PT 0 +#define KGSL_MMU_SECURE_PT 1 + +#define MMU_DEFAULT_TTBR0(_d) \ + (kgsl_mmu_pagetable_get_ttbr0((_d)->mmu.defaultpagetable)) + +#define KGSL_MMU_DEVICE(_mmu) \ + container_of((_mmu), struct kgsl_device, mmu) + +/** + * enum kgsl_ft_pagefault_policy_bits - KGSL pagefault policy bits + * @KGSL_FT_PAGEFAULT_INT_ENABLE: No longer used, but retained for compatibility + * @KGSL_FT_PAGEFAULT_GPUHALT_ENABLE: enable GPU halt on pagefaults + * @KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE: log one pagefault per page + * @KGSL_FT_PAGEFAULT_LOG_ONE_PER_INT: log one pagefault per interrupt + */ +enum { + KGSL_FT_PAGEFAULT_INT_ENABLE = 0, + KGSL_FT_PAGEFAULT_GPUHALT_ENABLE = 1, + KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE = 2, + KGSL_FT_PAGEFAULT_LOG_ONE_PER_INT = 3, + /* KGSL_FT_PAGEFAULT_MAX_BITS is used to calculate the mask */ + 
KGSL_FT_PAGEFAULT_MAX_BITS, +}; + +#define KGSL_FT_PAGEFAULT_MASK GENMASK(KGSL_FT_PAGEFAULT_MAX_BITS - 1, 0) + +#define KGSL_FT_PAGEFAULT_DEFAULT_POLICY 0 + +struct kgsl_device; + +enum kgsl_mmutype { + KGSL_MMU_TYPE_IOMMU = 0, + KGSL_MMU_TYPE_NONE +}; + +#define KGSL_IOMMU_SMMU_V500 1 + +struct kgsl_pagetable { + spinlock_t lock; + struct kref refcount; + struct list_head list; + unsigned int name; + struct kobject *kobj; + struct work_struct destroy_ws; + + struct { + atomic_t entries; + atomic_long_t mapped; + atomic_long_t max_mapped; + } stats; + const struct kgsl_mmu_pt_ops *pt_ops; + uint64_t fault_addr; + struct kgsl_mmu *mmu; + /** @rbtree: all buffers mapped into the pagetable, indexed by gpuaddr */ + struct rb_root rbtree; + /** @va_start: Start of virtual range used in this pagetable */ + u64 va_start; + /** @va_end: End of virtual range */ + u64 va_end; + /** + * @svm_start: Start of shared virtual memory range. Addresses in this + * range are also valid in the process's CPU address space. 
+ */ + u64 svm_start; + /** @svm_end: end of 32 bit compatible range */ + u64 svm_end; + /** + * @compat_va_start - Start of the "compat" virtual address range for + * forced 32 bit allocations + */ + u64 compat_va_start; + /** + * @compat_va_end - End of the "compat" virtual address range for + * forced 32 bit allocations + */ + u64 compat_va_end; + u64 global_base; +}; + +struct kgsl_mmu; + +struct kgsl_mmu_ops { + void (*mmu_close)(struct kgsl_mmu *mmu); + int (*mmu_start)(struct kgsl_mmu *mmu); + uint64_t (*mmu_get_current_ttbr0)(struct kgsl_mmu *mmu); + void (*mmu_pagefault_resume)(struct kgsl_mmu *mmu, bool terminate); + void (*mmu_clear_fsr)(struct kgsl_mmu *mmu); + void (*mmu_enable_clk)(struct kgsl_mmu *mmu); + void (*mmu_disable_clk)(struct kgsl_mmu *mmu); + int (*mmu_set_pf_policy)(struct kgsl_mmu *mmu, unsigned long pf_policy); + int (*mmu_init_pt)(struct kgsl_mmu *mmu, struct kgsl_pagetable *pt); + struct kgsl_pagetable * (*mmu_getpagetable)(struct kgsl_mmu *mmu, + unsigned long name); + void (*mmu_map_global)(struct kgsl_mmu *mmu, + struct kgsl_memdesc *memdesc, u32 padding); +}; + +struct kgsl_mmu_pt_ops { + int (*mmu_map)(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc); + int (*mmu_map_child)(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc, u64 offset, + struct kgsl_memdesc *child, u64 child_offset, + u64 length); + int (*mmu_map_zero_page_to_range)(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc, u64 start, u64 length); + int (*mmu_unmap)(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc); + int (*mmu_unmap_range)(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc, u64 offset, u64 length); + void (*mmu_destroy_pagetable)(struct kgsl_pagetable *pt); + u64 (*get_ttbr0)(struct kgsl_pagetable *pt); + int (*get_context_bank)(struct kgsl_pagetable *pt); + int (*get_gpuaddr)(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc); + void (*put_gpuaddr)(struct kgsl_memdesc *memdesc); + uint64_t 
(*find_svm_region)(struct kgsl_pagetable *pt, uint64_t start, + uint64_t end, uint64_t size, uint64_t align); + int (*set_svm_region)(struct kgsl_pagetable *pt, + uint64_t gpuaddr, uint64_t size); + int (*svm_range)(struct kgsl_pagetable *pt, uint64_t *lo, uint64_t *hi, + uint64_t memflags); + bool (*addr_in_range)(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr); +}; + +enum kgsl_mmu_feature { + /* @KGSL_MMU_64BIT: Use 64 bit virtual address space */ + KGSL_MMU_64BIT, + /* @KGSL_MMU_PAGED: Support paged memory */ + KGSL_MMU_PAGED, + /* + * @KGSL_MMU_NEED_GUARD_PAGE: Set if a guard page is needed for each + * mapped region + */ + KGSL_MMU_NEED_GUARD_PAGE, + /** @KGSL_MMU_IO_COHERENT: Set if a device supports I/O coherency */ + KGSL_MMU_IO_COHERENT, + /** @KGSL_MMU_LLC_ENABLE: Set if LLC is activated for the target */ + KGSL_MMU_LLCC_ENABLE, + /** @KGSL_MMU_SMMU_APERTURE: Set the SMMU aperture */ + KGSL_MMU_SMMU_APERTURE, + /** + * @KGSL_MMU_IOPGTABLE: Set if the qcom,adreno-smmu implementation is + * available. 
Implies split address space and per-process pagetables + */ + KGSL_MMU_IOPGTABLE, + /** @KGSL_MMU_SUPPORT_VBO: Non-secure VBOs are supported */ + KGSL_MMU_SUPPORT_VBO, +}; + +#include "kgsl_iommu.h" + +/** + * struct kgsl_mmu - Master definition for KGSL MMU devices + * @flags: MMU device flags + * @type: Type of MMU that is attached + * @subtype: Sub Type of MMU that is attached + * @defaultpagetable: Default pagetable object for the MMU + * @securepagetable: Default secure pagetable object for the MMU + * @mmu_ops: Function pointers for the MMU sub-type + * @secured: True if the MMU needs to be secured + * @feature: Static list of MMU features + */ +struct kgsl_mmu { + unsigned long flags; + enum kgsl_mmutype type; + u32 subtype; + struct kgsl_pagetable *defaultpagetable; + struct kgsl_pagetable *securepagetable; + const struct kgsl_mmu_ops *mmu_ops; + bool secured; + unsigned long features; + /** @pfpolicy: The current pagefault policy for the device */ + unsigned long pfpolicy; + /** mmu: Pointer to the IOMMU sub-device */ + struct kgsl_iommu iommu; +}; + +#define KGSL_IOMMU(d) (&((d)->mmu.iommu)) + +int kgsl_mmu_probe(struct kgsl_device *device); +int kgsl_mmu_start(struct kgsl_device *device); + +void kgsl_print_global_pt_entries(struct seq_file *s); +void kgsl_mmu_putpagetable(struct kgsl_pagetable *pagetable); + +int kgsl_mmu_map(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc); +int kgsl_mmu_map_child(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc, u64 offset, + struct kgsl_memdesc *child, u64 child_offset, + u64 length); +int kgsl_mmu_map_zero_page_to_range(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc, u64 start, u64 length); +int kgsl_mmu_unmap(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc); +int kgsl_mmu_unmap_range(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc, u64 offset, u64 length); +unsigned int kgsl_mmu_log_fault_addr(struct kgsl_mmu *mmu, + u64 ttbr0, uint64_t addr); +bool 
kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pt, uint64_t gpuaddr); + +int kgsl_mmu_get_region(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr, uint64_t size); + +int kgsl_mmu_find_region(struct kgsl_pagetable *pagetable, + uint64_t region_start, uint64_t region_end, + uint64_t *gpuaddr, uint64_t size, unsigned int align); + +void kgsl_mmu_close(struct kgsl_device *device); + +uint64_t kgsl_mmu_find_svm_region(struct kgsl_pagetable *pagetable, + uint64_t start, uint64_t end, uint64_t size, + uint64_t alignment); + +int kgsl_mmu_set_svm_region(struct kgsl_pagetable *pagetable, uint64_t gpuaddr, + uint64_t size); + +void kgsl_mmu_detach_pagetable(struct kgsl_pagetable *pagetable); + +int kgsl_mmu_svm_range(struct kgsl_pagetable *pagetable, + uint64_t *lo, uint64_t *hi, uint64_t memflags); + +struct kgsl_pagetable *kgsl_get_pagetable(unsigned long name); + +/* + * Static inline functions of MMU that simply call the SMMU specific + * function using a function pointer. These functions can be thought + * of as wrappers around the actual function + */ + +#define MMU_OP_VALID(_mmu, _field) \ + (((_mmu) != NULL) && \ + ((_mmu)->mmu_ops != NULL) && \ + ((_mmu)->mmu_ops->_field != NULL)) + +#define PT_OP_VALID(_pt, _field) \ + (((_pt) != NULL) && \ + ((_pt)->pt_ops != NULL) && \ + ((_pt)->pt_ops->_field != NULL)) + +/** + * kgsl_mmu_get_gpuaddr - Assign a GPU address to the memdesc + * @pagetable: GPU pagetable to assign the address in + * @memdesc: mem descriptor to assign the memory to + * + * Return: 0 on success or negative on failure + */ +static inline int kgsl_mmu_get_gpuaddr(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + if (PT_OP_VALID(pagetable, get_gpuaddr)) + return pagetable->pt_ops->get_gpuaddr(pagetable, memdesc); + + return -ENOMEM; +} + +/** + * kgsl_mmu_put_gpuaddr - Remove a GPU address from a pagetable + * @pagetable: Pagetable to release the memory from + * @memdesc: Memory descriptor containing the GPU address to free + * 
+ * Release a GPU address in the MMU virtual address space. + */ +static inline void kgsl_mmu_put_gpuaddr(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + if (PT_OP_VALID(pagetable, put_gpuaddr)) + pagetable->pt_ops->put_gpuaddr(memdesc); +} + +static inline u64 kgsl_mmu_get_current_ttbr0(struct kgsl_mmu *mmu) +{ + if (MMU_OP_VALID(mmu, mmu_get_current_ttbr0)) + return mmu->mmu_ops->mmu_get_current_ttbr0(mmu); + + return 0; +} + +static inline struct kgsl_pagetable *kgsl_mmu_getpagetable(struct kgsl_mmu *mmu, + unsigned long name) +{ + if (MMU_OP_VALID(mmu, mmu_getpagetable)) + return mmu->mmu_ops->mmu_getpagetable(mmu, name); + + return NULL; +} + +static inline void kgsl_mmu_enable_clk(struct kgsl_mmu *mmu) +{ + if (MMU_OP_VALID(mmu, mmu_enable_clk)) + mmu->mmu_ops->mmu_enable_clk(mmu); +} + +static inline void kgsl_mmu_disable_clk(struct kgsl_mmu *mmu) +{ + if (MMU_OP_VALID(mmu, mmu_disable_clk)) + mmu->mmu_ops->mmu_disable_clk(mmu); +} + +static inline int kgsl_mmu_set_pagefault_policy(struct kgsl_mmu *mmu, + unsigned long pf_policy) +{ + if (MMU_OP_VALID(mmu, mmu_set_pf_policy)) + return mmu->mmu_ops->mmu_set_pf_policy(mmu, pf_policy); + + return 0; +} + +static inline void kgsl_mmu_pagefault_resume(struct kgsl_mmu *mmu, + bool terminate) +{ + if (MMU_OP_VALID(mmu, mmu_pagefault_resume)) + return mmu->mmu_ops->mmu_pagefault_resume(mmu, terminate); +} + +static inline void kgsl_mmu_clear_fsr(struct kgsl_mmu *mmu) +{ + if (MMU_OP_VALID(mmu, mmu_clear_fsr)) + return mmu->mmu_ops->mmu_clear_fsr(mmu); +} + +static inline bool kgsl_mmu_is_perprocess(struct kgsl_mmu *mmu) +{ + return test_bit(KGSL_MMU_IOPGTABLE, &mmu->features); +} + +static inline bool kgsl_mmu_is_secured(struct kgsl_mmu *mmu) +{ + return mmu && (mmu->secured) && (!IS_ERR_OR_NULL(mmu->securepagetable)); +} + +static inline u64 +kgsl_mmu_pagetable_get_ttbr0(struct kgsl_pagetable *pagetable) +{ + if (PT_OP_VALID(pagetable, get_ttbr0)) + return 
pagetable->pt_ops->get_ttbr0(pagetable); + + return 0; +} + +/** + * kgsl_mmu_map_global - Map a memdesc as a global buffer + * @device: A KGSL GPU device handle + * @memdesc: Pointer to a GPU memory descriptor + * @padding: Any padding to add to the end of the VA allotment (in bytes) + * + * Map a buffer as globally accessible in all pagetable contexts + */ +void kgsl_mmu_map_global(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, u32 padding); + +/** + * kgsl_mmu_pagetable_get_context_bank - Return the context bank number + * @pagetable: A handle to a given pagetable + * + * This function will find the context number of the given pagetable + + * Return: The context bank number the pagetable is attached to or + * negative error on failure. + */ +int kgsl_mmu_pagetable_get_context_bank(struct kgsl_pagetable *pagetable); + +void kgsl_mmu_pagetable_init(struct kgsl_mmu *mmu, + struct kgsl_pagetable *pagetable, u32 name); + +void kgsl_mmu_pagetable_add(struct kgsl_mmu *mmu, struct kgsl_pagetable *pagetable); + +#if IS_ENABLED(CONFIG_ARM_SMMU) +int kgsl_iommu_probe(struct kgsl_device *device); +#else +static inline int kgsl_iommu_probe(struct kgsl_device *device) +{ + return -ENODEV; +} +#endif +#endif /* __KGSL_MMU_H */ diff --git a/kgsl_pool.c b/kgsl_pool.c new file mode 100644 index 0000000000..18f6a8e28d --- /dev/null +++ b/kgsl_pool.c @@ -0,0 +1,641 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2016-2021, The Linux Foundation. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include + +#include "kgsl_device.h" +#include "kgsl_pool.h" +#include "kgsl_sharedmem.h" +#include "kgsl_trace.h" + +#ifdef CONFIG_QCOM_KGSL_SORT_POOL + +struct kgsl_pool_page_entry { + phys_addr_t physaddr; + struct page *page; + struct rb_node node; +}; + +static struct kmem_cache *addr_page_cache; + +/** + * struct kgsl_page_pool - Structure to hold information for the pool + * @pool_order: Page order describing the size of the page + * @page_count: Number of pages currently present in the pool + * @reserved_pages: Number of pages reserved at init for the pool + * @list_lock: Spinlock for page list in the pool + * @pool_rbtree: RB tree with all pages held/reserved in this pool + */ +struct kgsl_page_pool { + unsigned int pool_order; + unsigned int page_count; + unsigned int reserved_pages; + spinlock_t list_lock; + struct rb_root pool_rbtree; +}; + +static int +__kgsl_pool_add_page(struct kgsl_page_pool *pool, struct page *p) +{ + struct rb_node **node, *parent; + struct kgsl_pool_page_entry *new_page, *entry; + + new_page = kmem_cache_alloc(addr_page_cache, GFP_KERNEL); + if (new_page == NULL) + return -ENOMEM; + + spin_lock(&pool->list_lock); + node = &pool->pool_rbtree.rb_node; + new_page->physaddr = page_to_phys(p); + new_page->page = p; + + while (*node != NULL) { + parent = *node; + entry = rb_entry(parent, struct kgsl_pool_page_entry, node); + + if (new_page->physaddr < entry->physaddr) + node = &parent->rb_left; + else + node = &parent->rb_right; + } + + rb_link_node(&new_page->node, parent, node); + rb_insert_color(&new_page->node, &pool->pool_rbtree); + pool->page_count++; + spin_unlock(&pool->list_lock); + + return 0; +} + +static struct page * +__kgsl_pool_get_page(struct kgsl_page_pool *pool) +{ + struct rb_node *node; + struct kgsl_pool_page_entry *entry; + struct page *p; + + node = rb_first(&pool->pool_rbtree); + if (!node) + return NULL; + + entry = rb_entry(node, struct kgsl_pool_page_entry, 
node); + p = entry->page; + rb_erase(&entry->node, &pool->pool_rbtree); + kmem_cache_free(addr_page_cache, entry); + pool->page_count--; + return p; +} + +static void kgsl_pool_list_init(struct kgsl_page_pool *pool) +{ + pool->pool_rbtree = RB_ROOT; +} + +static void kgsl_pool_cache_init(void) +{ + addr_page_cache = KMEM_CACHE(kgsl_pool_page_entry, 0); +} +#else +/** + * struct kgsl_page_pool - Structure to hold information for the pool + * @pool_order: Page order describing the size of the page + * @page_count: Number of pages currently present in the pool + * @reserved_pages: Number of pages reserved at init for the pool + * @list_lock: Spinlock for page list in the pool + * @page_list: List of pages held/reserved in this pool + */ +struct kgsl_page_pool { + unsigned int pool_order; + unsigned int page_count; + unsigned int reserved_pages; + spinlock_t list_lock; + struct list_head page_list; +}; + +static int +__kgsl_pool_add_page(struct kgsl_page_pool *pool, struct page *p) +{ + spin_lock(&pool->list_lock); + list_add_tail(&p->lru, &pool->page_list); + pool->page_count++; + spin_unlock(&pool->list_lock); + + return 0; +} + +static struct page * +__kgsl_pool_get_page(struct kgsl_page_pool *pool) +{ + struct page *p; + + p = list_first_entry_or_null(&pool->page_list, struct page, lru); + if (p) { + pool->page_count--; + list_del(&p->lru); + } + + return p; +} + +static void kgsl_pool_list_init(struct kgsl_page_pool *pool) +{ + INIT_LIST_HEAD(&pool->page_list); +} + +static void kgsl_pool_cache_init(void) +{ +} +#endif + +static struct kgsl_page_pool kgsl_pools[6]; +static int kgsl_num_pools; +static int kgsl_pool_max_pages; + +/* Return the index of the pool for the specified order */ +static int kgsl_get_pool_index(int order) +{ + int i; + + for (i = 0; i < kgsl_num_pools; i++) { + if (kgsl_pools[i].pool_order == order) + return i; + } + + return -EINVAL; +} + +/* Returns KGSL pool corresponding to input page order*/ +static struct kgsl_page_pool * 
+_kgsl_get_pool_from_order(int order) +{ + int index = kgsl_get_pool_index(order); + + return index >= 0 ? &kgsl_pools[index] : NULL; +} + +/* Add a page to specified pool */ +static void +_kgsl_pool_add_page(struct kgsl_page_pool *pool, struct page *p) +{ + if (!p) + return; + + /* + * Sanity check to make sure we don't re-pool a page that + * somebody else has a reference to. + */ + if (WARN_ON(unlikely(page_count(p) > 1))) { + __free_pages(p, pool->pool_order); + return; + } + + if (__kgsl_pool_add_page(pool, p)) { + __free_pages(p, pool->pool_order); + trace_kgsl_pool_free_page(pool->pool_order); + return; + } + + trace_kgsl_pool_add_page(pool->pool_order, pool->page_count); + mod_node_page_state(page_pgdat(p), NR_KERNEL_MISC_RECLAIMABLE, + (1 << pool->pool_order)); +} + +/* Returns a page from specified pool */ +static struct page * +_kgsl_pool_get_page(struct kgsl_page_pool *pool) +{ + struct page *p = NULL; + + spin_lock(&pool->list_lock); + p = __kgsl_pool_get_page(pool); + spin_unlock(&pool->list_lock); + if (p != NULL) { + trace_kgsl_pool_get_page(pool->pool_order, pool->page_count); + mod_node_page_state(page_pgdat(p), NR_KERNEL_MISC_RECLAIMABLE, + -(1 << pool->pool_order)); + } + return p; +} + +/* Returns the number of pages in all kgsl page pools */ +static int kgsl_pool_size_total(void) +{ + int i; + int total = 0; + + for (i = 0; i < kgsl_num_pools; i++) { + struct kgsl_page_pool *kgsl_pool = &kgsl_pools[i]; + + spin_lock(&kgsl_pool->list_lock); + total += kgsl_pool->page_count * (1 << kgsl_pool->pool_order); + spin_unlock(&kgsl_pool->list_lock); + } + + return total; +} + +/* Returns the total number of pages in all pools excluding reserved pages */ +static unsigned long kgsl_pool_size_nonreserved(void) +{ + int i; + unsigned long total = 0; + + for (i = 0; i < kgsl_num_pools; i++) { + struct kgsl_page_pool *pool = &kgsl_pools[i]; + + spin_lock(&pool->list_lock); + if (pool->page_count > pool->reserved_pages) + total += (pool->page_count - 
pool->reserved_pages) * + (1 << pool->pool_order); + spin_unlock(&pool->list_lock); + } + + return total; +} + +/* + * Returns a page from specified pool only if pool + * currently holds more number of pages than reserved + * pages. + */ +static struct page * +_kgsl_pool_get_nonreserved_page(struct kgsl_page_pool *pool) +{ + struct page *p = NULL; + + spin_lock(&pool->list_lock); + if (pool->page_count <= pool->reserved_pages) { + spin_unlock(&pool->list_lock); + return NULL; + } + + p = __kgsl_pool_get_page(pool); + spin_unlock(&pool->list_lock); + if (p != NULL) { + trace_kgsl_pool_get_page(pool->pool_order, pool->page_count); + mod_node_page_state(page_pgdat(p), NR_KERNEL_MISC_RECLAIMABLE, + -(1 << pool->pool_order)); + } + return p; +} + +/* + * This will shrink the specified pool by num_pages or by + * (page_count - reserved_pages), whichever is smaller. + */ +static unsigned int +_kgsl_pool_shrink(struct kgsl_page_pool *pool, + unsigned int num_pages, bool exit) +{ + int j; + unsigned int pcount = 0; + struct page *(*get_page)(struct kgsl_page_pool *) = + _kgsl_pool_get_nonreserved_page; + + if (pool == NULL || num_pages == 0) + return pcount; + + num_pages = (num_pages + (1 << pool->pool_order) - 1) >> + pool->pool_order; + + /* This is to ensure that we free reserved pages */ + if (exit) + get_page = _kgsl_pool_get_page; + + for (j = 0; j < num_pages; j++) { + struct page *page = get_page(pool); + + if (!page) + break; + + __free_pages(page, pool->pool_order); + pcount += (1 << pool->pool_order); + trace_kgsl_pool_free_page(pool->pool_order); + } + + return pcount; +} + +/* + * This function removes number of pages specified by + * target_pages from the total pool size. + * + * Remove target_pages from the pool, starting from higher order pool. 
+ */ +static unsigned long +kgsl_pool_reduce(int target_pages, bool exit) +{ + int i, ret; + unsigned long pcount = 0; + + for (i = (kgsl_num_pools - 1); i >= 0; i--) { + if (target_pages <= 0) + return pcount; + + /* Remove target_pages pages from this pool */ + ret = _kgsl_pool_shrink(&kgsl_pools[i], target_pages, exit); + target_pages -= ret; + pcount += ret; + } + + return pcount; +} + +void kgsl_pool_free_pages(struct page **pages, unsigned int pcount) +{ + int i; + + if (!pages) + return; + + for (i = 0; i < pcount;) { + /* + * Free each page or compound page group individually. + */ + struct page *p = pages[i]; + + i += 1 << compound_order(p); + kgsl_pool_free_page(p); + } +} + +static int kgsl_pool_get_retry_order(unsigned int order) +{ + int i; + + for (i = kgsl_num_pools-1; i > 0; i--) + if (order >= kgsl_pools[i].pool_order) + return kgsl_pools[i].pool_order; + + return 0; +} + +/* + * Return true if the pool of specified page size is supported + * or no pools are supported otherwise return false. 
+ */ +static bool kgsl_pool_available(unsigned int page_size) +{ + int order = get_order(page_size); + + if (!kgsl_num_pools) + return true; + + return (kgsl_get_pool_index(order) >= 0); +} + +int kgsl_get_page_size(size_t size, unsigned int align) +{ + size_t pool; + + for (pool = SZ_1M; pool > PAGE_SIZE; pool >>= 1) + if ((align >= ilog2(pool)) && (size >= pool) && + kgsl_pool_available(pool)) + return pool; + + return PAGE_SIZE; +} + +int kgsl_pool_alloc_page(int *page_size, struct page **pages, + unsigned int pages_len, unsigned int *align, + struct device *dev) +{ + int j; + int pcount = 0; + struct kgsl_page_pool *pool; + struct page *page = NULL; + struct page *p = NULL; + int order = get_order(*page_size); + int pool_idx; + size_t size = 0; + + if ((pages == NULL) || pages_len < (*page_size >> PAGE_SHIFT)) + return -EINVAL; + + /* If the pool is not configured get pages from the system */ + if (!kgsl_num_pools) { + gfp_t gfp_mask = kgsl_gfp_mask(order); + + page = alloc_pages(gfp_mask, order); + if (page == NULL) { + /* Retry with lower order pages */ + if (order > 0) { + size = PAGE_SIZE << --order; + goto eagain; + + } else + return -ENOMEM; + } + trace_kgsl_pool_alloc_page_system(order); + goto done; + } + + pool = _kgsl_get_pool_from_order(order); + if (pool == NULL) { + /* Retry with lower order pages */ + if (order > 0) { + size = PAGE_SIZE << kgsl_pool_get_retry_order(order); + goto eagain; + } else { + /* + * Fall back to direct allocation in case + * pool with zero order is not present + */ + gfp_t gfp_mask = kgsl_gfp_mask(order); + + page = alloc_pages(gfp_mask, order); + if (page == NULL) + return -ENOMEM; + trace_kgsl_pool_alloc_page_system(order); + goto done; + } + } + + pool_idx = kgsl_get_pool_index(order); + page = _kgsl_pool_get_page(pool); + + /* Allocate a new page if not allocated from pool */ + if (page == NULL) { + gfp_t gfp_mask = kgsl_gfp_mask(order); + + page = alloc_pages(gfp_mask, order); + + if (!page) { + if (pool_idx > 0) { + 
/* Retry with lower order pages */ + size = PAGE_SIZE << + kgsl_pools[pool_idx-1].pool_order; + goto eagain; + } else + return -ENOMEM; + } + trace_kgsl_pool_alloc_page_system(order); + } + +done: + kgsl_zero_page(page, order, dev); + + for (j = 0; j < (*page_size >> PAGE_SHIFT); j++) { + p = nth_page(page, j); + pages[pcount] = p; + pcount++; + } + + return pcount; + +eagain: + trace_kgsl_pool_try_page_lower(get_order(*page_size)); + *page_size = kgsl_get_page_size(size, ilog2(size)); + *align = ilog2(*page_size); + return -EAGAIN; +} + +void kgsl_pool_free_page(struct page *page) +{ + struct kgsl_page_pool *pool; + int page_order; + + if (page == NULL) + return; + + page_order = compound_order(page); + + if (!kgsl_pool_max_pages || + (kgsl_pool_size_total() < kgsl_pool_max_pages)) { + pool = _kgsl_get_pool_from_order(page_order); + if (pool != NULL) { + _kgsl_pool_add_page(pool, page); + return; + } + } + + /* Give back to system as not added to pool */ + __free_pages(page, page_order); + trace_kgsl_pool_free_page(page_order); +} + +/* Functions for the shrinker */ + +static unsigned long +kgsl_pool_shrink_scan_objects(struct shrinker *shrinker, + struct shrink_control *sc) +{ + /* sc->nr_to_scan represents number of pages to be removed*/ + unsigned long pcount = kgsl_pool_reduce(sc->nr_to_scan, false); + + /* If pools are exhausted return SHRINK_STOP */ + return pcount ? pcount : SHRINK_STOP; +} + +static unsigned long +kgsl_pool_shrink_count_objects(struct shrinker *shrinker, + struct shrink_control *sc) +{ + /* + * Return non-reserved pool size as we don't + * want shrinker to free reserved pages. 
+ */ + return kgsl_pool_size_nonreserved(); +} + +/* Shrinker callback data*/ +static struct shrinker kgsl_pool_shrinker = { + .count_objects = kgsl_pool_shrink_count_objects, + .scan_objects = kgsl_pool_shrink_scan_objects, + .seeks = DEFAULT_SEEKS, + .batch = 0, +}; + +static void kgsl_pool_reserve_pages(struct kgsl_page_pool *pool, + struct device_node *node) +{ + u32 reserved = 0; + int i; + + of_property_read_u32(node, "qcom,mempool-reserved", &reserved); + + /* Limit the total number of reserved pages to 4096 */ + pool->reserved_pages = min_t(u32, reserved, 4096); + + for (i = 0; i < pool->reserved_pages; i++) { + gfp_t gfp_mask = kgsl_gfp_mask(pool->pool_order); + struct page *page; + + page = alloc_pages(gfp_mask, pool->pool_order); + _kgsl_pool_add_page(pool, page); + } +} + +static int kgsl_of_parse_mempool(struct kgsl_page_pool *pool, + struct device_node *node) +{ + u32 size; + int order; + + if (of_property_read_u32(node, "qcom,mempool-page-size", &size)) + return -EINVAL; + + order = get_order(size); + + if (order > 8) { + pr_err("kgsl: %pOF: pool order %d is too big\n", node, order); + return -EINVAL; + } + + pool->pool_order = order; + + spin_lock_init(&pool->list_lock); + kgsl_pool_list_init(pool); + + kgsl_pool_reserve_pages(pool, node); + + return 0; +} + +void kgsl_probe_page_pools(void) +{ + struct device_node *node, *child; + int index = 0; + + node = of_find_compatible_node(NULL, NULL, "qcom,gpu-mempools"); + if (!node) + return; + + /* Get Max pages limit for mempool */ + of_property_read_u32(node, "qcom,mempool-max-pages", + &kgsl_pool_max_pages); + + kgsl_pool_cache_init(); + + for_each_child_of_node(node, child) { + if (!kgsl_of_parse_mempool(&kgsl_pools[index], child)) + index++; + + if (index == ARRAY_SIZE(kgsl_pools)) { + of_node_put(child); + break; + } + } + + kgsl_num_pools = index; + of_node_put(node); + + /* Initialize shrinker */ + register_shrinker(&kgsl_pool_shrinker); +} + +void kgsl_exit_page_pools(void) +{ + /* Release all 
pages in pools, if any.*/ + kgsl_pool_reduce(INT_MAX, true); + + /* Unregister shrinker */ + unregister_shrinker(&kgsl_pool_shrinker); +} + diff --git a/kgsl_pool.h b/kgsl_pool.h new file mode 100644 index 0000000000..c375c31ae8 --- /dev/null +++ b/kgsl_pool.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2016-2017,2019,2021 The Linux Foundation. All rights reserved. + */ +#ifndef __KGSL_POOL_H +#define __KGSL_POOL_H + +#ifdef CONFIG_QCOM_KGSL_USE_SHMEM +static inline void kgsl_probe_page_pools(void) { } +static inline void kgsl_exit_page_pools(void) { } +static inline int kgsl_get_page_size(size_t size, unsigned int align) +{ + return PAGE_SIZE; +} +#else +/** + * kgsl_pool_free_page - Frees the page and adds it back to pool/system memory + * @page: Pointer to page struct that needs to be freed + */ +void kgsl_pool_free_page(struct page *page); + +/** + * kgsl_get_page_size - Get supported pagesize + * @size: Size of the page + * @align: Desired alignment of the size + * + * Return largest available page size from pools that can be used to meet + * given size and alignment requirements + */ +int kgsl_get_page_size(size_t size, unsigned int align); + +/** + * kgsl_pool_alloc_page - Allocate a page of requested size + * @page_size: Size of the page to be allocated + * @pages: pointer to hold list of pages, should be big enough to hold + * requested page + * @len: Length of array pages + * + * Return total page count on success and negative value on failure + */ +int kgsl_pool_alloc_page(int *page_size, struct page **pages, + unsigned int pages_len, unsigned int *align, + struct device *dev); + +/** + * kgsl_pool_free_pages - Free pages in an pages array + * @pages: pointer to an array of page structs + * @page_count: Number of entries in @pages + * + * Free the pages by collapsing any physical adjacent pages. + * Pages are added back to the pool, if pool has sufficient space + * otherwise they are given back to system. 
+ */ +void kgsl_pool_free_pages(struct page **pages, unsigned int page_count); + +/** + * kgsl_probe_page_pools - Initialize the memory pools pools + */ +void kgsl_probe_page_pools(void); + +/** + * kgsl_exit_page_pools - Free outstanding pooled memory + */ +void kgsl_exit_page_pools(void); + +#endif +#endif /* __KGSL_POOL_H */ + diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c new file mode 100644 index 0000000000..5a3e52c3a7 --- /dev/null +++ b/kgsl_pwrctrl.c @@ -0,0 +1,2329 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2010-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kgsl_device.h" +#include "kgsl_bus.h" +#include "kgsl_pwrscale.h" +#include "kgsl_sysfs.h" +#include "kgsl_trace.h" +#include "kgsl_util.h" + +#define UPDATE_BUSY_VAL 1000000 + +#define KGSL_MAX_BUSLEVELS 20 + +/* Order deeply matters here because reasons. New entries go on the end */ +static const char * const clocks[] = { + "src_clk", + "core_clk", + "iface_clk", + "mem_clk", + "mem_iface_clk", + "alt_mem_iface_clk", + "rbbmtimer_clk", + "gtcu_clk", + "gtbu_clk", + "gtcu_iface_clk", + "alwayson_clk", + "isense_clk", + "rbcpr_clk", + "iref_clk", + "gmu_clk", + "ahb_clk", + "smmu_vote", + "apb_pclk", +}; + +static void kgsl_pwrctrl_clk(struct kgsl_device *device, bool state, + int requested_state); +static int kgsl_pwrctrl_pwrrail(struct kgsl_device *device, bool state); +static void kgsl_pwrctrl_set_state(struct kgsl_device *device, + unsigned int state); +static int _isense_clk_set_rate(struct kgsl_pwrctrl *pwr, int level); +static int kgsl_pwrctrl_clk_set_rate(struct clk *grp_clk, unsigned int freq, + const char *name); +static void _gpu_clk_prepare_enable(struct kgsl_device *device, + struct clk *clk, const char *name); +static void _bimc_clk_prepare_enable(struct kgsl_device *device, + struct clk *clk, const char *name); + +/** + * _adjust_pwrlevel() - Given a requested 
power level do bounds checking on the + * constraints and return the nearest possible level + * @device: Pointer to the kgsl_device struct + * @level: Requested level + * @pwrc: Pointer to the power constraint to be applied + * + * Apply thermal and max/min limits first. Then force the level with a + * constraint if one exists. + */ +static unsigned int _adjust_pwrlevel(struct kgsl_pwrctrl *pwr, int level, + struct kgsl_pwr_constraint *pwrc) +{ + unsigned int max_pwrlevel = max_t(unsigned int, pwr->thermal_pwrlevel, + pwr->max_pwrlevel); + unsigned int min_pwrlevel = min_t(unsigned int, + pwr->thermal_pwrlevel_floor, + pwr->min_pwrlevel); + + /* Ensure that max/min pwrlevels are within thermal max/min limits */ + max_pwrlevel = min_t(unsigned int, max_pwrlevel, + pwr->thermal_pwrlevel_floor); + min_pwrlevel = max_t(unsigned int, min_pwrlevel, + pwr->thermal_pwrlevel); + + switch (pwrc->type) { + case KGSL_CONSTRAINT_PWRLEVEL: { + switch (pwrc->sub_type) { + case KGSL_CONSTRAINT_PWR_MAX: + return max_pwrlevel; + case KGSL_CONSTRAINT_PWR_MIN: + return min_pwrlevel; + default: + break; + } + } + break; + } + + if (level < max_pwrlevel) + return max_pwrlevel; + if (level > min_pwrlevel) + return min_pwrlevel; + + return level; +} + +/** + * kgsl_pwrctrl_pwrlevel_change_settings() - Program h/w during powerlevel + * transitions + * @device: Pointer to the kgsl_device struct + * @post: flag to check if the call is before/after the clk_rate change + * @wake_up: flag to check if device is active or waking up + */ +static void kgsl_pwrctrl_pwrlevel_change_settings(struct kgsl_device *device, + bool post) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + unsigned int old = pwr->previous_pwrlevel; + unsigned int new = pwr->active_pwrlevel; + + if (device->state != KGSL_STATE_ACTIVE) + return; + if (old == new) + return; + + device->ftbl->pwrlevel_change_settings(device, old, new, post); +} + +/** + * kgsl_pwrctrl_adjust_pwrlevel() - Adjust the power level if + * required by 
thermal, max/min, constraints, etc + * @device: Pointer to the kgsl_device struct + * @new_level: Requested powerlevel, an index into the pwrlevel array + */ +unsigned int kgsl_pwrctrl_adjust_pwrlevel(struct kgsl_device *device, + unsigned int new_level) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + unsigned int old_level = pwr->active_pwrlevel; + bool reset = false; + + /* If a pwr constraint is expired, remove it */ + if ((pwr->constraint.type != KGSL_CONSTRAINT_NONE) && + (time_after(jiffies, pwr->constraint.expires))) { + + struct kgsl_context *context = kgsl_context_get(device, + pwr->constraint.owner_id); + + /* We couldn't get a reference, clear the constraint */ + if (!context) { + reset = true; + goto done; + } + + /* + * If the last timestamp that set the constraint has retired, + * clear the constraint + */ + if (kgsl_check_timestamp(device, context, + pwr->constraint.owner_timestamp)) { + reset = true; + kgsl_context_put(context); + goto done; + } + + /* + * Increase the timeout to keep the constraint at least till + * the timestamp retires + */ + pwr->constraint.expires = jiffies + + msecs_to_jiffies(device->pwrctrl.interval_timeout); + + kgsl_context_put(context); + } + +done: + if (reset) { + /* Trace the constraint being un-set by the driver */ + trace_kgsl_constraint(device, pwr->constraint.type, + old_level, 0); + /*Invalidate the constraint set */ + pwr->constraint.expires = 0; + pwr->constraint.type = KGSL_CONSTRAINT_NONE; + } + + /* + * Adjust the power level if required by thermal, max/min, + * constraints, etc + */ + return _adjust_pwrlevel(pwr, new_level, &pwr->constraint); +} + +/** + * kgsl_pwrctrl_pwrlevel_change() - Validate and change power levels + * @device: Pointer to the kgsl_device struct + * @new_level: Requested powerlevel, an index into the pwrlevel array + * + * Check that any power level constraints are still valid. Update the + * requested level according to any thermal, max/min, or power constraints. 
+ * If a new GPU level is going to be set, update the bus to that level's + * default value. Do not change the bus if a constraint keeps the new + * level at the current level. Set the new GPU frequency. + */ +void kgsl_pwrctrl_pwrlevel_change(struct kgsl_device *device, + unsigned int new_level) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct kgsl_pwrlevel *pwrlevel; + unsigned int old_level = pwr->active_pwrlevel; + + new_level = kgsl_pwrctrl_adjust_pwrlevel(device, new_level); + + if (new_level == old_level) + return; + + kgsl_pwrscale_update_stats(device); + + /* + * Set the active and previous powerlevel first in case the clocks are + * off - if we don't do this then the pwrlevel change won't take effect + * when the clocks come back + */ + pwr->active_pwrlevel = new_level; + pwr->previous_pwrlevel = old_level; + + /* + * If the bus is running faster than its default level and the GPU + * frequency is moving down keep the DDR at a relatively high level. + */ + if (pwr->bus_mod < 0 || new_level < old_level) { + pwr->bus_mod = 0; + pwr->bus_percent_ab = 0; + } + /* + * Update the bus before the GPU clock to prevent underrun during + * frequency increases. + */ + kgsl_bus_update(device, KGSL_BUS_VOTE_ON); + + pwrlevel = &pwr->pwrlevels[pwr->active_pwrlevel]; + /* Change register settings if any BEFORE pwrlevel change*/ + kgsl_pwrctrl_pwrlevel_change_settings(device, 0); + device->ftbl->gpu_clock_set(device, pwr->active_pwrlevel); + _isense_clk_set_rate(pwr, pwr->active_pwrlevel); + + trace_kgsl_pwrlevel(device, + pwr->active_pwrlevel, pwrlevel->gpu_freq, + pwr->previous_pwrlevel, + pwr->pwrlevels[old_level].gpu_freq); + + trace_gpu_frequency(pwrlevel->gpu_freq/1000, 0); + + /* + * Some targets do not support the bandwidth requirement of + * GPU at TURBO, for such targets we need to set GPU-BIMC + * interface clocks to TURBO directly whenever GPU runs at + * TURBO. The TURBO frequency of gfx-bimc need to be defined + * in target device tree. 
+ */ + if (pwr->gpu_bimc_int_clk) { + if (pwr->active_pwrlevel == 0 && + !pwr->gpu_bimc_interface_enabled) { + kgsl_pwrctrl_clk_set_rate(pwr->gpu_bimc_int_clk, + pwr->gpu_bimc_int_clk_freq, + "bimc_gpu_clk"); + _bimc_clk_prepare_enable(device, + pwr->gpu_bimc_int_clk, + "bimc_gpu_clk"); + pwr->gpu_bimc_interface_enabled = true; + } else if (pwr->previous_pwrlevel == 0 + && pwr->gpu_bimc_interface_enabled) { + clk_disable_unprepare(pwr->gpu_bimc_int_clk); + pwr->gpu_bimc_interface_enabled = false; + } + } + + /* Change register settings if any AFTER pwrlevel change*/ + kgsl_pwrctrl_pwrlevel_change_settings(device, 1); +} + +void kgsl_pwrctrl_set_constraint(struct kgsl_device *device, + struct kgsl_pwr_constraint *pwrc, uint32_t id, u32 ts) +{ + unsigned int constraint; + struct kgsl_pwr_constraint *pwrc_old; + + if (device == NULL || pwrc == NULL) + return; + constraint = _adjust_pwrlevel(&device->pwrctrl, + device->pwrctrl.active_pwrlevel, pwrc); + pwrc_old = &device->pwrctrl.constraint; + + /* + * If a constraint is already set, set a new constraint only + * if it is faster. If the requested constraint is the same + * as the current one, update ownership and timestamp. 
+ */ + if ((pwrc_old->type == KGSL_CONSTRAINT_NONE) || + (constraint < pwrc_old->hint.pwrlevel.level)) { + pwrc_old->type = pwrc->type; + pwrc_old->sub_type = pwrc->sub_type; + pwrc_old->hint.pwrlevel.level = constraint; + pwrc_old->owner_id = id; + pwrc_old->expires = jiffies + + msecs_to_jiffies(device->pwrctrl.interval_timeout); + pwrc_old->owner_timestamp = ts; + kgsl_pwrctrl_pwrlevel_change(device, constraint); + /* Trace the constraint being set by the driver */ + trace_kgsl_constraint(device, pwrc_old->type, constraint, 1); + } else if ((pwrc_old->type == pwrc->type) && + (pwrc_old->hint.pwrlevel.level == constraint)) { + pwrc_old->owner_id = id; + pwrc_old->owner_timestamp = ts; + pwrc_old->expires = jiffies + + msecs_to_jiffies(device->pwrctrl.interval_timeout); + } +} + +static int kgsl_pwrctrl_set_thermal_limit(struct kgsl_device *device, + u32 level) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int ret = -EINVAL; + + if (level >= pwr->num_pwrlevels) + level = pwr->num_pwrlevels - 1; + + if (dev_pm_qos_request_active(&pwr->sysfs_thermal_req)) + ret = dev_pm_qos_update_request(&pwr->sysfs_thermal_req, + (pwr->pwrlevels[level].gpu_freq / 1000)); + + return (ret < 0) ? 
ret : 0; +} + +static ssize_t thermal_pwrlevel_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + int ret; + u32 level; + + ret = kstrtou32(buf, 0, &level); + if (ret) + return ret; + + ret = kgsl_pwrctrl_set_thermal_limit(device, level); + if (ret) + return ret; + + return count; +} + +static ssize_t thermal_pwrlevel_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + return scnprintf(buf, PAGE_SIZE, "%d\n", pwr->thermal_pwrlevel); +} + +static ssize_t max_pwrlevel_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int ret; + unsigned int level = 0; + + ret = kstrtou32(buf, 0, &level); + if (ret) + return ret; + + mutex_lock(&device->mutex); + + /* You can't set a maximum power level lower than the minimum */ + if (level > pwr->min_pwrlevel) + level = pwr->min_pwrlevel; + + pwr->max_pwrlevel = level; + + /* Update the current level using the new limit */ + kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel); + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t max_pwrlevel_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + return scnprintf(buf, PAGE_SIZE, "%u\n", pwr->max_pwrlevel); +} + +static void kgsl_pwrctrl_min_pwrlevel_set(struct kgsl_device *device, + int level) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + mutex_lock(&device->mutex); + if (level >= pwr->num_pwrlevels) + level = pwr->num_pwrlevels - 1; + + /* You can't set a minimum power level lower than the maximum */ + if (level < pwr->max_pwrlevel) + level = pwr->max_pwrlevel; + + 
pwr->min_pwrlevel = level; + + /* Update the current level using the new limit */ + kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel); + + mutex_unlock(&device->mutex); +} + +static ssize_t min_pwrlevel_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + int ret; + unsigned int level = 0; + + ret = kstrtou32(buf, 0, &level); + if (ret) + return ret; + + kgsl_pwrctrl_min_pwrlevel_set(device, level); + + return count; +} + +static ssize_t min_pwrlevel_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + return scnprintf(buf, PAGE_SIZE, "%u\n", pwr->min_pwrlevel); +} + +static ssize_t num_pwrlevels_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + return scnprintf(buf, PAGE_SIZE, "%d\n", pwr->num_pwrlevels); +} + +/* Given a GPU clock value, return the lowest matching powerlevel */ + +static int _get_nearest_pwrlevel(struct kgsl_pwrctrl *pwr, unsigned int clock) +{ + int i; + + for (i = pwr->num_pwrlevels - 1; i >= 0; i--) { + if (abs(pwr->pwrlevels[i].gpu_freq - clock) < 5000000) + return i; + } + + return -ERANGE; +} + +static ssize_t max_gpuclk_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + u32 freq; + int ret, level; + + ret = kstrtou32(buf, 0, &freq); + if (ret) + return ret; + + level = _get_nearest_pwrlevel(&device->pwrctrl, freq); + if (level < 0) + return level; + + /* + * You would think this would set max_pwrlevel but the legacy behavior + * is that it set thermal_pwrlevel instead so we don't want to mess with + * that. 
+ */ + ret = kgsl_pwrctrl_set_thermal_limit(device, level); + if (ret) + return ret; + + return count; +} + +static ssize_t max_gpuclk_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + return scnprintf(buf, PAGE_SIZE, "%d\n", + device->pwrctrl.pwrlevels[pwr->thermal_pwrlevel].gpu_freq); +} + +static ssize_t gpuclk_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + unsigned int val = 0; + int ret, level; + + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; + + mutex_lock(&device->mutex); + level = _get_nearest_pwrlevel(pwr, val); + if (level >= 0) + kgsl_pwrctrl_pwrlevel_change(device, (unsigned int) level); + + mutex_unlock(&device->mutex); + return count; +} + +static ssize_t gpuclk_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%ld\n", + kgsl_pwrctrl_active_freq(&device->pwrctrl)); +} + +static ssize_t idle_timer_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned int val = 0; + struct kgsl_device *device = dev_get_drvdata(dev); + int ret; + + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; + + /* + * We don't quite accept a maximum of 0xFFFFFFFF due to internal jiffy + * math, so make sure the value falls within the largest offset we can + * deal with + */ + + if (val > jiffies_to_usecs(MAX_JIFFY_OFFSET)) + return -EINVAL; + + mutex_lock(&device->mutex); + device->pwrctrl.interval_timeout = val; + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t idle_timer_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return 
scnprintf(buf, PAGE_SIZE, "%u\n", device->pwrctrl.interval_timeout); +} + +static ssize_t minbw_timer_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + u32 val; + int ret; + + if (device->pwrctrl.ctrl_flags & BIT(KGSL_PWRFLAGS_NAP_OFF)) + return -EINVAL; + + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; + + device->pwrctrl.minbw_timeout = val; + return count; +} + +static ssize_t minbw_timer_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%u\n", + device->pwrctrl.minbw_timeout); +} + +static ssize_t gpubusy_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int ret; + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_clk_stats *stats = &device->pwrctrl.clk_stats; + + ret = scnprintf(buf, PAGE_SIZE, "%7d %7d\n", + stats->busy_old, stats->total_old); + if (!test_bit(KGSL_PWRFLAGS_AXI_ON, &device->pwrctrl.power_flags)) { + stats->busy_old = 0; + stats->total_old = 0; + } + return ret; +} + +static ssize_t gpu_available_frequencies_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int index, num_chars = 0; + + for (index = 0; index < pwr->num_pwrlevels; index++) { + num_chars += scnprintf(buf + num_chars, + PAGE_SIZE - num_chars - 1, + "%d ", pwr->pwrlevels[index].gpu_freq); + /* One space for trailing null and another for the newline */ + if (num_chars >= PAGE_SIZE - 2) + break; + } + buf[num_chars++] = '\n'; + return num_chars; +} + +static ssize_t gpu_clock_stats_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int index, num_chars = 0; + + 
mutex_lock(&device->mutex); + kgsl_pwrscale_update_stats(device); + mutex_unlock(&device->mutex); + for (index = 0; index < pwr->num_pwrlevels; index++) + num_chars += scnprintf(buf + num_chars, PAGE_SIZE - num_chars, + "%llu ", pwr->clock_times[index]); + + if (num_chars < PAGE_SIZE) + buf[num_chars++] = '\n'; + + return num_chars; +} + +static ssize_t reset_count_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", device->reset_counter); +} + +static void __force_on(struct kgsl_device *device, int flag, int on) +{ + if (on) { + switch (flag) { + case KGSL_PWRFLAGS_CLK_ON: + /* make sure pwrrail is ON before enabling clocks */ + kgsl_pwrctrl_pwrrail(device, true); + kgsl_pwrctrl_clk(device, true, + KGSL_STATE_ACTIVE); + break; + case KGSL_PWRFLAGS_AXI_ON: + kgsl_pwrctrl_axi(device, true); + break; + case KGSL_PWRFLAGS_POWER_ON: + kgsl_pwrctrl_pwrrail(device, true); + break; + } + set_bit(flag, &device->pwrctrl.ctrl_flags); + } else { + clear_bit(flag, &device->pwrctrl.ctrl_flags); + } +} + +static ssize_t __force_on_show(struct device *dev, + struct device_attribute *attr, + char *buf, int flag) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + test_bit(flag, &device->pwrctrl.ctrl_flags)); +} + +static ssize_t __force_on_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count, + int flag) +{ + unsigned int val = 0; + struct kgsl_device *device = dev_get_drvdata(dev); + int ret; + + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; + + mutex_lock(&device->mutex); + __force_on(device, flag, val); + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t force_clk_on_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return __force_on_show(dev, attr, buf, KGSL_PWRFLAGS_CLK_ON); +} + +static ssize_t force_clk_on_store(struct 
device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return __force_on_store(dev, attr, buf, count, KGSL_PWRFLAGS_CLK_ON); +} + +static ssize_t force_bus_on_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return __force_on_show(dev, attr, buf, KGSL_PWRFLAGS_AXI_ON); +} + +static ssize_t force_bus_on_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return __force_on_store(dev, attr, buf, count, KGSL_PWRFLAGS_AXI_ON); +} + +static ssize_t force_rail_on_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return __force_on_show(dev, attr, buf, KGSL_PWRFLAGS_POWER_ON); +} + +static ssize_t force_rail_on_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return __force_on_store(dev, attr, buf, count, KGSL_PWRFLAGS_POWER_ON); +} + +static ssize_t force_no_nap_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return __force_on_show(dev, attr, buf, KGSL_PWRFLAGS_NAP_OFF); +} + +static ssize_t force_no_nap_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return __force_on_store(dev, attr, buf, count, + KGSL_PWRFLAGS_NAP_OFF); +} + +static ssize_t bus_split_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + device->pwrctrl.bus_control); +} + +static ssize_t bus_split_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned int val = 0; + struct kgsl_device *device = dev_get_drvdata(dev); + int ret; + + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; + + mutex_lock(&device->mutex); + device->pwrctrl.bus_control = val ? 
true : false; + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t default_pwrlevel_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + device->pwrctrl.default_pwrlevel); +} + +static ssize_t default_pwrlevel_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct kgsl_pwrscale *pwrscale = &device->pwrscale; + int ret; + unsigned int level = 0; + + ret = kstrtou32(buf, 0, &level); + if (ret) + return ret; + + if (level >= pwr->num_pwrlevels) + return count; + + mutex_lock(&device->mutex); + pwr->default_pwrlevel = level; + pwrscale->gpu_profile.profile.initial_freq + = pwr->pwrlevels[level].gpu_freq; + + mutex_unlock(&device->mutex); + return count; +} + +static ssize_t popp_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + /* POPP is deprecated, so return it as always disabled */ + return scnprintf(buf, PAGE_SIZE, "0\n"); +} + +static ssize_t _gpu_busy_show(struct kgsl_device *device, + char *buf) +{ + int ret; + struct kgsl_clk_stats *stats = &device->pwrctrl.clk_stats; + unsigned int busy_percent = 0; + + if (stats->total_old != 0) + busy_percent = (stats->busy_old * 100) / stats->total_old; + + ret = scnprintf(buf, PAGE_SIZE, "%d %%\n", busy_percent); + + /* Reset the stats if GPU is OFF */ + if (!test_bit(KGSL_PWRFLAGS_AXI_ON, &device->pwrctrl.power_flags)) { + stats->busy_old = 0; + stats->total_old = 0; + } + return ret; +} + +static ssize_t gpu_busy_percentage_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return _gpu_busy_show(device, buf); +} + +static ssize_t _min_clock_mhz_show(struct kgsl_device *device, + char *buf) +{ + struct kgsl_pwrctrl *pwr = 
&device->pwrctrl; + + return scnprintf(buf, PAGE_SIZE, "%d\n", + pwr->pwrlevels[pwr->min_pwrlevel].gpu_freq / 1000000); +} + + +static ssize_t min_clock_mhz_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return _min_clock_mhz_show(device, buf); +} + +static ssize_t _min_clock_mhz_store(struct kgsl_device *device, + const char *buf, size_t count) +{ + int level, ret; + unsigned int freq; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + ret = kstrtou32(buf, 0, &freq); + if (ret) + return ret; + + freq *= 1000000; + level = _get_nearest_pwrlevel(pwr, freq); + + if (level >= 0) + kgsl_pwrctrl_min_pwrlevel_set(device, level); + + return count; +} + +static ssize_t min_clock_mhz_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return _min_clock_mhz_store(device, buf, count); +} + +static ssize_t _max_clock_mhz_show(struct kgsl_device *device, char *buf) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + return scnprintf(buf, PAGE_SIZE, "%d\n", + pwr->pwrlevels[pwr->thermal_pwrlevel].gpu_freq / 1000000); +} + +static ssize_t max_clock_mhz_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return _max_clock_mhz_show(device, buf); +} + +static ssize_t _max_clock_mhz_store(struct kgsl_device *device, + const char *buf, size_t count) +{ + u32 freq; + int ret, level; + + ret = kstrtou32(buf, 0, &freq); + if (ret) + return ret; + + level = _get_nearest_pwrlevel(&device->pwrctrl, freq * 1000000); + if (level < 0) + return level; + + /* + * You would think this would set max_pwrlevel but the legacy behavior + * is that it set thermal_pwrlevel instead so we don't want to mess with + * that. 
+ */ + ret = kgsl_pwrctrl_set_thermal_limit(device, level); + if (ret) + return ret; + + return count; +} + +static ssize_t max_clock_mhz_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return _max_clock_mhz_store(device, buf, count); +} + +static ssize_t _clock_mhz_show(struct kgsl_device *device, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%ld\n", + kgsl_pwrctrl_active_freq(&device->pwrctrl) / 1000000); +} + +static ssize_t clock_mhz_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return _clock_mhz_show(device, buf); +} + +static ssize_t _freq_table_mhz_show(struct kgsl_device *device, + char *buf) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int index, num_chars = 0; + + for (index = 0; index < pwr->num_pwrlevels; index++) { + num_chars += scnprintf(buf + num_chars, + PAGE_SIZE - num_chars - 1, + "%d ", pwr->pwrlevels[index].gpu_freq / 1000000); + /* One space for trailing null and another for the newline */ + if (num_chars >= PAGE_SIZE - 2) + break; + } + + buf[num_chars++] = '\n'; + + return num_chars; +} + +static ssize_t freq_table_mhz_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return _freq_table_mhz_show(device, buf); +} + +static ssize_t _gpu_tmu_show(struct kgsl_device *device, + char *buf) +{ + struct device *dev; + struct thermal_zone_device *thermal_dev; + int temperature = 0, max_temp = 0; + const char *name; + struct property *prop; + + dev = &device->pdev->dev; + + of_property_for_each_string(dev->of_node, "qcom,tzone-names", prop, name) { + thermal_dev = thermal_zone_get_zone_by_name(name); + if (IS_ERR(thermal_dev)) + continue; + + if (thermal_zone_get_temp(thermal_dev, &temperature)) + continue; + + max_temp = max(temperature, max_temp); + } + + return 
scnprintf(buf, PAGE_SIZE, "%d\n", + max_temp); +} + +static ssize_t temp_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return _gpu_tmu_show(device, buf); +} + +static ssize_t pwrscale_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + int ret; + unsigned int enable = 0; + + ret = kstrtou32(buf, 0, &enable); + if (ret) + return ret; + + mutex_lock(&device->mutex); + + if (enable) + kgsl_pwrscale_enable(device); + else + kgsl_pwrscale_disable(device, false); + + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t pwrscale_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrscale *psc = &device->pwrscale; + + return scnprintf(buf, PAGE_SIZE, "%u\n", psc->enabled); +} + +static DEVICE_ATTR_RO(temp); +static DEVICE_ATTR_RW(gpuclk); +static DEVICE_ATTR_RW(max_gpuclk); +static DEVICE_ATTR_RW(idle_timer); +static DEVICE_ATTR_RW(minbw_timer); +static DEVICE_ATTR_RO(gpubusy); +static DEVICE_ATTR_RO(gpu_available_frequencies); +static DEVICE_ATTR_RO(gpu_clock_stats); +static DEVICE_ATTR_RW(max_pwrlevel); +static DEVICE_ATTR_RW(min_pwrlevel); +static DEVICE_ATTR_RW(thermal_pwrlevel); +static DEVICE_ATTR_RO(num_pwrlevels); +static DEVICE_ATTR_RO(reset_count); +static DEVICE_ATTR_RW(force_clk_on); +static DEVICE_ATTR_RW(force_bus_on); +static DEVICE_ATTR_RW(force_rail_on); +static DEVICE_ATTR_RW(bus_split); +static DEVICE_ATTR_RW(default_pwrlevel); +static DEVICE_ATTR_RO(popp); +static DEVICE_ATTR_RW(force_no_nap); +static DEVICE_ATTR_RO(gpu_busy_percentage); +static DEVICE_ATTR_RW(min_clock_mhz); +static DEVICE_ATTR_RW(max_clock_mhz); +static DEVICE_ATTR_RO(clock_mhz); +static DEVICE_ATTR_RO(freq_table_mhz); +static DEVICE_ATTR_RW(pwrscale); + +static const struct attribute 
*pwrctrl_attr_list[] = { + &dev_attr_gpuclk.attr, + &dev_attr_max_gpuclk.attr, + &dev_attr_idle_timer.attr, + &dev_attr_minbw_timer.attr, + &dev_attr_gpubusy.attr, + &dev_attr_gpu_available_frequencies.attr, + &dev_attr_gpu_clock_stats.attr, + &dev_attr_max_pwrlevel.attr, + &dev_attr_min_pwrlevel.attr, + &dev_attr_thermal_pwrlevel.attr, + &dev_attr_num_pwrlevels.attr, + &dev_attr_reset_count.attr, + &dev_attr_force_clk_on.attr, + &dev_attr_force_bus_on.attr, + &dev_attr_force_rail_on.attr, + &dev_attr_force_no_nap.attr, + &dev_attr_bus_split.attr, + &dev_attr_default_pwrlevel.attr, + &dev_attr_popp.attr, + &dev_attr_gpu_busy_percentage.attr, + &dev_attr_min_clock_mhz.attr, + &dev_attr_max_clock_mhz.attr, + &dev_attr_clock_mhz.attr, + &dev_attr_freq_table_mhz.attr, + &dev_attr_temp.attr, + &dev_attr_pwrscale.attr, + NULL, +}; + +static GPU_SYSFS_ATTR(gpu_busy, 0444, _gpu_busy_show, NULL); +static GPU_SYSFS_ATTR(gpu_min_clock, 0644, _min_clock_mhz_show, + _min_clock_mhz_store); +static GPU_SYSFS_ATTR(gpu_max_clock, 0644, _max_clock_mhz_show, + _max_clock_mhz_store); +static GPU_SYSFS_ATTR(gpu_clock, 0444, _clock_mhz_show, NULL); +static GPU_SYSFS_ATTR(gpu_freq_table, 0444, _freq_table_mhz_show, NULL); +static GPU_SYSFS_ATTR(gpu_tmu, 0444, _gpu_tmu_show, NULL); + +static const struct attribute *gpu_sysfs_attr_list[] = { + &gpu_sysfs_attr_gpu_busy.attr, + &gpu_sysfs_attr_gpu_min_clock.attr, + &gpu_sysfs_attr_gpu_max_clock.attr, + &gpu_sysfs_attr_gpu_clock.attr, + &gpu_sysfs_attr_gpu_freq_table.attr, + &gpu_sysfs_attr_gpu_tmu.attr, + NULL, +}; + +int kgsl_pwrctrl_init_sysfs(struct kgsl_device *device) +{ + int ret; + + ret = sysfs_create_files(&device->dev->kobj, pwrctrl_attr_list); + if (ret) + return ret; + + if (!device->gpu_sysfs_kobj.state_in_sysfs) + return 0; + + return sysfs_create_files(&device->gpu_sysfs_kobj, gpu_sysfs_attr_list); +} + +/* + * Track the amount of time the gpu is on vs the total system time. 
+ * Regularly update the percentage of busy time displayed by sysfs. + */ +void kgsl_pwrctrl_busy_time(struct kgsl_device *device, u64 time, u64 busy) +{ + struct kgsl_clk_stats *stats = &device->pwrctrl.clk_stats; + + stats->total += time; + stats->busy += busy; + + if (stats->total < UPDATE_BUSY_VAL) + return; + + /* Update the output regularly and reset the counters. */ + stats->total_old = stats->total; + stats->busy_old = stats->busy; + stats->total = 0; + stats->busy = 0; + + trace_kgsl_gpubusy(device, stats->busy_old, stats->total_old); +} + +static void kgsl_pwrctrl_clk(struct kgsl_device *device, bool state, + int requested_state) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int i = 0; + + if (gmu_core_gpmu_isenabled(device)) + return; + if (test_bit(KGSL_PWRFLAGS_CLK_ON, &pwr->ctrl_flags)) + return; + + if (!state) { + if (test_and_clear_bit(KGSL_PWRFLAGS_CLK_ON, + &pwr->power_flags)) { + trace_kgsl_clk(device, state, + kgsl_pwrctrl_active_freq(pwr)); + /* Disable gpu-bimc-interface clocks */ + if (pwr->gpu_bimc_int_clk && + pwr->gpu_bimc_interface_enabled) { + clk_disable_unprepare(pwr->gpu_bimc_int_clk); + pwr->gpu_bimc_interface_enabled = false; + } + + for (i = KGSL_MAX_CLKS - 1; i > 0; i--) + clk_disable(pwr->grp_clks[i]); + /* High latency clock maintenance. */ + if ((pwr->pwrlevels[0].gpu_freq > 0) && + (requested_state != KGSL_STATE_NAP) && + (requested_state != KGSL_STATE_MINBW)) { + for (i = KGSL_MAX_CLKS - 1; i > 0; i--) + clk_unprepare(pwr->grp_clks[i]); + device->ftbl->gpu_clock_set(device, + pwr->num_pwrlevels - 1); + _isense_clk_set_rate(pwr, + pwr->num_pwrlevels - 1); + } + + /* Turn off the IOMMU clocks */ + kgsl_mmu_disable_clk(&device->mmu); + } else if (requested_state == KGSL_STATE_SLUMBER) { + /* High latency clock maintenance. 
*/ + for (i = KGSL_MAX_CLKS - 1; i > 0; i--) + clk_unprepare(pwr->grp_clks[i]); + if ((pwr->pwrlevels[0].gpu_freq > 0)) { + device->ftbl->gpu_clock_set(device, + pwr->num_pwrlevels - 1); + _isense_clk_set_rate(pwr, + pwr->num_pwrlevels - 1); + } + } + } else { + if (!test_and_set_bit(KGSL_PWRFLAGS_CLK_ON, + &pwr->power_flags)) { + trace_kgsl_clk(device, state, + kgsl_pwrctrl_active_freq(pwr)); + /* High latency clock maintenance. */ + if ((device->state != KGSL_STATE_NAP) && + (device->state != KGSL_STATE_MINBW)) { + if (pwr->pwrlevels[0].gpu_freq > 0) { + device->ftbl->gpu_clock_set(device, + pwr->active_pwrlevel); + _isense_clk_set_rate(pwr, + pwr->active_pwrlevel); + } + } + + for (i = KGSL_MAX_CLKS - 1; i > 0; i--) + _gpu_clk_prepare_enable(device, + pwr->grp_clks[i], clocks[i]); + + /* Enable the gpu-bimc-interface clocks */ + if (pwr->gpu_bimc_int_clk) { + if (pwr->active_pwrlevel == 0 && + !pwr->gpu_bimc_interface_enabled) { + kgsl_pwrctrl_clk_set_rate( + pwr->gpu_bimc_int_clk, + pwr->gpu_bimc_int_clk_freq, + "bimc_gpu_clk"); + _bimc_clk_prepare_enable(device, + pwr->gpu_bimc_int_clk, + "bimc_gpu_clk"); + pwr->gpu_bimc_interface_enabled = true; + } + } + + /* Turn on the IOMMU clocks */ + kgsl_mmu_enable_clk(&device->mmu); + } + + } +} + +int kgsl_pwrctrl_axi(struct kgsl_device *device, bool state) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + if (test_bit(KGSL_PWRFLAGS_AXI_ON, &pwr->ctrl_flags)) + return 0; + + if (!state) { + if (test_and_clear_bit(KGSL_PWRFLAGS_AXI_ON, + &pwr->power_flags)) { + trace_kgsl_bus(device, state); + return kgsl_bus_update(device, KGSL_BUS_VOTE_OFF); + } + } else { + if (!test_and_set_bit(KGSL_PWRFLAGS_AXI_ON, + &pwr->power_flags)) { + trace_kgsl_bus(device, state); + return kgsl_bus_update(device, KGSL_BUS_VOTE_ON); + } + } + + return 0; +} + +static int enable_regulator(struct device *dev, struct regulator *regulator, + const char *name) +{ + int ret; + + if (IS_ERR_OR_NULL(regulator)) + return 0; + + ret = 
regulator_enable(regulator); + if (ret) + dev_err(dev, "Unable to enable regulator %s: %d\n", name, ret); + return ret; +} + +static int enable_regulators(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int ret; + + if (test_and_set_bit(KGSL_PWRFLAGS_POWER_ON, &pwr->power_flags)) + return 0; + + ret = enable_regulator(&device->pdev->dev, pwr->cx_gdsc, "vddcx"); + if (!ret) { + /* Set parent in retention voltage to power up vdd supply */ + ret = kgsl_regulator_set_voltage(device->dev, + pwr->gx_gdsc_parent, + pwr->gx_gdsc_parent_min_corner); + if (!ret) + ret = enable_regulator(&device->pdev->dev, + pwr->gx_gdsc, "vdd"); + } + + if (ret) { + clear_bit(KGSL_PWRFLAGS_POWER_ON, &pwr->power_flags); + return ret; + } + + trace_kgsl_rail(device, KGSL_PWRFLAGS_POWER_ON); + return 0; +} + +static int kgsl_pwrctrl_pwrrail(struct kgsl_device *device, bool state) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int status = 0; + + if (gmu_core_gpmu_isenabled(device)) + return 0; + /* + * Disabling the regulator means also disabling dependent clocks. + * Hence don't disable it if force clock ON is set. 
+ */ + if (test_bit(KGSL_PWRFLAGS_POWER_ON, &pwr->ctrl_flags) || + test_bit(KGSL_PWRFLAGS_CLK_ON, &pwr->ctrl_flags)) + return 0; + + if (!state) { + if (test_and_clear_bit(KGSL_PWRFLAGS_POWER_ON, + &pwr->power_flags)) { + trace_kgsl_rail(device, state); + if (!kgsl_regulator_disable_wait(pwr->gx_gdsc, 200)) + dev_err(device->dev, "Regulator vdd is stuck on\n"); + if (!kgsl_regulator_disable_wait(pwr->cx_gdsc, 200)) + dev_err(device->dev, "Regulator vddcx is stuck on\n"); + } + } else + status = enable_regulators(device); + + return status; +} + +void kgsl_pwrctrl_irq(struct kgsl_device *device, bool state) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + if (state) { + if (!test_and_set_bit(KGSL_PWRFLAGS_IRQ_ON, + &pwr->power_flags)) { + trace_kgsl_irq(device, state); + enable_irq(pwr->interrupt_num); + } + } else { + if (test_and_clear_bit(KGSL_PWRFLAGS_IRQ_ON, + &pwr->power_flags)) { + trace_kgsl_irq(device, state); + if (in_interrupt()) + disable_irq_nosync(pwr->interrupt_num); + else + disable_irq(pwr->interrupt_num); + } + } +} + +static void kgsl_minbw_timer(struct timer_list *t) +{ + struct kgsl_pwrctrl *pwr = from_timer(pwr, t, minbw_timer); + struct kgsl_device *device = container_of(pwr, + struct kgsl_device, pwrctrl); + + if (device->state == KGSL_STATE_NAP) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_MINBW); + kgsl_schedule_work(&device->idle_check_ws); + } +} + +static int _get_clocks(struct kgsl_device *device) +{ + struct device *dev = &device->pdev->dev; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + const char *name; + struct property *prop; + + pwr->isense_clk_indx = 0; + of_property_for_each_string(dev->of_node, "clock-names", prop, name) { + int i; + + for (i = 0; i < KGSL_MAX_CLKS; i++) { + if (pwr->grp_clks[i] || strcmp(clocks[i], name)) + continue; + /* apb_pclk should only be enabled if QCOM_KGSL_QDSS_STM is enabled */ + if (!strcmp(name, "apb_pclk") && !IS_ENABLED(CONFIG_QCOM_KGSL_QDSS_STM)) + continue; + + pwr->grp_clks[i] = 
devm_clk_get(dev, name); + + if (IS_ERR(pwr->grp_clks[i])) { + int ret = PTR_ERR(pwr->grp_clks[i]); + + dev_err(dev, "Couldn't get clock: %s (%d)\n", + name, ret); + pwr->grp_clks[i] = NULL; + return ret; + } + + if (!strcmp(name, "isense_clk")) + pwr->isense_clk_indx = i; + break; + } + } + + if (pwr->isense_clk_indx && of_property_read_u32(dev->of_node, + "qcom,isense-clk-on-level", &pwr->isense_clk_on_level)) { + dev_err(dev, "Couldn't get isense clock on level\n"); + return -ENXIO; + } + return 0; +} + +static int _isense_clk_set_rate(struct kgsl_pwrctrl *pwr, int level) +{ + int rate; + + if (!pwr->isense_clk_indx) + return -EINVAL; + + rate = clk_round_rate(pwr->grp_clks[pwr->isense_clk_indx], + level > pwr->isense_clk_on_level ? + KGSL_XO_CLK_FREQ : KGSL_ISENSE_CLK_FREQ); + return kgsl_pwrctrl_clk_set_rate(pwr->grp_clks[pwr->isense_clk_indx], + rate, clocks[pwr->isense_clk_indx]); +} + +/* + * _gpu_clk_prepare_enable - Enable the specified GPU clock + * Try once to enable it and then BUG() for debug + */ +static void _gpu_clk_prepare_enable(struct kgsl_device *device, + struct clk *clk, const char *name) +{ + int ret; + + if (kgsl_state_is_nap_or_minbw(device)) { + ret = clk_enable(clk); + if (ret) + goto err; + return; + } + + ret = clk_prepare_enable(clk); + if (!ret) + return; +err: + /* Failure is fatal so BUG() to facilitate debug */ + dev_err(device->dev, "GPU Clock %s enable error:%d\n", name, ret); +} + +/* + * _bimc_clk_prepare_enable - Enable the specified GPU clock + * Try once to enable it and then BUG() for debug + */ +static void _bimc_clk_prepare_enable(struct kgsl_device *device, + struct clk *clk, const char *name) +{ + int ret = clk_prepare_enable(clk); + /* Failure is fatal so BUG() to facilitate debug */ + if (ret) + dev_err(device->dev, "GPU clock %s enable error:%d\n", + name, ret); +} + +static int kgsl_pwrctrl_clk_set_rate(struct clk *grp_clk, unsigned int freq, + const char *name) +{ + int ret = clk_set_rate(grp_clk, freq); + + 
+	WARN(ret, "%s set freq %d failed:%d\n", name, freq, ret);
+	return ret;
+}
+
+/*
+ * kgsl_pwrctrl_init - One-time power-control setup for a KGSL device.
+ *
+ * Looks up the GPU clocks and optional bimc interface clock, decides
+ * whether the NAP state is permitted, seeds the user/thermal power-level
+ * constraints, registers the sysfs thermal PM QoS request, rounds every
+ * power level's frequency against the source clock, grabs the optional
+ * regulators and arms the min-bandwidth timer.
+ *
+ * Returns 0 on success or a negative errno on a fatal probe failure.
+ */
+int kgsl_pwrctrl_init(struct kgsl_device *device)
+{
+	int i, result, freq;
+	struct platform_device *pdev = device->pdev;
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+
+	result = _get_clocks(device);
+	if (result)
+		return result;
+
+	/* Make sure we have a source clk for freq setting */
+	if (pwr->grp_clks[0] == NULL)
+		pwr->grp_clks[0] = pwr->grp_clks[1];
+
+	/* Getting gfx-bimc-interface-clk frequency */
+	if (!of_property_read_u32(pdev->dev.of_node,
+			"qcom,gpu-bimc-interface-clk-freq",
+			&pwr->gpu_bimc_int_clk_freq))
+		pwr->gpu_bimc_int_clk = devm_clk_get(&pdev->dev,
+			"bimc_gpu_clk");
+
+	/* NAP is disabled by DT opt-out or when the clock framework is absent */
+	if (of_property_read_bool(pdev->dev.of_node, "qcom,no-nap"))
+		device->pwrctrl.ctrl_flags |= BIT(KGSL_PWRFLAGS_NAP_OFF);
+	else if (!IS_ENABLED(CONFIG_COMMON_CLK_QCOM)) {
+		dev_warn(device->dev, "KGSL nap state is not supported\n");
+		device->pwrctrl.ctrl_flags |= BIT(KGSL_PWRFLAGS_NAP_OFF);
+	}
+
+	if (pwr->num_pwrlevels == 0) {
+		dev_err(device->dev, "No power levels are defined\n");
+		return -EINVAL;
+	}
+
+	init_waitqueue_head(&device->active_cnt_wq);
+
+	/* Initialize the user and thermal clock constraints */
+
+	pwr->max_pwrlevel = 0;
+	pwr->min_pwrlevel = pwr->num_pwrlevels - 1;
+	pwr->thermal_pwrlevel = 0;
+	pwr->thermal_pwrlevel_floor = pwr->min_pwrlevel;
+
+	pwr->wakeup_maxpwrlevel = 0;
+
+	/* Non-fatal: sysfs thermal limiting simply won't work without it */
+	result = dev_pm_qos_add_request(&pdev->dev, &pwr->sysfs_thermal_req,
+				DEV_PM_QOS_MAX_FREQUENCY,
+				PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
+	if (result < 0)
+		dev_err(device->dev, "PM QoS thermal request failed:%d\n", result);
+
+	/* Snap each table frequency to what the source clock can actually do */
+	for (i = 0; i < pwr->num_pwrlevels; i++) {
+		freq = pwr->pwrlevels[i].gpu_freq;
+
+		if (freq > 0)
+			freq = clk_round_rate(pwr->grp_clks[0], freq);
+
+		if (freq >= pwr->pwrlevels[i].gpu_freq)
+			pwr->pwrlevels[i].gpu_freq = freq;
+	}
+
+	clk_set_rate(pwr->grp_clks[0],
+		pwr->pwrlevels[pwr->num_pwrlevels - 1].gpu_freq);
+
+	freq = clk_round_rate(pwr->grp_clks[6],
KGSL_XO_CLK_FREQ); + if (freq > 0) + kgsl_pwrctrl_clk_set_rate(pwr->grp_clks[6], + freq, clocks[6]); + + _isense_clk_set_rate(pwr, pwr->num_pwrlevels - 1); + + if (of_property_read_bool(pdev->dev.of_node, "vddcx-supply")) + pwr->cx_gdsc = devm_regulator_get(&pdev->dev, "vddcx"); + + if (of_property_read_bool(pdev->dev.of_node, "vdd-supply")) + pwr->gx_gdsc = devm_regulator_get(&pdev->dev, "vdd"); + + if (of_property_read_bool(pdev->dev.of_node, "vdd-parent-supply")) { + pwr->gx_gdsc_parent = devm_regulator_get(&pdev->dev, + "vdd-parent"); + if (IS_ERR(pwr->gx_gdsc_parent)) { + dev_err(device->dev, + "Failed to get vdd-parent regulator:%ld\n", + PTR_ERR(pwr->gx_gdsc_parent)); + return -ENODEV; + } + if (of_property_read_u32(pdev->dev.of_node, + "vdd-parent-min-corner", + &pwr->gx_gdsc_parent_min_corner)) { + dev_err(device->dev, + "vdd-parent-min-corner not found\n"); + return -ENODEV; + } + } + + pwr->power_flags = 0; + + pm_runtime_enable(&pdev->dev); + + timer_setup(&pwr->minbw_timer, kgsl_minbw_timer, 0); + + return 0; +} + +void kgsl_pwrctrl_close(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + pwr->power_flags = 0; + + kgsl_bus_close(device); + + if (dev_pm_qos_request_active(&pwr->sysfs_thermal_req)) + dev_pm_qos_remove_request(&pwr->sysfs_thermal_req); + + pm_runtime_disable(&device->pdev->dev); +} + +void kgsl_idle_check(struct work_struct *work) +{ + struct kgsl_device *device = container_of(work, struct kgsl_device, + idle_check_ws); + int ret = 0; + unsigned int requested_state; + + mutex_lock(&device->mutex); + + /* + * After scheduling idle work for transitioning to either NAP or + * SLUMBER, it's possible that requested state can change to NONE + * if any new workload comes before kgsl_idle_check is executed or + * it gets the device mutex. In such case, no need to change state + * to NONE. 
+ */ + if (device->requested_state == KGSL_STATE_NONE) { + mutex_unlock(&device->mutex); + return; + } + + requested_state = device->requested_state; + + if (device->state == KGSL_STATE_ACTIVE + || kgsl_state_is_nap_or_minbw(device)) { + + if (!atomic_read(&device->active_cnt)) { + spin_lock(&device->submit_lock); + if (device->submit_now) { + spin_unlock(&device->submit_lock); + goto done; + } + /* Don't allow GPU inline submission in SLUMBER */ + if (requested_state == KGSL_STATE_SLUMBER) + device->slumber = true; + spin_unlock(&device->submit_lock); + + ret = kgsl_pwrctrl_change_state(device, + device->requested_state); + if (ret == -EBUSY) { + if (requested_state == KGSL_STATE_SLUMBER) { + spin_lock(&device->submit_lock); + device->slumber = false; + spin_unlock(&device->submit_lock); + } + /* + * If the GPU is currently busy, restore + * the requested state and reschedule + * idle work. + */ + kgsl_pwrctrl_request_state(device, + requested_state); + kgsl_schedule_work(&device->idle_check_ws); + } + } +done: + if (!ret) + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + + if (device->state == KGSL_STATE_ACTIVE) + kgsl_start_idle_timer(device); + } + + if (device->state != KGSL_STATE_MINBW) + kgsl_pwrscale_update(device); + mutex_unlock(&device->mutex); +} + +void kgsl_timer(struct timer_list *t) +{ + struct kgsl_device *device = from_timer(device, t, idle_timer); + + if (device->requested_state != KGSL_STATE_SUSPEND) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_SLUMBER); + /* Have work run in a non-interrupt context. 
*/ + kgsl_schedule_work(&device->idle_check_ws); + } +} + +static bool kgsl_pwrctrl_isenabled(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + return ((test_bit(KGSL_PWRFLAGS_CLK_ON, &pwr->power_flags) != 0) && + (test_bit(KGSL_PWRFLAGS_AXI_ON, &pwr->power_flags) != 0)); +} + +/** + * kgsl_pre_hwaccess - Enforce preconditions for touching registers + * @device: The device + * + * This function ensures that the correct lock is held and that the GPU + * clock is on immediately before a register is read or written. Note + * that this function does not check active_cnt because the registers + * must be accessed during device start and stop, when the active_cnt + * may legitimately be 0. + */ +void kgsl_pre_hwaccess(struct kgsl_device *device) +{ + /* In order to touch a register you must hold the device mutex */ + WARN_ON(!mutex_is_locked(&device->mutex)); + + /* + * A register access without device power will cause a fatal timeout. + * This is not valid for targets with a GMU. 
+ */ + if (!gmu_core_gpmu_isenabled(device)) + WARN_ON(!kgsl_pwrctrl_isenabled(device)); +} + +static int kgsl_pwrctrl_enable(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int level, status; + + if (pwr->wakeup_maxpwrlevel) { + level = pwr->max_pwrlevel; + pwr->wakeup_maxpwrlevel = 0; + } else { + level = pwr->default_pwrlevel; + } + + kgsl_pwrctrl_pwrlevel_change(device, level); + + /* Order pwrrail/clk sequence based upon platform */ + status = kgsl_pwrctrl_pwrrail(device, true); + if (status) + return status; + kgsl_pwrctrl_clk(device, true, KGSL_STATE_ACTIVE); + kgsl_pwrctrl_axi(device, true); + + return device->ftbl->regulator_enable(device); +} + +void kgsl_pwrctrl_clear_l3_vote(struct kgsl_device *device) +{ + int status; + struct dcvs_freq freq = {0}; + + if (!device->num_l3_pwrlevels) + return; + + freq.hw_type = DCVS_L3; + + status = qcom_dcvs_update_votes(KGSL_L3_DEVICE, &freq, 1, + DCVS_SLOW_PATH); + if (!status) + device->cur_l3_pwrlevel = 0; + else + dev_err(device->dev, "Could not clear l3_vote: %d\n", + status); +} + +static void kgsl_pwrctrl_disable(struct kgsl_device *device) +{ + kgsl_pwrctrl_clear_l3_vote(device); + + /* Order pwrrail/clk sequence based upon platform */ + device->ftbl->regulator_disable(device); + kgsl_pwrctrl_axi(device, false); + kgsl_pwrctrl_clk(device, false, KGSL_STATE_SLUMBER); + kgsl_pwrctrl_pwrrail(device, false); +} + +static void +kgsl_pwrctrl_clk_set_options(struct kgsl_device *device, bool on) +{ + int i; + + for (i = 0; i < KGSL_MAX_CLKS; i++) + device->ftbl->clk_set_options(device, clocks[i], + device->pwrctrl.grp_clks[i], on); +} + +/** + * _init() - Get the GPU ready to start, but don't turn anything on + * @device - Pointer to the kgsl_device struct + */ +static int _init(struct kgsl_device *device) +{ + int status = 0; + + switch (device->state) { + case KGSL_STATE_MINBW: + fallthrough; + case KGSL_STATE_NAP: + del_timer_sync(&device->pwrctrl.minbw_timer); + /* Force power on 
to do the stop */ + status = kgsl_pwrctrl_enable(device); + fallthrough; + case KGSL_STATE_ACTIVE: + kgsl_pwrctrl_irq(device, false); + del_timer_sync(&device->idle_timer); + device->ftbl->stop(device); + fallthrough; + case KGSL_STATE_AWARE: + kgsl_pwrctrl_disable(device); + fallthrough; + case KGSL_STATE_SLUMBER: + fallthrough; + case KGSL_STATE_NONE: + kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT); + } + + return status; +} + +/** + * _wake() - Power up the GPU from a slumber state + * @device - Pointer to the kgsl_device struct + * + * Resume the GPU from a lower power state to ACTIVE. + */ +static int _wake(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int status = 0; + + switch (device->state) { + case KGSL_STATE_SUSPEND: + complete_all(&device->hwaccess_gate); + /* Call the GPU specific resume function */ + device->ftbl->resume(device); + fallthrough; + case KGSL_STATE_SLUMBER: + kgsl_pwrctrl_clk_set_options(device, true); + status = device->ftbl->start(device, + device->pwrctrl.superfast); + device->pwrctrl.superfast = false; + + if (status) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + dev_err(device->dev, "start failed %d\n", status); + break; + } + kgsl_pwrctrl_axi(device, true); + kgsl_pwrscale_wake(device); + kgsl_pwrctrl_irq(device, true); + trace_gpu_frequency( + pwr->pwrlevels[pwr->active_pwrlevel].gpu_freq/1000, 0); + fallthrough; + case KGSL_STATE_MINBW: + kgsl_bus_update(device, KGSL_BUS_VOTE_ON); + fallthrough; + case KGSL_STATE_NAP: + /* Turn on the core clocks */ + kgsl_pwrctrl_clk(device, true, KGSL_STATE_ACTIVE); + + device->ftbl->deassert_gbif_halt(device); + pwr->last_stat_updated = ktime_get(); + /* + * No need to turn on/off irq here as it no longer affects + * power collapse + */ + kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE); + + /* + * Change register settings if any after pwrlevel change. 
+ * If there was dcvs level change during nap - call + * pre and post in the row after clock is enabled. + */ + kgsl_pwrctrl_pwrlevel_change_settings(device, 0); + kgsl_pwrctrl_pwrlevel_change_settings(device, 1); + /* All settings for power level transitions are complete*/ + pwr->previous_pwrlevel = pwr->active_pwrlevel; + kgsl_start_idle_timer(device); + del_timer_sync(&device->pwrctrl.minbw_timer); + break; + case KGSL_STATE_AWARE: + kgsl_pwrctrl_clk_set_options(device, true); + /* Enable state before turning on irq */ + kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE); + kgsl_pwrctrl_irq(device, true); + kgsl_start_idle_timer(device); + del_timer_sync(&device->pwrctrl.minbw_timer); + break; + default: + dev_warn(device->dev, "unhandled state %s\n", + kgsl_pwrstate_to_str(device->state)); + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + status = -EINVAL; + break; + } + return status; +} + +/* + * _aware() - Put device into AWARE + * @device: Device pointer + * + * The GPU should be available for register reads/writes and able + * to communicate with the rest of the system. However disable all + * paths that allow a switch to an interrupt context (interrupts & + * timers). + * Return 0 on success else error code + */ +static int +_aware(struct kgsl_device *device) +{ + int status = 0; + + switch (device->state) { + case KGSL_STATE_INIT: + status = kgsl_pwrctrl_enable(device); + break; + /* The following 4 cases shouldn't occur, but don't panic. 
*/ + case KGSL_STATE_MINBW: + fallthrough; + case KGSL_STATE_NAP: + status = _wake(device); + fallthrough; + case KGSL_STATE_ACTIVE: + kgsl_pwrctrl_irq(device, false); + del_timer_sync(&device->idle_timer); + break; + case KGSL_STATE_SLUMBER: + status = kgsl_pwrctrl_enable(device); + break; + default: + status = -EINVAL; + } + + if (!status) + kgsl_pwrctrl_set_state(device, KGSL_STATE_AWARE); + + return status; +} + +static int +_nap(struct kgsl_device *device) +{ + switch (device->state) { + case KGSL_STATE_ACTIVE: + if (!device->ftbl->is_hw_collapsible(device)) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + return -EBUSY; + } + + + /* + * Read HW busy counters before going to NAP state. + * The data might be used by power scale governors + * independently of the HW activity. For example + * the simple-on-demand governor will get the latest + * busy_time data even if the gpu isn't active. + */ + kgsl_pwrscale_update_stats(device); + + mod_timer(&device->pwrctrl.minbw_timer, jiffies + + msecs_to_jiffies(device->pwrctrl.minbw_timeout)); + + kgsl_pwrctrl_clk(device, false, KGSL_STATE_NAP); + kgsl_pwrctrl_set_state(device, KGSL_STATE_NAP); + fallthrough; + case KGSL_STATE_SLUMBER: + break; + case KGSL_STATE_AWARE: + dev_warn(device->dev, + "transition AWARE -> NAP is not permitted\n"); + fallthrough; + default: + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + break; + } + return 0; +} + +static int +_minbw(struct kgsl_device *device) +{ + switch (device->state) { + /* + * Device is expected to be clock gated to move to + * a deeper low power state. 
No other transition is + * permitted + */ + case KGSL_STATE_NAP: + kgsl_bus_update(device, KGSL_BUS_VOTE_MINIMUM); + kgsl_pwrctrl_set_state(device, KGSL_STATE_MINBW); + break; + default: + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + break; + } + return 0; +} + +static int +_slumber(struct kgsl_device *device) +{ + int status = 0; + + switch (device->state) { + case KGSL_STATE_ACTIVE: + if (!device->ftbl->is_hw_collapsible(device)) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + return -EBUSY; + } + fallthrough; + case KGSL_STATE_NAP: + fallthrough; + case KGSL_STATE_MINBW: + del_timer_sync(&device->pwrctrl.minbw_timer); + del_timer_sync(&device->idle_timer); + kgsl_pwrctrl_irq(device, false); + /* make sure power is on to stop the device*/ + status = kgsl_pwrctrl_enable(device); + device->ftbl->suspend_context(device); + device->ftbl->stop(device); + kgsl_pwrctrl_clk_set_options(device, false); + kgsl_pwrctrl_disable(device); + kgsl_pwrscale_sleep(device); + trace_gpu_frequency(0, 0); + kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER); + break; + case KGSL_STATE_SUSPEND: + complete_all(&device->hwaccess_gate); + device->ftbl->resume(device); + kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER); + break; + case KGSL_STATE_AWARE: + kgsl_pwrctrl_disable(device); + trace_gpu_frequency(0, 0); + kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER); + break; + default: + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + break; + + } + return status; +} + +/* + * _suspend() - Put device into suspend + * @device: Device pointer + * + * Return 0 on success else error code + */ +static int _suspend(struct kgsl_device *device) +{ + int ret = 0; + + if ((device->state == KGSL_STATE_NONE) || + (device->state == KGSL_STATE_INIT) || + (device->state == KGSL_STATE_SUSPEND)) + return ret; + + /* + * drain to prevent from more commands being submitted + * and wait for it to go idle + */ + ret = device->ftbl->drain_and_idle(device); + if (ret) + goto err; + 
+ ret = _slumber(device); + if (ret) + goto err; + + kgsl_pwrctrl_set_state(device, KGSL_STATE_SUSPEND); + return ret; + +err: + device->ftbl->resume(device); + dev_err(device->dev, "device failed to SUSPEND %d\n", ret); + return ret; +} + +/* + * kgsl_pwrctrl_change_state() changes the GPU state to the input + * @device: Pointer to a KGSL device + * @state: desired KGSL state + * + * Caller must hold the device mutex. If the requested state change + * is valid, execute it. Otherwise return an error code explaining + * why the change has not taken place. Also print an error if an + * unexpected state change failure occurs. For example, a change to + * NAP may be rejected because the GPU is busy, this is not an error. + * A change to SUSPEND should go through no matter what, so if it + * fails an additional error message will be printed to dmesg. + */ +int kgsl_pwrctrl_change_state(struct kgsl_device *device, int state) +{ + int status = 0; + + if (device->state == state) + return status; + kgsl_pwrctrl_request_state(device, state); + + /* Work through the legal state transitions */ + switch (state) { + case KGSL_STATE_INIT: + status = _init(device); + break; + case KGSL_STATE_AWARE: + status = _aware(device); + break; + case KGSL_STATE_ACTIVE: + status = _wake(device); + break; + case KGSL_STATE_NAP: + status = _nap(device); + break; + case KGSL_STATE_MINBW: + status = _minbw(device); + break; + case KGSL_STATE_SLUMBER: + status = _slumber(device); + break; + case KGSL_STATE_SUSPEND: + status = _suspend(device); + break; + default: + dev_err(device->dev, "bad state request 0x%x\n", state); + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + status = -EINVAL; + break; + } + + return status; +} + +static void kgsl_pwrctrl_set_state(struct kgsl_device *device, + unsigned int state) +{ + trace_kgsl_pwr_set_state(device, state); + device->state = state; + device->requested_state = KGSL_STATE_NONE; + + spin_lock(&device->submit_lock); + if (state == 
KGSL_STATE_SLUMBER || state == KGSL_STATE_SUSPEND) + device->slumber = true; + else + device->slumber = false; + spin_unlock(&device->submit_lock); +} + +void kgsl_pwrctrl_request_state(struct kgsl_device *device, + unsigned int state) +{ + if (state != KGSL_STATE_NONE && state != device->requested_state) + trace_kgsl_pwr_request_state(device, state); + device->requested_state = state; +} + +const char *kgsl_pwrstate_to_str(unsigned int state) +{ + switch (state) { + case KGSL_STATE_NONE: + return "NONE"; + case KGSL_STATE_INIT: + return "INIT"; + case KGSL_STATE_AWARE: + return "AWARE"; + case KGSL_STATE_ACTIVE: + return "ACTIVE"; + case KGSL_STATE_NAP: + return "NAP"; + case KGSL_STATE_MINBW: + return "MINBW"; + case KGSL_STATE_SUSPEND: + return "SUSPEND"; + case KGSL_STATE_SLUMBER: + return "SLUMBER"; + default: + break; + } + return "UNKNOWN"; +} + +static int _check_active_count(struct kgsl_device *device, int count) +{ + /* Return 0 if the active count is greater than the desired value */ + return atomic_read(&device->active_cnt) > count ? 0 : 1; +} + +int kgsl_active_count_wait(struct kgsl_device *device, int count, + unsigned long wait_jiffies) +{ + int result = 0; + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return -EINVAL; + + while (atomic_read(&device->active_cnt) > count) { + long ret; + + mutex_unlock(&device->mutex); + ret = wait_event_timeout(device->active_cnt_wq, + _check_active_count(device, count), wait_jiffies); + mutex_lock(&device->mutex); + result = ret == 0 ? 
-ETIMEDOUT : 0; + if (!result) + wait_jiffies = ret; + else + break; + } + + return result; +} + +/** + * kgsl_pwrctrl_set_default_gpu_pwrlevel() - Set GPU to default power level + * @device: Pointer to the kgsl_device struct + */ +int kgsl_pwrctrl_set_default_gpu_pwrlevel(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + unsigned int new_level = pwr->default_pwrlevel; + unsigned int old_level = pwr->active_pwrlevel; + + /* + * Update the level according to any thermal, + * max/min, or power constraints. + */ + new_level = kgsl_pwrctrl_adjust_pwrlevel(device, new_level); + + pwr->active_pwrlevel = new_level; + pwr->previous_pwrlevel = old_level; + + /* Request adjusted DCVS level */ + return device->ftbl->gpu_clock_set(device, pwr->active_pwrlevel); +} + +/** + * kgsl_pwrctrl_update_thermal_pwrlevel() - Update GPU thermal power level + * @device: Pointer to the kgsl_device struct + */ +void kgsl_pwrctrl_update_thermal_pwrlevel(struct kgsl_device *device) +{ + s32 qos_max_freq = dev_pm_qos_read_value(&device->pdev->dev, + DEV_PM_QOS_MAX_FREQUENCY); + int level = 0; + + if (qos_max_freq != PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE) { + level = _get_nearest_pwrlevel(&device->pwrctrl, + qos_max_freq * 1000); + if (level < 0) + return; + } + + if (level != device->pwrctrl.thermal_pwrlevel) { + trace_kgsl_thermal_constraint( + device->pwrctrl.pwrlevels[level].gpu_freq); + + device->pwrctrl.thermal_pwrlevel = level; + } +} + +int kgsl_gpu_num_freqs(void) +{ + struct kgsl_device *device = kgsl_get_device(0); + + if (!device) + return -ENODEV; + + return device->pwrctrl.num_pwrlevels; +} +EXPORT_SYMBOL(kgsl_gpu_num_freqs); + +int kgsl_gpu_stat(struct kgsl_gpu_freq_stat *stats, u32 numfreq) +{ + struct kgsl_device *device = kgsl_get_device(0); + struct kgsl_pwrctrl *pwr; + int i; + + if (!device) + return -ENODEV; + + pwr = &device->pwrctrl; + + if (!stats || (numfreq < pwr->num_pwrlevels)) + return -EINVAL; + + mutex_lock(&device->mutex); + 
kgsl_pwrscale_update_stats(device); + + for (i = 0; i < pwr->num_pwrlevels; i++) { + stats[i].freq = pwr->pwrlevels[i].gpu_freq; + stats[i].active_time = pwr->clock_times[i]; + stats[i].idle_time = pwr->time_in_pwrlevel[i] - pwr->clock_times[i]; + } + mutex_unlock(&device->mutex); + + return 0; +} +EXPORT_SYMBOL(kgsl_gpu_stat); diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h new file mode 100644 index 0000000000..925aceeef5 --- /dev/null +++ b/kgsl_pwrctrl.h @@ -0,0 +1,265 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2010-2021, The Linux Foundation. All rights reserved. + */ +#ifndef __KGSL_PWRCTRL_H +#define __KGSL_PWRCTRL_H + +#include +#include + +/***************************************************************************** + * power flags + ****************************************************************************/ +#define KGSL_MAX_CLKS 18 + +#define KGSL_MAX_PWRLEVELS 16 + +#define KGSL_PWRFLAGS_POWER_ON 0 +#define KGSL_PWRFLAGS_CLK_ON 1 +#define KGSL_PWRFLAGS_AXI_ON 2 +#define KGSL_PWRFLAGS_IRQ_ON 3 +#define KGSL_PWRFLAGS_NAP_OFF 5 + +/* Use to enable all the force power on states at once */ +#define KGSL_PWR_ON GENMASK(5, 0) + +/* Only two supported levels, min & max */ +#define KGSL_CONSTRAINT_PWR_MAXLEVELS 2 + +#define KGSL_XO_CLK_FREQ 19200000 +#define KGSL_ISENSE_CLK_FREQ 200000000 + +struct platform_device; +struct icc_path; + +struct kgsl_clk_stats { + unsigned int busy; + unsigned int total; + unsigned int busy_old; + unsigned int total_old; +}; + +struct kgsl_pwr_constraint { + unsigned int type; + unsigned int sub_type; + union { + struct { + unsigned int level; + } pwrlevel; + } hint; + unsigned long expires; + uint32_t owner_id; + u32 owner_timestamp; +}; + +/** + * struct kgsl_pwrlevel - Struct holding different pwrlevel info obtained from + * from dtsi file + * @gpu_freq: GPU frequency vote in Hz + * @bus_freq: Bus bandwidth vote index + * @bus_min: Min bus index @gpu_freq + * @bus_max: Max bus index @gpu_freq + */ 
+struct kgsl_pwrlevel { + unsigned int gpu_freq; + unsigned int bus_freq; + unsigned int bus_min; + unsigned int bus_max; + unsigned int acd_level; + /** @voltage_level: Voltage level used by the GMU to vote RPMh */ + u32 voltage_level; +}; + +/** + * struct kgsl_pwrctrl - Power control settings for a KGSL device + * @interrupt_num - The interrupt number for the device + * @grp_clks - Array of clocks structures that we control + * @power_flags - Control flags for power + * @pwrlevels - List of supported power levels + * @active_pwrlevel - The currently active power level + * @previous_pwrlevel - The power level before transition + * @thermal_pwrlevel - maximum powerlevel constraint from thermal + * @thermal_pwrlevel_floor - minimum powerlevel constraint from thermal + * @default_pwrlevel - device wake up power level + * @max_pwrlevel - maximum allowable powerlevel per the user + * @min_pwrlevel - minimum allowable powerlevel per the user + * @num_pwrlevels - number of available power levels + * @throttle_mask - LM throttle mask + * @interval_timeout - timeout to be idle before a power event + * @clock_times - Each GPU frequency's accumulated active time in us + * @clk_stats - structure of clock statistics + * @input_disable - To disable GPU wakeup on touch input event + * @bus_control - true if the bus calculation is independent + * @bus_mod - modifier from the current power level for the bus vote + * @bus_percent_ab - current percent of total possible bus usage + * @bus_width - target specific bus width in number of bytes + * @bus_ab_mbytes - AB vote in Mbytes for current bus usage + * @constraint - currently active power constraint + * @superfast - Boolean flag to indicate that the GPU start should be run in the + * higher priority thread + * isense_clk_indx - index of isense clock, 0 if no isense + * isense_clk_on_level - isense clock rate is XO rate below this level. 
+ */ + +struct kgsl_pwrctrl { + int interrupt_num; + struct clk *grp_clks[KGSL_MAX_CLKS]; + struct clk *gpu_bimc_int_clk; + /** @cx_gdsc: Pointer to the CX domain regulator if applicable */ + struct regulator *cx_gdsc; + /** @gx_gdsc: Pointer to the GX domain regulator if applicable */ + struct regulator *gx_gdsc; + /** @gx_gdsc: Pointer to the GX domain parent supply */ + struct regulator *gx_gdsc_parent; + /** @gx_gdsc_parent_min_corner: Minimum supply voltage for GX parent */ + u32 gx_gdsc_parent_min_corner; + int isense_clk_indx; + int isense_clk_on_level; + unsigned long power_flags; + unsigned long ctrl_flags; + struct kgsl_pwrlevel pwrlevels[KGSL_MAX_PWRLEVELS]; + unsigned int active_pwrlevel; + unsigned int previous_pwrlevel; + unsigned int thermal_pwrlevel; + unsigned int thermal_pwrlevel_floor; + unsigned int default_pwrlevel; + unsigned int wakeup_maxpwrlevel; + unsigned int max_pwrlevel; + unsigned int min_pwrlevel; + unsigned int num_pwrlevels; + unsigned int throttle_mask; + u32 interval_timeout; + u64 clock_times[KGSL_MAX_PWRLEVELS]; + struct kgsl_clk_stats clk_stats; + bool bus_control; + int bus_mod; + unsigned int bus_percent_ab; + unsigned int bus_width; + unsigned long bus_ab_mbytes; + /** @ddr_table: List of the DDR bandwidths in KBps for the target */ + u32 *ddr_table; + /** @ddr_table_count: Number of objects in @ddr_table */ + int ddr_table_count; + /** cur_buslevel: The last buslevel voted by the driver */ + int cur_buslevel; + /** @bus_max: The maximum bandwidth available to the device */ + unsigned long bus_max; + struct kgsl_pwr_constraint constraint; + bool superfast; + unsigned int gpu_bimc_int_clk_freq; + bool gpu_bimc_interface_enabled; + /** @icc_path: Interconnect path for the GPU (if applicable) */ + struct icc_path *icc_path; + /** cur_ab: The last ab voted by the driver */ + u32 cur_ab; + /** @minbw_timer - Timer struct for entering minimum bandwidth state */ + struct timer_list minbw_timer; + /** @minbw_timeout - Timeout for 
entering minimum bandwidth state */ + u32 minbw_timeout; + /** @sysfs_thermal_req - PM QoS maximum frequency request from user (via sysfs) */ + struct dev_pm_qos_request sysfs_thermal_req; + /** @time_in_pwrlevel: Each pwrlevel active duration in usec */ + u64 time_in_pwrlevel[KGSL_MAX_PWRLEVELS]; + /** @last_stat_updated: The last time stats were updated */ + ktime_t last_stat_updated; +}; + +int kgsl_pwrctrl_init(struct kgsl_device *device); +void kgsl_pwrctrl_close(struct kgsl_device *device); +void kgsl_timer(struct timer_list *t); +void kgsl_pre_hwaccess(struct kgsl_device *device); +void kgsl_pwrctrl_pwrlevel_change(struct kgsl_device *device, + unsigned int level); +int kgsl_pwrctrl_init_sysfs(struct kgsl_device *device); +int kgsl_pwrctrl_change_state(struct kgsl_device *device, int state); + +unsigned int kgsl_pwrctrl_adjust_pwrlevel(struct kgsl_device *device, + unsigned int new_level); + +/* + * kgsl_pwrctrl_active_freq - get currently configured frequency + * @pwr: kgsl_pwrctrl structure for the device + * + * Returns the currently configured frequency for the device. + */ +static inline unsigned long +kgsl_pwrctrl_active_freq(struct kgsl_pwrctrl *pwr) +{ + return pwr->pwrlevels[pwr->active_pwrlevel].gpu_freq; +} + +/** + * kgsl_active_count_wait() - Wait for activity to finish. 
+ * @device: Pointer to a KGSL device + * @count: Active count value to wait for + * @wait_jiffies: Jiffies to wait + * + * Block until the active_cnt value hits the desired value + */ +int kgsl_active_count_wait(struct kgsl_device *device, int count, + unsigned long wait_jiffies); +void kgsl_pwrctrl_busy_time(struct kgsl_device *device, u64 time, u64 busy); + +/** + * kgsl_pwrctrl_set_constraint() - Validate and change enforced constraint + * @device: Pointer to the kgsl_device struct + * @pwrc: Pointer to requested constraint + * @id: Context id which owns the constraint + * @ts: The timestamp for which this constraint is enforced + * + * Accept the new constraint if no previous constraint existed or if the + * new constraint is faster than the previous one. If the new and previous + * constraints are equal, update the timestamp and ownership to make sure + * the constraint expires at the correct time. + */ +void kgsl_pwrctrl_set_constraint(struct kgsl_device *device, + struct kgsl_pwr_constraint *pwrc, u32 id, u32 ts); +int kgsl_pwrctrl_set_default_gpu_pwrlevel(struct kgsl_device *device); +void kgsl_pwrctrl_update_thermal_pwrlevel(struct kgsl_device *device); + +/** + * kgsl_pwrctrl_request_state - Request a specific power state + * @device: Pointer to the kgsl device + * @state: Power state requested + */ +void kgsl_pwrctrl_request_state(struct kgsl_device *device, u32 state); + +/** + * kgsl_pwrctrl_axi - Propagate bus votes during slumber entry and exit + * @device: Pointer to the kgsl device + * @state: Whether we are going to slumber or coming out of slumber + * + * This function will propagate the default bus vote when coming out of + * slumber and set bus bandwidth to 0 when going into slumber + * + * Return: 0 on success or negative error on failure + */ +int kgsl_pwrctrl_axi(struct kgsl_device *device, bool state); + +/** + * kgsl_idle_check - kgsl idle function + * @work: work item being run by the function + * + * This function is called for work 
that is queued by the interrupt + * handler or the idle timer. It attempts to transition to a clocks + * off state if the active_cnt is 0 and the hardware is idle. + */ +void kgsl_idle_check(struct work_struct *work); + +/** + * kgsl_pwrctrl_irq - Enable or disable gpu interrupts + * @device: Handle to the kgsl device + * @state: Variable to decide whether interrupts need to be enabled or disabled + * + */ +void kgsl_pwrctrl_irq(struct kgsl_device *device, bool state); + +/** + * kgsl_pwrctrl_clear_l3_vote - Relinquish l3 vote + * @device: Handle to the kgsl device + * + * Clear the l3 vote when going into slumber + */ +void kgsl_pwrctrl_clear_l3_vote(struct kgsl_device *device); +#endif /* __KGSL_PWRCTRL_H */ diff --git a/kgsl_pwrscale.c b/kgsl_pwrscale.c new file mode 100644 index 0000000000..c7d0ff0d66 --- /dev/null +++ b/kgsl_pwrscale.c @@ -0,0 +1,805 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2010-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include + +#include "kgsl_bus.h" +#include "kgsl_device.h" +#include "kgsl_pwrscale.h" +#include "kgsl_trace.h" + +static struct devfreq_msm_adreno_tz_data adreno_tz_data = { + .bus = { + .max = 350, + .floating = true, + }, + .mod_percent = 100, +}; + +static void do_devfreq_suspend(struct work_struct *work); +static void do_devfreq_resume(struct work_struct *work); +static void do_devfreq_notify(struct work_struct *work); + +/* + * These variables are used to keep the latest data + * returned by kgsl_devfreq_get_dev_status + */ +static struct xstats last_xstats; +static struct devfreq_dev_status last_status = { .private_data = &last_xstats }; + +/* + * kgsl_pwrscale_sleep - notify governor that device is going off + * @device: The device + * + * Called shortly after all pending work is completed. 
+ */ +void kgsl_pwrscale_sleep(struct kgsl_device *device) +{ + if (!device->pwrscale.enabled) + return; + device->pwrscale.on_time = 0; + + /* to call devfreq_suspend_device() from a kernel thread */ + queue_work(device->pwrscale.devfreq_wq, + &device->pwrscale.devfreq_suspend_ws); +} + +/* + * kgsl_pwrscale_wake - notify governor that device is going on + * @device: The device + * + * Called when the device is returning to an active state. + */ +void kgsl_pwrscale_wake(struct kgsl_device *device) +{ + struct kgsl_power_stats stats; + struct kgsl_pwrscale *psc = &device->pwrscale; + + if (!device->pwrscale.enabled) + return; + /* clear old stats before waking */ + memset(&psc->accum_stats, 0, sizeof(psc->accum_stats)); + memset(&last_xstats, 0, sizeof(last_xstats)); + + /* and any hw activity from waking up*/ + device->ftbl->power_stats(device, &stats); + + psc->time = ktime_get(); + + psc->next_governor_call = ktime_add_us(psc->time, + KGSL_GOVERNOR_CALL_INTERVAL); + + /* to call devfreq_resume_device() from a kernel thread */ + queue_work(psc->devfreq_wq, &psc->devfreq_resume_ws); +} + +/* + * kgsl_pwrscale_busy - update pwrscale state for new work + * @device: The device + * + * Called when new work is submitted to the device. + * This function must be called with the device mutex locked. + */ +void kgsl_pwrscale_busy(struct kgsl_device *device) +{ + if (!device->pwrscale.enabled) + return; + if (device->pwrscale.on_time == 0) + device->pwrscale.on_time = ktime_to_us(ktime_get()); +} + +/** + * kgsl_pwrscale_update_stats() - update device busy statistics + * @device: The device + * + * Read hardware busy counters and accumulate the results. 
+ */ +void kgsl_pwrscale_update_stats(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwrctrl = &device->pwrctrl; + struct kgsl_pwrscale *psc = &device->pwrscale; + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return; + + if (!psc->enabled) + return; + + if (device->state == KGSL_STATE_ACTIVE) { + struct kgsl_power_stats stats; + ktime_t cur_time = ktime_get(); + + device->ftbl->power_stats(device, &stats); + device->pwrscale.accum_stats.busy_time += stats.busy_time; + device->pwrscale.accum_stats.ram_time += stats.ram_time; + device->pwrscale.accum_stats.ram_wait += stats.ram_wait; + pwrctrl->clock_times[pwrctrl->active_pwrlevel] += + stats.busy_time; + pwrctrl->time_in_pwrlevel[pwrctrl->active_pwrlevel] += + ktime_us_delta(cur_time, pwrctrl->last_stat_updated); + pwrctrl->last_stat_updated = cur_time; + } +} + +/** + * kgsl_pwrscale_update() - update device busy statistics + * @device: The device + * + * If enough time has passed schedule the next call to devfreq + * get_dev_status. + */ +void kgsl_pwrscale_update(struct kgsl_device *device) +{ + ktime_t t; + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return; + + if (!device->pwrscale.enabled) + return; + + t = ktime_get(); + if (ktime_compare(t, device->pwrscale.next_governor_call) < 0) + return; + + device->pwrscale.next_governor_call = ktime_add_us(t, + KGSL_GOVERNOR_CALL_INTERVAL); + + /* to call update_devfreq() from a kernel thread */ + if (device->state != KGSL_STATE_SLUMBER) + queue_work(device->pwrscale.devfreq_wq, + &device->pwrscale.devfreq_notify_ws); +} + +/* + * kgsl_pwrscale_disable - temporarily disable the governor + * @device: The device + * @turbo: Indicates if pwrlevel should be forced to turbo + * + * Temporarily disable the governor, to prevent interference + * with profiling tools that expect a fixed clock frequency. + * This function must be called with the device mutex locked. 
+ */ +void kgsl_pwrscale_disable(struct kgsl_device *device, bool turbo) +{ + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return; + + if (device->pwrscale.devfreqptr) + queue_work(device->pwrscale.devfreq_wq, + &device->pwrscale.devfreq_suspend_ws); + device->pwrscale.enabled = false; + if (turbo) + kgsl_pwrctrl_pwrlevel_change(device, 0); +} + +/* + * kgsl_pwrscale_enable - re-enable the governor + * @device: The device + * + * Reenable the governor after a kgsl_pwrscale_disable() call. + * This function must be called with the device mutex locked. + */ +void kgsl_pwrscale_enable(struct kgsl_device *device) +{ + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return; + + if (device->pwrscale.devfreqptr) { + queue_work(device->pwrscale.devfreq_wq, + &device->pwrscale.devfreq_resume_ws); + device->pwrscale.enabled = true; + } else { + /* + * Don't enable it if devfreq is not set and let the device + * run at default level; + */ + kgsl_pwrctrl_pwrlevel_change(device, + device->pwrctrl.default_pwrlevel); + device->pwrscale.enabled = false; + } +} + +#ifdef DEVFREQ_FLAG_WAKEUP_MAXFREQ +static inline bool _check_maxfreq(u32 flags) +{ + return (flags & DEVFREQ_FLAG_WAKEUP_MAXFREQ); +} +#else +static inline bool _check_maxfreq(u32 flags) +{ + return false; +} +#endif + +/* + * kgsl_devfreq_target - devfreq_dev_profile.target callback + * @dev: see devfreq.h + * @freq: see devfreq.h + * @flags: see devfreq.h + * + * This is a devfreq callback function for dcvs recommendations and + * thermal constraints. If any thermal constraints are present, + * devfreq adjusts the gpu frequency range to cap the max frequency + * thereby not recommending anything above the constraint. + * This function expects the device mutex to be unlocked. 
+ */ +int kgsl_devfreq_target(struct device *dev, unsigned long *freq, u32 flags) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr; + int level; + unsigned int i; + unsigned long cur_freq, rec_freq; + + if (device == NULL) + return -ENODEV; + if (freq == NULL) + return -EINVAL; + if (!device->pwrscale.devfreq_enabled) + return -EPROTO; + + pwr = &device->pwrctrl; + + if (_check_maxfreq(flags)) { + /* + * The GPU is about to get suspended, + * but it needs to be at the max power level when waking up + */ + pwr->wakeup_maxpwrlevel = 1; + return 0; + } + + rec_freq = *freq; + + mutex_lock(&device->mutex); + cur_freq = kgsl_pwrctrl_active_freq(pwr); + level = pwr->active_pwrlevel; + + kgsl_pwrctrl_update_thermal_pwrlevel(device); + + /* If the governor recommends a new frequency, update it here */ + if (rec_freq != cur_freq) { + for (i = 0; i < pwr->num_pwrlevels; i++) + if (rec_freq == pwr->pwrlevels[i].gpu_freq) { + level = i; + break; + } + if (level != pwr->active_pwrlevel) + kgsl_pwrctrl_pwrlevel_change(device, level); + } + + *freq = kgsl_pwrctrl_active_freq(pwr); + + mutex_unlock(&device->mutex); + return 0; +} + +/* + * kgsl_devfreq_get_dev_status - devfreq_dev_profile.get_dev_status callback + * @dev: see devfreq.h + * @freq: see devfreq.h + * @flags: see devfreq.h + * + * This function expects the device mutex to be unlocked. + */ +int kgsl_devfreq_get_dev_status(struct device *dev, + struct devfreq_dev_status *stat) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwrctrl; + struct kgsl_pwrscale *pwrscale; + ktime_t tmp1, tmp2; + + if (device == NULL) + return -ENODEV; + if (stat == NULL) + return -EINVAL; + if (!device->pwrscale.devfreq_enabled) + return -EPROTO; + + pwrscale = &device->pwrscale; + pwrctrl = &device->pwrctrl; + + mutex_lock(&device->mutex); + + tmp1 = ktime_get(); + /* + * If the GPU clock is on grab the latest power counter + * values. 
Otherwise the most recent ACTIVE values will + * already be stored in accum_stats. + */ + kgsl_pwrscale_update_stats(device); + + tmp2 = ktime_get(); + stat->total_time = ktime_us_delta(tmp2, pwrscale->time); + pwrscale->time = tmp1; + + stat->busy_time = pwrscale->accum_stats.busy_time; + + stat->current_frequency = kgsl_pwrctrl_active_freq(&device->pwrctrl); + + stat->private_data = &device->active_context_count; + + /* + * keep the latest devfreq_dev_status values + * and vbif counters data + * to be (re)used by kgsl_busmon_get_dev_status() + */ + if (pwrctrl->bus_control) { + struct xstats *last_b = + (struct xstats *)last_status.private_data; + + last_status.total_time = stat->total_time; + last_status.busy_time = stat->busy_time; + last_status.current_frequency = stat->current_frequency; + + last_b->ram_time = device->pwrscale.accum_stats.ram_time; + last_b->ram_wait = device->pwrscale.accum_stats.ram_wait; + last_b->buslevel = device->pwrctrl.cur_buslevel; + } + + kgsl_pwrctrl_busy_time(device, stat->total_time, stat->busy_time); + trace_kgsl_pwrstats(device, stat->total_time, + &pwrscale->accum_stats, device->active_context_count); + memset(&pwrscale->accum_stats, 0, sizeof(pwrscale->accum_stats)); + + mutex_unlock(&device->mutex); + + return 0; +} + +/* + * kgsl_devfreq_get_cur_freq - devfreq_dev_profile.get_cur_freq callback + * @dev: see devfreq.h + * @freq: see devfreq.h + * @flags: see devfreq.h + * + * This function expects the device mutex to be unlocked. 
+ */ +int kgsl_devfreq_get_cur_freq(struct device *dev, unsigned long *freq) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + if (device == NULL) + return -ENODEV; + if (freq == NULL) + return -EINVAL; + if (!device->pwrscale.devfreq_enabled) + return -EPROTO; + + mutex_lock(&device->mutex); + *freq = kgsl_pwrctrl_active_freq(&device->pwrctrl); + mutex_unlock(&device->mutex); + + return 0; +} + +/* + * kgsl_busmon_get_dev_status - devfreq_dev_profile.get_dev_status callback + * @dev: see devfreq.h + * @freq: see devfreq.h + * @flags: see devfreq.h + * + * This function expects the device mutex to be unlocked. + */ +int kgsl_busmon_get_dev_status(struct device *dev, + struct devfreq_dev_status *stat) +{ + struct xstats *b; + struct kgsl_device *device = dev_get_drvdata(dev); + + if (!device->pwrscale.devfreq_enabled) + return -EPROTO; + + stat->total_time = last_status.total_time; + stat->busy_time = last_status.busy_time; + stat->current_frequency = last_status.current_frequency; + + if (stat->private_data) { + struct xstats *last_b = + (struct xstats *)last_status.private_data; + b = (struct xstats *)stat->private_data; + b->ram_time = last_b->ram_time; + b->ram_wait = last_b->ram_wait; + b->buslevel = last_b->buslevel; + } + return 0; +} + +#ifdef DEVFREQ_FLAG_FAST_HINT +static inline bool _check_fast_hint(u32 flags) +{ + return (flags & DEVFREQ_FLAG_FAST_HINT); +} +#else +static inline bool _check_fast_hint(u32 flags) +{ + return false; +} +#endif + +#ifdef DEVFREQ_FLAG_SLOW_HINT +static inline bool _check_slow_hint(u32 flags) +{ + return (flags & DEVFREQ_FLAG_SLOW_HINT); +} +#else +static inline bool _check_slow_hint(u32 flags) +{ + return false; +} +#endif + +/* + * kgsl_busmon_target - devfreq_dev_profile.target callback + * @dev: see devfreq.h + * @freq: see devfreq.h + * @flags: see devfreq.h + * + * This function expects the device mutex to be unlocked. 
+ */ +int kgsl_busmon_target(struct device *dev, unsigned long *freq, u32 flags) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr; + struct kgsl_pwrlevel *pwr_level; + int level, b; + u32 bus_flag; + unsigned long ab_mbytes; + + if (device == NULL) + return -ENODEV; + if (freq == NULL) + return -EINVAL; + if (!device->pwrscale.enabled) + return 0; + if (!device->pwrscale.devfreq_enabled) + return -EPROTO; + + pwr = &device->pwrctrl; + + if (!pwr->bus_control) + return 0; + + mutex_lock(&device->mutex); + level = pwr->active_pwrlevel; + pwr_level = &pwr->pwrlevels[level]; + bus_flag = device->pwrscale.bus_profile.flag; + device->pwrscale.bus_profile.flag = 0; + ab_mbytes = device->pwrscale.bus_profile.ab_mbytes; + + /* + * Bus devfreq governor has calculated its recomendations + * when gpu was running with *freq frequency. + * If the gpu frequency is different now it's better to + * ignore the call + */ + if (pwr_level->gpu_freq != *freq) { + mutex_unlock(&device->mutex); + return 0; + } + + b = pwr->bus_mod; + if (_check_fast_hint(bus_flag)) + pwr->bus_mod++; + else if (_check_slow_hint(bus_flag)) + pwr->bus_mod--; + + /* trim calculated change to fit range */ + if (pwr_level->bus_freq + pwr->bus_mod < pwr_level->bus_min) + pwr->bus_mod = -(pwr_level->bus_freq - pwr_level->bus_min); + else if (pwr_level->bus_freq + pwr->bus_mod > pwr_level->bus_max) + pwr->bus_mod = pwr_level->bus_max - pwr_level->bus_freq; + + /* Update bus vote if AB or IB is modified */ + if ((pwr->bus_mod != b) || (pwr->bus_ab_mbytes != ab_mbytes)) { + pwr->bus_percent_ab = device->pwrscale.bus_profile.percent_ab; + pwr->bus_ab_mbytes = ab_mbytes; + kgsl_bus_update(device, KGSL_BUS_VOTE_ON); + } + + mutex_unlock(&device->mutex); + return 0; +} + +int kgsl_busmon_get_cur_freq(struct device *dev, unsigned long *freq) +{ + return 0; +} + +static void pwrscale_busmon_create(struct kgsl_device *device, + struct platform_device *pdev, unsigned long *table) +{ + 
struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct kgsl_pwrscale *pwrscale = &device->pwrscale; + struct device *dev = &pwrscale->busmondev; + struct msm_busmon_extended_profile *bus_profile; + struct devfreq *bus_devfreq; + int i, ret; + + bus_profile = &pwrscale->bus_profile; + bus_profile->private_data = &adreno_tz_data; + + bus_profile->profile.target = kgsl_busmon_target; + bus_profile->profile.get_dev_status = kgsl_busmon_get_dev_status; + bus_profile->profile.get_cur_freq = kgsl_busmon_get_cur_freq; + + bus_profile->profile.max_state = pwr->num_pwrlevels; + bus_profile->profile.freq_table = table; + + dev->parent = &pdev->dev; + + dev_set_name(dev, "kgsl-busmon"); + dev_set_drvdata(dev, device); + if (device_register(dev)) + return; + + /* Build out the OPP table for the busmon device */ + for (i = 0; i < pwr->num_pwrlevels; i++) { + if (!pwr->pwrlevels[i].gpu_freq) + continue; + + dev_pm_opp_add(dev, pwr->pwrlevels[i].gpu_freq, 0); + } + + ret = devfreq_gpubw_init(); + if (ret) { + dev_err(&pdev->dev, "Failed to add busmon governor: %d\n", ret); + put_device(dev); + return; + } + + bus_devfreq = devfreq_add_device(dev, &pwrscale->bus_profile.profile, + "gpubw_mon", NULL); + + if (IS_ERR_OR_NULL(bus_devfreq)) { + dev_err(&pdev->dev, "Bus scaling not enabled\n"); + devfreq_gpubw_exit(); + put_device(dev); + return; + } + + pwrscale->bus_devfreq = bus_devfreq; +} + +static void pwrscale_of_get_ca_target_pwrlevel(struct kgsl_device *device, + struct device_node *node) +{ + u32 pwrlevel = 1; + + of_property_read_u32(node, "qcom,ca-target-pwrlevel", &pwrlevel); + + if (pwrlevel >= device->pwrctrl.num_pwrlevels) + pwrlevel = 1; + + device->pwrscale.ctxt_aware_target_pwrlevel = pwrlevel; +} + +/* Get context aware properties */ +static void pwrscale_of_ca_aware(struct kgsl_device *device) +{ + struct kgsl_pwrscale *pwrscale = &device->pwrscale; + struct device_node *parent = device->pdev->dev.of_node; + struct device_node *node, *child; + + 
pwrscale->ctxt_aware_enable = + of_property_read_bool(parent, "qcom,enable-ca-jump"); + + if (!pwrscale->ctxt_aware_enable) + return; + + pwrscale->ctxt_aware_busy_penalty = 12000; + of_property_read_u32(parent, "qcom,ca-busy-penalty", + &pwrscale->ctxt_aware_busy_penalty); + + + pwrscale->ctxt_aware_target_pwrlevel = 1; + + node = of_find_node_by_name(parent, "qcom,gpu-pwrlevel-bins"); + if (node == NULL) { + pwrscale_of_get_ca_target_pwrlevel(device, parent); + return; + } + + for_each_child_of_node(node, child) { + u32 bin; + + if (of_property_read_u32(child, "qcom,speed-bin", &bin)) + continue; + + if (bin == device->speed_bin) { + pwrscale_of_get_ca_target_pwrlevel(device, child); + of_node_put(child); + break; + } + } + + of_node_put(node); +} + +int kgsl_pwrscale_init(struct kgsl_device *device, struct platform_device *pdev, + const char *governor) +{ + struct kgsl_pwrscale *pwrscale = &device->pwrscale; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct devfreq *devfreq; + struct msm_adreno_extended_profile *gpu_profile; + int i, ret; + + pwrscale->enabled = true; + + gpu_profile = &pwrscale->gpu_profile; + gpu_profile->private_data = &adreno_tz_data; + + gpu_profile->profile.target = kgsl_devfreq_target; + gpu_profile->profile.get_dev_status = kgsl_devfreq_get_dev_status; + gpu_profile->profile.get_cur_freq = kgsl_devfreq_get_cur_freq; + + gpu_profile->profile.initial_freq = + pwr->pwrlevels[pwr->default_pwrlevel].gpu_freq; + + gpu_profile->profile.polling_ms = 10; + + pwrscale_of_ca_aware(device); + + for (i = 0; i < pwr->num_pwrlevels; i++) + pwrscale->freq_table[i] = pwr->pwrlevels[i].gpu_freq; + + /* + * Max_state is the number of valid power levels. 
+ * The valid power levels range from 0 - (max_state - 1) + */ + gpu_profile->profile.max_state = pwr->num_pwrlevels; + /* link storage array to the devfreq profile pointer */ + gpu_profile->profile.freq_table = pwrscale->freq_table; + + /* if there is only 1 freq, no point in running a governor */ + if (gpu_profile->profile.max_state == 1) + governor = "performance"; + + /* initialize msm-adreno-tz governor specific data here */ + adreno_tz_data.disable_busy_time_burst = + of_property_read_bool(pdev->dev.of_node, + "qcom,disable-busy-time-burst"); + + if (pwrscale->ctxt_aware_enable) { + adreno_tz_data.ctxt_aware_enable = pwrscale->ctxt_aware_enable; + adreno_tz_data.bin.ctxt_aware_target_pwrlevel = + pwrscale->ctxt_aware_target_pwrlevel; + adreno_tz_data.bin.ctxt_aware_busy_penalty = + pwrscale->ctxt_aware_busy_penalty; + } + + /* + * If there is a separate GX power rail, allow + * independent modification to its voltage through + * the bus bandwidth vote. + */ + if (pwr->bus_control) { + adreno_tz_data.bus.num = pwr->ddr_table_count; + adreno_tz_data.bus.ib_kbps = pwr->ddr_table; + adreno_tz_data.bus.width = pwr->bus_width; + + if (!kgsl_of_property_read_ddrtype(device->pdev->dev.of_node, + "qcom,bus-accesses", &adreno_tz_data.bus.max)) + adreno_tz_data.bus.floating = false; + } + + pwrscale->devfreq_wq = create_freezable_workqueue("kgsl_devfreq_wq"); + if (!pwrscale->devfreq_wq) { + dev_err(device->dev, "Failed to allocate kgsl devfreq workqueue\n"); + device->pwrscale.enabled = false; + return -ENOMEM; + } + + ret = msm_adreno_tz_init(); + if (ret) { + dev_err(device->dev, "Failed to add adreno tz governor: %d\n", ret); + device->pwrscale.enabled = false; + return ret; + } + + devfreq = devfreq_add_device(&pdev->dev, &gpu_profile->profile, + governor, &adreno_tz_data); + if (IS_ERR(devfreq)) { + device->pwrscale.enabled = false; + msm_adreno_tz_exit(); + return PTR_ERR(devfreq); + } + + pwrscale->devfreqptr = devfreq; + pwrscale->cooling_dev = 
of_devfreq_cooling_register(pdev->dev.of_node, + devfreq); + if (IS_ERR(pwrscale->cooling_dev)) + pwrscale->cooling_dev = NULL; + + if (adreno_tz_data.bus.num) + pwrscale_busmon_create(device, pdev, pwrscale->freq_table); + + WARN_ON(sysfs_create_link(&device->dev->kobj, + &devfreq->dev.kobj, "devfreq")); + + INIT_WORK(&pwrscale->devfreq_suspend_ws, do_devfreq_suspend); + INIT_WORK(&pwrscale->devfreq_resume_ws, do_devfreq_resume); + INIT_WORK(&pwrscale->devfreq_notify_ws, do_devfreq_notify); + + pwrscale->next_governor_call = ktime_add_us(ktime_get(), + KGSL_GOVERNOR_CALL_INTERVAL); + + return 0; +} + +/* + * kgsl_pwrscale_close - clean up pwrscale + * @device: the device + * + * This function should be called with the device mutex locked. + */ +void kgsl_pwrscale_close(struct kgsl_device *device) +{ + struct kgsl_pwrscale *pwrscale; + struct kgsl_pwrctrl *pwr; + + pwr = &device->pwrctrl; + pwrscale = &device->pwrscale; + + if (pwrscale->bus_devfreq) { + devfreq_remove_device(pwrscale->bus_devfreq); + pwrscale->bus_devfreq = NULL; + put_device(&pwrscale->busmondev); + devfreq_gpubw_exit(); + } + + if (!pwrscale->devfreqptr) + return; + if (pwrscale->cooling_dev) + devfreq_cooling_unregister(pwrscale->cooling_dev); + + if (pwrscale->devfreq_wq) { + flush_workqueue(pwrscale->devfreq_wq); + destroy_workqueue(pwrscale->devfreq_wq); + pwrscale->devfreq_wq = NULL; + } + + devfreq_remove_device(device->pwrscale.devfreqptr); + device->pwrscale.devfreqptr = NULL; + msm_adreno_tz_exit(); +} + +static void do_devfreq_suspend(struct work_struct *work) +{ + struct kgsl_pwrscale *pwrscale = container_of(work, + struct kgsl_pwrscale, devfreq_suspend_ws); + + devfreq_suspend_device(pwrscale->devfreqptr); + devfreq_suspend_device(pwrscale->bus_devfreq); +} + +static void do_devfreq_resume(struct work_struct *work) +{ + struct kgsl_pwrscale *pwrscale = container_of(work, + struct kgsl_pwrscale, devfreq_resume_ws); + + devfreq_resume_device(pwrscale->devfreqptr); + 
devfreq_resume_device(pwrscale->bus_devfreq); +} + +static void do_devfreq_notify(struct work_struct *work) +{ + struct kgsl_pwrscale *pwrscale = container_of(work, + struct kgsl_pwrscale, devfreq_notify_ws); + + mutex_lock(&pwrscale->devfreqptr->lock); + update_devfreq(pwrscale->devfreqptr); + mutex_unlock(&pwrscale->devfreqptr->lock); + + if (pwrscale->bus_devfreq) { + mutex_lock(&pwrscale->bus_devfreq->lock); + update_devfreq(pwrscale->bus_devfreq); + mutex_unlock(&pwrscale->bus_devfreq->lock); + } +} diff --git a/kgsl_pwrscale.h b/kgsl_pwrscale.h new file mode 100644 index 0000000000..2bdc9db0d9 --- /dev/null +++ b/kgsl_pwrscale.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2010-2021, The Linux Foundation. All rights reserved. + */ + +#ifndef __KGSL_PWRSCALE_H +#define __KGSL_PWRSCALE_H + +#include "kgsl_pwrctrl.h" +#include "msm_adreno_devfreq.h" + +/* devfreq governor call window in usec */ +#define KGSL_GOVERNOR_CALL_INTERVAL 10000 + +struct kgsl_power_stats { + u64 busy_time; + u64 ram_time; + u64 ram_wait; +}; + +/** + * struct kgsl_pwrscale - Power scaling settings for a KGSL device + * @devfreqptr - Pointer to the devfreq device + * @gpu_profile - GPU profile data for the devfreq device + * @bus_profile - Bus specific data for the bus devfreq device + * @freq_table - GPU frequencies for the DCVS algorithm + * @last_governor - Prior devfreq governor + * @accum_stats - Accumulated statistics for various frequency calculations + * @enabled - Whether or not power scaling is enabled + * @time - Last submitted sample timestamp + * @on_time - Timestamp when gpu busy begins + * @devfreq_wq - Main devfreq workqueue + * @devfreq_suspend_ws - Pass device suspension to devfreq + * @devfreq_resume_ws - Pass device resume to devfreq + * @devfreq_notify_ws - Notify devfreq to update sampling + * @next_governor_call - Timestamp after which the governor may be notified of + * a new sample + * @cooling_dev - Thermal cooling device 
handle + * @ctxt_aware_enable - Whether or not ctxt aware DCVS feature is enabled + * @ctxt_aware_busy_penalty - The time in microseconds required to trigger + * ctxt aware power level jump + * @ctxt_aware_target_pwrlevel - pwrlevel to jump on in case of ctxt aware + * power level jump + */ +struct kgsl_pwrscale { + struct devfreq *devfreqptr; + struct msm_adreno_extended_profile gpu_profile; + struct msm_busmon_extended_profile bus_profile; + unsigned long freq_table[KGSL_MAX_PWRLEVELS]; + char last_governor[DEVFREQ_NAME_LEN]; + struct kgsl_power_stats accum_stats; + bool enabled; + ktime_t time; + s64 on_time; + struct workqueue_struct *devfreq_wq; + struct work_struct devfreq_suspend_ws; + struct work_struct devfreq_resume_ws; + struct work_struct devfreq_notify_ws; + ktime_t next_governor_call; + struct thermal_cooling_device *cooling_dev; + bool ctxt_aware_enable; + unsigned int ctxt_aware_target_pwrlevel; + unsigned int ctxt_aware_busy_penalty; + /** @busmondev: A child device for the busmon governor */ + struct device busmondev; + /** @bus_devfreq: Pointer to the bus devfreq device */ + struct devfreq *bus_devfreq; + /** @devfreq_enabled: Whether or not devfreq is enabled */ + bool devfreq_enabled; +}; + +/** + * kgsl_pwrscale_init - Initialize the pwrscale subsystem + * @device: A GPU device handle + * @pdev: A pointer to the GPU platform device + * @governor: default devfreq governor to use for GPU frequency scaling + * + * Return: 0 on success or negative on failure + */ +int kgsl_pwrscale_init(struct kgsl_device *device, struct platform_device *pdev, + const char *governor); +void kgsl_pwrscale_close(struct kgsl_device *device); + +void kgsl_pwrscale_update(struct kgsl_device *device); +void kgsl_pwrscale_update_stats(struct kgsl_device *device); +void kgsl_pwrscale_busy(struct kgsl_device *device); +void kgsl_pwrscale_sleep(struct kgsl_device *device); +void kgsl_pwrscale_wake(struct kgsl_device *device); + +void kgsl_pwrscale_enable(struct kgsl_device 
*device); +void kgsl_pwrscale_disable(struct kgsl_device *device, bool turbo); + +int kgsl_devfreq_target(struct device *dev, unsigned long *freq, u32 flags); +int kgsl_devfreq_get_dev_status(struct device *dev, + struct devfreq_dev_status *stat); +int kgsl_devfreq_get_cur_freq(struct device *dev, unsigned long *freq); + +int kgsl_busmon_target(struct device *dev, unsigned long *freq, u32 flags); +int kgsl_busmon_get_dev_status(struct device *dev, + struct devfreq_dev_status *stat); +int kgsl_busmon_get_cur_freq(struct device *dev, unsigned long *freq); + +int msm_adreno_tz_init(void); + +void msm_adreno_tz_exit(void); + +int devfreq_gpubw_init(void); + +void devfreq_gpubw_exit(void); +#endif diff --git a/kgsl_reclaim.c b/kgsl_reclaim.c new file mode 100644 index 0000000000..b0d6804456 --- /dev/null +++ b/kgsl_reclaim.c @@ -0,0 +1,422 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include + +#include "kgsl_reclaim.h" +#include "kgsl_sharedmem.h" + +/* + * Reclaiming excessive number of pages from a process will impact launch + * latency for the subsequent launch of the process. After measuring the + * launch latencies by having various maximum limits, it has been decided + * that allowing 30MB (7680 pages) of relcaim per process will have little + * impact and the latency will be within acceptable limit. 
+ */ +static u32 kgsl_reclaim_max_page_limit = 7680; + +/* Setting this to 0 means we reclaim pages as specified in shrinker call */ +static u32 kgsl_nr_to_scan; +static atomic_t kgsl_shrinker_active = ATOMIC_INIT(0); + +static unsigned long shmem_swap_pages(struct address_space *mapping) +{ + struct inode *inode = mapping->host; + struct shmem_inode_info *info = SHMEM_I(inode); + unsigned long swapped; + + swapped = READ_ONCE(info->swapped); + return swapped; +} + +static unsigned long kgsl_process_get_reclaim_count( + struct kgsl_process_private *process) +{ + struct kgsl_mem_entry *entry; + struct kgsl_memdesc *memdesc; + unsigned long reclaim_count = 0; + int id; + + spin_lock(&process->mem_lock); + idr_for_each_entry(&process->mem_idr, entry, id) { + memdesc = &entry->memdesc; + if (memdesc->shmem_filp) + reclaim_count += shmem_swap_pages( + memdesc->shmem_filp->f_mapping); + } + spin_unlock(&process->mem_lock); + + return reclaim_count; +} + +static int kgsl_memdesc_get_reclaimed_pages(struct kgsl_mem_entry *entry) +{ + struct kgsl_memdesc *memdesc = &entry->memdesc; + int i, ret; + struct page *page; + + for (i = 0; i < memdesc->page_count; i++) { + if (memdesc->pages[i]) + continue; + + page = shmem_read_mapping_page_gfp( + memdesc->shmem_filp->f_mapping, i, kgsl_gfp_mask(0)); + + if (IS_ERR(page)) + return PTR_ERR(page); + + kgsl_page_sync_for_device(memdesc->dev, page, PAGE_SIZE); + + /* + * Update the pages array only if vmfault has not + * updated it meanwhile + */ + spin_lock(&memdesc->lock); + if (!memdesc->pages[i]) { + memdesc->pages[i] = page; + atomic_dec(&entry->priv->unpinned_page_count); + } else + put_page(page); + spin_unlock(&memdesc->lock); + } + + ret = kgsl_mmu_map(memdesc->pagetable, memdesc); + if (ret) + return ret; + + memdesc->priv &= ~KGSL_MEMDESC_RECLAIMED; + memdesc->priv &= ~KGSL_MEMDESC_SKIP_RECLAIM; + + return 0; +} + +int kgsl_reclaim_to_pinned_state( + struct kgsl_process_private *process) +{ + struct kgsl_mem_entry *entry, 
*valid_entry; + int next = 0, ret = 0; + + mutex_lock(&process->reclaim_lock); + + if (test_bit(KGSL_PROC_PINNED_STATE, &process->state)) + goto done; + + for ( ; ; ) { + valid_entry = NULL; + spin_lock(&process->mem_lock); + entry = idr_get_next(&process->mem_idr, &next); + if (entry == NULL) { + spin_unlock(&process->mem_lock); + break; + } + + if (!entry->pending_free && + (entry->memdesc.priv & KGSL_MEMDESC_RECLAIMED)) + valid_entry = kgsl_mem_entry_get(entry); + spin_unlock(&process->mem_lock); + + if (valid_entry) { + ret = kgsl_memdesc_get_reclaimed_pages(entry); + kgsl_mem_entry_put(entry); + if (ret) + goto done; + } + + next++; + } + + set_bit(KGSL_PROC_PINNED_STATE, &process->state); +done: + mutex_unlock(&process->reclaim_lock); + return ret; +} + +static void kgsl_reclaim_foreground_work(struct work_struct *work) +{ + struct kgsl_process_private *process = + container_of(work, struct kgsl_process_private, fg_work); + + if (test_bit(KGSL_PROC_STATE, &process->state)) + kgsl_reclaim_to_pinned_state(process); + kgsl_process_private_put(process); +} + +static ssize_t kgsl_proc_state_show(struct kobject *kobj, + struct kgsl_process_attribute *attr, char *buf) +{ + struct kgsl_process_private *process = + container_of(kobj, struct kgsl_process_private, kobj); + + if (test_bit(KGSL_PROC_STATE, &process->state)) + return scnprintf(buf, PAGE_SIZE, "foreground\n"); + else + return scnprintf(buf, PAGE_SIZE, "background\n"); +} + +static ssize_t kgsl_proc_state_store(struct kobject *kobj, + struct kgsl_process_attribute *attr, const char *buf, ssize_t count) +{ + struct kgsl_process_private *process = + container_of(kobj, struct kgsl_process_private, kobj); + + if (sysfs_streq(buf, "foreground")) { + if (!test_and_set_bit(KGSL_PROC_STATE, &process->state) && + kgsl_process_private_get(process)) + kgsl_schedule_work(&process->fg_work); + } else if (sysfs_streq(buf, "background")) { + clear_bit(KGSL_PROC_STATE, &process->state); + } else + return -EINVAL; + + return 
count; +} + +static ssize_t gpumem_reclaimed_show(struct kobject *kobj, + struct kgsl_process_attribute *attr, char *buf) +{ + struct kgsl_process_private *process = + container_of(kobj, struct kgsl_process_private, kobj); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + kgsl_process_get_reclaim_count(process) << PAGE_SHIFT); +} + +PROCESS_ATTR(state, 0644, kgsl_proc_state_show, kgsl_proc_state_store); +PROCESS_ATTR(gpumem_reclaimed, 0444, gpumem_reclaimed_show, NULL); + +static const struct attribute *proc_reclaim_attrs[] = { + &attr_state.attr, + &attr_gpumem_reclaimed.attr, + NULL, +}; + +void kgsl_reclaim_proc_sysfs_init(struct kgsl_process_private *process) +{ + WARN_ON(sysfs_create_files(&process->kobj, proc_reclaim_attrs)); +} + +ssize_t kgsl_proc_max_reclaim_limit_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + int ret; + + ret = kstrtou32(buf, 0, &kgsl_reclaim_max_page_limit); + return ret ? ret : count; +} + +ssize_t kgsl_proc_max_reclaim_limit_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", kgsl_reclaim_max_page_limit); +} + +ssize_t kgsl_nr_to_scan_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + int ret; + + ret = kstrtou32(buf, 0, &kgsl_nr_to_scan); + return ret ? ret : count; +} + +ssize_t kgsl_nr_to_scan_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", kgsl_nr_to_scan); +} + +static u32 kgsl_reclaim_process(struct kgsl_process_private *process, + u32 pages_to_reclaim) +{ + struct kgsl_memdesc *memdesc; + struct kgsl_mem_entry *entry, *valid_entry; + u32 next = 0, remaining = pages_to_reclaim; + + /* + * If we do not get the lock here, it means that the buffers are + * being pinned back. So do not keep waiting here as we would anyway + * return empty handed once the lock is acquired. 
+ */ + if (!mutex_trylock(&process->reclaim_lock)) + return 0; + + while (remaining) { + + if (atomic_read(&process->unpinned_page_count) >= + kgsl_reclaim_max_page_limit) + break; + + /* Abort reclaim if process submitted work. */ + if (atomic_read(&process->cmd_count)) + break; + + /* Abort reclaim if process foreground hint is received. */ + if (test_bit(KGSL_PROC_STATE, &process->state)) + break; + + valid_entry = NULL; + spin_lock(&process->mem_lock); + entry = idr_get_next(&process->mem_idr, &next); + if (entry == NULL) { + spin_unlock(&process->mem_lock); + break; + } + + memdesc = &entry->memdesc; + if (!entry->pending_free && + (memdesc->priv & KGSL_MEMDESC_CAN_RECLAIM) && + !(memdesc->priv & KGSL_MEMDESC_RECLAIMED) && + !(memdesc->priv & KGSL_MEMDESC_SKIP_RECLAIM)) + valid_entry = kgsl_mem_entry_get(entry); + spin_unlock(&process->mem_lock); + + if (!valid_entry) { + next++; + continue; + } + + if ((atomic_read(&process->unpinned_page_count) + + memdesc->page_count) > kgsl_reclaim_max_page_limit) { + kgsl_mem_entry_put(entry); + next++; + continue; + } + + if (memdesc->page_count > remaining) { + kgsl_mem_entry_put(entry); + next++; + continue; + } + + if (!kgsl_mmu_unmap(memdesc->pagetable, memdesc)) { + int i; + + for (i = 0; i < memdesc->page_count; i++) { + set_page_dirty_lock(memdesc->pages[i]); + shmem_mark_page_lazyfree(memdesc->pages[i]); + spin_lock(&memdesc->lock); + put_page(memdesc->pages[i]); + memdesc->pages[i] = NULL; + atomic_inc(&process->unpinned_page_count); + spin_unlock(&memdesc->lock); + remaining--; + } + + memdesc->priv |= KGSL_MEMDESC_RECLAIMED; + } + + kgsl_mem_entry_put(entry); + next++; + } + if (next) + clear_bit(KGSL_PROC_PINNED_STATE, &process->state); + mutex_unlock(&process->reclaim_lock); + return (pages_to_reclaim - remaining); +} + +/* Functions for the shrinker */ + +static unsigned long +kgsl_reclaim_shrink_scan_objects(struct shrinker *shrinker, + struct shrink_control *sc) +{ + /* nr_pages represents number of pages 
to be reclaimed*/ + u32 nr_pages = kgsl_nr_to_scan ? kgsl_nr_to_scan : sc->nr_to_scan; + u32 bg_proc = 0; + u64 pp_nr_pages; + struct list_head kgsl_reclaim_process_list; + struct kgsl_process_private *process, *next; + + if (atomic_inc_return(&kgsl_shrinker_active) > 1) { + atomic_dec(&kgsl_shrinker_active); + return 0; + } + + INIT_LIST_HEAD(&kgsl_reclaim_process_list); + read_lock(&kgsl_driver.proclist_lock); + list_for_each_entry(process, &kgsl_driver.process_list, list) { + if (test_bit(KGSL_PROC_STATE, &process->state) || + !kgsl_process_private_get(process)) + continue; + + bg_proc++; + list_add(&process->reclaim_list, &kgsl_reclaim_process_list); + } + read_unlock(&kgsl_driver.proclist_lock); + + list_for_each_entry(process, &kgsl_reclaim_process_list, reclaim_list) { + if (!nr_pages) + break; + + pp_nr_pages = nr_pages; + do_div(pp_nr_pages, bg_proc--); + nr_pages -= kgsl_reclaim_process(process, pp_nr_pages); + } + + list_for_each_entry_safe(process, next, + &kgsl_reclaim_process_list, reclaim_list) { + list_del(&process->reclaim_list); + kgsl_process_private_put(process); + } + + atomic_dec(&kgsl_shrinker_active); + return ((kgsl_nr_to_scan ? 
+ kgsl_nr_to_scan : sc->nr_to_scan) - nr_pages); +} + +static unsigned long +kgsl_reclaim_shrink_count_objects(struct shrinker *shrinker, + struct shrink_control *sc) +{ + struct kgsl_process_private *process; + unsigned long count_reclaimable = 0; + + read_lock(&kgsl_driver.proclist_lock); + list_for_each_entry(process, &kgsl_driver.process_list, list) { + if (!test_bit(KGSL_PROC_STATE, &process->state)) + count_reclaimable += kgsl_reclaim_max_page_limit - + atomic_read(&process->unpinned_page_count); + } + read_unlock(&kgsl_driver.proclist_lock); + + return (count_reclaimable << PAGE_SHIFT); +} + +/* Shrinker callback data*/ +static struct shrinker kgsl_reclaim_shrinker = { + .count_objects = kgsl_reclaim_shrink_count_objects, + .scan_objects = kgsl_reclaim_shrink_scan_objects, + .seeks = DEFAULT_SEEKS, + .batch = 0, +}; + +void kgsl_reclaim_proc_private_init(struct kgsl_process_private *process) +{ + mutex_init(&process->reclaim_lock); + INIT_WORK(&process->fg_work, kgsl_reclaim_foreground_work); + set_bit(KGSL_PROC_PINNED_STATE, &process->state); + set_bit(KGSL_PROC_STATE, &process->state); + atomic_set(&process->unpinned_page_count, 0); +} + +int kgsl_reclaim_init(void) +{ + int ret; + + /* Initialize shrinker */ + ret = register_shrinker(&kgsl_reclaim_shrinker); + if (ret) + pr_err("kgsl: reclaim: Failed to register shrinker\n"); + + return ret; +} + +void kgsl_reclaim_close(void) +{ + /* Unregister shrinker */ + unregister_shrinker(&kgsl_reclaim_shrinker); +} diff --git a/kgsl_reclaim.h b/kgsl_reclaim.h new file mode 100644 index 0000000000..bb2c01861c --- /dev/null +++ b/kgsl_reclaim.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
+ */ +#ifndef __KGSL_RECLAIM_H +#define __KGSL_RECLAIM_H + + +#include "kgsl_device.h" + +#ifdef CONFIG_QCOM_KGSL_PROCESS_RECLAIM + +/* Set if all the memdescs of this process are pinned */ +#define KGSL_PROC_PINNED_STATE 0 +/* Process foreground/background state. Set if process is in foreground */ +#define KGSL_PROC_STATE 1 + +int kgsl_reclaim_init(void); +void kgsl_reclaim_close(void); +int kgsl_reclaim_to_pinned_state(struct kgsl_process_private *priv); +void kgsl_reclaim_proc_sysfs_init(struct kgsl_process_private *process); +void kgsl_reclaim_proc_private_init(struct kgsl_process_private *process); +ssize_t kgsl_proc_max_reclaim_limit_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count); +ssize_t kgsl_proc_max_reclaim_limit_show(struct device *dev, + struct device_attribute *attr, char *buf); +ssize_t kgsl_nr_to_scan_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count); +ssize_t kgsl_nr_to_scan_show(struct device *dev, + struct device_attribute *attr, char *buf); +#else +static inline int kgsl_reclaim_init(void) +{ + return 0; +} + +static inline void kgsl_reclaim_close(void) { } + +static inline int kgsl_reclaim_to_pinned_state( + struct kgsl_process_private *priv) +{ + return 0; +} + +static inline void kgsl_reclaim_proc_sysfs_init + (struct kgsl_process_private *process) { } + +static inline void kgsl_reclaim_proc_private_init + (struct kgsl_process_private *process) { } + +#endif +#endif /* __KGSL_RECLAIM_H */ diff --git a/kgsl_regmap.c b/kgsl_regmap.c new file mode 100644 index 0000000000..d20608b711 --- /dev/null +++ b/kgsl_regmap.c @@ -0,0 +1,328 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. 
+ */ + +#include +#include + +#include "kgsl_regmap.h" +#include "kgsl_trace.h" + +#define region_addr(region, _offset) \ + ((region)->virt + (((_offset) - (region)->offset) << 2)) + +static int kgsl_regmap_init_region(struct kgsl_regmap *regmap, + struct platform_device *pdev, + struct kgsl_regmap_region *region, + struct resource *res, const struct kgsl_regmap_ops *ops, + void *priv) +{ + void __iomem *ptr; + + ptr = devm_ioremap(&pdev->dev, res->start, resource_size(res)); + if (!ptr) + return -ENOMEM; + + region->virt = ptr; + region->offset = (res->start - regmap->base->start) >> 2; + region->size = resource_size(res) >> 2; + region->ops = ops; + region->priv = priv; + + return 0; +} + +/* Initialize the regmap with the base region. All added regions will be offset + * from this base + */ +int kgsl_regmap_init(struct platform_device *pdev, struct kgsl_regmap *regmap, + const char *name, const struct kgsl_regmap_ops *ops, + void *priv) +{ + struct kgsl_regmap_region *region; + struct resource *res; + int ret; + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name); + if (!res) + return -ENODEV; + + regmap->base = res; + + region = ®map->region[0]; + ret = kgsl_regmap_init_region(regmap, pdev, region, res, ops, priv); + + if (!ret) + regmap->count = 1; + + return ret; +} + +/* Add a new region to the regmap */ +int kgsl_regmap_add_region(struct kgsl_regmap *regmap, struct platform_device *pdev, + const char *name, const struct kgsl_regmap_ops *ops, void *priv) +{ + struct kgsl_regmap_region *region; + struct resource *res; + int ret; + + if (WARN_ON(regmap->count >= ARRAY_SIZE(regmap->region))) + return -ENODEV; + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name); + if (!res) + return -ENODEV; + + region = ®map->region[regmap->count]; + + ret = kgsl_regmap_init_region(regmap, pdev, region, res, ops, priv); + if (!ret) + regmap->count++; + + return ret; +} + +#define in_range(a, base, len) \ + (((a) >= (base)) && ((a) < ((base) + 
(len)))) + +struct kgsl_regmap_region *kgsl_regmap_get_region(struct kgsl_regmap *regmap, + u32 offset) +{ + int i; + + for (i = 0; i < regmap->count; i++) { + struct kgsl_regmap_region *region = ®map->region[i]; + + if (in_range(offset, region->offset, region->size)) + return region; + } + + return NULL; +} + +u32 kgsl_regmap_read(struct kgsl_regmap *regmap, u32 offset) +{ + struct kgsl_regmap_region *region = kgsl_regmap_get_region(regmap, offset); + u32 val; + + if (WARN(!region, "Out of bounds register read offset: 0x%x\n", offset)) + return 0; + + if (region->ops && region->ops->preaccess) + region->ops->preaccess(region); + + val = readl_relaxed(region_addr(region, offset)); + /* Allow previous read to post before returning the value */ + rmb(); + + return val; +} + +void kgsl_regmap_write(struct kgsl_regmap *regmap, u32 value, u32 offset) +{ + struct kgsl_regmap_region *region = kgsl_regmap_get_region(regmap, offset); + + if (WARN(!region, "Out of bounds register write offset: 0x%x\n", offset)) + return; + + if (region->ops && region->ops->preaccess) + region->ops->preaccess(region); + + /* Make sure all pending writes have posted first */ + wmb(); + writel_relaxed(value, region_addr(region, offset)); + + trace_kgsl_regwrite(offset, value); +} + +void kgsl_regmap_multi_write(struct kgsl_regmap *regmap, + const struct kgsl_regmap_list *list, int count) +{ + struct kgsl_regmap_region *region, *prev = NULL; + int i; + + /* + * do one write barrier to ensure all previous writes are done before + * starting the list + */ + wmb(); + + for (i = 0; i < count; i++) { + region = kgsl_regmap_get_region(regmap, list[i].offset); + + if (WARN(!region, "Out of bounds register write offset: 0x%x\n", + list[i].offset)) + continue; + + /* + * The registers might be in different regions. If a region has + * a preaccess function we need to call it at least once before + * writing registers but we don't want to call it every time if + * we can avoid it. 
"cache" the current region and don't call + * pre-access if it is the same region from the previous access. + * This isn't perfect but it should cut down on some unneeded + * cpu cycles + */ + + if (region != prev && region->ops && region->ops->preaccess) + region->ops->preaccess(region); + + prev = region; + + writel_relaxed(list[i].val, region_addr(region, list[i].offset)); + trace_kgsl_regwrite(list[i].val, list[i].offset); + } +} + +void kgsl_regmap_rmw(struct kgsl_regmap *regmap, u32 offset, u32 mask, + u32 or) +{ + struct kgsl_regmap_region *region = kgsl_regmap_get_region(regmap, offset); + u32 val; + + if (WARN(!region, "Out of bounds register read-modify-write offset: 0x%x\n", + offset)) + return; + + if (region->ops && region->ops->preaccess) + region->ops->preaccess(region); + + val = readl_relaxed(region_addr(region, offset)); + /* Make sure the read posted and all pending writes are done */ + mb(); + writel_relaxed((val & ~mask) | or, region_addr(region, offset)); + + trace_kgsl_regwrite(offset, (val & ~mask) | or); +} + +void kgsl_regmap_bulk_write(struct kgsl_regmap *regmap, u32 offset, + const void *data, int dwords) +{ + struct kgsl_regmap_region *region = kgsl_regmap_get_region(regmap, offset); + + if (WARN(!region, "Out of bounds register bulk write offset: 0x%x\n", offset)) + return; + + if (region->ops && region->ops->preaccess) + region->ops->preaccess(region); + + /* + * A bulk write operation can only be in one region - it cannot + * cross boundaries + */ + if (WARN((offset - region->offset) + dwords > region->size, + "OUt of bounds bulk write size: 0x%x\n", offset + dwords)) + return; + + /* Make sure all pending write are done first */ + wmb(); + memcpy_toio(region_addr(region, offset), data, dwords << 2); +} + +void kgsl_regmap_bulk_read(struct kgsl_regmap *regmap, u32 offset, + const void *data, int dwords) +{ + struct kgsl_regmap_region *region = kgsl_regmap_get_region(regmap, offset); + + if (WARN(!region, "Out of bounds register bulk 
read offset: 0x%x\n", offset)) + return; + + if (region->ops && region->ops->preaccess) + region->ops->preaccess(region); + + /* + * A bulk read operation can only be in one region - it cannot + * cross boundaries + */ + if (WARN((offset - region->offset) + dwords > region->size, + "Out of bounds bulk read size: 0x%x\n", offset + dwords)) + return; + + memcpy_fromio(region_addr(region, offset), data, dwords << 2); + + /* Make sure the copy is finished before moving on */ + rmb(); +} + +void __iomem *kgsl_regmap_virt(struct kgsl_regmap *regmap, u32 offset) +{ + struct kgsl_regmap_region *region = kgsl_regmap_get_region(regmap, offset); + + if (region) + return region_addr(region, offset); + + return NULL; +} + +void kgsl_regmap_read_indexed(struct kgsl_regmap *regmap, u32 addr, + u32 data, u32 *dest, int count) +{ + struct kgsl_regmap_region *region = kgsl_regmap_get_region(regmap, addr); + int i; + + if (!region) + return; + + /* Make sure the offset is in the same region */ + if (kgsl_regmap_get_region(regmap, data) != region) + return; + + if (region->ops && region->ops->preaccess) + region->ops->preaccess(region); + + /* Write the address register */ + writel_relaxed(0, region_addr(region, addr)); + + /* Make sure the write finishes */ + wmb(); + + for (i = 0; i < count; i++) + dest[i] = readl_relaxed(region_addr(region, data)); + + /* Do one barrier at the end to make sure all the data is posted */ + rmb(); +} + +void kgsl_regmap_read_indexed_interleaved(struct kgsl_regmap *regmap, u32 addr, + u32 data, u32 *dest, u32 start, int count) +{ + struct kgsl_regmap_region *region = kgsl_regmap_get_region(regmap, addr); + int i; + + if (!region) + return; + + /* Make sure the offset is in the same region */ + if (kgsl_regmap_get_region(regmap, data) != region) + return; + + if (region->ops && region->ops->preaccess) + region->ops->preaccess(region); + + for (i = 0; i < count; i++) { + /* Write the address register */ + writel_relaxed(start + i, region_addr(region, 
addr)); + /* Make sure the write finishes */ + wmb(); + + dest[i] = readl_relaxed(region_addr(region, data)); + /* Make sure the read finishes */ + rmb(); + } +} + +/* A special helper function to work with read_poll_timeout */ +int kgsl_regmap_poll_read(struct kgsl_regmap_region *region, u32 offset, + u32 *val) +{ + /* FIXME: WARN on !region? */ + if (WARN(!region, "Out of bounds poll read: 0x%x\n", offset)) + return -ENODEV; + + *val = readl_relaxed(region_addr(region, offset)); + /* Make sure the read is finished before moving on */ + rmb(); + + return 0; +} diff --git a/kgsl_regmap.h b/kgsl_regmap.h new file mode 100644 index 0000000000..0e7ed20689 --- /dev/null +++ b/kgsl_regmap.h @@ -0,0 +1,265 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. + */ + +#ifndef KGSL_REGMAP_H +#define KGSL_REGMAP_H + +struct kgsl_regmap; +struct kgsl_regmap_region; + +/** + * @ksgl_regmap_ops - Helper functions to access registers in a regmap region + */ +struct kgsl_regmap_ops { + /** + * @preaccess: called before accesses to the register. This is used by + * adreno to call kgsl_pre_hwaccess() + */ + void (*preaccess)(struct kgsl_regmap_region *region); +}; + +/** + * struct kgsl_regmap_region - Defines a region of registers in a kgsl_regmap + */ +struct kgsl_regmap_region { + /** @virt: Kernel address for the re-mapped region */ + void __iomem *virt; + /** @offset: Dword offset of the region from the regmap base */ + u32 offset; + /** @size: Size of the region in dwords */ + u32 size; + /** @ops: Helper functions to access registers in the region */ + const struct kgsl_regmap_ops *ops; + /** @priv: Private data to send to the ops */ + void *priv; +}; + +/** + * struct kgsl_regmap - Define a set of individual regions that are all indexed + * from a commmon base. This is used to access GPU and GMU registers in + * separate io-remmaped regions from a single set of function calls. 
+ */ +struct kgsl_regmap { + /** + * @base: Resource pointer for the "base" region (the region that all + * other regions are indexed from) + */ + struct resource *base; + /** @region: Array of regions for this regmap */ + struct kgsl_regmap_region region[3]; + /** @count: Number of active regions in @region */ + int count; +}; + +/** + * struct kgsl_regmap_list + */ +struct kgsl_regmap_list { + /** offset: Dword offset of the register to write */ + u32 offset; + /** val: Value to write */ + u32 val; +}; + +/** + * kgsl_regmap_init - Initialize a regmap + * @pdev: Pointer to the platform device that owns @name + * @regmap: Pointer to the regmap to initialize + * @name: Name of the resource to map + * @ops: Pointer to the regmap ops for this region + * @priv: Private data to pass to the regmap ops + * + * Initialize a regmap and set the resource @name as the base region in the + * regmap. All other regions will be indexed from the start of this region. + * This will nominally be the start of the GPU register region. + * + * Return: 0 on success or negative error on failure. + */ +int kgsl_regmap_init(struct platform_device *pdev, struct kgsl_regmap *regmap, + const char *name, const struct kgsl_regmap_ops *ops, + void *priv); + +/** + * kgsl_regmap_add_region - Add a region to an existing regmap + * @regmap: The regmap to add the region to + * @pdev: Pointer to the platform device that owns @name + * @name: Name of the resource to map + * @ops: Pointer to the regmap ops for this region + * @priv: Private data to pass to the regmap ops + * + * Add a new region to the regmap. It will be indexed against the base + * address already defined when the regmap was initialized. For example, + * if the base GPU address is at physical address 0x3d000000 and the new + * region is at physical address 0x3d010000 this region will be added at + * (0x3d010000 - 0x3d000000) or dword offset 0x4000. + * + * Return: 0 on success or negative error on failure. 
+ */ +int kgsl_regmap_add_region(struct kgsl_regmap *regmap, struct platform_device *pdev, + const char *name, const struct kgsl_regmap_ops *ops, void *priv); + +/** + * kgsl_regmap_read - Read a register from the regmap + * @regmap: The regmap to read from + * @offset: The dword offset to read + * + * Read the register at the specified offset indexed against the base address in + * the regmap. An offset that falls out of mapped regions will WARN and return + * 0. + * + * Return: The value of the register at @offset + */ +u32 kgsl_regmap_read(struct kgsl_regmap *regmap, u32 offset); + +/** + * kgsl_regmap_write - Write a register to the regmap + * @regmap: The regmap to write to + * @data: The value to write to @offset + * @offset: The dword offset to write + * + * Write @data to the register at the specified offset indexed against the base + * address in he regmap. An offset that falls out of mapped regions will WARN + * and skip the write. + */ +void kgsl_regmap_write(struct kgsl_regmap *regmap, u32 value, u32 offset); + +/** + * kgsl_regmap_multi_write - Write a list of registers + * @regmap: The regmap to write to + * @list: A pointer to an array of &strut kgsl_regmap_list items + * @count: NUmber of items in @list + * + * Write all the registers in @list to the regmap. + */ + +void kgsl_regmap_multi_write(struct kgsl_regmap *regmap, + const struct kgsl_regmap_list *list, int count); + +/** + * kgsl_regmap_rmw - read-modify-write a register in the regmap + * @regmap: The regmap to write to + * @offset: The dword offset to write + * @mask: Mask the register contents against this mask + * @or: OR these bits into the register before writing it back again + * + * Read the register at @offset, mask it against @mask, OR the bits in @or and + * write it back to @offset. @offset will be indexed against the base + * address in the regmap. An offset that falls out of mapped regions will WARN + * and skip the operation. 
+ */ +void kgsl_regmap_rmw(struct kgsl_regmap *regmap, u32 offset, u32 mask, + u32 or); + +/** + * kgsl_regmap_bulk_write - Write an array of values to a I/O region + * @regmap: The regmap to write to + * @offset: The dword offset to start writing to + * @data: The data to write + * @dwords: Number of dwords to write + * + * Bulk write @data to the I/O region starting at @offset for @dwords. + * The write operation must fit fully inside a single region (no crossing the + * boundaries). @offset will be indexed against the base + * address in he regmap. An offset that falls out of mapped regions will WARN + * and skip the operation. + */ +void kgsl_regmap_bulk_write(struct kgsl_regmap *regmap, u32 offset, + const void *data, int dwords); + +/** + * kgsl_regmap_bulk_read - Read an array of values to a I/O region + * @regmap: The regmap to read from + * @offset: The dword offset to start reading from + * @data: The data pointer to read into + * @dwords: Number of dwords to read + * + * Bulk read into @data the I/O region starting at @offset for @dwords. + * The read operation must fit fully inside a single region (no crossing the + * boundaries). @offset will be indexed against the base + * address in the regmap. An offset that falls out of mapped regions will WARN + * and skip the operation. + */ +void kgsl_regmap_bulk_read(struct kgsl_regmap *regmap, u32 offset, + const void *data, int dwords); + +/** + * kgsl_regmap_virt - Return the kernel address for a offset + * @regmap: The regmap to write to + * @offset: The dword offset to map to a kernel address + * + * Return: The kernel address for @offset or NULL if out of range. 
+ */ +void __iomem *kgsl_regmap_virt(struct kgsl_regmap *regmap, u32 offset); + +/** + * kgsl_regmap_read_indexed - Read a indexed pair of registers + * @regmap: The regmap to read from + * @addr: The offset of the address register for the index pair + * @data: The offset of the data register for the index pair + * @dest: An array to put the values + * @count: Number of dwords to read from @data + * + * This function configures the address register once and then + * reads from the data register in a loop. + */ +void kgsl_regmap_read_indexed(struct kgsl_regmap *regmap, u32 addr, + u32 data, u32 *dest, int count); + +/** + * kgsl_regmap_read_indexed_interleaved - Dump an indexed pair of registers + * @regmap: The regmap to read from + * @addr: The offset of the address register for the index pair + * @data: The offset of the data register for the index pair + * @dest: An array to put the values + * @start: Starting value to be programmed in the address register + * @count: Number of dwords to read from @data + * + * This function is slightly different than kgsl_regmap_read_indexed() + * in that it takes as argument a start value that is to be programmed + * in the address register and secondly, the address register is to be + * configured before every read of the data register. 
+ */ +void kgsl_regmap_read_indexed_interleaved(struct kgsl_regmap *regmap, u32 addr, + u32 data, u32 *dest, u32 start, int count); + +/** + * kgsl_regmap_get_region - Return the region for the given offset + * @regmap: The regmap to query + * @offset: The offset to query + * + * Return: The &struct kgsl_regmap_region that owns the offset or NULL + */ +struct kgsl_regmap_region *kgsl_regmap_get_region(struct kgsl_regmap *regmap, + u32 offset); + +/** + * kgsl_regmap_poll_read - A helper function for kgsl_regmap_read_poll_timeout + * @region: Pointer to a &struct kgsl_regmap_region + * @offset: Offset to read + * @val: Pointer for the result + * + * This is a special helper function to be called only from + * kgsl_regmap_read_poll_timeout. + * + * Return: 0 on success or -ENODEV if the region is NULL. + */ +int kgsl_regmap_poll_read(struct kgsl_regmap_region *region, u32 offset, + u32 *val); + +#define kgsl_regmap_read_poll_timeout(regmap, offset, val, cond, \ + sleep_us, timeout_us) \ +({ \ + int __ret, __tmp; \ + struct kgsl_regmap_region *region = \ + kgsl_regmap_get_region(regmap, offset); \ + \ + if (region && region->ops && region->ops->preaccess) \ + region->ops->preaccess(region); \ + __tmp = read_poll_timeout(kgsl_regmap_poll_read, __ret, __ret || (cond),\ + sleep_us, timeout_us, false, region, offset, &(val)); \ + __ret ?: __tmp; \ +}) + +#endif diff --git a/kgsl_sharedmem.c b/kgsl_sharedmem.c new file mode 100644 index 0000000000..42311fd494 --- /dev/null +++ b/kgsl_sharedmem.c @@ -0,0 +1,1605 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include + +#include "kgsl_device.h" +#include "kgsl_pool.h" +#include "kgsl_reclaim.h" +#include "kgsl_sharedmem.h" + +/* + * The user can set this from debugfs to force failed memory allocations to + * fail without trying OOM first. 
This is a debug setting useful for + * stress applications that want to test failure cases without pushing the + * system into unrecoverable OOM panics + */ + +bool kgsl_sharedmem_noretry_flag; + +static DEFINE_MUTEX(kernel_map_global_lock); + +/* An attribute for showing per-process memory statistics */ +struct kgsl_mem_entry_attribute { + struct kgsl_process_attribute attr; + int memtype; + ssize_t (*show)(struct kgsl_process_private *priv, + int type, char *buf); +}; + +static inline struct kgsl_process_attribute *to_process_attr( + struct attribute *attr) +{ + return container_of(attr, struct kgsl_process_attribute, attr); +} + +#define to_mem_entry_attr(a) \ +container_of(a, struct kgsl_mem_entry_attribute, attr) + +#define __MEM_ENTRY_ATTR(_type, _name, _show) \ +{ \ + .attr = __ATTR(_name, 0444, mem_entry_sysfs_show, NULL), \ + .memtype = _type, \ + .show = _show, \ +} + +#define MEM_ENTRY_ATTR(_type, _name, _show) \ + static struct kgsl_mem_entry_attribute mem_entry_##_name = \ + __MEM_ENTRY_ATTR(_type, _name, _show) + +static ssize_t mem_entry_sysfs_show(struct kobject *kobj, + struct kgsl_process_attribute *attr, char *buf) +{ + struct kgsl_mem_entry_attribute *pattr = to_mem_entry_attr(attr); + struct kgsl_process_private *priv = + container_of(kobj, struct kgsl_process_private, kobj); + + return pattr->show(priv, pattr->memtype, buf); +} + +static ssize_t +imported_mem_show(struct kgsl_process_private *priv, + int type, char *buf) +{ + struct kgsl_mem_entry *entry; + uint64_t imported_mem = 0; + int id = 0; + + spin_lock(&priv->mem_lock); + for (entry = idr_get_next(&priv->mem_idr, &id); entry; + id++, entry = idr_get_next(&priv->mem_idr, &id)) { + + int egl_surface_count = 0, egl_image_count = 0; + struct kgsl_memdesc *m; + + if (!kgsl_mem_entry_get(entry)) + continue; + spin_unlock(&priv->mem_lock); + + m = &entry->memdesc; + if (kgsl_memdesc_usermem_type(m) == KGSL_MEM_ENTRY_ION) { + kgsl_get_egl_counts(entry, &egl_surface_count, + &egl_image_count); 
+ + if (kgsl_memdesc_get_memtype(m) == + KGSL_MEMTYPE_EGL_SURFACE) + imported_mem += m->size; + else if (egl_surface_count == 0) { + uint64_t size = m->size; + + do_div(size, (egl_image_count ? + egl_image_count : 1)); + imported_mem += size; + } + } + + kgsl_mem_entry_put(entry); + spin_lock(&priv->mem_lock); + } + spin_unlock(&priv->mem_lock); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", imported_mem); +} + +static ssize_t +gpumem_mapped_show(struct kgsl_process_private *priv, + int type, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%lld\n", + atomic64_read(&priv->gpumem_mapped)); +} + +static ssize_t +gpumem_unmapped_show(struct kgsl_process_private *priv, int type, char *buf) +{ + u64 gpumem_total = atomic64_read(&priv->stats[type].cur); + u64 gpumem_mapped = atomic64_read(&priv->gpumem_mapped); + + if (gpumem_mapped > gpumem_total) + return -EIO; + + return scnprintf(buf, PAGE_SIZE, "%llu\n", + gpumem_total - gpumem_mapped); +} + +/** + * Show the current amount of memory allocated for the given memtype + */ + +static ssize_t +mem_entry_show(struct kgsl_process_private *priv, int type, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%lld\n", + atomic64_read(&priv->stats[type].cur)); +} + +/** + * Show the maximum memory allocated for the given memtype through the life of + * the process + */ + +static ssize_t +mem_entry_max_show(struct kgsl_process_private *priv, int type, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%llu\n", priv->stats[type].max); +} + +static ssize_t process_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct kgsl_process_attribute *pattr = to_process_attr(attr); + + return pattr->show(kobj, pattr, buf); +} + +static ssize_t process_sysfs_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + struct kgsl_process_attribute *pattr = to_process_attr(attr); + + if (pattr->store) + return pattr->store(kobj, pattr, buf, count); + return -EIO; +} + +/* Dummy release 
function - we have nothing to do here */ +static void process_sysfs_release(struct kobject *kobj) +{ +} + +static const struct sysfs_ops process_sysfs_ops = { + .show = process_sysfs_show, + .store = process_sysfs_store, +}; + +MEM_ENTRY_ATTR(KGSL_MEM_ENTRY_KERNEL, kernel, mem_entry_show); +MEM_ENTRY_ATTR(KGSL_MEM_ENTRY_KERNEL, kernel_max, mem_entry_max_show); +MEM_ENTRY_ATTR(KGSL_MEM_ENTRY_USER, user, mem_entry_show); +MEM_ENTRY_ATTR(KGSL_MEM_ENTRY_USER, user_max, mem_entry_max_show); +#ifdef CONFIG_ION +MEM_ENTRY_ATTR(KGSL_MEM_ENTRY_USER, ion, mem_entry_show); +MEM_ENTRY_ATTR(KGSL_MEM_ENTRY_USER, ion_max, mem_entry_max_show); +#endif +MEM_ENTRY_ATTR(0, imported_mem, imported_mem_show); +MEM_ENTRY_ATTR(0, gpumem_mapped, gpumem_mapped_show); +MEM_ENTRY_ATTR(KGSL_MEM_ENTRY_KERNEL, gpumem_unmapped, gpumem_unmapped_show); + +static struct attribute *mem_entry_attrs[] = { + &mem_entry_kernel.attr.attr, + &mem_entry_kernel_max.attr.attr, + &mem_entry_user.attr.attr, + &mem_entry_user_max.attr.attr, +#ifdef CONFIG_ION + &mem_entry_ion.attr.attr, + &mem_entry_ion_max.attr.attr, +#endif + &mem_entry_imported_mem.attr.attr, + &mem_entry_gpumem_mapped.attr.attr, + &mem_entry_gpumem_unmapped.attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(mem_entry); + +static struct kobj_type process_ktype = { + .sysfs_ops = &process_sysfs_ops, + .release = &process_sysfs_release, + .default_groups = mem_entry_groups, +}; +#ifdef CONFIG_QCOM_KGSL_PROCESS_RECLAIM +static struct device_attribute dev_attr_max_reclaim_limit = { + .attr = { .name = "max_reclaim_limit", .mode = 0644 }, + .show = kgsl_proc_max_reclaim_limit_show, + .store = kgsl_proc_max_reclaim_limit_store, +}; + +static struct device_attribute dev_attr_page_reclaim_per_call = { + .attr = { .name = "page_reclaim_per_call", .mode = 0644 }, + .show = kgsl_nr_to_scan_show, + .store = kgsl_nr_to_scan_store, +}; +#endif + +/** + * kgsl_process_init_sysfs() - Initialize and create sysfs files for a process + * + * @device: Pointer to kgsl 
device struct + * @private: Pointer to the structure for the process + * + * kgsl_process_init_sysfs() is called at the time of creating the + * process struct when a process opens the kgsl device for the first time. + * This function creates the sysfs files for the process. + */ +void kgsl_process_init_sysfs(struct kgsl_device *device, + struct kgsl_process_private *private) +{ + if (kobject_init_and_add(&private->kobj, &process_ktype, + kgsl_driver.prockobj, "%d", pid_nr(private->pid))) { + dev_err(device->dev, "Unable to add sysfs for process %d\n", + pid_nr(private->pid)); + kgsl_process_private_put(private); + } + + kgsl_reclaim_proc_sysfs_init(private); +} + +static ssize_t memstat_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + uint64_t val = 0; + + if (!strcmp(attr->attr.name, "vmalloc")) + val = atomic_long_read(&kgsl_driver.stats.vmalloc); + else if (!strcmp(attr->attr.name, "vmalloc_max")) + val = atomic_long_read(&kgsl_driver.stats.vmalloc_max); + else if (!strcmp(attr->attr.name, "page_alloc")) + val = atomic_long_read(&kgsl_driver.stats.page_alloc); + else if (!strcmp(attr->attr.name, "page_alloc_max")) + val = atomic_long_read(&kgsl_driver.stats.page_alloc_max); + else if (!strcmp(attr->attr.name, "coherent")) + val = atomic_long_read(&kgsl_driver.stats.coherent); + else if (!strcmp(attr->attr.name, "coherent_max")) + val = atomic_long_read(&kgsl_driver.stats.coherent_max); + else if (!strcmp(attr->attr.name, "secure")) + val = atomic_long_read(&kgsl_driver.stats.secure); + else if (!strcmp(attr->attr.name, "secure_max")) + val = atomic_long_read(&kgsl_driver.stats.secure_max); + else if (!strcmp(attr->attr.name, "mapped")) + val = atomic_long_read(&kgsl_driver.stats.mapped); + else if (!strcmp(attr->attr.name, "mapped_max")) + val = atomic_long_read(&kgsl_driver.stats.mapped_max); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", val); +} + +static ssize_t full_cache_threshold_store(struct device *dev, + struct 
device_attribute *attr, + const char *buf, size_t count) +{ + int ret; + unsigned int thresh = 0; + + ret = kstrtou32(buf, 0, &thresh); + if (ret) + return ret; + + kgsl_driver.full_cache_threshold = thresh; + return count; +} + +static ssize_t full_cache_threshold_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", + kgsl_driver.full_cache_threshold); +} + +static DEVICE_ATTR(vmalloc, 0444, memstat_show, NULL); +static DEVICE_ATTR(vmalloc_max, 0444, memstat_show, NULL); +static DEVICE_ATTR(page_alloc, 0444, memstat_show, NULL); +static DEVICE_ATTR(page_alloc_max, 0444, memstat_show, NULL); +static DEVICE_ATTR(coherent, 0444, memstat_show, NULL); +static DEVICE_ATTR(coherent_max, 0444, memstat_show, NULL); +static DEVICE_ATTR(secure, 0444, memstat_show, NULL); +static DEVICE_ATTR(secure_max, 0444, memstat_show, NULL); +static DEVICE_ATTR(mapped, 0444, memstat_show, NULL); +static DEVICE_ATTR(mapped_max, 0444, memstat_show, NULL); +static DEVICE_ATTR_RW(full_cache_threshold); + +static const struct attribute *drv_attr_list[] = { + &dev_attr_vmalloc.attr, + &dev_attr_vmalloc_max.attr, + &dev_attr_page_alloc.attr, + &dev_attr_page_alloc_max.attr, + &dev_attr_coherent.attr, + &dev_attr_coherent_max.attr, + &dev_attr_secure.attr, + &dev_attr_secure_max.attr, + &dev_attr_mapped.attr, + &dev_attr_mapped_max.attr, + &dev_attr_full_cache_threshold.attr, +#ifdef CONFIG_QCOM_KGSL_PROCESS_RECLAIM + &dev_attr_max_reclaim_limit.attr, + &dev_attr_page_reclaim_per_call.attr, +#endif + NULL, +}; + +int +kgsl_sharedmem_init_sysfs(void) +{ + return sysfs_create_files(&kgsl_driver.virtdev.kobj, drv_attr_list); +} + +static vm_fault_t kgsl_paged_vmfault(struct kgsl_memdesc *memdesc, + struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + int pgoff, ret; + struct page *page; + unsigned int offset = vmf->address - vma->vm_start; + + if (offset >= memdesc->size) + return VM_FAULT_SIGBUS; + + pgoff = offset >> PAGE_SHIFT; 
+ + spin_lock(&memdesc->lock); + if (memdesc->pages[pgoff]) { + page = memdesc->pages[pgoff]; + get_page(page); + } else { + struct kgsl_process_private *priv = + ((struct kgsl_mem_entry *)vma->vm_private_data)->priv; + + /* We are here because page was reclaimed */ + memdesc->priv |= KGSL_MEMDESC_SKIP_RECLAIM; + spin_unlock(&memdesc->lock); + + page = shmem_read_mapping_page_gfp( + memdesc->shmem_filp->f_mapping, pgoff, + kgsl_gfp_mask(0)); + if (IS_ERR(page)) + return VM_FAULT_SIGBUS; + kgsl_page_sync_for_device(memdesc->dev, page, PAGE_SIZE); + + spin_lock(&memdesc->lock); + /* + * Update the pages array only if the page was + * not already brought back. + */ + if (!memdesc->pages[pgoff]) { + memdesc->pages[pgoff] = page; + atomic_dec(&priv->unpinned_page_count); + get_page(page); + } + } + spin_unlock(&memdesc->lock); + + ret = vmf_insert_page(vma, vmf->address, page); + put_page(page); + return ret; +} + +static void kgsl_paged_unmap_kernel(struct kgsl_memdesc *memdesc) +{ + mutex_lock(&kernel_map_global_lock); + if (!memdesc->hostptr) { + /* If already unmapped the refcount should be 0 */ + WARN_ON(memdesc->hostptr_count); + goto done; + } + memdesc->hostptr_count--; + if (memdesc->hostptr_count) + goto done; + vunmap(memdesc->hostptr); + + atomic_long_sub(memdesc->size, &kgsl_driver.stats.vmalloc); + memdesc->hostptr = NULL; +done: + mutex_unlock(&kernel_map_global_lock); +} + +#if IS_ENABLED(CONFIG_QCOM_SECURE_BUFFER) + +#include + +static int lock_sgt(struct sg_table *sgt, u64 size) +{ + int dest_perms = PERM_READ | PERM_WRITE; + int source_vm = VMID_HLOS; + int dest_vm = VMID_CP_PIXEL; + int ret; + + do { + ret = hyp_assign_table(sgt, &source_vm, 1, &dest_vm, + &dest_perms, 1); + } while (ret == -EAGAIN); + + if (ret) { + /* + * If returned error code is EADDRNOTAVAIL, then this + * memory may no longer be in a usable state as security + * state of the pages is unknown after this failure. 
This + * memory can neither be added back to the pool nor buddy + * system. + */ + if (ret == -EADDRNOTAVAIL) + pr_err("Failure to lock secure GPU memory 0x%llx bytes will not be recoverable\n", + size); + + return ret; + } + + return 0; +} + +static int unlock_sgt(struct sg_table *sgt) +{ + int dest_perms = PERM_READ | PERM_WRITE | PERM_EXEC; + int source_vm = VMID_CP_PIXEL; + int dest_vm = VMID_HLOS; + int ret; + + do { + ret = hyp_assign_table(sgt, &source_vm, 1, &dest_vm, + &dest_perms, 1); + } while (ret == -EAGAIN); + + if (ret) + return ret; + + return 0; +} +#endif + +static int kgsl_paged_map_kernel(struct kgsl_memdesc *memdesc) +{ + int ret = 0; + + /* Sanity check - don't map more than we could possibly chew */ + if (memdesc->size > ULONG_MAX) + return -ENOMEM; + + mutex_lock(&kernel_map_global_lock); + if ((!memdesc->hostptr) && (memdesc->pages != NULL)) { + pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL); + + memdesc->hostptr = vmap(memdesc->pages, memdesc->page_count, + VM_IOREMAP, page_prot); + if (memdesc->hostptr) + KGSL_STATS_ADD(memdesc->size, + &kgsl_driver.stats.vmalloc, + &kgsl_driver.stats.vmalloc_max); + else + ret = -ENOMEM; + } + if (memdesc->hostptr) + memdesc->hostptr_count++; + + mutex_unlock(&kernel_map_global_lock); + + return ret; +} + +static vm_fault_t kgsl_contiguous_vmfault(struct kgsl_memdesc *memdesc, + struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + unsigned long offset, pfn; + + offset = ((unsigned long) vmf->address - vma->vm_start) >> + PAGE_SHIFT; + + pfn = (memdesc->physaddr >> PAGE_SHIFT) + offset; + return vmf_insert_pfn(vma, vmf->address, pfn); +} + +static void _dma_cache_op(struct device *dev, struct page *page, + unsigned int op) +{ + struct scatterlist sgl; + + sg_init_table(&sgl, 1); + sg_set_page(&sgl, page, PAGE_SIZE, 0); + sg_dma_address(&sgl) = page_to_phys(page); + + switch (op) { + case KGSL_CACHE_OP_FLUSH: + dma_sync_sg_for_device(dev, &sgl, 1, DMA_TO_DEVICE); + dma_sync_sg_for_device(dev, 
&sgl, 1, DMA_FROM_DEVICE); + break; + case KGSL_CACHE_OP_CLEAN: + dma_sync_sg_for_device(dev, &sgl, 1, DMA_TO_DEVICE); + break; + case KGSL_CACHE_OP_INV: + dma_sync_sg_for_device(dev, &sgl, 1, DMA_FROM_DEVICE); + break; + } +} + +int kgsl_cache_range_op(struct kgsl_memdesc *memdesc, uint64_t offset, + uint64_t size, unsigned int op) +{ + int i; + + if (memdesc->flags & KGSL_MEMFLAGS_IOCOHERENT) + return 0; + + if (size == 0 || size > UINT_MAX) + return -EINVAL; + + /* Make sure that the offset + size does not overflow */ + if ((offset + size < offset) || (offset + size < size)) + return -ERANGE; + + /* Check that offset+length does not exceed memdesc->size */ + if (offset + size > memdesc->size) + return -ERANGE; + + size += offset & PAGE_MASK; + offset &= ~PAGE_MASK; + + /* If there is a sgt, use for_each_sg_page to walk it */ + if (memdesc->sgt) { + struct sg_page_iter sg_iter; + + for_each_sg_page(memdesc->sgt->sgl, &sg_iter, + PAGE_ALIGN(size) >> PAGE_SHIFT, offset >> PAGE_SHIFT) + _dma_cache_op(memdesc->dev, sg_page_iter_page(&sg_iter), op); + return 0; + } + + /* Otherwise just walk through the list of pages */ + for (i = 0; i < memdesc->page_count; i++) { + u64 cur = (i << PAGE_SHIFT); + + if ((cur < offset) || (cur >= (offset + size))) + continue; + + _dma_cache_op(memdesc->dev, memdesc->pages[i], op); + } + + return 0; +} + +void kgsl_memdesc_init(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, uint64_t flags) +{ + struct kgsl_mmu *mmu = &device->mmu; + unsigned int align; + + memset(memdesc, 0, sizeof(*memdesc)); + /* Turn off SVM if the system doesn't support it */ + if (!kgsl_mmu_is_perprocess(mmu)) + flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP); + + /* Secure memory disables advanced addressing modes */ + if (flags & KGSL_MEMFLAGS_SECURE) + flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP); + + /* Disable IO coherence if it is not supported on the chip */ + if (!kgsl_mmu_has_feature(device, KGSL_MMU_IO_COHERENT)) { + flags &= 
~((uint64_t) KGSL_MEMFLAGS_IOCOHERENT); + + WARN_ONCE(IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT), + "I/O coherency is not supported on this target\n"); + } else if (IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT)) + flags |= KGSL_MEMFLAGS_IOCOHERENT; + + /* + * We can't enable I/O coherency on uncached surfaces because of + * situations where hardware might snoop the cpu caches which can + * have stale data. This happens primarily due to the limitations + * of dma caching APIs available on arm64 + */ + if (!kgsl_cachemode_is_cached(flags)) + flags &= ~((u64) KGSL_MEMFLAGS_IOCOHERENT); + + if (kgsl_mmu_has_feature(device, KGSL_MMU_NEED_GUARD_PAGE) || + (flags & KGSL_MEMFLAGS_GUARD_PAGE)) + memdesc->priv |= KGSL_MEMDESC_GUARD_PAGE; + + if (flags & KGSL_MEMFLAGS_SECURE) + memdesc->priv |= KGSL_MEMDESC_SECURE; + + memdesc->flags = flags; + memdesc->dev = &device->pdev->dev; + + align = max_t(unsigned int, + kgsl_memdesc_get_align(memdesc), ilog2(PAGE_SIZE)); + kgsl_memdesc_set_align(memdesc, align); + + spin_lock_init(&memdesc->lock); +} + +void kgsl_sharedmem_free(struct kgsl_memdesc *memdesc) +{ + if (!memdesc || !memdesc->size) + return; + + /* Assume if no operations were specified something went bad early */ + if (!memdesc->ops) + return; + + if (memdesc->ops->put_gpuaddr) + memdesc->ops->put_gpuaddr(memdesc); + + if (memdesc->ops->free) + memdesc->ops->free(memdesc); +} + +#if IS_ENABLED(CONFIG_QCOM_SECURE_BUFFER) +void kgsl_free_secure_page(struct page *page) +{ + struct sg_table sgt; + struct scatterlist sgl; + + if (!page) + return; + + sgt.sgl = &sgl; + sgt.nents = 1; + sgt.orig_nents = 1; + sg_init_table(&sgl, 1); + sg_set_page(&sgl, page, PAGE_SIZE, 0); + + unlock_sgt(&sgt); + __free_page(page); +} + +struct page *kgsl_alloc_secure_page(void) +{ + struct page *page; + struct sg_table sgt; + struct scatterlist sgl; + int status; + + page = alloc_page(GFP_KERNEL | __GFP_ZERO | + __GFP_NORETRY | __GFP_HIGHMEM); + if (!page) + return NULL; + + sgt.sgl = 
&sgl; + sgt.nents = 1; + sgt.orig_nents = 1; + sg_init_table(&sgl, 1); + sg_set_page(&sgl, page, PAGE_SIZE, 0); + + status = lock_sgt(&sgt, PAGE_SIZE); + if (status) { + if (status == -EADDRNOTAVAIL) + return NULL; + + __free_page(page); + return NULL; + } + return page; +} +#else +void kgsl_free_secure_page(struct page *page) +{ +} + +struct page *kgsl_alloc_secure_page(void) +{ + return NULL; +} +#endif + +int +kgsl_sharedmem_readl(const struct kgsl_memdesc *memdesc, + uint32_t *dst, + uint64_t offsetbytes) +{ + uint32_t *src; + + if (WARN_ON(memdesc == NULL || memdesc->hostptr == NULL || + dst == NULL)) + return -EINVAL; + + WARN_ON(offsetbytes % sizeof(uint32_t) != 0); + if (offsetbytes % sizeof(uint32_t) != 0) + return -EINVAL; + + WARN_ON(offsetbytes > (memdesc->size - sizeof(uint32_t))); + if (offsetbytes > (memdesc->size - sizeof(uint32_t))) + return -ERANGE; + + /* + * We are reading shared memory between CPU and GPU. + * Make sure reads before this are complete + */ + rmb(); + src = (uint32_t *)(memdesc->hostptr + offsetbytes); + *dst = *src; + return 0; +} + +void +kgsl_sharedmem_writel(const struct kgsl_memdesc *memdesc, + uint64_t offsetbytes, + uint32_t src) +{ + /* Quietly return if the memdesc isn't valid */ + if (IS_ERR_OR_NULL(memdesc) || WARN_ON(!memdesc->hostptr)) + return; + + if (WARN_ON(!IS_ALIGNED(offsetbytes, sizeof(u32)))) + return; + + if (WARN_ON(offsetbytes > (memdesc->size - sizeof(u32)))) + return; + + *((u32 *) (memdesc->hostptr + offsetbytes)) = src; + + /* Make sure the write is posted before continuing */ + wmb(); +} + +int +kgsl_sharedmem_readq(const struct kgsl_memdesc *memdesc, + uint64_t *dst, + uint64_t offsetbytes) +{ + uint64_t *src; + + if (WARN_ON(memdesc == NULL || memdesc->hostptr == NULL || + dst == NULL)) + return -EINVAL; + + WARN_ON(offsetbytes % sizeof(uint32_t) != 0); + if (offsetbytes % sizeof(uint32_t) != 0) + return -EINVAL; + + WARN_ON(offsetbytes > (memdesc->size - sizeof(uint32_t))); + if (offsetbytes > 
(memdesc->size - sizeof(uint32_t))) + return -ERANGE; + + /* + * We are reading shared memory between CPU and GPU. + * Make sure reads before this are complete + */ + rmb(); + src = (uint64_t *)(memdesc->hostptr + offsetbytes); + *dst = *src; + return 0; +} + +void +kgsl_sharedmem_writeq(const struct kgsl_memdesc *memdesc, + uint64_t offsetbytes, + uint64_t src) +{ + /* Quietly return if the memdesc isn't valid */ + if (IS_ERR_OR_NULL(memdesc) || WARN_ON(!memdesc->hostptr)) + return; + + if (WARN_ON(!IS_ALIGNED(offsetbytes, sizeof(u64)))) + return; + + if (WARN_ON(offsetbytes > (memdesc->size - sizeof(u64)))) + return; + + *((u64 *) (memdesc->hostptr + offsetbytes)) = src; + + /* Make sure the write is posted before continuing */ + wmb(); +} + +static const char * const memtype_str[] = { + [KGSL_MEMTYPE_OBJECTANY] = "any(0)", + [KGSL_MEMTYPE_FRAMEBUFFER] = "framebuffer", + [KGSL_MEMTYPE_RENDERBUFFER] = "renderbuffer", + [KGSL_MEMTYPE_ARRAYBUFFER] = "arraybuffer", + [KGSL_MEMTYPE_ELEMENTARRAYBUFFER] = "elementarraybuffer", + [KGSL_MEMTYPE_VERTEXARRAYBUFFER] = "vertexarraybuffer", + [KGSL_MEMTYPE_TEXTURE] = "texture", + [KGSL_MEMTYPE_SURFACE] = "surface", + [KGSL_MEMTYPE_EGL_SURFACE] = "egl_surface", + [KGSL_MEMTYPE_GL] = "gl", + [KGSL_MEMTYPE_CL] = "cl", + [KGSL_MEMTYPE_CL_BUFFER_MAP] = "cl_buffer_map", + [KGSL_MEMTYPE_CL_BUFFER_NOMAP] = "cl_buffer_nomap", + [KGSL_MEMTYPE_CL_IMAGE_MAP] = "cl_image_map", + [KGSL_MEMTYPE_CL_IMAGE_NOMAP] = "cl_image_nomap", + [KGSL_MEMTYPE_CL_KERNEL_STACK] = "cl_kernel_stack", + [KGSL_MEMTYPE_COMMAND] = "command", + [KGSL_MEMTYPE_2D] = "2d", + [KGSL_MEMTYPE_EGL_IMAGE] = "egl_image", + [KGSL_MEMTYPE_EGL_SHADOW] = "egl_shadow", + [KGSL_MEMTYPE_MULTISAMPLE] = "egl_multisample", + /* KGSL_MEMTYPE_KERNEL handled below, to avoid huge array */ +}; + +void kgsl_get_memory_usage(char *name, size_t name_size, uint64_t memflags) +{ + unsigned int type = FIELD_GET(KGSL_MEMTYPE_MASK, memflags); + + if (type == KGSL_MEMTYPE_KERNEL) + strlcpy(name, 
"kernel", name_size); + else if (type < ARRAY_SIZE(memtype_str) && memtype_str[type] != NULL) + strlcpy(name, memtype_str[type], name_size); + else + snprintf(name, name_size, "VK/others(%3d)", type); +} + +int kgsl_memdesc_sg_dma(struct kgsl_memdesc *memdesc, + phys_addr_t addr, u64 size) +{ + int ret; + struct page *page = phys_to_page(addr); + + memdesc->sgt = kmalloc(sizeof(*memdesc->sgt), GFP_KERNEL); + if (memdesc->sgt == NULL) + return -ENOMEM; + + ret = sg_alloc_table(memdesc->sgt, 1, GFP_KERNEL); + if (ret) { + kfree(memdesc->sgt); + memdesc->sgt = NULL; + return ret; + } + + sg_set_page(memdesc->sgt->sgl, page, (size_t) size, 0); + return 0; +} + +static void _kgsl_contiguous_free(struct kgsl_memdesc *memdesc) +{ + dma_free_attrs(memdesc->dev, memdesc->size, + memdesc->hostptr, memdesc->physaddr, + memdesc->attrs); + + sg_free_table(memdesc->sgt); + kfree(memdesc->sgt); + + memdesc->sgt = NULL; +} + +static void kgsl_contiguous_free(struct kgsl_memdesc *memdesc) +{ + if (!memdesc->hostptr) + return; + + atomic_long_sub(memdesc->size, &kgsl_driver.stats.coherent); + + _kgsl_contiguous_free(memdesc); +} + +#ifdef CONFIG_QCOM_KGSL_USE_SHMEM +static int kgsl_alloc_page(int *page_size, struct page **pages, + unsigned int pages_len, unsigned int *align, + unsigned int page_off, struct file *shmem_filp, + struct device *dev) +{ + struct page *page; + + if (pages == NULL) + return -EINVAL; + + page = shmem_read_mapping_page_gfp(shmem_filp->f_mapping, page_off, + kgsl_gfp_mask(0)); + if (IS_ERR(page)) + return PTR_ERR(page); + + kgsl_zero_page(page, 0, dev); + *pages = page; + + return 1; +} + +static int kgsl_memdesc_file_setup(struct kgsl_memdesc *memdesc, uint64_t size) +{ + int ret; + + memdesc->shmem_filp = shmem_file_setup("kgsl-3d0", size, + VM_NORESERVE); + if (IS_ERR(memdesc->shmem_filp)) { + ret = PTR_ERR(memdesc->shmem_filp); + pr_err("kgsl: unable to setup shmem file err %d\n", + ret); + memdesc->shmem_filp = NULL; + return ret; + } + + return 0; +} + 
+static void kgsl_free_page(struct page *p) +{ + put_page(p); +} + +static void _kgsl_free_pages(struct kgsl_memdesc *memdesc, unsigned int pcount) +{ + int i; + + for (i = 0; i < memdesc->page_count; i++) + if (memdesc->pages[i]) + put_page(memdesc->pages[i]); + + fput(memdesc->shmem_filp); +} +#else +static int kgsl_alloc_page(int *page_size, struct page **pages, + unsigned int pages_len, unsigned int *align, + unsigned int page_off, struct file *shmem_filp, + struct device *dev) +{ + return kgsl_pool_alloc_page(page_size, pages, + pages_len, align, dev); +} + +static int kgsl_memdesc_file_setup(struct kgsl_memdesc *memdesc, uint64_t size) +{ + return 0; +} + +static void kgsl_free_page(struct page *p) +{ + kgsl_pool_free_page(p); +} + +static void _kgsl_free_pages(struct kgsl_memdesc *memdesc, unsigned int pcount) +{ + kgsl_pool_free_pages(memdesc->pages, pcount); +} +#endif + +static void kgsl_free_pages_from_sgt(struct kgsl_memdesc *memdesc) +{ + int i; + struct scatterlist *sg; + + for_each_sg(memdesc->sgt->sgl, sg, memdesc->sgt->nents, i) { + /* + * sg_alloc_table_from_pages() will collapse any physically + * adjacent pages into a single scatterlist entry. We cannot + * just call __free_pages() on the entire set since we cannot + * ensure that the size is a whole order. Instead, free each + * page or compound page group individually. 
+ */ + struct page *p = sg_page(sg), *next; + unsigned int count; + unsigned int j = 0; + + while (j < (sg->length/PAGE_SIZE)) { + count = 1 << compound_order(p); + next = nth_page(p, count); + kgsl_free_page(p); + + p = next; + j += count; + } + } + + if (memdesc->shmem_filp) + fput(memdesc->shmem_filp); +} + +void kgsl_page_sync_for_device(struct device *dev, struct page *page, + size_t size) +{ + struct scatterlist sg; + + /* The caller may choose not to specify a device on purpose */ + if (!dev) + return; + + sg_init_table(&sg, 1); + sg_set_page(&sg, page, size, 0); + sg_dma_address(&sg) = page_to_phys(page); + + dma_sync_sg_for_device(dev, &sg, 1, DMA_BIDIRECTIONAL); +} + +void kgsl_zero_page(struct page *p, unsigned int order, + struct device *dev) +{ + int i; + + for (i = 0; i < (1 << order); i++) { + struct page *page = nth_page(p, i); + + clear_highpage(page); + } + + kgsl_page_sync_for_device(dev, p, PAGE_SIZE << order); +} + +gfp_t kgsl_gfp_mask(int page_order) +{ + gfp_t gfp_mask = __GFP_HIGHMEM; + + if (page_order > 0) { + gfp_mask |= __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN; + gfp_mask &= ~__GFP_RECLAIM; + } else + gfp_mask |= GFP_KERNEL; + + if (kgsl_sharedmem_noretry_flag) + gfp_mask |= __GFP_NORETRY | __GFP_NOWARN; + + return gfp_mask; +} + +static int _kgsl_alloc_pages(struct kgsl_memdesc *memdesc, + u64 size, struct page ***pages, struct device *dev) +{ + int count = 0; + int npages = size >> PAGE_SHIFT; + struct page **local = kvcalloc(npages, sizeof(*local), GFP_KERNEL); + u32 page_size, align; + u64 len = size; + + if (!local) + return -ENOMEM; + + count = kgsl_memdesc_file_setup(memdesc, size); + if (count) { + kvfree(local); + return count; + } + + /* Start with 1MB alignment to get the biggest page we can */ + align = ilog2(SZ_1M); + + page_size = kgsl_get_page_size(len, align); + + while (len) { + int ret = kgsl_alloc_page(&page_size, &local[count], + npages, &align, count, memdesc->shmem_filp, dev); + + if (ret == -EAGAIN) + continue; + 
else if (ret <= 0) { + int i; + + for (i = 0; i < count; ) { + int n = 1 << compound_order(local[i]); + + kgsl_free_page(local[i]); + i += n; + } + kvfree(local); + + if (!kgsl_sharedmem_noretry_flag) + pr_err_ratelimited("kgsl: out of memory: only allocated %lldKb of %lldKb requested\n", + (size - len) >> 10, size >> 10); + + if (memdesc->shmem_filp) + fput(memdesc->shmem_filp); + + return -ENOMEM; + } + + count += ret; + npages -= ret; + len -= page_size; + + page_size = kgsl_get_page_size(len, align); + } + + *pages = local; + + return count; +} + +#if IS_ENABLED(CONFIG_QCOM_SECURE_BUFFER) +static void kgsl_free_secure_system_pages(struct kgsl_memdesc *memdesc) +{ + int i; + struct scatterlist *sg; + int ret = unlock_sgt(memdesc->sgt); + + if (ret) { + /* + * Unlock of the secure buffer failed. This buffer will + * be stuck in secure side forever and is unrecoverable. + * Give up on the buffer and don't return it to the + * pool. + */ + pr_err("kgsl: secure buf unlock failed: gpuaddr: %llx size: %llx ret: %d\n", + memdesc->gpuaddr, memdesc->size, ret); + return; + } + + atomic_long_sub(memdesc->size, &kgsl_driver.stats.secure); + + for_each_sg(memdesc->sgt->sgl, sg, memdesc->sgt->nents, i) { + struct page *page = sg_page(sg); + + __free_pages(page, get_order(PAGE_SIZE)); + } + + sg_free_table(memdesc->sgt); + kfree(memdesc->sgt); + + memdesc->sgt = NULL; +} + +static void kgsl_free_secure_pages(struct kgsl_memdesc *memdesc) +{ + int ret = unlock_sgt(memdesc->sgt); + + if (ret) { + /* + * Unlock of the secure buffer failed. This buffer will + * be stuck in secure side forever and is unrecoverable. + * Give up on the buffer and don't return it to the + * pool. 
+ */ + pr_err("kgsl: secure buf unlock failed: gpuaddr: %llx size: %llx ret: %d\n", + memdesc->gpuaddr, memdesc->size, ret); + return; + } + + atomic_long_sub(memdesc->size, &kgsl_driver.stats.secure); + + kgsl_free_pages_from_sgt(memdesc); + + sg_free_table(memdesc->sgt); + kfree(memdesc->sgt); + + memdesc->sgt = NULL; +} +#endif + +static void kgsl_free_pages(struct kgsl_memdesc *memdesc) +{ + kgsl_paged_unmap_kernel(memdesc); + WARN_ON(memdesc->hostptr); + + atomic_long_sub(memdesc->size, &kgsl_driver.stats.page_alloc); + + _kgsl_free_pages(memdesc, memdesc->page_count); + + memdesc->page_count = 0; + kvfree(memdesc->pages); + + memdesc->pages = NULL; +} + + +static void kgsl_free_system_pages(struct kgsl_memdesc *memdesc) +{ + int i; + + kgsl_paged_unmap_kernel(memdesc); + WARN_ON(memdesc->hostptr); + + atomic_long_sub(memdesc->size, &kgsl_driver.stats.page_alloc); + + for (i = 0; i < memdesc->page_count; i++) + __free_pages(memdesc->pages[i], get_order(PAGE_SIZE)); + + memdesc->page_count = 0; + kvfree(memdesc->pages); + memdesc->pages = NULL; +} + +void kgsl_unmap_and_put_gpuaddr(struct kgsl_memdesc *memdesc) +{ + if (!memdesc->size || !memdesc->gpuaddr) + return; + + if (WARN_ON(kgsl_memdesc_is_global(memdesc))) + return; + + /* + * Don't release the GPU address if the memory fails to unmap because + * the IOMMU driver will BUG later if we reallocated the address and + * tried to map it + */ + if (!kgsl_memdesc_is_reclaimed(memdesc) && + kgsl_mmu_unmap(memdesc->pagetable, memdesc)) + return; + + kgsl_mmu_put_gpuaddr(memdesc->pagetable, memdesc); + + memdesc->gpuaddr = 0; + memdesc->pagetable = NULL; +} + +static const struct kgsl_memdesc_ops kgsl_contiguous_ops = { + .free = kgsl_contiguous_free, + .vmflags = VM_DONTDUMP | VM_PFNMAP | VM_DONTEXPAND | VM_DONTCOPY, + .vmfault = kgsl_contiguous_vmfault, + .put_gpuaddr = kgsl_unmap_and_put_gpuaddr, +}; + +#if IS_ENABLED(CONFIG_QCOM_SECURE_BUFFER) +static const struct kgsl_memdesc_ops kgsl_secure_system_ops = { + 
.free = kgsl_free_secure_system_pages, + /* FIXME: Make sure vmflags / vmfault does the right thing here */ +}; + +static const struct kgsl_memdesc_ops kgsl_secure_page_ops = { + .free = kgsl_free_secure_pages, + /* FIXME: Make sure vmflags / vmfault does the right thing here */ + .put_gpuaddr = kgsl_unmap_and_put_gpuaddr, +}; +#endif + +static const struct kgsl_memdesc_ops kgsl_page_ops = { + .free = kgsl_free_pages, + .vmflags = VM_DONTDUMP | VM_DONTEXPAND | VM_DONTCOPY | VM_MIXEDMAP, + .vmfault = kgsl_paged_vmfault, + .map_kernel = kgsl_paged_map_kernel, + .unmap_kernel = kgsl_paged_unmap_kernel, + .put_gpuaddr = kgsl_unmap_and_put_gpuaddr, +}; + +static const struct kgsl_memdesc_ops kgsl_system_ops = { + .free = kgsl_free_system_pages, + .vmflags = VM_DONTDUMP | VM_DONTEXPAND | VM_DONTCOPY | VM_MIXEDMAP, + .vmfault = kgsl_paged_vmfault, + .map_kernel = kgsl_paged_map_kernel, + .unmap_kernel = kgsl_paged_unmap_kernel, +}; + +static int kgsl_system_alloc_pages(u64 size, struct page ***pages, + struct device *dev) +{ + struct scatterlist sg; + struct page **local; + int i, npages = size >> PAGE_SHIFT; + + local = kvcalloc(npages, sizeof(*pages), GFP_KERNEL | __GFP_NORETRY); + if (!local) + return -ENOMEM; + + for (i = 0; i < npages; i++) { + gfp_t gfp = __GFP_ZERO | __GFP_HIGHMEM | + GFP_KERNEL | __GFP_NORETRY; + + local[i] = alloc_pages(gfp, get_order(PAGE_SIZE)); + if (!local[i]) { + for (i = i - 1; i >= 0; i--) + __free_pages(local[i], get_order(PAGE_SIZE)); + kvfree(local); + return -ENOMEM; + } + + /* Make sure the cache is clean */ + sg_init_table(&sg, 1); + sg_set_page(&sg, local[i], PAGE_SIZE, 0); + sg_dma_address(&sg) = page_to_phys(local[i]); + + dma_sync_sg_for_device(dev, &sg, 1, DMA_BIDIRECTIONAL); + } + + *pages = local; + return npages; +} + +#if IS_ENABLED(CONFIG_QCOM_SECURE_BUFFER) +static int kgsl_alloc_secure_pages(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, u64 size, u64 flags, u32 priv) +{ + struct page **pages; + int count; + 
struct sg_table *sgt; + int ret; + + size = PAGE_ALIGN(size); + + if (!size || size > UINT_MAX) + return -EINVAL; + + kgsl_memdesc_init(device, memdesc, flags); + memdesc->priv |= priv; + + if (priv & KGSL_MEMDESC_SYSMEM) { + memdesc->ops = &kgsl_secure_system_ops; + count = kgsl_system_alloc_pages(size, &pages, device->dev); + } else { + memdesc->ops = &kgsl_secure_page_ops; + count = _kgsl_alloc_pages(memdesc, size, &pages, device->dev); + } + + if (count < 0) + return count; + + sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); + if (!sgt) { + _kgsl_free_pages(memdesc, count); + kvfree(pages); + return -ENOMEM; + } + + ret = sg_alloc_table_from_pages(sgt, pages, count, 0, size, GFP_KERNEL); + if (ret) { + kfree(sgt); + _kgsl_free_pages(memdesc, count); + kvfree(pages); + return ret; + } + + /* Now that we've moved to a sg table don't need the pages anymore */ + kvfree(pages); + + ret = lock_sgt(sgt, size); + if (ret) { + if (ret != -EADDRNOTAVAIL) + kgsl_free_pages_from_sgt(memdesc); + sg_free_table(sgt); + kfree(sgt); + return ret; + } + + memdesc->sgt = sgt; + memdesc->size = size; + + KGSL_STATS_ADD(size, &kgsl_driver.stats.secure, + &kgsl_driver.stats.secure_max); + + return 0; +} +#endif + +static int kgsl_alloc_pages(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, u64 size, u64 flags, u32 priv) +{ + struct page **pages; + int count; + + size = PAGE_ALIGN(size); + + if (!size || size > UINT_MAX) + return -EINVAL; + + kgsl_memdesc_init(device, memdesc, flags); + memdesc->priv |= priv; + + if (priv & KGSL_MEMDESC_SYSMEM) { + memdesc->ops = &kgsl_system_ops; + count = kgsl_system_alloc_pages(size, &pages, device->dev); + } else { + memdesc->ops = &kgsl_page_ops; + count = _kgsl_alloc_pages(memdesc, size, &pages, device->dev); + } + + if (count < 0) + return count; + + memdesc->pages = pages; + memdesc->size = size; + memdesc->page_count = count; + + KGSL_STATS_ADD(size, &kgsl_driver.stats.page_alloc, + &kgsl_driver.stats.page_alloc_max); + + return 0; +} + 
+static int _kgsl_alloc_contiguous(struct device *dev, + struct kgsl_memdesc *memdesc, u64 size, unsigned long attrs) +{ + int ret; + phys_addr_t phys; + void *ptr; + + ptr = dma_alloc_attrs(dev, (size_t) size, &phys, + GFP_KERNEL, attrs); + if (!ptr) + return -ENOMEM; + + memdesc->size = size; + memdesc->dev = dev; + memdesc->hostptr = ptr; + memdesc->physaddr = phys; + memdesc->gpuaddr = phys; + memdesc->attrs = attrs; + + ret = kgsl_memdesc_sg_dma(memdesc, phys, size); + if (ret) + dma_free_attrs(dev, (size_t) size, ptr, phys, attrs); + + return ret; +} + +static int kgsl_alloc_contiguous(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, u64 size, u64 flags, u32 priv) +{ + int ret; + + size = PAGE_ALIGN(size); + + if (!size || size > UINT_MAX) + return -EINVAL; + + kgsl_memdesc_init(device, memdesc, flags); + memdesc->priv |= priv; + + memdesc->ops = &kgsl_contiguous_ops; + ret = _kgsl_alloc_contiguous(&device->pdev->dev, memdesc, size, 0); + + if (!ret) + KGSL_STATS_ADD(size, &kgsl_driver.stats.coherent, + &kgsl_driver.stats.coherent_max); + + return ret; +} + +#if IS_ENABLED(CONFIG_QCOM_SECURE_BUFFER) +static int kgsl_allocate_secure(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, u64 size, u64 flags, u32 priv) +{ + return kgsl_alloc_secure_pages(device, memdesc, size, flags, priv); +} +#else +static int kgsl_allocate_secure(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, u64 size, u64 flags, u32 priv) +{ + return -ENODEV; +} +#endif + +int kgsl_allocate_user(struct kgsl_device *device, struct kgsl_memdesc *memdesc, + u64 size, u64 flags, u32 priv) +{ + if (device->mmu.type == KGSL_MMU_TYPE_NONE) + return kgsl_alloc_contiguous(device, memdesc, size, flags, + priv); + else if (flags & KGSL_MEMFLAGS_SECURE) + return kgsl_allocate_secure(device, memdesc, size, flags, priv); + + return kgsl_alloc_pages(device, memdesc, size, flags, priv); +} + +int kgsl_allocate_kernel(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, 
u64 size, u64 flags, u32 priv) +{ + int ret; + + ret = kgsl_allocate_user(device, memdesc, size, flags, priv); + if (ret) + return ret; + + if (memdesc->ops->map_kernel) { + ret = memdesc->ops->map_kernel(memdesc); + if (ret) { + kgsl_sharedmem_free(memdesc); + return ret; + } + } + + return 0; +} + +struct kgsl_memdesc *kgsl_allocate_global_fixed(struct kgsl_device *device, + const char *resource, const char *name) +{ + struct kgsl_global_memdesc *md; + u32 entry[2]; + int ret; + + if (of_property_read_u32_array(device->pdev->dev.of_node, + resource, entry, 2)) + return ERR_PTR(-ENODEV); + + md = kzalloc(sizeof(*md), GFP_KERNEL); + if (!md) + return ERR_PTR(-ENOMEM); + + kgsl_memdesc_init(device, &md->memdesc, 0); + md->memdesc.priv = KGSL_MEMDESC_GLOBAL; + md->memdesc.physaddr = entry[0]; + md->memdesc.size = entry[1]; + + ret = kgsl_memdesc_sg_dma(&md->memdesc, entry[0], entry[1]); + if (ret) { + kfree(md); + return ERR_PTR(ret); + } + + md->name = name; + + /* + * No lock here, because this function is only called during probe/init + * while the caller is holding the mutex + */ + list_add_tail(&md->node, &device->globals); + + kgsl_mmu_map_global(device, &md->memdesc, 0); + + return &md->memdesc; +} + +static struct kgsl_memdesc * +kgsl_allocate_secure_global(struct kgsl_device *device, + u64 size, u64 flags, u32 priv, const char *name) +{ + struct kgsl_global_memdesc *md; + int ret; + + md = kzalloc(sizeof(*md), GFP_KERNEL); + if (!md) + return ERR_PTR(-ENOMEM); + + /* Make sure that we get global memory from system memory */ + priv |= KGSL_MEMDESC_GLOBAL | KGSL_MEMDESC_SYSMEM; + + ret = kgsl_allocate_secure(device, &md->memdesc, size, flags, priv); + if (ret) { + kfree(md); + return ERR_PTR(ret); + } + + md->name = name; + + /* + * No lock here, because this function is only called during probe/init + * while the caller is holding the mutex + */ + list_add_tail(&md->node, &device->globals); + + /* + * No offset needed, we'll get an address inside of the 
pagetable + * normally + */ + kgsl_mmu_map_global(device, &md->memdesc, 0); + kgsl_trace_gpu_mem_total(device, md->memdesc.size); + + return &md->memdesc; +} + +struct kgsl_memdesc *kgsl_allocate_global(struct kgsl_device *device, + u64 size, u32 padding, u64 flags, u32 priv, const char *name) +{ + int ret; + struct kgsl_global_memdesc *md; + + if (flags & KGSL_MEMFLAGS_SECURE) + return kgsl_allocate_secure_global(device, size, flags, priv, + name); + + md = kzalloc(sizeof(*md), GFP_KERNEL); + if (!md) + return ERR_PTR(-ENOMEM); + + /* + * Make sure that we get global memory from system memory to keep from + * taking up pool memory for the life of the driver + */ + priv |= KGSL_MEMDESC_GLOBAL | KGSL_MEMDESC_SYSMEM; + + ret = kgsl_allocate_kernel(device, &md->memdesc, size, flags, priv); + if (ret) { + kfree(md); + return ERR_PTR(ret); + } + + md->name = name; + + /* + * No lock here, because this function is only called during probe/init + * while the caller is holding the mute + */ + list_add_tail(&md->node, &device->globals); + + kgsl_mmu_map_global(device, &md->memdesc, padding); + kgsl_trace_gpu_mem_total(device, md->memdesc.size); + + return &md->memdesc; +} + +void kgsl_free_globals(struct kgsl_device *device) +{ + struct kgsl_global_memdesc *md, *tmp; + + list_for_each_entry_safe(md, tmp, &device->globals, node) { + kgsl_sharedmem_free(&md->memdesc); + list_del(&md->node); + kfree(md); + } +} diff --git a/kgsl_sharedmem.h b/kgsl_sharedmem.h new file mode 100644 index 0000000000..8183283f0f --- /dev/null +++ b/kgsl_sharedmem.h @@ -0,0 +1,463 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. 
+ */ +#ifndef __KGSL_SHAREDMEM_H +#define __KGSL_SHAREDMEM_H + +#include +#include +#include +#include + +#include "kgsl.h" +#include "kgsl_mmu.h" + +struct kgsl_device; +struct kgsl_process_private; + +extern bool kgsl_sharedmem_noretry_flag; + +#define KGSL_CACHE_OP_INV 0x01 +#define KGSL_CACHE_OP_FLUSH 0x02 +#define KGSL_CACHE_OP_CLEAN 0x03 + +void kgsl_sharedmem_free(struct kgsl_memdesc *memdesc); + +int kgsl_sharedmem_readl(const struct kgsl_memdesc *memdesc, + uint32_t *dst, + uint64_t offsetbytes); + +/** + * kgsl_sharedmem_writel - write a 32 bit value to a shared memory object + * @memdesc: Pointer to a GPU memory object + * @offsetbytes: Offset inside of @memdesc to write to + * @src: Value to write + * + * Write @src to @offsetbytes from the start of @memdesc + */ +void kgsl_sharedmem_writel(const struct kgsl_memdesc *memdesc, + uint64_t offsetbytes, + uint32_t src); + +int kgsl_sharedmem_readq(const struct kgsl_memdesc *memdesc, + uint64_t *dst, + uint64_t offsetbytes); + +/** + * kgsl_sharedmem_writeq - write a 64 bit value to a shared memory object + * @memdesc: Pointer to a GPU memory object + * @offsetbytes: Offset inside of @memdesc to write to + * @src: Value to write + * + * Write @src to @offsetbytes from the start of @memdesc + */ +void kgsl_sharedmem_writeq(const struct kgsl_memdesc *memdesc, + uint64_t offsetbytes, + uint64_t src); + +int kgsl_cache_range_op(struct kgsl_memdesc *memdesc, + uint64_t offset, uint64_t size, + unsigned int op); + +void kgsl_memdesc_init(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, uint64_t flags); + +void kgsl_process_init_sysfs(struct kgsl_device *device, + struct kgsl_process_private *private); + +int kgsl_sharedmem_init_sysfs(void); + +void kgsl_get_memory_usage(char *str, size_t len, uint64_t memflags); + +void kgsl_free_secure_page(struct page *page); + +struct page *kgsl_alloc_secure_page(void); + +/** + * kgsl_zero_page() - zero out a page + * @p: pointer to the struct page + * @order: order 
of the page + * @dev: A &struct device pointer + * + * Map a page into kernel and zero it out + */ +void kgsl_zero_page(struct page *p, unsigned int order, + struct device *dev); + +/** + * kgsl_gfp_mask() - get gfp_mask to be used + * @page_order: order of the page + * + * Get the gfp_mask to be used for page allocation + * based on the order of the page + * + * Return appropriate gfp_mask + */ +gfp_t kgsl_gfp_mask(int page_order); + +/** + * kgsl_allocate_user - Allocate user visible GPU memory + * @device: A GPU device handle + * @memdesc: Memory descriptor for the object + * @size: Size of the allocation in bytes + * @flags: Control flags for the allocation + * @priv: Internal flags for the allocation + * + * Allocate GPU memory on behalf of the user. + * Return: 0 on success or negative on failure. + */ +int kgsl_allocate_user(struct kgsl_device *device, struct kgsl_memdesc *memdesc, + u64 size, u64 flags, u32 priv); + +/** + * kgsl_allocate_kernel - Allocate kernel visible GPU memory + * @device: A GPU device handle + * @memdesc: Memory descriptor for the object + * @size: Size of the allocation in bytes + * @flags: Control flags for the allocation + * @priv: Internal flags for the allocation + * + * Allocate GPU memory on for use by the kernel. Kernel objects are + * automatically mapped into the kernel address space (except for secure). + * Return: 0 on success or negative on failure. + */ +int kgsl_allocate_kernel(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, u64 size, u64 flags, u32 priv); + +/** + * kgsl_allocate_global - Allocate a global GPU memory object + * @device: A GPU device handle + * @size: Size of the allocation in bytes + * @padding: Amount of extra adding to add to the VA allocation + * @flags: Control flags for the allocation + * @priv: Internal flags for the allocation + * @name: Name of the allocation (for the debugfs file) + * + * Allocate a global GPU object for use by all processes. 
The buffer is + * automatically mapped into the kernel address space and added to the list of + * global buffers that get mapped into each newly created pagetable. + * Return: The memory descriptor on success or a ERR_PTR encoded error on + * failure. + */ +struct kgsl_memdesc *kgsl_allocate_global(struct kgsl_device *device, + u64 size, u32 padding, u64 flags, u32 priv, const char *name); + +/** + * kgsl_allocate_global_fixed - Allocate a global GPU memory object from a fixed + * region defined in the device tree + * @device: A GPU device handle + * @size: Size of the allocation in bytes + * @flags: Control flags for the allocation + * @priv: Internal flags for the allocation + * + * Allocate a global GPU object for use by all processes. The buffer is + * added to the list of global buffers that get mapped into each newly created + * pagetable. + * + * Return: The memory descriptor on success or a ERR_PTR encoded error on + * failure. + */ +struct kgsl_memdesc *kgsl_allocate_global_fixed(struct kgsl_device *device, + const char *resource, const char *name); + +/** + * kgsl_free_globals - Free all global objects + * @device: A GPU device handle + * + * Free all the global buffer objects. Should only be called during shutdown + * after the pagetables have been freed + */ +void kgsl_free_globals(struct kgsl_device *device); + +/** + * kgsl_page_sync_for_device - Initialize SG table with page & sync it for device + * @dev: A GPU device handle + * @page: Pointer to the struct page + * @size: Size of the page + */ +void kgsl_page_sync_for_device(struct device *dev, struct page *page, + size_t size); + +/* + * kgsl_memdesc_get_align - Get alignment flags from a memdesc + * @memdesc - the memdesc + * + * Returns the alignment requested, as power of 2 exponent. 
+ */ +static inline int +kgsl_memdesc_get_align(const struct kgsl_memdesc *memdesc) +{ + return FIELD_GET(KGSL_MEMALIGN_MASK, memdesc->flags); +} + +/* + * kgsl_memdesc_get_cachemode - Get cache mode of a memdesc + * @memdesc: the memdesc + * + * Returns a KGSL_CACHEMODE* value. + */ +static inline int +kgsl_memdesc_get_cachemode(const struct kgsl_memdesc *memdesc) +{ + return FIELD_GET(KGSL_CACHEMODE_MASK, memdesc->flags); +} + +static inline unsigned int +kgsl_memdesc_get_memtype(const struct kgsl_memdesc *memdesc) +{ + return FIELD_GET(KGSL_MEMTYPE_MASK, memdesc->flags); +} +/* + * kgsl_memdesc_set_align - Set alignment flags of a memdesc + * @memdesc - the memdesc + * @align - alignment requested, as a power of 2 exponent. + */ +static inline int +kgsl_memdesc_set_align(struct kgsl_memdesc *memdesc, unsigned int align) +{ + if (align > 32) + align = 32; + + memdesc->flags &= ~(uint64_t)KGSL_MEMALIGN_MASK; + memdesc->flags |= FIELD_PREP(KGSL_MEMALIGN_MASK, align); + return 0; +} + +/** + * kgsl_memdesc_usermem_type - return buffer type + * @memdesc - the memdesc + * + * Returns a KGSL_MEM_ENTRY_* value for this buffer, which + * identifies if was allocated by us, or imported from + * another allocator. + */ +static inline unsigned int +kgsl_memdesc_usermem_type(const struct kgsl_memdesc *memdesc) +{ + return FIELD_GET(KGSL_MEMFLAGS_USERMEM_MASK, memdesc->flags); +} + +/** + * kgsl_memdesc_sg_dma - Turn a dma_addr (from CMA) into a sg table + * @memdesc: Pointer to a memory descriptor + * @addr: Physical address from the dma_alloc function + * @size: Size of the chunk + * + * Create a sg table for the contiguous chunk specified by addr and size. + * + * Return: 0 on success or negative on failure. + */ +int kgsl_memdesc_sg_dma(struct kgsl_memdesc *memdesc, + phys_addr_t addr, u64 size); + +/* + * kgsl_memdesc_is_global - is this a globally mapped buffer? 
+ * @memdesc: the memdesc + * + * Return: True if this is a global mapping + */ +static inline bool kgsl_memdesc_is_global(const struct kgsl_memdesc *memdesc) +{ + return memdesc && (memdesc->priv & KGSL_MEMDESC_GLOBAL); +} + +/* + * kgsl_memdesc_is_secured - is this a secure buffer? + * @memdesc: the memdesc + * + * Returns true if this is a secure mapping, false otherwise + */ +static inline bool kgsl_memdesc_is_secured(const struct kgsl_memdesc *memdesc) +{ + return memdesc && (memdesc->priv & KGSL_MEMDESC_SECURE); +} + +/* + * kgsl_memdesc_is_reclaimed - check if a buffer is reclaimed + * @memdesc: the memdesc + * + * Return: true if the memdesc pages were reclaimed, false otherwise + */ +static inline bool kgsl_memdesc_is_reclaimed(const struct kgsl_memdesc *memdesc) +{ + return memdesc && (memdesc->priv & KGSL_MEMDESC_RECLAIMED); +} + +/* + * kgsl_memdesc_use_cpu_map - use the same virtual mapping on CPU and GPU? + * @memdesc: the memdesc + * + * Return: true if the memdesc is using SVM mapping + */ +static inline bool +kgsl_memdesc_use_cpu_map(const struct kgsl_memdesc *memdesc) +{ + return memdesc && (memdesc->flags & KGSL_MEMFLAGS_USE_CPU_MAP); +} + +/* + * kgsl_memdesc_footprint - get the size of the mmap region + * @memdesc - the memdesc + * + * The entire memdesc must be mapped. Additionally if the + * CPU mapping is going to be mirrored, there must be room + * for the guard page to be mapped so that the address spaces + * match up. 
+ */ +static inline uint64_t +kgsl_memdesc_footprint(const struct kgsl_memdesc *memdesc) +{ + if (!(memdesc->priv & KGSL_MEMDESC_GUARD_PAGE)) + return memdesc->size; + + return PAGE_ALIGN(memdesc->size + PAGE_SIZE); +} + +/** + * kgsl_memdesc_put_gpuaddr - Release the gpuaddr assigned to a memdesc + * @memdesc: Pointer to a GPU memory object + * + * Call the memdesc specific function to release the GPU address assigned to the + * memdesc and unmap the memory + */ +static inline void kgsl_sharedmem_put_gpuaddr(struct kgsl_memdesc *memdesc) +{ + if (memdesc && memdesc->ops->put_gpuaddr) + memdesc->ops->put_gpuaddr(memdesc); +} + +/** + * kgsl_cachemode_is_cached - Return true if the passed flags indicate a cached + * buffer + * @flags: A bitmask of KGSL_MEMDESC_ flags + * + * Return: true if the flags indicate a cached buffer + */ +static inline bool kgsl_cachemode_is_cached(u64 flags) +{ + u64 mode = FIELD_GET(KGSL_CACHEMODE_MASK, flags); + + return (mode != KGSL_CACHEMODE_UNCACHED && + mode != KGSL_CACHEMODE_WRITECOMBINE); +} + +/** + * kgsl_unmap_and_put_gpuaddr - Unmap the memory and release the gpuaddr + * assigned to a memdesc + * @memdesc: Pointer to a GPU memory object + * + * Remove the mapping from pagetable and release the GPU address assigned + * to the memdesc + */ +void kgsl_unmap_and_put_gpuaddr(struct kgsl_memdesc *memdesc); + +/** + * struct kgsl_process_attribute - basic attribute for a process + * @attr: Underlying struct attribute + * @show: Attribute show function + * @store: Attribute store function + */ +struct kgsl_process_attribute { + struct attribute attr; + ssize_t (*show)(struct kobject *kobj, + struct kgsl_process_attribute *attr, char *buf); + ssize_t (*store)(struct kobject *kobj, + struct kgsl_process_attribute *attr, const char *buf, + ssize_t count); +}; + +#define PROCESS_ATTR(_name, _mode, _show, _store) \ + static struct kgsl_process_attribute attr_##_name = \ + __ATTR(_name, _mode, _show, _store) + +struct 
kgsl_sharedmem_bind_op_range { + u64 start; + u64 last; + u32 child_offset; + u32 op; + struct kgsl_mem_entry *entry; +}; + +struct kgsl_sharedmem_bind_op { + struct kgsl_mem_entry *target; + struct kgsl_sharedmem_bind_op_range *ops; + int nr_ops; + void (*callback)(struct kgsl_sharedmem_bind_op *op); + void *data; + struct work_struct work; + struct kref ref; +}; + +/** + * kgsl_sharedmem_allocate_vbo - Allocate a new virtual buffer object + * @device: A KGSL GPU handle + * @memdesc: Memory descriptor container to initialize + * @size: Size of the VBO + * @flags: Bitmask of KGSL_MEMFLAGS_* + * + * Initialize a new virtual buffer object memory descriptor + * + * Return: 0 on success or negative on failure. + */ +int kgsl_sharedmem_allocate_vbo(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, u64 size, u64 flags); + +/** + * kgsl_memdesc_print_vbo_ranges - Print a new virtual buffer object + * @entry: A KGSL memory entry + * @s: seq_file pointer + * + * Print virtual buffer object memory ranges + */ +void kgsl_memdesc_print_vbo_ranges(struct kgsl_mem_entry *entry, + struct seq_file *s); + +/** + * kgsl_sharedmem_create_bind_op - Create a new bind op + * @private: A KGSL process private + * @target_id: Target virtual buffer object id + * @ranges: User memory pointer to an array of range operations of type &struct + * kgsl_gpumem_bind_range + * @ranges_nents: Number of entries in @ranges + * @ranges_size: Size of each entry in @ranges in bytes + * + * Create a new bind op to be used to map ranges + * + * Return: On success return kgsl_sharedmem_bind_op pointer or negative + * on failure + * + */ +struct kgsl_sharedmem_bind_op * +kgsl_sharedmem_create_bind_op(struct kgsl_process_private *private, + u32 target_id, void __user *ranges, u32 ranges_nents, + u64 ranges_size); + +/** + * kgsl_sharedmem_bind_ranges - Bind ranges to virtual buffer object + * @op: One of KGSL_GPUMEM_RANGE_OP_BIND or KGSL_GPUMEM_RANGE_OP_UNBIND + * + * Add or remove a range from kgsl 
memory descriptor + */ +void kgsl_sharedmem_bind_ranges(struct kgsl_sharedmem_bind_op *op); + +/** + * kgsl_sharedmem_bind_range_destroy - Bind ranges to virtual buffer object + * @kref: kref to bind kgsl_sharedmem_bind_op + * + * Destroy bind ranges object + */ +void kgsl_sharedmem_bind_range_destroy(struct kref *kref); + +/** + * kgsl_sharedmem_put_bind_op - Bind ranges to virtual buffer object + * @op: One of KGSL_GPUMEM_RANGE_OP_BIND or KGSL_GPUMEM_RANGE_OP_UNBIND + * + * Put kgsl_sharedmem_bind_range_destroy to free resources + */ +static inline void kgsl_sharedmem_put_bind_op(struct kgsl_sharedmem_bind_op *op) +{ + if (!IS_ERR_OR_NULL(op)) + kref_put(&op->ref, kgsl_sharedmem_bind_range_destroy); +} +#endif /* __KGSL_SHAREDMEM_H */ diff --git a/kgsl_snapshot.c b/kgsl_snapshot.c new file mode 100644 index 0000000000..c2cac0c881 --- /dev/null +++ b/kgsl_snapshot.c @@ -0,0 +1,1273 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include + +#include "adreno_cp_parser.h" +#include "kgsl_device.h" +#include "kgsl_sharedmem.h" +#include "kgsl_snapshot.h" +#include "kgsl_util.h" + +static void kgsl_snapshot_save_frozen_objs(struct work_struct *work); + +/* Placeholder for list of ib objects that contain all objects in that IB */ + +struct kgsl_snapshot_cp_obj { + struct adreno_ib_object_list *ib_obj_list; + struct list_head node; +}; + +struct snapshot_obj_itr { + u8 *buf; /* Buffer pointer to write to */ + int pos; /* Current position in the sequence */ + loff_t offset; /* file offset to start writing from */ + size_t remain; /* Bytes remaining in buffer */ + size_t write; /* Bytes written so far */ +}; + +static inline u64 snapshot_phy_addr(struct kgsl_device *device) +{ + return device->snapshot_memory.dma_handle ? 
+ device->snapshot_memory.dma_handle : __pa(device->snapshot_memory.ptr); +} + +static inline u64 atomic_snapshot_phy_addr(struct kgsl_device *device) +{ + return device->snapshot_memory_atomic.ptr == device->snapshot_memory.ptr ? + snapshot_phy_addr(device) : __pa(device->snapshot_memory_atomic.ptr); +} + +static void obj_itr_init(struct snapshot_obj_itr *itr, u8 *buf, + loff_t offset, size_t remain) +{ + itr->buf = buf; + itr->offset = offset; + itr->remain = remain; + itr->pos = 0; + itr->write = 0; +} + +static int obj_itr_out(struct snapshot_obj_itr *itr, void *src, int size) +{ + if (itr->remain == 0) + return 0; + + if ((itr->pos + size) <= itr->offset) + goto done; + + /* Handle the case that offset is in the middle of the buffer */ + + if (itr->offset > itr->pos) { + src += (itr->offset - itr->pos); + size -= (itr->offset - itr->pos); + + /* Advance pos to the offset start */ + itr->pos = itr->offset; + } + + if (size > itr->remain) + size = itr->remain; + + memcpy(itr->buf, src, size); + + itr->buf += size; + itr->write += size; + itr->remain -= size; + +done: + itr->pos += size; + return size; +} + +static void kgsl_snapshot_put_object(struct kgsl_snapshot_object *obj) +{ + list_del(&obj->node); + + obj->entry->memdesc.priv &= ~KGSL_MEMDESC_FROZEN; + obj->entry->memdesc.priv &= ~KGSL_MEMDESC_SKIP_RECLAIM; + kgsl_mem_entry_put(obj->entry); + + kfree(obj); +} + +/** + * kgsl_snapshot_have_object() - return 1 if the object has been processed + * @snapshot: the snapshot data + * @process: The process that owns the the object to freeze + * @gpuaddr: The gpu address of the object to freeze + * @size: the size of the object (may not always be the size of the region) + * + * Return 1 if the object is already in the list - this can save us from + * having to parse the same thing over again. 
 * There are 2 lists that are
 * tracking objects so check for the object in both lists
 */
int kgsl_snapshot_have_object(struct kgsl_snapshot *snapshot,
	struct kgsl_process_private *process,
	uint64_t gpuaddr, uint64_t size)
{
	struct kgsl_snapshot_object *obj;
	struct kgsl_snapshot_cp_obj *obj_cp;
	struct adreno_ib_object *ib_obj;
	int i;

	/* Check whether the object is tracked already in ib list */
	list_for_each_entry(obj_cp, &snapshot->cp_list, node) {
		if (obj_cp->ib_obj_list == NULL
			|| obj_cp->ib_obj_list->num_objs == 0)
			continue;

		/* The first object's owner is used to filter the whole list */
		ib_obj = &(obj_cp->ib_obj_list->obj_list[0]);
		if (ib_obj->entry == NULL || ib_obj->entry->priv != process)
			continue;

		for (i = 0; i < obj_cp->ib_obj_list->num_objs; i++) {
			ib_obj = &(obj_cp->ib_obj_list->obj_list[i]);
			/* Match only if [gpuaddr, gpuaddr+size) is fully contained */
			if ((gpuaddr >= ib_obj->gpuaddr) &&
				((gpuaddr + size) <=
				(ib_obj->gpuaddr + ib_obj->size)))
				return 1;
		}
	}

	/* Then check the flat list of frozen snapshot objects */
	list_for_each_entry(obj, &snapshot->obj_list, node) {
		if (obj->entry == NULL || obj->entry->priv != process)
			continue;

		if ((gpuaddr >= obj->gpuaddr) &&
			((gpuaddr + size) <= (obj->gpuaddr + obj->size)))
			return 1;
	}

	return 0;
}

/**
 * kgsl_snapshot_get_object() - Mark a GPU buffer to be frozen
 * @snapshot: The snapshot data
 * @process: The process that owns the object we want to freeze
 * @gpuaddr: The gpu address of the object to freeze
 * @size: the size of the object (may not always be the size of the region)
 * @type: the type of object being saved (shader, vbo, etc)
 *
 * Mark and freeze a GPU buffer object. This will prevent it from being
 * freed until it can be copied out as part of the snapshot dump.
 * Returns the
 * size of the object being frozen
 */
int kgsl_snapshot_get_object(struct kgsl_snapshot *snapshot,
	struct kgsl_process_private *process, uint64_t gpuaddr,
	uint64_t size, unsigned int type)
{
	struct kgsl_mem_entry *entry;
	struct kgsl_snapshot_object *obj;
	uint64_t offset;
	int ret = -EINVAL;
	unsigned int mem_type;

	if (!gpuaddr)
		return 0;

	/* Takes a reference on the entry; dropped on every error path below */
	entry = kgsl_sharedmem_find(process, gpuaddr);

	if (entry == NULL)
		return -EINVAL;

	/* We can't freeze external memory, because we don't own it */
	if (entry->memdesc.flags & KGSL_MEMFLAGS_USERMEM_MASK)
		goto err_put;
	/*
	 * Do not save texture and render targets in snapshot,
	 * they can be just too big
	 */

	mem_type = kgsl_memdesc_get_memtype(&entry->memdesc);
	if (mem_type == KGSL_MEMTYPE_TEXTURE ||
		mem_type == KGSL_MEMTYPE_EGL_SURFACE ||
		mem_type == KGSL_MEMTYPE_EGL_IMAGE) {
		ret = 0;
		goto err_put;
	}

	/*
	 * size indicates the number of bytes in the region to save. This might
	 * not always be the entire size of the region because some buffers are
	 * sub-allocated from a larger region. However, if size 0 was passed
	 * thats a flag that the caller wants to capture the entire buffer
	 */

	if (size == 0) {
		size = entry->memdesc.size;
		offset = 0;

		/* Adjust the gpuaddr to the start of the object */
		gpuaddr = entry->memdesc.gpuaddr;
	} else {
		offset = gpuaddr - entry->memdesc.gpuaddr;
	}

	if (size + offset > entry->memdesc.size) {
		dev_err(snapshot->device->dev,
			"snapshot: invalid size for GPU buffer 0x%016llx\n",
			gpuaddr);
		goto err_put;
	}

	/* If the buffer is already on the list, skip it */
	list_for_each_entry(obj, &snapshot->obj_list, node) {
		/* combine the range with existing object if they overlap */
		if (obj->entry->priv == process && obj->type == type &&
			kgsl_addr_range_overlap(obj->gpuaddr, obj->size,
				gpuaddr, size)) {
			uint64_t end1 = obj->gpuaddr + obj->size;
			uint64_t end2 = gpuaddr + size;

			/* Grow the existing object to the union of both ranges */
			if (obj->gpuaddr > gpuaddr)
				obj->gpuaddr = gpuaddr;
			if (end1 > end2)
				obj->size = end1 - obj->gpuaddr;
			else
				obj->size = end2 - obj->gpuaddr;
			obj->offset = obj->gpuaddr - entry->memdesc.gpuaddr;
			ret = 0;
			goto err_put;
		}
	}

	obj = kzalloc(sizeof(*obj), GFP_KERNEL);

	if (obj == NULL)
		goto err_put;

	obj->type = type;
	obj->entry = entry;
	obj->gpuaddr = gpuaddr;
	obj->size = size;
	obj->offset = offset;

	list_add(&obj->node, &snapshot->obj_list);

	/*
	 * Return the size of the entire mem entry that was frozen - this gets
	 * used for tracking how much memory is frozen for a hang. Also, mark
	 * the memory entry as frozen. If the entry was already marked as
	 * frozen, then another buffer already got to it. In that case, return
	 * 0 so it doesn't get counted twice
	 */

	ret = (entry->memdesc.priv & KGSL_MEMDESC_FROZEN) ? 0
		: entry->memdesc.size;

	entry->memdesc.priv |= KGSL_MEMDESC_FROZEN;

	return ret;
err_put:
	entry->memdesc.priv &= ~KGSL_MEMDESC_SKIP_RECLAIM;
	kgsl_mem_entry_put(entry);
	return ret;
}

/**
 * kgsl_snapshot_dump_registers - helper function to dump device registers
 * @device - the device to dump registers from
 * @buf - pointer to the start of the region of memory for the section
 * @remain - the number of bytes remaining in the snapshot region
 * @priv - A pointer to the kgsl_snapshot_registers data
 *
 * Given an array of register ranges pairs (start,end [inclusive]), dump the
 * registers into a snapshot register section. The snapshot region stores a
 * pair of dwords for each register - the word address of the register, and
 * the value.
 */
size_t kgsl_snapshot_dump_registers(struct kgsl_device *device, u8 *buf,
	size_t remain, void *priv)
{
	struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf;
	struct kgsl_snapshot_registers *regs = priv;
	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
	int count = 0, j, k;

	/* Figure out how many registers we are going to dump */

	for (j = 0; j < regs->count; j++) {
		int start = regs->regs[j * 2];
		int end = regs->regs[j * 2 + 1];

		count += (end - start + 1);
	}

	/* 8 bytes per register: one dword for the address, one for the value */
	if (remain < (count * 8) + sizeof(*header)) {
		SNAPSHOT_ERR_NOMEM(device, "REGISTERS");
		return 0;
	}

	for (j = 0; j < regs->count; j++) {
		unsigned int start = regs->regs[j * 2];
		unsigned int end = regs->regs[j * 2 + 1];

		for (k = start; k <= end; k++) {
			unsigned int val;

			kgsl_regread(device, k, &val);
			*data++ = k;
			*data++ = val;
		}
	}

	header->count = count;

	/* Return the size of the section */
	return (count * 8) + sizeof(*header);
}

/* Descriptor for a register group read through an index/data register pair */
struct kgsl_snapshot_indexed_registers {
	unsigned int index;
	unsigned int data;
	unsigned int start;
	unsigned int count;
};

/* Section callback: dump @count values read via the index/data pair in @priv */
static size_t kgsl_snapshot_dump_indexed_regs(struct kgsl_device *device,
	u8 *buf, size_t remain, void *priv)
{
	struct kgsl_snapshot_indexed_registers *iregs = priv;
	struct kgsl_snapshot_indexed_regs *header =
		(struct kgsl_snapshot_indexed_regs *)buf;
	unsigned int *data = (unsigned int *)(buf + sizeof(*header));

	if (remain < (iregs->count * 4) + sizeof(*header)) {
		SNAPSHOT_ERR_NOMEM(device, "INDEXED REGS");
		return 0;
	}

	header->index_reg = iregs->index;
	header->data_reg = iregs->data;
	header->count = iregs->count;
	header->start = iregs->start;

	kgsl_regmap_read_indexed_interleaved(&device->regmap, iregs->index,
		iregs->data, data, iregs->start, iregs->count);

	return (iregs->count * 4) + sizeof(*header);
}

/**
 * kgsl_snapshot_indexed_registers - Add a set of indexed registers to the
 * snapshot
 * @device: Pointer to the KGSL device being snapshotted
 * @snapshot: Snapshot instance
 * @index: Offset for the index register
 * @data: Offset for the data register
 * @start: Index to start reading
 * @count: Number of entries to read
 *
 * Dump the values from an indexed register group into the snapshot
 */
void kgsl_snapshot_indexed_registers(struct kgsl_device *device,
	struct kgsl_snapshot *snapshot,
	unsigned int index, unsigned int data,
	unsigned int start,
	unsigned int count)
{
	struct kgsl_snapshot_indexed_registers iregs;

	/* iregs lives on the stack: the section callback runs synchronously */
	iregs.index = index;
	iregs.data = data;
	iregs.start = start;
	iregs.count = count;

	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_INDEXED_REGS,
		snapshot, kgsl_snapshot_dump_indexed_regs, &iregs);
}

/**
 * kgsl_snapshot_add_section() - Add a new section to the GPU snapshot
 * @device: the KGSL device being snapshotted
 * @id: the section id
 * @snapshot: pointer to the snapshot instance
 * @func: Function pointer to fill the section
 * @priv: Private pointer to pass to the function
 *
 * Set up a KGSL snapshot header by filling the memory with the callback
 * function and adding the standard section header
 */
+void kgsl_snapshot_add_section(struct kgsl_device *device, u16 id, + struct kgsl_snapshot *snapshot, + size_t (*func)(struct kgsl_device *, u8 *, size_t, void *), + void *priv) +{ + struct kgsl_snapshot_section_header *header = + (struct kgsl_snapshot_section_header *)snapshot->ptr; + u8 *data = snapshot->ptr + sizeof(*header); + size_t ret = 0; + + /* + * Sanity check to make sure there is enough for the header. The + * callback will check to make sure there is enough for the rest + * of the data. If there isn't enough room then don't advance the + * pointer. + */ + + if (snapshot->remain < sizeof(*header)) + return; + + /* It is legal to have no function (i.e. - make an empty section) */ + if (func) { + ret = func(device, data, snapshot->remain - sizeof(*header), + priv); + + /* + * If there wasn't enough room for the data then don't bother + * setting up the header. + */ + + if (ret == 0) + return; + } + + header->magic = SNAPSHOT_SECTION_MAGIC; + header->id = id; + header->size = ret + sizeof(*header); + + snapshot->ptr += header->size; + snapshot->remain -= header->size; + snapshot->size += header->size; +} + +static void kgsl_free_snapshot(struct kgsl_snapshot *snapshot) +{ + struct kgsl_snapshot_object *obj, *tmp; + struct kgsl_device *device = snapshot->device; + + wait_for_completion(&snapshot->dump_gate); + + list_for_each_entry_safe(obj, tmp, + &snapshot->obj_list, node) + kgsl_snapshot_put_object(obj); + + if (snapshot->mempool) + vfree(snapshot->mempool); + + kfree(snapshot); + dev_err(device->dev, "snapshot: objects released\n"); +} + +#define SP0_ISDB_ISDB_BRKPT_CFG 0x40014 +#define SP0_ISDB_ISDB_EN 0x40004 +#define SP0_ISDB_ISDB_CMD 0x4000C + +static void isdb_write(void __iomem *base, u32 offset) +{ + /* To set the SCHBREAKTYPE bit */ + __raw_writel(0x801, base + SP0_ISDB_ISDB_BRKPT_CFG + offset); + + /* + * ensure the configurations are set before + * enabling ISDB + */ + wmb(); + /* To set the ISDBCLKON and ISDB_EN bits*/ + __raw_writel(0x03, 
base + SP0_ISDB_ISDB_EN + offset); + + /* + * ensure previous write to enable isdb posts + * before issuing the break command + */ + wmb(); + /*To issue ISDB_0_ISDB_CMD_BREAK*/ + __raw_writel(0x1, base + SP0_ISDB_ISDB_CMD + offset); +} + +static void set_isdb_breakpoint_registers(struct kgsl_device *device) +{ + struct clk *clk; + int ret; + + if (!device->set_isdb_breakpoint || device->ftbl->is_hwcg_on(device) + || device->qdss_gfx_virt == NULL) + return; + + clk = clk_get(&device->pdev->dev, "apb_pclk"); + + if (IS_ERR(clk)) { + dev_err(device->dev, "Unable to get QDSS clock\n"); + goto err; + } + + ret = clk_prepare_enable(clk); + + if (ret) { + dev_err(device->dev, "QDSS Clock enable error: %d\n", ret); + clk_put(clk); + goto err; + } + + /* Issue break command for all eight SPs */ + isdb_write(device->qdss_gfx_virt, 0x0000); + isdb_write(device->qdss_gfx_virt, 0x1000); + isdb_write(device->qdss_gfx_virt, 0x2000); + isdb_write(device->qdss_gfx_virt, 0x3000); + isdb_write(device->qdss_gfx_virt, 0x4000); + isdb_write(device->qdss_gfx_virt, 0x5000); + isdb_write(device->qdss_gfx_virt, 0x6000); + isdb_write(device->qdss_gfx_virt, 0x7000); + + clk_disable_unprepare(clk); + clk_put(clk); + + return; + +err: + /* Do not force kernel panic if isdb writes did not go through */ + device->force_panic = false; +} + +static void kgsl_device_snapshot_atomic(struct kgsl_device *device) +{ + struct kgsl_snapshot *snapshot; + struct timespec64 boot; + + if (device->snapshot && device->force_panic) + return; + + if (!atomic_read(&device->active_cnt)) { + dev_err(device->dev, "snapshot: device is powered off\n"); + return; + } + + device->snapshot_memory_atomic.size = device->snapshot_memory.size; + if (!device->snapshot_faultcount) { + /* Use non-atomic snapshot memory if it is unused */ + device->snapshot_memory_atomic.ptr = device->snapshot_memory.ptr; + } else { + /* Limit size to 3MB to avoid failure for atomic snapshot memory */ + if (device->snapshot_memory_atomic.size > 
(SZ_2M + SZ_1M)) + device->snapshot_memory_atomic.size = (SZ_2M + SZ_1M); + + device->snapshot_memory_atomic.ptr = devm_kzalloc(&device->pdev->dev, + device->snapshot_memory_atomic.size, GFP_ATOMIC); + + /* If we fail to allocate more than 1MB fall back to 1MB */ + if (WARN_ON((!device->snapshot_memory_atomic.ptr) && + device->snapshot_memory_atomic.size > SZ_1M)) { + device->snapshot_memory_atomic.size = SZ_1M; + device->snapshot_memory_atomic.ptr = devm_kzalloc(&device->pdev->dev, + device->snapshot_memory_atomic.size, GFP_ATOMIC); + } + + if (!device->snapshot_memory_atomic.ptr) { + dev_err(device->dev, + "Failed to allocate memory for atomic snapshot\n"); + return; + } + } + + /* Allocate memory for the snapshot instance */ + snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC); + if (snapshot == NULL) + return; + + device->snapshot_atomic = true; + INIT_LIST_HEAD(&snapshot->obj_list); + INIT_LIST_HEAD(&snapshot->cp_list); + + snapshot->start = device->snapshot_memory_atomic.ptr; + snapshot->ptr = device->snapshot_memory_atomic.ptr; + snapshot->remain = device->snapshot_memory_atomic.size; + + /* + * Trigger both GPU and GMU snapshot. GPU specific code + * will take care of whether to dumps full state or only + * GMU state based on current GPU power state. 
+ */ + if (device->ftbl->snapshot) + device->ftbl->snapshot(device, snapshot, NULL); + + /* + * The timestamp is the seconds since boot so it is easier to match to + * the kernel log + */ + getboottime64(&boot); + snapshot->timestamp = get_seconds() - boot.tv_sec; + + kgsl_add_to_minidump("ATOMIC_GPU_SNAPSHOT", (u64) device->snapshot_memory_atomic.ptr, + atomic_snapshot_phy_addr(device), device->snapshot_memory_atomic.size); + + /* log buffer info to aid in ramdump fault tolerance */ + dev_err(device->dev, "Atomic GPU snapshot created at pa %llx++0x%zx\n", + atomic_snapshot_phy_addr(device), snapshot->size); +} + +/** + * kgsl_snapshot() - construct a device snapshot + * @device: device to snapshot + * @context: the context that is hung, might be NULL if unknown. + * @gmu_fault: whether this snapshot is triggered by a GMU fault. + * + * Given a device, construct a binary snapshot dump of the current device state + * and store it in the device snapshot memory. + */ +void kgsl_device_snapshot(struct kgsl_device *device, + struct kgsl_context *context, bool gmu_fault) +{ + struct kgsl_snapshot *snapshot; + struct timespec64 boot; + + set_isdb_breakpoint_registers(device); + + if (device->snapshot_memory.ptr == NULL) { + dev_err(device->dev, + "snapshot: no snapshot memory available\n"); + return; + } + + if (WARN(!kgsl_state_is_awake(device), + "snapshot: device is powered off\n")) + return; + + /* increment the hang count for good book keeping */ + device->snapshot_faultcount++; + + if (device->snapshot != NULL) { + + /* + * Snapshot over-write policy: + * 1. By default, don't over-write the very first snapshot, + * be it a gmu or gpu fault. + * 2. Never over-write existing snapshot on a gpu fault. + * 3. Never over-write a snapshot that we didn't recover from. + * 4. In order to over-write a new gmu fault snapshot with a + * previously recovered fault, then set the sysfs knob + * prioritize_recoverable to true. 
+ */ + if (!device->prioritize_unrecoverable || + !device->snapshot->recovered || !gmu_fault) + return; + + /* + * If another thread is currently reading it, that thread + * will free it, otherwise free it now. + */ + if (!device->snapshot->sysfs_read) + kgsl_free_snapshot(device->snapshot); + device->snapshot = NULL; + } + + /* Allocate memory for the snapshot instance */ + snapshot = kzalloc(sizeof(*snapshot), GFP_KERNEL); + if (snapshot == NULL) + return; + + init_completion(&snapshot->dump_gate); + INIT_LIST_HEAD(&snapshot->obj_list); + INIT_LIST_HEAD(&snapshot->cp_list); + INIT_WORK(&snapshot->work, kgsl_snapshot_save_frozen_objs); + + snapshot->start = device->snapshot_memory.ptr; + snapshot->ptr = device->snapshot_memory.ptr; + snapshot->remain = device->snapshot_memory.size; + snapshot->recovered = false; + snapshot->first_read = true; + snapshot->sysfs_read = 0; + + device->ftbl->snapshot(device, snapshot, context); + + /* + * The timestamp is the seconds since boot so it is easier to match to + * the kernel log + */ + + getboottime64(&boot); + snapshot->timestamp = get_seconds() - boot.tv_sec; + + /* Store the instance in the device until it gets dumped */ + device->snapshot = snapshot; + snapshot->device = device; + + /* log buffer info to aid in ramdump fault tolerance */ + dev_err(device->dev, "%s snapshot created at pa %llx++0x%zx\n", + gmu_fault ? "GMU" : "GPU", snapshot_phy_addr(device), + snapshot->size); + + kgsl_add_to_minidump("GPU_SNAPSHOT", (u64) device->snapshot_memory.ptr, + snapshot_phy_addr(device), device->snapshot_memory.size); + + if (device->skip_ib_capture) + BUG_ON(device->force_panic); + + sysfs_notify(&device->snapshot_kobj, NULL, "timestamp"); + + /* + * Queue a work item that will save the IB data in snapshot into + * static memory to prevent loss of data due to overwriting of + * memory. 
+ * + */ + kgsl_schedule_work(&snapshot->work); +} + +/* An attribute for showing snapshot details */ +struct kgsl_snapshot_attribute { + struct attribute attr; + ssize_t (*show)(struct kgsl_device *device, char *buf); + ssize_t (*store)(struct kgsl_device *device, const char *buf, + size_t count); +}; + +/** + * kgsl_snapshot_process_ib_obj_list() - Go through the list of IB's which need + * to be dumped for snapshot and move them to the global snapshot list so + * they will get dumped when the global list is dumped + * @device: device being snapshotted + */ +static void kgsl_snapshot_process_ib_obj_list(struct kgsl_snapshot *snapshot) +{ + struct kgsl_snapshot_cp_obj *obj, *obj_temp; + struct adreno_ib_object *ib_obj; + int i; + + list_for_each_entry_safe(obj, obj_temp, &snapshot->cp_list, + node) { + for (i = 0; i < obj->ib_obj_list->num_objs; i++) { + ib_obj = &(obj->ib_obj_list->obj_list[i]); + kgsl_snapshot_get_object(snapshot, ib_obj->entry->priv, + ib_obj->gpuaddr, ib_obj->size, + ib_obj->snapshot_obj_type); + } + list_del(&obj->node); + adreno_ib_destroy_obj_list(obj->ib_obj_list); + kfree(obj); + } +} + +#define to_snapshot_attr(a) \ +container_of(a, struct kgsl_snapshot_attribute, attr) + +#define kobj_to_device(a) \ +container_of(a, struct kgsl_device, snapshot_kobj) + +static int snapshot_release(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + bool snapshot_free = false; + int ret = 0; + + mutex_lock(&device->mutex); + snapshot->sysfs_read--; + + /* + * If someone's replaced the snapshot, return an error and free + * the snapshot if this is the last thread to read it. 
+ */ + if (device->snapshot != snapshot) { + ret = -EIO; + if (!snapshot->sysfs_read) + snapshot_free = true; + } + mutex_unlock(&device->mutex); + if (snapshot_free) + kgsl_free_snapshot(snapshot); + return ret; +} + +/* Dump the sysfs binary data to the user */ +static ssize_t snapshot_show(struct file *filep, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, + size_t count) +{ + struct kgsl_device *device = kobj_to_device(kobj); + struct kgsl_snapshot *snapshot; + struct kgsl_snapshot_section_header head; + struct snapshot_obj_itr itr; + int ret = 0; + + mutex_lock(&device->mutex); + snapshot = device->snapshot; + if (snapshot != NULL) { + /* + * If we're reading at a non-zero offset from a new snapshot, + * that means we want to read from the previous snapshot (which + * was overwritten), so return an error + */ + if (snapshot->first_read) { + if (off) + ret = -EIO; + else + snapshot->first_read = false; + } + if (!ret) + snapshot->sysfs_read++; + } + mutex_unlock(&device->mutex); + + if (ret) + return ret; + + /* Return nothing if we haven't taken a snapshot yet */ + if (snapshot == NULL) + return 0; + + /* + * Wait for the dump worker to finish. This is interruptible + * to allow userspace to bail if things go horribly wrong. + */ + ret = wait_for_completion_interruptible(&snapshot->dump_gate); + if (ret) { + snapshot_release(device, snapshot); + return ret; + } + + obj_itr_init(&itr, buf, off, count); + + ret = obj_itr_out(&itr, snapshot->start, snapshot->size); + if (ret == 0) + goto done; + + /* Dump the memory pool if it exists */ + if (snapshot->mempool) { + ret = obj_itr_out(&itr, snapshot->mempool, + snapshot->mempool_size); + if (ret == 0) + goto done; + } + + { + head.magic = SNAPSHOT_SECTION_MAGIC; + head.id = KGSL_SNAPSHOT_SECTION_END; + head.size = sizeof(head); + + obj_itr_out(&itr, &head, sizeof(head)); + } + + /* + * Make sure everything has been written out before destroying things. 
+ * The best way to confirm this is to go all the way through without + * writing any bytes - so only release if we get this far and + * itr->write is 0 and there are no concurrent reads pending + */ + + if (itr.write == 0) { + bool snapshot_free = false; + + mutex_lock(&device->mutex); + if (--snapshot->sysfs_read == 0) { + if (device->snapshot == snapshot) + device->snapshot = NULL; + snapshot_free = true; + } + mutex_unlock(&device->mutex); + + if (snapshot_free) + kgsl_free_snapshot(snapshot); + return 0; + } + +done: + ret = snapshot_release(device, snapshot); + return (ret < 0) ? ret : itr.write; +} + +/* Show the total number of hangs since device boot */ +static ssize_t faultcount_show(struct kgsl_device *device, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", device->snapshot_faultcount); +} + +/* Reset the total number of hangs since device boot */ +static ssize_t faultcount_store(struct kgsl_device *device, const char *buf, + size_t count) +{ + if (count) + device->snapshot_faultcount = 0; + + return count; +} + +/* Show the force_panic request status */ +static ssize_t force_panic_show(struct kgsl_device *device, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", device->force_panic); +} + +/* Store the panic request value to force_panic */ +static ssize_t force_panic_store(struct kgsl_device *device, const char *buf, + size_t count) +{ + if (strtobool(buf, &device->force_panic)) + return -EINVAL; + return count; +} + +/* Show the break_ib request status */ +static ssize_t skip_ib_capture_show(struct kgsl_device *device, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", device->skip_ib_capture); +} + +/* Store the panic request value to break_ib */ +static ssize_t skip_ib_capture_store(struct kgsl_device *device, + const char *buf, size_t count) +{ + int ret; + + ret = kstrtobool(buf, &device->skip_ib_capture); + return ret ? 
ret : count; +} + +/* Show the prioritize_unrecoverable status */ +static ssize_t prioritize_unrecoverable_show( + struct kgsl_device *device, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", + device->prioritize_unrecoverable); +} + +/* Store the priority value to prioritize unrecoverable */ +static ssize_t prioritize_unrecoverable_store( + struct kgsl_device *device, const char *buf, size_t count) +{ + if (strtobool(buf, &device->prioritize_unrecoverable)) + return -EINVAL; + + return count; +} + +/* Show the snapshot_crashdumper request status */ +static ssize_t snapshot_crashdumper_show(struct kgsl_device *device, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", device->snapshot_crashdumper); +} + + +/* Store the value to snapshot_crashdumper*/ +static ssize_t snapshot_crashdumper_store(struct kgsl_device *device, + const char *buf, size_t count) +{ + if (strtobool(buf, &device->snapshot_crashdumper)) + return -EINVAL; + return count; +} + +/* Show the timestamp of the last collected snapshot */ +static ssize_t timestamp_show(struct kgsl_device *device, char *buf) +{ + unsigned long timestamp; + + mutex_lock(&device->mutex); + timestamp = device->snapshot ? 
device->snapshot->timestamp : 0; + mutex_unlock(&device->mutex); + return scnprintf(buf, PAGE_SIZE, "%lu\n", timestamp); +} + +static ssize_t snapshot_legacy_show(struct kgsl_device *device, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", device->snapshot_legacy); +} + +static ssize_t snapshot_legacy_store(struct kgsl_device *device, + const char *buf, size_t count) +{ + if (strtobool(buf, &device->snapshot_legacy)) + return -EINVAL; + + return count; +} + +static struct bin_attribute snapshot_attr = { + .attr.name = "dump", + .attr.mode = 0444, + .size = 0, + .read = snapshot_show +}; + +#define SNAPSHOT_ATTR(_name, _mode, _show, _store) \ +struct kgsl_snapshot_attribute attr_##_name = { \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +} + +static SNAPSHOT_ATTR(timestamp, 0444, timestamp_show, NULL); +static SNAPSHOT_ATTR(faultcount, 0644, faultcount_show, faultcount_store); +static SNAPSHOT_ATTR(force_panic, 0644, force_panic_show, force_panic_store); +static SNAPSHOT_ATTR(prioritize_unrecoverable, 0644, + prioritize_unrecoverable_show, prioritize_unrecoverable_store); +static SNAPSHOT_ATTR(snapshot_crashdumper, 0644, snapshot_crashdumper_show, + snapshot_crashdumper_store); +static SNAPSHOT_ATTR(snapshot_legacy, 0644, snapshot_legacy_show, + snapshot_legacy_store); +static SNAPSHOT_ATTR(skip_ib_capture, 0644, skip_ib_capture_show, + skip_ib_capture_store); + +static ssize_t snapshot_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct kgsl_snapshot_attribute *pattr = to_snapshot_attr(attr); + struct kgsl_device *device = kobj_to_device(kobj); + ssize_t ret; + + if (device && pattr->show) + ret = pattr->show(device, buf); + else + ret = -EIO; + + return ret; +} + +static ssize_t snapshot_sysfs_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + struct kgsl_snapshot_attribute *pattr = to_snapshot_attr(attr); + struct kgsl_device *device 
= kobj_to_device(kobj); + ssize_t ret = -EIO; + + if (pattr->store) + ret = pattr->store(device, buf, count); + + return ret; +} + +static const struct sysfs_ops snapshot_sysfs_ops = { + .show = snapshot_sysfs_show, + .store = snapshot_sysfs_store, +}; + +static struct kobj_type ktype_snapshot = { + .sysfs_ops = &snapshot_sysfs_ops, +}; + +static const struct attribute *snapshot_attrs[] = { + &attr_timestamp.attr, + &attr_faultcount.attr, + &attr_force_panic.attr, + &attr_prioritize_unrecoverable.attr, + &attr_snapshot_crashdumper.attr, + &attr_snapshot_legacy.attr, + &attr_skip_ib_capture.attr, + NULL, +}; + +static int kgsl_panic_notifier_callback(struct notifier_block *nb, + unsigned long action, void *unused) +{ + struct kgsl_device *device = container_of(nb, struct kgsl_device, + panic_nb); + + /* To send NMI to GMU */ + device->gmu_fault = true; + kgsl_device_snapshot_atomic(device); + + return NOTIFY_OK; +} + +void kgsl_device_snapshot_probe(struct kgsl_device *device, u32 size) +{ + device->snapshot_memory.size = size; + + device->snapshot_memory.ptr = dma_alloc_coherent(&device->pdev->dev, + device->snapshot_memory.size, &device->snapshot_memory.dma_handle, + GFP_KERNEL); + /* + * If we fail to allocate more than 1MB for snapshot fall back + * to 1MB + */ + if (WARN_ON((!device->snapshot_memory.ptr) && size > SZ_1M)) { + device->snapshot_memory.size = SZ_1M; + device->snapshot_memory.ptr = devm_kzalloc(&device->pdev->dev, + device->snapshot_memory.size, GFP_KERNEL); + } + + if (!device->snapshot_memory.ptr) { + dev_err(device->dev, + "KGSL failed to allocate memory for snapshot\n"); + return; + } + + device->snapshot = NULL; + device->snapshot_faultcount = 0; + device->force_panic = false; + device->snapshot_crashdumper = true; + device->snapshot_legacy = false; + + device->snapshot_atomic = false; + device->panic_nb.notifier_call = kgsl_panic_notifier_callback; + device->panic_nb.priority = 1; + device->snapshot_ctxt_record_size = 64 * 1024; + + /* + * 
Set this to false so that we only ever keep the first snapshot around + * If we want to over-write with a gmu snapshot, then set it to true + * via sysfs + */ + device->prioritize_unrecoverable = false; + + if (kobject_init_and_add(&device->snapshot_kobj, &ktype_snapshot, + &device->dev->kobj, "snapshot")) + return; + + WARN_ON(sysfs_create_bin_file(&device->snapshot_kobj, &snapshot_attr)); + WARN_ON(sysfs_create_files(&device->snapshot_kobj, snapshot_attrs)); + atomic_notifier_chain_register(&panic_notifier_list, + &device->panic_nb); +} + +/** + * kgsl_device_snapshot_close() - take down snapshot memory for a device + * @device: Pointer to the kgsl_device + * + * Remove the sysfs files and free the memory allocated for the GPU + * snapshot + */ +void kgsl_device_snapshot_close(struct kgsl_device *device) +{ + kgsl_remove_from_minidump("GPU_SNAPSHOT", (u64) device->snapshot_memory.ptr, + snapshot_phy_addr(device), device->snapshot_memory.size); + + sysfs_remove_bin_file(&device->snapshot_kobj, &snapshot_attr); + sysfs_remove_files(&device->snapshot_kobj, snapshot_attrs); + + kobject_put(&device->snapshot_kobj); + + if (device->snapshot_memory.dma_handle) + dma_free_coherent(&device->pdev->dev, device->snapshot_memory.size, + device->snapshot_memory.ptr, device->snapshot_memory.dma_handle); +} + +/** + * kgsl_snapshot_add_ib_obj_list() - Add a IB object list to the snapshot + * object list + * @device: the device that is being snapshotted + * @ib_obj_list: The IB list that has objects required to execute an IB + * @num_objs: Number of IB objects + * @ptbase: The pagetable base in which the IB is mapped + * + * Adds a new IB to the list of IB objects maintained when getting snapshot + * Returns 0 on success else -ENOMEM on error + */ +int kgsl_snapshot_add_ib_obj_list(struct kgsl_snapshot *snapshot, + struct adreno_ib_object_list *ib_obj_list) +{ + struct kgsl_snapshot_cp_obj *obj; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return -ENOMEM; + 
obj->ib_obj_list = ib_obj_list; + list_add(&obj->node, &snapshot->cp_list); + return 0; +} + +static size_t _mempool_add_object(struct kgsl_snapshot *snapshot, u8 *data, + struct kgsl_snapshot_object *obj) +{ + struct kgsl_snapshot_section_header *section = + (struct kgsl_snapshot_section_header *)data; + struct kgsl_snapshot_gpu_object_v2 *header = + (struct kgsl_snapshot_gpu_object_v2 *)(data + sizeof(*section)); + u8 *dest = data + sizeof(*section) + sizeof(*header); + uint64_t size; + + size = obj->size; + + if (!kgsl_memdesc_map(&obj->entry->memdesc)) { + dev_err(snapshot->device->dev, + "snapshot: failed to map GPU object\n"); + return 0; + } + + section->magic = SNAPSHOT_SECTION_MAGIC; + section->id = KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2; + section->size = size + sizeof(*header) + sizeof(*section); + + header->size = size >> 2; + header->gpuaddr = obj->gpuaddr; + header->ptbase = + kgsl_mmu_pagetable_get_ttbr0(obj->entry->priv->pagetable); + header->type = obj->type; + + if (kgsl_addr_range_overlap(obj->gpuaddr, obj->size, + snapshot->ib1base, snapshot->ib1size)) + snapshot->ib1dumped = true; + + if (kgsl_addr_range_overlap(obj->gpuaddr, obj->size, + snapshot->ib2base, snapshot->ib2size)) + snapshot->ib2dumped = true; + + memcpy(dest, obj->entry->memdesc.hostptr + obj->offset, size); + kgsl_memdesc_unmap(&obj->entry->memdesc); + + return section->size; +} + +/** + * kgsl_snapshot_save_frozen_objs() - Save the objects frozen in snapshot into + * memory so that the data reported in these objects is correct when snapshot + * is taken + * @work: The work item that scheduled this work + */ +static void kgsl_snapshot_save_frozen_objs(struct work_struct *work) +{ + struct kgsl_snapshot *snapshot = container_of(work, + struct kgsl_snapshot, work); + struct kgsl_snapshot_object *obj, *tmp; + size_t size = 0; + void *ptr; + + if (snapshot->device->gmu_fault) + goto gmu_only; + + kgsl_snapshot_process_ib_obj_list(snapshot); + + list_for_each_entry(obj, 
&snapshot->obj_list, node) { + obj->size = ALIGN(obj->size, 4); + + size += ((size_t) obj->size + + sizeof(struct kgsl_snapshot_gpu_object_v2) + + sizeof(struct kgsl_snapshot_section_header)); + } + + if (size == 0) + goto done; + + snapshot->mempool = vmalloc(size); + + ptr = snapshot->mempool; + snapshot->mempool_size = 0; + + /* even if vmalloc fails, make sure we clean up the obj_list */ + list_for_each_entry_safe(obj, tmp, &snapshot->obj_list, node) { + if (snapshot->mempool) { + size_t ret = _mempool_add_object(snapshot, ptr, obj); + + ptr += ret; + snapshot->mempool_size += ret; + } + + kgsl_snapshot_put_object(obj); + } +done: + /* + * Get rid of the process struct here, so that it doesn't sit + * around until someone bothers to read the snapshot file. + */ + kgsl_process_private_put(snapshot->process); + snapshot->process = NULL; + + if (snapshot->ib1base && !snapshot->ib1dumped) + dev_err(snapshot->device->dev, + "snapshot: Active IB1:%016llx not dumped\n", + snapshot->ib1base); + else if (snapshot->ib2base && !snapshot->ib2dumped) + dev_err(snapshot->device->dev, + "snapshot: Active IB2:%016llx not dumped\n", + snapshot->ib2base); + +gmu_only: + BUG_ON(!snapshot->device->skip_ib_capture && + snapshot->device->force_panic); + complete_all(&snapshot->dump_gate); +} diff --git a/kgsl_snapshot.h b/kgsl_snapshot.h new file mode 100644 index 0000000000..a9a94152e2 --- /dev/null +++ b/kgsl_snapshot.h @@ -0,0 +1,314 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. 
+ */ + +#ifndef _KGSL_SNAPSHOT_H_ +#define _KGSL_SNAPSHOT_H_ + +#include <linux/types.h> + +/* Snapshot header */ + +/* High word is static, low word is snapshot version ID */ +#define SNAPSHOT_MAGIC 0x504D0002 + +/* GPU ID scheme: + * [16:31] - core identifier (0x0002 for 2D or 0x0003 for 3D) + * [00:15] - GPU specific identifier + */ + +struct kgsl_snapshot_header { + __u32 magic; /* Magic identifier */ + __u32 gpuid; /* GPU ID - see above */ + /* Added in snapshot version 2 */ + __u32 chipid; /* Chip ID from the GPU */ +} __packed; + +/* Section header */ +#define SNAPSHOT_SECTION_MAGIC 0xABCD + +struct kgsl_snapshot_section_header { + __u16 magic; /* Magic identifier */ + __u16 id; /* Type of section */ + __u32 size; /* Size of the section including this header */ +} __packed; + +/* Section identifiers */ +#define KGSL_SNAPSHOT_SECTION_OS 0x0101 +#define KGSL_SNAPSHOT_SECTION_REGS 0x0201 +#define KGSL_SNAPSHOT_SECTION_REGS_V2 0x0202 +#define KGSL_SNAPSHOT_SECTION_RB 0x0301 +#define KGSL_SNAPSHOT_SECTION_RB_V2 0x0302 +#define KGSL_SNAPSHOT_SECTION_IB 0x0401 +#define KGSL_SNAPSHOT_SECTION_IB_V2 0x0402 +#define KGSL_SNAPSHOT_SECTION_INDEXED_REGS 0x0501 +#define KGSL_SNAPSHOT_SECTION_ISTORE 0x0801 +#define KGSL_SNAPSHOT_SECTION_DEBUG 0x0901 +#define KGSL_SNAPSHOT_SECTION_DEBUGBUS 0x0A01 +#define KGSL_SNAPSHOT_SECTION_GPU_OBJECT 0x0B01 +#define KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2 0x0B02 +#define KGSL_SNAPSHOT_SECTION_MEMLIST 0x0E01 +#define KGSL_SNAPSHOT_SECTION_MEMLIST_V2 0x0E02 +#define KGSL_SNAPSHOT_SECTION_SHADER 0x1201 +#define KGSL_SNAPSHOT_SECTION_SHADER_V2 0x1202 +#define KGSL_SNAPSHOT_SECTION_MVC 0x1501 +#define KGSL_SNAPSHOT_SECTION_MVC_V2 0x1502 +#define KGSL_SNAPSHOT_SECTION_GMU 0x1601 +#define KGSL_SNAPSHOT_SECTION_GMU_MEMORY 0x1701 +#define KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS 0x1801 + +#define KGSL_SNAPSHOT_SECTION_END 0xFFFF + +/* OS sub-section header */ +#define KGSL_SNAPSHOT_OS_LINUX 0x0001 +#define KGSL_SNAPSHOT_OS_LINUX_V3 0x00000202 + +/* Linux OS specific
information */ +struct kgsl_snapshot_linux { + int osid; /* subsection OS identifier */ + int state; /* 1 if the thread is running, 0 for hung */ + __u32 seconds; /* Unix timestamp for the snapshot */ + __u32 power_flags; /* Current power flags */ + __u32 power_level; /* Current power level */ + __u32 power_interval_timeout; /* Power interval timeout */ + __u32 grpclk; /* Current GP clock value */ + __u32 busclk; /* Current busclk value */ + __u32 ptbase; /* Current ptbase */ + __u32 pid; /* PID of the process that owns the PT */ + __u32 current_context; /* ID of the current context */ + __u32 ctxtcount; /* Number of contexts appended to section */ + unsigned char release[32]; /* kernel release */ + unsigned char version[32]; /* kernel version */ + unsigned char comm[16]; /* Name of the process that owns the PT */ +} __packed; + +struct kgsl_snapshot_linux_v2 { + int osid; /* subsection OS identifier */ + __u32 seconds; /* Unix timestamp for the snapshot */ + __u32 power_flags; /* Current power flags */ + __u32 power_level; /* Current power level */ + __u32 power_interval_timeout; /* Power interval timeout */ + __u32 grpclk; /* Current GP clock value */ + __u32 busclk; /* Current busclk value */ + __u64 ptbase; /* Current ptbase */ + __u32 pid; /* PID of the process that owns the PT */ + __u32 current_context; /* ID of the current context */ + __u32 ctxtcount; /* Number of contexts appended to section */ + unsigned char release[32]; /* kernel release */ + unsigned char version[32]; /* kernel version */ + unsigned char comm[16]; /* Name of the process that owns the PT */ +} __packed; + +/* + * This structure contains a record of an active context. 
+ * These are appended one after another in the OS section below + * the header above + */ + +struct kgsl_snapshot_linux_context { + __u32 id; /* The context ID */ + __u32 timestamp_queued; /* The last queued timestamp */ + __u32 timestamp_retired; /* The last timestamp retired by HW */ +}; + +struct kgsl_snapshot_linux_context_v2 { + __u32 id; /* The context ID */ + __u32 timestamp_queued; /* The last queued timestamp */ + __u32 timestamp_consumed; /* The last timestamp consumed by HW */ + __u32 timestamp_retired; /* The last timestamp retired by HW */ +}; +/* Ringbuffer sub-section header */ +struct kgsl_snapshot_rb { + int start; /* dword at the start of the dump */ + int end; /* dword at the end of the dump */ + int rbsize; /* Size (in dwords) of the ringbuffer */ + int wptr; /* Current index of the CPU write pointer */ + int rptr; /* Current index of the GPU read pointer */ + int count; /* Number of dwords in the dump */ + __u32 timestamp_queued; /* The last queued timestamp */ + __u32 timestamp_retired; /* The last timestamp retired by HW */ +} __packed; + +struct kgsl_snapshot_rb_v2 { + int start; /* dword at the start of the dump */ + int end; /* dword at the end of the dump */ + int rbsize; /* Size (in dwords) of the ringbuffer */ + int wptr; /* Current index of the CPU write pointer */ + int rptr; /* Current index of the GPU read pointer */ + int count; /* Number of dwords in the dump */ + __u32 timestamp_queued; /* The last queued timestamp */ + __u32 timestamp_retired; /* The last timestamp retired by HW */ + __u64 gpuaddr; /* The GPU address of the ringbuffer */ + __u32 id; /* Ringbuffer identifier */ +} __packed; + + +/* Replay or Memory list section, both sections have same header */ +struct kgsl_snapshot_replay_mem_list { + /* + * Number of IBs to replay for replay section or + * number of memory list entries for mem list section + */ + int num_entries; + /* Pagetable base to which the replay IBs or memory entries belong */ + __u32 ptbase; +} 
__packed; + +/* Replay or Memory list section, both sections have same header */ +struct kgsl_snapshot_mem_list_v2 { + /* + * Number of IBs to replay for replay section or + * number of memory list entries for mem list section + */ + int num_entries; + /* Pagetable base to which the replay IBs or memory entries belong */ + __u64 ptbase; +} __packed; + + +/* Indirect buffer sub-section header */ +struct kgsl_snapshot_ib { + __u32 gpuaddr; /* GPU address of the IB */ + __u32 ptbase; /* Base for the pagetable the GPU address is valid in */ + int size; /* Size of the IB */ +} __packed; + +/* Indirect buffer sub-section header (v2) */ +struct kgsl_snapshot_ib_v2 { + __u64 gpuaddr; /* GPU address of the IB */ + __u64 ptbase; /* Base for the pagetable the GPU address is valid in */ + __u64 size; /* Size of the IB */ +} __packed; + +/* GMU memory IDs */ +#define SNAPSHOT_GMU_MEM_UNKNOWN 0x00 +#define SNAPSHOT_GMU_MEM_HFI 0x01 +#define SNAPSHOT_GMU_MEM_LOG 0x02 +#define SNAPSHOT_GMU_MEM_BWTABLE 0x03 +#define SNAPSHOT_GMU_MEM_DEBUG 0x04 +#define SNAPSHOT_GMU_MEM_BIN_BLOCK 0x05 + +/* GMU memory section data */ +struct kgsl_snapshot_gmu_mem { + int type; + uint64_t hostaddr; + uint64_t gmuaddr; + uint64_t gpuaddr; +} __packed; + +/* Register sub-section header */ +struct kgsl_snapshot_regs { + __u32 count; /* Number of register pairs in the section */ +} __packed; + +/* Indexed register sub-section header */ +struct kgsl_snapshot_indexed_regs { + __u32 index_reg; /* Offset of the index register for this section */ + __u32 data_reg; /* Offset of the data register for this section */ + int start; /* Starting index */ + int count; /* Number of dwords in the data */ +} __packed; + +/* MVC register sub-section header */ +struct kgsl_snapshot_mvc_regs { + int ctxt_id; + int cluster_id; +} __packed; + +struct kgsl_snapshot_mvc_regs_v2 { + int ctxt_id; + int cluster_id; + int pipe_id; + int location_id; +} __packed; + +/* Istore sub-section header */ +struct
kgsl_snapshot_istore { + int count; /* Number of instructions in the istore */ +} __packed; + +/* Debug data sub-section header */ + +/* A2XX debug sections */ +#define SNAPSHOT_DEBUG_SX 1 +#define SNAPSHOT_DEBUG_CP 2 +#define SNAPSHOT_DEBUG_SQ 3 +#define SNAPSHOT_DEBUG_SQTHREAD 4 +#define SNAPSHOT_DEBUG_MIU 5 + +/* A3XX debug sections */ +#define SNAPSHOT_DEBUG_VPC_MEMORY 6 +#define SNAPSHOT_DEBUG_CP_MEQ 7 +#define SNAPSHOT_DEBUG_CP_PM4_RAM 8 +#define SNAPSHOT_DEBUG_CP_PFP_RAM 9 +#define SNAPSHOT_DEBUG_CP_ROQ 10 +#define SNAPSHOT_DEBUG_SHADER_MEMORY 11 +#define SNAPSHOT_DEBUG_CP_MERCIU 12 +#define SNAPSHOT_DEBUG_SQE_VERSION 14 + +/* GMU Version information */ +#define SNAPSHOT_DEBUG_GMU_CORE_VERSION 15 +#define SNAPSHOT_DEBUG_GMU_CORE_DEV_VERSION 16 +#define SNAPSHOT_DEBUG_GMU_PWR_VERSION 17 +#define SNAPSHOT_DEBUG_GMU_PWR_DEV_VERSION 18 +#define SNAPSHOT_DEBUG_GMU_HFI_VERSION 19 + +struct kgsl_snapshot_debug { + int type; /* Type identifier for the attached data */ + int size; /* Size of the section in dwords */ +} __packed; + +struct kgsl_snapshot_debugbus { + int id; /* Debug bus ID */ + int count; /* Number of dwords in the dump */ +} __packed; + +struct kgsl_snapshot_side_debugbus { + int id; /* Debug bus ID */ + int size; /* Number of dwords in the dump */ + int valid_data; /* Mask of valid bits of the side debugbus */ +} __packed; + +struct kgsl_snapshot_shader { + int type; /* SP/TP statetype */ + int index; /* SP/TP index */ + int size; /* Number of dwords in the dump */ +} __packed; + +struct kgsl_snapshot_shader_v2 { + int type; /* SP/TP statetype */ + int index; /* SP/TP index */ + int usptp; /* USPTP index */ + int pipe_id; /* Pipe id */ + int location; /* Location value */ + u32 size; /* Number of dwords in the dump */ +} __packed; + +#define SNAPSHOT_GPU_OBJECT_SHADER 1 +#define SNAPSHOT_GPU_OBJECT_IB 2 +#define SNAPSHOT_GPU_OBJECT_GENERIC 3 +#define SNAPSHOT_GPU_OBJECT_DRAW 4 +#define SNAPSHOT_GPU_OBJECT_GLOBAL 5 + +struct kgsl_snapshot_gpu_object
{ + int type; /* Type of GPU object */ + __u32 gpuaddr; /* GPU address of the the object */ + __u32 ptbase; /* Base for the pagetable the GPU address is valid in */ + int size; /* Size of the object (in dwords) */ +}; + +struct kgsl_snapshot_gpu_object_v2 { + int type; /* Type of GPU object */ + __u64 gpuaddr; /* GPU address of the the object */ + __u64 ptbase; /* Base for the pagetable the GPU address is valid in */ + __u64 size; /* Size of the object (in dwords) */ +} __packed; + +struct kgsl_device; +struct kgsl_process_private; + +void kgsl_snapshot_push_object(struct kgsl_device *device, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t dwords); +#endif diff --git a/kgsl_sync.c b/kgsl_sync.c new file mode 100644 index 0000000000..1103b51248 --- /dev/null +++ b/kgsl_sync.c @@ -0,0 +1,884 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2012-2019, 2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include + +#include "kgsl_device.h" +#include "kgsl_sync.h" + +static void kgsl_sync_timeline_signal(struct kgsl_sync_timeline *timeline, + unsigned int timestamp); + +static const struct dma_fence_ops kgsl_sync_fence_ops; + +static struct kgsl_sync_fence *kgsl_sync_fence_create( + struct kgsl_context *context, + unsigned int timestamp) +{ + struct kgsl_sync_fence *kfence; + struct kgsl_sync_timeline *ktimeline = context->ktimeline; + unsigned long flags; + + /* Get a refcount to the timeline. Put when released */ + if (!kref_get_unless_zero(&ktimeline->kref)) + return NULL; + + kfence = kzalloc(sizeof(*kfence), GFP_KERNEL); + if (kfence == NULL) { + kgsl_sync_timeline_put(ktimeline); + return NULL; + } + + kfence->parent = ktimeline; + kfence->context_id = context->id; + kfence->timestamp = timestamp; + + dma_fence_init(&kfence->fence, &kgsl_sync_fence_ops, &ktimeline->lock, + ktimeline->fence_context, timestamp); + + /* + * sync_file_create() takes a refcount to the fence. 
This refcount is + * put when the fence is signaled. + */ + kfence->sync_file = sync_file_create(&kfence->fence); + + if (kfence->sync_file == NULL) { + kgsl_sync_timeline_put(ktimeline); + dev_err(context->device->dev, "Create sync_file failed\n"); + kfree(kfence); + return NULL; + } + + spin_lock_irqsave(&ktimeline->lock, flags); + list_add_tail(&kfence->child_list, &ktimeline->child_list_head); + spin_unlock_irqrestore(&ktimeline->lock, flags); + + return kfence; +} + +static void kgsl_sync_fence_release(struct dma_fence *fence) +{ + struct kgsl_sync_fence *kfence = (struct kgsl_sync_fence *)fence; + + kgsl_sync_timeline_put(kfence->parent); + kfree(kfence); +} + +/* Called with ktimeline->lock held */ +static bool kgsl_sync_fence_has_signaled(struct dma_fence *fence) +{ + struct kgsl_sync_fence *kfence = (struct kgsl_sync_fence *)fence; + struct kgsl_sync_timeline *ktimeline = kfence->parent; + unsigned int ts = kfence->timestamp; + + return (timestamp_cmp(ktimeline->last_timestamp, ts) >= 0); +} + +static bool kgsl_enable_signaling(struct dma_fence *fence) +{ + return !kgsl_sync_fence_has_signaled(fence); +} + +struct kgsl_sync_fence_event_priv { + struct kgsl_context *context; + unsigned int timestamp; +}; + +/** + * kgsl_sync_fence_event_cb - Event callback for a fence timestamp event + * @device - The KGSL device that expired the timestamp + * @context- Pointer to the context that owns the event + * @priv: Private data for the callback + * @result - Result of the event (retired or canceled) + * + * Signal a fence following the expiration of a timestamp + */ + +static void kgsl_sync_fence_event_cb(struct kgsl_device *device, + struct kgsl_event_group *group, void *priv, int result) +{ + struct kgsl_sync_fence_event_priv *ev = priv; + + kgsl_sync_timeline_signal(ev->context->ktimeline, ev->timestamp); + kgsl_context_put(ev->context); + kfree(ev); +} + +static int _add_fence_event(struct kgsl_device *device, + struct kgsl_context *context, unsigned int 
timestamp) +{ + struct kgsl_sync_fence_event_priv *event; + int ret; + + event = kmalloc(sizeof(*event), GFP_KERNEL); + if (event == NULL) + return -ENOMEM; + + /* + * Increase the refcount for the context to keep it through the + * callback + */ + if (!_kgsl_context_get(context)) { + kfree(event); + return -ENOENT; + } + + event->context = context; + event->timestamp = timestamp; + + ret = kgsl_add_event(device, &context->events, timestamp, + kgsl_sync_fence_event_cb, event); + + if (ret) { + kgsl_context_put(context); + kfree(event); + } + + return ret; +} + +/* Only to be used if creating a related event failed */ +static void kgsl_sync_cancel(struct kgsl_sync_fence *kfence) +{ + spin_lock(&kfence->parent->lock); + if (!list_empty(&kfence->child_list)) { + list_del_init(&kfence->child_list); + dma_fence_put(&kfence->fence); + } + spin_unlock(&kfence->parent->lock); +} + +/** + * kgsl_add_fence_event - Create a new fence event + * @device - KGSL device to create the event on + * @timestamp - Timestamp to trigger the event + * @data - Return fence fd stored in struct kgsl_timestamp_event_fence + * @len - length of the fence event + * @owner - driver instance that owns this event + * @returns 0 on success or error code on error + * + * Create a fence and register an event to signal the fence when + * the timestamp expires + */ + +int kgsl_add_fence_event(struct kgsl_device *device, + u32 context_id, u32 timestamp, void __user *data, int len, + struct kgsl_device_private *owner) +{ + struct kgsl_timestamp_event_fence priv; + struct kgsl_context *context; + struct kgsl_sync_fence *kfence = NULL; + int ret = -EINVAL; + unsigned int cur; + + priv.fence_fd = -1; + + if (len != sizeof(priv)) + return -EINVAL; + + context = kgsl_context_get_owner(owner, context_id); + + if (context == NULL) + return -EINVAL; + + if (test_bit(KGSL_CONTEXT_PRIV_INVALID, &context->priv)) + goto out; + + kfence = kgsl_sync_fence_create(context, timestamp); + if (kfence == NULL) { + ret = 
-ENOMEM; + goto out; + } + + priv.fence_fd = get_unused_fd_flags(0); + if (priv.fence_fd < 0) { + dev_crit_ratelimited(device->dev, + "Unable to get a file descriptor: %d\n", + priv.fence_fd); + ret = priv.fence_fd; + goto out; + } + + /* + * If the timestamp hasn't expired yet create an event to trigger it. + * Otherwise, just signal the fence - there is no reason to go through + * the effort of creating a fence we don't need. + */ + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, &cur); + + if (timestamp_cmp(cur, timestamp) >= 0) { + ret = 0; + kgsl_sync_timeline_signal(context->ktimeline, cur); + } else { + ret = _add_fence_event(device, context, timestamp); + if (ret) + goto out; + } + + if (copy_to_user(data, &priv, sizeof(priv))) { + ret = -EFAULT; + goto out; + } + fd_install(priv.fence_fd, kfence->sync_file->file); + +out: + kgsl_context_put(context); + if (ret) { + if (priv.fence_fd >= 0) + put_unused_fd(priv.fence_fd); + + if (kfence) { + kgsl_sync_cancel(kfence); + /* + * Put the refcount of sync file. This will release + * kfence->fence as well. + */ + fput(kfence->sync_file->file); + } + } + return ret; +} + +static void kgsl_sync_timeline_value_str(struct dma_fence *fence, + char *str, int size) +{ + struct kgsl_sync_fence *kfence = (struct kgsl_sync_fence *)fence; + struct kgsl_sync_timeline *ktimeline = kfence->parent; + struct kgsl_context *context = NULL; + unsigned long flags; + int ret = 0; + + unsigned int timestamp_retired; + unsigned int timestamp_queued; + + if (!kref_get_unless_zero(&ktimeline->kref)) + return; + if (!ktimeline->device) + goto put_timeline; + + spin_lock_irqsave(&ktimeline->lock, flags); + ret = _kgsl_context_get(ktimeline->context); + context = ret ? 
ktimeline->context : NULL; + spin_unlock_irqrestore(&ktimeline->lock, flags); + + /* Get the last signaled timestamp if the context is not valid */ + timestamp_queued = ktimeline->last_timestamp; + timestamp_retired = timestamp_queued; + if (context) { + kgsl_readtimestamp(ktimeline->device, context, + KGSL_TIMESTAMP_RETIRED, ×tamp_retired); + + kgsl_readtimestamp(ktimeline->device, context, + KGSL_TIMESTAMP_QUEUED, ×tamp_queued); + + kgsl_context_put(context); + } + + snprintf(str, size, "%u queued:%u retired:%u", + ktimeline->last_timestamp, + timestamp_queued, timestamp_retired); + +put_timeline: + kgsl_sync_timeline_put(ktimeline); +} + +static void kgsl_sync_fence_value_str(struct dma_fence *fence, + char *str, int size) +{ + struct kgsl_sync_fence *kfence = (struct kgsl_sync_fence *)fence; + + snprintf(str, size, "%u", kfence->timestamp); +} + +static const char *kgsl_sync_fence_driver_name(struct dma_fence *fence) +{ + return "kgsl-timeline"; +} + +static const char *kgsl_sync_timeline_name(struct dma_fence *fence) +{ + struct kgsl_sync_fence *kfence = (struct kgsl_sync_fence *)fence; + struct kgsl_sync_timeline *ktimeline = kfence->parent; + + return ktimeline->name; +} + +int kgsl_sync_timeline_create(struct kgsl_context *context) +{ + struct kgsl_sync_timeline *ktimeline; + + /* Put context at detach time */ + if (!_kgsl_context_get(context)) + return -ENOENT; + + ktimeline = kzalloc(sizeof(*ktimeline), GFP_KERNEL); + if (ktimeline == NULL) { + kgsl_context_put(context); + return -ENOMEM; + } + + kref_init(&ktimeline->kref); + snprintf(ktimeline->name, sizeof(ktimeline->name), + "%s_%d-%.15s(%d)-%.15s(%d)", + context->device->name, context->id, + current->group_leader->comm, current->group_leader->pid, + current->comm, current->pid); + + ktimeline->fence_context = dma_fence_context_alloc(1); + ktimeline->last_timestamp = 0; + INIT_LIST_HEAD(&ktimeline->child_list_head); + spin_lock_init(&ktimeline->lock); + ktimeline->device = context->device; + + /* + * 
The context pointer is valid till detach time, where we put the + * refcount on the context + */ + ktimeline->context = context; + + context->ktimeline = ktimeline; + + return 0; +} + +static void kgsl_sync_timeline_signal(struct kgsl_sync_timeline *ktimeline, + unsigned int timestamp) +{ + unsigned long flags; + struct kgsl_sync_fence *kfence, *next; + + if (!kref_get_unless_zero(&ktimeline->kref)) + return; + + spin_lock_irqsave(&ktimeline->lock, flags); + if (timestamp_cmp(timestamp, ktimeline->last_timestamp) > 0) + ktimeline->last_timestamp = timestamp; + + list_for_each_entry_safe(kfence, next, &ktimeline->child_list_head, + child_list) { + if (dma_fence_is_signaled_locked(&kfence->fence)) { + list_del_init(&kfence->child_list); + dma_fence_put(&kfence->fence); + } + } + + spin_unlock_irqrestore(&ktimeline->lock, flags); + kgsl_sync_timeline_put(ktimeline); +} + +void kgsl_sync_timeline_detach(struct kgsl_sync_timeline *ktimeline) +{ + unsigned long flags; + struct kgsl_context *context = ktimeline->context; + + /* Set context pointer to NULL and drop our refcount on the context */ + spin_lock_irqsave(&ktimeline->lock, flags); + ktimeline->context = NULL; + spin_unlock_irqrestore(&ktimeline->lock, flags); + kgsl_context_put(context); +} + +static void kgsl_sync_timeline_destroy(struct kref *kref) +{ + struct kgsl_sync_timeline *ktimeline = + container_of(kref, struct kgsl_sync_timeline, kref); + + kfree(ktimeline); +} + +void kgsl_sync_timeline_put(struct kgsl_sync_timeline *ktimeline) +{ + if (ktimeline) + kref_put(&ktimeline->kref, kgsl_sync_timeline_destroy); +} + +static const struct dma_fence_ops kgsl_sync_fence_ops = { + .get_driver_name = kgsl_sync_fence_driver_name, + .get_timeline_name = kgsl_sync_timeline_name, + .enable_signaling = kgsl_enable_signaling, + .signaled = kgsl_sync_fence_has_signaled, + .wait = dma_fence_default_wait, + .release = kgsl_sync_fence_release, + + .fence_value_str = kgsl_sync_fence_value_str, + .timeline_value_str = 
kgsl_sync_timeline_value_str, +}; + +static void kgsl_sync_fence_callback(struct dma_fence *fence, + struct dma_fence_cb *cb) +{ + struct kgsl_sync_fence_cb *kcb = (struct kgsl_sync_fence_cb *)cb; + + /* + * If the callback is marked for cancellation in a separate thread, + * let the other thread do the cleanup. + */ + if (kcb->func(kcb->priv)) { + dma_fence_put(kcb->fence); + kfree(kcb); + } +} + +static void kgsl_get_fence_names(struct dma_fence *fence, + struct event_fence_info *info_ptr) +{ + unsigned int num_fences; + struct dma_fence **fences; + struct dma_fence_array *array; + int i; + + if (!info_ptr) + return; + + array = to_dma_fence_array(fence); + + if (array != NULL) { + num_fences = array->num_fences; + fences = array->fences; + } else { + num_fences = 1; + fences = &fence; + } + + info_ptr->fences = kcalloc(num_fences, sizeof(struct fence_info), + GFP_ATOMIC); + if (info_ptr->fences == NULL) + return; + + info_ptr->num_fences = num_fences; + + for (i = 0; i < num_fences; i++) { + struct dma_fence *f = fences[i]; + struct fence_info *fi = &info_ptr->fences[i]; + int len; + + len = scnprintf(fi->name, sizeof(fi->name), "%s %s", + f->ops->get_driver_name(f), + f->ops->get_timeline_name(f)); + + if (f->ops->fence_value_str) { + len += scnprintf(fi->name + len, sizeof(fi->name) - len, + ": "); + f->ops->fence_value_str(f, fi->name + len, + sizeof(fi->name) - len); + } + } +} + +struct kgsl_sync_fence_cb *kgsl_sync_fence_async_wait(int fd, + bool (*func)(void *priv), void *priv, struct event_fence_info *info_ptr) +{ + struct kgsl_sync_fence_cb *kcb; + struct dma_fence *fence; + int status; + + fence = sync_file_get_fence(fd); + if (fence == NULL) + return ERR_PTR(-EINVAL); + + /* create the callback */ + kcb = kzalloc(sizeof(*kcb), GFP_ATOMIC); + if (kcb == NULL) { + dma_fence_put(fence); + return ERR_PTR(-ENOMEM); + } + + kcb->fence = fence; + kcb->priv = priv; + kcb->func = func; + + kgsl_get_fence_names(fence, info_ptr); + + /* if status then error or 
signaled */ + status = dma_fence_add_callback(fence, &kcb->fence_cb, + kgsl_sync_fence_callback); + + if (status) { + kfree(kcb); + if (!dma_fence_is_signaled(fence)) + kcb = ERR_PTR(status); + else + kcb = NULL; + dma_fence_put(fence); + } + + return kcb; +} + +/* + * Cancel the fence async callback and do the cleanup. The caller must make + * sure that the callback (if run before cancelling) returns false, so that + * no other thread frees the pointer. + */ +void kgsl_sync_fence_async_cancel(struct kgsl_sync_fence_cb *kcb) +{ + if (kcb == NULL) + return; + + /* + * After fence_remove_callback() returns, the fence callback is + * either not called at all, or completed without freeing kcb. + * This thread can then put the fence refcount and free kcb. + */ + dma_fence_remove_callback(kcb->fence, &kcb->fence_cb); + dma_fence_put(kcb->fence); + kfree(kcb); +} + +struct kgsl_syncsource { + struct kref refcount; + char name[32]; + int id; + struct kgsl_process_private *private; + struct list_head child_list_head; + spinlock_t lock; +}; + +struct kgsl_syncsource_fence { + struct dma_fence fence; + struct kgsl_syncsource *parent; + struct list_head child_list; +}; + +static const struct dma_fence_ops kgsl_syncsource_fence_ops; + +long kgsl_ioctl_syncsource_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_syncsource *syncsource = NULL; + struct kgsl_syncsource_create *param = data; + int ret = -EINVAL; + int id = 0; + struct kgsl_process_private *private = dev_priv->process_priv; + + if (!kgsl_process_private_get(private)) + return ret; + + syncsource = kzalloc(sizeof(*syncsource), GFP_KERNEL); + if (syncsource == NULL) { + ret = -ENOMEM; + goto out; + } + + kref_init(&syncsource->refcount); + snprintf(syncsource->name, sizeof(syncsource->name), + "kgsl-syncsource-pid-%d", current->group_leader->pid); + syncsource->private = private; + INIT_LIST_HEAD(&syncsource->child_list_head); + spin_lock_init(&syncsource->lock); + + 
idr_preload(GFP_KERNEL); + spin_lock(&private->syncsource_lock); + id = idr_alloc(&private->syncsource_idr, syncsource, 1, 0, GFP_NOWAIT); + if (id > 0) { + syncsource->id = id; + param->id = id; + ret = 0; + } else { + ret = id; + } + + spin_unlock(&private->syncsource_lock); + idr_preload_end(); + +out: + if (ret) { + kgsl_process_private_put(private); + kfree(syncsource); + } + + return ret; +} + +static struct kgsl_syncsource * +kgsl_syncsource_get(struct kgsl_process_private *private, int id) +{ + int result = 0; + struct kgsl_syncsource *syncsource = NULL; + + spin_lock(&private->syncsource_lock); + + syncsource = idr_find(&private->syncsource_idr, id); + if (syncsource) + result = kref_get_unless_zero(&syncsource->refcount); + + spin_unlock(&private->syncsource_lock); + + return result ? syncsource : NULL; +} + +static void kgsl_syncsource_destroy(struct kref *kref) +{ + struct kgsl_syncsource *syncsource = container_of(kref, + struct kgsl_syncsource, + refcount); + + struct kgsl_process_private *private = syncsource->private; + + /* Done with process private. 
Release the refcount */ + kgsl_process_private_put(private); + + kfree(syncsource); +} + +void kgsl_syncsource_put(struct kgsl_syncsource *syncsource) +{ + if (syncsource) + kref_put(&syncsource->refcount, kgsl_syncsource_destroy); +} + +static void kgsl_syncsource_cleanup(struct kgsl_process_private *private, + struct kgsl_syncsource *syncsource) +{ + struct kgsl_syncsource_fence *sfence, *next; + + /* Signal all fences to release any callbacks */ + spin_lock(&syncsource->lock); + + list_for_each_entry_safe(sfence, next, &syncsource->child_list_head, + child_list) { + dma_fence_signal_locked(&sfence->fence); + list_del_init(&sfence->child_list); + } + + spin_unlock(&syncsource->lock); + + /* put reference from syncsource creation */ + kgsl_syncsource_put(syncsource); +} + +long kgsl_ioctl_syncsource_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_syncsource_destroy *param = data; + struct kgsl_syncsource *syncsource = NULL; + struct kgsl_process_private *private = dev_priv->process_priv; + + spin_lock(&private->syncsource_lock); + syncsource = idr_find(&private->syncsource_idr, param->id); + + if (syncsource == NULL) { + spin_unlock(&private->syncsource_lock); + return -EINVAL; + } + + if (syncsource->id != 0) { + idr_remove(&private->syncsource_idr, syncsource->id); + syncsource->id = 0; + } + spin_unlock(&private->syncsource_lock); + + kgsl_syncsource_cleanup(private, syncsource); + return 0; +} + +long kgsl_ioctl_syncsource_create_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_syncsource_create_fence *param = data; + struct kgsl_syncsource *syncsource = NULL; + int ret = -EINVAL; + struct kgsl_syncsource_fence *sfence = NULL; + struct sync_file *sync_file = NULL; + int fd = -1; + + /* + * Take a refcount that is released when the fence is released + * (or if fence can't be added to the syncsource). 
+ */ + syncsource = kgsl_syncsource_get(dev_priv->process_priv, + param->id); + if (syncsource == NULL) + goto out; + + sfence = kzalloc(sizeof(*sfence), GFP_KERNEL); + if (sfence == NULL) { + ret = -ENOMEM; + goto out; + } + sfence->parent = syncsource; + + /* Use a new fence context for each fence */ + dma_fence_init(&sfence->fence, &kgsl_syncsource_fence_ops, + &syncsource->lock, dma_fence_context_alloc(1), 1); + + sync_file = sync_file_create(&sfence->fence); + + if (sync_file == NULL) { + dev_err(dev_priv->device->dev, + "Create sync_file failed\n"); + ret = -ENOMEM; + goto out; + } + + fd = get_unused_fd_flags(0); + if (fd < 0) { + ret = -EBADF; + goto out; + } + ret = 0; + + fd_install(fd, sync_file->file); + + param->fence_fd = fd; + + spin_lock(&syncsource->lock); + list_add_tail(&sfence->child_list, &syncsource->child_list_head); + spin_unlock(&syncsource->lock); +out: + /* + * We're transferring ownership of the fence to the sync file. + * The sync file takes an extra refcount when it is created, so put + * our refcount. 
+ */ + if (sync_file) + dma_fence_put(&sfence->fence); + + if (ret) { + if (sync_file) + fput(sync_file->file); + else if (sfence) + dma_fence_put(&sfence->fence); + else + kgsl_syncsource_put(syncsource); + } + + return ret; +} + +static int kgsl_syncsource_signal(struct kgsl_syncsource *syncsource, + struct dma_fence *fence) +{ + struct kgsl_syncsource_fence *sfence, *next; + int ret = -EINVAL; + + spin_lock(&syncsource->lock); + + list_for_each_entry_safe(sfence, next, &syncsource->child_list_head, + child_list) { + if (fence == &sfence->fence) { + dma_fence_signal_locked(fence); + list_del_init(&sfence->child_list); + + ret = 0; + break; + } + } + + spin_unlock(&syncsource->lock); + + return ret; +} + +long kgsl_ioctl_syncsource_signal_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int ret = -EINVAL; + struct kgsl_syncsource_signal_fence *param = data; + struct kgsl_syncsource *syncsource = NULL; + struct dma_fence *fence = NULL; + + syncsource = kgsl_syncsource_get(dev_priv->process_priv, + param->id); + if (syncsource == NULL) + goto out; + + fence = sync_file_get_fence(param->fence_fd); + if (fence == NULL) { + ret = -EBADF; + goto out; + } + + ret = kgsl_syncsource_signal(syncsource, fence); +out: + if (fence) + dma_fence_put(fence); + if (syncsource) + kgsl_syncsource_put(syncsource); + return ret; +} + +static void kgsl_syncsource_fence_release(struct dma_fence *fence) +{ + struct kgsl_syncsource_fence *sfence = + (struct kgsl_syncsource_fence *)fence; + + /* Signal if it's not signaled yet */ + kgsl_syncsource_signal(sfence->parent, fence); + + /* Release the refcount on the syncsource */ + kgsl_syncsource_put(sfence->parent); + + kfree(sfence); +} + +void kgsl_syncsource_process_release_syncsources( + struct kgsl_process_private *private) +{ + struct kgsl_syncsource *syncsource; + int next = 0; + + while (1) { + spin_lock(&private->syncsource_lock); + syncsource = idr_get_next(&private->syncsource_idr, &next); + + if 
(syncsource == NULL) { + spin_unlock(&private->syncsource_lock); + break; + } + + if (syncsource->id != 0) { + idr_remove(&private->syncsource_idr, syncsource->id); + syncsource->id = 0; + } + spin_unlock(&private->syncsource_lock); + + kgsl_syncsource_cleanup(private, syncsource); + next = next + 1; + } +} + +static const char *kgsl_syncsource_get_timeline_name(struct dma_fence *fence) +{ + struct kgsl_syncsource_fence *sfence = + (struct kgsl_syncsource_fence *)fence; + struct kgsl_syncsource *syncsource = sfence->parent; + + return syncsource->name; +} + +static bool kgsl_syncsource_enable_signaling(struct dma_fence *fence) +{ + return true; +} + +static const char *kgsl_syncsource_driver_name(struct dma_fence *fence) +{ + return "kgsl-syncsource-timeline"; +} + +static void kgsl_syncsource_fence_value_str(struct dma_fence *fence, + char *str, int size) +{ + /* + * Each fence is independent of the others on the same timeline. + * We use a different context for each of them. + */ + snprintf(str, size, "%llu", fence->context); +} + +static const struct dma_fence_ops kgsl_syncsource_fence_ops = { + .get_driver_name = kgsl_syncsource_driver_name, + .get_timeline_name = kgsl_syncsource_get_timeline_name, + .enable_signaling = kgsl_syncsource_enable_signaling, + .wait = dma_fence_default_wait, + .release = kgsl_syncsource_fence_release, + + .fence_value_str = kgsl_syncsource_fence_value_str, +}; + diff --git a/kgsl_sync.h b/kgsl_sync.h new file mode 100644 index 0000000000..b8655ddc9c --- /dev/null +++ b/kgsl_sync.h @@ -0,0 +1,184 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2012-2014,2018-2019, 2021, The Linux Foundation. All rights reserved. 
+ */
+#ifndef __KGSL_SYNC_H
+#define __KGSL_SYNC_H
+
+#include
+
+/**
+ * struct kgsl_sync_timeline - A sync timeline associated with a kgsl context
+ * @kref: Refcount to keep the struct alive until all its fences are signaled,
+ *	and as long as the context exists
+ * @name: String to describe this timeline
+ * @fence_context: Used by the fence driver to identify fences belonging to
+ * this context
+ * @child_list_head: List head for all fences on this timeline
+ * @lock: Spinlock to protect this timeline
+ * @last_timestamp: Last timestamp when signaling fences
+ * @device: kgsl device
+ * @context: kgsl context
+ */
+struct kgsl_sync_timeline {
+	struct kref kref;
+	char name[32];
+
+	u64 fence_context;
+
+	struct list_head child_list_head;
+
+	spinlock_t lock;
+	unsigned int last_timestamp;
+	struct kgsl_device *device;
+	struct kgsl_context *context;
+};
+
+/**
+ * struct kgsl_sync_fence - A struct containing a fence and other data
+ * associated with it
+ * @fence: The fence struct
+ * @sync_file: Pointer to the sync file
+ * @parent: Pointer to the kgsl sync timeline this fence is on
+ * @child_list: List of fences on the same timeline
+ * @context_id: kgsl context id
+ * @timestamp: Context timestamp that this fence is associated with
+ */
+struct kgsl_sync_fence {
+	struct dma_fence fence;
+	struct sync_file *sync_file;
+	struct kgsl_sync_timeline *parent;
+	struct list_head child_list;
+	u32 context_id;
+	unsigned int timestamp;
+};
+
+/**
+ * struct kgsl_sync_fence_cb - Used for fence callbacks
+ * @fence_cb: Fence callback struct
+ * @fence: Pointer to the fence for which the callback is done
+ * @priv: Private data for the callback
+ * @func: Pointer to the kgsl function to call. This function should return
+ * false if the sync callback is marked for cancellation in a separate thread.
+ */ +struct kgsl_sync_fence_cb { + struct dma_fence_cb fence_cb; + struct dma_fence *fence; + void *priv; + bool (*func)(void *priv); +}; + +struct kgsl_device_private; +struct kgsl_drawobj_sync_event; +struct event_fence_info; +struct kgsl_process_private; +struct kgsl_syncsource; + +#if defined(CONFIG_SYNC_FILE) +int kgsl_add_fence_event(struct kgsl_device *device, + u32 context_id, u32 timestamp, void __user *data, int len, + struct kgsl_device_private *owner); + +int kgsl_sync_timeline_create(struct kgsl_context *context); + +void kgsl_sync_timeline_detach(struct kgsl_sync_timeline *ktimeline); + +void kgsl_sync_timeline_put(struct kgsl_sync_timeline *ktimeline); + +struct kgsl_sync_fence_cb *kgsl_sync_fence_async_wait(int fd, + bool (*func)(void *priv), void *priv, + struct event_fence_info *info_ptr); + +void kgsl_sync_fence_async_cancel(struct kgsl_sync_fence_cb *kcb); + +long kgsl_ioctl_syncsource_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_syncsource_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_syncsource_create_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_syncsource_signal_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); + +void kgsl_syncsource_put(struct kgsl_syncsource *syncsource); + +void kgsl_syncsource_process_release_syncsources( + struct kgsl_process_private *private); + +#else +static inline int kgsl_add_fence_event(struct kgsl_device *device, + u32 context_id, u32 timestamp, void __user *data, int len, + struct kgsl_device_private *owner) +{ + return -EINVAL; +} + +static inline int kgsl_sync_timeline_create(struct kgsl_context *context) +{ + context->ktimeline = NULL; + return 0; +} + +static inline void kgsl_sync_timeline_detach(struct kgsl_sync_timeline *ktimeline) +{ +} + +static inline void kgsl_sync_timeline_put(struct kgsl_sync_timeline *ktimeline) +{ +} + + 
+static inline struct kgsl_sync_fence_cb *kgsl_sync_fence_async_wait(int fd, + bool (*func)(void *priv), void *priv, + struct event_fence_info *info_ptr) +{ + return NULL; +} + +static inline void +kgsl_sync_fence_async_cancel(struct kgsl_sync_fence_cb *kcb) +{ +} + +static inline long +kgsl_ioctl_syncsource_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + return -ENOIOCTLCMD; +} + +static inline long +kgsl_ioctl_syncsource_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + return -ENOIOCTLCMD; +} + +static inline long +kgsl_ioctl_syncsource_create_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + return -ENOIOCTLCMD; +} + +static inline long +kgsl_ioctl_syncsource_signal_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + return -ENOIOCTLCMD; +} + +static inline void kgsl_syncsource_put(struct kgsl_syncsource *syncsource) +{ + +} + +static inline void kgsl_syncsource_process_release_syncsources( + struct kgsl_process_private *private) +{ + +} + +#endif /* CONFIG_SYNC_FILE */ + +#endif /* __KGSL_SYNC_H */ diff --git a/kgsl_sysfs.h b/kgsl_sysfs.h new file mode 100644 index 0000000000..1afceb3f48 --- /dev/null +++ b/kgsl_sysfs.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020, The Linux Foundation. All rights reserved. 
+ */ +#ifndef _KGSL_SYSFS_H_ +#define _KGSL_SYSFS_H_ + +struct kgsl_device; + +/** + * struct kgsl_gpu_sysfs_attr - Attribute definition for sysfs objects in the + * /sys/kernel/gpu kobject + */ +struct kgsl_gpu_sysfs_attr { + /** @attr: Attribute for the sysfs node */ + struct attribute attr; + /** @show: Show function for the node */ + ssize_t (*show)(struct kgsl_device *device, char *buf); + /** @store: Store function for the node */ + ssize_t (*store)(struct kgsl_device *device, const char *buf, + size_t count); +}; + +#define GPU_SYSFS_ATTR(_name, _mode, _show, _store) \ +const struct kgsl_gpu_sysfs_attr gpu_sysfs_attr_##_name = { \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +} + +#endif diff --git a/kgsl_timeline.c b/kgsl_timeline.c new file mode 100644 index 0000000000..d7b64abab4 --- /dev/null +++ b/kgsl_timeline.c @@ -0,0 +1,551 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include + +#include "kgsl_device.h" +#include "kgsl_eventlog.h" +#include "kgsl_sharedmem.h" +#include "kgsl_timeline.h" +#include "kgsl_trace.h" + +struct kgsl_timeline_fence { + struct dma_fence base; + struct kgsl_timeline *timeline; + struct list_head node; +}; + +struct dma_fence *kgsl_timelines_to_fence_array(struct kgsl_device *device, + u64 timelines, u32 count, u64 usize, bool any) +{ + void __user *uptr = u64_to_user_ptr(timelines); + struct dma_fence_array *array; + struct dma_fence **fences; + int i, ret = 0; + + if (!count || count > INT_MAX) + return ERR_PTR(-EINVAL); + + fences = kcalloc(count, sizeof(*fences), + GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); + + if (!fences) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < count; i++) { + struct kgsl_timeline_val val; + struct kgsl_timeline *timeline; + + if (copy_struct_from_user(&val, sizeof(val), uptr, usize)) { + ret = -EFAULT; + goto err; + } + + if (val.padding) { + ret = -EINVAL; + goto err; + } + + timeline = kgsl_timeline_by_id(device, val.timeline); + if (!timeline) { + ret = -ENOENT; + goto err; + } + + fences[i] = kgsl_timeline_fence_alloc(timeline, val.seqno); + kgsl_timeline_put(timeline); + + if (IS_ERR(fences[i])) { + ret = PTR_ERR(fences[i]); + goto err; + } + + uptr += usize; + } + + /* No need for a fence array for only one fence */ + if (count == 1) { + struct dma_fence *fence = fences[0]; + + kfree(fences); + return fence; + } + + array = dma_fence_array_create(count, fences, + dma_fence_context_alloc(1), 0, any); + + if (array) + return &array->base; + + ret = -ENOMEM; +err: + for (i = 0; i < count; i++) { + if (!IS_ERR_OR_NULL(fences[i])) + dma_fence_put(fences[i]); + } + + kfree(fences); + return ERR_PTR(ret); +} + +void kgsl_timeline_destroy(struct kref *kref) +{ + struct kgsl_timeline *timeline = container_of(kref, + struct kgsl_timeline, ref); + + WARN_ON(!list_empty(&timeline->fences)); + + 
trace_kgsl_timeline_destroy(timeline->id); + + kfree(timeline); +} + +struct kgsl_timeline *kgsl_timeline_get(struct kgsl_timeline *timeline) +{ + if (timeline) { + if (!kref_get_unless_zero(&timeline->ref)) + return NULL; + } + + return timeline; +} + +static struct kgsl_timeline *kgsl_timeline_alloc(struct kgsl_device_private *dev_priv, + u64 initial) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_timeline *timeline; + int id; + + timeline = kzalloc(sizeof(*timeline), GFP_KERNEL); + if (!timeline) + return ERR_PTR(-ENOMEM); + + idr_preload(GFP_KERNEL); + spin_lock(&device->timelines_lock); + /* Allocate the ID but don't attach the pointer just yet */ + id = idr_alloc(&device->timelines, NULL, 1, 0, GFP_NOWAIT); + spin_unlock(&device->timelines_lock); + idr_preload_end(); + + if (id < 0) { + kfree(timeline); + return ERR_PTR(id); + } + + timeline->context = dma_fence_context_alloc(1); + timeline->id = id; + INIT_LIST_HEAD(&timeline->fences); + timeline->value = initial; + timeline->dev_priv = dev_priv; + + snprintf((char *) timeline->name, sizeof(timeline->name), + "kgsl-sw-timeline-%d", id); + + trace_kgsl_timeline_alloc(id, initial); + + spin_lock_init(&timeline->lock); + spin_lock_init(&timeline->fence_lock); + + kref_init(&timeline->ref); + + return timeline; +} + +static struct kgsl_timeline_fence *to_timeline_fence(struct dma_fence *fence) +{ + return container_of(fence, struct kgsl_timeline_fence, base); +} + +static void timeline_fence_release(struct dma_fence *fence) +{ + struct kgsl_timeline_fence *f = to_timeline_fence(fence); + struct kgsl_timeline *timeline = f->timeline; + struct kgsl_timeline_fence *cur, *temp; + unsigned long flags; + + spin_lock_irqsave(&timeline->fence_lock, flags); + + /* If the fence is still on the active list, remove it */ + list_for_each_entry_safe(cur, temp, &timeline->fences, node) { + if (f != cur) + continue; + + list_del_init(&f->node); + break; + } + spin_unlock_irqrestore(&timeline->fence_lock, 
flags); + trace_kgsl_timeline_fence_release(f->timeline->id, fence->seqno); + log_kgsl_timeline_fence_release_event(f->timeline->id, fence->seqno); + + kgsl_timeline_put(f->timeline); + dma_fence_free(fence); +} + +static bool timeline_fence_signaled(struct dma_fence *fence) +{ + struct kgsl_timeline_fence *f = to_timeline_fence(fence); + + return !__dma_fence_is_later(fence->seqno, f->timeline->value, + fence->ops); +} + +static const char *timeline_get_driver_name(struct dma_fence *fence) +{ + return "kgsl-sw-timeline"; +} + +static const char *timeline_get_timeline_name(struct dma_fence *fence) +{ + struct kgsl_timeline_fence *f = to_timeline_fence(fence); + + return f->timeline->name; +} + +static void timeline_get_value_str(struct dma_fence *fence, + char *str, int size) +{ + struct kgsl_timeline_fence *f = to_timeline_fence(fence); + + snprintf(str, size, "%lld", f->timeline->value); +} + +static const struct dma_fence_ops timeline_fence_ops = { + .get_driver_name = timeline_get_driver_name, + .get_timeline_name = timeline_get_timeline_name, + .signaled = timeline_fence_signaled, + .release = timeline_fence_release, + .timeline_value_str = timeline_get_value_str, + .use_64bit_seqno = true, +}; + +static void kgsl_timeline_add_fence(struct kgsl_timeline *timeline, + struct kgsl_timeline_fence *fence) +{ + struct kgsl_timeline_fence *entry; + unsigned long flags; + + spin_lock_irqsave(&timeline->fence_lock, flags); + list_for_each_entry(entry, &timeline->fences, node) { + if (fence->base.seqno < entry->base.seqno) { + list_add_tail(&fence->node, &entry->node); + spin_unlock_irqrestore(&timeline->fence_lock, flags); + return; + } + } + + list_add_tail(&fence->node, &timeline->fences); + spin_unlock_irqrestore(&timeline->fence_lock, flags); +} + +void kgsl_timeline_signal(struct kgsl_timeline *timeline, u64 seqno) +{ + struct kgsl_timeline_fence *fence, *tmp; + struct list_head temp; + + INIT_LIST_HEAD(&temp); + + spin_lock_irq(&timeline->lock); + + if (seqno < 
timeline->value) + goto unlock; + + trace_kgsl_timeline_signal(timeline->id, seqno); + + timeline->value = seqno; + + spin_lock(&timeline->fence_lock); + list_for_each_entry_safe(fence, tmp, &timeline->fences, node) { + if (timeline_fence_signaled(&fence->base)) { + dma_fence_get(&fence->base); + list_move(&fence->node, &temp); + } + } + spin_unlock(&timeline->fence_lock); + + list_for_each_entry_safe(fence, tmp, &temp, node) { + dma_fence_signal_locked(&fence->base); + dma_fence_put(&fence->base); + } + +unlock: + spin_unlock_irq(&timeline->lock); +} + +struct dma_fence *kgsl_timeline_fence_alloc(struct kgsl_timeline *timeline, + u64 seqno) +{ + struct kgsl_timeline_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return ERR_PTR(-ENOMEM); + + fence->timeline = kgsl_timeline_get(timeline); + if (!fence->timeline) { + kfree(fence); + return ERR_PTR(-ENOENT); + } + + dma_fence_init(&fence->base, &timeline_fence_ops, + &timeline->lock, timeline->context, seqno); + + INIT_LIST_HEAD(&fence->node); + + if (!dma_fence_is_signaled(&fence->base)) + kgsl_timeline_add_fence(timeline, fence); + + trace_kgsl_timeline_fence_alloc(timeline->id, seqno); + log_kgsl_timeline_fence_alloc_event(timeline->id, seqno); + + return &fence->base; +} + +long kgsl_ioctl_timeline_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_timeline_create *param = data; + struct kgsl_timeline *timeline; + + timeline = kgsl_timeline_alloc(dev_priv, param->seqno); + if (IS_ERR(timeline)) + return PTR_ERR(timeline); + + /* Commit the pointer to the timeline in timeline idr */ + spin_lock(&device->timelines_lock); + idr_replace(&device->timelines, timeline, timeline->id); + param->id = timeline->id; + spin_unlock(&device->timelines_lock); + return 0; +} + +struct kgsl_timeline *kgsl_timeline_by_id(struct kgsl_device *device, + u32 id) +{ + struct kgsl_timeline *timeline; + int ret = 0; + 
+ spin_lock(&device->timelines_lock); + timeline = idr_find(&device->timelines, id); + + if (timeline) + ret = kref_get_unless_zero(&timeline->ref); + spin_unlock(&device->timelines_lock); + + return ret ? timeline : NULL; +} + +long kgsl_ioctl_timeline_wait(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_timeline_wait *param = data; + struct dma_fence *fence; + unsigned long timeout; + signed long ret; + + if (param->flags != KGSL_TIMELINE_WAIT_ANY && + param->flags != KGSL_TIMELINE_WAIT_ALL) + return -EINVAL; + + if (param->padding) + return -EINVAL; + + fence = kgsl_timelines_to_fence_array(device, param->timelines, + param->count, param->timelines_size, + (param->flags == KGSL_TIMELINE_WAIT_ANY)); + + if (IS_ERR(fence)) + return PTR_ERR(fence); + + if (param->tv_sec >= KTIME_SEC_MAX) + timeout = MAX_SCHEDULE_TIMEOUT; + else { + ktime_t time = ktime_set(param->tv_sec, param->tv_nsec); + + timeout = msecs_to_jiffies(ktime_to_ms(time)); + } + + trace_kgsl_timeline_wait(param->flags, param->tv_sec, param->tv_nsec); + + /* secs.nsecs to jiffies */ + if (!timeout) + ret = dma_fence_is_signaled(fence) ? 
0 : -EBUSY; + else { + ret = dma_fence_wait_timeout(fence, true, timeout); + + if (!ret) + ret = -ETIMEDOUT; + else if (ret > 0) + ret = 0; + } + + dma_fence_put(fence); + + return ret; +} + +long kgsl_ioctl_timeline_query(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_timeline_val *param = data; + struct kgsl_timeline *timeline; + + if (param->padding) + return -EINVAL; + + timeline = kgsl_timeline_by_id(dev_priv->device, param->timeline); + if (!timeline) + return -ENODEV; + + param->seqno = timeline->value; + kgsl_timeline_put(timeline); + + return 0; +} + +long kgsl_ioctl_timeline_fence_get(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_timeline_fence_get *param = data; + struct kgsl_timeline *timeline; + struct sync_file *sync_file; + struct dma_fence *fence; + int ret = 0, fd; + + timeline = kgsl_timeline_by_id(device, param->timeline); + if (!timeline) + return -ENODEV; + + fence = kgsl_timeline_fence_alloc(timeline, param->seqno); + + if (IS_ERR(fence)) { + kgsl_timeline_put(timeline); + return PTR_ERR(fence); + } + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) { + ret = fd; + goto out; + } + + sync_file = sync_file_create(fence); + if (sync_file) { + fd_install(fd, sync_file->file); + param->handle = fd; + } else { + put_unused_fd(fd); + ret = -ENOMEM; + } + +out: + dma_fence_put(fence); + kgsl_timeline_put(timeline); + + return ret; +} + +long kgsl_ioctl_timeline_signal(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_timeline_signal *param = data; + u64 timelines; + int i; + + if (!param->timelines_size) { + param->timelines_size = sizeof(struct kgsl_timeline_val); + return -EAGAIN; + } + + if (!param->count) + return -EINVAL; + + timelines = param->timelines; + + for (i = 0; i < param->count; i++) { + struct kgsl_timeline *timeline; + 
struct kgsl_timeline_val val; + + if (copy_struct_from_user(&val, sizeof(val), + u64_to_user_ptr(timelines), param->timelines_size)) + return -EFAULT; + + if (val.padding) + return -EINVAL; + + timeline = kgsl_timeline_by_id(device, val.timeline); + if (!timeline) + return -ENODEV; + + kgsl_timeline_signal(timeline, val.seqno); + + kgsl_timeline_put(timeline); + + timelines += param->timelines_size; + } + + return 0; +} + +long kgsl_ioctl_timeline_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_timeline_fence *fence, *tmp; + struct kgsl_timeline *timeline; + struct list_head temp; + u32 *param = data; + + if (*param == 0) + return -ENODEV; + + spin_lock(&device->timelines_lock); + timeline = idr_find(&device->timelines, *param); + + if (timeline == NULL) { + spin_unlock(&device->timelines_lock); + return -ENODEV; + } + + /* + * Validate that the id given is owned by the dev_priv + * instance that is passed in. If not, abort. 
+ */ + if (timeline->dev_priv != dev_priv) { + spin_unlock(&device->timelines_lock); + return -EINVAL; + } + + idr_remove(&device->timelines, timeline->id); + spin_unlock(&device->timelines_lock); + + INIT_LIST_HEAD(&temp); + + spin_lock(&timeline->fence_lock); + list_for_each_entry_safe(fence, tmp, &timeline->fences, node) + dma_fence_get(&fence->base); + list_replace_init(&timeline->fences, &temp); + spin_unlock(&timeline->fence_lock); + + spin_lock_irq(&timeline->lock); + list_for_each_entry_safe(fence, tmp, &temp, node) { + dma_fence_set_error(&fence->base, -ENOENT); + dma_fence_signal_locked(&fence->base); + dma_fence_put(&fence->base); + } + spin_unlock_irq(&timeline->lock); + + kgsl_timeline_put(timeline); + + return 0; +} diff --git a/kgsl_timeline.h b/kgsl_timeline.h new file mode 100644 index 0000000000..55223eaeab --- /dev/null +++ b/kgsl_timeline.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. 
+ */
+
+#ifndef __KGSL_TIMELINE_H
+#define __KGSL_TIMELINE_H
+
+/**
+ * struct kgsl_timeline - Container for a timeline object
+ */
+struct kgsl_timeline {
+	/** @context: dma-fence timeline context */
+	u64 context;
+	/** @id: Timeline identifier */
+	int id;
+	/** @value: Current value of the timeline */
+	u64 value;
+	/** @fence_lock: Lock to protect @fences */
+	spinlock_t fence_lock;
+	/** @lock: Lock to use for locking each fence in @fences */
+	spinlock_t lock;
+	/** @ref: Reference count for the struct */
+	struct kref ref;
+	/** @fences: sorted list of active fences */
+	struct list_head fences;
+	/** @name: Name of the timeline for debugging */
+	const char name[32];
+	/** @dev_priv: pointer to the owning device instance */
+	struct kgsl_device_private *dev_priv;
+};
+
+/**
+ * kgsl_timeline_signal - Signal the timeline
+ * @timeline: Pointer to a timeline container
+ * @seqno: Sequence number to signal
+ *
+ * Advance @timeline to sequence number @seqno and signal any fences that might
+ * have expired.
+ */
+void kgsl_timeline_signal(struct kgsl_timeline *timeline, u64 seqno);
+
+/**
+ * kgsl_timeline_destroy - Timeline destroy callback
+ * @kref: Refcount pointer for the timeline
+ *
+ * Reference count callback for the timeline called when all the object
+ * references have been released.
+ */
+void kgsl_timeline_destroy(struct kref *kref);
+
+/**
+ * kgsl_timeline_fence_alloc - Allocate a new fence on a timeline
+ * @timeline: Pointer to a timeline container
+ * @seqno: Sequence number for the new fence to wait for
+ *
+ * Create and return a new fence on the timeline that will expire when the
+ * timeline value is greater than or equal to @seqno.
+ * Return: A pointer to the newly created fence
+ */
+struct dma_fence *kgsl_timeline_fence_alloc(struct kgsl_timeline *timeline,
+	u64 seqno);
+
+/**
+ * kgsl_timeline_by_id - Look up a timeline by an id
+ * @device: A KGSL device handle
+ * @id: Lookup identifier
+ *
+ * Find and return the timeline associated with identifier @id.
+ * Return: A pointer to a timeline, or NULL if no live timeline matches @id.
+ */
+struct kgsl_timeline *kgsl_timeline_by_id(struct kgsl_device *device,
+	u32 id);
+
+/**
+ * kgsl_timeline_get - Get a reference to an existing timeline
+ * @timeline: Pointer to a timeline container
+ *
+ * Get a new reference to the timeline and return the pointer back to the user.
+ * Return: The pointer to the timeline, or NULL if the refcount already hit zero
+ */
+struct kgsl_timeline *kgsl_timeline_get(struct kgsl_timeline *timeline);
+
+/**
+ * kgsl_timeline_put - Release a reference to a timeline
+ * @timeline: Pointer to a timeline container
+ *
+ * Release a reference to a timeline and destroy it if there are no other
+ * references
+ */
+static inline void kgsl_timeline_put(struct kgsl_timeline *timeline)
+{
+	if (!IS_ERR_OR_NULL(timeline))
+		kref_put(&timeline->ref, kgsl_timeline_destroy);
+}
+
+/**
+ * kgsl_timelines_to_fence_array - Return a dma-fence array of timeline fences
+ * @device: A KGSL device handle
+ * @timelines: Userspace pointer to an array of &struct kgsl_timeline_val
+ * @count: Number of entries in @timelines
+ * @usize: Size of each entry in @timelines
+ * @any: True if the fence should expire on any timeline expiring or false if it
+ * should wait until all timelines have expired
+ *
+ * Given a list of &struct kgsl_timeline_val entries, create a dma-fence-array
+ * containing fences for each timeline/seqno pair. If @any is set the
+ * dma-fence-array will be set to expire if any of the encapsulated timeline
+ * fences expire. If @any is false, then the fence will wait for ALL of the
+ * encapsulated timeline fences to expire.
+ */ +struct dma_fence *kgsl_timelines_to_fence_array(struct kgsl_device *device, + u64 timelines, u32 count, u64 usize, bool any); + +#endif diff --git a/kgsl_trace.c b/kgsl_trace.c new file mode 100644 index 0000000000..6fb63d520a --- /dev/null +++ b/kgsl_trace.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2011,2013,2015,2019 The Linux Foundation. All rights reserved. + */ + +#include + +#include "kgsl_device.h" + +/* Instantiate tracepoints */ +#define CREATE_TRACE_POINTS +#include "kgsl_trace.h" diff --git a/kgsl_trace.h b/kgsl_trace.h new file mode 100644 index 0000000000..88fa471661 --- /dev/null +++ b/kgsl_trace.h @@ -0,0 +1,1522 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2011-2021, The Linux Foundation. All rights reserved. + */ + +#if !defined(_KGSL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _KGSL_TRACE_H + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kgsl +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . 
+#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE kgsl_trace + +#include + +#include "kgsl.h" +#include "kgsl_drawobj.h" +#include "kgsl_sharedmem.h" + +#define show_memtype(type) \ + __print_symbolic(type, \ + { KGSL_MEM_ENTRY_KERNEL, "gpumem" }, \ + { KGSL_MEM_ENTRY_USER, "usermem" }, \ + { KGSL_MEM_ENTRY_ION, "ion" }) + +#define show_constraint(type) \ + __print_symbolic(type, \ + { KGSL_CONSTRAINT_NONE, "None" }, \ + { KGSL_CONSTRAINT_PWRLEVEL, "Pwrlevel" }, \ + { KGSL_CONSTRAINT_L3_NONE, "L3_none" }, \ + { KGSL_CONSTRAINT_L3_PWRLEVEL, "L3_pwrlevel" }) + +struct kgsl_ringbuffer_issueibcmds; +struct kgsl_device_waittimestamp; + +/* + * Tracepoint for kgsl issue ib commands + */ +TRACE_EVENT(kgsl_issueibcmds, + + TP_PROTO(struct kgsl_device *device, + int drawctxt_id, + unsigned int numibs, + int timestamp, + int flags, + int result, + unsigned int type), + + TP_ARGS(device, drawctxt_id, numibs, timestamp, + flags, result, type), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, drawctxt_id) + __field(unsigned int, numibs) + __field(unsigned int, timestamp) + __field(unsigned int, flags) + __field(int, result) + __field(unsigned int, drawctxt_type) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->drawctxt_id = drawctxt_id; + __entry->numibs = numibs; + __entry->timestamp = timestamp; + __entry->flags = flags; + __entry->result = result; + __entry->drawctxt_type = type; + ), + + TP_printk( + "d_name=%s ctx=%u ib=0x0 numibs=%u ts=%u flags=%s result=%d type=%s", + __get_str(device_name), + __entry->drawctxt_id, + __entry->numibs, + __entry->timestamp, + __entry->flags ? 
__print_flags(__entry->flags, "|", + KGSL_DRAWOBJ_FLAGS) : "None", + __entry->result, + kgsl_context_type(__entry->drawctxt_type) + ) +); + +/* + * Tracepoint for kgsl readtimestamp + */ +TRACE_EVENT(kgsl_readtimestamp, + + TP_PROTO(struct kgsl_device *device, + unsigned int context_id, + unsigned int type, + unsigned int timestamp), + + TP_ARGS(device, context_id, type, timestamp), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, context_id) + __field(unsigned int, type) + __field(unsigned int, timestamp) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->context_id = context_id; + __entry->type = type; + __entry->timestamp = timestamp; + ), + + TP_printk( + "d_name=%s context_id=%u type=%u ts=%u", + __get_str(device_name), + __entry->context_id, + __entry->type, + __entry->timestamp + ) +); + +/* + * Tracepoint for kgsl waittimestamp entry + */ +TRACE_EVENT(kgsl_waittimestamp_entry, + + TP_PROTO(struct kgsl_device *device, + unsigned int context_id, + unsigned int curr_ts, + unsigned int wait_ts, + unsigned int timeout), + + TP_ARGS(device, context_id, curr_ts, wait_ts, timeout), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, context_id) + __field(unsigned int, curr_ts) + __field(unsigned int, wait_ts) + __field(unsigned int, timeout) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->context_id = context_id; + __entry->curr_ts = curr_ts; + __entry->wait_ts = wait_ts; + __entry->timeout = timeout; + ), + + TP_printk( + "d_name=%s ctx=%u curr_ts=%u ts=%u timeout=%u", + __get_str(device_name), + __entry->context_id, + __entry->curr_ts, + __entry->wait_ts, + __entry->timeout + ) +); + +/* + * Tracepoint for kgsl waittimestamp exit + */ +TRACE_EVENT(kgsl_waittimestamp_exit, + + TP_PROTO(struct kgsl_device *device, unsigned int curr_ts, + int result), + + TP_ARGS(device, curr_ts, result), + + TP_STRUCT__entry( + __string(device_name, 
device->name) + __field(unsigned int, curr_ts) + __field(int, result) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->curr_ts = curr_ts; + __entry->result = result; + ), + + TP_printk( + "d_name=%s curr_ts=%u result=%d", + __get_str(device_name), + __entry->curr_ts, + __entry->result + ) +); + +DECLARE_EVENT_CLASS(kgsl_pwr_template, + TP_PROTO(struct kgsl_device *device, bool on), + + TP_ARGS(device, on), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(bool, on) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->on = on; + ), + + TP_printk( + "d_name=%s flag=%s", + __get_str(device_name), + __entry->on ? "on" : "off" + ) +); + +DEFINE_EVENT(kgsl_pwr_template, kgsl_irq, + TP_PROTO(struct kgsl_device *device, bool on), + TP_ARGS(device, on) +); + +DEFINE_EVENT(kgsl_pwr_template, kgsl_bus, + TP_PROTO(struct kgsl_device *device, bool on), + TP_ARGS(device, on) +); + +DEFINE_EVENT(kgsl_pwr_template, kgsl_rail, + TP_PROTO(struct kgsl_device *device, bool on), + TP_ARGS(device, on) +); + +TRACE_EVENT(kgsl_clk, + + TP_PROTO(struct kgsl_device *device, bool on, + unsigned int freq), + + TP_ARGS(device, on, freq), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(bool, on) + __field(unsigned int, freq) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->on = on; + __entry->freq = freq; + ), + + TP_printk( + "d_name=%s flag=%s active_freq=%d", + __get_str(device_name), + __entry->on ? 
"on" : "off", + __entry->freq + ) +); + +TRACE_EVENT(kgsl_gmu_pwrlevel, + + TP_PROTO(unsigned long freq, + unsigned long prev_freq), + + TP_ARGS(freq, prev_freq), + + TP_STRUCT__entry( + __field(unsigned long, freq) + __field(unsigned long, prev_freq) + ), + + TP_fast_assign( + __entry->freq = freq; + __entry->prev_freq = prev_freq; + ), + + TP_printk( + "gmu_freq=%ld gmu_prev_freq=%ld", + __entry->freq, + __entry->prev_freq + ) +); + +TRACE_EVENT(kgsl_pwrlevel, + + TP_PROTO(struct kgsl_device *device, + unsigned int pwrlevel, + unsigned int freq, + unsigned int prev_pwrlevel, + unsigned int prev_freq), + + TP_ARGS(device, pwrlevel, freq, prev_pwrlevel, prev_freq), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, pwrlevel) + __field(unsigned int, freq) + __field(unsigned int, prev_pwrlevel) + __field(unsigned int, prev_freq) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->pwrlevel = pwrlevel; + __entry->freq = freq; + __entry->prev_pwrlevel = prev_pwrlevel; + __entry->prev_freq = prev_freq; + ), + + TP_printk( + "d_name=%s pwrlevel=%d freq=%d prev_pwrlevel=%d prev_freq=%d", + __get_str(device_name), + __entry->pwrlevel, + __entry->freq, + __entry->prev_pwrlevel, + __entry->prev_freq + ) +); + +/* + * Tracepoint for kgsl gpu_frequency + */ +TRACE_EVENT(gpu_frequency, + TP_PROTO(unsigned int gpu_freq, unsigned int gpu_id), + TP_ARGS(gpu_freq, gpu_id), + TP_STRUCT__entry( + __field(unsigned int, gpu_freq) + __field(unsigned int, gpu_id) + ), + TP_fast_assign( + __entry->gpu_freq = gpu_freq; + __entry->gpu_id = gpu_id; + ), + + TP_printk("gpu_freq=%luKhz gpu_id=%lu", + (unsigned long)__entry->gpu_freq, + (unsigned long)__entry->gpu_id) +); + +TRACE_EVENT(kgsl_buslevel, + + TP_PROTO(struct kgsl_device *device, unsigned int pwrlevel, + unsigned int bus), + + TP_ARGS(device, pwrlevel, bus), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, pwrlevel) + __field(unsigned int, 
bus) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->pwrlevel = pwrlevel; + __entry->bus = bus; + ), + + TP_printk( + "d_name=%s pwrlevel=%d bus=%d", + __get_str(device_name), + __entry->pwrlevel, + __entry->bus + ) +); + +TRACE_EVENT(kgsl_gpubusy, + TP_PROTO(struct kgsl_device *device, unsigned int busy, + unsigned int elapsed), + + TP_ARGS(device, busy, elapsed), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, busy) + __field(unsigned int, elapsed) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->busy = busy; + __entry->elapsed = elapsed; + ), + + TP_printk( + "d_name=%s busy=%u elapsed=%d", + __get_str(device_name), + __entry->busy, + __entry->elapsed + ) +); + +TRACE_EVENT(kgsl_pwrstats, + TP_PROTO(struct kgsl_device *device, s64 time, + struct kgsl_power_stats *pstats, u32 ctxt_count), + + TP_ARGS(device, time, pstats, ctxt_count), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(s64, total_time) + __field(u64, busy_time) + __field(u64, ram_time) + __field(u64, ram_wait) + __field(u32, context_count) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->total_time = time; + __entry->busy_time = pstats->busy_time; + __entry->ram_time = pstats->ram_time; + __entry->ram_wait = pstats->ram_wait; + __entry->context_count = ctxt_count; + ), + + TP_printk( + "d_name=%s total=%lld busy=%lld ram_time=%lld ram_wait=%lld context_count=%u", + __get_str(device_name), __entry->total_time, __entry->busy_time, + __entry->ram_time, __entry->ram_wait, __entry->context_count + ) +); + +DECLARE_EVENT_CLASS(kgsl_pwrstate_template, + TP_PROTO(struct kgsl_device *device, unsigned int state), + + TP_ARGS(device, state), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, state) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->state = state; + ), + + TP_printk( + "d_name=%s 
state=%s", + __get_str(device_name), + kgsl_pwrstate_to_str(__entry->state) + ) +); + +DEFINE_EVENT(kgsl_pwrstate_template, kgsl_pwr_set_state, + TP_PROTO(struct kgsl_device *device, unsigned int state), + TP_ARGS(device, state) +); + +DEFINE_EVENT(kgsl_pwrstate_template, kgsl_pwr_request_state, + TP_PROTO(struct kgsl_device *device, unsigned int state), + TP_ARGS(device, state) +); + +TRACE_EVENT(kgsl_mem_alloc, + + TP_PROTO(struct kgsl_mem_entry *mem_entry), + + TP_ARGS(mem_entry), + + TP_STRUCT__entry( + __field(uint64_t, gpuaddr) + __field(uint64_t, size) + __field(unsigned int, tgid) + __array(char, usage, 16) + __field(unsigned int, id) + __field(uint64_t, flags) + ), + + TP_fast_assign( + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + __entry->size = mem_entry->memdesc.size; + __entry->tgid = pid_nr(mem_entry->priv->pid); + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->id = mem_entry->id; + __entry->flags = mem_entry->memdesc.flags; + ), + + TP_printk( + "gpuaddr=0x%llx size=%llu tgid=%u usage=%s id=%u flags=0x%llx", + __entry->gpuaddr, __entry->size, __entry->tgid, + __entry->usage, __entry->id, __entry->flags + ) +); + +TRACE_EVENT(kgsl_mem_mmap, + + TP_PROTO(struct kgsl_mem_entry *mem_entry, unsigned long useraddr), + + TP_ARGS(mem_entry, useraddr), + + TP_STRUCT__entry( + __field(unsigned long, useraddr) + __field(uint64_t, gpuaddr) + __field(uint64_t, size) + __array(char, usage, 16) + __field(unsigned int, id) + __field(uint64_t, flags) + ), + + TP_fast_assign( + __entry->useraddr = useraddr; + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + __entry->size = mem_entry->memdesc.size; + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->id = mem_entry->id; + __entry->flags = mem_entry->memdesc.flags; + ), + + TP_printk( + "useraddr=0x%lx gpuaddr=0x%llx size=%llu usage=%s id=%u flags=0x%llx", + __entry->useraddr, __entry->gpuaddr, __entry->size, + 
__entry->usage, __entry->id, __entry->flags + ) +); + +TRACE_EVENT(kgsl_mem_unmapped_area_collision, + + TP_PROTO(struct kgsl_mem_entry *mem_entry, + unsigned long addr, + unsigned long len), + + TP_ARGS(mem_entry, addr, len), + + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned long, addr) + __field(unsigned long, len) + ), + + TP_fast_assign( + __entry->id = mem_entry->id; + __entry->len = len; + __entry->addr = addr; + ), + + TP_printk( + "id=%u len=%lu addr=0x%lx", + __entry->id, __entry->len, __entry->addr + ) +); + +TRACE_EVENT(kgsl_mem_map, + + TP_PROTO(struct kgsl_mem_entry *mem_entry, int fd), + + TP_ARGS(mem_entry, fd), + + TP_STRUCT__entry( + __field(uint64_t, gpuaddr) + __field(uint64_t, size) + __field(int, fd) + __field(int, type) + __field(unsigned int, tgid) + __array(char, usage, 16) + __field(unsigned int, id) + ), + + TP_fast_assign( + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + __entry->size = mem_entry->memdesc.size; + __entry->fd = fd; + __entry->type = kgsl_memdesc_usermem_type(&mem_entry->memdesc); + __entry->tgid = pid_nr(mem_entry->priv->pid); + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->id = mem_entry->id; + ), + + TP_printk( + "gpuaddr=0x%llx size=%llu type=%s fd=%d tgid=%u usage=%s id=%u", + __entry->gpuaddr, __entry->size, + show_memtype(__entry->type), + __entry->fd, __entry->tgid, + __entry->usage, __entry->id + ) +); + +TRACE_EVENT(kgsl_mem_free, + + TP_PROTO(struct kgsl_mem_entry *mem_entry), + + TP_ARGS(mem_entry), + + TP_STRUCT__entry( + __field(uint64_t, gpuaddr) + __field(uint64_t, size) + __field(int, type) + __field(int, fd) + __field(unsigned int, tgid) + __array(char, usage, 16) + __field(unsigned int, id) + ), + + TP_fast_assign( + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + __entry->size = mem_entry->memdesc.size; + __entry->type = kgsl_memdesc_usermem_type(&mem_entry->memdesc); + __entry->tgid = pid_nr(mem_entry->priv->pid); + 
kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->id = mem_entry->id; + ), + + TP_printk( + "gpuaddr=0x%llx size=%llu type=%s tgid=%u usage=%s id=%u", + __entry->gpuaddr, __entry->size, + show_memtype(__entry->type), + __entry->tgid, __entry->usage, __entry->id + ) +); + +TRACE_EVENT(kgsl_mem_sync_cache, + + TP_PROTO(struct kgsl_mem_entry *mem_entry, uint64_t offset, + uint64_t length, unsigned int op), + + TP_ARGS(mem_entry, offset, length, op), + + TP_STRUCT__entry( + __field(uint64_t, gpuaddr) + __array(char, usage, 16) + __field(unsigned int, tgid) + __field(unsigned int, id) + __field(unsigned int, op) + __field(uint64_t, offset) + __field(uint64_t, length) + ), + + TP_fast_assign( + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->tgid = pid_nr(mem_entry->priv->pid); + __entry->id = mem_entry->id; + __entry->op = op; + __entry->offset = offset; + __entry->length = (length == 0) ? + mem_entry->memdesc.size : length; + ), + + TP_printk( + "gpuaddr=0x%llx size=%llu tgid=%u usage=%s id=%u op=%c%c offset=%llu", + __entry->gpuaddr, __entry->length, + __entry->tgid, __entry->usage, __entry->id, + (__entry->op & KGSL_GPUMEM_CACHE_CLEAN) ? 'c' : '.', + (__entry->op & KGSL_GPUMEM_CACHE_INV) ? 
'i' : '.', + __entry->offset + ) +); + +TRACE_EVENT(kgsl_mem_add_bind_range, + TP_PROTO(struct kgsl_mem_entry *target, u64 offset, + struct kgsl_mem_entry *child, u64 length), + + TP_ARGS(target, offset, child, length), + + TP_STRUCT__entry( + __field(u64, gpuaddr) + __field(u32, target) + __field(u32, tgid) + __field(u32, child) + __field(u64, length) + ), + + TP_fast_assign( + __entry->gpuaddr = target->memdesc.gpuaddr + offset; + __entry->tgid = pid_nr(target->priv->pid); + __entry->target = target->id; + __entry->child = child->id; + __entry->length = length; + ), + + TP_printk( + "tgid=%u target=%d gpuaddr=%llx length %llu child=%d", + __entry->tgid, __entry->target, __entry->gpuaddr, + __entry->length, __entry->child + ) +); + +TRACE_EVENT(kgsl_mem_remove_bind_range, + TP_PROTO(struct kgsl_mem_entry *target, u64 offset, + struct kgsl_mem_entry *child, u64 length), + + TP_ARGS(target, offset, child, length), + + TP_STRUCT__entry( + __field(u64, gpuaddr) + __field(u32, target) + __field(u32, tgid) + __field(u32, child) + __field(u64, length) + ), + + TP_fast_assign( + __entry->gpuaddr = target->memdesc.gpuaddr + offset; + __entry->tgid = pid_nr(target->priv->pid); + __entry->target = target->id; + __entry->child = child->id; + __entry->length = length; + ), + + TP_printk( + "tgid=%u target=%d gpuaddr=%llx length %llu child=%d", + __entry->tgid, __entry->target, __entry->gpuaddr, + __entry->length, __entry->child + ) +); + +TRACE_EVENT(kgsl_mem_sync_full_cache, + + TP_PROTO(unsigned int num_bufs, uint64_t bulk_size), + TP_ARGS(num_bufs, bulk_size), + + TP_STRUCT__entry( + __field(unsigned int, num_bufs) + __field(uint64_t, bulk_size) + ), + + TP_fast_assign( + __entry->num_bufs = num_bufs; + __entry->bulk_size = bulk_size; + ), + + TP_printk( + "num_bufs=%u bulk_size=%llu op=ci", + __entry->num_bufs, __entry->bulk_size + ) +); + +DECLARE_EVENT_CLASS(kgsl_mem_timestamp_template, + + TP_PROTO(struct kgsl_device *device, struct kgsl_mem_entry *mem_entry, + unsigned 
int id, unsigned int curr_ts, unsigned int free_ts), + + TP_ARGS(device, mem_entry, id, curr_ts, free_ts), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(uint64_t, gpuaddr) + __field(uint64_t, size) + __field(int, type) + __array(char, usage, 16) + __field(unsigned int, id) + __field(unsigned int, drawctxt_id) + __field(unsigned int, curr_ts) + __field(unsigned int, free_ts) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + __entry->size = mem_entry->memdesc.size; + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->id = mem_entry->id; + __entry->drawctxt_id = id; + __entry->type = kgsl_memdesc_usermem_type(&mem_entry->memdesc); + __entry->curr_ts = curr_ts; + __entry->free_ts = free_ts; + ), + + TP_printk( + "d_name=%s gpuaddr=0x%llx size=%llu type=%s usage=%s id=%u ctx=%u curr_ts=%u free_ts=%u", + __get_str(device_name), + __entry->gpuaddr, + __entry->size, + show_memtype(__entry->type), + __entry->usage, + __entry->id, + __entry->drawctxt_id, + __entry->curr_ts, + __entry->free_ts + ) +); + +DEFINE_EVENT(kgsl_mem_timestamp_template, kgsl_mem_timestamp_queue, + TP_PROTO(struct kgsl_device *device, struct kgsl_mem_entry *mem_entry, + unsigned int id, unsigned int curr_ts, unsigned int free_ts), + TP_ARGS(device, mem_entry, id, curr_ts, free_ts) +); + +DEFINE_EVENT(kgsl_mem_timestamp_template, kgsl_mem_timestamp_free, + TP_PROTO(struct kgsl_device *device, struct kgsl_mem_entry *mem_entry, + unsigned int id, unsigned int curr_ts, unsigned int free_ts), + TP_ARGS(device, mem_entry, id, curr_ts, free_ts) +); + +TRACE_EVENT(kgsl_context_create, + + TP_PROTO(struct kgsl_device *device, struct kgsl_context *context, + unsigned int flags), + + TP_ARGS(device, context, flags), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, id) + __field(unsigned int, flags) + __field(unsigned int, priority) + 
__field(unsigned int, type) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->id = context->id; + __entry->flags = flags & ~(KGSL_CONTEXT_PRIORITY_MASK | + KGSL_CONTEXT_TYPE_MASK); + __entry->priority = + (flags & KGSL_CONTEXT_PRIORITY_MASK) + >> KGSL_CONTEXT_PRIORITY_SHIFT; + __entry->type = + (flags & KGSL_CONTEXT_TYPE_MASK) + >> KGSL_CONTEXT_TYPE_SHIFT; + ), + + TP_printk( + "d_name=%s ctx=%u flags=%s priority=%u type=%s", + __get_str(device_name), __entry->id, + __entry->flags ? __print_flags(__entry->flags, "|", + KGSL_CONTEXT_FLAGS) : "None", + __entry->priority, + kgsl_context_type(__entry->type) + ) +); + +TRACE_EVENT(kgsl_context_detach, + + TP_PROTO(struct kgsl_device *device, struct kgsl_context *context), + + TP_ARGS(device, context), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, id) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->id = context->id; + ), + + TP_printk( + "d_name=%s ctx=%u", + __get_str(device_name), __entry->id + ) +); + +TRACE_EVENT(kgsl_context_destroy, + + TP_PROTO(struct kgsl_device *device, struct kgsl_context *context), + + TP_ARGS(device, context), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, id) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->id = context->id; + ), + + TP_printk( + "d_name=%s ctx=%u", + __get_str(device_name), __entry->id + ) +); + +TRACE_EVENT(kgsl_user_pwrlevel_constraint, + + TP_PROTO(struct kgsl_device *device, unsigned int id, unsigned int type, + unsigned int sub_type), + + TP_ARGS(device, id, type, sub_type), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, id) + __field(unsigned int, type) + __field(unsigned int, sub_type) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->id = id; + __entry->type = type; + __entry->sub_type = sub_type; + ), + + TP_printk( + "d_name=%s ctx=%u 
constraint_type=%s constraint_subtype=%s", + __get_str(device_name), __entry->id, + show_constraint(__entry->type), + __print_symbolic(__entry->sub_type, + { KGSL_CONSTRAINT_PWR_MIN, "Min" }, + { KGSL_CONSTRAINT_PWR_MAX, "Max" }) + ) +); + +TRACE_EVENT(kgsl_constraint, + + TP_PROTO(struct kgsl_device *device, unsigned int type, + unsigned int value, unsigned int on), + + TP_ARGS(device, type, value, on), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, type) + __field(unsigned int, value) + __field(unsigned int, on) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->type = type; + __entry->value = value; + __entry->on = on; + ), + + TP_printk( + "d_name=%s constraint_type=%s constraint_value=%u status=%s", + __get_str(device_name), + show_constraint(__entry->type), + __entry->value, + __entry->on ? "ON" : "OFF" + ) +); + +TRACE_EVENT(kgsl_mmu_pagefault, + + TP_PROTO(struct kgsl_device *device, unsigned long page, + unsigned int pt, const char *name, const char *op), + + TP_ARGS(device, page, pt, name, op), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned long, page) + __field(unsigned int, pt) + __string(name, name) + __string(op, op) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->page = page; + __entry->pt = pt; + __assign_str(name, name); + __assign_str(op, op); + ), + + TP_printk( + "d_name=%s page=0x%lx pt=%u op=%s name=%s", + __get_str(device_name), __entry->page, __entry->pt, + __get_str(op), __get_str(name) + ) +); + +TRACE_EVENT(kgsl_regwrite, + + TP_PROTO(unsigned int offset, unsigned int value), + + TP_ARGS(offset, value), + + TP_STRUCT__entry( + __string(device_name, "kgsl-3d0") + __field(unsigned int, offset) + __field(unsigned int, value) + ), + + TP_fast_assign( + __assign_str(device_name, "kgsl-3d0"); + __entry->offset = offset; + __entry->value = value; + ), + + TP_printk( + "d_name=%s reg=0x%x value=0x%x", + 
__get_str(device_name), __entry->offset, __entry->value + ) +); + +TRACE_EVENT(kgsl_register_event, + TP_PROTO(unsigned int id, unsigned int timestamp, void *func), + TP_ARGS(id, timestamp, func), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(void *, func) + ), + TP_fast_assign( + __entry->id = id; + __entry->timestamp = timestamp; + __entry->func = func; + ), + TP_printk( + "ctx=%u ts=%u cb=%pS", + __entry->id, __entry->timestamp, __entry->func) +); + +TRACE_EVENT(kgsl_fire_event, + TP_PROTO(unsigned int id, unsigned int ts, + unsigned int type, unsigned int age, void *func), + TP_ARGS(id, ts, type, age, func), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, ts) + __field(unsigned int, type) + __field(unsigned int, age) + __field(void *, func) + ), + TP_fast_assign( + __entry->id = id; + __entry->ts = ts; + __entry->type = type; + __entry->age = age; + __entry->func = func; + ), + TP_printk( + "ctx=%u ts=%u type=%s age=%u cb=%pS", + __entry->id, __entry->ts, + __print_symbolic(__entry->type, + { KGSL_EVENT_RETIRED, "retired" }, + { KGSL_EVENT_CANCELLED, "cancelled" }), + __entry->age, __entry->func) +); + +TRACE_EVENT(kgsl_active_count, + + TP_PROTO(struct kgsl_device *device, unsigned long ip), + + TP_ARGS(device, ip), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, count) + __field(unsigned long, ip) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->count = atomic_read(&device->active_cnt); + __entry->ip = ip; + ), + + TP_printk( + "d_name=%s active_cnt=%u func=%ps", + __get_str(device_name), __entry->count, (void *) __entry->ip + ) +); + +TRACE_EVENT(kgsl_pagetable_destroy, + TP_PROTO(u64 ptbase, unsigned int name), + TP_ARGS(ptbase, name), + TP_STRUCT__entry( + __field(u64, ptbase) + __field(unsigned int, name) + ), + TP_fast_assign( + __entry->ptbase = ptbase; + __entry->name = name; + ), + TP_printk("ptbase=%llx name=%u", 
__entry->ptbase, __entry->name) +); + +DECLARE_EVENT_CLASS(syncpoint_timestamp_template, + TP_PROTO(struct kgsl_drawobj_sync *syncobj, + struct kgsl_context *context, + unsigned int timestamp), + TP_ARGS(syncobj, context, timestamp), + TP_STRUCT__entry( + __field(unsigned int, syncobj_context_id) + __field(unsigned int, context_id) + __field(unsigned int, timestamp) + ), + TP_fast_assign( + __entry->syncobj_context_id = syncobj->base.context->id; + __entry->context_id = context->id; + __entry->timestamp = timestamp; + ), + TP_printk("ctx=%d sync ctx=%d ts=%d", + __entry->syncobj_context_id, __entry->context_id, + __entry->timestamp) +); + +DEFINE_EVENT(syncpoint_timestamp_template, syncpoint_timestamp, + TP_PROTO(struct kgsl_drawobj_sync *syncobj, + struct kgsl_context *context, + unsigned int timestamp), + TP_ARGS(syncobj, context, timestamp) +); + +DEFINE_EVENT(syncpoint_timestamp_template, syncpoint_timestamp_expire, + TP_PROTO(struct kgsl_drawobj_sync *syncobj, + struct kgsl_context *context, + unsigned int timestamp), + TP_ARGS(syncobj, context, timestamp) +); + +DECLARE_EVENT_CLASS(syncpoint_fence_template, + TP_PROTO(struct kgsl_drawobj_sync *syncobj, char *name), + TP_ARGS(syncobj, name), + TP_STRUCT__entry( + __string(fence_name, name) + __field(unsigned int, syncobj_context_id) + ), + TP_fast_assign( + __entry->syncobj_context_id = syncobj->base.context->id; + __assign_str(fence_name, name); + ), + TP_printk("ctx=%d fence=%s", + __entry->syncobj_context_id, __get_str(fence_name)) +); + +DEFINE_EVENT(syncpoint_fence_template, syncpoint_fence, + TP_PROTO(struct kgsl_drawobj_sync *syncobj, char *name), + TP_ARGS(syncobj, name) +); + +DEFINE_EVENT(syncpoint_fence_template, syncpoint_fence_expire, + TP_PROTO(struct kgsl_drawobj_sync *syncobj, char *name), + TP_ARGS(syncobj, name) +); + +TRACE_EVENT(kgsl_msg, + TP_PROTO(const char *msg), + TP_ARGS(msg), + TP_STRUCT__entry( + __string(msg, msg) + ), + TP_fast_assign( + __assign_str(msg, msg); + ), + TP_printk( + 
"%s", __get_str(msg) + ) +); + +TRACE_EVENT(kgsl_clock_throttling, + TP_PROTO( + int idle_10pct, + int crc_50pct, + int crc_more50pct, + int crc_less50pct, + int64_t adj + ), + TP_ARGS( + idle_10pct, + crc_50pct, + crc_more50pct, + crc_less50pct, + adj + ), + TP_STRUCT__entry( + __field(int, idle_10pct) + __field(int, crc_50pct) + __field(int, crc_more50pct) + __field(int, crc_less50pct) + __field(int64_t, adj) + ), + TP_fast_assign( + __entry->idle_10pct = idle_10pct; + __entry->crc_50pct = crc_50pct; + __entry->crc_more50pct = crc_more50pct; + __entry->crc_less50pct = crc_less50pct; + __entry->adj = adj; + ), + TP_printk("idle_10=%d crc_50=%d crc_more50=%d crc_less50=%d adj=%lld", + __entry->idle_10pct, __entry->crc_50pct, __entry->crc_more50pct, + __entry->crc_less50pct, __entry->adj + ) +); + +TRACE_EVENT(kgsl_bcl_clock_throttling, + TP_PROTO( + int crc_25pct, + int crc_58pct, + int crc_75pct + ), + TP_ARGS( + crc_25pct, + crc_58pct, + crc_75pct + ), + TP_STRUCT__entry( + __field(int, crc_25pct) + __field(int, crc_58pct) + __field(int, crc_75pct) + ), + TP_fast_assign( + __entry->crc_25pct = crc_25pct; + __entry->crc_58pct = crc_58pct; + __entry->crc_75pct = crc_75pct; + ), + TP_printk("crc_25=%d crc_58=%d crc_75=%d", + __entry->crc_25pct, __entry->crc_58pct, + __entry->crc_75pct + ) +); + +DECLARE_EVENT_CLASS(gmu_oob_template, + TP_PROTO(unsigned int mask), + TP_ARGS(mask), + TP_STRUCT__entry( + __field(unsigned int, mask) + ), + TP_fast_assign( + __entry->mask = mask; + ), + TP_printk("mask=0x%08x", __entry->mask) +); + +DEFINE_EVENT(gmu_oob_template, kgsl_gmu_oob_set, + TP_PROTO(unsigned int mask), + TP_ARGS(mask) +); + +DEFINE_EVENT(gmu_oob_template, kgsl_gmu_oob_clear, + TP_PROTO(unsigned int mask), + TP_ARGS(mask) +); + +DECLARE_EVENT_CLASS(hfi_msg_template, + TP_PROTO(unsigned int id, unsigned int size, unsigned int seqnum), + TP_ARGS(id, size, seqnum), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, size) + __field(unsigned int, 
seq) + ), + TP_fast_assign( + __entry->id = id; + __entry->size = size; + __entry->seq = seqnum; + ), + TP_printk("id=0x%x size=0x%x seqnum=0x%x", + __entry->id, __entry->size, __entry->seq) +); + +DEFINE_EVENT(hfi_msg_template, kgsl_hfi_send, + TP_PROTO(unsigned int id, unsigned int size, unsigned int seqnum), + TP_ARGS(id, size, seqnum) +); + +DEFINE_EVENT(hfi_msg_template, kgsl_hfi_receive, + TP_PROTO(unsigned int id, unsigned int size, unsigned int seqnum), + TP_ARGS(id, size, seqnum) +); + +TRACE_EVENT(kgsl_timeline_alloc, + TP_PROTO( + u32 id, + u64 seqno + ), + TP_ARGS( + id, + seqno + ), + TP_STRUCT__entry( + __field(u32, id) + __field(u64, seqno) + ), + TP_fast_assign( + __entry->id = id; + __entry->seqno = seqno; + ), + TP_printk("id=%u initial=%llu", + __entry->id, __entry->seqno + ) +); + +TRACE_EVENT(kgsl_timeline_destroy, + TP_PROTO( + u32 id + ), + TP_ARGS( + id + ), + TP_STRUCT__entry( + __field(u32, id) + ), + TP_fast_assign( + __entry->id = id; + ), + TP_printk("id=%u", + __entry->id + ) +); + + +TRACE_EVENT(kgsl_timeline_signal, + TP_PROTO( + u32 id, + u64 seqno + ), + TP_ARGS( + id, + seqno + ), + TP_STRUCT__entry( + __field(u32, id) + __field(u64, seqno) + ), + TP_fast_assign( + __entry->id = id; + __entry->seqno = seqno; + ), + TP_printk("id=%u seqno=%llu", + __entry->id, __entry->seqno + ) +); + +TRACE_EVENT(kgsl_timeline_fence_alloc, + TP_PROTO( + u32 timeline, + u64 seqno + ), + TP_ARGS( + timeline, + seqno + ), + TP_STRUCT__entry( + __field(u32, timeline) + __field(u64, seqno) + ), + TP_fast_assign( + __entry->timeline = timeline; + __entry->seqno = seqno; + ), + TP_printk("timeline=%u seqno=%llu", + __entry->timeline, __entry->seqno + ) +); + +TRACE_EVENT(kgsl_timeline_fence_release, + TP_PROTO( + u32 timeline, + u64 seqno + ), + TP_ARGS( + timeline, + seqno + ), + TP_STRUCT__entry( + __field(u32, timeline) + __field(u64, seqno) + ), + TP_fast_assign( + __entry->timeline = timeline; + __entry->seqno = seqno; + ), + TP_printk("timeline=%u 
seqno=%llu", + __entry->timeline, __entry->seqno + ) +); + + +TRACE_EVENT(kgsl_timeline_wait, + TP_PROTO( + u32 flags, + s64 tv_sec, + s64 tv_nsec + ), + TP_ARGS( + flags, + tv_sec, + tv_nsec + ), + TP_STRUCT__entry( + __field(u32, flags) + __field(s64, tv_sec) + __field(s64, tv_nsec) + ), + TP_fast_assign( + __entry->flags = flags; + __entry->tv_sec = tv_sec; + __entry->tv_nsec = tv_nsec; + ), + TP_printk("flags=0x%x tv_sec=%llu tv_nsec=%llu", + __entry->flags, __entry->tv_sec, __entry->tv_nsec + + ) +); + +TRACE_EVENT(kgsl_aux_command, + TP_PROTO(u32 drawctxt_id, u32 numcmds, u32 flags, u32 timestamp + ), + TP_ARGS(drawctxt_id, numcmds, flags, timestamp + ), + TP_STRUCT__entry( + __field(u32, drawctxt_id) + __field(u32, numcmds) + __field(u32, flags) + __field(u32, timestamp) + ), + TP_fast_assign( + __entry->drawctxt_id = drawctxt_id; + __entry->numcmds = numcmds; + __entry->flags = flags; + __entry->timestamp = timestamp; + ), + TP_printk("context=%u numcmds=%u flags=0x%x timestamp=%u", + __entry->drawctxt_id, __entry->numcmds, __entry->flags, + __entry->timestamp + ) +); + +TRACE_EVENT(kgsl_drawobj_timeline, + TP_PROTO(u32 timeline, u64 seqno + ), + TP_ARGS(timeline, seqno + ), + TP_STRUCT__entry( + __field(u32, timeline) + __field(u64, seqno) + ), + TP_fast_assign( + __entry->timeline = timeline; + __entry->seqno = seqno; + ), + TP_printk("timeline=%u seqno=%llu", + __entry->timeline, __entry->seqno + ) +); + +TRACE_EVENT(kgsl_thermal_constraint, + TP_PROTO( + s32 max_freq + ), + TP_ARGS( + max_freq + ), + TP_STRUCT__entry( + __field(s32, max_freq) + ), + TP_fast_assign( + __entry->max_freq = max_freq; + ), + TP_printk("Thermal max freq=%d", + __entry->max_freq + ) +); + +TRACE_EVENT(kgsl_pool_add_page, + TP_PROTO(int order, u32 count), + TP_ARGS(order, count), + TP_STRUCT__entry( + __field(int, order) + __field(u32, count) + ), + TP_fast_assign( + __entry->order = order; + __entry->count = count; + ), + TP_printk("order=%d count=%u", + __entry->order, 
__entry->count + ) +); + +TRACE_EVENT(kgsl_pool_get_page, + TP_PROTO(int order, u32 count), + TP_ARGS(order, count), + TP_STRUCT__entry( + __field(int, order) + __field(u32, count) + ), + TP_fast_assign( + __entry->order = order; + __entry->count = count; + ), + TP_printk("order=%d count=%u", + __entry->order, __entry->count + ) +); + +TRACE_EVENT(kgsl_pool_alloc_page_system, + TP_PROTO(int order), + TP_ARGS(order), + TP_STRUCT__entry( + __field(int, order) + ), + TP_fast_assign( + __entry->order = order; + ), + TP_printk("order=%d", + __entry->order + ) +); + +TRACE_EVENT(kgsl_pool_try_page_lower, + TP_PROTO(int order), + TP_ARGS(order), + TP_STRUCT__entry( + __field(int, order) + ), + TP_fast_assign( + __entry->order = order; + ), + TP_printk("order=%d", + __entry->order + ) +); + +TRACE_EVENT(kgsl_pool_free_page, + TP_PROTO(int order), + TP_ARGS(order), + TP_STRUCT__entry( + __field(int, order) + ), + TP_fast_assign( + __entry->order = order; + ), + TP_printk("order=%d", + __entry->order + ) +); + +#endif /* _KGSL_TRACE_H */ + +/* This part must be outside protection */ +#include diff --git a/kgsl_util.c b/kgsl_util.c new file mode 100644 index 0000000000..8ebbe349d9 --- /dev/null +++ b/kgsl_util.c @@ -0,0 +1,350 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. 
+ */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "adreno.h" +#include "kgsl_util.h" + +bool kgsl_regulator_disable_wait(struct regulator *reg, u32 timeout) +{ + ktime_t tout = ktime_add_us(ktime_get(), timeout * 1000); + + if (IS_ERR_OR_NULL(reg)) + return true; + + regulator_disable(reg); + + for (;;) { + if (!regulator_is_enabled(reg)) + return true; + + if (ktime_compare(ktime_get(), tout) > 0) + return (!regulator_is_enabled(reg)); + + usleep_range((100 >> 2) + 1, 100); + } +} + +struct clk *kgsl_of_clk_by_name(struct clk_bulk_data *clks, int count, + const char *id) +{ + int i; + + for (i = 0; clks && i < count; i++) + if (!strcmp(clks[i].id, id)) + return clks[i].clk; + + return NULL; +} + +int kgsl_regulator_set_voltage(struct device *dev, + struct regulator *reg, u32 voltage) +{ + int ret; + + if (IS_ERR_OR_NULL(reg)) + return 0; + + ret = regulator_set_voltage(reg, voltage, INT_MAX); + if (ret) + dev_err(dev, "Regulator set voltage:%d failed:%d\n", voltage, ret); + + return ret; +} + +int kgsl_clk_set_rate(struct clk_bulk_data *clks, int num_clks, + const char *id, unsigned long rate) +{ + struct clk *clk; + + clk = kgsl_of_clk_by_name(clks, num_clks, id); + if (!clk) + return -ENODEV; + + return clk_set_rate(clk, rate); +} + +/* + * The PASID has stayed consistent across all targets thus far so we are + * cautiously optimistic that we can hard code it + */ +#define GPU_PASID 13 + +int kgsl_zap_shader_load(struct device *dev, const char *name) +{ + struct device_node *np, *mem_np; + const struct firmware *fw; + void *mem_region = NULL; + phys_addr_t mem_phys; + struct resource res; + const char *fwname; + ssize_t mem_size; + int ret; + + np = of_get_child_by_name(dev->of_node, "zap-shader"); + if (!np) { + dev_err(dev, "zap-shader node not found. 
Please update the device tree\n"); + return -ENODEV; + } + + mem_np = of_parse_phandle(np, "memory-region", 0); + of_node_put(np); + if (!mem_np) { + dev_err(dev, "Couldn't parse the mem-region from the zap-shader node\n"); + return -EINVAL; + } + + ret = of_address_to_resource(mem_np, 0, &res); + of_node_put(mem_np); + if (ret) + return ret; + + /* + * To avoid confusion we will keep the "legacy" naming scheme + * without the .mdt postfix (i.e. "a660_zap") outside of this function + * so we have to fix it up here + */ + fwname = kasprintf(GFP_KERNEL, "%s.mdt", name); + if (!fwname) + return -ENOMEM; + + ret = request_firmware(&fw, fwname, dev); + if (ret) { + dev_err(dev, "Couldn't load the firmware %s\n", fwname); + kfree(fwname); + return ret; + } + + mem_size = qcom_mdt_get_size(fw); + if (mem_size < 0) { + ret = mem_size; + goto out; + } + + if (mem_size > resource_size(&res)) { + ret = -E2BIG; + goto out; + } + + mem_phys = res.start; + + mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC); + if (!mem_region) { + ret = -ENOMEM; + goto out; + } + + ret = qcom_mdt_load(dev, fw, fwname, GPU_PASID, mem_region, + mem_phys, mem_size, NULL); + if (ret) { + dev_err(dev, "Error %d while loading the MDT\n", ret); + goto out; + } + + ret = qcom_scm_pas_auth_and_reset(GPU_PASID); + +out: + if (mem_region) + memunmap(mem_region); + + release_firmware(fw); + kfree(fwname); + return ret; +} + +int kgsl_hwlock(struct cpu_gpu_lock *lock) +{ + unsigned long timeout = jiffies + msecs_to_jiffies(1000); + + /* Indicate that the CPU wants the lock */ + lock->cpu_req = 1; + + /* post the request */ + wmb(); + + /* Wait for our turn */ + lock->turn = 0; + + /* Finish all memory transactions before moving on */ + mb(); + + /* + * Spin here while GPU ucode holds the lock, lock->gpu_req will + * be set to 0 after GPU ucode releases the lock. Maximum wait time + * is 1 second and this should be enough for GPU to release the lock. 
+ */ + while (lock->gpu_req && lock->turn == 0) { + cpu_relax(); + /* Get the latest updates from GPU */ + rmb(); + + if (time_after(jiffies, timeout)) + break; + } + + if (lock->gpu_req && lock->turn == 0) + return -EBUSY; + + return 0; +} + +void kgsl_hwunlock(struct cpu_gpu_lock *lock) +{ + /* Make sure all writes are done before releasing the lock */ + wmb(); + lock->cpu_req = 0; +} + +void kgsl_add_to_minidump(char *name, u64 virt_addr, u64 phy_addr, size_t size) +{ + struct md_region md_entry = {0}; + int ret; + + if (!msm_minidump_enabled()) + return; + + scnprintf(md_entry.name, sizeof(md_entry.name), name); + md_entry.virt_addr = virt_addr; + md_entry.phys_addr = phy_addr; + md_entry.size = size; + ret = msm_minidump_add_region(&md_entry); + if (ret < 0 && ret != -EEXIST) + pr_err("kgsl: Failed to register %s with minidump:%d\n", name, ret); + +} + +void kgsl_remove_from_minidump(char *name, u64 virt_addr, u64 phy_addr, size_t size) +{ + struct md_region md_entry = {0}; + int ret; + + if (!msm_minidump_enabled()) + return; + + scnprintf(md_entry.name, sizeof(md_entry.name), name); + md_entry.virt_addr = virt_addr; + md_entry.phys_addr = phy_addr; + md_entry.size = size; + ret = msm_minidump_remove_region(&md_entry); + if (ret < 0 && ret != -ENOENT) + pr_err("kgsl: Failed to remove %s from minidump\n", name); +} + +int kgsl_add_va_to_minidump(struct device *dev, const char *name, void *ptr, + size_t size) +{ + struct va_md_entry entry = {0}; + int ret; + + scnprintf(entry.owner, sizeof(entry.owner), name); + entry.vaddr = (u64)(ptr); + entry.size = size; + ret = qcom_va_md_add_region(&entry); + if (ret < 0) + dev_err(dev, "Failed to register %s with va_minidump: %d\n", name, + ret); + + return ret; +} + +static int kgsl_add_driver_data_to_va_minidump(struct kgsl_device *device) +{ + int ret; + char name[32]; + struct kgsl_pagetable *pt; + struct adreno_context *ctxt; + struct kgsl_process_private *p; + struct adreno_device *adreno_dev = 
ADRENO_DEVICE(device); + + ret = kgsl_add_va_to_minidump(device->dev, KGSL_DRIVER, + (void *)(&kgsl_driver), sizeof(struct kgsl_driver)); + if (ret) + return ret; + + ret = kgsl_add_va_to_minidump(device->dev, KGSL_SCRATCH_ENTRY, + device->scratch->hostptr, device->scratch->size); + if (ret) + return ret; + + ret = kgsl_add_va_to_minidump(device->dev, KGSL_MEMSTORE_ENTRY, + device->memstore->hostptr, device->memstore->size); + if (ret) + return ret; + + spin_lock(&adreno_dev->active_list_lock); + list_for_each_entry(ctxt, &adreno_dev->active_list, active_node) { + snprintf(name, sizeof(name), "kgsl_adreno_ctx_%d", ctxt->base.id); + ret = kgsl_add_va_to_minidump(device->dev, name, + (void *)(ctxt), sizeof(struct adreno_context)); + if (ret) + break; + } + spin_unlock(&adreno_dev->active_list_lock); + + read_lock(&kgsl_driver.proclist_lock); + list_for_each_entry(p, &kgsl_driver.process_list, list) { + snprintf(name, sizeof(name), "kgsl_proc_priv_%d", pid_nr(p->pid)); + ret = kgsl_add_va_to_minidump(device->dev, name, + (void *)(p), sizeof(struct kgsl_process_private)); + if (ret) + break; + } + read_unlock(&kgsl_driver.proclist_lock); + + spin_lock(&kgsl_driver.ptlock); + list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) { + snprintf(name, sizeof(name), "kgsl_pgtable_%d", pt->name); + ret = kgsl_add_va_to_minidump(device->dev, name, + (void *)(pt), sizeof(struct kgsl_pagetable)); + if (ret) + break; + } + spin_unlock(&kgsl_driver.ptlock); + + return ret; +} + +static int kgsl_va_minidump_callback(struct notifier_block *nb, + unsigned long action, void *unused) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(kgsl_driver.devp[0]); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (kgsl_add_driver_data_to_va_minidump(kgsl_driver.devp[0])) + return NOTIFY_BAD; + + if (gpudev->add_to_va_minidump(adreno_dev)) + return NOTIFY_BAD; + + return NOTIFY_OK; +} + +static struct notifier_block kgsl_va_minidump_nb = { + .priority = 
INT_MAX, + .notifier_call = kgsl_va_minidump_callback, +}; + +void kgsl_qcom_va_md_register(struct kgsl_device *device) +{ + if (!qcom_va_md_enabled()) + return; + + if (qcom_va_md_register("KGSL", &kgsl_va_minidump_nb)) + dev_err(device->dev, "Failed to register notifier with va_minidump\n"); +} diff --git a/kgsl_util.h b/kgsl_util.h new file mode 100644 index 0000000000..f2da379828 --- /dev/null +++ b/kgsl_util.h @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. + */ + +#ifndef _KGSL_UTIL_H_ +#define _KGSL_UTIL_H_ + +#define KGSL_DRIVER "kgsl_driver" +#define KGSL_ADRENO_DEVICE "kgsl_adreno_device" +#define KGSL_A6XX_DEVICE "kgsl_a6xx_device" +#define KGSL_GEN7_DEVICE "kgsl_gen7_device" +#define KGSL_HWSCHED_DEVICE "kgsl_hwsched_device" + +#define KGSL_SCRATCH_ENTRY "kgsl_scratch" +#define KGSL_MEMSTORE_ENTRY "kgsl_memstore" +#define KGSL_GMU_LOG_ENTRY "kgsl_gmu_log" +#define KGSL_HFIMEM_ENTRY "kgsl_hfi_mem" +#define KGSL_GMU_DUMPMEM_ENTRY "kgsl_gmu_dump_mem" + +struct regulator; +struct clk_bulk_data; + +/** + * struct cpu_gpu_lock - CP spinlock structure for power up list + * @gpu_req: flag value set by CP + * @cpu_req: flag value set by KMD + * @turn: turn variable set by both CP and KMD + * @list_length: this tells CP the last dword in the list: + * 16 + (4 * (List_Length - 1)) + * @list_offset: this tells CP the start of preemption only list: + * 16 + (4 * List_Offset) + */ +struct cpu_gpu_lock { + u32 gpu_req; + u32 cpu_req; + u32 turn; + u16 list_length; + u16 list_offset; +}; + +/** + * kgsl_hwlock - Try to get the spinlock + * @lock: cpu_gpu_lock structure + * + * Spin while the GPU has the lock. 
+ * + * Return: 0 if lock is successful, -EBUSY if timed out waiting for lock + */ +int kgsl_hwlock(struct cpu_gpu_lock *lock); + +/** + * kgsl_hwunlock - Release a previously grabbed lock + * @lock: cpu_gpu_lock structure + */ +void kgsl_hwunlock(struct cpu_gpu_lock *lock); + +/** + * kgsl_regulator_disable_wait - Disable a regulator and wait for it + * @reg: A &struct regulator handle + * @timeout: Time to wait (in milliseconds) + * + * Disable the regulator and wait @timeout milliseconds for it to enter the + * disabled state. + * + * Return: True if the regulator was disabled or false if it timed out + */ +bool kgsl_regulator_disable_wait(struct regulator *reg, u32 timeout); + +/** + * kgsl_of_clk_by_name - Return a clock device for a given name + * @clks: Pointer to an array of bulk clk data + * @count: Number of entries in the array + * @id: Name of the clock to search for + * + * Returns: A pointer to the clock device for the given name or NULL if not + * found + */ +struct clk *kgsl_of_clk_by_name(struct clk_bulk_data *clks, int count, + const char *id); +/** + * kgsl_regulator_set_voltage - Set voltage level for regulator + * @dev: A &struct device pointer + * @reg: A &struct regulator handle + * @voltage: Voltage value to set regulator + * + * Return: 0 on success and negative error on failure. 
+ */ +int kgsl_regulator_set_voltage(struct device *dev, + struct regulator *reg, u32 voltage); + +/** + * kgsl_clk_set_rate - Set a clock to a given rate + * @clks: Pointer to an array of bulk clk data + * @num_clks: Number of entries in the array + * @id: Name of the clock to search for + * @rate: Rate to set the clock to + * + * Return: 0 on success or negative error on failure + */ +int kgsl_clk_set_rate(struct clk_bulk_data *clks, int num_clks, + const char *id, unsigned long rate); + +/** + * kgsl_zap_shader_load - Load a zap shader + * @dev: Pointer to the struct device for the GPU platform device + * @name: Basename of the zap shader to load (without the postfix) + * + * Load and install the zap shader named @name. Name should be specified without + * the extension for example "a660_zap" instead of "a660_zap.mdt". + * + * Return: 0 on success or negative on failure + */ +int kgsl_zap_shader_load(struct device *dev, const char *name); + +/** + * kgsl_add_to_minidump - Add a physically contiguous section to minidump + * @name: Name of the section + * @virt_addr: Virtual address of the section + * @phy_addr: Physical address of the section + * @size: Size of the section + */ +void kgsl_add_to_minidump(char *name, u64 virt_addr, u64 phy_addr, size_t size); + +/** + * kgsl_remove_from_minidump - Remove a contiguous section from minidump + * @name: Name of the section + * @virt_addr: Virtual address of the section + * @phy_addr: Physical address of the section + * @size: Size of the section + */ +void kgsl_remove_from_minidump(char *name, u64 virt_addr, u64 phy_addr, size_t size); + +/** + * kgsl_add_va_to_minidump - Add a physically non-contiguous section to minidump + * @dev: Pointer to the struct device for the GPU platform device + * @name: Name of the section + * @ptr: Virtual address of the section + * @size: Size of the section + */ +int kgsl_add_va_to_minidump(struct device *dev, const char *name, void *ptr, + size_t size); + +/** + * kgsl_qcom_va_md_register
- Register driver with va-minidump + * @device: Pointer to kgsl device + */ +void kgsl_qcom_va_md_register(struct kgsl_device *device); + +#endif diff --git a/kgsl_vbo.c b/kgsl_vbo.c new file mode 100644 index 0000000000..75959b1475 --- /dev/null +++ b/kgsl_vbo.c @@ -0,0 +1,627 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include +#include + +#include "kgsl_device.h" +#include "kgsl_mmu.h" +#include "kgsl_sharedmem.h" +#include "kgsl_trace.h" + +struct kgsl_memdesc_bind_range { + struct kgsl_mem_entry *entry; + struct interval_tree_node range; +}; + +static struct kgsl_memdesc_bind_range *bind_to_range(struct interval_tree_node *node) +{ + return container_of(node, struct kgsl_memdesc_bind_range, range); +} + +static struct kgsl_memdesc_bind_range *bind_range_create(u64 start, u64 last, + struct kgsl_mem_entry *entry) +{ + struct kgsl_memdesc_bind_range *range = + kzalloc(sizeof(*range), GFP_KERNEL); + + if (!range) + return ERR_PTR(-ENOMEM); + + range->range.start = start; + range->range.last = last; + range->entry = kgsl_mem_entry_get(entry); + + if (!range->entry) { + kfree(range); + return ERR_PTR(-EINVAL); + } + + return range; +} + +static u64 bind_range_len(struct kgsl_memdesc_bind_range *range) +{ + return (range->range.last - range->range.start) + 1; +} + +void kgsl_memdesc_print_vbo_ranges(struct kgsl_mem_entry *entry, + struct seq_file *s) +{ + struct interval_tree_node *next; + struct kgsl_memdesc *memdesc = &entry->memdesc; + + if (!(memdesc->flags & KGSL_MEMFLAGS_VBO)) + return; + + /* + * We are called in an atomic context so try to get the mutex but if we + * don't then skip this item + */ + if (!mutex_trylock(&memdesc->ranges_lock)) + return; + + next = interval_tree_iter_first(&memdesc->ranges, 0, ~0UL); + while (next) { + struct kgsl_memdesc_bind_range *range = bind_to_range(next); + + seq_printf(s, "%5d %5d 0x%16.16lx-0x%16.16lx\n", 
+ entry->id, range->entry->id, range->range.start, + range->range.last); + + next = interval_tree_iter_next(next, 0, ~0UL); + } + + mutex_unlock(&memdesc->ranges_lock); +} + +static void kgsl_memdesc_remove_range(struct kgsl_mem_entry *target, + u64 start, u64 last, struct kgsl_mem_entry *entry) +{ + struct interval_tree_node *node, *next; + struct kgsl_memdesc_bind_range *range; + struct kgsl_memdesc *memdesc = &target->memdesc; + + mutex_lock(&memdesc->ranges_lock); + + next = interval_tree_iter_first(&memdesc->ranges, start, last); + while (next) { + node = next; + range = bind_to_range(node); + next = interval_tree_iter_next(node, start, last); + + if (range->entry->id == entry->id) { + interval_tree_remove(node, &memdesc->ranges); + trace_kgsl_mem_remove_bind_range(target, + range->range.start, range->entry, + bind_range_len(range)); + + kgsl_mmu_unmap_range(memdesc->pagetable, + memdesc, range->range.start, bind_range_len(range)); + + kgsl_mmu_map_zero_page_to_range(memdesc->pagetable, + memdesc, range->range.start, bind_range_len(range)); + + kfree(range); + } + } + + mutex_unlock(&memdesc->ranges_lock); +} + +static int kgsl_memdesc_add_range(struct kgsl_mem_entry *target, + u64 start, u64 last, struct kgsl_mem_entry *entry, u64 offset) +{ + struct interval_tree_node *node, *next; + struct kgsl_memdesc *memdesc = &target->memdesc; + struct kgsl_memdesc_bind_range *range = + bind_range_create(start, last, entry); + + if (IS_ERR(range)) + return PTR_ERR(range); + + mutex_lock(&memdesc->ranges_lock); + + /* + * Unmap the range first. 
This increases the potential for a page fault + * but is safer in case something goes bad while updating the interval + * tree + */ + kgsl_mmu_unmap_range(memdesc->pagetable, memdesc, start, + last - start + 1); + + next = interval_tree_iter_first(&memdesc->ranges, start, last); + + while (next) { + struct kgsl_memdesc_bind_range *cur; + + node = next; + cur = bind_to_range(node); + next = interval_tree_iter_next(node, start, last); + + trace_kgsl_mem_remove_bind_range(target, cur->range.start, + cur->entry, bind_range_len(cur)); + + interval_tree_remove(node, &memdesc->ranges); + + if (start <= cur->range.start) { + if (last >= cur->range.last) { + kgsl_mem_entry_put(cur->entry); + kfree(cur); + continue; + } + /* Adjust the start of the mapping */ + cur->range.start = last + 1; + /* And put it back into the tree */ + interval_tree_insert(node, &memdesc->ranges); + + trace_kgsl_mem_add_bind_range(target, + cur->range.start, cur->entry, bind_range_len(cur)); + } else { + if (last < cur->range.last) { + struct kgsl_memdesc_bind_range *temp; + + /* + * The range is split into two so make a new + * entry for the far side + */ + temp = bind_range_create(last + 1, cur->range.last, + cur->entry); + /* FIXME: Uhoh, this would be bad */ + BUG_ON(IS_ERR(temp)); + + interval_tree_insert(&temp->range, + &memdesc->ranges); + + trace_kgsl_mem_add_bind_range(target, + temp->range.start, + temp->entry, bind_range_len(temp)); + } + + cur->range.last = start - 1; + interval_tree_insert(node, &memdesc->ranges); + + trace_kgsl_mem_add_bind_range(target, cur->range.start, + cur->entry, bind_range_len(cur)); + } + } + + /* Add the new range */ + interval_tree_insert(&range->range, &memdesc->ranges); + + trace_kgsl_mem_add_bind_range(target, range->range.start, + range->entry, bind_range_len(range)); + mutex_unlock(&memdesc->ranges_lock); + + return kgsl_mmu_map_child(memdesc->pagetable, memdesc, start, + &entry->memdesc, offset, last - start + 1); +} + +static void 
kgsl_sharedmem_vbo_put_gpuaddr(struct kgsl_memdesc *memdesc) +{ + struct interval_tree_node *node, *next; + struct kgsl_memdesc_bind_range *range; + + /* Unmap the entire pagetable region */ + kgsl_mmu_unmap_range(memdesc->pagetable, memdesc, + 0, memdesc->size); + + /* Put back the GPU address */ + kgsl_mmu_put_gpuaddr(memdesc->pagetable, memdesc); + + memdesc->gpuaddr = 0; + memdesc->pagetable = NULL; + + /* + * FIXME: do we have a use after free potential here? We might need to + * lock this and set a "do not update" bit + */ + + /* Now delete each range and release the mem entries */ + next = interval_tree_iter_first(&memdesc->ranges, 0, ~0UL); + + while (next) { + node = next; + range = bind_to_range(node); + next = interval_tree_iter_next(node, 0, ~0UL); + + interval_tree_remove(node, &memdesc->ranges); + kgsl_mem_entry_put(range->entry); + kfree(range); + } +} + +static struct kgsl_memdesc_ops kgsl_vbo_ops = { + .put_gpuaddr = kgsl_sharedmem_vbo_put_gpuaddr, +}; + +int kgsl_sharedmem_allocate_vbo(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, u64 size, u64 flags) +{ + size = PAGE_ALIGN(size); + + /* Make sure that VBOs are supported by the MMU */ + if (WARN_ON_ONCE(!kgsl_mmu_has_feature(device, + KGSL_MMU_SUPPORT_VBO))) + return -EOPNOTSUPP; + + kgsl_memdesc_init(device, memdesc, flags); + memdesc->priv = 0; + + memdesc->ops = &kgsl_vbo_ops; + memdesc->size = size; + + /* Set up the interval tree and lock */ + memdesc->ranges = RB_ROOT_CACHED; + mutex_init(&memdesc->ranges_lock); + + return 0; +} + +static bool kgsl_memdesc_check_range(struct kgsl_memdesc *memdesc, + u64 offset, u64 length) +{ + return ((offset < memdesc->size) && + (offset + length > offset) && + (offset + length) <= memdesc->size); +} + +static void kgsl_sharedmem_free_bind_op(struct kgsl_sharedmem_bind_op *op) +{ + int i; + + if (IS_ERR_OR_NULL(op)) + return; + + for (i = 0; i < op->nr_ops; i++) + kgsl_mem_entry_put(op->ops[i].entry); + + kgsl_mem_entry_put(op->target); + + 
kvfree(op->ops); + kfree(op); +} + +struct kgsl_sharedmem_bind_op * +kgsl_sharedmem_create_bind_op(struct kgsl_process_private *private, + u32 target_id, void __user *ranges, u32 ranges_nents, + u64 ranges_size) +{ + struct kgsl_sharedmem_bind_op *op; + struct kgsl_mem_entry *target; + int ret, i; + + /* There must be at least one defined operation */ + if (!ranges_nents) + return ERR_PTR(-EINVAL); + + /* Find the target memory entry */ + target = kgsl_sharedmem_find_id(private, target_id); + if (!target) + return ERR_PTR(-ENOENT); + + if (!(target->memdesc.flags & KGSL_MEMFLAGS_VBO)) { + kgsl_mem_entry_put(target); + return ERR_PTR(-EINVAL); + } + + /* Make a container for the bind operations */ + op = kzalloc(sizeof(*op), GFP_KERNEL); + if (!op) { + kgsl_mem_entry_put(target); + return ERR_PTR(-ENOMEM); + } + + /* + * Make an array for the individual operations. Use __GFP_NOWARN and + * __GFP_NORETRY to make sure a very large request quietly fails + */ + op->ops = kvcalloc(ranges_nents, sizeof(*op->ops), + GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); + if (!op->ops) { + kfree(op); + kgsl_mem_entry_put(target); + return ERR_PTR(-ENOMEM); + } + + op->nr_ops = ranges_nents; + op->target = target; + + for (i = 0; i < ranges_nents; i++) { + struct kgsl_gpumem_bind_range range; + struct kgsl_mem_entry *entry; + u32 size; + + size = min_t(u32, sizeof(range), ranges_size); + + ret = -EINVAL; + + if (copy_from_user(&range, ranges, size)) { + ret = -EFAULT; + goto err; + } + + /* The offset must be page aligned */ + if (!PAGE_ALIGNED(range.target_offset)) + goto err; + + /* The length of the operation must be aligned and non zero */ + if (!range.length || !PAGE_ALIGNED(range.length)) + goto err; + + /* Make sure the range fits in the target */ + if (!kgsl_memdesc_check_range(&target->memdesc, + range.target_offset, range.length)) + goto err; + + /* Get the child object */ + op->ops[i].entry = kgsl_sharedmem_find_id(private, + range.child_id); + entry = op->ops[i].entry; + 
if (!entry) { + ret = -ENOENT; + goto err; + } + + /* Make sure the child is not a VBO */ + if ((entry->memdesc.flags & KGSL_MEMFLAGS_VBO)) { + ret = -EINVAL; + goto err; + } + + /* + * Make sure that only secure children are mapped in secure VBOs + * and vice versa + */ + if ((target->memdesc.flags & KGSL_MEMFLAGS_SECURE) != + (entry->memdesc.flags & KGSL_MEMFLAGS_SECURE)) { + ret = -EPERM; + goto err; + } + + /* Make sure the range operation is valid */ + if (range.op != KGSL_GPUMEM_RANGE_OP_BIND && + range.op != KGSL_GPUMEM_RANGE_OP_UNBIND) + goto err; + + if (range.op == KGSL_GPUMEM_RANGE_OP_BIND) { + if (!PAGE_ALIGNED(range.child_offset)) + goto err; + + /* Make sure the range fits in the child */ + if (!kgsl_memdesc_check_range(&entry->memdesc, + range.child_offset, range.length)) + goto err; + } else { + /* For unop operations the child offset must be 0 */ + if (range.child_offset) + goto err; + } + + op->ops[i].entry = entry; + op->ops[i].start = range.target_offset; + op->ops[i].last = range.target_offset + range.length - 1; + op->ops[i].child_offset = range.child_offset; + op->ops[i].op = range.op; + + ranges += ranges_size; + } + + kref_init(&op->ref); + + return op; + +err: + kgsl_sharedmem_free_bind_op(op); + return ERR_PTR(ret); +} + +void kgsl_sharedmem_bind_range_destroy(struct kref *kref) +{ + struct kgsl_sharedmem_bind_op *op = container_of(kref, + struct kgsl_sharedmem_bind_op, ref); + + kgsl_sharedmem_free_bind_op(op); +} + +static void kgsl_sharedmem_bind_worker(struct work_struct *work) +{ + struct kgsl_sharedmem_bind_op *op = container_of(work, + struct kgsl_sharedmem_bind_op, work); + int i; + + for (i = 0; i < op->nr_ops; i++) { + if (op->ops[i].op == KGSL_GPUMEM_RANGE_OP_BIND) + kgsl_memdesc_add_range(op->target, + op->ops[i].start, + op->ops[i].last, + op->ops[i].entry, + op->ops[i].child_offset); + else + kgsl_memdesc_remove_range(op->target, + op->ops[i].start, + op->ops[i].last, + op->ops[i].entry); + + /* Release the reference on the 
child entry */ + kgsl_mem_entry_put(op->ops[i].entry); + op->ops[i].entry = NULL; + } + + /* Release the reference on the target entry */ + kgsl_mem_entry_put(op->target); + op->target = NULL; + + if (op->callback) + op->callback(op); + + kref_put(&op->ref, kgsl_sharedmem_bind_range_destroy); +} + +void kgsl_sharedmem_bind_ranges(struct kgsl_sharedmem_bind_op *op) +{ + /* Take a reference to the operation while it is scheduled */ + kref_get(&op->ref); + + INIT_WORK(&op->work, kgsl_sharedmem_bind_worker); + schedule_work(&op->work); +} + +struct kgsl_sharedmem_bind_fence { + struct dma_fence base; + spinlock_t lock; + int fd; + struct kgsl_sharedmem_bind_op *op; +}; + +static const char *bind_fence_get_driver_name(struct dma_fence *fence) +{ + return "kgsl_sharedmem_bind"; +} + +static const char *bind_fence_get_timeline_name(struct dma_fence *fence) +{ + return "(unbound)"; +} + +static void bind_fence_release(struct dma_fence *fence) +{ + struct kgsl_sharedmem_bind_fence *bind_fence = container_of(fence, + struct kgsl_sharedmem_bind_fence, base); + + kgsl_sharedmem_put_bind_op(bind_fence->op); + kfree(bind_fence); +} + +static void +kgsl_sharedmem_bind_fence_callback(struct kgsl_sharedmem_bind_op *op) +{ + struct kgsl_sharedmem_bind_fence *bind_fence = op->data; + + dma_fence_signal(&bind_fence->base); + dma_fence_put(&bind_fence->base); +} + +static const struct dma_fence_ops kgsl_sharedmem_bind_fence_ops = { + .get_driver_name = bind_fence_get_driver_name, + .get_timeline_name = bind_fence_get_timeline_name, + .release = bind_fence_release, +}; + +static struct kgsl_sharedmem_bind_fence * +kgsl_sharedmem_bind_fence(struct kgsl_sharedmem_bind_op *op) +{ + struct kgsl_sharedmem_bind_fence *fence; + struct sync_file *sync_file; + int fd; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&fence->lock); + + dma_fence_init(&fence->base, &kgsl_sharedmem_bind_fence_ops, + &fence->lock, 
dma_fence_context_alloc(1), 0); + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) { + kfree(fence); + return ERR_PTR(fd); + } + + sync_file = sync_file_create(&fence->base); + if (!sync_file) { + put_unused_fd(fd); + kfree(fence); + return ERR_PTR(-ENOMEM); + } + + fd_install(fd, sync_file->file); + + fence->fd = fd; + fence->op = op; + + return fence; +} + +static void +kgsl_sharedmem_bind_async_callback(struct kgsl_sharedmem_bind_op *op) +{ + struct completion *comp = op->data; + + complete(comp); +} + +long kgsl_ioctl_gpumem_bind_ranges(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + DECLARE_COMPLETION_ONSTACK(sync); + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_gpumem_bind_ranges *param = data; + struct kgsl_sharedmem_bind_op *op; + int ret; + + /* If ranges_size isn't set, return the expected size to the user */ + if (!param->ranges_size) { + param->ranges_size = sizeof(struct kgsl_gpumem_bind_range); + return 0; + } + + /* FENCE_OUT only makes sense with ASYNC */ + if ((param->flags & KGSL_GPUMEM_BIND_FENCE_OUT) && + !(param->flags & KGSL_GPUMEM_BIND_ASYNC)) + return -EINVAL; + + op = kgsl_sharedmem_create_bind_op(private, param->id, + u64_to_user_ptr(param->ranges), param->ranges_nents, + param->ranges_size); + if (IS_ERR(op)) + return PTR_ERR(op); + + if (param->flags & KGSL_GPUMEM_BIND_ASYNC) { + struct kgsl_sharedmem_bind_fence *fence; + + if (param->flags & KGSL_GPUMEM_BIND_FENCE_OUT) { + fence = kgsl_sharedmem_bind_fence(op); + + if (IS_ERR(fence)) { + kgsl_sharedmem_put_bind_op(op); + return PTR_ERR(fence); + } + + op->data = fence; + op->callback = kgsl_sharedmem_bind_fence_callback; + param->fence_id = fence->fd; + } + + kgsl_sharedmem_bind_ranges(op); + + if (!(param->flags & KGSL_GPUMEM_BIND_FENCE_OUT)) + kgsl_sharedmem_put_bind_op(op); + + return 0; + } + + /* For synchronous operations add a completion to wait on */ + op->callback = kgsl_sharedmem_bind_async_callback; + op->data = 
&sync; + + init_completion(&sync); + + /* + * Schedule the work. All the resources will be released after + * the bind operation is done + */ + kgsl_sharedmem_bind_ranges(op); + + ret = wait_for_completion_interruptible(&sync); + kgsl_sharedmem_put_bind_op(op); + + return ret; +} diff --git a/msm_adreno_devfreq.h b/msm_adreno_devfreq.h new file mode 100644 index 0000000000..be366cda04 --- /dev/null +++ b/msm_adreno_devfreq.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2016-2021, The Linux Foundation. All rights reserved. + */ + +#ifndef MSM_ADRENO_DEVFREQ_H +#define MSM_ADRENO_DEVFREQ_H + +#include +#include + +#define DEVFREQ_FLAG_WAKEUP_MAXFREQ 0x2 +#define DEVFREQ_FLAG_FAST_HINT 0x4 +#define DEVFREQ_FLAG_SLOW_HINT 0x8 + +struct device; + +/* same as KGSL_MAX_PWRLEVELS */ +#define MSM_ADRENO_MAX_PWRLEVELS 16 + +struct xstats { + u64 ram_time; + u64 ram_wait; + int buslevel; +}; + +struct devfreq_msm_adreno_tz_data { + struct notifier_block nb; + struct { + s64 total_time; + s64 busy_time; + u32 ctxt_aware_target_pwrlevel; + u32 ctxt_aware_busy_penalty; + } bin; + struct { + u64 total_time; + u64 ram_time; + u64 ram_wait; + u64 gpu_time; + u32 num; + u32 max; + u32 width; + u32 *up; + u32 *down; + s32 *p_up; + s32 *p_down; + u32 *ib_kbps; + bool floating; + } bus; + unsigned int device_id; + bool is_64; + bool disable_busy_time_burst; + bool ctxt_aware_enable; + /* Multiplier to change gpu busy status */ + u32 mod_percent; +}; + +struct msm_adreno_extended_profile { + struct devfreq_msm_adreno_tz_data *private_data; + struct devfreq_dev_profile profile; +}; + +struct msm_busmon_extended_profile { + u32 flag; + u32 sampling_ms; + unsigned long percent_ab; + unsigned long ab_mbytes; + struct devfreq_msm_adreno_tz_data *private_data; + struct devfreq_dev_profile profile; +}; + +typedef void(*getbw_func)(unsigned long *, unsigned long *, void *); + +int devfreq_vbif_update_bw(void); +void 
devfreq_vbif_register_callback(getbw_func func, void *data); + +#endif From 019de8b3ddc4540d451ffda14228a5eca83b7f96 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Mon, 11 Oct 2021 14:10:22 -0700 Subject: [PATCH 0003/1016] msm: kgsl: Update build files for Kalama Update the build files for the Kalama target. Change-Id: I0b3495715e3affed4e8e0d7ea504bc41973efabf Signed-off-by: Lynus Vaz --- Android.mk | 13 ++++++++++--- Kbuild | 3 +++ gfx_kernel_board.mk | 3 ++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/Android.mk b/Android.mk index cad91bfab6..63924eac51 100644 --- a/Android.mk +++ b/Android.mk @@ -1,9 +1,14 @@ -# Test dlkm -DLKM_DIR := device/qcom/common/dlkm +ifneq ($(TARGET_USES_QMAA),true) KGSL_SELECT := CONFIG_QCOM_KGSL=m -KERN_SRC := $(ANDROID_TOP)/kernel_platform/msm-kernel LOCAL_PATH := $(call my-dir) +include $(CLEAR_VARS) + +# This makefile is only for DLKM +ifneq ($(findstring vendor,$(LOCAL_PATH)),) + +DLKM_DIR := device/qcom/common/dlkm +KERN_SRC := $(ANDROID_TOP)/kernel_platform/msm-kernel KBUILD_OPTIONS += BOARD_PLATFORM=$(TARGET_BOARD_PLATFORM) KBUILD_OPTIONS += $(KGSL_SELECT) @@ -27,3 +32,5 @@ LOCAL_MODULE_PATH := $(KERNEL_MODULES_OUT) BOARD_VENDOR_KERNEL_MODULES += $(LOCAL_MODULE_PATH)/$(LOCAL_MODULE) include $(DLKM_DIR)/Build_external_kernelmodule.mk +endif # DLKM check +endif # QMAA check diff --git a/Kbuild b/Kbuild index b1212a31a7..b4d7e582ed 100644 --- a/Kbuild +++ b/Kbuild @@ -9,6 +9,9 @@ endif ifeq ($(CONFIG_ARCH_WAIPIO), y) include $(KGSL_PATH)/config/gki_waipiodisp.conf endif +ifeq ($(CONFIG_ARCH_KALAMA), y) + include $(KGSL_PATH)/config/gki_waipiodisp.conf +endif ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERN_SRC)/drivers/devfreq diff --git a/gfx_kernel_board.mk b/gfx_kernel_board.mk index 238b8ff9ed..33beab8906 100644 --- a/gfx_kernel_board.mk +++ b/gfx_kernel_board.mk @@ -1,5 +1,6 @@ #SPDX-License-Identifier: GPL-2.0-only +ifneq ($(TARGET_USES_QMAA),true) ifneq 
($(TARGET_BOARD_AUTO),true) ifeq ($(call is-board-platform-in-list,$(TARGET_BOARD_PLATFORM)),true) BOARD_VENDOR_KERNEL_MODULES += $(KERNEL_MODULES_OUT)/msm_kgsl.ko @@ -7,4 +8,4 @@ ifneq ($(TARGET_BOARD_AUTO),true) BOARD_VENDOR_RAMDISK_RECOVERY_KERNEL_MODULES_LOAD += $(KERNEL_MODULES_OUT)/msm_kgsl.ko endif endif - +endif From a23c8798bf6d56b4e516141fff9e18cfb3593b97 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Thu, 11 Nov 2021 19:06:57 -0800 Subject: [PATCH 0004/1016] msm: kgsl: Remove obsolete IOMMU API calls Convert uses of the obsolete iommu_domain_{get,set}_attr() to the newer specific functions that perform the same purpose. Change-Id: Idb6f27f4cacffca3612e583c54992ae10cb7c55d Signed-off-by: Lynus Vaz --- adreno_a6xx_gmu.c | 4 +-- adreno_gen7_gmu.c | 4 +-- kgsl_iommu.c | 74 +++++++++++++++++------------------------------ 3 files changed, 28 insertions(+), 54 deletions(-) diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index 2f1f25d194..90fb4b23f8 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -2580,7 +2580,6 @@ static int a6xx_gmu_iommu_fault_handler(struct iommu_domain *domain, static int a6xx_gmu_iommu_init(struct a6xx_gmu_device *gmu) { int ret; - int no_stall = 1; gmu->domain = iommu_domain_alloc(&platform_bus_type); if (gmu->domain == NULL) { @@ -2593,8 +2592,7 @@ static int a6xx_gmu_iommu_init(struct a6xx_gmu_device *gmu) * This sets SCTLR.CFCFG = 0. * Also note that, the smmu driver sets SCTLR.HUPCF = 0 by default. 
*/ - iommu_domain_set_attr(gmu->domain, - DOMAIN_ATTR_FAULT_MODEL_NO_STALL, &no_stall); + qcom_iommu_set_fault_model(gmu->domain, QCOM_IOMMU_FAULT_MODEL_NO_STALL); ret = iommu_attach_device(gmu->domain, &gmu->pdev->dev); if (!ret) { diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 867d683037..da1a55ea50 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -1917,7 +1917,6 @@ static int gen7_gmu_iommu_fault_handler(struct iommu_domain *domain, static int gen7_gmu_iommu_init(struct gen7_gmu_device *gmu) { int ret; - int no_stall = 1; gmu->domain = iommu_domain_alloc(&platform_bus_type); if (gmu->domain == NULL) { @@ -1930,8 +1929,7 @@ static int gen7_gmu_iommu_init(struct gen7_gmu_device *gmu) * This sets SCTLR.CFCFG = 0. * Also note that, the smmu driver sets SCTLR.HUPCF = 0 by default. */ - iommu_domain_set_attr(gmu->domain, - DOMAIN_ATTR_FAULT_MODEL_NO_STALL, &no_stall); + qcom_iommu_set_fault_model(gmu->domain, QCOM_IOMMU_FAULT_MODEL_NO_STALL); ret = iommu_attach_device(gmu->domain, &gmu->pdev->dev); if (!ret) { diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 82518dcdc0..16a006c82a 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -74,46 +74,26 @@ static const struct iommu_flush_ops kgsl_iopgtbl_tlb_ops = { .tlb_add_page = _tlb_add_page, }; -static bool _iommu_domain_check_bool(struct iommu_domain *domain, int attr) -{ - u32 val; - int ret = iommu_domain_get_attr(domain, attr, &val); - - return (!ret && val); -} - -static int _iommu_domain_context_bank(struct iommu_domain *domain) -{ - int val, ret; - - ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_CONTEXT_BANK, &val); - - return ret ? 
ret : val; -} - static struct kgsl_iommu_pt *to_iommu_pt(struct kgsl_pagetable *pagetable) { return container_of(pagetable, struct kgsl_iommu_pt, base); } -static u32 get_llcc_flags(struct iommu_domain *domain) +static u32 get_llcc_flags(struct kgsl_mmu *mmu) { - if (_iommu_domain_check_bool(domain, DOMAIN_ATTR_USE_LLC_NWA)) - return IOMMU_USE_LLC_NWA; - - if (_iommu_domain_check_bool(domain, DOMAIN_ATTR_USE_UPSTREAM_HINT)) + if (mmu->subtype == KGSL_IOMMU_SMMU_V500) + return (test_bit(KGSL_MMU_IO_COHERENT, &mmu->features)) ? + 0 : IOMMU_USE_LLC_NWA; + else return IOMMU_USE_UPSTREAM_HINT; - - return 0; } - -static int _iommu_get_protection_flags(struct iommu_domain *domain, +static int _iommu_get_protection_flags(struct kgsl_mmu *mmu, struct kgsl_memdesc *memdesc) { int flags = IOMMU_READ | IOMMU_WRITE | IOMMU_NOEXEC; - flags |= get_llcc_flags(domain); + flags |= get_llcc_flags(mmu); if (memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY) flags &= ~IOMMU_WRITE; @@ -282,7 +262,7 @@ kgsl_iopgtbl_map_child(struct kgsl_pagetable *pt, struct kgsl_memdesc *memdesc, return ret; /* Inherit the flags from the child for this mapping */ - flags = _iommu_get_protection_flags(domain, child); + flags = _iommu_get_protection_flags(pt->mmu, child); ret = _iopgtbl_map_sg(iommu_pt, memdesc->gpuaddr + offset, &sgt, flags); @@ -332,7 +312,6 @@ static int kgsl_iopgtbl_map_zero_page_to_range(struct kgsl_pagetable *pt, struct kgsl_memdesc *memdesc, u64 offset, u64 length) { struct kgsl_iommu *iommu = &pt->mmu->iommu; - struct iommu_domain *domain = to_iommu_domain(&iommu->user_context); /* * The SMMU only does the PRT compare at the bottom level of the page table, because * there is not an easy way for the hardware to perform this check at earlier levels. 
@@ -340,7 +319,7 @@ static int kgsl_iopgtbl_map_zero_page_to_range(struct kgsl_pagetable *pt, * of this zero page is programmed in PRR register, MMU will intercept any accesses to * the page before they go to DDR and will terminate the transaction. */ - u32 flags = IOMMU_READ | IOMMU_WRITE | IOMMU_NOEXEC | get_llcc_flags(domain); + u32 flags = IOMMU_READ | IOMMU_WRITE | IOMMU_NOEXEC | get_llcc_flags(pt->mmu); struct kgsl_iommu_pt *iommu_pt = to_iommu_pt(pt); struct page *page = kgsl_vbo_zero_page; @@ -363,12 +342,11 @@ static int kgsl_iopgtbl_map(struct kgsl_pagetable *pagetable, { struct kgsl_iommu_pt *pt = to_iommu_pt(pagetable); struct kgsl_iommu *iommu = &pagetable->mmu->iommu; - struct iommu_domain *domain = to_iommu_domain(&iommu->user_context); size_t mapped, padding; int prot; /* Get the protection flags for the user context */ - prot = _iommu_get_protection_flags(domain, memdesc); + prot = _iommu_get_protection_flags(pagetable->mmu, memdesc); if (memdesc->sgt) mapped = _iopgtbl_map_sg(pt, memdesc->gpuaddr, @@ -465,9 +443,10 @@ static size_t _iommu_map_sg(struct iommu_domain *domain, u64 gpuaddr, } static int -_kgsl_iommu_map(struct iommu_domain *domain, struct kgsl_memdesc *memdesc) +_kgsl_iommu_map(struct kgsl_mmu *mmu, struct iommu_domain *domain, + struct kgsl_memdesc *memdesc) { - int prot = _iommu_get_protection_flags(domain, memdesc); + int prot = _iommu_get_protection_flags(mmu, memdesc); size_t mapped, padding; int ret = 0; @@ -519,7 +498,7 @@ static int kgsl_iommu_secure_map(struct kgsl_pagetable *pagetable, struct kgsl_iommu *iommu = &pagetable->mmu->iommu; struct iommu_domain *domain = to_iommu_domain(&iommu->secure_context); - return _kgsl_iommu_map(domain, memdesc); + return _kgsl_iommu_map(pagetable->mmu, domain, memdesc); } /* @@ -557,13 +536,13 @@ static int kgsl_iommu_default_map(struct kgsl_pagetable *pagetable, domain = to_iommu_domain(&iommu->user_context); /* Map the object to the default GPU domain */ - ret = _kgsl_iommu_map(domain, 
memdesc); + ret = _kgsl_iommu_map(mmu, domain, memdesc); /* Also map the object to the LPAC domain if it exists */ lpac = to_iommu_domain(&iommu->lpac_context); if (!ret && lpac) { - ret = _kgsl_iommu_map(lpac, memdesc); + ret = _kgsl_iommu_map(mmu, lpac, memdesc); /* On failure, also unmap from the default domain */ if (ret) @@ -1097,7 +1076,7 @@ static int kgsl_iommu_get_context_bank(struct kgsl_pagetable *pt) struct kgsl_iommu *iommu = to_kgsl_iommu(pt); struct iommu_domain *domain = to_iommu_domain(&iommu->user_context); - return _iommu_domain_context_bank(domain); + return qcom_iommu_get_context_bank_nr(domain); } static void kgsl_iommu_destroy_default_pagetable(struct kgsl_pagetable *pagetable) @@ -1131,15 +1110,15 @@ static void kgsl_iommu_destroy_pagetable(struct kgsl_pagetable *pagetable) static void _enable_gpuhtw_llc(struct kgsl_mmu *mmu, struct iommu_domain *domain) { - int val = 1; - if (!test_bit(KGSL_MMU_LLCC_ENABLE, &mmu->features)) return; - if (mmu->subtype == KGSL_IOMMU_SMMU_V500) - iommu_domain_set_attr(domain, DOMAIN_ATTR_USE_LLC_NWA, &val); - else - iommu_domain_set_attr(domain, DOMAIN_ATTR_USE_UPSTREAM_HINT, &val); + if (mmu->subtype == KGSL_IOMMU_SMMU_V500) { + if (!test_bit(KGSL_MMU_IO_COHERENT, &mmu->features)) + iommu_set_pgtable_quirks(domain, + IO_PGTABLE_QUIRK_QCOM_USE_LLC_NWA); + } else + iommu_set_pgtable_quirks(domain, IO_PGTABLE_QUIRK_ARM_OUTER_WBWA); } static int set_smmu_aperture(struct kgsl_device *device, @@ -2059,7 +2038,7 @@ static int kgsl_iommu_setup_context(struct kgsl_mmu *mmu, iommu_set_fault_handler(context->domain, handler, mmu); - context->cb_num = _iommu_domain_context_bank(context->domain); + context->cb_num = qcom_iommu_get_context_bank_nr(context->domain); if (context->cb_num >= 0) return 0; @@ -2156,8 +2135,7 @@ static int iommu_probe_secure_context(struct kgsl_device *device, return -ENODEV; } - ret = iommu_domain_set_attr(context->domain, DOMAIN_ATTR_SECURE_VMID, - &secure_vmid); + ret = 
qcom_iommu_set_secure_vmid(context->domain, secure_vmid); if (ret) { dev_err(device->dev, "Unable to set the secure VMID: %d\n", ret); iommu_domain_free(context->domain); @@ -2180,7 +2158,7 @@ static int iommu_probe_secure_context(struct kgsl_device *device, iommu_set_fault_handler(context->domain, kgsl_iommu_secure_fault_handler, mmu); - context->cb_num = _iommu_domain_context_bank(context->domain); + context->cb_num = qcom_iommu_get_context_bank_nr(context->domain); if (context->cb_num < 0) { iommu_detach_device(context->domain, &context->pdev->dev); From 9ba960e696ba1793100dacd8ca8de4ad29ba5e58 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Thu, 11 Nov 2021 18:57:59 -0800 Subject: [PATCH 0005/1016] msm: kgsl: Catchup from msm-5.10 Update KGSL code to msm-5.10 commit f3801b189e25 (Merge "msm: kgsl: Correct a6xx CP init sequence"). This includes the following changes from msm-5.10: 4aabff3db0cf (msm: kgsl: Add support to query process memory usage) dcf93c3a2f0b (msm: kgsl: Vote ddr perf mode for better performance) dc70d7f2961e (msm: kgsl: Enable preemption for gen7 GPU) 211d943edc87 (msm: kgsl: Provide a mechanism to unbind a range in VBO) 45d7e571a332 (msm: kgsl: Correctly validate devfreq_add_device return value) a07afc4e1477 (msm: kgsl: Reclaim gpu pages asynchronously) 8b5de66d4f1d (msm: kgsl: Fix uninitialized variable use in kgsl_iommu_print_fault()) 0bd2f8ba6a8f (msm: kgsl: Use kmalloc() for HFI message storage) 79f18623e7f1 (msm: kgsl: Make sure BR doesn't race ahead of BV) 48fc67d2bcfb (msm: kgsl: Get rid of per ringbuffer scratch memory) 96f7537ccfcd (msm: kgsl: Remove redundant SET_PSEUDO_REGISTER packets) ca1cbeedfcd6 (msm: kgsl: Fix return value due to devfreq_add_device() failure) 2092f3df2fae (msm: kgsl: Add enable_signaling for timeline fence) 6f24e5a5112e (msm: kgsl: Remove unneeded NULL check when submitting commands) 9d13d7a5ca6d (msm: kgsl: Pre-allocate page pool tracking structures) 7e5a0845ad64 (msm: kgsl: Add Gen7 support for RDPM frequency 
register writes) 682d0ea32d62 (msm: kgsl: Add RDPM update for GMU frequency changes) 2eddfeae4850 (msm: kgsl: Add support for the new bit in RESET_CONTEXT_STATE packet) 1141005a4787 (msm: kgsl: Add support for A662 GPU) fc04d956e44a (msm: kgsl: Fix gmu power counter reg address) 286d5d09faf1 (msm: kgsl: Remove invalid kgsl_process_private_put) 4b5e16dbec0d (msm: kgsl: Update the list of protected registers for A730) 3a107bfc062a (msm: kgsl: Bail out of allocation loops if a SIG_KILL is pending) ca22fde3705d (msm: kgsl: Fix out of bound write in adreno_profile_submit_time) 73d27010f02f (msm: kgsl: Avoid parsing IBs during atomic snapshot) 684032d39451 (msm: kgsl: Correct VMID flag for secure buffer validation) 8548ed02e179 (msm: kgsl: Expose memtype stats through sysfs) 52757e33639a (msm: kgsl: Enable Preemption on A662 GPU) a8edbf590967 (msm: kgsl: Add support for new GMU uncached VA range) ff25ecb13ad5 (msm: kgsl: Remove process debugfs and kobject without mutex) 397f7d63607e (msm: kgsl: Fix clockgating values for various blocks) 06f837b9da4f (msm: kgsl: Allow concurrent requests for oob_perfcntr) 73a66962fe20 (msm: kgsl: Make the Adreno trace instance enabled through Kconfig) 6d65fed34558 (msm: kgsl: Update range checking when building voltage tables) 698d612ba43d (msm: kgsl: Verify secure access before importing buffers) 1a278333d0c7 (msm: kgsl: Fix syncsource spinlock recursion) 82c89093b63b (msm: kgsl: Fix HWSCHED_MAX_DISPATCH_NUMIBS) e6d7e1d4bdc3 (msm: kgsl: Set max ACD levels to match max power levels) eb56597e5392 (msm: kgsl: Ensure global entry free stays in bounds) 8a8fbdf97a7a (msm: kgsl: Ensure local variables are set before use) 402bb87906b3 (msm: kgsl: Take snapshot if GPU isn't idle before slumber) 6341d395821b (msm: kgsl: Fix syncpoint timeout log for timeline fences) 0058b5eb2760 (msm: kgsl: Add inflight commands to tracepoint) 127ac415117e (msm: kgsl: Fix NULL pointer dereference) 4433948157f8 (msm: kgsl: Do not process HFI queues in F2H daemon 
during init sequence) 6a298b62c2d7 (msm: kgsl: Increase wait during kgsl_open) e5fd445b298e (msm: kgsl: Update register protection config) eff8f6e07da7 (msm: kgsl: Set min_pwrlevel based on configuration) e13459b66d4a (msm: kgsl: Log unprotected write address) c19ed67ffbcf (msm: kgsl: Free up iommu page tables on process close) d221f9dd6c44 (msm: kgsl: Update the IFPC power up reglist) 697143a7d17a (msm: kgsl: Update GPUCC Offsets for A662) 180c1d5e124a (msm: kgsl: Add support for C501 GPU) f583f456d0a2 (msm: kgsl: Correct a6xx CP init sequence) dde4355ea92d (msm: kgsl: Add GBIF L2 CGC control with A6x CGC) Change-Id: Ib679fb0b2cb47b79e7caed531de0c0aa7ef0558d Signed-off-by: Lynus Vaz --- Kconfig | 29 +++++++ a6xx_reg.h | 7 +- adreno-gpulist.h | 163 +++++++++++++++++++++++++---------- adreno.c | 45 ++++++---- adreno.h | 48 +++++++---- adreno_a5xx.c | 4 +- adreno_a5xx_preempt.c | 21 ++--- adreno_a5xx_ringbuffer.c | 14 +-- adreno_a6xx.c | 62 +++++++++++--- adreno_a6xx.h | 2 +- adreno_a6xx_gmu.c | 16 +++- adreno_a6xx_gmu.h | 2 + adreno_a6xx_hwsched.c | 2 + adreno_a6xx_hwsched_hfi.c | 4 +- adreno_a6xx_preempt.c | 57 +++++-------- adreno_a6xx_rgmu.c | 12 ++- adreno_a6xx_rgmu.h | 2 + adreno_a6xx_ringbuffer.c | 24 ++++-- adreno_gen7.c | 66 +++++++++++++-- adreno_gen7.h | 18 ++++ adreno_gen7_gmu.c | 52 +++++++++++- adreno_gen7_gmu.h | 6 ++ adreno_gen7_hwsched.c | 21 ++++- adreno_gen7_hwsched_hfi.c | 6 +- adreno_gen7_preempt.c | 61 +++++-------- adreno_gen7_ringbuffer.c | 60 ++++++++++--- adreno_gen7_rpmh.c | 13 ++- adreno_gen7_snapshot.c | 2 +- adreno_hfi.h | 21 +---- adreno_hwsched.c | 8 ++ adreno_pm4types.h | 4 + adreno_ringbuffer.c | 9 -- adreno_ringbuffer.h | 32 ++----- adreno_snapshot.c | 8 ++ adreno_trace.c | 2 + gen7_reg.h | 4 +- kgsl.c | 104 ++++++++++++++--------- kgsl.h | 29 ++++--- kgsl_bus.c | 7 ++ kgsl_device.h | 5 ++ kgsl_drawobj.c | 56 ++++++------ kgsl_gmu_core.h | 3 +- kgsl_iommu.c | 6 +- kgsl_pool.c | 59 ++++++++++++- kgsl_pwrctrl.c | 7 +- kgsl_pwrscale.c 
| 4 +- kgsl_reclaim.c | 69 +++++---------- kgsl_sharedmem.c | 174 ++++++++++++++++++++++++++++++-------- kgsl_sync.c | 15 ++-- kgsl_timeline.c | 21 ++++- kgsl_vbo.c | 23 ++++- 51 files changed, 1023 insertions(+), 466 deletions(-) diff --git a/Kconfig b/Kconfig index 0c04a88e74..9507965009 100644 --- a/Kconfig +++ b/Kconfig @@ -30,6 +30,15 @@ config DEVFREQ_GOV_QCOM_GPUBW_MON This governor will not be useful for non-Adreno based targets. +config QCOM_KGSL_FENCE_TRACE + bool "Enable built-in tracing for adreno fence timeouts" + depends on QCOM_KGSL + help + A boolean flag used to create a KGSL-specific tracing instance + under /tracing/instances/kgsl-fence that can be used + for debugging timeouts for fences between KGSL-contexts and + sync-point blocks. If unsure, say 'N' here. + config QCOM_ADRENO_DEFAULT_GOVERNOR string "devfreq governor for the adreno core" default "msm-adreno-tz" @@ -87,3 +96,23 @@ config QCOM_KGSL_QDSS_STM When enabled, the Adreno GPU QDSS STM support is enabled. GPU QDSS STM memory will be mapped to GPU and QDSS clock needed to access this memory is voted. Debug kernels should say 'Y' here. + +config QCOM_KGSL_USE_SHMEM + bool "Enable using shmem for memory allocations" + depends on QCOM_KGSL + help + Say 'Y' to enable using shmem for memory allocations. If enabled, + there will be no support for the memory pools and higher order pages. + But using shmem will help in making kgsl pages available for + reclaiming. + +config QCOM_KGSL_PROCESS_RECLAIM + bool "Make driver pages available for reclaim" + depends on QCOM_KGSL + select QCOM_KGSL_USE_SHMEM + help + Say 'Y' to make driver pages available for reclaiming. If enabled, + shmem will be used for allocation. kgsl would know the process + foreground/background activity through the sysfs entry exposed per + process. Based on this kgsl can unpin given number of pages from + background processes and make them available to the shrinker. 
diff --git a/a6xx_reg.h b/a6xx_reg.h index f6b7dcde8a..0c0a118957 100644 --- a/a6xx_reg.h +++ b/a6xx_reg.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2017-2020, The Linux Foundation. All rights reserved. + * Copyright (c) 2017-2021, The Linux Foundation. All rights reserved. */ #ifndef _A6XX_REG_H @@ -1106,6 +1106,11 @@ #define A6XX_GPU_CC_GX_DOMAIN_MISC3 0x24563 #define A6XX_GPU_CC_CX_GDSCR 0x2441B +/* GPUCC offsets are different for A662 */ +#define A662_GPU_CC_GX_GDSCR 0x26417 +#define A662_GPU_CC_GX_DOMAIN_MISC3 0x26541 +#define A662_GPU_CC_CX_GDSCR 0x26442 + /* GPU CPR registers */ #define A6XX_GPU_CPR_FSM_CTL 0x26801 diff --git a/adreno-gpulist.h b/adreno-gpulist.h index e7e061f76f..91aedbfd4f 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -1093,9 +1093,10 @@ static const struct adreno_protected_regs a620_protected_regs[] = { { A6XX_CP_PROTECT_REG + 32, 0x0fc00, 0x11bff, 0 }, { A6XX_CP_PROTECT_REG + 33, 0x18400, 0x1a3ff, 1 }, { A6XX_CP_PROTECT_REG + 34, 0x1a800, 0x1c7ff, 1 }, - { A6XX_CP_PROTECT_REG + 35, 0x1f400, 0x1f843, 1 }, - { A6XX_CP_PROTECT_REG + 36, 0x1f844, 0x1f8bf, 0 }, - { A6XX_CP_PROTECT_REG + 37, 0x1f887, 0x1f8a2, 1 }, + { A6XX_CP_PROTECT_REG + 35, 0x1c800, 0x1e7ff, 1 }, + { A6XX_CP_PROTECT_REG + 36, 0x1f400, 0x1f843, 1 }, + { A6XX_CP_PROTECT_REG + 37, 0x1f844, 0x1f8bf, 0 }, + { A6XX_CP_PROTECT_REG + 38, 0x1f887, 0x1f8a2, 1 }, { A6XX_CP_PROTECT_REG + 47, 0x1f8c0, 0x1f8c0, 1 }, { 0 }, }; @@ -1573,10 +1574,11 @@ static const struct adreno_protected_regs a660_protected_regs[] = { { A6XX_CP_PROTECT_REG + 33, 0x0fc00, 0x11bff, 0 }, { A6XX_CP_PROTECT_REG + 34, 0x18400, 0x1a3ff, 1 }, { A6XX_CP_PROTECT_REG + 35, 0x1a400, 0x1c3ff, 1 }, - { A6XX_CP_PROTECT_REG + 36, 0x1f400, 0x1f843, 1 }, - { A6XX_CP_PROTECT_REG + 37, 0x1f844, 0x1f8bf, 0 }, - { A6XX_CP_PROTECT_REG + 38, 0x1f860, 0x1f860, 1 }, - { A6XX_CP_PROTECT_REG + 39, 0x1f887, 0x1f8a2, 1 }, + { A6XX_CP_PROTECT_REG + 36, 0x1c400, 0x1e3ff, 1 }, + { 
A6XX_CP_PROTECT_REG + 37, 0x1f400, 0x1f843, 1 }, + { A6XX_CP_PROTECT_REG + 38, 0x1f844, 0x1f8bf, 0 }, + { A6XX_CP_PROTECT_REG + 39, 0x1f860, 0x1f860, 1 }, + { A6XX_CP_PROTECT_REG + 40, 0x1f887, 0x1f8a2, 1 }, { A6XX_CP_PROTECT_REG + 47, 0x1f8c0, 0x1f8c0, 1 }, { 0 }, }; @@ -1710,6 +1712,37 @@ static const struct adreno_a6xx_core adreno_gpu_core_a635 = { .ctxt_record_size = 2496 * 1024, }; +static const struct adreno_a6xx_core adreno_gpu_core_a662 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A662, 6, 6, 2, ANY_ID), + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | + ADRENO_CONTENT_PROTECTION | ADRENO_PREEMPTION, + .gpudev = &adreno_a6xx_gmu_gpudev.base, + .perfcounters = &adreno_a6xx_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_512K, + .bus_width = 32, + .snapshot_size = SZ_2M, + }, + .prim_fifo_threshold = 0x00200000, + .gmu_major = 2, + .gmu_minor = 0, + .sqefw_name = "a660_sqe.fw", + .gmufw_name = "a662_gmu.bin", + .zap_name = "a662_zap", + .hwcg = a660_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a660_hwcg_regs), + .vbif = a650_gbif_regs, + .vbif_count = ARRAY_SIZE(a650_gbif_regs), + .hang_detect_cycles = 0x3ffff, + .veto_fal10 = true, + .protected_regs = a660_protected_regs, + .disable_tseskip = true, + .highest_bank_bit = 15, + .pdc_in_aop = true, + .ctxt_record_size = 2496 * 1024, +}; + static const struct kgsl_regmap_list gen7_0_0_gbif_regs[] = { { GEN7_GBIF_QSB_SIDE0, 0x00071620 }, { GEN7_GBIF_QSB_SIDE1, 0x00071620 }, @@ -1720,10 +1753,10 @@ static const struct kgsl_regmap_list gen7_0_0_gbif_regs[] = { static const struct kgsl_regmap_list gen7_0_0_hwcg_regs[] = { { GEN7_RBBM_CLOCK_CNTL_SP0, 0x02222222 }, - { GEN7_RBBM_CLOCK_CNTL2_SP0, 0x02222222 }, + { GEN7_RBBM_CLOCK_CNTL2_SP0, 0x02022222 }, { GEN7_RBBM_CLOCK_HYST_SP0, 0x0000f3cf }, { GEN7_RBBM_CLOCK_DELAY_SP0, 0x00000080 }, - { GEN7_RBBM_CLOCK_CNTL_TP0, 0x22222222 }, + { GEN7_RBBM_CLOCK_CNTL_TP0, 0x22222220 }, { GEN7_RBBM_CLOCK_CNTL2_TP0, 0x22222222 }, { GEN7_RBBM_CLOCK_CNTL3_TP0, 0x22222222 }, { 
GEN7_RBBM_CLOCK_CNTL4_TP0, 0x00222222 }, @@ -1751,7 +1784,7 @@ static const struct kgsl_regmap_list gen7_0_0_hwcg_regs[] = { { GEN7_RBBM_CLOCK_MODE_BV_GRAS, 0x00222222 }, { GEN7_RBBM_CLOCK_MODE_GPC, 0x02222223 }, { GEN7_RBBM_CLOCK_MODE_VFD, 0x00002222 }, - { GEN7_RBBM_CLOCK_MODE_BV_GPC, 0x00222223 }, + { GEN7_RBBM_CLOCK_MODE_BV_GPC, 0x00222222 }, { GEN7_RBBM_CLOCK_MODE_BV_VFD, 0x00002222 }, { GEN7_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000 }, { GEN7_RBBM_CLOCK_HYST_GPC, 0x04104004 }, @@ -1777,42 +1810,50 @@ static const struct kgsl_regmap_list gen7_0_0_hwcg_regs[] = { /* GEN7_0_0 protected register list */ static const struct gen7_protected_regs gen7_0_0_protected_regs[] = { { GEN7_CP_PROTECT_REG + 0, 0x00000, 0x004ff, 0 }, - { GEN7_CP_PROTECT_REG + 1, 0x0050b, 0x00698, 0 }, + { GEN7_CP_PROTECT_REG + 1, 0x0050b, 0x00563, 0 }, { GEN7_CP_PROTECT_REG + 2, 0x0050e, 0x0050e, 1 }, { GEN7_CP_PROTECT_REG + 3, 0x00510, 0x00510, 1 }, { GEN7_CP_PROTECT_REG + 4, 0x00534, 0x00534, 1 }, - { GEN7_CP_PROTECT_REG + 5, 0x00699, 0x00882, 1 }, - { GEN7_CP_PROTECT_REG + 6, 0x008a0, 0x008a8, 1 }, - { GEN7_CP_PROTECT_REG + 7, 0x008ab, 0x008cf, 1 }, - { GEN7_CP_PROTECT_REG + 8, 0x008d0, 0x00a40, 0 }, - { GEN7_CP_PROTECT_REG + 9, 0x00900, 0x0094d, 1 }, - { GEN7_CP_PROTECT_REG + 10, 0x0098d, 0x00a3f, 1 }, - { GEN7_CP_PROTECT_REG + 11, 0x00a41, 0x00bff, 1 }, - { GEN7_CP_PROTECT_REG + 12, 0x00df0, 0x00df1, 1 }, - { GEN7_CP_PROTECT_REG + 13, 0x00e01, 0x00e01, 1 }, - { GEN7_CP_PROTECT_REG + 14, 0x00e07, 0x00e0f, 1 }, - { GEN7_CP_PROTECT_REG + 15, 0x03c00, 0x03cc3, 1 }, - { GEN7_CP_PROTECT_REG + 16, 0x03cc4, 0x05cc3, 0 }, - { GEN7_CP_PROTECT_REG + 17, 0x08630, 0x087ff, 1 }, - { GEN7_CP_PROTECT_REG + 18, 0x08e00, 0x08e00, 1 }, - { GEN7_CP_PROTECT_REG + 19, 0x08e08, 0x08e08, 1 }, - { GEN7_CP_PROTECT_REG + 20, 0x08e50, 0x08e6f, 1 }, - { GEN7_CP_PROTECT_REG + 21, 0x08e80, 0x09100, 1 }, - { GEN7_CP_PROTECT_REG + 22, 0x09624, 0x097ff, 1 }, - { GEN7_CP_PROTECT_REG + 23, 0x09e40, 0x09e40, 1 }, - { 
GEN7_CP_PROTECT_REG + 24, 0x09e64, 0x09e71, 1 }, - { GEN7_CP_PROTECT_REG + 25, 0x09e78, 0x09fff, 1 }, - { GEN7_CP_PROTECT_REG + 26, 0x0a630, 0x0a7ff, 1 }, - { GEN7_CP_PROTECT_REG + 27, 0x0ae02, 0x0ae02, 1 }, - { GEN7_CP_PROTECT_REG + 28, 0x0ae50, 0x0ae5f, 1 }, - { GEN7_CP_PROTECT_REG + 29, 0x0ae66, 0x0ae69, 1 }, - { GEN7_CP_PROTECT_REG + 30, 0x0ae6f, 0x0ae72, 1 }, - { GEN7_CP_PROTECT_REG + 31, 0x0b604, 0x0b607, 1 }, - { GEN7_CP_PROTECT_REG + 32, 0x0ec00, 0x0fbff, 1 }, - { GEN7_CP_PROTECT_REG + 33, 0x0fc00, 0x11bff, 0 }, - { GEN7_CP_PROTECT_REG + 34, 0x18400, 0x18453, 1 }, - { GEN7_CP_PROTECT_REG + 35, 0x18454, 0x18458, 0 }, - { GEN7_CP_PROTECT_REG + 47, 0x18459, 0x18459, 1 }, + { GEN7_CP_PROTECT_REG + 5, 0x005fb, 0x00698, 0 }, + { GEN7_CP_PROTECT_REG + 6, 0x00699, 0x00882, 1 }, + { GEN7_CP_PROTECT_REG + 7, 0x008a0, 0x008a8, 1 }, + { GEN7_CP_PROTECT_REG + 8, 0x008ab, 0x008cf, 1 }, + { GEN7_CP_PROTECT_REG + 9, 0x008d0, 0x00a40, 0 }, + { GEN7_CP_PROTECT_REG + 10, 0x00900, 0x0094d, 1 }, + { GEN7_CP_PROTECT_REG + 11, 0x0098d, 0x00a3f, 1 }, + { GEN7_CP_PROTECT_REG + 12, 0x00a41, 0x00bff, 1 }, + { GEN7_CP_PROTECT_REG + 13, 0x00df0, 0x00df1, 1 }, + { GEN7_CP_PROTECT_REG + 14, 0x00e01, 0x00e01, 1 }, + { GEN7_CP_PROTECT_REG + 15, 0x00e07, 0x00e0f, 1 }, + { GEN7_CP_PROTECT_REG + 16, 0x03c00, 0x03cc3, 1 }, + { GEN7_CP_PROTECT_REG + 17, 0x03cc4, 0x05cc3, 0 }, + { GEN7_CP_PROTECT_REG + 18, 0x08630, 0x087ff, 1 }, + { GEN7_CP_PROTECT_REG + 19, 0x08e00, 0x08e00, 1 }, + { GEN7_CP_PROTECT_REG + 20, 0x08e08, 0x08e08, 1 }, + { GEN7_CP_PROTECT_REG + 21, 0x08e50, 0x08e6f, 1 }, + { GEN7_CP_PROTECT_REG + 22, 0x08e80, 0x09100, 1 }, + { GEN7_CP_PROTECT_REG + 23, 0x09624, 0x097ff, 1 }, + { GEN7_CP_PROTECT_REG + 24, 0x09e40, 0x09e40, 1 }, + { GEN7_CP_PROTECT_REG + 25, 0x09e64, 0x09e71, 1 }, + { GEN7_CP_PROTECT_REG + 26, 0x09e78, 0x09fff, 1 }, + { GEN7_CP_PROTECT_REG + 27, 0x0a630, 0x0a7ff, 1 }, + { GEN7_CP_PROTECT_REG + 28, 0x0ae02, 0x0ae02, 1 }, + { GEN7_CP_PROTECT_REG + 29, 0x0ae50, 0x0ae5f, 
1 }, + { GEN7_CP_PROTECT_REG + 30, 0x0ae66, 0x0ae69, 1 }, + { GEN7_CP_PROTECT_REG + 31, 0x0ae6f, 0x0ae72, 1 }, + { GEN7_CP_PROTECT_REG + 32, 0x0b604, 0x0b607, 1 }, + { GEN7_CP_PROTECT_REG + 33, 0x0ec00, 0x0fbff, 1 }, + { GEN7_CP_PROTECT_REG + 34, 0x0fc00, 0x11bff, 0 }, + { GEN7_CP_PROTECT_REG + 35, 0x18400, 0x18453, 1 }, + { GEN7_CP_PROTECT_REG + 36, 0x18454, 0x18458, 0 }, + { GEN7_CP_PROTECT_REG + 37, 0x18459, 0x1a458, 1 }, + { GEN7_CP_PROTECT_REG + 38, 0x1a459, 0x1c458, 1 }, + { GEN7_CP_PROTECT_REG + 39, 0x1c459, 0x1e458, 1 }, + { GEN7_CP_PROTECT_REG + 40, 0x1f400, 0x1f843, 1 }, + { GEN7_CP_PROTECT_REG + 41, 0x1f844, 0x1f8bf, 0 }, + { GEN7_CP_PROTECT_REG + 42, 0x1f860, 0x1f860, 1 }, + { GEN7_CP_PROTECT_REG + 43, 0x1f878, 0x1f8a2, 1 }, + { GEN7_CP_PROTECT_REG + 47, 0x1f8c0, 0x1f8c0, 1 }, { 0 }, }; @@ -1824,7 +1865,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_0_0 = { .chipid = 0x07030000, .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | ADRENO_IFPC | - ADRENO_ACD | ADRENO_L3_VOTE | ADRENO_BCL, + ADRENO_ACD | ADRENO_L3_VOTE | ADRENO_BCL | + ADRENO_PREEMPTION, .gpudev = &adreno_gen7_gmu_gpudev.base, .perfcounters = &adreno_gen7_perfcounters, .gmem_base = 0, @@ -1853,7 +1895,35 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_0_1 = { .chipid = 0x07030001, .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | ADRENO_IFPC | - ADRENO_ACD | ADRENO_L3_VOTE | ADRENO_BCL, + ADRENO_ACD | ADRENO_L3_VOTE | ADRENO_BCL | + ADRENO_PREEMPTION, + .gpudev = &adreno_gen7_gmu_gpudev.base, + .perfcounters = &adreno_gen7_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_2M, + .bus_width = 32, + .snapshot_size = SZ_4M, + }, + .sqefw_name = "a730_sqe.fw", + .gmufw_name = "gmu_gen70000.bin", + .gmufw_bak_name = "c500_gmu.bin", + .zap_name = "a730_zap", + .hwcg = gen7_0_0_hwcg_regs, + .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), + .gbif = gen7_0_0_gbif_regs, + .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), + 
.hang_detect_cycles = 0xcfffff, + .protected_regs = gen7_0_0_protected_regs, + .highest_bank_bit = 16, +}; + +static const struct adreno_gen7_core adreno_gpu_core_gen7_4_0 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_GEN7_4_0, + UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), + .compatible = "qcom,adreno-gpu-gen7-4-0", + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | + ADRENO_CONTENT_PROTECTION, .gpudev = &adreno_gen7_gmu_gpudev.base, .perfcounters = &adreno_gen7_perfcounters, .gmem_base = 0, @@ -1912,4 +1982,7 @@ static const struct adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_a660_shima.base, &adreno_gpu_core_gen7_0_0.base, &adreno_gpu_core_gen7_0_1.base, + &adreno_gpu_core_a662.base, + &adreno_gpu_core_gen7_4_0.base, + }; diff --git a/adreno.c b/adreno.c index ed9e778e9f..0f32ad3141 100644 --- a/adreno.c +++ b/adreno.c @@ -166,7 +166,7 @@ unsigned int adreno_get_rptr(struct adreno_ringbuffer *rb) kgsl_regread(device, A3XX_CP_RB_RPTR, &rptr); else kgsl_sharedmem_readl(device->scratch, &rptr, - SCRATCH_RPTR_OFFSET(rb->id)); + SCRATCH_RB_OFFSET(rb->id, rptr)); return rptr; } @@ -682,18 +682,32 @@ out: return ret; } -static void adreno_of_get_initial_pwrlevel(struct kgsl_pwrctrl *pwr, +static void adreno_of_get_initial_pwrlevels(struct kgsl_pwrctrl *pwr, struct device_node *node) { - int init_level = 1; + int level; - of_property_read_u32(node, "qcom,initial-pwrlevel", &init_level); + /* Get and set the initial power level */ + if (of_property_read_u32(node, "qcom,initial-pwrlevel", &level)) + level = 1; - if (init_level < 0 || init_level >= pwr->num_pwrlevels) - init_level = 1; + if (level < 0 || level >= pwr->num_pwrlevels) + level = 1; - pwr->active_pwrlevel = init_level; - pwr->default_pwrlevel = init_level; + pwr->active_pwrlevel = level; + pwr->default_pwrlevel = level; + + /* Set the max power level */ + pwr->max_pwrlevel = 0; + + /* Get and set the min power level */ + if (of_property_read_u32(node, "qcom,initial-min-pwrlevel", &level)) + level = 
pwr->num_pwrlevels - 1; + + if (level < 0 || level >= pwr->num_pwrlevels || level < pwr->default_pwrlevel) + level = pwr->num_pwrlevels - 1; + + pwr->min_pwrlevel = level; } static void adreno_of_get_limits(struct adreno_device *adreno_dev, @@ -733,7 +747,7 @@ static int adreno_of_get_legacy_pwrlevels(struct adreno_device *adreno_dev, ret = adreno_of_parse_pwrlevels(adreno_dev, node); if (!ret) { - adreno_of_get_initial_pwrlevel(&device->pwrctrl, parent); + adreno_of_get_initial_pwrlevels(&device->pwrctrl, parent); adreno_of_get_limits(adreno_dev, parent); } @@ -766,7 +780,7 @@ static int adreno_of_get_pwrlevels(struct adreno_device *adreno_dev, return ret; } - adreno_of_get_initial_pwrlevel(&device->pwrctrl, child); + adreno_of_get_initial_pwrlevels(&device->pwrctrl, child); /* * Check for global throttle-pwrlevel first and override @@ -1511,14 +1525,16 @@ void adreno_set_active_ctxs_null(struct adreno_device *adreno_dev) { int i; struct adreno_ringbuffer *rb; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { if (rb->drawctxt_active) kgsl_context_put(&(rb->drawctxt_active->base)); rb->drawctxt_active = NULL; - kgsl_sharedmem_writel(rb->pagetable_desc, - PT_INFO_OFFSET(current_rb_ptname), 0); + kgsl_sharedmem_writel(device->scratch, + SCRATCH_RB_OFFSET(rb->id, current_rb_ptname), + 0); } } @@ -1749,8 +1765,6 @@ static int _adreno_start(struct adreno_device *adreno_dev) /* Set the bit to indicate that we've just powered on */ set_bit(ADRENO_DEVICE_PWRON, &adreno_dev->priv); - adreno_ringbuffer_set_global(adreno_dev, 0); - /* Clear the busy_data stats - we're starting over from scratch */ memset(&adreno_dev->busy_data, 0, sizeof(adreno_dev->busy_data)); @@ -2345,9 +2359,6 @@ static int adreno_soft_reset(struct kgsl_device *device) adreno_dev->busy_data.bif_starved_ram = 0; adreno_dev->busy_data.bif_starved_ram_ch1 = 0; - /* Set the page table back to the default page table */ - adreno_ringbuffer_set_global(adreno_dev, 
0); - /* Reinitialize the GPU */ gpudev->start(adreno_dev); diff --git a/adreno.h b/adreno.h index 0ed5a5f282..e8838e3eb6 100644 --- a/adreno.h +++ b/adreno.h @@ -16,6 +16,17 @@ #include "adreno_ringbuffer.h" #include "kgsl_sharedmem.h" +/* Used to point CP to the SMMU record during preemption */ +#define SET_PSEUDO_SMMU_INFO 0 +/* Used to inform CP where to save preemption data at the time of switch out */ +#define SET_PSEUDO_PRIV_NON_SECURE_SAVE_ADDR 1 +/* Used to inform CP where to save secure preemption data at the time of switch out */ +#define SET_PSEUDO_PRIV_SECURE_SAVE_ADDR 2 +/* Used to inform CP where to save per context non-secure data at the time of switch out */ +#define SET_PSEUDO_NON_PRIV_SAVE_ADDR 3 +/* Used to inform CP where to save preemption counter data at the time of switch out */ +#define SET_PSEUDO_COUNTER 4 + /* ADRENO_DEVICE - Given a kgsl_device return the adreno device struct */ #define ADRENO_DEVICE(device) \ container_of(device, struct adreno_device, dev) @@ -178,6 +189,7 @@ enum adreno_gpurev { ADRENO_REV_A640 = 640, ADRENO_REV_A650 = 650, ADRENO_REV_A660 = 660, + ADRENO_REV_A662 = 662, ADRENO_REV_A680 = 680, /* * Gen7 and higher version numbers may exceed 1 digit @@ -187,6 +199,7 @@ enum adreno_gpurev { */ ADRENO_REV_GEN7_0_0 = 0x070000, ADRENO_REV_GEN7_0_1 = 0x070001, + ADRENO_REV_GEN7_4_0 = 0x070400, }; #define ADRENO_SOFT_FAULT BIT(0) @@ -1014,6 +1027,7 @@ ADRENO_TARGET(a619, ADRENO_REV_A619) ADRENO_TARGET(a620, ADRENO_REV_A620) ADRENO_TARGET(a630, ADRENO_REV_A630) ADRENO_TARGET(a635, ADRENO_REV_A635) +ADRENO_TARGET(a662, ADRENO_REV_A662) ADRENO_TARGET(a640, ADRENO_REV_A640) ADRENO_TARGET(a650, ADRENO_REV_A650) ADRENO_TARGET(a680, ADRENO_REV_A680) @@ -1023,7 +1037,8 @@ static inline int adreno_is_a660(struct adreno_device *adreno_dev) { unsigned int rev = ADRENO_GPUREV(adreno_dev); - return (rev == ADRENO_REV_A660 || rev == ADRENO_REV_A635); + return (rev == ADRENO_REV_A660 || rev == ADRENO_REV_A635 || + rev == ADRENO_REV_A662); } 
/* @@ -1061,7 +1076,8 @@ static inline int adreno_is_a650_family(struct adreno_device *adreno_dev) unsigned int rev = ADRENO_GPUREV(adreno_dev); return (rev == ADRENO_REV_A650 || rev == ADRENO_REV_A620 || - rev == ADRENO_REV_A660 || rev == ADRENO_REV_A635); + rev == ADRENO_REV_A660 || rev == ADRENO_REV_A635 || + rev == ADRENO_REV_A662); } static inline int adreno_is_a619_holi(struct adreno_device *adreno_dev) @@ -1421,28 +1437,22 @@ static inline bool adreno_support_64bit(struct adreno_device *adreno_dev) return (BITS_PER_LONG > 32 && ADRENO_GPUREV(adreno_dev) >= 500); } -static inline void adreno_ringbuffer_set_global( - struct adreno_device *adreno_dev, int name) -{ - kgsl_sharedmem_writel(adreno_dev->ringbuffers[0].pagetable_desc, - PT_INFO_OFFSET(current_global_ptname), name); -} - -static inline void adreno_ringbuffer_set_pagetable(struct adreno_ringbuffer *rb, - struct kgsl_pagetable *pt) +static inline void adreno_ringbuffer_set_pagetable(struct kgsl_device *device, + struct adreno_ringbuffer *rb, struct kgsl_pagetable *pt) { unsigned long flags; spin_lock_irqsave(&rb->preempt_lock, flags); - kgsl_sharedmem_writel(rb->pagetable_desc, - PT_INFO_OFFSET(current_rb_ptname), pt->name); + kgsl_sharedmem_writel(device->scratch, + SCRATCH_RB_OFFSET(rb->id, current_rb_ptname), pt->name); - kgsl_sharedmem_writeq(rb->pagetable_desc, - PT_INFO_OFFSET(ttbr0), kgsl_mmu_pagetable_get_ttbr0(pt)); + kgsl_sharedmem_writeq(device->scratch, + SCRATCH_RB_OFFSET(rb->id, ttbr0), + kgsl_mmu_pagetable_get_ttbr0(pt)); - kgsl_sharedmem_writel(rb->pagetable_desc, - PT_INFO_OFFSET(contextidr), 0); + kgsl_sharedmem_writel(device->scratch, + SCRATCH_RB_OFFSET(rb->id, contextidr), 0); spin_unlock_irqrestore(&rb->preempt_lock, flags); } @@ -1753,6 +1763,7 @@ static inline void adreno_set_dispatch_ops(struct adreno_device *adreno_dev, adreno_dev->dispatch_ops = ops; } +#ifdef CONFIG_QCOM_KGSL_FENCE_TRACE /** * adreno_fence_trace_array_init - Initialize an always on trace array * @device: A 
GPU device handle @@ -1760,6 +1771,9 @@ static inline void adreno_set_dispatch_ops(struct adreno_device *adreno_dev, * Register an always-on trace array to for fence timeout debugging */ void adreno_fence_trace_array_init(struct kgsl_device *device); +#else +static inline void adreno_fence_trace_array_init(struct kgsl_device *device) {} +#endif /* * adreno_drawobj_set_constraint - Set a power constraint diff --git a/adreno_a5xx.c b/adreno_a5xx.c index 0b48af11fb..83e2b4b389 100644 --- a/adreno_a5xx.c +++ b/adreno_a5xx.c @@ -1850,7 +1850,7 @@ static int a5xx_rb_start(struct adreno_device *adreno_dev) FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { memset(rb->buffer_desc->hostptr, 0xaa, KGSL_RB_SIZE); kgsl_sharedmem_writel(device->scratch, - SCRATCH_RPTR_OFFSET(rb->id), 0); + SCRATCH_RB_OFFSET(rb->id, rptr), 0); rb->wptr = 0; rb->_wptr = 0; @@ -1859,7 +1859,7 @@ static int a5xx_rb_start(struct adreno_device *adreno_dev) /* Set up the current ringbuffer */ rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); - addr = SCRATCH_RPTR_GPU_ADDR(device, rb->id); + addr = SCRATCH_RB_GPU_ADDR(device, rb->id, rptr); kgsl_regwrite(device, A5XX_CP_RB_RPTR_ADDR_LO, lower_32_bits(addr)); kgsl_regwrite(device, A5XX_CP_RB_RPTR_ADDR_HI, upper_32_bits(addr)); diff --git a/adreno_a5xx_preempt.c b/adreno_a5xx_preempt.c index bf3126ef74..9008cc320b 100644 --- a/adreno_a5xx_preempt.c +++ b/adreno_a5xx_preempt.c @@ -212,16 +212,11 @@ void a5xx_preemption_trigger(struct adreno_device *adreno_dev) spin_lock_irqsave(&next->preempt_lock, flags); - /* - * Get the pagetable from the pagetable info. - * The pagetable_desc is allocated and mapped at probe time, and - * preemption_desc at init time, so no need to check if - * sharedmem accesses to these memdescs succeed. - */ - kgsl_sharedmem_readq(next->pagetable_desc, &ttbr0, - PT_INFO_OFFSET(ttbr0)); - kgsl_sharedmem_readl(next->pagetable_desc, &contextidr, - PT_INFO_OFFSET(contextidr)); + /* Get the pagetable from the pagetable info. 
*/ + kgsl_sharedmem_readq(device->scratch, &ttbr0, + SCRATCH_RB_OFFSET(next->id, ttbr0)); + kgsl_sharedmem_readl(device->scratch, &contextidr, + SCRATCH_RB_OFFSET(next->id, contextidr)); kgsl_sharedmem_writel(next->preemption_desc, PREEMPT_RECORD(wptr), next->wptr); @@ -458,7 +453,7 @@ void a5xx_preemption_start(struct adreno_device *adreno_dev) kgsl_sharedmem_writel(rb->preemption_desc, PREEMPT_RECORD(wptr), 0); - adreno_ringbuffer_set_pagetable(rb, + adreno_ringbuffer_set_pagetable(device, rb, device->mmu.defaultpagetable); } @@ -490,8 +485,8 @@ static int a5xx_preemption_ringbuffer_init(struct adreno_device *adreno_dev, kgsl_sharedmem_writel(rb->preemption_desc, PREEMPT_RECORD(wptr), 0); kgsl_sharedmem_writeq(rb->preemption_desc, - PREEMPT_RECORD(rptr_addr), SCRATCH_RPTR_GPU_ADDR(device, - rb->id)); + PREEMPT_RECORD(rptr_addr), SCRATCH_RB_GPU_ADDR(device, + rb->id, rptr)); kgsl_sharedmem_writeq(rb->preemption_desc, PREEMPT_RECORD(rbase), rb->buffer_desc->gpuaddr); kgsl_sharedmem_writeq(rb->preemption_desc, diff --git a/adreno_a5xx_ringbuffer.c b/adreno_a5xx_ringbuffer.c index fb973e9412..1862fb2da1 100644 --- a/adreno_a5xx_ringbuffer.c +++ b/adreno_a5xx_ringbuffer.c @@ -32,10 +32,10 @@ static int a5xx_rb_pagetable_switch(struct kgsl_device *device, cmds[7] = 1; cmds[8] = cp_type7_packet(CP_MEM_WRITE, 5); - cmds[9] = lower_32_bits(rb->pagetable_desc->gpuaddr + - PT_INFO_OFFSET(ttbr0)); - cmds[10] = upper_32_bits(rb->pagetable_desc->gpuaddr + - PT_INFO_OFFSET(ttbr0)); + cmds[9] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device, + rb->id, ttbr0)); + cmds[10] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device, + rb->id, ttbr0)); cmds[11] = lower_32_bits(ttbr0); cmds[12] = upper_32_bits(ttbr0); cmds[13] = id; @@ -75,8 +75,10 @@ int a5xx_ringbuffer_submit(struct adreno_ringbuffer *rb, return PTR_ERR(cmds); cmds[0] = cp_type7_packet(CP_WHERE_AM_I, 2); - cmds[1] = lower_32_bits(SCRATCH_RPTR_GPU_ADDR(device, rb->id)); - cmds[2] = upper_32_bits(SCRATCH_RPTR_GPU_ADDR(device, rb->id)); + 
cmds[1] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device, rb->id, + rptr)); + cmds[2] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device, rb->id, + rptr)); } spin_lock_irqsave(&rb->preempt_lock, flags); diff --git a/adreno_a6xx.c b/adreno_a6xx.c index 44e69b9d94..7bfa013467 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -50,6 +50,7 @@ static u32 a6xx_pwrup_reglist[] = { A6XX_SP_NC_MODE_CNTL, A6XX_PC_DBG_ECO_CNTL, A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE, + A6XX_UCHE_GBIF_GX_CONFIG, }; /* IFPC only static powerup restore list */ @@ -92,9 +93,28 @@ static u32 a6xx_ifpc_pwrup_reglist[] = { A6XX_CP_AHB_CNTL, }; +/* Applicable to a620, a635, a650 and a660 */ +static u32 a650_ifpc_pwrup_reglist[] = { + A6XX_CP_PROTECT_REG+32, + A6XX_CP_PROTECT_REG+33, + A6XX_CP_PROTECT_REG+34, + A6XX_CP_PROTECT_REG+35, + A6XX_CP_PROTECT_REG+36, + A6XX_CP_PROTECT_REG+37, + A6XX_CP_PROTECT_REG+38, + A6XX_CP_PROTECT_REG+39, + A6XX_CP_PROTECT_REG+40, + A6XX_CP_PROTECT_REG+41, + A6XX_CP_PROTECT_REG+42, + A6XX_CP_PROTECT_REG+43, + A6XX_CP_PROTECT_REG+44, + A6XX_CP_PROTECT_REG+45, + A6XX_CP_PROTECT_REG+46, + A6XX_CP_PROTECT_REG+47, +}; + /* Applicable to a620, a635, a650 and a660 */ static u32 a650_pwrup_reglist[] = { - A6XX_CP_PROTECT_REG + 47, /* Programmed for infinite span */ A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1, A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2, @@ -170,7 +190,8 @@ int a6xx_init(struct adreno_device *adreno_dev) /* If the memory type is DDR 4, override the existing configuration */ if (of_fdt_get_ddrtype() == 0x7) { if (adreno_is_a660_shima(adreno_dev) || - adreno_is_a635(adreno_dev)) + adreno_is_a635(adreno_dev) || + adreno_is_a662(adreno_dev)) adreno_dev->highest_bank_bit = 14; else if ((adreno_is_a650(adreno_dev) || adreno_is_a660(adreno_dev))) @@ -258,6 +279,9 @@ __get_gmu_ao_cgc_mode_cntl(struct adreno_device *adreno_dev) return 0x00000022; else if (adreno_is_a615_family(adreno_dev)) return 0x00000222; + /* a662 should be checked before a660 */ + else if 
(adreno_is_a662(adreno_dev)) + return 0x00020200; else if (adreno_is_a660(adreno_dev)) return 0x00020000; else @@ -298,7 +322,12 @@ static unsigned int __get_gmu_wfi_config(struct adreno_device *adreno_dev) void a6xx_cx_regulator_disable_wait(struct regulator *reg, struct kgsl_device *device, u32 timeout) { - if (!adreno_regulator_disable_poll(device, reg, A6XX_GPU_CC_CX_GDSCR, timeout)) { + u32 offset; + + offset = adreno_is_a662(ADRENO_DEVICE(device)) ? + A662_GPU_CC_CX_GDSCR : A6XX_GPU_CC_CX_GDSCR; + + if (!adreno_regulator_disable_poll(device, reg, offset, timeout)) { dev_err(device->dev, "GPU CX wait timeout. Dumping CX votes:\n"); /* Dump the cx regulator consumer list */ qcom_clk_dump(NULL, reg, false); @@ -361,6 +390,10 @@ static void a6xx_hwcg_set(struct adreno_device *adreno_dev, bool on) kgsl_regwrite(device, a6xx_core->hwcg[i].offset, on ? a6xx_core->hwcg[i].val : 0); + /* GBIF L2 CGC control is not part of the UCHE */ + kgsl_regrmw(device, A6XX_UCHE_GBIF_GX_CONFIG, 0x70000, + FIELD_PREP(GENMASK(18, 16), on ? 2 : 0)); + /* * Enable SP clock after programming HWCG registers. * A612 and A610 GPU is not having the GX power domain. 
@@ -388,14 +421,21 @@ struct a6xx_reglist_list { static void a6xx_patch_pwrup_reglist(struct adreno_device *adreno_dev) { - struct a6xx_reglist_list reglist[3]; + struct a6xx_reglist_list reglist[4]; void *ptr = adreno_dev->pwrup_reglist->hostptr; struct cpu_gpu_lock *lock = ptr; int items = 0, i, j; u32 *dest = ptr + sizeof(*lock); + u16 list_offset = 0; /* Static IFPC-only registers */ - reglist[items++] = REGLIST(a6xx_ifpc_pwrup_reglist); + reglist[items] = REGLIST(a6xx_ifpc_pwrup_reglist); + list_offset += reglist[items++].count * 2; + + if (adreno_is_a650_family(adreno_dev)) { + reglist[items] = REGLIST(a650_ifpc_pwrup_reglist); + list_offset += reglist[items++].count * 2; + } /* Static IFPC + preemption registers */ reglist[items++] = REGLIST(a6xx_pwrup_reglist); @@ -448,7 +488,7 @@ static void a6xx_patch_pwrup_reglist(struct adreno_device *adreno_dev) * all the lists and list_offset should be specified as the size in * dwords of the first entry in the list. */ - lock->list_offset = reglist[0].count * 2; + lock->list_offset = list_offset; } @@ -958,7 +998,7 @@ static int a6xx_send_cp_init(struct adreno_device *adreno_dev, "CP initialization failed to idle\n"); kgsl_sharedmem_writel(device->scratch, - SCRATCH_RPTR_OFFSET(rb->id), 0); + SCRATCH_RB_OFFSET(rb->id, rptr), 0); rb->wptr = 0; rb->_wptr = 0; } @@ -982,11 +1022,11 @@ static int _preemption_init(struct adreno_device *adreno_dev, cmds += cp_protected_mode(adreno_dev, cmds, 0); *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 6); - *cmds++ = 1; + *cmds++ = SET_PSEUDO_PRIV_NON_SECURE_SAVE_ADDR; cmds += cp_gpuaddr(adreno_dev, cmds, rb->preemption_desc->gpuaddr); - *cmds++ = 2; + *cmds++ = SET_PSEUDO_PRIV_SECURE_SAVE_ADDR; cmds += cp_gpuaddr(adreno_dev, cmds, rb->secure_preemption_desc->gpuaddr); @@ -1052,7 +1092,7 @@ int a6xx_rb_start(struct adreno_device *adreno_dev) FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { memset(rb->buffer_desc->hostptr, 0xaa, KGSL_RB_SIZE); kgsl_sharedmem_writel(device->scratch, - 
SCRATCH_RPTR_OFFSET(rb->id), 0); + SCRATCH_RB_OFFSET(rb->id, rptr), 0); rb->wptr = 0; rb->_wptr = 0; @@ -1063,7 +1103,7 @@ int a6xx_rb_start(struct adreno_device *adreno_dev) /* Set up the current ringbuffer */ rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); - addr = SCRATCH_RPTR_GPU_ADDR(device, rb->id); + addr = SCRATCH_RB_GPU_ADDR(device, rb->id, rptr); kgsl_regwrite(device, A6XX_CP_RB_RPTR_ADDR_LO, lower_32_bits(addr)); kgsl_regwrite(device, A6XX_CP_RB_RPTR_ADDR_HI, upper_32_bits(addr)); diff --git a/adreno_a6xx.h b/adreno_a6xx.h index c70e9156ca..66454750dd 100644 --- a/adreno_a6xx.h +++ b/adreno_a6xx.h @@ -162,7 +162,7 @@ struct a6xx_cp_smmu_info { (ilog2(KGSL_RB_DWORDS >> 1) & 0x3F)) /* Size of the CP_INIT pm4 stream in dwords */ -#define A6XX_CP_INIT_DWORDS 12 +#define A6XX_CP_INIT_DWORDS 11 #define A6XX_INT_MASK \ ((1 << A6XX_INT_CP_AHB_ERROR) | \ diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index d18bee9aa2..2f1f25d194 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -544,7 +544,9 @@ int a6xx_rscc_wakeup_sequence(struct adreno_device *adreno_dev) if (!test_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags)) return 0; /* A660 has a replacement register */ - if (adreno_is_a660(ADRENO_DEVICE(device))) + if (adreno_is_a662(ADRENO_DEVICE(device))) + gmu_core_regread(device, A662_GPU_CC_GX_DOMAIN_MISC3, &val); + else if (adreno_is_a660(ADRENO_DEVICE(device))) gmu_core_regread(device, A6XX_GPU_CC_GX_DOMAIN_MISC3, &val); else gmu_core_regread(device, A6XX_GPU_CC_GX_DOMAIN_MISC, &val); @@ -784,6 +786,9 @@ int a6xx_gmu_oob_set(struct kgsl_device *device, int ret = 0; int set, check; + if (req == oob_perfcntr && gmu->num_oob_perfcntr++) + return 0; + if (adreno_is_a630(adreno_dev) || adreno_is_a615_family(adreno_dev)) { set = BIT(req + 16); check = BIT(req + 24); @@ -807,6 +812,8 @@ int a6xx_gmu_oob_set(struct kgsl_device *device, if (gmu_core_timed_poll_check(device, A6XX_GMU_GMU2HOST_INTR_INFO, check, GPU_START_TIMEOUT, check)) { + if (req == oob_perfcntr) + 
gmu->num_oob_perfcntr--; gmu_core_fault_snapshot(device); ret = -ETIMEDOUT; WARN(1, "OOB request %s timed out\n", oob_to_str(req)); @@ -826,6 +833,9 @@ void a6xx_gmu_oob_clear(struct kgsl_device *device, struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); int clear; + if (req == oob_perfcntr && --gmu->num_oob_perfcntr) + return; + if (adreno_is_a630(adreno_dev) || adreno_is_a615_family(adreno_dev)) { clear = BIT(req + 24); } else { @@ -2388,7 +2398,7 @@ static void a6xx_free_gmu_globals(struct a6xx_gmu_device *gmu) { int i; - for (i = 0; i < gmu->global_entries; i++) { + for (i = 0; i < gmu->global_entries && i < ARRAY_SIZE(gmu->gmu_globals); i++) { struct kgsl_memdesc *md = &gmu->gmu_globals[i]; if (!md->gmuaddr) @@ -2835,8 +2845,6 @@ static int a6xx_gpu_boot(struct adreno_device *adreno_dev) adreno_set_active_ctxs_null(adreno_dev); - adreno_ringbuffer_set_global(adreno_dev, 0); - ret = kgsl_mmu_start(device); if (ret) goto err; diff --git a/adreno_a6xx_gmu.h b/adreno_a6xx_gmu.h index 12160ede51..6c2ddeda4b 100644 --- a/adreno_a6xx_gmu.h +++ b/adreno_a6xx_gmu.h @@ -90,6 +90,8 @@ struct a6xx_gmu_device { * which GMU can run at 500 Mhz. 
*/ u32 perf_ddr_bw; + /** @num_oob_perfcntr: Number of active oob_perfcntr requests */ + u32 num_oob_perfcntr; }; /* Helper function to get to a6xx gmu device from adreno device */ diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index 01d79cd20a..d1a230d8a0 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -940,6 +940,8 @@ static void scale_gmu_frequency(struct adreno_device *adreno_dev, int buslevel) return; } + a6xx_rdpm_cx_freq_update(gmu, freq / 1000); + trace_kgsl_gmu_pwrlevel(freq, prev_freq); prev_freq = freq; diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index 71f326e939..0a6b6d3a83 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -1364,7 +1364,7 @@ int a6xx_hwsched_submit_cmdobj(struct adreno_device *adreno_dev, if (WARN_ON(cmd_sizebytes > HFI_MAX_MSG_SIZE)) return -EMSGSIZE; - cmd = kvmalloc(cmd_sizebytes, GFP_KERNEL); + cmd = kmalloc(cmd_sizebytes, GFP_KERNEL); if (cmd == NULL) return -ENOMEM; @@ -1417,7 +1417,7 @@ skipib: adreno_profile_submit_time(&time); free: - kvfree(cmd); + kfree(cmd); return ret; } diff --git a/adreno_a6xx_preempt.c b/adreno_a6xx_preempt.c index de8bd8c014..1d5596dc87 100644 --- a/adreno_a6xx_preempt.c +++ b/adreno_a6xx_preempt.c @@ -14,14 +14,6 @@ #define PREEMPT_SMMU_RECORD(_field) \ offsetof(struct a6xx_cp_smmu_info, _field) -enum { - SET_PSEUDO_REGISTER_SAVE_REGISTER_SMMU_INFO = 0, - SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_NON_SECURE_SAVE_ADDR, - SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_SECURE_SAVE_ADDR, - SET_PSEUDO_REGISTER_SAVE_REGISTER_NON_PRIV_SAVE_ADDR, - SET_PSEUDO_REGISTER_SAVE_REGISTER_COUNTER, -}; - static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer, bool atomic) { @@ -284,16 +276,11 @@ void a6xx_preemption_trigger(struct adreno_device *adreno_dev, bool atomic) spin_lock_irqsave(&next->preempt_lock, flags); - /* - * Get the pagetable from the pagetable info. 
- * The pagetable_desc is allocated and mapped at probe time, and - * preemption_desc at init time, so no need to check if - * sharedmem accesses to these memdescs succeed. - */ - kgsl_sharedmem_readq(next->pagetable_desc, &ttbr0, - PT_INFO_OFFSET(ttbr0)); - kgsl_sharedmem_readl(next->pagetable_desc, &contextidr, - PT_INFO_OFFSET(contextidr)); + /* Get the pagetable from the pagetable info. */ + kgsl_sharedmem_readq(device->scratch, &ttbr0, + SCRATCH_RB_OFFSET(next->id, ttbr0)); + kgsl_sharedmem_readl(device->scratch, &contextidr, + SCRATCH_RB_OFFSET(next->id, contextidr)); kgsl_sharedmem_writel(next->preemption_desc, PREEMPT_RECORD(wptr), next->wptr); @@ -504,34 +491,26 @@ u32 a6xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, u32 *cmds) { unsigned int *cmds_orig = cmds; - uint64_t gpuaddr = 0; if (!adreno_is_preemption_enabled(adreno_dev)) return 0; - if (drawctxt) { - gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; - *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 15); - } else { - *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12); - } + if (test_and_set_bit(ADRENO_RB_SET_PSEUDO_DONE, &rb->flags)) + goto done; + + *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12); /* NULL SMMU_INFO buffer - we track in KMD */ - *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_SMMU_INFO; + *cmds++ = SET_PSEUDO_SMMU_INFO; cmds += cp_gpuaddr(adreno_dev, cmds, 0x0); - *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_NON_SECURE_SAVE_ADDR; + *cmds++ = SET_PSEUDO_PRIV_NON_SECURE_SAVE_ADDR; cmds += cp_gpuaddr(adreno_dev, cmds, rb->preemption_desc->gpuaddr); - *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_SECURE_SAVE_ADDR; + *cmds++ = SET_PSEUDO_PRIV_SECURE_SAVE_ADDR; cmds += cp_gpuaddr(adreno_dev, cmds, rb->secure_preemption_desc->gpuaddr); - if (drawctxt) { - *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_NON_PRIV_SAVE_ADDR; - cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr); - } - /* * There is no need to specify this address when we are about to * trigger 
preemption. This is because CP internally stores this @@ -539,14 +518,16 @@ u32 a6xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, * the context record and thus knows from where to restore * the saved perfcounters for the new ringbuffer. */ - *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_COUNTER; + *cmds++ = SET_PSEUDO_COUNTER; cmds += cp_gpuaddr(adreno_dev, cmds, rb->perfcounter_save_restore_desc->gpuaddr); +done: if (drawctxt) { struct adreno_ringbuffer *rb = drawctxt->rb; uint64_t dest = adreno_dev->preempt.scratch->gpuaddr + (rb->id * sizeof(u64)); + u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 2); cmds += cp_gpuaddr(adreno_dev, cmds, dest); @@ -624,8 +605,10 @@ void a6xx_preemption_start(struct adreno_device *adreno_dev) kgsl_sharedmem_writel(rb->preemption_desc, PREEMPT_RECORD(wptr), 0); - adreno_ringbuffer_set_pagetable(rb, + adreno_ringbuffer_set_pagetable(device, rb, device->mmu.defaultpagetable); + + clear_bit(ADRENO_RB_SET_PSEUDO_DONE, &rb->flags); } } @@ -642,8 +625,8 @@ static void reset_rb_preempt_record(struct adreno_device *adreno_dev, kgsl_sharedmem_writel(rb->preemption_desc, PREEMPT_RECORD(cntl), cp_rb_cntl); kgsl_sharedmem_writeq(rb->preemption_desc, - PREEMPT_RECORD(rptr_addr), SCRATCH_RPTR_GPU_ADDR( - KGSL_DEVICE(adreno_dev), rb->id)); + PREEMPT_RECORD(rptr_addr), SCRATCH_RB_GPU_ADDR( + KGSL_DEVICE(adreno_dev), rb->id, rptr)); kgsl_sharedmem_writeq(rb->preemption_desc, PREEMPT_RECORD(rbase), rb->buffer_desc->gpuaddr); } diff --git a/adreno_a6xx_rgmu.c b/adreno_a6xx_rgmu.c index 746f686591..a92f37a3da 100644 --- a/adreno_a6xx_rgmu.c +++ b/adreno_a6xx_rgmu.c @@ -145,6 +145,9 @@ static int a6xx_rgmu_oob_set(struct kgsl_device *device, struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(ADRENO_DEVICE(device)); int ret, set, check; + if (req == oob_perfcntr && rgmu->num_oob_perfcntr++) + return 0; + set = BIT(req + 16); check = BIT(req + 16); @@ -159,6 +162,8 @@ static int 
a6xx_rgmu_oob_set(struct kgsl_device *device, if (ret) { unsigned int status; + if (req == oob_perfcntr) + rgmu->num_oob_perfcntr--; gmu_core_regread(device, A6XX_RGMU_CX_PCC_DEBUG, &status); dev_err(&rgmu->pdev->dev, "Timed out while setting OOB req:%s status:0x%x\n", @@ -180,6 +185,11 @@ static int a6xx_rgmu_oob_set(struct kgsl_device *device, static void a6xx_rgmu_oob_clear(struct kgsl_device *device, enum oob_request req) { + struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(ADRENO_DEVICE(device)); + + if (req == oob_perfcntr && --rgmu->num_oob_perfcntr) + return; + gmu_core_regwrite(device, A6XX_GMU_HOST2GMU_INTR_SET, BIT(req + 24)); trace_kgsl_gmu_oob_clear(BIT(req + 24)); } @@ -744,8 +754,6 @@ static int a6xx_gpu_boot(struct adreno_device *adreno_dev) adreno_set_active_ctxs_null(adreno_dev); - adreno_ringbuffer_set_global(adreno_dev, 0); - ret = kgsl_mmu_start(device); if (ret) goto err; diff --git a/adreno_a6xx_rgmu.h b/adreno_a6xx_rgmu.h index 89ce5199fb..1ac472c58e 100644 --- a/adreno_a6xx_rgmu.h +++ b/adreno_a6xx_rgmu.h @@ -58,6 +58,8 @@ struct a6xx_rgmu_device { unsigned int fault_count; /** @flags: rgmu internal flags */ unsigned long flags; + /** @num_oob_perfcntr: Number of active oob_perfcntr requests */ + u32 num_oob_perfcntr; }; /** diff --git a/adreno_a6xx_ringbuffer.c b/adreno_a6xx_ringbuffer.c index f4d7acdc52..6599c264dc 100644 --- a/adreno_a6xx_ringbuffer.c +++ b/adreno_a6xx_ringbuffer.c @@ -35,10 +35,10 @@ static int a6xx_rb_pagetable_switch(struct adreno_device *adreno_dev, } cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 5); - cmds[count++] = lower_32_bits(rb->pagetable_desc->gpuaddr + - PT_INFO_OFFSET(ttbr0)); - cmds[count++] = upper_32_bits(rb->pagetable_desc->gpuaddr + - PT_INFO_OFFSET(ttbr0)); + cmds[count++] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device, + rb->id, ttbr0)); + cmds[count++] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device, + rb->id, ttbr0)); cmds[count++] = lower_32_bits(ttbr0); cmds[count++] = upper_32_bits(ttbr0); cmds[count++] = 
id; @@ -61,7 +61,7 @@ static int a6xx_rb_context_switch(struct adreno_device *adreno_dev, adreno_drawctxt_get_pagetable(drawctxt); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int count = 0; - u32 cmds[32]; + u32 cmds[36]; if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable) count += a6xx_rb_pagetable_switch(adreno_dev, rb, drawctxt, @@ -87,6 +87,14 @@ static int a6xx_rb_context_switch(struct adreno_device *adreno_dev, cmds[count++] = cp_type7_packet(CP_EVENT_WRITE, 1); cmds[count++] = 0x31; + if (adreno_is_preemption_enabled(adreno_dev)) { + u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; + + cmds[count++] = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 3); + cmds[count++] = SET_PSEUDO_NON_PRIV_SAVE_ADDR; + count += cp_gpuaddr(adreno_dev, &cmds[count], gpuaddr); + } + return a6xx_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED, cmds, count, 0, NULL); } @@ -119,8 +127,10 @@ int a6xx_ringbuffer_submit(struct adreno_ringbuffer *rb, return PTR_ERR(cmds); cmds[0] = cp_type7_packet(CP_WHERE_AM_I, 2); - cmds[1] = lower_32_bits(SCRATCH_RPTR_GPU_ADDR(device, rb->id)); - cmds[2] = upper_32_bits(SCRATCH_RPTR_GPU_ADDR(device, rb->id)); + cmds[1] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device, rb->id, + rptr)); + cmds[2] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device, rb->id, + rptr)); } spin_lock_irqsave(&rb->preempt_lock, flags); diff --git a/adreno_gen7.c b/adreno_gen7.c index f559e45669..9a1193215d 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -79,6 +79,19 @@ static const u32 gen7_ifpc_pwrup_reglist[] = { GEN7_CP_PROTECT_REG+31, GEN7_CP_PROTECT_REG+32, GEN7_CP_PROTECT_REG+33, + GEN7_CP_PROTECT_REG+34, + GEN7_CP_PROTECT_REG+35, + GEN7_CP_PROTECT_REG+36, + GEN7_CP_PROTECT_REG+37, + GEN7_CP_PROTECT_REG+38, + GEN7_CP_PROTECT_REG+39, + GEN7_CP_PROTECT_REG+40, + GEN7_CP_PROTECT_REG+41, + GEN7_CP_PROTECT_REG+42, + GEN7_CP_PROTECT_REG+43, + GEN7_CP_PROTECT_REG+44, + GEN7_CP_PROTECT_REG+45, + GEN7_CP_PROTECT_REG+46, GEN7_CP_PROTECT_REG+47, 
GEN7_CP_AHB_CNTL, }; @@ -474,6 +487,36 @@ int gen7_start(struct adreno_device *adreno_dev) return 0; } +/* Offsets into the MX/CX mapped register regions */ +#define GEN7_RDPM_MX_OFFSET 0xf00 +#define GEN7_RDPM_CX_OFFSET 0xf14 + +void gen7_rdpm_mx_freq_update(struct gen7_gmu_device *gmu, u32 freq) +{ + if (gmu->rdpm_mx_virt) { + writel_relaxed(freq/1000, (gmu->rdpm_mx_virt + GEN7_RDPM_MX_OFFSET)); + + /* + * ensure previous writes post before this one, + * i.e. act like normal writel() + */ + wmb(); + } +} + +void gen7_rdpm_cx_freq_update(struct gen7_gmu_device *gmu, u32 freq) +{ + if (gmu->rdpm_cx_virt) { + writel_relaxed(freq/1000, (gmu->rdpm_cx_virt + GEN7_RDPM_CX_OFFSET)); + + /* + * ensure previous writes post before this one, + * i.e. act like normal writel() + */ + wmb(); + } +} + void gen7_spin_idle_debug(struct adreno_device *adreno_dev, const char *str) { @@ -549,11 +592,11 @@ static int gen7_post_start(struct adreno_device *adreno_dev) return PTR_ERR(cmds); cmds[0] = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 6); - cmds[1] = 1; + cmds[1] = SET_PSEUDO_PRIV_NON_SECURE_SAVE_ADDR; cmds[2] = lower_32_bits(rb->preemption_desc->gpuaddr); cmds[3] = upper_32_bits(rb->preemption_desc->gpuaddr); - cmds[4] = 2; + cmds[4] = SET_PSEUDO_PRIV_SECURE_SAVE_ADDR; cmds[5] = lower_32_bits(rb->secure_preemption_desc->gpuaddr); cmds[6] = upper_32_bits(rb->secure_preemption_desc->gpuaddr); @@ -589,9 +632,9 @@ int gen7_rb_start(struct adreno_device *adreno_dev) FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { memset(rb->buffer_desc->hostptr, 0xaa, KGSL_RB_SIZE); kgsl_sharedmem_writel(device->scratch, - SCRATCH_RPTR_OFFSET(rb->id), 0); + SCRATCH_RB_OFFSET(rb->id, rptr), 0); kgsl_sharedmem_writel(device->scratch, - SCRATCH_BV_RPTR_OFFSET(rb->id), 0); + SCRATCH_RB_OFFSET(rb->id, bv_rptr), 0); rb->wptr = 0; rb->_wptr = 0; @@ -603,11 +646,11 @@ int gen7_rb_start(struct adreno_device *adreno_dev) /* Set up the current ringbuffer */ rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); - addr = 
SCRATCH_RPTR_GPU_ADDR(device, rb->id); + addr = SCRATCH_RB_GPU_ADDR(device, rb->id, rptr); kgsl_regwrite(device, GEN7_CP_RB_RPTR_ADDR_LO, lower_32_bits(addr)); kgsl_regwrite(device, GEN7_CP_RB_RPTR_ADDR_HI, upper_32_bits(addr)); - addr = SCRATCH_BV_RPTR_GPU_ADDR(device, rb->id); + addr = SCRATCH_RB_GPU_ADDR(device, rb->id, bv_rptr); kgsl_regwrite(device, GEN7_CP_BV_RB_RPTR_ADDR_LO, lower_32_bits(addr)); kgsl_regwrite(device, GEN7_CP_BV_RB_RPTR_ADDR_HI, upper_32_bits(addr)); @@ -815,8 +858,17 @@ static void gen7_err_callback(struct adreno_device *adreno_dev, int bit) dev_crit_ratelimited(dev, "UCHE: Trap interrupt\n"); break; case GEN7_INT_TSBWRITEERROR: - dev_crit_ratelimited(dev, "TSB: Write error interrupt\n"); + { + u32 lo, hi; + + kgsl_regread(device, GEN7_RBBM_SECVID_TSB_STATUS_LO, &lo); + kgsl_regread(device, GEN7_RBBM_SECVID_TSB_STATUS_HI, &hi); + + dev_crit_ratelimited(dev, "TSB: Write error interrupt: Address: 0x%llx MID: %d\n", + FIELD_GET(GENMASK(16, 0), hi) << 32 | lo, + FIELD_GET(GENMASK(31, 23), hi)); break; + } default: dev_crit_ratelimited(dev, "Unknown interrupt %d\n", bit); } diff --git a/adreno_gen7.h b/adreno_gen7.h index 7e4c910231..761dc14430 100644 --- a/adreno_gen7.h +++ b/adreno_gen7.h @@ -433,4 +433,22 @@ to_gen7_gpudev(const struct adreno_gpudev *gpudev) * Reset the preemption records at the time of hard reset */ void gen7_reset_preempt_records(struct adreno_device *adreno_dev); + +/** + * gen7_rdpm_mx_freq_update - Update the mx frequency + * @gmu: An Adreno GMU handle + * @freq: Frequency in KHz + * + * This function communicates GPU mx frequency(in Mhz) changes to rdpm. + */ +void gen7_rdpm_mx_freq_update(struct gen7_gmu_device *gmu, u32 freq); + +/** + * gen7_rdpm_cx_freq_update - Update the cx frequency + * @gmu: An Adreno GMU handle + * @freq: Frequency in KHz + * + * This function communicates GPU cx frequency(in Mhz) changes to rdpm. 
+ */ +void gen7_rdpm_cx_freq_update(struct gen7_gmu_device *gmu, u32 freq); #endif diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 8fc1726748..867d683037 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -52,6 +52,11 @@ static struct gmu_vma_entry gen7_gmu_vma[] = { .size = SZ_512M, .next_va = 0x60000000, }, + [GMU_NONCACHED_KERNEL_EXTENDED] = { + .start = 0xc0000000, + .size = SZ_512M, + .next_va = 0xc0000000, + }, }; static ssize_t log_stream_enable_store(struct kobject *kobj, @@ -494,6 +499,9 @@ int gen7_gmu_oob_set(struct kgsl_device *device, int ret = 0; int set, check; + if (req == oob_perfcntr && gmu->num_oob_perfcntr++) + return 0; + if (req >= oob_boot_slumber) { dev_err(&gmu->pdev->dev, "Unsupported OOB request %s\n", @@ -508,6 +516,8 @@ int gen7_gmu_oob_set(struct kgsl_device *device, if (gmu_core_timed_poll_check(device, GEN7_GMU_GMU2HOST_INTR_INFO, check, 100, check)) { + if (req == oob_perfcntr) + gmu->num_oob_perfcntr--; gmu_core_fault_snapshot(device); ret = -ETIMEDOUT; WARN(1, "OOB request %s timed out\n", oob_to_str(req)); @@ -527,6 +537,9 @@ void gen7_gmu_oob_clear(struct kgsl_device *device, struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); int clear = BIT(31 - req * 2); + if (req == oob_perfcntr && --gmu->num_oob_perfcntr) + return; + if (req >= oob_boot_slumber) { dev_err(&gmu->pdev->dev, "Unsupported OOB clear %s\n", oob_to_str(req)); @@ -1147,6 +1160,8 @@ void gen7_gmu_suspend(struct adreno_device *adreno_dev) gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + gen7_rdpm_cx_freq_update(gmu, 0); + dev_err(&gmu->pdev->dev, "Suspended GMU\n"); device->state = KGSL_STATE_NONE; @@ -1205,6 +1220,10 @@ static int gen7_gmu_dcvs_set(struct adreno_device *adreno_dev, ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } + if (req.freq != INVALID_DCVS_IDX) + gen7_rdpm_mx_freq_update(gmu, + gmu->hfi.dcvs_table.gx_votes[req.freq].freq); + return ret; } @@ -1467,6 +1486,8 @@ int gen7_gmu_enable_clks(struct adreno_device *adreno_dev) struct 
kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret; + gen7_rdpm_cx_freq_update(gmu, GMU_FREQ_MIN / 1000); + ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", GMU_FREQ_MIN); if (ret) { @@ -1576,6 +1597,8 @@ gdsc_off: /* Poll to make sure that the CX is off */ gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + gen7_rdpm_cx_freq_update(gmu, 0); + return ret; } @@ -1640,6 +1663,8 @@ gdsc_off: /* Poll to make sure that the CX is off */ gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + gen7_rdpm_cx_freq_update(gmu, 0); + return ret; } @@ -1708,7 +1733,7 @@ static void gen7_free_gmu_globals(struct gen7_gmu_device *gmu) { int i; - for (i = 0; i < gmu->global_entries; i++) { + for (i = 0; i < gmu->global_entries && i < ARRAY_SIZE(gmu->gmu_globals); i++) { struct kgsl_memdesc *md = &gmu->gmu_globals[i]; if (!md->gmuaddr) @@ -1812,6 +1837,22 @@ static int gen7_gmu_reg_probe(struct adreno_device *adreno_dev) return ret; } +static void gen7_gmu_rdpm_probe(struct gen7_gmu_device *gmu, + struct kgsl_device *device) +{ + struct resource *res; + + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, "rdpm_cx"); + if (res) + gmu->rdpm_cx_virt = devm_ioremap(&device->pdev->dev, + res->start, resource_size(res)); + + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, "rdpm_mx"); + if (res) + gmu->rdpm_mx_virt = devm_ioremap(&device->pdev->dev, + res->start, resource_size(res)); +} + static int gen7_gmu_regulators_probe(struct gen7_gmu_device *gmu, struct platform_device *pdev) { @@ -1933,6 +1974,9 @@ int gen7_gmu_probe(struct kgsl_device *device, } } + /* Setup any rdpm register ranges */ + gen7_gmu_rdpm_probe(gmu, device); + /* Set up GMU regulators */ ret = gen7_gmu_regulators_probe(gmu, pdev); if (ret) @@ -2081,6 +2125,8 @@ static int gen7_gmu_power_off(struct adreno_device *adreno_dev) if (ret) goto error; + gen7_rdpm_mx_freq_update(gmu, 0); + /* Now that we are done with GMU and GPU, Clear the GBIF */ ret = 
gen7_halt_gbif(adreno_dev); if (ret) @@ -2095,6 +2141,8 @@ static int gen7_gmu_power_off(struct adreno_device *adreno_dev) /* Poll to make sure that the CX is off */ gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + gen7_rdpm_cx_freq_update(gmu, 0); + device->state = KGSL_STATE_NONE; return 0; @@ -2135,8 +2183,6 @@ static int gen7_gpu_boot(struct adreno_device *adreno_dev) adreno_set_active_ctxs_null(adreno_dev); - adreno_ringbuffer_set_global(adreno_dev, 0); - ret = kgsl_mmu_start(device); if (ret) goto err; diff --git a/adreno_gen7_gmu.h b/adreno_gen7_gmu.h index 0702793251..5f40bc575f 100644 --- a/adreno_gen7_gmu.h +++ b/adreno_gen7_gmu.h @@ -85,6 +85,12 @@ struct gen7_gmu_device { * which GMU can run at 500 Mhz. */ u32 perf_ddr_bw; + /** @rdpm_cx_virt: Pointer where the RDPM CX block is mapped */ + void __iomem *rdpm_cx_virt; + /** @rdpm_mx_virt: Pointer where the RDPM MX block is mapped */ + void __iomem *rdpm_mx_virt; + /** @num_oob_perfcntr: Number of active oob_perfcntr requests */ + u32 num_oob_perfcntr; }; /* Helper function to get to gen7 gmu device from adreno device */ diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 710c696557..18f6a6178c 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -332,6 +332,8 @@ gdsc_off: /* Poll to make sure that the CX is off */ gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + gen7_rdpm_cx_freq_update(gmu, 0); + return ret; } @@ -392,6 +394,8 @@ gdsc_off: /* Poll to make sure that the CX is off */ gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + gen7_rdpm_cx_freq_update(gmu, 0); + return ret; } @@ -464,6 +468,8 @@ static int gen7_hwsched_gmu_power_off(struct adreno_device *adreno_dev) ret = gen7_rscc_sleep_sequence(adreno_dev); + gen7_rdpm_mx_freq_update(gmu, 0); + /* Now that we are done with GMU and GPU, Clear the GBIF */ ret = gen7_halt_gbif(adreno_dev); @@ -476,6 +482,8 @@ static int gen7_hwsched_gmu_power_off(struct adreno_device *adreno_dev) /* Poll 
to make sure that the CX is off */ gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + gen7_rdpm_cx_freq_update(gmu, 0); + return ret; error: @@ -703,7 +711,7 @@ static int gen7_hwsched_power_off(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - int ret; + int ret = 0; if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) return 0; @@ -713,8 +721,11 @@ static int gen7_hwsched_power_off(struct adreno_device *adreno_dev) /* process any profiling results that are available */ adreno_profile_process_results(ADRENO_DEVICE(device)); - if (!gen7_hw_isidle(adreno_dev)) + if (!gen7_hw_isidle(adreno_dev)) { dev_err(&gmu->pdev->dev, "GPU isn't idle before SLUMBER\n"); + gmu_core_fault_snapshot(device); + goto no_gx_power; + } ret = gen7_gmu_oob_set(device, oob_gpu); if (ret) { @@ -883,6 +894,10 @@ static int gen7_hwsched_dcvs_set(struct adreno_device *adreno_dev, adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); } + if (req.freq != INVALID_DCVS_IDX) + gen7_rdpm_mx_freq_update(gmu, + gmu->hfi.dcvs_table.gx_votes[req.freq].freq); + return ret; } @@ -919,6 +934,8 @@ static void scale_gmu_frequency(struct adreno_device *adreno_dev, int buslevel) return; } + gen7_rdpm_cx_freq_update(gmu, freq / 1000); + trace_kgsl_gmu_pwrlevel(freq, prev_freq); prev_freq = freq; diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 0ee8a7b858..c910ab7581 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -1152,7 +1152,8 @@ static int hfi_f2h_main(void *arg) while (!kthread_should_stop()) { wait_event_interruptible(hfi->f2h_wq, !kthread_should_stop() && !(is_queue_empty(adreno_dev, HFI_MSG_ID) && - is_queue_empty(adreno_dev, HFI_DBG_ID))); + is_queue_empty(adreno_dev, HFI_DBG_ID)) && + (hfi->irq_mask & HFI_IRQ_MSGQ_MASK)); if (kthread_should_stop()) break; @@ -1201,6 +1202,7 @@ static void add_profile_events(struct adreno_device *adreno_dev, unsigned 
long time_in_ns; struct kgsl_context *context = drawobj->context; struct submission_info info = {0}; + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; /* * Here we are attempting to create a mapping between the @@ -1233,7 +1235,7 @@ static void add_profile_events(struct adreno_device *adreno_dev, time_in_s = time->ktime; time_in_ns = do_div(time_in_s, 1000000000); - info.inflight = -1; + info.inflight = hwsched->inflight; info.rb_id = adreno_get_level(context->priority); info.gmu_dispatch_queue = context->gmu_dispatch_queue; diff --git a/adreno_gen7_preempt.c b/adreno_gen7_preempt.c index 4c5da6d497..5185f933b4 100644 --- a/adreno_gen7_preempt.c +++ b/adreno_gen7_preempt.c @@ -14,14 +14,6 @@ #define PREEMPT_SMMU_RECORD(_field) \ offsetof(struct gen7_cp_smmu_info, _field) -enum { - SET_PSEUDO_REGISTER_SAVE_REGISTER_SMMU_INFO = 0, - SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_NON_SECURE_SAVE_ADDR, - SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_SECURE_SAVE_ADDR, - SET_PSEUDO_REGISTER_SAVE_REGISTER_NON_PRIV_SAVE_ADDR, - SET_PSEUDO_REGISTER_SAVE_REGISTER_COUNTER, -}; - static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer, bool atomic) { @@ -257,16 +249,11 @@ void gen7_preemption_trigger(struct adreno_device *adreno_dev, bool atomic) spin_lock_irqsave(&next->preempt_lock, flags); - /* - * Get the pagetable from the pagetable info. - * The pagetable_desc is allocated and mapped at probe time, and - * preemption_desc at init time, so no need to check if - * sharedmem accesses to these memdescs succeed. - */ - kgsl_sharedmem_readq(next->pagetable_desc, &ttbr0, - PT_INFO_OFFSET(ttbr0)); - kgsl_sharedmem_readl(next->pagetable_desc, &contextidr, - PT_INFO_OFFSET(contextidr)); + /* Get the pagetable from the pagetable info. 
*/ + kgsl_sharedmem_readq(device->scratch, &ttbr0, + SCRATCH_RB_OFFSET(next->id, ttbr0)); + kgsl_sharedmem_readl(device->scratch, &contextidr, + SCRATCH_RB_OFFSET(next->id, contextidr)); kgsl_sharedmem_writel(next->preemption_desc, PREEMPT_RECORD(wptr), next->wptr); @@ -476,37 +463,29 @@ u32 gen7_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, u32 *cmds) { u32 *cmds_orig = cmds; - u64 gpuaddr = 0; if (!adreno_is_preemption_enabled(adreno_dev)) return 0; + if (test_and_set_bit(ADRENO_RB_SET_PSEUDO_DONE, &rb->flags)) + goto done; + *cmds++ = cp_type7_packet(CP_THREAD_CONTROL, 1); *cmds++ = CP_SET_THREAD_BR; - if (drawctxt) { - gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; - *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 15); - } else { - *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12); - } + *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12); /* NULL SMMU_INFO buffer - we track in KMD */ - *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_SMMU_INFO; + *cmds++ = SET_PSEUDO_SMMU_INFO; cmds += cp_gpuaddr(adreno_dev, cmds, 0x0); - *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_NON_SECURE_SAVE_ADDR; + *cmds++ = SET_PSEUDO_PRIV_NON_SECURE_SAVE_ADDR; cmds += cp_gpuaddr(adreno_dev, cmds, rb->preemption_desc->gpuaddr); - *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_SECURE_SAVE_ADDR; + *cmds++ = SET_PSEUDO_PRIV_SECURE_SAVE_ADDR; cmds += cp_gpuaddr(adreno_dev, cmds, rb->secure_preemption_desc->gpuaddr); - if (drawctxt) { - *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_NON_PRIV_SAVE_ADDR; - cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr); - } - /* * There is no need to specify this address when we are about to * trigger preemption. This is because CP internally stores this @@ -514,14 +493,16 @@ u32 gen7_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, * the context record and thus knows from where to restore * the saved perfcounters for the new ringbuffer. 
*/ - *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_COUNTER; + *cmds++ = SET_PSEUDO_COUNTER; cmds += cp_gpuaddr(adreno_dev, cmds, rb->perfcounter_save_restore_desc->gpuaddr); +done: if (drawctxt) { struct adreno_ringbuffer *rb = drawctxt->rb; u64 dest = adreno_dev->preempt.scratch->gpuaddr + (rb->id * sizeof(u64)); + u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 2); cmds += cp_gpuaddr(adreno_dev, cmds, dest); @@ -601,8 +582,10 @@ void gen7_preemption_start(struct adreno_device *adreno_dev) kgsl_sharedmem_writel(rb->preemption_desc, PREEMPT_RECORD(wptr), 0); - adreno_ringbuffer_set_pagetable(rb, + adreno_ringbuffer_set_pagetable(device, rb, device->mmu.defaultpagetable); + + clear_bit(ADRENO_RB_SET_PSEUDO_DONE, &rb->flags); } } @@ -616,13 +599,13 @@ static void reset_rb_preempt_record(struct adreno_device *adreno_dev, kgsl_sharedmem_writel(rb->preemption_desc, PREEMPT_RECORD(cntl), GEN7_CP_RB_CNTL_DEFAULT); kgsl_sharedmem_writeq(rb->preemption_desc, - PREEMPT_RECORD(rptr_addr), SCRATCH_RPTR_GPU_ADDR( - KGSL_DEVICE(adreno_dev), rb->id)); + PREEMPT_RECORD(rptr_addr), SCRATCH_RB_GPU_ADDR( + KGSL_DEVICE(adreno_dev), rb->id, rptr)); kgsl_sharedmem_writeq(rb->preemption_desc, PREEMPT_RECORD(rbase), rb->buffer_desc->gpuaddr); kgsl_sharedmem_writeq(rb->preemption_desc, - PREEMPT_RECORD(bv_rptr_addr), SCRATCH_BV_RPTR_GPU_ADDR( - KGSL_DEVICE(adreno_dev), rb->id)); + PREEMPT_RECORD(bv_rptr_addr), SCRATCH_RB_GPU_ADDR( + KGSL_DEVICE(adreno_dev), rb->id, bv_rptr)); } void gen7_reset_preempt_records(struct adreno_device *adreno_dev) diff --git a/adreno_gen7_ringbuffer.c b/adreno_gen7_ringbuffer.c index 47277fc51f..8f7305acaf 100644 --- a/adreno_gen7_ringbuffer.c +++ b/adreno_gen7_ringbuffer.c @@ -37,10 +37,10 @@ static int gen7_rb_pagetable_switch(struct adreno_device *adreno_dev, cmds[count++] = id; cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 5); - cmds[count++] = lower_32_bits(rb->pagetable_desc->gpuaddr + - 
PT_INFO_OFFSET(ttbr0)); - cmds[count++] = upper_32_bits(rb->pagetable_desc->gpuaddr + - PT_INFO_OFFSET(ttbr0)); + cmds[count++] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device, + rb->id, ttbr0)); + cmds[count++] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device, + rb->id, ttbr0)); cmds[count++] = lower_32_bits(ttbr0); cmds[count++] = upper_32_bits(ttbr0); cmds[count++] = id; @@ -64,15 +64,15 @@ static int gen7_rb_context_switch(struct adreno_device *adreno_dev, adreno_drawctxt_get_pagetable(drawctxt); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int count = 0; - u32 cmds[42]; + u32 cmds[46]; /* Sync both threads */ cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1); cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BOTH; /* Reset context state */ cmds[count++] = cp_type7_packet(CP_RESET_CONTEXT_STATE, 1); - cmds[count++] = CP_CLEAR_BV_BR_COUNTER | CP_CLEAR_RESOURCE_TABLE | - CP_CLEAR_ON_CHIP_TS; + cmds[count++] = CP_RESET_GLOBAL_LOCAL_TS | CP_CLEAR_BV_BR_COUNTER | + CP_CLEAR_RESOURCE_TABLE | CP_CLEAR_ON_CHIP_TS; /* * Enable/disable concurrent binning for pagetable switch and * set the thread to BR since only BR can execute the pagetable @@ -87,7 +87,7 @@ static int gen7_rb_context_switch(struct adreno_device *adreno_dev, drawctxt, pagetable, &cmds[count]); else { struct kgsl_iommu *iommu = KGSL_IOMMU(device); - u32 id = drawctxt ? 
drawctxt->base.id : 0; + u32 offset = GEN7_SMMU_BASE + (iommu->cb0_offset >> 2) + 0x0d; /* @@ -96,7 +96,7 @@ static int gen7_rb_context_switch(struct adreno_device *adreno_dev, * need any special sequence or locking to change it */ cmds[count++] = cp_type4_packet(offset, 1); - cmds[count++] = id; + cmds[count++] = drawctxt->base.id; } cmds[count++] = cp_type7_packet(CP_NOP, 1); @@ -119,6 +119,15 @@ static int gen7_rb_context_switch(struct adreno_device *adreno_dev, cmds[count++] = cp_type7_packet(CP_EVENT_WRITE, 1); cmds[count++] = 0x31; + if (adreno_is_preemption_enabled(adreno_dev)) { + u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; + + cmds[count++] = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 3); + cmds[count++] = SET_PSEUDO_NON_PRIV_SAVE_ADDR; + cmds[count++] = lower_32_bits(gpuaddr); + cmds[count++] = upper_32_bits(gpuaddr); + } + return gen7_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED, cmds, count, 0, NULL); } @@ -305,6 +314,37 @@ int gen7_ringbuffer_addcmds(struct adreno_device *adreno_dev, if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &device->mmu.pfpolicy)) cmds[index++] = cp_type7_packet(CP_WAIT_MEM_WRITES, 0); + if (is_concurrent_binning(drawctxt)) { + u64 addr = SCRATCH_RB_GPU_ADDR(device, rb->id, bv_ts); + + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SET_THREAD_BV; + + /* + * Make sure the timestamp is committed once BV pipe is + * completely done with this submission. + */ + cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4); + cmds[index++] = CACHE_CLEAN | BIT(27); + cmds[index++] = lower_32_bits(addr); + cmds[index++] = upper_32_bits(addr); + cmds[index++] = rb->timestamp; + + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SET_THREAD_BR; + + /* + * This makes sure that BR doesn't race ahead and commit + * timestamp to memstore while BV is still processing + * this submission. 
+ */ + cmds[index++] = cp_type7_packet(CP_WAIT_TIMESTAMP, 4); + cmds[index++] = 0; + cmds[index++] = lower_32_bits(addr); + cmds[index++] = upper_32_bits(addr); + cmds[index++] = rb->timestamp; + } + /* * If this is an internal command, just write the ringbuffer timestamp, * otherwise, write both @@ -431,7 +471,7 @@ static int gen7_drawctxt_switch(struct adreno_device *adreno_dev, ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \ field)) -#define GEN7_COMMAND_DWORDS 38 +#define GEN7_COMMAND_DWORDS 52 int gen7_ringbuffer_submitcmd(struct adreno_device *adreno_dev, struct kgsl_drawobj_cmd *cmdobj, u32 flags, diff --git a/adreno_gen7_rpmh.c b/adreno_gen7_rpmh.c index 37e3fcdaf1..aa7841ad93 100644 --- a/adreno_gen7_rpmh.c +++ b/adreno_gen7_rpmh.c @@ -311,8 +311,7 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, /* Add the zero powerlevel for the perf table */ table->gpu_level_num = device->pwrctrl.num_pwrlevels + 1; - if (table->gpu_level_num > pri_rail->num || - table->gpu_level_num > ARRAY_SIZE(vlvl_tbl)) { + if (table->gpu_level_num > ARRAY_SIZE(vlvl_tbl)) { dev_err(&gmu->pdev->dev, "Defined more GPU DCVS levels than RPMh can support\n"); return -ERANGE; @@ -462,8 +461,14 @@ int gen7_build_rpmh_tables(struct adreno_device *adreno_dev) int ret; ret = build_dcvs_table(adreno_dev); - if (ret) + if (ret) { + dev_err(adreno_dev->dev.dev, "Failed to build dcvs table\n"); return ret; + } - return build_bw_table(adreno_dev); + ret = build_bw_table(adreno_dev); + if (ret) + dev_err(adreno_dev->dev.dev, "Failed to build bw table\n"); + + return ret; } diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c index 71a2d37c22..4f40872cce 100644 --- a/adreno_gen7_snapshot.c +++ b/adreno_gen7_snapshot.c @@ -1109,7 +1109,7 @@ void gen7_snapshot(struct adreno_device *adreno_dev, struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct adreno_ringbuffer *rb; unsigned int i; - u32 hi, lo, cgc, cgc1, cgc2; + u32 hi, lo, cgc = 0, cgc1 = 0, cgc2 = 0; /* 
* Dump debugbus data here to capture it for both diff --git a/adreno_hfi.h b/adreno_hfi.h index 6b171de136..ead4fa0918 100644 --- a/adreno_hfi.h +++ b/adreno_hfi.h @@ -422,7 +422,7 @@ struct hfi_dcvstable_cmd { } __packed; #define MAX_ACD_STRIDE 2 -#define MAX_ACD_NUM_LEVELS 6 +#define MAX_ACD_NUM_LEVELS KGSL_MAX_PWRLEVELS /* H2F */ struct hfi_acd_table_cmd { @@ -597,23 +597,6 @@ struct hfi_issue_ib { u32 size; } __packed; -/* H2F */ -struct hfi_issue_cmd_cmd { - u32 hdr; - u32 ctxt_id; - u32 flags; - u32 ts; - u32 count; - struct hfi_issue_ib *ibs[]; -} __packed; - -/* Internal */ -struct hfi_issue_cmd_req { - u32 queue; - u32 ctxt_id; - struct hfi_issue_cmd_cmd cmd; -} __packed; - /* H2F */ /* The length of *buf will be embedded in the hdr */ struct hfi_issue_cmd_raw_cmd { @@ -736,7 +719,7 @@ static inline int _CMD_MSG_HDR(u32 *hdr, int id, size_t size) /* Maximum number of IBs in a submission */ #define HWSCHED_MAX_DISPATCH_NUMIBS \ - ((HFI_MAX_MSG_SIZE - offsetof(struct hfi_issue_cmd_cmd, ibs)) \ + ((HFI_MAX_MSG_SIZE - sizeof(struct hfi_submit_cmd)) \ / sizeof(struct hfi_issue_ib)) /** diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 84a21aec8c..13db07a065 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -1588,6 +1588,14 @@ void adreno_hwsched_parse_fault_cmdobj(struct adreno_device *adreno_dev, struct adreno_hwsched *hwsched = &adreno_dev->hwsched; struct cmd_list_obj *obj, *tmp; + /* + * During IB parse, vmalloc is called which can sleep and + * should not be called from atomic context. Since IBs are not + * dumped during atomic snapshot, there is no need to parse it. 
+ */ + if (adreno_dev->dev.snapshot_atomic) + return; + list_for_each_entry_safe(obj, tmp, &hwsched->cmd_list, node) { struct kgsl_drawobj_cmd *cmdobj = obj->cmdobj; diff --git a/adreno_pm4types.h b/adreno_pm4types.h index 1d5ab43fa9..426cbce027 100644 --- a/adreno_pm4types.h +++ b/adreno_pm4types.h @@ -164,13 +164,17 @@ /* Controls which threads execute the PM4 commands the follow this packet */ #define CP_THREAD_CONTROL 0x17 +#define CP_WAIT_TIMESTAMP 0x14 + #define CP_SET_THREAD_BR FIELD_PREP(GENMASK(1, 0), 1) +#define CP_SET_THREAD_BV FIELD_PREP(GENMASK(1, 0), 2) #define CP_SET_THREAD_BOTH FIELD_PREP(GENMASK(1, 0), 3) #define CP_SYNC_THREADS BIT(31) #define CP_CONCURRENT_BIN_DISABLE BIT(27) #define CP_RESET_CONTEXT_STATE 0x1F +#define CP_RESET_GLOBAL_LOCAL_TS BIT(3) #define CP_CLEAR_BV_BR_COUNTER BIT(2) #define CP_CLEAR_RESOURCE_TABLE BIT(1) #define CP_CLEAR_ON_CHIP_TS BIT(0) diff --git a/adreno_ringbuffer.c b/adreno_ringbuffer.c index 5721bb4fb1..3d8af5a086 100644 --- a/adreno_ringbuffer.c +++ b/adreno_ringbuffer.c @@ -127,15 +127,6 @@ int adreno_ringbuffer_setup(struct adreno_device *adreno_dev, unsigned int priv = 0; int ret; - /* - * Allocate mem for storing RB pagetables and commands to - * switch pagetable - */ - ret = adreno_allocate_global(device, &rb->pagetable_desc, PAGE_SIZE, - SZ_16K, 0, KGSL_MEMDESC_PRIVILEGED, "pagetable_desc"); - if (ret) - return ret; - /* allocate a chunk of memory to create user profiling IB1s */ adreno_allocate_global(device, &rb->profile_desc, PAGE_SIZE, 0, KGSL_MEMFLAGS_GPUREADONLY, 0, "profile_desc"); diff --git a/adreno_ringbuffer.h b/adreno_ringbuffer.h index 447586e72d..2dea03eebe 100644 --- a/adreno_ringbuffer.h +++ b/adreno_ringbuffer.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2002,2007-2020, The Linux Foundation. All rights reserved. + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. 
*/ #ifndef __ADRENO_RINGBUFFER_H #define __ADRENO_RINGBUFFER_H @@ -67,30 +67,10 @@ struct adreno_submit_time { }; /** - * struct adreno_ringbuffer_pagetable_info - Contains fields used during a - * pagetable switch. - * @current_global_ptname: The current pagetable id being used by the GPU. - * Only the ringbuffers[0] current_global_ptname is used to keep track of - * the current pagetable id - * @current_rb_ptname: The current pagetable active on the given RB - * @incoming_ptname: Contains the incoming pagetable we are switching to. After - * switching of pagetable this value equals current_rb_ptname. - * @switch_pt_enable: Flag used during pagetable switch to check if pt - * switch can be skipped - * @ttbr0: value to program into TTBR0 during pagetable switch. - * @contextidr: value to program into CONTEXTIDR during pagetable switch. + * This is to keep track whether the SET_PSEUDO_REGISTER packet needs to be submitted + * or not */ -struct adreno_ringbuffer_pagetable_info { - int current_global_ptname; - int current_rb_ptname; - int incoming_ptname; - int switch_pt_enable; - uint64_t ttbr0; - unsigned int contextidr; -}; - -#define PT_INFO_OFFSET(_field) \ - offsetof(struct adreno_ringbuffer_pagetable_info, _field) +#define ADRENO_RB_SET_PSEUDO_DONE 0 /** * struct adreno_ringbuffer - Definition for an adreno ringbuffer object @@ -112,7 +92,6 @@ struct adreno_ringbuffer_pagetable_info { * preemption info written/read by CP for secure contexts * @perfcounter_save_restore_desc: Used by CP to save/restore the perfcounter * values across preemption - * @pagetable_desc: Memory to hold information about the pagetables being used * and the commands to switch pagetable on the RB * @dispatch_q: The dispatcher side queue for this ringbuffer * @ts_expire_waitq: Wait queue to wait for rb timestamp to expire @@ -126,7 +105,7 @@ struct adreno_ringbuffer_pagetable_info { * hardware */ struct adreno_ringbuffer { - uint32_t flags; + unsigned long flags; struct kgsl_memdesc 
*buffer_desc; unsigned int _wptr; unsigned int wptr; @@ -139,7 +118,6 @@ struct adreno_ringbuffer { struct kgsl_memdesc *preemption_desc; struct kgsl_memdesc *secure_preemption_desc; struct kgsl_memdesc *perfcounter_save_restore_desc; - struct kgsl_memdesc *pagetable_desc; struct adreno_dispatcher_drawqueue dispatch_q; wait_queue_head_t ts_expire_waitq; unsigned int wptr_preempt_end; diff --git a/adreno_snapshot.c b/adreno_snapshot.c index ec6defa94f..d85307d966 100644 --- a/adreno_snapshot.c +++ b/adreno_snapshot.c @@ -276,6 +276,14 @@ static void snapshot_rb_ibs(struct kgsl_device *device, int index, i; int parse_ibs = 0, ib_parse_start; + /* + * During IB parse, vmalloc is called which can sleep and + * should not be called from atomic context. Since IBs are not + * dumped during atomic snapshot, there is no need to parse it. + */ + if (device->snapshot_atomic) + return; + /* * Figure out the window of ringbuffer data to dump. First we need to * find where the last processed IB ws submitted. 
Start walking back diff --git a/adreno_trace.c b/adreno_trace.c index 84577f2441..4c27d2c451 100644 --- a/adreno_trace.c +++ b/adreno_trace.c @@ -10,6 +10,7 @@ #define CREATE_TRACE_POINTS #include "adreno_trace.h" +#ifdef CONFIG_QCOM_KGSL_FENCE_TRACE static const char * const kgsl_fence_trace_events[] = { "adreno_cmdbatch_submitted", "adreno_cmdbatch_retired", @@ -34,3 +35,4 @@ void adreno_fence_trace_array_init(struct kgsl_device *device) "kgsl", kgsl_fence_trace_events[i], true); } +#endif diff --git a/gen7_reg.h b/gen7_reg.h index e530a02854..da03e710ff 100644 --- a/gen7_reg.h +++ b/gen7_reg.h @@ -556,6 +556,8 @@ #define GEN7_RBBM_SECVID_TSB_TRUSTED_BASE_HI 0xf801 #define GEN7_RBBM_SECVID_TSB_TRUSTED_SIZE 0xf802 #define GEN7_RBBM_SECVID_TSB_CNTL 0xf803 +#define GEN7_RBBM_SECVID_TSB_STATUS_LO 0xfc00 +#define GEN7_RBBM_SECVID_TSB_STATUS_HI 0xfc01 #define GEN7_RBBM_GBIF_CLIENT_QOS_CNTL 0x00011 #define GEN7_RBBM_GBIF_HALT 0x00016 @@ -1035,7 +1037,7 @@ #define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_6_L 0x1f870 #define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_6_H 0x1f871 #define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_L 0x1f872 -#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_H 0x1f843 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_H 0x1f873 #define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_8_L 0x1f874 #define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_8_H 0x1f875 #define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_9_L 0x1f876 diff --git a/kgsl.c b/kgsl.c index c59ccff831..8a423ddbd4 100644 --- a/kgsl.c +++ b/kgsl.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -23,7 +24,7 @@ #include #include #include -#include +#include #include "kgsl_compat.h" #include "kgsl_debugfs.h" @@ -872,10 +873,12 @@ static void kgsl_destroy_process_private(struct kref *kref) struct kgsl_process_private *private = container_of(kref, struct kgsl_process_private, refcount); - mutex_lock(&kgsl_driver.process_mutex); debugfs_remove_recursive(private->debug_root); + 
kobject_put(&private->kobj_memtype); kobject_put(&private->kobj); + mutex_lock(&kgsl_driver.process_mutex); + /* When using global pagetables, do not detach global pagetable */ if (private->pagetable->name != KGSL_MMU_GLOBAL_PT) kgsl_mmu_detach_pagetable(private->pagetable); @@ -1084,7 +1087,7 @@ static struct kgsl_process_private *kgsl_process_private_open( * private destroy is triggered but didn't complete. Retry creating * process private after sometime to allow previous destroy to complete. */ - for (i = 0; (PTR_ERR_OR_ZERO(private) == -EEXIST) && (i < 5); i++) { + for (i = 0; (PTR_ERR_OR_ZERO(private) == -EEXIST) && (i < 50); i++) { usleep_range(10, 100); private = _process_private_open(device); } @@ -2831,7 +2834,25 @@ static void kgsl_process_add_stats(struct kgsl_process_private *priv, priv->stats[type].max = ret; } +u64 kgsl_get_stats(pid_t pid) +{ + struct kgsl_process_private *process; + u64 ret; + if (pid < 0) + return atomic_long_read(&kgsl_driver.stats.page_alloc); + + process = kgsl_process_private_find(pid); + + if (!process) + return 0; + + ret = atomic64_read(&process->stats[KGSL_MEM_ENTRY_KERNEL].cur); + kgsl_process_private_put(process); + + return ret; +} +EXPORT_SYMBOL(kgsl_get_stats); long kgsl_ioctl_gpuobj_import(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) @@ -2957,6 +2978,43 @@ static int _map_usermem_dma_buf(struct kgsl_device *device, #endif #ifdef CONFIG_DMA_SHARED_BUFFER +static int verify_secure_access(struct kgsl_device *device, + struct kgsl_mem_entry *entry, struct dma_buf *dmabuf) +{ + bool secure = entry->memdesc.priv & KGSL_MEMDESC_SECURE; + uint32_t *vmid_list = NULL, *perms_list = NULL; + uint32_t nelems = 0; + int i; + + if (mem_buf_dma_buf_copy_vmperm(dmabuf, (int **)&vmid_list, + (int **)&perms_list, (int *)&nelems)) { + dev_info(device->dev, "Skipped access check\n"); + return 0; + } + + /* Check if secure buffer is accessible to CP_PIXEL */ + for (i = 0; i < nelems; i++) { + if (vmid_list[i] == 
VMID_CP_PIXEL) + break; + } + + kfree(vmid_list); + kfree(perms_list); + + /* + * Do not import a buffer if it is accessible to CP_PIXEL but is being imported as + * a buffer accessible to non-secure GPU. Also, make sure if buffer is to be made + * accessible to secure GPU, it must be accessible to CP_PIXEL + */ + if (!(secure ^ (i == nelems))) + return -EPERM; + + if (secure && mem_buf_dma_buf_exclusive_owner(dmabuf)) + return -EPERM; + + return 0; +} + static int kgsl_setup_dma_buf(struct kgsl_device *device, struct kgsl_pagetable *pagetable, struct kgsl_mem_entry *entry, @@ -3012,44 +3070,10 @@ static int kgsl_setup_dma_buf(struct kgsl_device *device, entry->priv_data = meta; entry->memdesc.sgt = sg_table; - if (entry->memdesc.priv & KGSL_MEMDESC_SECURE) { - uint32_t *vmid_list = NULL, *perms_list = NULL; - uint32_t nelems = 0; - int i; + ret = verify_secure_access(device, entry, dmabuf); + if (ret) + goto out; - if (mem_buf_dma_buf_exclusive_owner(dmabuf)) { - ret = -EPERM; - goto out; - } - - ret = mem_buf_dma_buf_copy_vmperm(dmabuf, (int **)&vmid_list, - (int **)&perms_list, (int *)&nelems); - if (ret) { - ret = 0; - dev_info(device->dev, "Skipped access check\n"); - goto skip_access_check; - } - - /* Check if secure buffer is accessible to CP_PIXEL */ - for (i = 0; i < nelems; i++) { - if (vmid_list[i] == QCOM_DMA_HEAP_FLAG_CP_PIXEL) - break; - } - - kfree(vmid_list); - kfree(perms_list); - - if (i == nelems) { - /* - * Secure buffer is not accessible to CP_PIXEL, there is no point - * in importing this buffer. - */ - ret = -EPERM; - goto out; - } - } - -skip_access_check: /* Calculate the size of the memdesc from the sglist */ for (s = entry->memdesc.sgt->sgl; s != NULL; s = sg_next(s)) entry->memdesc.size += (uint64_t) s->length; diff --git a/kgsl.h b/kgsl.h index 98b25cb300..5bc6f64262 100644 --- a/kgsl.h +++ b/kgsl.h @@ -61,19 +61,28 @@ * is mapped into the GPU. This allows for some 'shared' data between * the GPU and CPU. 
For example, it will be used by the GPU to write * each updated RPTR for each RB. - * - * Used Data: - * Offset: Length(bytes): What - * 0x0: 4 * KGSL_PRIORITY_MAX_RB_LEVELS: RB0 RPTR */ /* Shadow global helpers */ -#define SCRATCH_RPTR_OFFSET(id) ((id) * sizeof(unsigned int)) -#define SCRATCH_RPTR_GPU_ADDR(dev, id) \ - ((dev)->scratch->gpuaddr + SCRATCH_RPTR_OFFSET(id)) -#define SCRATCH_BV_RPTR_OFFSET(id) (0x40 + (id) * sizeof(unsigned int)) -#define SCRATCH_BV_RPTR_GPU_ADDR(dev, id) \ - ((dev)->scratch->gpuaddr + SCRATCH_BV_RPTR_OFFSET(id)) +struct adreno_rb_shadow { + /** @rptr: per ringbuffer address where GPU writes the rptr */ + u32 rptr; + /** @bv_rptr: per ringbuffer address where GPU writes BV rptr */ + u32 bv_rptr; + /** @bv_ts: per ringbuffer address where BV ringbuffer timestamp is written to */ + u32 bv_ts; + /** @current_rb_ptname: The current pagetable active on the given RB */ + u32 current_rb_ptname; + /** @ttbr0: value to program into TTBR0 during pagetable switch */ + u64 ttbr0; + /** @contextidr: value to program into CONTEXTIDR during pagetable switch */ + u32 contextidr; +}; + +#define SCRATCH_RB_OFFSET(id, _field) ((id * sizeof(struct adreno_rb_shadow)) + \ + offsetof(struct adreno_rb_shadow, _field)) +#define SCRATCH_RB_GPU_ADDR(dev, id, _field) \ + ((dev)->scratch->gpuaddr + SCRATCH_RB_OFFSET(id, _field)) /* Timestamp window used to detect rollovers (half of integer range) */ #define KGSL_TIMESTAMP_WINDOW 0x80000000 diff --git a/kgsl_bus.c b/kgsl_bus.c index 2279ce5d2e..e1991d0b22 100644 --- a/kgsl_bus.c +++ b/kgsl_bus.c @@ -31,6 +31,8 @@ static u32 _ab_buslevel_update(struct kgsl_pwrctrl *pwr, return (pwr->bus_percent_ab * pwr->bus_max) / 100; } +#define ACTIVE_ONLY_TAG 0x3 +#define PERF_MODE_TAG 0x8 int kgsl_bus_update(struct kgsl_device *device, enum kgsl_bus_vote vote_state) @@ -69,6 +71,11 @@ int kgsl_bus_update(struct kgsl_device *device, /* buslevel is the IB vote, update the AB */ ab = _ab_buslevel_update(pwr, 
pwr->ddr_table[buslevel]); + if (buslevel == pwr->pwrlevels[0].bus_max) + icc_set_tag(pwr->icc_path, ACTIVE_ONLY_TAG | PERF_MODE_TAG); + else + icc_set_tag(pwr->icc_path, ACTIVE_ONLY_TAG); + return device->ftbl->gpu_bus_set(device, buslevel, ab); } diff --git a/kgsl_device.h b/kgsl_device.h index ba704d3cb1..dc81abf235 100644 --- a/kgsl_device.h +++ b/kgsl_device.h @@ -461,6 +461,11 @@ struct kgsl_process_private { * @cmd_count: The number of cmds that are active for the process */ atomic_t cmd_count; + /** + * @kobj_memtype: Pointer to a kobj for memtype sysfs directory for this + * process + */ + struct kobject kobj_memtype; }; struct kgsl_device_private { diff --git a/kgsl_drawobj.c b/kgsl_drawobj.c index 75807d6350..83ce37938c 100644 --- a/kgsl_drawobj.c +++ b/kgsl_drawobj.c @@ -188,11 +188,23 @@ static void syncobj_timer(struct timer_list *t) case KGSL_CMD_SYNCPOINT_TYPE_TIMELINE: { int j; struct event_timeline_info *info = event->priv; + struct dma_fence *fence = event->fence; + bool retired = false; + bool signaled = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &fence->flags); + const char *str = NULL; + if (fence->ops->signaled && fence->ops->signaled(fence)) + retired = true; + + if (!retired) + str = "not retired"; + else if (retired && signaled) + str = "signaled"; + else if (retired && !signaled) + str = "retired but not signaled"; dev_err(device->dev, " [%u] FENCE %s\n", - i, dma_fence_is_signaled(event->fence) ? 
- "signaled" : "not signaled"); - + i, str); for (j = 0; info && info[j].timeline; j++) dev_err(device->dev, " TIMELINE %d SEQNO %lld\n", info[j].timeline, info[j].seqno); @@ -530,11 +542,12 @@ static int drawobj_add_sync_timeline(struct kgsl_device *device, drawobj_get_sync_timeline_priv(u64_to_user_ptr(sync.timelines), sync.timelines_size, sync.count); + /* Set pending flag before adding callback to avoid race */ + set_bit(event->id, &syncobj->pending); + ret = dma_fence_add_callback(event->fence, &event->cb, drawobj_sync_timeline_fence_callback); - set_bit(event->id, &syncobj->pending); - if (ret) { clear_bit(event->id, &syncobj->pending); @@ -746,6 +759,7 @@ static void add_profiling_buffer(struct kgsl_device *device, { struct kgsl_mem_entry *entry; struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + u64 start; if (!(drawobj->flags & KGSL_DRAWOBJ_PROFILING)) return; @@ -762,7 +776,14 @@ static void add_profiling_buffer(struct kgsl_device *device, gpuaddr); if (entry != NULL) { - if (!kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, size)) { + start = id ? 
(entry->memdesc.gpuaddr + offset) : gpuaddr; + /* + * Make sure there is enough room in the object to store the + * entire profiling buffer object + */ + if (!kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, size) || + !kgsl_gpuaddr_in_memdesc(&entry->memdesc, start, + sizeof(struct kgsl_drawobj_profiling_buffer))) { kgsl_mem_entry_put(entry); entry = NULL; } @@ -775,28 +796,7 @@ static void add_profiling_buffer(struct kgsl_device *device, return; } - - if (!id) { - cmdobj->profiling_buffer_gpuaddr = gpuaddr; - } else { - u64 off = offset + sizeof(struct kgsl_drawobj_profiling_buffer); - - /* - * Make sure there is enough room in the object to store the - * entire profiling buffer object - */ - if (off < offset || off >= entry->memdesc.size) { - dev_err(device->dev, - "ignore invalid profile offset ctxt %d id %d offset %lld gpuaddr %llx size %lld\n", - drawobj->context->id, id, offset, gpuaddr, size); - kgsl_mem_entry_put(entry); - return; - } - - cmdobj->profiling_buffer_gpuaddr = - entry->memdesc.gpuaddr + offset; - } - + cmdobj->profiling_buffer_gpuaddr = start; cmdobj->profiling_buf_entry = entry; } diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h index 0ae12a8e04..3fdc68f498 100644 --- a/kgsl_gmu_core.h +++ b/kgsl_gmu_core.h @@ -162,7 +162,8 @@ enum gmu_mem_type { GMU_CACHE = GMU_ICACHE, GMU_DTCM, GMU_DCACHE, - GMU_NONCACHED_KERNEL, + GMU_NONCACHED_KERNEL, /* GMU VBIF3 uncached VA range: 0x60000000 - 0x7fffffff */ + GMU_NONCACHED_KERNEL_EXTENDED, /* GMU VBIF3 uncached VA range: 0xc0000000 - 0xdfffffff */ GMU_NONCACHED_USER, GMU_MEM_TYPE_MAX, }; diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 282a231b90..82518dcdc0 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -825,7 +825,7 @@ static void kgsl_iommu_print_fault(struct kgsl_mmu *mmu, struct adreno_device *adreno_dev = ADRENO_DEVICE(device); const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); struct kgsl_mem_entry *prev = NULL, *next = NULL, *entry; - const char *fault_type; + const char 
*fault_type = NULL; const char *comm = NULL; u32 ptname = KGSL_MMU_GLOBAL_PT; int id; @@ -847,6 +847,9 @@ static void kgsl_iommu_print_fault(struct kgsl_mmu *mmu, fault_type = "external"; else if (flags & IOMMU_FAULT_TRANSACTION_STALLED) fault_type = "transaction stalled"; + else + fault_type = "unknown"; + /* FIXME: This seems buggy */ if (test_bit(KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE, &mmu->pfpolicy)) @@ -1122,6 +1125,7 @@ static void kgsl_iommu_destroy_pagetable(struct kgsl_pagetable *pagetable) { struct kgsl_iommu_pt *pt = to_iommu_pt(pagetable); + free_io_pgtable_ops(pt->pgtbl_ops); kfree(pt); } diff --git a/kgsl_pool.c b/kgsl_pool.c index 18f6a8e28d..134c225303 100644 --- a/kgsl_pool.c +++ b/kgsl_pool.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -31,6 +32,7 @@ static struct kmem_cache *addr_page_cache; * @reserved_pages: Number of pages reserved at init for the pool * @list_lock: Spinlock for page list in the pool * @pool_rbtree: RB tree with all pages held/reserved in this pool + * @mempool: Mempool to pre-allocate tracking structs for pages in this pool */ struct kgsl_page_pool { unsigned int pool_order; @@ -38,15 +40,28 @@ struct kgsl_page_pool { unsigned int reserved_pages; spinlock_t list_lock; struct rb_root pool_rbtree; + mempool_t *mempool; }; +static void *_pool_entry_alloc(gfp_t gfp_mask, void *arg) +{ + return kmem_cache_alloc(addr_page_cache, gfp_mask); +} + +static void _pool_entry_free(void *element, void *arg) +{ + return kmem_cache_free(addr_page_cache, element); +} + static int __kgsl_pool_add_page(struct kgsl_page_pool *pool, struct page *p) { struct rb_node **node, *parent; struct kgsl_pool_page_entry *new_page, *entry; + gfp_t gfp_mask = GFP_KERNEL & ~__GFP_DIRECT_RECLAIM; - new_page = kmem_cache_alloc(addr_page_cache, GFP_KERNEL); + new_page = pool->mempool ? 
mempool_alloc(pool->mempool, gfp_mask) : + kmem_cache_alloc(addr_page_cache, gfp_mask); if (new_page == NULL) return -ENOMEM; @@ -87,7 +102,10 @@ __kgsl_pool_get_page(struct kgsl_page_pool *pool) entry = rb_entry(node, struct kgsl_pool_page_entry, node); p = entry->page; rb_erase(&entry->node, &pool->pool_rbtree); - kmem_cache_free(addr_page_cache, entry); + if (pool->mempool) + mempool_free(entry, pool->mempool); + else + kmem_cache_free(addr_page_cache, entry); pool->page_count--; return p; } @@ -101,6 +119,17 @@ static void kgsl_pool_cache_init(void) { addr_page_cache = KMEM_CACHE(kgsl_pool_page_entry, 0); } + +static void kgsl_pool_cache_destroy(void) +{ + kmem_cache_destroy(addr_page_cache); +} + +static void kgsl_destroy_page_pool(struct kgsl_page_pool *pool) +{ + mempool_destroy(pool->mempool); +} + #else /** * struct kgsl_page_pool - Structure to hold information for the pool @@ -151,6 +180,14 @@ static void kgsl_pool_list_init(struct kgsl_page_pool *pool) static void kgsl_pool_cache_init(void) { } + +static void kgsl_pool_cache_destroy(void) +{ +} + +static void kgsl_destroy_page_pool(struct kgsl_page_pool *pool) +{ +} #endif static struct kgsl_page_pool kgsl_pools[6]; @@ -563,6 +600,15 @@ static void kgsl_pool_reserve_pages(struct kgsl_page_pool *pool, /* Limit the total number of reserved pages to 4096 */ pool->reserved_pages = min_t(u32, reserved, 4096); +#if IS_ENABLED(CONFIG_QCOM_KGSL_SORT_POOL) + /* + * Pre-allocate tracking structs for reserved_pages so that + * the pool can hold them even in low memory conditions + */ + pool->mempool = mempool_create(pool->reserved_pages, + _pool_entry_alloc, _pool_entry_free, NULL); +#endif + for (i = 0; i < pool->reserved_pages; i++) { gfp_t gfp_mask = kgsl_gfp_mask(pool->pool_order); struct page *page; @@ -632,10 +678,19 @@ void kgsl_probe_page_pools(void) void kgsl_exit_page_pools(void) { + int i; + /* Release all pages in pools, if any.*/ kgsl_pool_reduce(INT_MAX, true); /* Unregister shrinker */ 
unregister_shrinker(&kgsl_pool_shrinker); + + /* Destroy helper structures */ + for (i = 0; i < kgsl_num_pools; i++) + kgsl_destroy_page_pool(&kgsl_pools[i]); + + /* Destroy the kmem cache */ + kgsl_pool_cache_destroy(); } diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 5a3e52c3a7..57b2b63499 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -1557,12 +1557,9 @@ int kgsl_pwrctrl_init(struct kgsl_device *device) init_waitqueue_head(&device->active_cnt_wq); - /* Initialize the user and thermal clock constraints */ - - pwr->max_pwrlevel = 0; - pwr->min_pwrlevel = pwr->num_pwrlevels - 1; + /* Initialize the thermal clock constraints */ pwr->thermal_pwrlevel = 0; - pwr->thermal_pwrlevel_floor = pwr->min_pwrlevel; + pwr->thermal_pwrlevel_floor = pwr->num_pwrlevels - 1; pwr->wakeup_maxpwrlevel = 0; diff --git a/kgsl_pwrscale.c b/kgsl_pwrscale.c index c7d0ff0d66..fc7e47e177 100644 --- a/kgsl_pwrscale.c +++ b/kgsl_pwrscale.c @@ -705,10 +705,10 @@ int kgsl_pwrscale_init(struct kgsl_device *device, struct platform_device *pdev, devfreq = devfreq_add_device(&pdev->dev, &gpu_profile->profile, governor, &adreno_tz_data); - if (IS_ERR(devfreq)) { + if (IS_ERR_OR_NULL(devfreq)) { device->pwrscale.enabled = false; msm_adreno_tz_exit(); - return PTR_ERR(devfreq); + return IS_ERR(devfreq) ? 
PTR_ERR(devfreq) : -EINVAL; } pwrscale->devfreqptr = devfreq; diff --git a/kgsl_reclaim.c b/kgsl_reclaim.c index b0d6804456..f93a1a654a 100644 --- a/kgsl_reclaim.c +++ b/kgsl_reclaim.c @@ -21,37 +21,10 @@ static u32 kgsl_reclaim_max_page_limit = 7680; /* Setting this to 0 means we reclaim pages as specified in shrinker call */ static u32 kgsl_nr_to_scan; -static atomic_t kgsl_shrinker_active = ATOMIC_INIT(0); -static unsigned long shmem_swap_pages(struct address_space *mapping) -{ - struct inode *inode = mapping->host; - struct shmem_inode_info *info = SHMEM_I(inode); - unsigned long swapped; +struct work_struct reclaim_work; - swapped = READ_ONCE(info->swapped); - return swapped; -} - -static unsigned long kgsl_process_get_reclaim_count( - struct kgsl_process_private *process) -{ - struct kgsl_mem_entry *entry; - struct kgsl_memdesc *memdesc; - unsigned long reclaim_count = 0; - int id; - - spin_lock(&process->mem_lock); - idr_for_each_entry(&process->mem_idr, entry, id) { - memdesc = &entry->memdesc; - if (memdesc->shmem_filp) - reclaim_count += shmem_swap_pages( - memdesc->shmem_filp->f_mapping); - } - spin_unlock(&process->mem_lock); - - return reclaim_count; -} +static atomic_t kgsl_nr_to_reclaim; static int kgsl_memdesc_get_reclaimed_pages(struct kgsl_mem_entry *entry) { @@ -182,7 +155,7 @@ static ssize_t gpumem_reclaimed_show(struct kobject *kobj, container_of(kobj, struct kgsl_process_private, kobj); return scnprintf(buf, PAGE_SIZE, "%d\n", - kgsl_process_get_reclaim_count(process) << PAGE_SHIFT); + atomic_read(&process->unpinned_page_count) << PAGE_SHIFT); } PROCESS_ATTR(state, 0644, kgsl_proc_state_show, kgsl_proc_state_store); @@ -297,7 +270,6 @@ static u32 kgsl_reclaim_process(struct kgsl_process_private *process, for (i = 0; i < memdesc->page_count; i++) { set_page_dirty_lock(memdesc->pages[i]); - shmem_mark_page_lazyfree(memdesc->pages[i]); spin_lock(&memdesc->lock); put_page(memdesc->pages[i]); memdesc->pages[i] = NULL; @@ -306,6 +278,7 @@ static u32 
kgsl_reclaim_process(struct kgsl_process_private *process, remaining--; } + reclaim_shmem_address_space(memdesc->shmem_filp->f_mapping); memdesc->priv |= KGSL_MEMDESC_RECLAIMED; } @@ -318,24 +291,13 @@ static u32 kgsl_reclaim_process(struct kgsl_process_private *process, return (pages_to_reclaim - remaining); } -/* Functions for the shrinker */ - -static unsigned long -kgsl_reclaim_shrink_scan_objects(struct shrinker *shrinker, - struct shrink_control *sc) +static void kgsl_reclaim_background_work(struct work_struct *work) { - /* nr_pages represents number of pages to be reclaimed*/ - u32 nr_pages = kgsl_nr_to_scan ? kgsl_nr_to_scan : sc->nr_to_scan; - u32 bg_proc = 0; + u32 bg_proc = 0, nr_pages = atomic_read(&kgsl_nr_to_reclaim); u64 pp_nr_pages; struct list_head kgsl_reclaim_process_list; struct kgsl_process_private *process, *next; - if (atomic_inc_return(&kgsl_shrinker_active) > 1) { - atomic_dec(&kgsl_shrinker_active); - return 0; - } - INIT_LIST_HEAD(&kgsl_reclaim_process_list); read_lock(&kgsl_driver.proclist_lock); list_for_each_entry(process, &kgsl_driver.process_list, list) { @@ -362,10 +324,21 @@ kgsl_reclaim_shrink_scan_objects(struct shrinker *shrinker, list_del(&process->reclaim_list); kgsl_process_private_put(process); } +} - atomic_dec(&kgsl_shrinker_active); - return ((kgsl_nr_to_scan ? - kgsl_nr_to_scan : sc->nr_to_scan) - nr_pages); +/* Shrinker callback functions */ +static unsigned long +kgsl_reclaim_shrink_scan_objects(struct shrinker *shrinker, + struct shrink_control *sc) +{ + if (!current_is_kswapd()) + return 0; + + atomic_set(&kgsl_nr_to_reclaim, kgsl_nr_to_scan ? 
+ kgsl_nr_to_scan : sc->nr_to_scan); + kgsl_schedule_work(&reclaim_work); + + return atomic_read(&kgsl_nr_to_reclaim); } static unsigned long @@ -411,6 +384,8 @@ int kgsl_reclaim_init(void) ret = register_shrinker(&kgsl_reclaim_shrinker); if (ret) pr_err("kgsl: reclaim: Failed to register shrinker\n"); + else + INIT_WORK(&reclaim_work, kgsl_reclaim_background_work); return ret; } diff --git a/kgsl_sharedmem.c b/kgsl_sharedmem.c index 42311fd494..53cbf1d8ba 100644 --- a/kgsl_sharedmem.c +++ b/kgsl_sharedmem.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "kgsl_device.h" #include "kgsl_pool.h" @@ -26,6 +27,73 @@ bool kgsl_sharedmem_noretry_flag; static DEFINE_MUTEX(kernel_map_global_lock); +#define MEMTYPE(_type, _name) \ + static struct kgsl_memtype memtype_##_name = { \ + .type = _type, \ + .attr = { .name = __stringify(_name), .mode = 0444 } \ +} + +struct kgsl_memtype { + unsigned int type; + struct attribute attr; +}; + +/* We can not use macro MEMTYPE for "any(0)" because of special characters */ +static struct kgsl_memtype memtype_any0 = { + .type = KGSL_MEMTYPE_OBJECTANY, + .attr = { .name = "any(0)", .mode = 0444 }, +}; + +MEMTYPE(KGSL_MEMTYPE_FRAMEBUFFER, framebuffer); +MEMTYPE(KGSL_MEMTYPE_RENDERBUFFER, renderbuffer); +MEMTYPE(KGSL_MEMTYPE_ARRAYBUFFER, arraybuffer); +MEMTYPE(KGSL_MEMTYPE_ELEMENTARRAYBUFFER, elementarraybuffer); +MEMTYPE(KGSL_MEMTYPE_VERTEXARRAYBUFFER, vertexarraybuffer); +MEMTYPE(KGSL_MEMTYPE_TEXTURE, texture); +MEMTYPE(KGSL_MEMTYPE_SURFACE, surface); +MEMTYPE(KGSL_MEMTYPE_EGL_SURFACE, egl_surface); +MEMTYPE(KGSL_MEMTYPE_GL, gl); +MEMTYPE(KGSL_MEMTYPE_CL, cl); +MEMTYPE(KGSL_MEMTYPE_CL_BUFFER_MAP, cl_buffer_map); +MEMTYPE(KGSL_MEMTYPE_CL_BUFFER_NOMAP, cl_buffer_nomap); +MEMTYPE(KGSL_MEMTYPE_CL_IMAGE_MAP, cl_image_map); +MEMTYPE(KGSL_MEMTYPE_CL_IMAGE_NOMAP, cl_image_nomap); +MEMTYPE(KGSL_MEMTYPE_CL_KERNEL_STACK, cl_kernel_stack); +MEMTYPE(KGSL_MEMTYPE_COMMAND, command); +MEMTYPE(KGSL_MEMTYPE_2D, 2d); 
+MEMTYPE(KGSL_MEMTYPE_EGL_IMAGE, egl_image); +MEMTYPE(KGSL_MEMTYPE_EGL_SHADOW, egl_shadow); +MEMTYPE(KGSL_MEMTYPE_MULTISAMPLE, egl_multisample); +MEMTYPE(KGSL_MEMTYPE_KERNEL, kernel); + +static struct attribute *memtype_attrs[] = { + &memtype_any0.attr, + &memtype_framebuffer.attr, + &memtype_renderbuffer.attr, + &memtype_arraybuffer.attr, + &memtype_elementarraybuffer.attr, + &memtype_vertexarraybuffer.attr, + &memtype_texture.attr, + &memtype_surface.attr, + &memtype_egl_surface.attr, + &memtype_gl.attr, + &memtype_cl.attr, + &memtype_cl_buffer_map.attr, + &memtype_cl_buffer_nomap.attr, + &memtype_cl_image_map.attr, + &memtype_cl_image_nomap.attr, + &memtype_cl_kernel_stack.attr, + &memtype_command.attr, + &memtype_2d.attr, + &memtype_egl_image.attr, + &memtype_egl_shadow.attr, + &memtype_egl_multisample.attr, + &memtype_kernel.attr, + NULL, +}; + +ATTRIBUTE_GROUPS(memtype); + /* An attribute for showing per-process memory statistics */ struct kgsl_mem_entry_attribute { struct kgsl_process_attribute attr; @@ -64,6 +132,51 @@ static ssize_t mem_entry_sysfs_show(struct kobject *kobj, return pattr->show(priv, pattr->memtype, buf); } +static ssize_t memtype_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct kgsl_process_private *priv; + struct kgsl_memtype *memtype; + struct kgsl_mem_entry *entry; + u64 size = 0; + int id = 0; + + priv = container_of(kobj, struct kgsl_process_private, kobj_memtype); + memtype = container_of(attr, struct kgsl_memtype, attr); + + spin_lock(&priv->mem_lock); + for (entry = idr_get_next(&priv->mem_idr, &id); entry; + id++, entry = idr_get_next(&priv->mem_idr, &id)) { + struct kgsl_memdesc *memdesc; + unsigned int type; + + if (!kgsl_mem_entry_get(entry)) + continue; + spin_unlock(&priv->mem_lock); + + memdesc = &entry->memdesc; + type = kgsl_memdesc_get_memtype(memdesc); + + if (type == memtype->type) + size += memdesc->size; + + kgsl_mem_entry_put(entry); + spin_lock(&priv->mem_lock); + } + 
spin_unlock(&priv->mem_lock); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", size); +} + +static const struct sysfs_ops memtype_sysfs_ops = { + .show = memtype_sysfs_show, +}; + +static struct kobj_type ktype_memtype = { + .sysfs_ops = &memtype_sysfs_ops, + .default_groups = memtype_groups, +}; + static ssize_t imported_mem_show(struct kgsl_process_private *priv, int type, char *buf) @@ -243,10 +356,15 @@ void kgsl_process_init_sysfs(struct kgsl_device *device, kgsl_driver.prockobj, "%d", pid_nr(private->pid))) { dev_err(device->dev, "Unable to add sysfs for process %d\n", pid_nr(private->pid)); - kgsl_process_private_put(private); } kgsl_reclaim_proc_sysfs_init(private); + + if (kobject_init_and_add(&private->kobj_memtype, &ktype_memtype, + &private->kobj, "memtype")) { + dev_err(device->dev, "Unable to add memtype sysfs for process %d\n", + pid_nr(private->pid)); + } } static ssize_t memstat_show(struct device *dev, @@ -794,41 +912,21 @@ kgsl_sharedmem_writeq(const struct kgsl_memdesc *memdesc, wmb(); } -static const char * const memtype_str[] = { - [KGSL_MEMTYPE_OBJECTANY] = "any(0)", - [KGSL_MEMTYPE_FRAMEBUFFER] = "framebuffer", - [KGSL_MEMTYPE_RENDERBUFFER] = "renderbuffer", - [KGSL_MEMTYPE_ARRAYBUFFER] = "arraybuffer", - [KGSL_MEMTYPE_ELEMENTARRAYBUFFER] = "elementarraybuffer", - [KGSL_MEMTYPE_VERTEXARRAYBUFFER] = "vertexarraybuffer", - [KGSL_MEMTYPE_TEXTURE] = "texture", - [KGSL_MEMTYPE_SURFACE] = "surface", - [KGSL_MEMTYPE_EGL_SURFACE] = "egl_surface", - [KGSL_MEMTYPE_GL] = "gl", - [KGSL_MEMTYPE_CL] = "cl", - [KGSL_MEMTYPE_CL_BUFFER_MAP] = "cl_buffer_map", - [KGSL_MEMTYPE_CL_BUFFER_NOMAP] = "cl_buffer_nomap", - [KGSL_MEMTYPE_CL_IMAGE_MAP] = "cl_image_map", - [KGSL_MEMTYPE_CL_IMAGE_NOMAP] = "cl_image_nomap", - [KGSL_MEMTYPE_CL_KERNEL_STACK] = "cl_kernel_stack", - [KGSL_MEMTYPE_COMMAND] = "command", - [KGSL_MEMTYPE_2D] = "2d", - [KGSL_MEMTYPE_EGL_IMAGE] = "egl_image", - [KGSL_MEMTYPE_EGL_SHADOW] = "egl_shadow", - [KGSL_MEMTYPE_MULTISAMPLE] = 
"egl_multisample", - /* KGSL_MEMTYPE_KERNEL handled below, to avoid huge array */ -}; - void kgsl_get_memory_usage(char *name, size_t name_size, uint64_t memflags) { unsigned int type = FIELD_GET(KGSL_MEMTYPE_MASK, memflags); + struct kgsl_memtype *memtype; + int i; - if (type == KGSL_MEMTYPE_KERNEL) - strlcpy(name, "kernel", name_size); - else if (type < ARRAY_SIZE(memtype_str) && memtype_str[type] != NULL) - strlcpy(name, memtype_str[type], name_size); - else - snprintf(name, name_size, "VK/others(%3d)", type); + for (i = 0; memtype_attrs[i]; i++) { + memtype = container_of(memtype_attrs[i], struct kgsl_memtype, attr); + if (memtype->type == type) { + strlcpy(name, memtype->attr.name, name_size); + return; + } + } + + snprintf(name, name_size, "VK/others(%3d)", type); } int kgsl_memdesc_sg_dma(struct kgsl_memdesc *memdesc, @@ -885,6 +983,9 @@ static int kgsl_alloc_page(int *page_size, struct page **pages, if (pages == NULL) return -EINVAL; + if (fatal_signal_pending(current)) + return -ENOMEM; + page = shmem_read_mapping_page_gfp(shmem_filp->f_mapping, page_off, kgsl_gfp_mask(0)); if (IS_ERR(page)) @@ -934,6 +1035,9 @@ static int kgsl_alloc_page(int *page_size, struct page **pages, unsigned int page_off, struct file *shmem_filp, struct device *dev) { + if (fatal_signal_pending(current)) + return -ENOMEM; + return kgsl_pool_alloc_page(page_size, pages, pages_len, align, dev); } @@ -1261,7 +1365,11 @@ static int kgsl_system_alloc_pages(u64 size, struct page ***pages, gfp_t gfp = __GFP_ZERO | __GFP_HIGHMEM | GFP_KERNEL | __GFP_NORETRY; - local[i] = alloc_pages(gfp, get_order(PAGE_SIZE)); + if (!fatal_signal_pending(current)) + local[i] = alloc_pages(gfp, get_order(PAGE_SIZE)); + else + local[i] = NULL; + if (!local[i]) { for (i = i - 1; i >= 0; i--) __free_pages(local[i], get_order(PAGE_SIZE)); diff --git a/kgsl_sync.c b/kgsl_sync.c index 1103b51248..c138687bc8 100644 --- a/kgsl_sync.c +++ b/kgsl_sync.c @@ -636,9 +636,10 @@ static void kgsl_syncsource_cleanup(struct 
kgsl_process_private *private, struct kgsl_syncsource *syncsource) { struct kgsl_syncsource_fence *sfence, *next; + unsigned long flags; /* Signal all fences to release any callbacks */ - spin_lock(&syncsource->lock); + spin_lock_irqsave(&syncsource->lock, flags); list_for_each_entry_safe(sfence, next, &syncsource->child_list_head, child_list) { @@ -646,7 +647,7 @@ static void kgsl_syncsource_cleanup(struct kgsl_process_private *private, list_del_init(&sfence->child_list); } - spin_unlock(&syncsource->lock); + spin_unlock_irqrestore(&syncsource->lock, flags); /* put reference from syncsource creation */ kgsl_syncsource_put(syncsource); @@ -686,6 +687,7 @@ long kgsl_ioctl_syncsource_create_fence(struct kgsl_device_private *dev_priv, struct kgsl_syncsource_fence *sfence = NULL; struct sync_file *sync_file = NULL; int fd = -1; + unsigned long flags; /* * Take a refcount that is released when the fence is released @@ -727,9 +729,9 @@ long kgsl_ioctl_syncsource_create_fence(struct kgsl_device_private *dev_priv, param->fence_fd = fd; - spin_lock(&syncsource->lock); + spin_lock_irqsave(&syncsource->lock, flags); list_add_tail(&sfence->child_list, &syncsource->child_list_head); - spin_unlock(&syncsource->lock); + spin_unlock_irqrestore(&syncsource->lock, flags); out: /* * We're transferring ownership of the fence to the sync file. 
@@ -756,8 +758,9 @@ static int kgsl_syncsource_signal(struct kgsl_syncsource *syncsource, { struct kgsl_syncsource_fence *sfence, *next; int ret = -EINVAL; + unsigned long flags; - spin_lock(&syncsource->lock); + spin_lock_irqsave(&syncsource->lock, flags); list_for_each_entry_safe(sfence, next, &syncsource->child_list_head, child_list) { @@ -770,7 +773,7 @@ static int kgsl_syncsource_signal(struct kgsl_syncsource *syncsource, } } - spin_unlock(&syncsource->lock); + spin_unlock_irqrestore(&syncsource->lock, flags); return ret; } diff --git a/kgsl_timeline.c b/kgsl_timeline.c index d7b64abab4..b499face00 100644 --- a/kgsl_timeline.c +++ b/kgsl_timeline.c @@ -196,6 +196,16 @@ static bool timeline_fence_signaled(struct dma_fence *fence) fence->ops); } +static bool timeline_fence_enable_signaling(struct dma_fence *fence) +{ + /* + * Return value of false indicates the fence already passed. + * When fence is not passed we return true indicating successful + * enabling. + */ + return !timeline_fence_signaled(fence); +} + static const char *timeline_get_driver_name(struct dma_fence *fence) { return "kgsl-sw-timeline"; @@ -221,6 +231,7 @@ static const struct dma_fence_ops timeline_fence_ops = { .get_timeline_name = timeline_get_timeline_name, .signaled = timeline_fence_signaled, .release = timeline_fence_release, + .enable_signaling = timeline_fence_enable_signaling, .timeline_value_str = timeline_get_value_str, .use_64bit_seqno = true, }; @@ -298,10 +309,18 @@ struct dma_fence *kgsl_timeline_fence_alloc(struct kgsl_timeline *timeline, INIT_LIST_HEAD(&fence->node); - if (!dma_fence_is_signaled(&fence->base)) + /* + * Once fence is checked as not signaled, allow it to be added + * in the list before other thread such as kgsl_timeline_signal + * can get chance to signal. 
+ */ + spin_lock_irq(&timeline->lock); + if (!dma_fence_is_signaled_locked(&fence->base)) kgsl_timeline_add_fence(timeline, fence); trace_kgsl_timeline_fence_alloc(timeline->id, seqno); + spin_unlock_irq(&timeline->lock); + log_kgsl_timeline_fence_alloc_event(timeline->id, seqno); return &fence->base; diff --git a/kgsl_vbo.c b/kgsl_vbo.c index 75959b1475..091f2ecc3f 100644 --- a/kgsl_vbo.c +++ b/kgsl_vbo.c @@ -95,7 +95,11 @@ static void kgsl_memdesc_remove_range(struct kgsl_mem_entry *target, range = bind_to_range(node); next = interval_tree_iter_next(node, start, last); - if (range->entry->id == entry->id) { + /* + * If entry is null, consider it as a special request. Unbind + * the entire range between start and last in this case. + */ + if (!entry || range->entry->id == entry->id) { interval_tree_remove(node, &memdesc->ranges); trace_kgsl_mem_remove_bind_range(target, range->range.start, range->entry, @@ -359,6 +363,23 @@ kgsl_sharedmem_create_bind_op(struct kgsl_process_private *private, range.target_offset, range.length)) goto err; + /* + * Special case: Consider child id 0 as a special request incase of + * unbind. This helps to unbind the specified range (could span multiple + * child buffers) without supplying backing physical buffer information. + */ + if (range.child_id == 0 && range.op == KGSL_GPUMEM_RANGE_OP_UNBIND) { + op->ops[i].entry = NULL; + op->ops[i].start = range.target_offset; + op->ops[i].last = range.target_offset + range.length - 1; + /* Child offset doesn't matter for unbind. set it to 0 */ + op->ops[i].child_offset = 0; + op->ops[i].op = range.op; + + ranges += ranges_size; + continue; + } + /* Get the child object */ op->ops[i].entry = kgsl_sharedmem_find_id(private, range.child_id); From b5f34d537c72e87c8d9a757585b63a4e30e68e82 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Thu, 11 Nov 2021 19:07:38 -0800 Subject: [PATCH 0006/1016] msm: kgsl: Update changed kernel APIs Use the new kernel APIs. 
Change-Id: I58b6cff5782ff13fdce9e31e0753a3f277d73a39 Signed-off-by: Lynus Vaz --- adreno_snapshot.c | 2 +- governor_gpubw_mon.c | 2 +- governor_msm_adreno_tz.c | 2 +- kgsl_pwrscale.h | 1 + kgsl_snapshot.c | 6 +++--- kgsl_util.c | 2 ++ kgsl_util.h | 19 +++++++++++++++++++ 7 files changed, 28 insertions(+), 6 deletions(-) diff --git a/adreno_snapshot.c b/adreno_snapshot.c index d85307d966..fd9834f073 100644 --- a/adreno_snapshot.c +++ b/adreno_snapshot.c @@ -857,7 +857,7 @@ static void adreno_snapshot_os(struct kgsl_device *device, strlcpy(header->release, init_utsname()->release, sizeof(header->release)); strlcpy(header->version, init_utsname()->version, sizeof(header->version)); - header->seconds = get_seconds(); + header->seconds = ktime_get_real_seconds(); header->power_flags = device->pwrctrl.power_flags; header->power_level = device->pwrctrl.active_pwrlevel; header->power_interval_timeout = device->pwrctrl.interval_timeout; diff --git a/governor_gpubw_mon.c b/governor_gpubw_mon.c index 147c43511f..f7f19c40e5 100644 --- a/governor_gpubw_mon.c +++ b/governor_gpubw_mon.c @@ -299,7 +299,7 @@ static struct devfreq_governor devfreq_gpubw = { .name = "gpubw_mon", .get_target_freq = devfreq_gpubw_get_target, .event_handler = devfreq_gpubw_event_handler, - .immutable = 1, + .flags = DEVFREQ_GOV_FLAG_IMMUTABLE, }; int devfreq_gpubw_init(void) diff --git a/governor_msm_adreno_tz.c b/governor_msm_adreno_tz.c index 18f4f16ead..6004d6d3ac 100644 --- a/governor_msm_adreno_tz.c +++ b/governor_msm_adreno_tz.c @@ -546,7 +546,7 @@ static struct devfreq_governor msm_adreno_tz = { .name = "msm-adreno-tz", .get_target_freq = tz_get_target_freq, .event_handler = tz_handler, - .immutable = 1, + .flags = DEVFREQ_GOV_FLAG_IMMUTABLE, }; int msm_adreno_tz_init(void) diff --git a/kgsl_pwrscale.h b/kgsl_pwrscale.h index 2bdc9db0d9..ed40ea6ccd 100644 --- a/kgsl_pwrscale.h +++ b/kgsl_pwrscale.h @@ -6,6 +6,7 @@ #ifndef __KGSL_PWRSCALE_H #define __KGSL_PWRSCALE_H +#include "governor.h" #include 
"kgsl_pwrctrl.h" #include "msm_adreno_devfreq.h" diff --git a/kgsl_snapshot.c b/kgsl_snapshot.c index c2cac0c881..40d16820eb 100644 --- a/kgsl_snapshot.c +++ b/kgsl_snapshot.c @@ -3,8 +3,8 @@ * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. */ -#include #include +#include #include #include @@ -592,7 +592,7 @@ static void kgsl_device_snapshot_atomic(struct kgsl_device *device) * the kernel log */ getboottime64(&boot); - snapshot->timestamp = get_seconds() - boot.tv_sec; + snapshot->timestamp = ktime_get_real_seconds() - boot.tv_sec; kgsl_add_to_minidump("ATOMIC_GPU_SNAPSHOT", (u64) device->snapshot_memory_atomic.ptr, atomic_snapshot_phy_addr(device), device->snapshot_memory_atomic.size); @@ -682,7 +682,7 @@ void kgsl_device_snapshot(struct kgsl_device *device, */ getboottime64(&boot); - snapshot->timestamp = get_seconds() - boot.tv_sec; + snapshot->timestamp = ktime_get_real_seconds() - boot.tv_sec; /* Store the instance in the device until it gets dumped */ device->snapshot = snapshot; diff --git a/kgsl_util.c b/kgsl_util.c index 8ebbe349d9..8992fb4b46 100644 --- a/kgsl_util.c +++ b/kgsl_util.c @@ -211,6 +211,7 @@ void kgsl_hwunlock(struct cpu_gpu_lock *lock) lock->cpu_req = 0; } +#if IS_ENABLED(CONFIG_QCOM_VA_MINIDUMP) void kgsl_add_to_minidump(char *name, u64 virt_addr, u64 phy_addr, size_t size) { struct md_region md_entry = {0}; @@ -348,3 +349,4 @@ void kgsl_qcom_va_md_register(struct kgsl_device *device) if (qcom_va_md_register("KGSL", &kgsl_va_minidump_nb)) dev_err(device->dev, "Failed to register notifier with va_minidump\n"); } +#endif diff --git a/kgsl_util.h b/kgsl_util.h index f2da379828..0f3af38c10 100644 --- a/kgsl_util.h +++ b/kgsl_util.h @@ -113,6 +113,7 @@ int kgsl_clk_set_rate(struct clk_bulk_data *clks, int num_clks, */ int kgsl_zap_shader_load(struct device *dev, const char *name); +#if IS_ENABLED(CONFIG_QCOM_VA_MINIDUMP) /** * kgsl_add_to_minidump - Add a physically contiguous section to minidump * @name: Name of the section 
@@ -146,5 +147,23 @@ int kgsl_add_va_to_minidump(struct device *dev, const char *name, void *ptr, * @device: Pointer to kgsl device */ void kgsl_qcom_va_md_register(struct kgsl_device *device); +#else +static inline void kgsl_add_to_minidump(char *name, u64 virt_addr, u64 phy_addr, size_t size) +{ +} +static inline void kgsl_remove_from_minidump(char *name, u64 virt_addr, u64 phy_addr, size_t size) +{ +} + +static inline int kgsl_add_va_to_minidump(struct device *dev, const char *name, void *ptr, + size_t size) +{ + return 0; +} + +static inline void kgsl_qcom_va_md_register(struct kgsl_device *device) +{ +} +#endif #endif From 4038321e47d76d63d6b4201f0e15549e940bf14c Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Thu, 11 Nov 2021 19:08:18 -0800 Subject: [PATCH 0007/1016] msm: kgsl: Remove is_dma_buf_file() usage The is_dma_buf_file() API is private to dma-buf. Remove code that depends on it. Change-Id: I70562c219b915f3891f2ca89a6c48fd1cd34ca38 Signed-off-by: Lynus Vaz --- kgsl.c | 108 +++------------------------------------------------------ 1 file changed, 5 insertions(+), 103 deletions(-) diff --git a/kgsl.c b/kgsl.c index 8a423ddbd4..e28048eb4d 100644 --- a/kgsl.c +++ b/kgsl.c @@ -2589,103 +2589,6 @@ static int kgsl_setup_anon_useraddr(struct kgsl_pagetable *pagetable, return ret; } -#ifdef CONFIG_DMA_SHARED_BUFFER -static void _setup_cache_mode(struct kgsl_mem_entry *entry, - struct vm_area_struct *vma) -{ - uint64_t mode; - pgprot_t pgprot = vma->vm_page_prot; - - if ((pgprot_val(pgprot) == pgprot_val(pgprot_noncached(pgprot))) || - (pgprot_val(pgprot) == pgprot_val(pgprot_writecombine(pgprot)))) - mode = KGSL_CACHEMODE_WRITECOMBINE; - else - mode = KGSL_CACHEMODE_WRITEBACK; - - entry->memdesc.flags |= FIELD_PREP(KGSL_CACHEMODE_MASK, mode); -} - -static int kgsl_setup_dma_buf(struct kgsl_device *device, - struct kgsl_pagetable *pagetable, - struct kgsl_mem_entry *entry, - struct dma_buf *dmabuf); - -static int kgsl_setup_dmabuf_useraddr(struct kgsl_device 
*device, - struct kgsl_pagetable *pagetable, - struct kgsl_mem_entry *entry, unsigned long hostptr) -{ - struct vm_area_struct *vma; - struct dma_buf *dmabuf = NULL; - int ret; - - /* - * Find the VMA containing this pointer and figure out if it - * is a dma-buf. - */ - mmap_read_lock(current->mm); - vma = find_vma(current->mm, hostptr); - - if (vma && vma->vm_file) { - ret = check_vma_flags(vma, entry->memdesc.flags); - if (ret) { - mmap_read_unlock(current->mm); - return ret; - } - - /* - * Check to see that this isn't our own memory that we have - * already mapped - */ - if (vma->vm_ops == &kgsl_gpumem_vm_ops) { - mmap_read_unlock(current->mm); - return -EFAULT; - } - - if (!is_dma_buf_file(vma->vm_file)) { - mmap_read_unlock(current->mm); - return -ENODEV; - } - - /* Take a refcount because dma_buf_put() decrements the refcount */ - get_file(vma->vm_file); - - dmabuf = vma->vm_file->private_data; - } - - if (!dmabuf) { - mmap_read_unlock(current->mm); - return -ENODEV; - } - - ret = kgsl_setup_dma_buf(device, pagetable, entry, dmabuf); - if (ret) { - dma_buf_put(dmabuf); - mmap_read_unlock(current->mm); - return ret; - } - - /* Setup the cache mode for cache operations */ - _setup_cache_mode(entry, vma); - - if (kgsl_mmu_has_feature(device, KGSL_MMU_IO_COHERENT) && - (IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT) && - kgsl_cachemode_is_cached(entry->memdesc.flags))) - entry->memdesc.flags |= KGSL_MEMFLAGS_IOCOHERENT; - else - entry->memdesc.flags &= ~((u64) KGSL_MEMFLAGS_IOCOHERENT); - - mmap_read_unlock(current->mm); - return 0; -} -#else -static int kgsl_setup_dmabuf_useraddr(struct kgsl_device *device, - struct kgsl_pagetable *pagetable, - struct kgsl_mem_entry *entry, unsigned long hostptr) -{ - return -ENODEV; -} -#endif - static int kgsl_setup_useraddr(struct kgsl_device *device, struct kgsl_pagetable *pagetable, struct kgsl_mem_entry *entry, @@ -2696,12 +2599,6 @@ static int kgsl_setup_useraddr(struct kgsl_device *device, if (hostptr == 0 || 
!IS_ALIGNED(hostptr, PAGE_SIZE)) return -EINVAL; - /* Try to set up a dmabuf - if it returns -ENODEV assume anonymous */ - ret = kgsl_setup_dmabuf_useraddr(device, pagetable, entry, hostptr); - if (ret != -ENODEV) - return ret; - - /* Okay - lets go legacy */ return kgsl_setup_anon_useraddr(pagetable, entry, hostptr, offset, size); } @@ -2748,6 +2645,11 @@ static bool check_and_warn_secured(struct kgsl_device *device) } #ifdef CONFIG_DMA_SHARED_BUFFER +static int kgsl_setup_dma_buf(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, + struct dma_buf *dmabuf); + static long _gpuobj_map_dma_buf(struct kgsl_device *device, struct kgsl_pagetable *pagetable, struct kgsl_mem_entry *entry, From a9b5f09089b9d4fe17b6a1597e44e4032df5a0cc Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Fri, 12 Nov 2021 17:07:25 -0800 Subject: [PATCH 0008/1016] msm: kgsl: Enable the Gen7_2_0 GPU Add in the code to identify the Gen7_2_0 GPU. Change-Id: I7ba07628a6756a57bc386c71864348cb219ed090 Signed-off-by: Lynus Vaz --- adreno-gpulist.h | 104 +++++++++++++++++++++++++++++++++++++++++++++++ adreno.h | 2 + adreno_gen7.c | 12 ++---- adreno_gen7.h | 4 ++ gen7_reg.h | 2 + 5 files changed, 115 insertions(+), 9 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 91aedbfd4f..3c428535bc 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -1807,6 +1807,12 @@ static const struct kgsl_regmap_list gen7_0_0_hwcg_regs[] = { { GEN7_RBBM_CLOCK_HYST_GMU_GX, 0x00000555 }, }; +static const struct kgsl_regmap_list gen7_0_0_ao_hwcg_regs[] = { + { GEN7_GPU_GMU_AO_GMU_CGC_MODE_CNTL, 0x00020000 }, + { GEN7_GPU_GMU_AO_GMU_CGC_DELAY_CNTL, 0x00010111 }, + { GEN7_GPU_GMU_AO_GMU_CGC_HYST_CNTL, 0x00005555 }, +}; + /* GEN7_0_0 protected register list */ static const struct gen7_protected_regs gen7_0_0_protected_regs[] = { { GEN7_CP_PROTECT_REG + 0, 0x00000, 0x004ff, 0 }, @@ -1880,6 +1886,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_0_0 = { 
.zap_name = "a730_zap", .hwcg = gen7_0_0_hwcg_regs, .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), + .ao_hwcg = gen7_0_0_ao_hwcg_regs, + .ao_hwcg_count = ARRAY_SIZE(gen7_0_0_ao_hwcg_regs), .gbif = gen7_0_0_gbif_regs, .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), .hang_detect_cycles = 0xcfffff, @@ -1910,6 +1918,99 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_0_1 = { .zap_name = "a730_zap", .hwcg = gen7_0_0_hwcg_regs, .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), + .ao_hwcg = gen7_0_0_ao_hwcg_regs, + .ao_hwcg_count = ARRAY_SIZE(gen7_0_0_ao_hwcg_regs), + .gbif = gen7_0_0_gbif_regs, + .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = gen7_0_0_protected_regs, + .highest_bank_bit = 16, +}; + +static const struct kgsl_regmap_list gen7_2_0_hwcg_regs[] = { + { GEN7_RBBM_CLOCK_CNTL_SP0, 0x02222222 }, + { GEN7_RBBM_CLOCK_CNTL2_SP0, 0x22222222 }, + { GEN7_RBBM_CLOCK_HYST_SP0, 0x003cf3cf }, + { GEN7_RBBM_CLOCK_DELAY_SP0, 0x00000080 }, + { GEN7_RBBM_CLOCK_CNTL_TP0, 0x22222222 }, + { GEN7_RBBM_CLOCK_CNTL2_TP0, 0x22222222 }, + { GEN7_RBBM_CLOCK_CNTL3_TP0, 0x22222222 }, + { GEN7_RBBM_CLOCK_CNTL4_TP0, 0x00222222 }, + { GEN7_RBBM_CLOCK_HYST_TP0, 0x77777777 }, + { GEN7_RBBM_CLOCK_HYST2_TP0, 0x77777777 }, + { GEN7_RBBM_CLOCK_HYST3_TP0, 0x77777777 }, + { GEN7_RBBM_CLOCK_HYST4_TP0, 0x00077777 }, + { GEN7_RBBM_CLOCK_DELAY_TP0, 0x11111111 }, + { GEN7_RBBM_CLOCK_DELAY2_TP0, 0x11111111 }, + { GEN7_RBBM_CLOCK_DELAY3_TP0, 0x11111111 }, + { GEN7_RBBM_CLOCK_DELAY4_TP0, 0x00011111 }, + { GEN7_RBBM_CLOCK_CNTL_UCHE, 0x22222222 }, + { GEN7_RBBM_CLOCK_CNTL2_UCHE, 0x00222222 }, + { GEN7_RBBM_CLOCK_HYST_UCHE, 0x00000444 }, + { GEN7_RBBM_CLOCK_DELAY_UCHE, 0x00000222 }, + { GEN7_RBBM_CLOCK_CNTL_RB0, 0x22222222 }, + { GEN7_RBBM_CLOCK_CNTL2_RB0, 0x01002222 }, + { GEN7_RBBM_CLOCK_CNTL_CCU0, 0x00002220 }, + { GEN7_RBBM_CLOCK_HYST_RB_CCU0, 0x44000f00 }, + { GEN7_RBBM_CLOCK_CNTL_RAC, 0x25222022 }, + { GEN7_RBBM_CLOCK_CNTL2_RAC, 0x00555555 }, + { 
GEN7_RBBM_CLOCK_DELAY_RAC, 0x00000011 }, + { GEN7_RBBM_CLOCK_HYST_RAC, 0x00440044 }, + { GEN7_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222 }, + { GEN7_RBBM_CLOCK_MODE2_GRAS, 0x00000222 }, + { GEN7_RBBM_CLOCK_MODE_BV_GRAS, 0x00222222 }, + { GEN7_RBBM_CLOCK_MODE_GPC, 0x02222223 }, + { GEN7_RBBM_CLOCK_MODE_VFD, 0x00222222 }, + { GEN7_RBBM_CLOCK_MODE_BV_GPC, 0x00222222 }, + { GEN7_RBBM_CLOCK_MODE_BV_VFD, 0x00002222 }, + { GEN7_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000 }, + { GEN7_RBBM_CLOCK_HYST_GPC, 0x04104004 }, + { GEN7_RBBM_CLOCK_HYST_VFD, 0x00000000 }, + { GEN7_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000000 }, + { GEN7_RBBM_CLOCK_DELAY_GPC, 0x00000200 }, + { GEN7_RBBM_CLOCK_DELAY_VFD, 0x00000000 }, + { GEN7_RBBM_CLOCK_MODE_HLSQ, 0x00002222 }, + { GEN7_RBBM_CLOCK_DELAY_HLSQ, 0x00000000 }, + { GEN7_RBBM_CLOCK_HYST_HLSQ, 0x00000000 }, + { GEN7_RBBM_CLOCK_MODE_BV_LRZ, 0x55555552 }, + { GEN7_RBBM_CLOCK_HYST2_VFD, 0x00000000 }, + { GEN7_RBBM_CLOCK_MODE_CP, 0x00000222 }, + { GEN7_RBBM_CLOCK_CNTL, 0x8aa8aa82 }, + { GEN7_RBBM_ISDB_CNT, 0x00000182 }, + { GEN7_RBBM_RAC_THRESHOLD_CNT, 0x00000000 }, + { GEN7_RBBM_SP_HYST_CNT, 0x00000000 }, + { GEN7_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222 }, + { GEN7_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111 }, + { GEN7_RBBM_CLOCK_HYST_GMU_GX, 0x00000555 }, +}; + +static const struct kgsl_regmap_list gen7_2_0_ao_hwcg_regs[] = { + { GEN7_GPU_GMU_AO_GMU_CGC_MODE_CNTL, 0x00020222 }, + { GEN7_GPU_GMU_AO_GMU_CGC_DELAY_CNTL, 0x00010111 }, + { GEN7_GPU_GMU_AO_GMU_CGC_HYST_CNTL, 0x00005555 }, +}; + +static const struct adreno_gen7_core adreno_gpu_core_gen7_2_0 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_GEN7_2_0, + UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), + .compatible = "qcom,adreno-gpu-gen7-2-0", + .chipid = 0x43050a00, + .features = ADRENO_APRIV | ADRENO_IOCOHERENT, + .gpudev = &adreno_gen7_hwsched_gpudev.base, + .perfcounters = &adreno_gen7_perfcounters, + .gmem_base = 0, + .gmem_size = 3 * SZ_1M, + .bus_width = 32, + .snapshot_size = SZ_4M, + }, + .sqefw_name = 
"a740_sqe.fw", + .gmufw_name = "gmu_gen70200.bin", + .zap_name = "a740_zap", + .hwcg = gen7_2_0_hwcg_regs, + .hwcg_count = ARRAY_SIZE(gen7_2_0_hwcg_regs), + .ao_hwcg = gen7_2_0_ao_hwcg_regs, + .ao_hwcg_count = ARRAY_SIZE(gen7_2_0_ao_hwcg_regs), .gbif = gen7_0_0_gbif_regs, .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), .hang_detect_cycles = 0xcfffff, @@ -1937,6 +2038,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_4_0 = { .zap_name = "a730_zap", .hwcg = gen7_0_0_hwcg_regs, .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), + .ao_hwcg = gen7_0_0_ao_hwcg_regs, + .ao_hwcg_count = ARRAY_SIZE(gen7_0_0_ao_hwcg_regs), .gbif = gen7_0_0_gbif_regs, .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), .hang_detect_cycles = 0xcfffff, @@ -1983,6 +2086,7 @@ static const struct adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_gen7_0_0.base, &adreno_gpu_core_gen7_0_1.base, &adreno_gpu_core_a662.base, + &adreno_gpu_core_gen7_2_0.base, &adreno_gpu_core_gen7_4_0.base, }; diff --git a/adreno.h b/adreno.h index e8838e3eb6..5609f6e12a 100644 --- a/adreno.h +++ b/adreno.h @@ -199,6 +199,7 @@ enum adreno_gpurev { */ ADRENO_REV_GEN7_0_0 = 0x070000, ADRENO_REV_GEN7_0_1 = 0x070001, + ADRENO_REV_GEN7_2_0 = 0x070200, ADRENO_REV_GEN7_4_0 = 0x070400, }; @@ -1106,6 +1107,7 @@ static inline int adreno_is_gen7(struct adreno_device *adreno_dev) ADRENO_TARGET(gen7_0_0, ADRENO_REV_GEN7_0_0) ADRENO_TARGET(gen7_0_1, ADRENO_REV_GEN7_0_1) +ADRENO_TARGET(gen7_2_0, ADRENO_REV_GEN7_2_0) /* * adreno_checkreg_off() - Checks the validity of a register enum diff --git a/adreno_gen7.c b/adreno_gen7.c index 9a1193215d..66d349b4f6 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -232,9 +232,6 @@ void gen7_cx_regulator_disable_wait(struct regulator *reg, } #define RBBM_CLOCK_CNTL_ON 0x8aa8aa82 -#define GMU_AO_CGC_MODE_CNTL 0x00020000 -#define GMU_AO_CGC_DELAY_CNTL 0x00010111 -#define GMU_AO_CGC_HYST_CNTL 0x00005555 static void gen7_hwcg_set(struct adreno_device *adreno_dev, bool on) { @@ -246,12 +243,9 @@ 
static void gen7_hwcg_set(struct adreno_device *adreno_dev, bool on) if (!adreno_dev->hwcg_enabled) on = false; - gmu_core_regwrite(device, GEN7_GPU_GMU_AO_GMU_CGC_MODE_CNTL, - on ? GMU_AO_CGC_MODE_CNTL : 0); - gmu_core_regwrite(device, GEN7_GPU_GMU_AO_GMU_CGC_DELAY_CNTL, - on ? GMU_AO_CGC_DELAY_CNTL : 0); - gmu_core_regwrite(device, GEN7_GPU_GMU_AO_GMU_CGC_HYST_CNTL, - on ? GMU_AO_CGC_HYST_CNTL : 0); + for (i = 0; i < gen7_core->ao_hwcg_count; i++) + gmu_core_regwrite(device, gen7_core->ao_hwcg[i].offset, + on ? gen7_core->ao_hwcg[i].val : 0); kgsl_regread(device, GEN7_RBBM_CLOCK_CNTL, &value); diff --git a/adreno_gen7.h b/adreno_gen7.h index 761dc14430..b061a5fc8f 100644 --- a/adreno_gen7.h +++ b/adreno_gen7.h @@ -73,6 +73,10 @@ struct adreno_gen7_core { const struct kgsl_regmap_list *hwcg; /** @hwcg_count: Number of registers in @hwcg */ u32 hwcg_count; + /** @ao_hwcg: List of registers and values to write for HWCG in AO block */ + const struct kgsl_regmap_list *ao_hwcg; + /** @ao_hwcg_count: Number of registers in @ao_hwcg */ + u32 ao_hwcg_count; /** @gbif: List of registers and values to write for GBIF */ const struct kgsl_regmap_list *gbif; /** @gbif_count: Number of registers in @gbif */ diff --git a/gen7_reg.h b/gen7_reg.h index da03e710ff..8f6a433f21 100644 --- a/gen7_reg.h +++ b/gen7_reg.h @@ -598,6 +598,7 @@ #define GEN7_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x00109 #define GEN7_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x0010a #define GEN7_RBBM_CLOCK_CNTL_UCHE 0x0010b +#define GEN7_RBBM_CLOCK_CNTL2_UCHE 0x0010c #define GEN7_RBBM_CLOCK_DELAY_UCHE 0x0010f #define GEN7_RBBM_CLOCK_HYST_UCHE 0x00110 #define GEN7_RBBM_CLOCK_MODE_VFD 0x00111 @@ -613,6 +614,7 @@ #define GEN7_RBBM_CLOCK_MODE_HLSQ 0x0011b #define GEN7_RBBM_CLOCK_DELAY_HLSQ 0x0011c #define GEN7_RBBM_CLOCK_HYST_HLSQ 0x0011d +#define GEN7_RBBM_CLOCK_HYST2_VFD 0x0012f #define GEN7_RBBM_CLOCK_MODE_CP 0x00260 #define GEN7_RBBM_CLOCK_MODE_BV_LRZ 0x00284 #define GEN7_RBBM_CLOCK_MODE_BV_GRAS 0x00285 From 
3a5e9c42271363a21c78b36fe6d79342c1b44f39 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Thu, 11 Nov 2021 18:58:35 -0800 Subject: [PATCH 0009/1016] msm: kgsl: Get GMU frequencies from devicetree The frequencies the GMU clocks can run at may change between devices. Read an optional devicetree property "qcom,gmu-freq-table" that is a list of frequencies that the GMU can run at. Change-Id: I9166a02cbbf753dd1ca82515a5af0086c87e1ac7 Signed-off-by: Lynus Vaz --- adreno_gen7_gmu.c | 88 ++++++++++++++++++++++++++++++++----------- adreno_gen7_gmu.h | 4 ++ adreno_gen7_hwsched.c | 6 +-- adreno_gen7_rpmh.c | 17 +++++---- 4 files changed, 84 insertions(+), 31 deletions(-) diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index da1a55ea50..44b39a0165 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -3,6 +3,7 @@ * Copyright (c) 2021, The Linux Foundation. All rights reserved. */ +#include #include #include #include @@ -1486,10 +1487,10 @@ int gen7_gmu_enable_clks(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret; - gen7_rdpm_cx_freq_update(gmu, GMU_FREQ_MIN / 1000); + gen7_rdpm_cx_freq_update(gmu, gmu->freqs[0] / 1000); ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", - GMU_FREQ_MIN); + gmu->freqs[0]); if (ret) { dev_err(&gmu->pdev->dev, "Unable to set the GMU clock\n"); return ret; @@ -1837,6 +1838,68 @@ static int gen7_gmu_reg_probe(struct adreno_device *adreno_dev) return ret; } +static int gen7_gmu_clk_probe(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret, i; + int tbl_size; + int num_freqs; + int offset; + + ret = devm_clk_bulk_get_all(&gmu->pdev->dev, &gmu->clks); + if (ret < 0) + return ret; + + /* + * Voting for apb_pclk will enable power and clocks required for + * QDSS path to function. However, if QCOM_KGSL_QDSS_STM is not enabled, + * QDSS is essentially unusable. 
Hence, if QDSS cannot be used, + * don't vote for this clock. + */ + if (!IS_ENABLED(CONFIG_QCOM_KGSL_QDSS_STM)) { + for (i = 0; i < ret; i++) { + if (!strcmp(gmu->clks[i].id, "apb_pclk")) { + gmu->clks[i].clk = NULL; + break; + } + } + } + + gmu->num_clks = ret; + + /* Read the optional list of GMU frequencies */ + if (of_get_property(gmu->pdev->dev.of_node, + "qcom,gmu-freq-table", &tbl_size) == NULL) + goto default_gmu_freq; + + num_freqs = (tbl_size / sizeof(u32)) / 2; + if (num_freqs != ARRAY_SIZE(gmu->freqs)) + goto default_gmu_freq; + + for (i = 0; i < num_freqs; i++) { + offset = i * 2; + ret = of_property_read_u32_index(gmu->pdev->dev.of_node, + "qcom,gmu-freq-table", offset, &gmu->freqs[i]); + if (ret) + goto default_gmu_freq; + ret = of_property_read_u32_index(gmu->pdev->dev.of_node, + "qcom,gmu-freq-table", offset + 1, &gmu->vlvls[i]); + if (ret) + goto default_gmu_freq; + } + return 0; + +default_gmu_freq: + /* The GMU frequency table is missing or invalid. Go with a default */ + gmu->freqs[0] = GMU_FREQ_MIN; + gmu->vlvls[0] = RPMH_REGULATOR_LEVEL_LOW_SVS; + gmu->freqs[1] = GMU_FREQ_MAX; + gmu->vlvls[1] = RPMH_REGULATOR_LEVEL_SVS; + + return 0; +} + static void gen7_gmu_rdpm_probe(struct gen7_gmu_device *gmu, struct kgsl_device *device) { @@ -1980,27 +2043,10 @@ int gen7_gmu_probe(struct kgsl_device *device, if (ret) return ret; - ret = devm_clk_bulk_get_all(&pdev->dev, &gmu->clks); - if (ret < 0) + ret = gen7_gmu_clk_probe(adreno_dev); + if (ret) return ret; - /* - * Voting for apb_pclk will enable power and clocks required for - * QDSS path to function. However, if QCOM_KGSL_QDSS_STM is not enabled, - * QDSS is essentially unusable. Hence, if QDSS cannot be used, - * don't vote for this clock. 
- */ - if (!IS_ENABLED(CONFIG_QCOM_KGSL_QDSS_STM)) { - for (i = 0; i < ret; i++) { - if (!strcmp(gmu->clks[i].id, "apb_pclk")) { - gmu->clks[i].clk = NULL; - break; - } - } - } - - gmu->num_clks = ret; - /* Set up GMU IOMMU and shared memory with GMU */ ret = gen7_gmu_iommu_init(gmu); if (ret) diff --git a/adreno_gen7_gmu.h b/adreno_gen7_gmu.h index 5f40bc575f..91e1b27ee0 100644 --- a/adreno_gen7_gmu.h +++ b/adreno_gen7_gmu.h @@ -55,6 +55,10 @@ struct gen7_gmu_device { /** @num_clks: Number of entries in the @clks array */ int num_clks; unsigned int idle_level; + /** @freqs: Array of GMU frequencies */ + u32 freqs[2]; + /** @vlvls: Array of GMU voltage levels */ + u32 vlvls[2]; struct kgsl_mailbox mailbox; /** @gmu_globals: Array to store gmu global buffers */ struct kgsl_memdesc gmu_globals[GMU_KERNEL_ENTRIES]; diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 18f6a6178c..25aa7f8293 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -913,17 +913,17 @@ static void scale_gmu_frequency(struct adreno_device *adreno_dev, int buslevel) struct kgsl_pwrctrl *pwr = &device->pwrctrl; struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); static unsigned long prev_freq; - unsigned long freq = GMU_FREQ_MIN; + unsigned long freq = gmu->freqs[0]; if (!gmu->perf_ddr_bw) return; /* * Scale the GMU if DDR is at a CX corner at which GMU can run at - * 500 Mhz + * a higher frequency */ if (pwr->ddr_table[buslevel] >= gmu->perf_ddr_bw) - freq = GMU_FREQ_MAX; + freq = gmu->freqs[1]; if (prev_freq == freq) return; diff --git a/adreno_gen7_rpmh.c b/adreno_gen7_rpmh.c index aa7841ad93..71b0db2c67 100644 --- a/adreno_gen7_rpmh.c +++ b/adreno_gen7_rpmh.c @@ -3,7 +3,6 @@ * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
*/ -#include #include #include #include @@ -254,12 +253,15 @@ static struct rpmh_bw_votes *build_rpmh_bw_votes(struct bcm *bcms, * @hfi: Pointer to hfi device * @pri_rail: Pointer to primary power rail vlvl table * @sec_rail: Pointer to second/dependent power rail vlvl table + * @freqs: List of GMU frequencies + * @vlvls: List of GMU voltage levels * * This function initializes the cx votes for all gmu frequencies * for gmu dcvs */ static int setup_cx_arc_votes(struct gen7_hfi *hfi, - struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail) + struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail, + u32 *freqs, u32 *vlvls) { /* Hardcoded values of GMU CX voltage levels */ u16 gmu_cx_vlvl[MAX_CX_LEVELS]; @@ -268,14 +270,14 @@ static int setup_cx_arc_votes(struct gen7_hfi *hfi, int ret, i; gmu_cx_vlvl[0] = 0; - gmu_cx_vlvl[1] = RPMH_REGULATOR_LEVEL_LOW_SVS; - gmu_cx_vlvl[2] = RPMH_REGULATOR_LEVEL_SVS; + gmu_cx_vlvl[1] = vlvls[0]; + gmu_cx_vlvl[2] = vlvls[1]; table->gmu_level_num = 3; table->cx_votes[0].freq = 0; - table->cx_votes[1].freq = GMU_FREQ_MIN / 1000; - table->cx_votes[2].freq = GMU_FREQ_MAX / 1000; + table->cx_votes[1].freq = freqs[0] / 1000; + table->cx_votes[2].freq = freqs[1] / 1000; ret = setup_volt_dependency_tbl(cx_votes, pri_rail, sec_rail, gmu_cx_vlvl, table->gmu_level_num); @@ -363,7 +365,8 @@ static int build_dcvs_table(struct adreno_device *adreno_dev) if (ret) return ret; - ret = setup_cx_arc_votes(hfi, &cx_arc, &mx_arc); + ret = setup_cx_arc_votes(hfi, &cx_arc, &mx_arc, + gmu->freqs, gmu->vlvls); if (ret) return ret; From ada4d7c8550109006d70dca0c81d8d7d9aaa1ed3 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Tue, 30 Nov 2021 14:37:48 -0800 Subject: [PATCH 0010/1016] msm: kgsl: Update the header include paths The header files that KGSL uses have been modified in recent kernel versions. Update the file names and include paths that we look for. 
Change-Id: I79ff6ab77c12393bc974bc30f9a294b67da76c9d Signed-off-by: Lynus Vaz --- Kbuild | 2 +- kgsl.c | 1 - kgsl_iommu.c | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Kbuild b/Kbuild index b4d7e582ed..daf7ace63f 100644 --- a/Kbuild +++ b/Kbuild @@ -13,7 +13,7 @@ ifeq ($(CONFIG_ARCH_KALAMA), y) include $(KGSL_PATH)/config/gki_waipiodisp.conf endif -ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERN_SRC)/drivers/devfreq +ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERN_SRC)/drivers/devfreq -I$(KERN_SRC)/drivers/iommu obj-$(CONFIG_QCOM_KGSL) += msm_kgsl.o diff --git a/kgsl.c b/kgsl.c index e28048eb4d..1a72257c21 100644 --- a/kgsl.c +++ b/kgsl.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 16a006c82a..e1e845d6b0 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "adreno.h" From f092e735c5469e2128288c3b6473d8f995342559 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Fri, 29 Oct 2021 13:56:39 -0700 Subject: [PATCH 0011/1016] msm: kgsl: Honor the QMAA override flags The graphics driver can be compiled with the appropriate QMAA override flags. 
Change-Id: I2d1d43a6f2075e5c7895291fcf7a592e57c98e41 Signed-off-by: Lynus Vaz --- Android.mk | 20 ++++++++++++++------ Kbuild | 2 +- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/Android.mk b/Android.mk index 63924eac51..78597cb706 100644 --- a/Android.mk +++ b/Android.mk @@ -1,4 +1,13 @@ -ifneq ($(TARGET_USES_QMAA),true) +ifeq ($(TARGET_USES_QMAA),true) + KGSL_ENABLED := false + ifeq ($(TARGET_USES_QMAA_OVERRIDE_GFX),true) + KGSL_ENABLED := true + endif # TARGET_USES_QMAA_OVERRIDE_GFX +else + KGSL_ENABLED := true +endif # TARGET_USES_QMAA + +ifeq ($(KGSL_ENABLED),true) KGSL_SELECT := CONFIG_QCOM_KGSL=m LOCAL_PATH := $(call my-dir) @@ -8,12 +17,10 @@ include $(CLEAR_VARS) ifneq ($(findstring vendor,$(LOCAL_PATH)),) DLKM_DIR := device/qcom/common/dlkm -KERN_SRC := $(ANDROID_TOP)/kernel_platform/msm-kernel KBUILD_OPTIONS += BOARD_PLATFORM=$(TARGET_BOARD_PLATFORM) KBUILD_OPTIONS += $(KGSL_SELECT) KBUILD_OPTIONS += MODNAME=msm_kgsl -KBUILD_OPTIONS += KERN_SRC=$(KERN_SRC) KBUILD_OPTIONS += KBUILD_EXTRA_SYMBOLS=$(PWD)/$(call intermediates-dir-for,DLKM,mmrm-module-symvers)/Module.symvers @@ -25,12 +32,13 @@ LOCAL_MODULE_KBUILD_NAME := msm_kgsl.ko LOCAL_MODULE_TAGS := optional LOCAL_MODULE_DEBUG_ENABLE := true LOCAL_MODULE_PATH := $(KERNEL_MODULES_OUT) -#LOCAL_REQUIRED_MODULES := mmrm-module-symvers -#LOCAL_ADDITIONAL_DEPENDENCIES := $(call intermediates-dir-for,DLKM,mmrm-module-symvers)/Module.symvers + +LOCAL_REQUIRED_MODULES := mmrm-module-symvers +LOCAL_ADDITIONAL_DEPENDENCIES := $(call intermediates-dir-for,DLKM,mmrm-module-symvers)/Module.symvers # Include msm_kgsl.ko in the /vendor/lib/modules (vendor.img) BOARD_VENDOR_KERNEL_MODULES += $(LOCAL_MODULE_PATH)/$(LOCAL_MODULE) include $(DLKM_DIR)/Build_external_kernelmodule.mk endif # DLKM check -endif # QMAA check +endif # KGSL_ENABLED diff --git a/Kbuild b/Kbuild index daf7ace63f..8b921fe6ab 100644 --- a/Kbuild +++ b/Kbuild @@ -13,7 +13,7 @@ ifeq ($(CONFIG_ARCH_KALAMA), y) include 
$(KGSL_PATH)/config/gki_waipiodisp.conf endif -ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERN_SRC)/drivers/devfreq -I$(KERN_SRC)/drivers/iommu +ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERNEL_SRC)/drivers/devfreq -I$(KERNEL_SRC)/drivers/iommu obj-$(CONFIG_QCOM_KGSL) += msm_kgsl.o From a0a56e9d4ebade419597e801cfc0c8ccec80aa43 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Mon, 22 Nov 2021 16:59:34 -0800 Subject: [PATCH 0012/1016] msm: kgsl: Use the GPU revision for the GMU id On Gen7 GPUs, the GMU uses a new versioning system for the chipid that is based on the GPU revision. Set up the GMU id based on this new system. Change-Id: I1b4701d67e69d0bbbc916448b55d9d6a0b08eed3 Signed-off-by: Lynus Vaz --- adreno.h | 9 +++++++++ adreno_gen7_gmu.c | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/adreno.h b/adreno.h index 5609f6e12a..ffce7391d5 100644 --- a/adreno.h +++ b/adreno.h @@ -58,6 +58,15 @@ FIELD_PREP(GENMASK(15, 12), ADRENO_CHIPID_MINOR(_id)) | \ FIELD_PREP(GENMASK(11, 8), ADRENO_CHIPID_PATCH(_id))) +#define ADRENO_REV_MAJOR(_rev) FIELD_GET(GENMASK(23, 16), _rev) +#define ADRENO_REV_MINOR(_rev) FIELD_GET(GENMASK(15, 8), _rev) +#define ADRENO_REV_PATCH(_rev) FIELD_GET(GENMASK(7, 0), _rev) + +#define ADRENO_GMU_REV(_rev) \ + (FIELD_PREP(GENMASK(31, 24), ADRENO_REV_MAJOR(_rev)) | \ + FIELD_PREP(GENMASK(23, 16), ADRENO_REV_MINOR(_rev)) | \ + FIELD_PREP(GENMASK(15, 8), ADRENO_REV_PATCH(_rev))) + /* ADRENO_GPUREV - Return the GPU ID for the given adreno_device */ #define ADRENO_GPUREV(_a) ((_a)->gpucore->gpurev) diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 44b39a0165..c32cb2f9c2 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -832,7 +832,7 @@ void gen7_gmu_register_config(struct adreno_device *adreno_dev) /* Pass chipid to GMU FW, must happen before starting GMU */ gmu_core_regwrite(device, GEN7_GMU_GENERAL_10, - 
ADRENO_GMU_CHIPID(adreno_dev->chipid)); + ADRENO_GMU_REV(ADRENO_GPUREV(adreno_dev))); /* Log size is encoded in (number of 4K units - 1) */ val = (gmu->gmu_log->gmuaddr & GENMASK(31, 12)) | From 6935541597c08e988e57eac800d4b9f954e39f84 Mon Sep 17 00:00:00 2001 From: Mohammed Mirza Mandayappurath Manzoor Date: Mon, 4 Oct 2021 12:02:57 -0700 Subject: [PATCH 0013/1016] msm: kgsl: Drop aggregated bandwidth vote during thermal throttling There is a possibility of gpu consuming bandwidth above the allowed ddr bandwidth for respective gpu power levels. Under thermal throttling, this could keep the DDR to run at high levels causing a thermal reset. When GPU is throttled to its lowest level, drop the ab vote as a last resort to prevent thermal reset. Change-Id: Ia0885ac1ebfc58f4af9f999dd8063c905ec130c3 Signed-off-by: Mohammed Mirza Mandayappurath Manzoor Signed-off-by: Harshdeep Dhatt --- kgsl_pwrscale.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kgsl_pwrscale.c b/kgsl_pwrscale.c index fc7e47e177..caef4788c2 100644 --- a/kgsl_pwrscale.c +++ b/kgsl_pwrscale.c @@ -498,7 +498,15 @@ int kgsl_busmon_target(struct device *dev, unsigned long *freq, u32 flags) /* Update bus vote if AB or IB is modified */ if ((pwr->bus_mod != b) || (pwr->bus_ab_mbytes != ab_mbytes)) { pwr->bus_percent_ab = device->pwrscale.bus_profile.percent_ab; - pwr->bus_ab_mbytes = ab_mbytes; + /* + * When gpu is thermally throttled to its lowest power level, + * drop GPU's AB vote as a last resort to lower CX voltage and + * to prevent thermal reset. 
+ */ + if (pwr->thermal_pwrlevel != pwr->num_pwrlevels - 1) + pwr->bus_ab_mbytes = ab_mbytes; + else + pwr->bus_ab_mbytes = 0; kgsl_bus_update(device, KGSL_BUS_VOTE_ON); } From 3cb1bfe3368703538d46b1d0ee539c1a6fd594ce Mon Sep 17 00:00:00 2001 From: Mohammed Mirza Mandayappurath Manzoor Date: Tue, 26 Oct 2021 15:00:03 -0700 Subject: [PATCH 0014/1016] msm: kgsl: Remove inline performance counter enable for gen7 targets HLSQ, SP and TP HW did not allow any programming between programming of draw_init, 2D_init, CL_init, Global_event and context_done. To get around this, KMD had an inline enable mechanism to enable these performance counters. As this is fixed on gen7 targets, program these regsiters from HLOS. Change-Id: I2a08fe19a486b70059484979fe0d1e1718cd4776 Signed-off-by: Mohammed Mirza Mandayappurath Manzoor --- adreno_gen7_perfcounter.c | 76 +++------------------------------------ 1 file changed, 5 insertions(+), 71 deletions(-) diff --git a/adreno_gen7_perfcounter.c b/adreno_gen7_perfcounter.c index f088856da3..75a4b9831b 100644 --- a/adreno_gen7_perfcounter.c +++ b/adreno_gen7_perfcounter.c @@ -46,67 +46,6 @@ static int gen7_counter_enable(struct adreno_device *adreno_dev, return ret; } -static int gen7_counter_inline_enable(struct adreno_device *adreno_dev, - const struct adreno_perfcount_group *group, - unsigned int counter, unsigned int countable) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct adreno_perfcount_register *reg = &group->regs[counter]; - struct adreno_ringbuffer *rb = &adreno_dev->ringbuffers[0]; - u32 cmds[3]; - int ret; - - if (!(device->state == KGSL_STATE_ACTIVE)) - return gen7_counter_enable(adreno_dev, group, counter, - countable); - - if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) - gen7_perfcounter_update(adreno_dev, reg, false); - - cmds[0] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); - cmds[1] = cp_type4_packet(reg->select, 1); - cmds[2] = countable; - - /* submit to highest priority RB always */ - ret = 
gen7_ringbuffer_addcmds(adreno_dev, rb, NULL, - F_NOTPROTECTED, cmds, 3, 0, NULL); - if (ret) - return ret; - - /* - * schedule dispatcher to make sure rb[0] is run, because - * if the current RB is not rb[0] and gpu is idle then - * rb[0] will not get scheduled to run - */ - if (adreno_dev->cur_rb != rb) - adreno_dispatcher_schedule(device); - - /* wait for the above commands submitted to complete */ - ret = adreno_ringbuffer_waittimestamp(rb, rb->timestamp, - ADRENO_IDLE_TIMEOUT); - - if (ret) { - /* - * If we were woken up because of cancelling rb events - * either due to soft reset or adreno_stop, ignore the - * error and return 0 here. The perfcounter is already - * set up in software and it will be programmed in - * hardware when we wake up or come up after soft reset - */ - if (ret == -EAGAIN) - ret = 0; - else - dev_err(device->dev, - "Perfcounter %s/%u/%u start via commands failed %d\n", - group->name, counter, countable, ret); - } - - if (!ret) - reg->value = 0; - - return ret; -} - static u64 gen7_counter_read(struct adreno_device *adreno_dev, const struct adreno_perfcount_group *group, unsigned int counter) @@ -849,8 +788,7 @@ static const struct adreno_perfcount_group gen7_perfcounter_groups gen7_counter_enable, gen7_counter_read), GEN7_REGULAR_PERFCOUNTER_GROUP(PC, pc), GEN7_REGULAR_PERFCOUNTER_GROUP(VFD, vfd), - GEN7_PERFCOUNTER_GROUP(HLSQ, hlsq, - gen7_counter_inline_enable, gen7_counter_read), + GEN7_REGULAR_PERFCOUNTER_GROUP(HLSQ, hlsq), GEN7_REGULAR_PERFCOUNTER_GROUP(VPC, vpc), GEN7_REGULAR_PERFCOUNTER_GROUP(CCU, ccu), GEN7_REGULAR_PERFCOUNTER_GROUP(CMP, cmp), @@ -858,10 +796,8 @@ static const struct adreno_perfcount_group gen7_perfcounter_groups GEN7_REGULAR_PERFCOUNTER_GROUP(RAS, ras), GEN7_REGULAR_PERFCOUNTER_GROUP(LRZ, lrz), GEN7_REGULAR_PERFCOUNTER_GROUP(UCHE, uche), - GEN7_PERFCOUNTER_GROUP(TP, tp, - gen7_counter_inline_enable, gen7_counter_read), - GEN7_PERFCOUNTER_GROUP(SP, sp, - gen7_counter_inline_enable, gen7_counter_read), + 
GEN7_REGULAR_PERFCOUNTER_GROUP(TP, tp), + GEN7_REGULAR_PERFCOUNTER_GROUP(SP, sp), GEN7_REGULAR_PERFCOUNTER_GROUP(RB, rb), GEN7_REGULAR_PERFCOUNTER_GROUP(VSC, vsc), GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, VBIF, gbif, 0, @@ -883,10 +819,8 @@ static const struct adreno_perfcount_group gen7_perfcounter_groups GEN7_BV_REGULAR_PERFCOUNTER_GROUP(PC, pc), GEN7_BV_REGULAR_PERFCOUNTER_GROUP(VFD, vfd), GEN7_BV_REGULAR_PERFCOUNTER_GROUP(VPC, vpc), - GEN7_BV_PERFCOUNTER_GROUP(TP, tp, - gen7_counter_inline_enable, gen7_counter_read), - GEN7_BV_PERFCOUNTER_GROUP(SP, sp, - gen7_counter_inline_enable, gen7_counter_read), + GEN7_BV_REGULAR_PERFCOUNTER_GROUP(TP, tp), + GEN7_BV_REGULAR_PERFCOUNTER_GROUP(SP, sp), GEN7_BV_REGULAR_PERFCOUNTER_GROUP(UFC, ufc), }; From 0021da9fd06b2552e1f379746d82b15af7feab0e Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Thu, 4 Nov 2021 10:56:13 -0600 Subject: [PATCH 0015/1016] msm: kgsl: Fix sysfs deadlock in kgsl If kgsl mem entries are being accessed in sysfs and if putting back the mementry refcount triggers a free of the process private, we have a mutex deadlock. Because freeing the process private triggers removal of the sysfs directories from within a thread that is accessing the sysfs files. Fix this by taking an extra refcount on the process private and then putting it back in a deferred manner. 
Change-Id: I7db0e6144cabec2a86df9afbc500cd0ba3af0291 Signed-off-by: Harshdeep Dhatt --- kgsl_sharedmem.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/kgsl_sharedmem.c b/kgsl_sharedmem.c index 53cbf1d8ba..43baa5cacf 100644 --- a/kgsl_sharedmem.c +++ b/kgsl_sharedmem.c @@ -132,6 +132,20 @@ static ssize_t mem_entry_sysfs_show(struct kobject *kobj, return pattr->show(priv, pattr->memtype, buf); } +struct deferred_work { + struct kgsl_process_private *private; + struct work_struct work; +}; + +static void process_private_deferred_put(struct work_struct *work) +{ + struct deferred_work *free_work = + container_of(work, struct deferred_work, work); + + kgsl_process_private_put(free_work->private); + kfree(free_work); +} + static ssize_t memtype_sysfs_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -140,10 +154,30 @@ static ssize_t memtype_sysfs_show(struct kobject *kobj, struct kgsl_mem_entry *entry; u64 size = 0; int id = 0; + struct deferred_work *work = kzalloc(sizeof(struct deferred_work), + GFP_KERNEL); + + if (!work) + return -ENOMEM; priv = container_of(kobj, struct kgsl_process_private, kobj_memtype); memtype = container_of(attr, struct kgsl_memtype, attr); + /* + * Take a process refcount here and put it back in a deferred manner. + * This is to avoid a deadlock where we put back last reference of the + * process private (via kgsl_mem_entry_put) here and end up trying to + * remove sysfs kobject while we are still in the middle of reading one + * of the sysfs files. 
+ */ + if (!kgsl_process_private_get(priv)) { + kfree(work); + return -ENOENT; + } + + work->private = priv; + INIT_WORK(&work->work, process_private_deferred_put); + spin_lock(&priv->mem_lock); for (entry = idr_get_next(&priv->mem_idr, &id); entry; id++, entry = idr_get_next(&priv->mem_idr, &id)) { @@ -165,6 +199,8 @@ static ssize_t memtype_sysfs_show(struct kobject *kobj, } spin_unlock(&priv->mem_lock); + queue_work(kgsl_driver.mem_workqueue, &work->work); + return scnprintf(buf, PAGE_SIZE, "%llu\n", size); } @@ -184,6 +220,26 @@ imported_mem_show(struct kgsl_process_private *priv, struct kgsl_mem_entry *entry; uint64_t imported_mem = 0; int id = 0; + struct deferred_work *work = kzalloc(sizeof(struct deferred_work), + GFP_KERNEL); + + if (!work) + return -ENOMEM; + + /* + * Take a process refcount here and put it back in a deferred manner. + * This is to avoid a deadlock where we put back last reference of the + * process private (via kgsl_mem_entry_put) here and end up trying to + * remove sysfs kobject while we are still in the middle of reading one + * of the sysfs files. + */ + if (!kgsl_process_private_get(priv)) { + kfree(work); + return -ENOENT; + } + + work->private = priv; + INIT_WORK(&work->work, process_private_deferred_put); spin_lock(&priv->mem_lock); for (entry = idr_get_next(&priv->mem_idr, &id); entry; @@ -218,6 +274,8 @@ imported_mem_show(struct kgsl_process_private *priv, } spin_unlock(&priv->mem_lock); + queue_work(kgsl_driver.mem_workqueue, &work->work); + return scnprintf(buf, PAGE_SIZE, "%llu\n", imported_mem); } From d6aaa1d7f0e68e2c5557f02faa3492d7f211e89d Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Fri, 5 Nov 2021 12:28:15 -0600 Subject: [PATCH 0016/1016] msm: kgsl: Set context id as contextidr in hwscheduling This is used by the iommu page fault handler to figure out the page faulting context. 
Change-Id: Ic8f8c69df2bbb7d47465799c224adb8a496b4a96 Signed-off-by: Harshdeep Dhatt --- adreno_gen7_hwsched_hfi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index c910ab7581..60ad1921d0 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -1277,7 +1277,7 @@ static int send_context_register(struct adreno_device *adreno_dev, cmd.ctxt_id = context->id; cmd.flags = HFI_CTXT_FLAG_NOTIFY | context->flags; cmd.pt_addr = kgsl_mmu_pagetable_get_ttbr0(pt); - cmd.ctxt_idr = pid_nr(context->proc_priv->pid); + cmd.ctxt_idr = context->id; cmd.ctxt_bank = kgsl_mmu_pagetable_get_context_bank(pt); return gen7_hfi_send_cmd_async(adreno_dev, &cmd); From 5aec9bede1ec367c262b6f17c48f3855fb8e0c92 Mon Sep 17 00:00:00 2001 From: Harshitha Sai Neelati Date: Mon, 8 Nov 2021 00:18:21 +0530 Subject: [PATCH 0017/1016] msm: kgsl: Update GPUCC regs for A662 snapshot GPUCC register offsets are different in A662. So update the snapshot to use the new offsets for A662. 
Change-Id: I07fbc3d26840e1a483a06bcd9cbd76f30084148b Signed-off-by: Harshitha Sai Neelati --- adreno_a6xx_gmu_snapshot.c | 39 ++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/adreno_a6xx_gmu_snapshot.c b/adreno_a6xx_gmu_snapshot.c index 32dc60c59c..e11f70e0b2 100644 --- a/adreno_a6xx_gmu_snapshot.c +++ b/adreno_a6xx_gmu_snapshot.c @@ -39,14 +39,6 @@ static const unsigned int a6xx_gmu_registers[] = { 0x1F9E0, 0x1F9E2, 0x1F9F0, 0x1F9F0, 0x1FA00, 0x1FA01, /* GMU AO */ 0x23B00, 0x23B16, - /* GPU CC */ - 0x24000, 0x24012, 0x24040, 0x24052, 0x24400, 0x24404, 0x24407, 0x2440B, - 0x24415, 0x2441C, 0x2441E, 0x2442D, 0x2443C, 0x2443D, 0x2443F, 0x24440, - 0x24442, 0x24449, 0x24458, 0x2445A, 0x24540, 0x2455E, 0x24800, 0x24802, - 0x24C00, 0x24C02, 0x25400, 0x25402, 0x25800, 0x25802, 0x25C00, 0x25C02, - 0x26000, 0x26002, - /* GPU CC ACD */ - 0x26400, 0x26416, 0x26420, 0x26427, }; static const unsigned int a660_gmu_registers[] = { @@ -60,6 +52,28 @@ static const unsigned int a660_gmu_registers[] = { 0x23B30, 0x23B30, }; +static const unsigned int a6xx_gmu_gpucc_registers[] = { + /* GPU CC */ + 0x24000, 0x24012, 0x24040, 0x24052, 0x24400, 0x24404, 0x24407, 0x2440B, + 0x24415, 0x2441C, 0x2441E, 0x2442D, 0x2443C, 0x2443D, 0x2443F, 0x24440, + 0x24442, 0x24449, 0x24458, 0x2445A, 0x24540, 0x2455E, 0x24800, 0x24802, + 0x24C00, 0x24C02, 0x25400, 0x25402, 0x25800, 0x25802, 0x25C00, 0x25C02, + 0x26000, 0x26002, + /* GPU CC ACD */ + 0x26400, 0x26416, 0x26420, 0x26427, +}; + +static const unsigned int a662_gmu_gpucc_registers[] = { + /* GPU CC */ + 0x24000, 0x2400e, 0x24400, 0x2440e, 0x24800, 0x24805, 0x24c00, 0x24cff, + 0x25800, 0x25804, 0x25c00, 0x25c04, 0x26000, 0x26004, 0x26400, 0x26405, + 0x26414, 0x2641d, 0x2642a, 0x26430, 0x26432, 0x26432, 0x26441, 0x26455, + 0x26466, 0x26468, 0x26478, 0x2647a, 0x26489, 0x2648a, 0x2649c, 0x2649e, + 0x264a0, 0x264a3, 0x264b3, 0x264b5, 0x264c5, 0x264c7, 0x264d6, 0x264d8, + 0x264e8, 0x264e9, 0x264f9, 
0x264fc, 0x2650b, 0x2650c, 0x2651c, 0x2651e, + 0x26540, 0x26570, 0x26600, 0x26616, 0x26620, 0x2662d, +}; + static const unsigned int a630_rscc_snapshot_registers[] = { 0x23400, 0x23434, 0x23436, 0x23436, 0x23480, 0x23484, 0x23489, 0x2348C, 0x23491, 0x23494, 0x23499, 0x2349C, 0x234A1, 0x234A4, 0x234A9, 0x234AC, @@ -399,6 +413,15 @@ void a6xx_gmu_device_snapshot(struct kgsl_device *device, adreno_snapshot_registers(device, snapshot, a6xx_gmu_registers, ARRAY_SIZE(a6xx_gmu_registers) / 2); + if (adreno_is_a662(adreno_dev)) + adreno_snapshot_registers(device, snapshot, + a662_gmu_gpucc_registers, + ARRAY_SIZE(a662_gmu_gpucc_registers) / 2); + else + adreno_snapshot_registers(device, snapshot, + a6xx_gmu_gpucc_registers, + ARRAY_SIZE(a6xx_gmu_gpucc_registers) / 2); + /* Snapshot A660 specific GMU registers */ if (adreno_is_a660(adreno_dev)) adreno_snapshot_registers(device, snapshot, a660_gmu_registers, From 53f46c5c600d14cf0ea64cf22133f41a77cdd374 Mon Sep 17 00:00:00 2001 From: Mohammed Mirza Mandayappurath Manzoor Date: Mon, 8 Nov 2021 16:00:20 -0800 Subject: [PATCH 0018/1016] msm: kgsl: Add notifier call for thermal constraints Currently devfreq call back function is overloaded for DCVS recommendations and thermal constraints. This is causing devfreq mutex congestion thereby causing delayed response to thermal requests leading to thermal reset. Add a new QoS notifier callback function to apply the constraints. 
Change-Id: Ic3c4a2e59867aeaa342fa893344667c77d8b1984 Signed-off-by: Mohammed Mirza Mandayappurath Manzoor --- kgsl_pwrctrl.c | 25 ----------------------- kgsl_pwrctrl.h | 3 ++- kgsl_pwrscale.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 53 insertions(+), 28 deletions(-) diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 57b2b63499..9f68d8527e 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -2261,31 +2261,6 @@ int kgsl_pwrctrl_set_default_gpu_pwrlevel(struct kgsl_device *device) return device->ftbl->gpu_clock_set(device, pwr->active_pwrlevel); } -/** - * kgsl_pwrctrl_update_thermal_pwrlevel() - Update GPU thermal power level - * @device: Pointer to the kgsl_device struct - */ -void kgsl_pwrctrl_update_thermal_pwrlevel(struct kgsl_device *device) -{ - s32 qos_max_freq = dev_pm_qos_read_value(&device->pdev->dev, - DEV_PM_QOS_MAX_FREQUENCY); - int level = 0; - - if (qos_max_freq != PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE) { - level = _get_nearest_pwrlevel(&device->pwrctrl, - qos_max_freq * 1000); - if (level < 0) - return; - } - - if (level != device->pwrctrl.thermal_pwrlevel) { - trace_kgsl_thermal_constraint( - device->pwrctrl.pwrlevels[level].gpu_freq); - - device->pwrctrl.thermal_pwrlevel = level; - } -} - int kgsl_gpu_num_freqs(void) { struct kgsl_device *device = kgsl_get_device(0); diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index 925aceeef5..6d353b0b05 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -163,6 +163,8 @@ struct kgsl_pwrctrl { u64 time_in_pwrlevel[KGSL_MAX_PWRLEVELS]; /** @last_stat_updated: The last time stats were updated */ ktime_t last_stat_updated; + /** @nb_max: Notifier block for DEV_PM_QOS_MAX_FREQUENCY */ + struct notifier_block nb_max; }; int kgsl_pwrctrl_init(struct kgsl_device *device); @@ -216,7 +218,6 @@ void kgsl_pwrctrl_busy_time(struct kgsl_device *device, u64 time, u64 busy); void kgsl_pwrctrl_set_constraint(struct kgsl_device *device, struct kgsl_pwr_constraint *pwrc, u32 id, u32 ts); int 
kgsl_pwrctrl_set_default_gpu_pwrlevel(struct kgsl_device *device); -void kgsl_pwrctrl_update_thermal_pwrlevel(struct kgsl_device *device); /** * kgsl_pwrctrl_request_state - Request a specific power state diff --git a/kgsl_pwrscale.c b/kgsl_pwrscale.c index caef4788c2..74fe81f09f 100644 --- a/kgsl_pwrscale.c +++ b/kgsl_pwrscale.c @@ -259,8 +259,6 @@ int kgsl_devfreq_target(struct device *dev, unsigned long *freq, u32 flags) cur_freq = kgsl_pwrctrl_active_freq(pwr); level = pwr->active_pwrlevel; - kgsl_pwrctrl_update_thermal_pwrlevel(device); - /* If the governor recommends a new frequency, update it here */ if (rec_freq != cur_freq) { for (i = 0; i < pwr->num_pwrlevels; i++) @@ -629,6 +627,46 @@ static void pwrscale_of_ca_aware(struct kgsl_device *device) of_node_put(node); } +/* + * thermal_max_notifier_call - Callback function registered to receive qos max + * frequency events. + * @nb: The notifier block + * @val: Max frequency value in KHz for GPU + * + * The function subscribes to GPU max frequency change and updates thermal + * power level accordingly. 
+ */ +static int thermal_max_notifier_call(struct notifier_block *nb, unsigned long val, void *data) +{ + struct kgsl_pwrctrl *pwr = container_of(nb, struct kgsl_pwrctrl, nb_max); + struct kgsl_device *device = container_of(pwr, struct kgsl_device, pwrctrl); + u32 max_freq = val * 1000; + int level; + + for (level = pwr->num_pwrlevels - 1; level >= 0; level--) { + /* get nearest power level with a maximum delta of 5MHz */ + if (abs(pwr->pwrlevels[level].gpu_freq - max_freq) < 5000000) + break; + } + + if (level < 0) + return NOTIFY_DONE; + + if (level == pwr->thermal_pwrlevel) + return NOTIFY_OK; + + trace_kgsl_thermal_constraint(max_freq); + pwr->thermal_pwrlevel = level; + + mutex_lock(&device->mutex); + + /* Update the current level using the new limit */ + kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel); + + mutex_unlock(&device->mutex); + return NOTIFY_OK; +} + int kgsl_pwrscale_init(struct kgsl_device *device, struct platform_device *pdev, const char *governor) { @@ -711,6 +749,16 @@ int kgsl_pwrscale_init(struct kgsl_device *device, struct platform_device *pdev, return ret; } + pwr->nb_max.notifier_call = thermal_max_notifier_call; + ret = dev_pm_qos_add_notifier(&pdev->dev, &pwr->nb_max, DEV_PM_QOS_MAX_FREQUENCY); + + if (ret) { + dev_err(device->dev, "Unable to register notifier call for thermal: %d\n", ret); + device->pwrscale.enabled = false; + msm_adreno_tz_exit(); + return ret; + } + devfreq = devfreq_add_device(&pdev->dev, &gpu_profile->profile, governor, &adreno_tz_data); if (IS_ERR_OR_NULL(devfreq)) { @@ -775,6 +823,7 @@ void kgsl_pwrscale_close(struct kgsl_device *device) devfreq_remove_device(device->pwrscale.devfreqptr); device->pwrscale.devfreqptr = NULL; + dev_pm_qos_remove_notifier(&device->pdev->dev, &pwr->nb_max, DEV_PM_QOS_MAX_FREQUENCY); msm_adreno_tz_exit(); } From 2d2726b851f45fb335264ea54659e4209aac9e53 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Wed, 10 Nov 2021 13:43:23 -0700 Subject: [PATCH 0019/1016] msm: kgsl: 
Avoid double SLUMBER entry adreno_suspend_context() relinquishes the device mutex which opens up a window for a concurrent thread to attempt SLUMBER. Hence, check for flags again, before proceeding with SLUMBER sequence. Change-Id: I3f36e19e31f5399a038a29af5cb9bc9f59bdfa5b Signed-off-by: Harshdeep Dhatt --- adreno_a6xx_gmu.c | 12 ++++++++++-- adreno_a6xx_rgmu.c | 9 +++++++-- adreno_gen7_gmu.c | 12 ++++++++++-- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index 90fb4b23f8..9986573cc4 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -3034,10 +3034,18 @@ static int a6xx_power_off(struct adreno_device *adreno_dev) WARN_ON(!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)); - trace_kgsl_pwr_request_state(device, KGSL_STATE_SLUMBER); - adreno_suspend_context(device); + /* + * adreno_suspend_context() unlocks the device mutex, which + * could allow a concurrent thread to attempt SLUMBER sequence. + * Hence, check the flags again before proceeding with SLUMBER. + */ + if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + return 0; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_SLUMBER); + ret = a6xx_gmu_oob_set(device, oob_gpu); if (ret) { a6xx_gmu_oob_clear(device, oob_gpu); diff --git a/adreno_a6xx_rgmu.c b/adreno_a6xx_rgmu.c index a92f37a3da..0bbc651682 100644 --- a/adreno_a6xx_rgmu.c +++ b/adreno_a6xx_rgmu.c @@ -1051,13 +1051,18 @@ static int a6xx_power_off(struct adreno_device *adreno_dev) struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev); int ret; + adreno_suspend_context(device); + + /* + * adreno_suspend_context() unlocks the device mutex, which + * could allow a concurrent thread to attempt SLUMBER sequence. + * Hence, check the flags before proceeding with SLUMBER. 
+ */ if (!test_bit(RGMU_PRIV_GPU_STARTED, &rgmu->flags)) return 0; trace_kgsl_pwr_request_state(device, KGSL_STATE_SLUMBER); - adreno_suspend_context(device); - ret = a6xx_rgmu_oob_set(device, oob_gpu); if (ret) { a6xx_rgmu_oob_clear(device, oob_gpu); diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index c32cb2f9c2..d629f8edf8 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -2392,10 +2392,18 @@ static int gen7_power_off(struct adreno_device *adreno_dev) WARN_ON(!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)); - trace_kgsl_pwr_request_state(device, KGSL_STATE_SLUMBER); - adreno_suspend_context(device); + /* + * adreno_suspend_context() unlocks the device mutex, which + * could allow a concurrent thread to attempt SLUMBER sequence. + * Hence, check the flags again before proceeding with SLUMBER. + */ + if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + return 0; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_SLUMBER); + ret = gen7_gmu_oob_set(device, oob_gpu); if (!ret) { kgsl_pwrscale_update_stats(device); From ef3f8530b44b388b710875d6d94af7da65455f56 Mon Sep 17 00:00:00 2001 From: Sushmita Susheelendra Date: Fri, 1 Oct 2021 12:47:27 -0400 Subject: [PATCH 0020/1016] msm: kgsl: Add new tracepoints for command batch ready and done The new tracepoints adreno_cmdbatch_ready and adreno_cmdbatch_done provide common begin and end reference points respectively for comparison between dispatch on GMU and host. adreno_cmdbatch_ready is logged after the sync dependencies for a command have been resolved and the command is therefore ready to be submitted. adreno_cmdbatch_done is logged on both SW and HW dispatcher threads just before signaling events for the command. 
Change-Id: If9587bae0d4655be93bfc3fee855d6ffbe967e1f Signed-off-by: Sushmita Susheelendra --- adreno_dispatch.c | 7 +++++++ adreno_hwsched.c | 31 +++++++++++++++++++------------ adreno_trace.h | 43 +++++++++++++++++++++++++++++++++++++++++++ kgsl_drawobj.c | 1 + kgsl_drawobj.h | 2 ++ 5 files changed, 72 insertions(+), 12 deletions(-) diff --git a/adreno_dispatch.c b/adreno_dispatch.c index f73ab29b70..d1c747061f 100644 --- a/adreno_dispatch.c +++ b/adreno_dispatch.c @@ -737,6 +737,7 @@ static int dispatcher_context_sendcmds(struct adreno_device *adreno_dev, (dispatch_q->inflight < inflight)) { struct kgsl_drawobj *drawobj; struct kgsl_drawobj_cmd *cmdobj; + struct kgsl_context *context; if (adreno_gpu_fault(adreno_dev) != 0) break; @@ -762,6 +763,9 @@ static int dispatcher_context_sendcmds(struct adreno_device *adreno_dev, timestamp = drawobj->timestamp; cmdobj = CMDOBJ(drawobj); + context = drawobj->context; + trace_adreno_cmdbatch_ready(context->id, context->priority, + drawobj->timestamp, cmdobj->requeue_cnt); ret = sendcmd(adreno_dev, cmdobj); /* @@ -782,6 +786,7 @@ static int dispatcher_context_sendcmds(struct adreno_device *adreno_dev, drawctxt, cmdobj); if (r) ret = r; + cmdobj->requeue_cnt++; } break; @@ -2282,6 +2287,8 @@ static void retire_cmdobj(struct adreno_device *adreno_dev, drawctxt->ticks_index = (drawctxt->ticks_index + 1) % SUBMIT_RETIRE_TICKS_SIZE; + trace_adreno_cmdbatch_done(drawobj->context->id, + drawobj->context->priority, drawobj->timestamp); kgsl_drawobj_destroy(drawobj); } diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 13db07a065..7ce7d2177b 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -455,6 +455,7 @@ static int hwsched_sendcmds(struct adreno_device *adreno_dev, while (1) { struct kgsl_drawobj *drawobj; struct kgsl_drawobj_cmd *cmdobj; + struct kgsl_context *context; spin_lock(&drawctxt->lock); drawobj = _process_drawqueue_get_next_drawobj(adreno_dev, @@ -478,6 +479,9 @@ static int hwsched_sendcmds(struct 
adreno_device *adreno_dev, timestamp = drawobj->timestamp; cmdobj = CMDOBJ(drawobj); + context = drawobj->context; + trace_adreno_cmdbatch_ready(context->id, context->priority, + drawobj->timestamp, cmdobj->requeue_cnt); ret = hwsched_sendcmd(adreno_dev, cmdobj); /* @@ -498,6 +502,7 @@ static int hwsched_sendcmds(struct adreno_device *adreno_dev, drawctxt, cmdobj); if (r) ret = r; + cmdobj->requeue_cnt++; } break; @@ -1026,26 +1031,28 @@ static int adreno_hwsched_queue_cmds(struct kgsl_device_private *dev_priv, static void retire_cmdobj(struct adreno_hwsched *hwsched, struct kgsl_drawobj_cmd *cmdobj) { - struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + struct kgsl_drawobj *drawobj; struct kgsl_mem_entry *entry; struct kgsl_drawobj_profiling_buffer *profile_buffer; - if (cmdobj != NULL) { - if (drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME) - atomic64_inc(&drawobj->context->proc_priv->frame_count); + drawobj = DRAWOBJ(cmdobj); + if (drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME) + atomic64_inc(&drawobj->context->proc_priv->frame_count); - entry = cmdobj->profiling_buf_entry; - if (entry) { - profile_buffer = kgsl_gpuaddr_to_vaddr(&entry->memdesc, - cmdobj->profiling_buffer_gpuaddr); + entry = cmdobj->profiling_buf_entry; + if (entry) { + profile_buffer = kgsl_gpuaddr_to_vaddr(&entry->memdesc, + cmdobj->profiling_buffer_gpuaddr); - if (profile_buffer == NULL) - return; + if (profile_buffer == NULL) + return; - kgsl_memdesc_unmap(&entry->memdesc); - } + kgsl_memdesc_unmap(&entry->memdesc); } + trace_adreno_cmdbatch_done(drawobj->context->id, + drawobj->context->priority, drawobj->timestamp); + if (hwsched->big_cmdobj == cmdobj) { hwsched->big_cmdobj = NULL; kgsl_drawobj_put(drawobj); diff --git a/adreno_trace.h b/adreno_trace.h index 3890dfc501..536d6f7154 100644 --- a/adreno_trace.h +++ b/adreno_trace.h @@ -218,6 +218,49 @@ TRACE_EVENT(adreno_cmdbatch_sync, ) ); +TRACE_EVENT(adreno_cmdbatch_ready, + TP_PROTO(unsigned int ctx_id, unsigned int ctx_prio, + unsigned int 
timestamp, unsigned int requeue_cnt), + TP_ARGS(ctx_id, ctx_prio, timestamp, requeue_cnt), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(int, prio) + __field(unsigned int, timestamp) + __field(unsigned int, requeue_cnt) + ), + TP_fast_assign( + __entry->id = ctx_id; + __entry->prio = ctx_prio; + __entry->timestamp = timestamp; + __entry->requeue_cnt = requeue_cnt; + ), + TP_printk( + "ctx=%u ctx_prio=%d ts=%u requeue_cnt=%u", + __entry->id, __entry->prio, __entry->timestamp, + __entry->requeue_cnt + ) +); + +TRACE_EVENT(adreno_cmdbatch_done, + TP_PROTO(unsigned int ctx_id, unsigned int ctx_prio, + unsigned int timestamp), + TP_ARGS(ctx_id, ctx_prio, timestamp), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, prio) + __field(unsigned int, timestamp) + ), + TP_fast_assign( + __entry->id = ctx_id; + __entry->prio = ctx_prio; + __entry->timestamp = timestamp; + ), + TP_printk( + "ctx=%u ctx_prio=%u ts=%u", + __entry->id, __entry->prio, __entry->timestamp + ) +); + TRACE_EVENT(adreno_cmdbatch_fault, TP_PROTO(struct kgsl_drawobj_cmd *cmdobj, unsigned int fault), TP_ARGS(cmdobj, fault), diff --git a/kgsl_drawobj.c b/kgsl_drawobj.c index 83ce37938c..208ae04e73 100644 --- a/kgsl_drawobj.c +++ b/kgsl_drawobj.c @@ -1130,6 +1130,7 @@ struct kgsl_drawobj_cmd *kgsl_drawobj_cmd_create(struct kgsl_device *device, INIT_LIST_HEAD(&cmdobj->cmdlist); INIT_LIST_HEAD(&cmdobj->memlist); + cmdobj->requeue_cnt = 0; if (type & CMDOBJ_TYPE) atomic_inc(&context->proc_priv->cmd_count); diff --git a/kgsl_drawobj.h b/kgsl_drawobj.h index faf396ba74..03ee97dd82 100644 --- a/kgsl_drawobj.h +++ b/kgsl_drawobj.h @@ -85,6 +85,8 @@ struct kgsl_drawobj_cmd { uint64_t submit_ticks; /* @numibs: Number of ibs in this cmdobj */ u32 numibs; + /* @requeue_cnt: Number of times cmdobj was requeued before submission to dq succeeded */ + u32 requeue_cnt; }; /** From 08f906a2d0479288ea93710ec2ecce9a06cb4b6c Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Wed, 27 Jan 2021 
12:29:47 +0530 Subject: [PATCH 0021/1016] msm: kgsl: Remove undefined HLSQ register dump to A6xx snapshot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 297883f14279 ("msm: kgsl: Dump HLSQ_DBG_CNTL in snapshot") added extra register “0xD004” which does not exists. Remove this register to have only required registers in snapshot dumping. Change-Id: I6dcdf6b0fdbcc89ac6854bd1b8a7d20cd375f621 Signed-off-by: Hareesh Gundu --- adreno_a6xx_snapshot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno_a6xx_snapshot.c b/adreno_a6xx_snapshot.c index b4bc986351..33eedc0bf0 100644 --- a/adreno_a6xx_snapshot.c +++ b/adreno_a6xx_snapshot.c @@ -288,7 +288,7 @@ static const unsigned int a6xx_registers[] = { 0xA600, 0xA601, 0xA603, 0xA603, 0xA60A, 0xA60A, 0xA610, 0xA617, 0xA630, 0xA630, /* HLSQ */ - 0xD002, 0xD004, + 0xD002, 0xD003, }; static const unsigned int a660_registers[] = { From 7b147bd2e127bc98de5dd1a3d9d3c18c6f648b82 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Fri, 19 Nov 2021 12:58:36 +0530 Subject: [PATCH 0022/1016] msm: kgsl: Remove RBBM_GPC_ERROR from a6xx hwsched interrupt mask RBBM_GPC interrupt is handled by GMU for hwsched enabled targets. Hence remove this from the a6xx hwsched interrupt mask. 
Change-Id: I7cc409514a59e528fa8310640197c1743a9d201d Signed-off-by: Hareesh Gundu --- adreno_a6xx.h | 1 - adreno_a6xx_hwsched_hfi.c | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/adreno_a6xx.h b/adreno_a6xx.h index 66454750dd..e581cce7c5 100644 --- a/adreno_a6xx.h +++ b/adreno_a6xx.h @@ -183,7 +183,6 @@ struct a6xx_cp_smmu_info { #define A6XX_HWSCHED_INT_MASK \ ((1 << A6XX_INT_CP_AHB_ERROR) | \ (1 << A6XX_INT_ATB_ASYNCFIFO_OVERFLOW) | \ - (1 << A6XX_INT_RBBM_GPC_ERROR) | \ (1 << A6XX_INT_RBBM_ATB_BUS_OVERFLOW) | \ (1 << A6XX_INT_UCHE_OOB_ACCESS) | \ (1 << A6XX_INT_UCHE_TRAP_INTR) | \ diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index 0a6b6d3a83..2afcc03a7a 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -285,6 +285,9 @@ static void log_gpu_fault(struct adreno_device *adreno_dev) cur, cur_rptr, cur_wptr, next, next_rptr, next_wptr); } break; + case GMU_CP_GPC_ERROR: + dev_crit_ratelimited(dev, "RBBM: GPC error\n"); + break; default: dev_crit_ratelimited(dev, "Unknown GPU fault: %u\n", cmd->error); From abc40777759fd51ec434f94676db21a11d8845a4 Mon Sep 17 00:00:00 2001 From: Oleg Perelet Date: Tue, 2 Nov 2021 16:13:52 -0700 Subject: [PATCH 0023/1016] msm: kgsl: Enable bus voting on minimal powerlevel There are usecases where GPU is not busy but GPU consumes high ddr bandwidth. In such case, bus DCVS will not kick in, potentially causing an under voting scenario. Enable bus voting on minimal power level, even if GPU busy is less than 75%. 
Change-Id: I02db2e4b68ce9d48c2f755112f0dcf9912936b56 Signed-off-by: Oleg Perelet Signed-off-by: Mohammed Mirza Mandayappurath Manzoor --- governor_gpubw_mon.c | 2 +- kgsl_pwrscale.c | 2 ++ msm_adreno_devfreq.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/governor_gpubw_mon.c b/governor_gpubw_mon.c index f7f19c40e5..f853e46cd9 100644 --- a/governor_gpubw_mon.c +++ b/governor_gpubw_mon.c @@ -154,7 +154,7 @@ static int devfreq_gpubw_get_target(struct devfreq *df, (priv->bus.num - 1) : act_level; if ((norm_cycles > priv->bus.up[act_level] || wait_active_percent > WAIT_THRESHOLD) && - gpu_percent > CAP) + (gpu_percent > CAP || b.gpu_minfreq == *freq)) bus_profile->flag = DEVFREQ_FLAG_FAST_HINT; else if (norm_cycles < priv->bus.down[act_level] && b.buslevel) bus_profile->flag = DEVFREQ_FLAG_SLOW_HINT; diff --git a/kgsl_pwrscale.c b/kgsl_pwrscale.c index 74fe81f09f..d5f6df8f23 100644 --- a/kgsl_pwrscale.c +++ b/kgsl_pwrscale.c @@ -338,6 +338,7 @@ int kgsl_devfreq_get_dev_status(struct device *dev, last_b->ram_time = device->pwrscale.accum_stats.ram_time; last_b->ram_wait = device->pwrscale.accum_stats.ram_wait; last_b->buslevel = device->pwrctrl.cur_buslevel; + last_b->gpu_minfreq = pwrctrl->pwrlevels[pwrctrl->min_pwrlevel].gpu_freq; } kgsl_pwrctrl_busy_time(device, stat->total_time, stat->busy_time); @@ -404,6 +405,7 @@ int kgsl_busmon_get_dev_status(struct device *dev, b->ram_time = last_b->ram_time; b->ram_wait = last_b->ram_wait; b->buslevel = last_b->buslevel; + b->gpu_minfreq = last_b->gpu_minfreq; } return 0; } diff --git a/msm_adreno_devfreq.h b/msm_adreno_devfreq.h index be366cda04..c72924efb7 100644 --- a/msm_adreno_devfreq.h +++ b/msm_adreno_devfreq.h @@ -22,6 +22,7 @@ struct xstats { u64 ram_time; u64 ram_wait; int buslevel; + unsigned long gpu_minfreq; }; struct devfreq_msm_adreno_tz_data { From b019bba722636a1fc338471b3577827a964db0f8 Mon Sep 17 00:00:00 2001 From: Harshitha Sai Neelati Date: Wed, 13 Oct 2021 15:45:29 +0530 Subject: 
[PATCH 0024/1016] msm: kgsl: Enable IFPC for A662 GPU Enable IFPC feature for A662 GPU. Change-Id: I59b6958600c24f952f54c3a041ef5b3cb79ba469 Signed-off-by: Harshitha Sai Neelati --- adreno-gpulist.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 3c428535bc..a676433212 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -1716,7 +1716,8 @@ static const struct adreno_a6xx_core adreno_gpu_core_a662 = { .base = { DEFINE_ADRENO_REV(ADRENO_REV_A662, 6, 6, 2, ANY_ID), .features = ADRENO_APRIV | ADRENO_IOCOHERENT | - ADRENO_CONTENT_PROTECTION | ADRENO_PREEMPTION, + ADRENO_CONTENT_PROTECTION | ADRENO_PREEMPTION | + ADRENO_IFPC, .gpudev = &adreno_a6xx_gmu_gpudev.base, .perfcounters = &adreno_a6xx_perfcounters, .gmem_base = 0, From a62385f47a48b318d23a0d0072f4119d06ec9e61 Mon Sep 17 00:00:00 2001 From: Puranam V G Tejaswi Date: Mon, 17 May 2021 14:41:40 +0530 Subject: [PATCH 0025/1016] msm: kgsl: Signal fence only if last fence refcount was not put Currently there is a chance that release for the fence was already called before we call dma_fence_get during kgsl_timeline_signal and kgsl_ioctl_timeline_destroy. This can cause use-after-free issue as we can access fence after release. Fix this by signalling fence only if the last refcount on the fence was not yet put. This makes sure that release for the fence will not be called until we are done signalling. 
Change-Id: I6bdcefa1f128febb7a0f7aef133757268a3b9ae3 Signed-off-by: Puranam V G Tejaswi Signed-off-by: Pranav Patel --- kgsl_timeline.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kgsl_timeline.c b/kgsl_timeline.c index b499face00..e07db2569f 100644 --- a/kgsl_timeline.c +++ b/kgsl_timeline.c @@ -272,12 +272,10 @@ void kgsl_timeline_signal(struct kgsl_timeline *timeline, u64 seqno) timeline->value = seqno; spin_lock(&timeline->fence_lock); - list_for_each_entry_safe(fence, tmp, &timeline->fences, node) { - if (timeline_fence_signaled(&fence->base)) { - dma_fence_get(&fence->base); + list_for_each_entry_safe(fence, tmp, &timeline->fences, node) + if (timeline_fence_signaled(&fence->base) && + kref_get_unless_zero(&fence->base.refcount)) list_move(&fence->node, &temp); - } - } spin_unlock(&timeline->fence_lock); list_for_each_entry_safe(fence, tmp, &temp, node) { @@ -552,7 +550,8 @@ long kgsl_ioctl_timeline_destroy(struct kgsl_device_private *dev_priv, spin_lock(&timeline->fence_lock); list_for_each_entry_safe(fence, tmp, &timeline->fences, node) - dma_fence_get(&fence->base); + if (!kref_get_unless_zero(&fence->base.refcount)) + list_del_init(&fence->node); list_replace_init(&timeline->fences, &temp); spin_unlock(&timeline->fence_lock); From d99092f93bf6d7d0341fc786ee74d735c1cf98c3 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Fri, 19 Nov 2021 22:49:05 +0530 Subject: [PATCH 0026/1016] msm: kgsl: Dump context information in snapshot Current context is not dumped during snapshot. Also, context count and timestamps are dumped only for gmu faults. These information can be helpful to debug issues. Hence, dump the context information in snapshot. 
Change-Id: I71babee314a4abede3a7af91ffc094c6d868288f Signed-off-by: Kamal Agrawal --- adreno_snapshot.c | 49 +++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/adreno_snapshot.c b/adreno_snapshot.c index fd9834f073..59877edf4c 100644 --- a/adreno_snapshot.c +++ b/adreno_snapshot.c @@ -839,13 +839,16 @@ static void adreno_snapshot_ringbuffer(struct kgsl_device *device, } static void adreno_snapshot_os(struct kgsl_device *device, - struct kgsl_snapshot *snapshot, struct kgsl_context *guilty, - bool dump_contexts) + struct kgsl_snapshot *snapshot, struct kgsl_context *guilty) { struct kgsl_snapshot_section_header *sect = (struct kgsl_snapshot_section_header *) snapshot->ptr; struct kgsl_snapshot_linux_v2 *header = (struct kgsl_snapshot_linux_v2 *) (snapshot->ptr + sizeof(*sect)); + struct kgsl_context *context; + u32 remain; + void *mem; + int id; if (snapshot->remain < (sizeof(*sect) + sizeof(*header))) { SNAPSHOT_ERR_NOMEM(device, "OS"); @@ -869,41 +872,37 @@ static void adreno_snapshot_os(struct kgsl_device *device, /* If we know the guilty context then dump it */ if (guilty) { + header->current_context = guilty->id; header->pid = guilty->tid; strlcpy(header->comm, guilty->proc_priv->comm, sizeof(header->comm)); } - if (dump_contexts) { - u32 remain = snapshot->remain - sizeof(*sect) + sizeof(*header); - void *mem = snapshot->ptr + sizeof(*sect) + sizeof(*header); - struct kgsl_context *context; - int id; + remain = snapshot->remain - sizeof(*sect) + sizeof(*header); + mem = snapshot->ptr + sizeof(*sect) + sizeof(*header); - read_lock(&device->context_lock); - idr_for_each_entry(&device->context_idr, context, id) { - struct kgsl_snapshot_linux_context_v2 *c = mem; + read_lock(&device->context_lock); + idr_for_each_entry(&device->context_idr, context, id) { + struct kgsl_snapshot_linux_context_v2 *c = mem; - if (remain < sizeof(*c)) - break; + if (remain < sizeof(*c)) + break; - kgsl_readtimestamp(device, 
context, KGSL_TIMESTAMP_QUEUED, - &c->timestamp_queued); + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_QUEUED, + &c->timestamp_queued); - kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_CONSUMED, - &c->timestamp_consumed); + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_CONSUMED, + &c->timestamp_consumed); - kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, - &c->timestamp_retired); + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, + &c->timestamp_retired); - header->ctxtcount++; + header->ctxtcount++; - mem += sizeof(*c); - remain -= sizeof(*c); - - } - read_unlock(&device->context_lock); + mem += sizeof(*c); + remain -= sizeof(*c); } + read_unlock(&device->context_lock); sect->magic = SNAPSHOT_SECTION_MAGIC; sect->id = KGSL_SNAPSHOT_SECTION_OS; @@ -943,7 +942,7 @@ void adreno_snapshot(struct kgsl_device *device, struct kgsl_snapshot *snapshot, snapshot->size += sizeof(*header); /* Write the OS section */ - adreno_snapshot_os(device, snapshot, context, device->gmu_fault); + adreno_snapshot_os(device, snapshot, context); ib_max_objs = 0; /* Reset the list of objects */ From 1bfa35fdba8ffc9bb673d5c4d3cba52c32f73dd1 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 25 Nov 2021 21:45:51 +0530 Subject: [PATCH 0027/1016] msm: kgsl: Ignore thermal requests until first boot is done There is a possible deadlock scenario during kgsl firmware reading (request_firmware) and thermal notifier calls. During first boot, kgsl device mutex is held and then request_firmware is called for reading firmware. request_firmware internally takes dev_pm_qos_mtx lock. Whereas in case of thermal notifier calls, it first takes the same dev_pm_qos_mtx lock and then tries to take kgsl device mutex. This results in deadlock when both threads are unable to acquire the mutex held by other thread as shown in call stack below. 
Call stack: CPU0: mutex_lock --> waiting for kgsl device mutex thermal_max_notifier_call pm_qos_update_target apply_constraint __dev_pm_qos_update_request dev_pm_qos_update_request ---> takes dev_pm_qos_mtx mutex devfreq_cooling_set_cur_state thermal_cdev_update step_wise_throttle handle_thermal_trip CPU1: __mutex_lock __mutex_lock_slowpath ---> waiting for dev_pm_qos_mtx mutex dev_pm_qos_constraints_destroy dpm_sysfs_remove device_del fw_load_sysfs_fallback fw_load_from_user_helper firmware_fallback_sysfs _request_firmware request_firmware kgsl_zap_shader_load genc_rb_start genc_gpu_boot genc_first_boot genc_gmu_first_open adreno_first_open kgsl_open ---> takes kgsl device mutex Fix this by ensuring thermal notifier calls are not exercised till first boot is done i.e. till we are done reading all firmware files. Change-Id: I185c07f1491afddf820cbad30202733dff915125 Signed-off-by: Kamal Agrawal --- kgsl_pwrscale.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kgsl_pwrscale.c b/kgsl_pwrscale.c index d5f6df8f23..dd6d7fa2b3 100644 --- a/kgsl_pwrscale.c +++ b/kgsl_pwrscale.c @@ -645,6 +645,9 @@ static int thermal_max_notifier_call(struct notifier_block *nb, unsigned long va u32 max_freq = val * 1000; int level; + if (!device->pwrscale.devfreq_enabled) + return NOTIFY_DONE; + for (level = pwr->num_pwrlevels - 1; level >= 0; level--) { /* get nearest power level with a maximum delta of 5MHz */ if (abs(pwr->pwrlevels[level].gpu_freq - max_freq) < 5000000) From 907f856d4bd82ff3a8a02b165ba977ce01d4df7b Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 19 Oct 2021 12:55:51 +0530 Subject: [PATCH 0028/1016] msm: kgsl: Add GPU fault report To aid developers in tracking down errors in the application, expose a new IOCTL to provide GPU fault report. This will help to retrieve diagnostic information about faults that might have caused GPU hang. Application developers can use this information to debug issues. 
Faults tracking for a context can be enabled by specifying a flag (KGSL_CONTEXT_FAULT_INFO) during context creation. Fault report can be queried with new IOCTL_KGSL_GET_FAULT_REPORT ioctl once the context is invalidated. Change-Id: I7372b18f3b235183bc5dd070a7bdf92a0484bacb Signed-off-by: Kamal Agrawal --- adreno_drawctxt.c | 5 +- include/uapi/linux/msm_kgsl.h | 85 ++++++++++++++ kgsl.c | 207 ++++++++++++++++++++++++++++++++++ kgsl.h | 2 + kgsl_device.h | 39 +++++++ kgsl_ioctl.c | 2 + kgsl_iommu.c | 32 ++++++ 7 files changed, 370 insertions(+), 2 deletions(-) diff --git a/adreno_drawctxt.c b/adreno_drawctxt.c index 0844413fee..dcc163f2ab 100644 --- a/adreno_drawctxt.c +++ b/adreno_drawctxt.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 2002,2007-2020, The Linux Foundation. All rights reserved. + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. */ #include @@ -338,7 +338,8 @@ adreno_drawctxt_create(struct kgsl_device_private *dev_priv, KGSL_CONTEXT_IFH_NOP | KGSL_CONTEXT_SECURE | KGSL_CONTEXT_PREEMPT_STYLE_MASK | - KGSL_CONTEXT_NO_SNAPSHOT); + KGSL_CONTEXT_NO_SNAPSHOT | + KGSL_CONTEXT_FAULT_INFO); /* Check for errors before trying to initialize */ diff --git a/include/uapi/linux/msm_kgsl.h b/include/uapi/linux/msm_kgsl.h index 4b67887f09..f3395a6ec5 100644 --- a/include/uapi/linux/msm_kgsl.h +++ b/include/uapi/linux/msm_kgsl.h @@ -74,6 +74,7 @@ #define KGSL_CONTEXT_TYPE_UNKNOWN 0x1E #define KGSL_CONTEXT_INVALIDATE_ON_FAULT 0x10000000 +#define KGSL_CONTEXT_FAULT_INFO 0x40000000 #define KGSL_CONTEXT_INVALID 0xffffffff @@ -1998,4 +1999,88 @@ struct kgsl_gpu_aux_command_timeline { __u32 timelines_size; }; +/* Macros for fault type used in kgsl_fault structure */ +#define KGSL_FAULT_TYPE_NO_FAULT 0 +#define KGSL_FAULT_TYPE_PAGEFAULT 1 +#define KGSL_FAULT_TYPE_MAX 2 + +/* Macros to be used in kgsl_pagefault_report structure */ +#define KGSL_PAGEFAULT_TYPE_NONE 0 +#define KGSL_PAGEFAULT_TYPE_READ (1 << 0) +#define 
KGSL_PAGEFAULT_TYPE_WRITE (1 << 1) +#define KGSL_PAGEFAULT_TYPE_TRANSLATION (1 << 2) +#define KGSL_PAGEFAULT_TYPE_PERMISSION (1 << 3) +#define KGSL_PAGEFAULT_TYPE_EXTERNAL (1 << 4) +#define KGSL_PAGEFAULT_TYPE_TRANSACTION_STALLED (1 << 5) + +/** + * struct kgsl_pagefault_report - Descriptor for each page fault + * @fault_addr: page fault address + * @fault_type: type of page fault + * + * Contains information about supported GPU page fault. + * Supported fault type: KGSL_PAGEFAULT_TYPE_* + */ +struct kgsl_pagefault_report { + __u64 fault_addr; + /* private: reserved for future use */ + __u64 reserved[2]; + __u32 fault_type; + /* private: padding for 64 bit compatibility */ + __u32 __pad; +}; + +/** + * struct kgsl_fault - Descriptor for each GPU fault type + * @fault: User memory pointer to list of specific fault type + * @type: Type of gpu fault + * @count: Number of entries in @fault + * @size: Size of each entry in @fault in bytes + * + * Contains information about each GPU fault type. If user passes 0 for all the fields, KGSL + * will return the @count and @type of fault. Based on this, user can allocate a buffer for + * specific fault type, fill the @fault and specify the structure size of type specific fault + * in @size. User can walk through @fault list to parse the fault type specific information. + * + * Supported type: KGSL_FAULT_TYPE_* + */ +struct kgsl_fault { + __u64 fault; + __u32 type; + __u32 count; + __u32 size; + /* private: padding for 64 bit compatibility */ + __u32 padding; +}; + +/** + * struct kgsl_fault_report - Container for list of GPU faults + * @faultlist: User memory pointer to list of fault descriptor &struct kgsl_fault + * @faultnents: Number of entries in @faultlist. Each entry corresponds to a fault type i.e. + * KGSL_FAULT_TYPE_* + * @faultsize: Size of each entry in @faultlist in bytes + * @context_id: ID of a KGSL context + * + * Returns a list of GPU faults for a context identified by @context_id. 
If the user specifies + * @context_id only, then KGSL will set the @faultnents to the number of fault types it has + * for that context. + * + * User is expected to allocate an array of @struct kgsl_fault with @faultnents number of entries + * and fill the @faultlist field. On calling @IOCTL_KGSL_GET_FAULT_REPORT, KGSL will return the + * type and count for each fault. Based on this, user needs to update the @kgsl_fault structure. + * Then, it should call the @IOCTL_KGSL_GET_FAULT_REPORT again for kernel to fill the fault + * information. + */ +struct kgsl_fault_report { + __u64 faultlist; + __u32 faultnents; + __u32 faultsize; + __u32 context_id; + /* private: padding for 64 bit compatibility */ + __u32 padding; +}; + +#define IOCTL_KGSL_GET_FAULT_REPORT \ + _IOWR(KGSL_IOC_TYPE, 0x5E, struct kgsl_fault_report) + #endif /* _UAPI_MSM_KGSL_H */ diff --git a/kgsl.c b/kgsl.c index 1a72257c21..e69c7bc516 100644 --- a/kgsl.c +++ b/kgsl.c @@ -683,6 +683,8 @@ int kgsl_context_init(struct kgsl_device_private *dev_priv, context->id = id; + mutex_init(&context->fault_lock); + INIT_LIST_HEAD(&context->faults); kref_init(&context->refcount); /* * Get a refernce to the process private so its not destroyed, until @@ -718,6 +720,20 @@ out: return ret; } +void kgsl_free_faults(struct kgsl_context *context) +{ + struct kgsl_fault_node *p, *tmp; + + if (!(context->flags & KGSL_CONTEXT_FAULT_INFO)) + return; + + list_for_each_entry_safe(p, tmp, &context->faults, node) { + list_del(&p->node); + kfree(p->priv); + kfree(p); + } +} + /** * kgsl_context_detach() - Release the "master" context reference * @context: The context that will be detached @@ -779,6 +795,7 @@ kgsl_context_destroy(struct kref *kref) */ BUG_ON(!kgsl_context_detached(context)); + kgsl_free_faults(context); kgsl_sync_timeline_put(context->ktimeline); write_lock(&device->context_lock); @@ -3437,6 +3454,196 @@ out: return ret; } +static int kgsl_update_fault_details(struct kgsl_context *context, + void __user *ptr, u32 
faultnents, u32 faultsize) +{ + u32 size = min_t(u32, sizeof(struct kgsl_fault), faultsize); + u32 cur_idx[KGSL_FAULT_TYPE_MAX] = {0}; + struct kgsl_fault_node *fault_node; + struct kgsl_fault *faults; + int i, ret = 0; + + faults = kcalloc(KGSL_FAULT_TYPE_MAX, sizeof(struct kgsl_fault), + GFP_KERNEL); + if (!faults) + return -ENOMEM; + + for (i = 0; i < faultnents; i++) { + struct kgsl_fault fault = {0}; + + if (copy_from_user(&fault, ptr + i * faultsize, size)) { + ret = -EFAULT; + goto err; + } + + if (fault.type >= KGSL_FAULT_TYPE_MAX) { + ret = -EINVAL; + goto err; + } + + memcpy(&faults[fault.type], &fault, sizeof(fault)); + } + + list_for_each_entry(fault_node, &context->faults, node) { + u32 fault_type = fault_node->type; + + if (cur_idx[fault_type] >= faults[fault_type].count) + continue; + + switch (fault_type) { + case KGSL_FAULT_TYPE_PAGEFAULT: + size = sizeof(struct kgsl_pagefault_report); + } + + size = min_t(u32, size, faults[fault_type].size); + + if (copy_to_user(u64_to_user_ptr(faults[fault_type].fault + + cur_idx[fault_type] * faults[fault_type].size), + fault_node->priv, size)) { + ret = -EFAULT; + goto err; + } + + cur_idx[fault_type] += 1; + } + +err: + kfree(faults); + return ret; +} + +static int kgsl_update_fault_count(struct kgsl_context *context, + void __user *faults, u32 faultnents, u32 faultsize) +{ + u32 size = min_t(u32, sizeof(struct kgsl_fault), faultsize); + u32 faultcount[KGSL_FAULT_TYPE_MAX] = {0}; + struct kgsl_fault_node *fault_node; + int i, j; + + list_for_each_entry(fault_node, &context->faults, node) + faultcount[fault_node->type]++; + + /* KGSL_FAULT_TYPE_NO_FAULT (i.e. 
0) is not an actual fault type */ + for (i = 0, j = 1; i < faultnents && j < KGSL_FAULT_TYPE_MAX; j++) { + struct kgsl_fault fault = {0}; + + if (!faultcount[j]) + continue; + + fault.type = j; + fault.count = faultcount[j]; + + if (copy_to_user(faults, &fault, size)) + return -EFAULT; + + faults += faultsize; + i++; + } + + return 0; +} + +long kgsl_ioctl_get_fault_report(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_fault_report *param = data; + u32 size = min_t(u32, sizeof(struct kgsl_fault), param->faultsize); + void __user *ptr = u64_to_user_ptr(param->faultlist); + struct kgsl_context *context; + int i, ret = 0; + + context = kgsl_context_get_owner(dev_priv, param->context_id); + if (!context) + return -EINVAL; + + /* This IOCTL is valid for invalidated contexts only */ + if (!(context->flags & KGSL_CONTEXT_FAULT_INFO) || + !kgsl_context_invalid(context)) { + ret = -EINVAL; + goto err; + } + + /* Return the number of fault types */ + if (!param->faultlist) { + param->faultnents = KGSL_FAULT_TYPE_MAX; + kgsl_context_put(context); + return 0; + } + + /* Check if it's a request to get fault counts or to fill the fault information */ + for (i = 0; i < param->faultnents; i++) { + struct kgsl_fault fault = {0}; + + if (copy_from_user(&fault, ptr, size)) { + ret = -EFAULT; + goto err; + } + + if (fault.fault) + break; + + ptr += param->faultsize; + } + + ptr = u64_to_user_ptr(param->faultlist); + + if (i == param->faultnents) + ret = kgsl_update_fault_count(context, ptr, param->faultnents, + param->faultsize); + else + ret = kgsl_update_fault_details(context, ptr, param->faultnents, + param->faultsize); + +err: + kgsl_context_put(context); + return ret; +} + +int kgsl_add_fault(struct kgsl_context *context, u32 type, void *priv) +{ + struct kgsl_fault_node *fault, *p, *tmp; + int length = 0; + ktime_t tout; + + if (kgsl_context_is_bad(context)) + return -EINVAL; + + fault = kmalloc(sizeof(struct kgsl_fault_node), GFP_KERNEL); 
+ if (!fault) + return -ENOMEM; + + fault->type = type; + fault->priv = priv; + fault->time = ktime_get(); + + tout = ktime_sub_ms(ktime_get(), KGSL_MAX_FAULT_TIME_THRESHOLD); + + mutex_lock(&context->fault_lock); + + list_for_each_entry_safe(p, tmp, &context->faults, node) { + if (ktime_compare(p->time, tout) > 0) { + length++; + continue; + } + + list_del(&p->node); + kfree(p->priv); + kfree(p); + } + + if (length == KGSL_MAX_FAULT_ENTRIES) { + tmp = list_first_entry(&context->faults, struct kgsl_fault_node, node); + list_del(&tmp->node); + kfree(tmp->priv); + kfree(tmp); + } + + list_add_tail(&fault->node, &context->faults); + mutex_unlock(&context->fault_lock); + + return 0; +} + #ifdef CONFIG_ARM64 static uint64_t kgsl_filter_cachemode(uint64_t flags) { diff --git a/kgsl.h b/kgsl.h index 5bc6f64262..fa5d904e1e 100644 --- a/kgsl.h +++ b/kgsl.h @@ -474,6 +474,8 @@ long kgsl_ioctl_timeline_signal(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data); long kgsl_ioctl_timeline_destroy(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data); +long kgsl_ioctl_get_fault_report(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); void kgsl_mem_entry_destroy(struct kref *kref); diff --git a/kgsl_device.h b/kgsl_device.h index dc81abf235..4808436853 100644 --- a/kgsl_device.h +++ b/kgsl_device.h @@ -329,6 +329,25 @@ enum kgsl_context_priv { struct kgsl_process_private; +#define KGSL_MAX_FAULT_ENTRIES 40 + +/* Maintain faults observed within threshold time (in milliseconds) */ +#define KGSL_MAX_FAULT_TIME_THRESHOLD 5000 + +/** + * struct kgsl_fault_node - GPU fault descriptor + * @node: List node for list of faults + * @type: Type of fault + * @priv: Pointer to type specific fault + * @time: Time when fault was observed + */ +struct kgsl_fault_node { + struct list_head node; + u32 type; + void *priv; + ktime_t time; +}; + /** * struct kgsl_context - The context fields that are valid for a user defined * context @@ -382,6 +401,10 
@@ struct kgsl_context { * submitted */ u32 gmu_dispatch_queue; + /** @faults: List of @kgsl_fault_node to store fault information */ + struct list_head faults; + /** @fault_lock: Mutex to protect faults */ + struct mutex fault_lock; }; #define _context_comm(_c) \ @@ -944,6 +967,22 @@ static inline void kgsl_mmu_set_feature(struct kgsl_device *device, set_bit(feature, &device->mmu.features); } +/** + * kgsl_add_fault - Add fault information for a context + * @context: Pointer to the KGSL context + * @type: type of fault info + * @priv: Pointer to type specific fault info + * + * Return: 0 on success or error code on failure. + */ +int kgsl_add_fault(struct kgsl_context *context, u32 type, void *priv); + +/** + * kgsl_free_faults - Free fault information for a context + * @context: Pointer to the KGSL context + */ +void kgsl_free_faults(struct kgsl_context *context); + /** * kgsl_trace_gpu_mem_total - Overall gpu memory usage tracking which includes * process allocations, imported dmabufs and kgsl globals diff --git a/kgsl_ioctl.c b/kgsl_ioctl.c index c6b55641a8..7fdf03dabd 100644 --- a/kgsl_ioctl.c +++ b/kgsl_ioctl.c @@ -100,6 +100,8 @@ static const struct kgsl_ioctl kgsl_ioctl_funcs[] = { kgsl_ioctl_timeline_signal), KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_DESTROY, kgsl_ioctl_timeline_destroy), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GET_FAULT_REPORT, + kgsl_ioctl_get_fault_report), }; long kgsl_ioctl_copy_in(unsigned int kernel_cmd, unsigned int user_cmd, diff --git a/kgsl_iommu.c b/kgsl_iommu.c index e1e845d6b0..8b16a8eba2 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -795,6 +795,37 @@ static struct kgsl_process_private *kgsl_iommu_get_process(u64 ptbase) return NULL; } +static void kgsl_iommu_add_fault_info(struct kgsl_context *context, + unsigned long addr, int flags) +{ + struct kgsl_pagefault_report *report; + u32 fault_flag = 0; + + if (!context || !(context->flags & KGSL_CONTEXT_FAULT_INFO)) + return; + + report = kzalloc(sizeof(struct kgsl_pagefault_report), 
GFP_KERNEL); + if (!report) + return; + + if (flags & IOMMU_FAULT_TRANSLATION) + fault_flag = KGSL_PAGEFAULT_TYPE_TRANSLATION; + else if (flags & IOMMU_FAULT_PERMISSION) + fault_flag = KGSL_PAGEFAULT_TYPE_PERMISSION; + else if (flags & IOMMU_FAULT_EXTERNAL) + fault_flag = KGSL_PAGEFAULT_TYPE_EXTERNAL; + else if (flags & IOMMU_FAULT_TRANSACTION_STALLED) + fault_flag = KGSL_PAGEFAULT_TYPE_TRANSACTION_STALLED; + + fault_flag |= (flags & IOMMU_FAULT_WRITE) ? KGSL_PAGEFAULT_TYPE_WRITE : + KGSL_PAGEFAULT_TYPE_READ; + + report->fault_addr = addr; + report->fault_type = fault_flag; + if (kgsl_add_fault(context, KGSL_FAULT_TYPE_PAGEFAULT, report)) + kfree(report); +} + static void kgsl_iommu_print_fault(struct kgsl_mmu *mmu, struct kgsl_iommu_context *ctxt, unsigned long addr, u64 ptbase, u32 contextid, @@ -957,6 +988,7 @@ static int kgsl_iommu_fault_handler(struct kgsl_mmu *mmu, kgsl_iommu_print_fault(mmu, ctx, addr, ptbase, contextidr, flags, private, context); + kgsl_iommu_add_fault_info(context, addr, flags); if (stall) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); From b2ab4c577f213cb1eb63dded51f9605aaaba6a70 Mon Sep 17 00:00:00 2001 From: Akhil P Oommen Date: Mon, 29 Nov 2021 11:26:28 +0530 Subject: [PATCH 0029/1016] msm: kgsl: Fix preemption in a6x Earlier we used to submit CP_SET_PSEUDO_REGISTER packet for every cmdbatch. This was recently optimized to submit only once for each RB and during context switch. Switch back to old sequence for a6x family since the new one is not supported in a6x family. 
Change-Id: Id05000de619d9800f770b4eee6c4ca157c4ebbc2 Signed-off-by: Akhil P Oommen --- adreno_a6xx_preempt.c | 20 ++++++++++++-------- adreno_a6xx_ringbuffer.c | 10 +--------- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/adreno_a6xx_preempt.c b/adreno_a6xx_preempt.c index 1d5596dc87..ecc9b8b6d5 100644 --- a/adreno_a6xx_preempt.c +++ b/adreno_a6xx_preempt.c @@ -491,14 +491,17 @@ u32 a6xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, u32 *cmds) { unsigned int *cmds_orig = cmds; + uint64_t gpuaddr = 0; if (!adreno_is_preemption_enabled(adreno_dev)) return 0; - if (test_and_set_bit(ADRENO_RB_SET_PSEUDO_DONE, &rb->flags)) - goto done; - - *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12); + if (drawctxt) { + gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; + *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 15); + } else { + *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12); + } /* NULL SMMU_INFO buffer - we track in KMD */ *cmds++ = SET_PSEUDO_SMMU_INFO; @@ -511,6 +514,11 @@ u32 a6xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, cmds += cp_gpuaddr(adreno_dev, cmds, rb->secure_preemption_desc->gpuaddr); + if (drawctxt) { + *cmds++ = SET_PSEUDO_NON_PRIV_SAVE_ADDR; + cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr); + } + /* * There is no need to specify this address when we are about to * trigger preemption. 
This is because CP internally stores this @@ -522,12 +530,10 @@ u32 a6xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, cmds += cp_gpuaddr(adreno_dev, cmds, rb->perfcounter_save_restore_desc->gpuaddr); -done: if (drawctxt) { struct adreno_ringbuffer *rb = drawctxt->rb; uint64_t dest = adreno_dev->preempt.scratch->gpuaddr + (rb->id * sizeof(u64)); - u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 2); cmds += cp_gpuaddr(adreno_dev, cmds, dest); @@ -607,8 +613,6 @@ void a6xx_preemption_start(struct adreno_device *adreno_dev) adreno_ringbuffer_set_pagetable(device, rb, device->mmu.defaultpagetable); - - clear_bit(ADRENO_RB_SET_PSEUDO_DONE, &rb->flags); } } diff --git a/adreno_a6xx_ringbuffer.c b/adreno_a6xx_ringbuffer.c index 6599c264dc..fe36694eb2 100644 --- a/adreno_a6xx_ringbuffer.c +++ b/adreno_a6xx_ringbuffer.c @@ -61,7 +61,7 @@ static int a6xx_rb_context_switch(struct adreno_device *adreno_dev, adreno_drawctxt_get_pagetable(drawctxt); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int count = 0; - u32 cmds[36]; + u32 cmds[32]; if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable) count += a6xx_rb_pagetable_switch(adreno_dev, rb, drawctxt, @@ -87,14 +87,6 @@ static int a6xx_rb_context_switch(struct adreno_device *adreno_dev, cmds[count++] = cp_type7_packet(CP_EVENT_WRITE, 1); cmds[count++] = 0x31; - if (adreno_is_preemption_enabled(adreno_dev)) { - u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; - - cmds[count++] = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 3); - cmds[count++] = SET_PSEUDO_NON_PRIV_SAVE_ADDR; - count += cp_gpuaddr(adreno_dev, &cmds[count], gpuaddr); - } - return a6xx_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED, cmds, count, 0, NULL); } From 63a813a6296875834749647ba7c8cdeecf8b69d0 Mon Sep 17 00:00:00 2001 From: Akhil P Oommen Date: Thu, 2 Dec 2021 15:06:43 +0530 Subject: [PATCH 0030/1016] msm: kgsl: Update a662 
configuration Correct the gmem size and prim_fifo_threshold configuration for a662 gpu. Change-Id: I029cf8d806e34f9dc8e4a1b92908629f67e59248 Signed-off-by: Akhil P Oommen --- adreno-gpulist.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index a676433212..59236118e8 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -1721,11 +1721,11 @@ static const struct adreno_a6xx_core adreno_gpu_core_a662 = { .gpudev = &adreno_a6xx_gmu_gpudev.base, .perfcounters = &adreno_a6xx_perfcounters, .gmem_base = 0, - .gmem_size = SZ_512K, + .gmem_size = SZ_1M + SZ_512K, .bus_width = 32, .snapshot_size = SZ_2M, }, - .prim_fifo_threshold = 0x00200000, + .prim_fifo_threshold = 0x00300000, .gmu_major = 2, .gmu_minor = 0, .sqefw_name = "a660_sqe.fw", From ab186fed8f7fa71a5f92b7f20c03c6c3157c4cde Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Wed, 24 Nov 2021 14:34:19 -0700 Subject: [PATCH 0031/1016] msm: kgsl: Re-enable IOCTLs for securemem for 32-bit processes Now that we have restored 32-bit secure VA for 32-bit processes, we can re-enable these IOCTLs. Change-Id: I887a59b675f06ab984085414056848207a96456c Signed-off-by: Harshdeep Dhatt --- kgsl.c | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/kgsl.c b/kgsl.c index e69c7bc516..002d314fe1 100644 --- a/kgsl.c +++ b/kgsl.c @@ -3081,15 +3081,6 @@ long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, if (!check_and_warn_secured(device)) return -EOPNOTSUPP; - /* - * On 64 bit kernel, secure memory region is expanded and - * moved to 64 bit address, 32 bit apps can not access it from - * this IOCTL. 
- */ - if (is_compat_task() && - test_bit(KGSL_MMU_64BIT, &device->mmu.features)) - return -EOPNOTSUPP; - /* Can't use CPU map with secure buffers */ if (param->flags & KGSL_MEMFLAGS_USE_CPU_MAP) return -EINVAL; @@ -3875,20 +3866,10 @@ long kgsl_ioctl_gpuobj_alloc(struct kgsl_device_private *dev_priv, long kgsl_ioctl_gpumem_alloc(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) { - struct kgsl_device *device = dev_priv->device; struct kgsl_gpumem_alloc *param = data; struct kgsl_mem_entry *entry; uint64_t flags = param->flags; - /* - * On 64 bit kernel, secure memory region is expanded and - * moved to 64 bit address, 32 bit apps can not access it from - * this IOCTL. - */ - if ((param->flags & KGSL_MEMFLAGS_SECURE) && is_compat_task() - && test_bit(KGSL_MMU_64BIT, &device->mmu.features)) - return -EOPNOTSUPP; - /* Legacy functions doesn't support these advanced features */ flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP); @@ -3913,20 +3894,10 @@ long kgsl_ioctl_gpumem_alloc(struct kgsl_device_private *dev_priv, long kgsl_ioctl_gpumem_alloc_id(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) { - struct kgsl_device *device = dev_priv->device; struct kgsl_gpumem_alloc_id *param = data; struct kgsl_mem_entry *entry; uint64_t flags = param->flags; - /* - * On 64 bit kernel, secure memory region is expanded and - * moved to 64 bit address, 32 bit apps can not access it from - * this IOCTL. - */ - if ((param->flags & KGSL_MEMFLAGS_SECURE) && is_compat_task() - && test_bit(KGSL_MMU_64BIT, &device->mmu.features)) - return -EOPNOTSUPP; - if (is_compat_task()) flags |= KGSL_MEMFLAGS_FORCE_32BIT; From 4fd5db8534f1bd21dd451896966d4de6a621306e Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Fri, 12 Nov 2021 15:00:03 -0700 Subject: [PATCH 0032/1016] msm: kgsl: Fix memory leak in VBOs We take a refcount on the child mem entry when creating a bind range, but never put it back. This leads to memory leak, even when process has exited. 
Put back this reference when removing this bind range. Change-Id: I7e7f4b4cb36fa2d5d20a80b28890c9c77c69d7e2 Signed-off-by: Harshdeep Dhatt --- kgsl_vbo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kgsl_vbo.c b/kgsl_vbo.c index 091f2ecc3f..ff7488d297 100644 --- a/kgsl_vbo.c +++ b/kgsl_vbo.c @@ -111,6 +111,7 @@ static void kgsl_memdesc_remove_range(struct kgsl_mem_entry *target, kgsl_mmu_map_zero_page_to_range(memdesc->pagetable, memdesc, range->range.start, bind_range_len(range)); + kgsl_mem_entry_put(range->entry); kfree(range); } } From 79ee406847ab3c9c4e4231fd246bcd909d9f872f Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Thu, 18 Nov 2021 14:16:01 -0700 Subject: [PATCH 0033/1016] msm: kgsl: Use correct fault type for GMU asserts Use ADRENO_GMU_FAULT and make sure it gets propagated to snapshot layers. Change-Id: I9388bf408c623956d8e5d922d07393eb7f3061e2 Signed-off-by: Harshdeep Dhatt --- adreno_gen7_hwsched_hfi.c | 2 +- adreno_hwsched.c | 2 +- kgsl_snapshot.c | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 60ad1921d0..d7a7c543dc 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -427,7 +427,7 @@ static void process_dbgq_irq(struct adreno_device *adreno_dev) if (!recovery) return; - adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); } /* HFI interrupt handler */ diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 7ce7d2177b..43062a94fb 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -1430,7 +1430,7 @@ static void reset_and_snapshot(struct adreno_device *adreno_dev, int fault) obj = get_active_cmdobj(adreno_dev); if (!obj) { - kgsl_device_snapshot(device, NULL, false); + kgsl_device_snapshot(device, NULL, fault & ADRENO_GMU_FAULT); goto done; } diff --git a/kgsl_snapshot.c b/kgsl_snapshot.c index 40d16820eb..148e095dd7 100644 --- a/kgsl_snapshot.c +++ b/kgsl_snapshot.c @@ -631,6 +631,7 
@@ void kgsl_device_snapshot(struct kgsl_device *device, /* increment the hang count for good book keeping */ device->snapshot_faultcount++; + device->gmu_fault = gmu_fault; if (device->snapshot != NULL) { From 69e51b81ed7b77e96ced136231e7675c568fbbd1 Mon Sep 17 00:00:00 2001 From: Rohan Sethi Date: Tue, 7 Dec 2021 13:18:51 +0530 Subject: [PATCH 0034/1016] msm: kgsl: Fix gpuaddr_in_range() to check upper bound Currently gpuaddr_in_range() accepts only the gpuaddr & returns true if it lies in valid range. But this does not mean that the entire buffer is within range. Modify the function to accept size as a parameter and check that both starting & ending points of buffer lie within mmu range. Change-Id: I1d722295b9a27e746bfdb6d3bf409ffe722193cb Signed-off-by: Rohan Sethi --- adreno_dispatch.c | 4 ++-- adreno_hwsched.c | 3 ++- kgsl.c | 4 ++-- kgsl_iommu.c | 11 +++++++---- kgsl_mmu.c | 6 +++--- kgsl_mmu.h | 5 +++-- 6 files changed, 19 insertions(+), 14 deletions(-) diff --git a/adreno_dispatch.c b/adreno_dispatch.c index d1c747061f..51d701434c 100644 --- a/adreno_dispatch.c +++ b/adreno_dispatch.c @@ -1092,8 +1092,8 @@ static inline bool _verify_ib(struct kgsl_device_private *dev_priv, } /* Make sure that the address is in range and dword aligned */ - if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, ib->gpuaddr) || - !IS_ALIGNED(ib->gpuaddr, 4)) { + if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, ib->gpuaddr, + ib->size) || !IS_ALIGNED(ib->gpuaddr, 4)) { pr_context(device, context, "ctxt %d invalid ib gpuaddr %llX\n", context->id, ib->gpuaddr); return false; diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 43062a94fb..83cfbfc2a0 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -705,7 +705,8 @@ static inline bool _verify_ib(struct kgsl_device_private *dev_priv, } /* Make sure that the address is mapped */ - if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, ib->gpuaddr)) { + if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, ib->gpuaddr, + ib->size)) { 
pr_context(device, context, "ctxt %d invalid ib gpuaddr %llX\n", context->id, ib->gpuaddr); return false; diff --git a/kgsl.c b/kgsl.c index 002d314fe1..4b176a069d 100644 --- a/kgsl.c +++ b/kgsl.c @@ -1303,9 +1303,9 @@ kgsl_sharedmem_find(struct kgsl_process_private *private, uint64_t gpuaddr) if (!private) return NULL; - if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, gpuaddr) && + if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, gpuaddr, 0) && !kgsl_mmu_gpuaddr_in_range( - private->pagetable->mmu->securepagetable, gpuaddr)) + private->pagetable->mmu->securepagetable, gpuaddr, 0)) return NULL; spin_lock(&private->mem_lock); diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 8b16a8eba2..dc01a17b93 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -2007,18 +2007,21 @@ static int kgsl_iommu_svm_range(struct kgsl_pagetable *pagetable, } static bool kgsl_iommu_addr_in_range(struct kgsl_pagetable *pagetable, - uint64_t gpuaddr) + uint64_t gpuaddr, uint64_t size) { if (gpuaddr == 0) return false; - if (gpuaddr >= pagetable->va_start && gpuaddr < pagetable->va_end) + if (gpuaddr >= pagetable->va_start && (gpuaddr + size) < + pagetable->va_end) return true; - if (gpuaddr >= pagetable->compat_va_start && gpuaddr < pagetable->compat_va_end) + if (gpuaddr >= pagetable->compat_va_start && (gpuaddr + size) < + pagetable->compat_va_end) return true; - if (gpuaddr >= pagetable->svm_start && gpuaddr < pagetable->svm_end) + if (gpuaddr >= pagetable->svm_start && (gpuaddr + size) < + pagetable->svm_end) return true; return false; diff --git a/kgsl_mmu.c b/kgsl_mmu.c index c0cc54f202..21f8b7c1e7 100644 --- a/kgsl_mmu.c +++ b/kgsl_mmu.c @@ -520,10 +520,10 @@ enum kgsl_mmutype kgsl_mmu_get_mmutype(struct kgsl_device *device) } bool kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pagetable, - uint64_t gpuaddr) + uint64_t gpuaddr, uint64_t size) { if (PT_OP_VALID(pagetable, addr_in_range)) - return pagetable->pt_ops->addr_in_range(pagetable, gpuaddr); + return 
pagetable->pt_ops->addr_in_range(pagetable, gpuaddr, size); return false; } @@ -535,7 +535,7 @@ bool kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pagetable, */ static bool nommu_gpuaddr_in_range(struct kgsl_pagetable *pagetable, - uint64_t gpuaddr) + uint64_t gpuaddr, uint64_t size) { return (gpuaddr != 0) ? true : false; } diff --git a/kgsl_mmu.h b/kgsl_mmu.h index 0852ca7097..fbf0ccb8c6 100644 --- a/kgsl_mmu.h +++ b/kgsl_mmu.h @@ -136,7 +136,7 @@ struct kgsl_mmu_pt_ops { int (*svm_range)(struct kgsl_pagetable *pt, uint64_t *lo, uint64_t *hi, uint64_t memflags); bool (*addr_in_range)(struct kgsl_pagetable *pagetable, - uint64_t gpuaddr); + uint64_t gpuaddr, uint64_t size); }; enum kgsl_mmu_feature { @@ -214,7 +214,8 @@ int kgsl_mmu_unmap_range(struct kgsl_pagetable *pt, struct kgsl_memdesc *memdesc, u64 offset, u64 length); unsigned int kgsl_mmu_log_fault_addr(struct kgsl_mmu *mmu, u64 ttbr0, uint64_t addr); -bool kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pt, uint64_t gpuaddr); +bool kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pt, uint64_t gpuaddr, + uint64_t size); int kgsl_mmu_get_region(struct kgsl_pagetable *pagetable, uint64_t gpuaddr, uint64_t size); From 8890addb51ec7779ec2a15f8da00eca440ed5755 Mon Sep 17 00:00:00 2001 From: Akhil P Oommen Date: Tue, 7 Dec 2021 10:15:27 +0530 Subject: [PATCH 0035/1016] msm: kgsl: Restore 32 bit secure VA range for 32 bit processes Some 32 bit apps cannot work with 64 bit secure GPU virtual addresses. Hence, use 32 bit secure VA for 32 bit processes. The hardware expects all secure VA(both 32 and 64 bit) to be a contiguous range. To make this happen, move global VA (which is currently sandwiched between 32 bit and 64 bit secure VA) below the 32 bit secure VA. 
Change-Id: I6fb9c0979fc6cedb649aa9a3ba1d0533188883bd Signed-off-by: Akhil P Oommen Signed-off-by: Nitheesh Muthuraj --- adreno_a5xx.c | 7 ++++--- adreno_a6xx.c | 7 ++++--- adreno_gen7.c | 7 ++++--- kgsl_iommu.c | 27 ++++++++++----------------- kgsl_iommu.h | 33 +++++++++++++++++---------------- 5 files changed, 39 insertions(+), 42 deletions(-) diff --git a/adreno_a5xx.c b/adreno_a5xx.c index 83e2b4b389..b25198be46 100644 --- a/adreno_a5xx.c +++ b/adreno_a5xx.c @@ -1530,11 +1530,12 @@ static int a5xx_start(struct adreno_device *adreno_dev) if (device->mmu.secured) { kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_CNTL, 0x0); kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, - lower_32_bits(KGSL_IOMMU_SECURE_BASE(&device->mmu))); + lower_32_bits(KGSL_IOMMU_SECURE_BASE32)); kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, - upper_32_bits(KGSL_IOMMU_SECURE_BASE(&device->mmu))); + upper_32_bits(KGSL_IOMMU_SECURE_BASE32)); kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, - KGSL_IOMMU_SECURE_SIZE(&device->mmu)); + FIELD_PREP(GENMASK(31, 12), + (KGSL_IOMMU_SECURE_SIZE(&device->mmu) / SZ_4K))); } a5xx_preemption_start(adreno_dev); diff --git a/adreno_a6xx.c b/adreno_a6xx.c index 7bfa013467..b593593547 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -505,11 +505,12 @@ static void a6xx_set_secvid(struct kgsl_device *device) kgsl_regwrite(device, A6XX_RBBM_SECVID_TSB_CNTL, 0x0); kgsl_regwrite(device, A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, - lower_32_bits(KGSL_IOMMU_SECURE_BASE(&device->mmu))); + lower_32_bits(KGSL_IOMMU_SECURE_BASE32)); kgsl_regwrite(device, A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, - upper_32_bits(KGSL_IOMMU_SECURE_BASE(&device->mmu))); + upper_32_bits(KGSL_IOMMU_SECURE_BASE32)); kgsl_regwrite(device, A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, - KGSL_IOMMU_SECURE_SIZE(&device->mmu)); + FIELD_PREP(GENMASK(31, 12), + (KGSL_IOMMU_SECURE_SIZE(&device->mmu) / SZ_4K))); if (ADRENO_QUIRK(ADRENO_DEVICE(device), ADRENO_QUIRK_SECVID_SET_ONCE)) set = true; 
diff --git a/adreno_gen7.c b/adreno_gen7.c index 66d349b4f6..70e1aaf433 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -355,11 +355,12 @@ static void _set_secvid(struct kgsl_device *device) { kgsl_regwrite(device, GEN7_RBBM_SECVID_TSB_CNTL, 0x0); kgsl_regwrite(device, GEN7_RBBM_SECVID_TSB_TRUSTED_BASE_LO, - lower_32_bits(KGSL_IOMMU_SECURE_BASE(&device->mmu))); + lower_32_bits(KGSL_IOMMU_SECURE_BASE32)); kgsl_regwrite(device, GEN7_RBBM_SECVID_TSB_TRUSTED_BASE_HI, - upper_32_bits(KGSL_IOMMU_SECURE_BASE(&device->mmu))); + upper_32_bits(KGSL_IOMMU_SECURE_BASE32)); kgsl_regwrite(device, GEN7_RBBM_SECVID_TSB_TRUSTED_SIZE, - KGSL_IOMMU_SECURE_SIZE(&device->mmu)); + FIELD_PREP(GENMASK(31, 12), + (KGSL_IOMMU_SECURE_SIZE(&device->mmu) / SZ_4K))); } /* diff --git a/kgsl_iommu.c b/kgsl_iommu.c index dc01a17b93..0728683226 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -1233,18 +1233,16 @@ static struct kgsl_pagetable *kgsl_iommu_default_pagetable(struct kgsl_mmu *mmu) if (test_bit(KGSL_MMU_64BIT, &mmu->features)) { iommu_pt->base.compat_va_start = KGSL_IOMMU_SVM_BASE32; - iommu_pt->base.compat_va_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); + if (test_bit(KGSL_MMU_IOPGTABLE, &mmu->features)) + iommu_pt->base.compat_va_end = KGSL_MEMSTORE_TOKEN_ADDRESS; + else + iommu_pt->base.compat_va_end = KGSL_IOMMU_GLOBAL_MEM_BASE64; iommu_pt->base.va_start = KGSL_IOMMU_VA_BASE64; iommu_pt->base.va_end = KGSL_IOMMU_VA_END64; } else { iommu_pt->base.va_start = KGSL_IOMMU_SVM_BASE32; - - if (mmu->secured) - iommu_pt->base.va_end = KGSL_IOMMU_SECURE_BASE(mmu); - else - iommu_pt->base.va_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); - + iommu_pt->base.va_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); iommu_pt->base.compat_va_start = iommu_pt->base.va_start; iommu_pt->base.compat_va_end = iommu_pt->base.va_end; } @@ -1291,8 +1289,8 @@ static struct kgsl_pagetable *kgsl_iommu_secure_pagetable(struct kgsl_mmu *mmu) iommu_pt->base.rbtree = RB_ROOT; iommu_pt->base.pt_ops = &secure_pt_ops; - 
iommu_pt->base.compat_va_start = KGSL_IOMMU_SECURE_BASE(mmu); - iommu_pt->base.compat_va_end = KGSL_IOMMU_SECURE_END(mmu); + iommu_pt->base.compat_va_start = KGSL_IOMMU_SECURE_BASE32; + iommu_pt->base.compat_va_end = KGSL_IOMMU_SECURE_END32; iommu_pt->base.va_start = KGSL_IOMMU_SECURE_BASE(mmu); iommu_pt->base.va_end = KGSL_IOMMU_SECURE_END(mmu); @@ -1324,13 +1322,13 @@ static struct kgsl_pagetable *kgsl_iopgtbl_pagetable(struct kgsl_mmu *mmu, u32 n if (test_bit(KGSL_MMU_64BIT, &mmu->features)) { pt->base.compat_va_start = KGSL_IOMMU_SVM_BASE32; - pt->base.compat_va_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); + pt->base.compat_va_end = KGSL_MEMSTORE_TOKEN_ADDRESS; pt->base.va_start = KGSL_IOMMU_VA_BASE64; pt->base.va_end = KGSL_IOMMU_VA_END64; if (is_compat_task()) { pt->base.svm_start = KGSL_IOMMU_SVM_BASE32; - pt->base.svm_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); + pt->base.svm_end = KGSL_MEMSTORE_TOKEN_ADDRESS; } else { pt->base.svm_start = KGSL_IOMMU_SVM_BASE64; pt->base.svm_end = KGSL_IOMMU_SVM_END64; @@ -1338,12 +1336,7 @@ static struct kgsl_pagetable *kgsl_iopgtbl_pagetable(struct kgsl_mmu *mmu, u32 n } else { pt->base.va_start = KGSL_IOMMU_SVM_BASE32; - - if (mmu->secured) - pt->base.va_end = KGSL_IOMMU_SECURE_BASE(mmu); - else - pt->base.va_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); - + pt->base.va_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); pt->base.compat_va_start = pt->base.va_start; pt->base.compat_va_end = pt->base.va_end; pt->base.svm_start = KGSL_IOMMU_SVM_BASE32; diff --git a/kgsl_iommu.h b/kgsl_iommu.h index 4632992831..b95d502714 100644 --- a/kgsl_iommu.h +++ b/kgsl_iommu.h @@ -13,7 +13,8 @@ */ #define KGSL_IOMMU_GLOBAL_MEM_SIZE (20 * SZ_1M) #define KGSL_IOMMU_GLOBAL_MEM_BASE32 0xf8000000 -#define KGSL_IOMMU_GLOBAL_MEM_BASE64 0xfc000000 +#define KGSL_IOMMU_GLOBAL_MEM_BASE64 \ + (KGSL_MEMSTORE_TOKEN_ADDRESS - KGSL_IOMMU_GLOBAL_MEM_SIZE) /* * This is a dummy token address that we use to identify memstore when the user @@ -23,7 +24,7 @@ * conflict */ -#define 
KGSL_MEMSTORE_TOKEN_ADDRESS 0xfff00000 +#define KGSL_MEMSTORE_TOKEN_ADDRESS (KGSL_IOMMU_SECURE_BASE32 - SZ_4K) #define KGSL_IOMMU_GLOBAL_MEM_BASE(__mmu) \ (test_bit(KGSL_MMU_64BIT, &(__mmu)->features) ? \ @@ -36,27 +37,27 @@ * Limit secure size to 256MB for 32bit kernels. */ #define KGSL_IOMMU_SECURE_SIZE32 SZ_256M -#define KGSL_IOMMU_SECURE_END32(_mmu) KGSL_IOMMU_GLOBAL_MEM_BASE(_mmu) -#define KGSL_IOMMU_SECURE_BASE32(_mmu) \ - (KGSL_IOMMU_GLOBAL_MEM_BASE(_mmu) - KGSL_IOMMU_SECURE_SIZE32) +#define KGSL_IOMMU_SECURE_BASE32 \ + (KGSL_IOMMU_SECURE_BASE64 - KGSL_IOMMU_SECURE_SIZE32) +#define KGSL_IOMMU_SECURE_END32 KGSL_IOMMU_SECURE_BASE64 -/* - * Try to use maximum allowed secure size i.e 0xFFFFF000 - * for both 32bit and 64bit secure apps when using 64bit kernel. - */ -#define KGSL_IOMMU_SECURE_BASE64 0x0100000000ULL -#define KGSL_IOMMU_SECURE_END64 0x01FFFFF000ULL -#define KGSL_IOMMU_SECURE_SIZE64 \ - (KGSL_IOMMU_SECURE_END64 - KGSL_IOMMU_SECURE_BASE64) +#define KGSL_IOMMU_SECURE_BASE64 0x100000000ULL +#define KGSL_IOMMU_SECURE_END64 \ + (KGSL_IOMMU_SECURE_BASE64 + KGSL_IOMMU_SECURE_SIZE64) + +#define KGSL_IOMMU_MAX_SECURE_SIZE 0xFFFFF000 + +#define KGSL_IOMMU_SECURE_SIZE64 \ + (KGSL_IOMMU_MAX_SECURE_SIZE - KGSL_IOMMU_SECURE_SIZE32) #define KGSL_IOMMU_SECURE_BASE(_mmu) (test_bit(KGSL_MMU_64BIT, \ &(_mmu)->features) ? KGSL_IOMMU_SECURE_BASE64 : \ - KGSL_IOMMU_SECURE_BASE32(_mmu)) + KGSL_IOMMU_SECURE_BASE32) #define KGSL_IOMMU_SECURE_END(_mmu) (test_bit(KGSL_MMU_64BIT, \ &(_mmu)->features) ? KGSL_IOMMU_SECURE_END64 : \ - KGSL_IOMMU_SECURE_END32(_mmu)) + KGSL_IOMMU_SECURE_END32) #define KGSL_IOMMU_SECURE_SIZE(_mmu) (test_bit(KGSL_MMU_64BIT, \ - &(_mmu)->features) ? KGSL_IOMMU_SECURE_SIZE64 : \ + &(_mmu)->features) ? 
KGSL_IOMMU_MAX_SECURE_SIZE : \ KGSL_IOMMU_SECURE_SIZE32) /* The CPU supports 39 bit addresses */ From 6b69f39eabaecc7e4af50cc4f2fd67fc08505c10 Mon Sep 17 00:00:00 2001 From: Mohammed Mirza Mandayappurath Manzoor Date: Tue, 8 Jun 2021 10:19:22 -0700 Subject: [PATCH 0036/1016] msm: kgsl: Add support for TSE, LRZ, RAZ and HLSQ perfcounters The select register for TSE, LRZ, RAZ and HLSQ counters are virtualized. Hence we need to program CP aperture control to route the register write to correct pipe. Also, since these registers do not have retention, update the power up list to include the pipe id so that CP can program its aperture correct. Change-Id: I7d553b19e81f3ea58bd870efd7fcc1a6bd45a875 Signed-off-by: Mohammed Mirza Mandayappurath Manzoor --- adreno_gen7.c | 63 ++++++++++++------- adreno_gen7.h | 8 ++- adreno_gen7_perfcounter.c | 115 ++++++++++++++++++++++++++++++++-- adreno_gen7_snapshot.h | 5 -- include/uapi/linux/msm_kgsl.h | 6 +- kgsl_util.h | 16 ++++- 6 files changed, 175 insertions(+), 38 deletions(-) diff --git a/adreno_gen7.c b/adreno_gen7.c index 70e1aaf433..208ee07e07 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -130,7 +130,15 @@ void gen7_cp_init_cmds(struct adreno_device *adreno_dev, u32 *cmds) /* Register initialization list with spinlock */ cmds[i++] = lower_32_bits(adreno_dev->pwrup_reglist->gpuaddr); cmds[i++] = upper_32_bits(adreno_dev->pwrup_reglist->gpuaddr); - cmds[i++] = 0; + /* + * Gen7 targets with concurrent binning are expected to have a dynamic + * power up list with triplets which contains the pipe id in it. + * Bit 31 of POWER_UP_REGISTER_LIST_LENGTH is reused here to let CP + * know if the power up contains the triplets. If + * REGISTER_INIT_LIST_WITH_SPINLOCK is set and bit 31 below is set, + * CP expects a dynamic list with triplets. 
+ */ + cmds[i++] = BIT(31); } int gen7_fenced_write(struct adreno_device *adreno_dev, u32 offset, @@ -275,10 +283,12 @@ static void gen7_patch_pwrup_reglist(struct adreno_device *adreno_dev) /* Static IFPC-only registers */ reglist[0].regs = gen7_ifpc_pwrup_reglist; reglist[0].count = ARRAY_SIZE(gen7_ifpc_pwrup_reglist); + lock->ifpc_list_len = reglist[0].count; /* Static IFPC + preemption registers */ reglist[1].regs = gen7_pwrup_reglist; reglist[1].count = ARRAY_SIZE(gen7_pwrup_reglist); + lock->preemption_list_len = reglist[1].count; /* * For each entry in each of the lists, write the offset and the current @@ -291,14 +301,12 @@ static void gen7_patch_pwrup_reglist(struct adreno_device *adreno_dev) *dest++ = r[j]; kgsl_regread(KGSL_DEVICE(adreno_dev), r[j], dest++); } - - lock->list_length += reglist[i].count * 2; } - /* This needs to be at the end of the list */ + /* This needs to be at the end of the dynamic list */ + *dest++ = FIELD_PREP(GENMASK(13, 12), PIPE_NONE); *dest++ = GEN7_RBBM_PERFCTR_CNTL; *dest++ = 1; - lock->list_length += 2; /* * The overall register list is composed of @@ -306,12 +314,16 @@ static void gen7_patch_pwrup_reglist(struct adreno_device *adreno_dev) * 2. Static IFPC + preemption registers * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects) * - * The CP views the second and third entries as one dynamic list - * starting from list_offset. list_length should be the total dwords in - * all the lists and list_offset should be specified as the size in - * dwords of the first entry in the list. + * The first two lists are static. Size of these lists are stored as + * number of pairs in ifpc_list_len and preemption_list_len + * respectively. With concurrent binning, Some of the perfcounter + * registers being virtualized, CP needs to know the pipe id to program + * the aperture inorder to restore the same. Thus, third list is a + * dynamic list with triplets as + * (
), and the length is + * stored as number for triplets in dynamic_list_len. */ - lock->list_offset = reglist[0].count * 2; + lock->dynamic_list_len = 1; } /* _llc_configure_gpu_scid() - Program the sub-cache ID for all GPU blocks */ @@ -1128,12 +1140,12 @@ static unsigned int gen7_register_offsets[ADRENO_REG_REGISTER_MAX] = { }; int gen7_perfcounter_update(struct adreno_device *adreno_dev, - struct adreno_perfcount_register *reg, bool update_reg) + struct adreno_perfcount_register *reg, bool update_reg, u32 pipe) { void *ptr = adreno_dev->pwrup_reglist->hostptr; struct cpu_gpu_lock *lock = ptr; u32 *data = ptr + sizeof(*lock); - int i, offset = 0; + int i, offset = (lock->ifpc_list_len + lock->preemption_list_len) * 2; if (kgsl_hwlock(lock)) { kgsl_hwunlock(lock); @@ -1142,19 +1154,19 @@ int gen7_perfcounter_update(struct adreno_device *adreno_dev, /* * If the perfcounter select register is already present in reglist - * update it, otherwise append the pair to * the end of the list. */ - for (i = 0; i < lock->list_length >> 1; i++) { - if (data[offset] == reg->select) { - data[offset + 1] = reg->countable; - goto update; - } - - if (data[offset] == A6XX_RBBM_PERFCTR_CNTL) - break; - - offset += 2; + if (select_reg_present) { + data[offset + 1] = reg->countable; + goto update; } /* @@ -2033,7 +2039,6 @@ int a6xx_perfcounter_update(struct adreno_device *adreno_dev, * so overwrite the existing A6XX_RBBM_PERFCNTL_CTRL and add it back to * the end. 
*/ - data[offset] = reg->select; data[offset + 1] = reg->countable; data[offset + 2] = A6XX_RBBM_PERFCTR_CNTL, diff --git a/adreno_gen7.c b/adreno_gen7.c index 5eaac79e44..3235b98a05 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -1618,15 +1618,8 @@ int gen7_perfcounter_remove(struct adreno_device *adreno_dev, bool remove_counter = false; u32 pipe = FIELD_PREP(GENMASK(13, 12), _get_pipeid(groupid)); - if (kgsl_hwlock(lock)) { - kgsl_hwunlock(lock); - return -EBUSY; - } - - if (lock->dynamic_list_len < 2) { - kgsl_hwunlock(lock); + if (lock->dynamic_list_len < 2) return -EINVAL; - } second_last_offset = offset + (lock->dynamic_list_len - 2) * 3; last_offset = second_last_offset + 3; @@ -1640,9 +1633,12 @@ int gen7_perfcounter_remove(struct adreno_device *adreno_dev, offset += 3; } - if (!remove_counter) { - kgsl_hwunlock(lock); + if (!remove_counter) return -ENOENT; + + if (kgsl_hwlock(lock)) { + kgsl_hwunlock(lock); + return -EBUSY; } /* @@ -1683,6 +1679,19 @@ int gen7_perfcounter_update(struct adreno_device *adreno_dev, struct cpu_gpu_lock *lock = ptr; u32 *data = ptr + sizeof(*lock); int i, offset = (lock->ifpc_list_len + lock->preemption_list_len) * 2; + bool select_reg_present = false; + + for (i = 0; i < lock->dynamic_list_len; i++) { + if ((data[offset + 1] == reg->select) && (data[offset] == pipe)) { + select_reg_present = true; + break; + } + + if (data[offset + 1] == GEN7_RBBM_PERFCTR_CNTL) + break; + + offset += 3; + } if (kgsl_hwlock(lock)) { kgsl_hwunlock(lock); @@ -1694,16 +1703,9 @@ int gen7_perfcounter_update(struct adreno_device *adreno_dev, * update it, otherwise append the * triplet to the end of the list. 
*/ - for (i = 0; i < lock->dynamic_list_len; i++) { - if ((data[offset + 1] == reg->select) && (data[offset] == pipe)) { - data[offset + 2] = reg->countable; - goto update; - } - - if (data[offset + 1] == GEN7_RBBM_PERFCTR_CNTL) - break; - - offset += 3; + if (select_reg_present) { + data[offset + 2] = reg->countable; + goto update; } /* From 65d32f9946e3eac5813252a22b8457a841ccf4b7 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Tue, 22 Aug 2023 14:31:43 -0700 Subject: [PATCH 0499/1016] msm: kgsl: Simplify timelineobj cleanup Use the same path for timelineobj retire and destroy. This keeps the timelineobj valid until the preceding cmdbatches retire, and the scheduler retires it during normal operation or context detach. This simplifies cleanup when userspace detaches a context with timelineobjs in flight. Change-Id: I8812acd045ee13bf965fea1361cf867baf7345a0 Signed-off-by: Lynus Vaz --- adreno_dispatch.c | 5 +-- adreno_hwsched.c | 4 +- kgsl_drawobj.c | 93 ++++++++++++++++++++++++++++------------------- kgsl_drawobj.h | 8 ---- 4 files changed, 57 insertions(+), 53 deletions(-) diff --git a/adreno_dispatch.c b/adreno_dispatch.c index 37a84765da..bf922832e1 100644 --- a/adreno_dispatch.c +++ b/adreno_dispatch.c @@ -299,11 +299,8 @@ static int dispatch_retire_syncobj(struct kgsl_drawobj *drawobj, static int drawqueue_retire_timelineobj(struct kgsl_drawobj *drawobj, struct adreno_context *drawctxt) { - struct kgsl_drawobj_timeline *timelineobj = TIMELINEOBJ(drawobj); - _pop_drawobj(drawctxt); - kgsl_drawobj_timelineobj_retire(timelineobj); - + kgsl_drawobj_destroy(drawobj); return 0; } diff --git a/adreno_hwsched.c b/adreno_hwsched.c index b6ca356e64..5703eb94f7 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -195,10 +195,8 @@ static int _retire_markerobj(struct adreno_device *adreno_dev, struct kgsl_drawo static int _retire_timelineobj(struct kgsl_drawobj *drawobj, struct adreno_context *drawctxt) { - struct kgsl_drawobj_timeline *timelineobj = 
TIMELINEOBJ(drawobj); - _pop_drawobj(drawctxt); - kgsl_drawobj_timelineobj_retire(timelineobj); + kgsl_drawobj_destroy(drawobj); return 0; } diff --git a/kgsl_drawobj.c b/kgsl_drawobj.c index d9ecc6e56a..f1fdc7b831 100644 --- a/kgsl_drawobj.c +++ b/kgsl_drawobj.c @@ -393,12 +393,16 @@ static void syncobj_destroy(struct kgsl_drawobj *drawobj) } -static void timelineobj_destroy(struct kgsl_drawobj *drawobj) +static void _drawobj_timelineobj_retire(struct kref *kref) { - struct kgsl_drawobj_timeline *timelineobj = TIMELINEOBJ(drawobj); int i; + struct kgsl_drawobj_timeline *timelineobj = container_of(kref, + struct kgsl_drawobj_timeline, sig_refcount); for (i = 0; i < timelineobj->count; i++) { + kgsl_timeline_signal(timelineobj->timelines[i].timeline, + timelineobj->timelines[i].seqno); + kgsl_timeline_put(timelineobj->timelines[i].timeline); kgsl_context_put(timelineobj->timelines[i].context); } @@ -408,6 +412,32 @@ static void timelineobj_destroy(struct kgsl_drawobj *drawobj) timelineobj->count = 0; } +static void kgsl_timelineobj_signal(struct kgsl_drawobj_timeline *timelineobj) +{ + kref_put(&timelineobj->sig_refcount, _drawobj_timelineobj_retire); +} + +static void timelineobj_destroy(struct kgsl_drawobj *drawobj) +{ + struct kgsl_drawobj_timeline *timelineobj = TIMELINEOBJ(drawobj); + int i; + + /* + * At this point any syncobjs blocking this timelinobj have been + * signaled. The timelineobj now only needs all preceding timestamps to + * retire before signaling the timelines. Notify timelines to keep them + * in sync with the timestamps as they retire. + */ + for (i = 0; i < timelineobj->count; i++) + kgsl_timeline_add_signal(&timelineobj->timelines[i]); + + /* + * The scheduler is done with the timelineobj. Put the initial + * sig_refcount to continue with the signaling process. 
+ */ + kgsl_timelineobj_signal(timelineobj); +} + static void bindobj_destroy(struct kgsl_drawobj *drawobj) { struct kgsl_drawobj_bind *bindobj = BINDOBJ(drawobj); @@ -938,7 +968,8 @@ kgsl_drawobj_timeline_create(struct kgsl_device *device, * Initialize the sig_refcount that triggers the timeline signal. * This refcount goes to 0 when: * 1) This timelineobj is popped off the context queue. This implies - * any syncobj blocking this timelineobj was already signaled. + * any syncobj blocking this timelineobj was already signaled, or + * the context queue is cleaned up at detach time. * 2) The cmdobjs queued on this context before this timeline object * are retired. */ @@ -950,43 +981,17 @@ kgsl_drawobj_timeline_create(struct kgsl_device *device, return timelineobj; } -static void _drawobj_timelineobj_retire(struct kref *kref) -{ - struct kgsl_drawobj_timeline *timelineobj = container_of(kref, - struct kgsl_drawobj_timeline, sig_refcount); - struct kgsl_drawobj *drawobj = DRAWOBJ(timelineobj); - int i; - - for (i = 0; i < timelineobj->count; i++) - kgsl_timeline_signal(timelineobj->timelines[i].timeline, - timelineobj->timelines[i].seqno); - - /* Now that timelines are signaled destroy the drawobj */ - kgsl_drawobj_destroy(drawobj); -} - static void _timeline_signaled(struct kgsl_device *device, struct kgsl_event_group *group, void *priv, int ret) { struct kgsl_drawobj_timeline *timelineobj = priv; + struct kgsl_drawobj *drawobj = DRAWOBJ(timelineobj); - kref_put(&timelineobj->sig_refcount, _drawobj_timelineobj_retire); -} + /* Put the sig_refcount we took when registering this event */ + kgsl_timelineobj_signal(timelineobj); -void kgsl_drawobj_timelineobj_retire(struct kgsl_drawobj_timeline *timelineobj) -{ - int i; - - /* - * At this point any syncobjs blocking this timelinobj have been - * signaled. The timelineobj now only needs all preceding timestamps to - * retire before signaling the timelines. 
Notify timelines to keep them - * in sync with the timestamps as they retire. - */ - for (i = 0; i < timelineobj->count; i++) - kgsl_timeline_add_signal(&timelineobj->timelines[i]); - - kref_put(&timelineobj->sig_refcount, _drawobj_timelineobj_retire); + /* Put the drawobj refcount we took when registering this event */ + kgsl_drawobj_put(drawobj); } int kgsl_drawobj_add_timeline(struct kgsl_device_private *dev_priv, @@ -1063,18 +1068,30 @@ int kgsl_drawobj_add_timeline(struct kgsl_device_private *dev_priv, timelineobj->count = cmd.count; /* - * Take a refcount that we put when the last queued timestamp on this - * context is retired. Use a kgsl_event to notify us when this - * timestamp retires. + * Register a kgsl_event to notify us when the last queued timestamp + * retires. Take a refcount on the drawobj to keep it valid for the + * callback, and take the sig_refcount to synchronize with the + * timelineobj retire. Both these refcounts are put in the callback. */ + kref_get(&drawobj->refcount); kref_get(&timelineobj->sig_refcount); ret = kgsl_add_event(device, &context->events, queued, _timeline_signaled, timelineobj); if (ret) - goto err; + goto event_err; return 0; + +event_err: + /* + * If there was an error, put back sig_refcount and drawobj refcounts. + * The caller still holds initial refcounts on both and puts them in + * kgsl_drawobj_destroy(). Clean up the timelinelines array since we + * do not want to signal anything now. 
+ */ + kgsl_timelineobj_signal(timelineobj); + kgsl_drawobj_put(drawobj); err: for (i = 0; i < cmd.count; i++) { kgsl_timeline_put(timelineobj->timelines[i].timeline); diff --git a/kgsl_drawobj.h b/kgsl_drawobj.h index 5a38009ab3..b32ba58873 100644 --- a/kgsl_drawobj.h +++ b/kgsl_drawobj.h @@ -344,12 +344,4 @@ int kgsl_drawobj_add_timeline(struct kgsl_device_private *dev_priv, struct kgsl_drawobj_timeline *timelineobj, void __user *src, u64 cmdsize); -/** - * kgsl_drawobj_timelineobj_retire - Retire the timeline drawobj - * @timelineobj: Pointer to a timeline drawobject - * - * Retire the timelineobj when it is popped off the context queue. - */ -void kgsl_drawobj_timelineobj_retire(struct kgsl_drawobj_timeline *timelineobj); - #endif /* __KGSL_DRAWOBJ_H */ From 4c06d884fecf89a36f8a1c04a1338c932f62c293 Mon Sep 17 00:00:00 2001 From: Abhishek Barman Date: Thu, 18 May 2023 14:36:21 +0530 Subject: [PATCH 0500/1016] msm: kgsl: Support qcs405 target Add config changes to support qcs405 target. 
Change-Id: If104542c5364ee76e9a29e5975abaef0336f011a Signed-off-by: Abhishek Barman --- Kbuild | 3 +++ config/gki_qcs405.conf | 15 +++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 config/gki_qcs405.conf diff --git a/Kbuild b/Kbuild index 91192e6f60..762f33ef94 100644 --- a/Kbuild +++ b/Kbuild @@ -49,6 +49,9 @@ endif ifeq ($(CONFIG_ARCH_TRINKET), y) include $(KGSL_PATH)/config/gki_trinket.conf endif +ifeq ($(CONFIG_ARCH_QCS405), y) + include $(KGSL_PATH)/config/gki_qcs405.conf +endif ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERNEL_SRC)/drivers/devfreq diff --git a/config/gki_qcs405.conf b/config/gki_qcs405.conf new file mode 100644 index 0000000000..b9a6982a8a --- /dev/null +++ b/config/gki_qcs405.conf @@ -0,0 +1,15 @@ +CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ = y +CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON = y +CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 +CONFIG_QCOM_KGSL_SORT_POOL = y +CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y +# CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT is not set +CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR = \"msm-adreno-tz\" + +ccflags-y += -DCONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=1 \ + -DCONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=1 \ + -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ + -DCONFIG_QCOM_KGSL_SORT_POOL=1 \ + -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ + -DCONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=0 \ + -DCONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR=\"msm-adreno-tz\" From a22f7484b90cfbf177372ef6aa7b3d73f42146ed Mon Sep 17 00:00:00 2001 From: Abhishek Barman Date: Mon, 24 Jul 2023 15:34:09 +0530 Subject: [PATCH 0501/1016] msm: kgsl: Set correct values for SMMU protect register for A3xx For programming the CP Protect register for SMMU in A3xx GPU, pass correct values for SMMU registers base offset and the count of registers to be protected. 
Change-Id: I9fa809db79efc79bb7a59304fa2b4607ed1fc567 Signed-off-by: Abhishek Barman --- adreno_a3xx.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/adreno_a3xx.c b/adreno_a3xx.c index ffb2604e80..9f69430463 100644 --- a/adreno_a3xx.c +++ b/adreno_a3xx.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -1096,8 +1096,14 @@ static struct { { A3XX_CP_PROTECT_REG_0 + 13, 0x0cc0, 0 }, /* VBIF */ { A3XX_CP_PROTECT_REG_0 + 14, 0x3000, 6 }, - /* SMMU */ - { A3XX_CP_PROTECT_REG_0 + 15, 0xa000, 12 }, + /* + * SMMU + * For A3xx, base offset for smmu region is 0xa000 and length is + * 0x1000 bytes. Offset must be in dword and length of the block + * must be ilog2(dword length). + * 0xa000 >> 2 = 0x2800, ilog2(0x1000 >> 2) = 10. + */ + { A3XX_CP_PROTECT_REG_0 + 15, 0x2800, 10 }, /* There are no remaining protected mode registers for a3xx */ }; From c2fa8a4482f4b628a093b23a41023a0fc2634a84 Mon Sep 17 00:00:00 2001 From: Abhishek Barman Date: Mon, 24 Jul 2023 16:41:08 +0530 Subject: [PATCH 0502/1016] msm: kgsl: Add iommu clock names for A306 GPU Add "gcc_smmu_cfg_clk" and "gcc_gfx_tcu_clk" iommu clock names to control these clocks on A306 GPU. 
Change-Id: I79d7a4c73217c6ebf931aed9f50efe8177944eda Signed-off-by: Abhishek Barman --- kgsl_iommu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 5b23759cc6..212825ca00 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -2415,6 +2415,8 @@ static const char * const kgsl_iommu_clocks[] = { "gcc_bimc_gpu_axi", "gcc_gpu_ahb", "gcc_gpu_axi_clk", + "gcc_smmu_cfg_clk", + "gcc_gfx_tcu_clk", }; static const struct kgsl_mmu_ops kgsl_iommu_ops; From f572cc31f20e8a2622db99aa65b1dd4a0c526817 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Thu, 3 Aug 2023 11:14:53 -0600 Subject: [PATCH 0503/1016] kgsl: hwsched: Fix HFI sequence number wrap issue When comparing the ack, make sure the entire header is compared instead of just the sequence number. This is required because two packets (waiting for their acks) can have the same sequence number (once it wraps around). Use a different sequence number generator for cmdbatch submissions to context queues or dispatch queues to reduce the chance of wrapping around the cmdq sequence number. For the same reason, use a different sequence number generator for hardware fence packets as well. Remove instances where the sequence number is getting updated twice for the same packet. 
Change-Id: I56232a3b5cf74b725f9572bd34eb4041774dc6d1 Signed-off-by: Harshdeep Dhatt --- adreno_a6xx_hfi.c | 6 ++-- adreno_a6xx_hwsched_hfi.c | 41 ++++++++++----------- adreno_gen7_hfi.c | 7 ++-- adreno_gen7_hwsched_hfi.c | 76 +++++++++++++++++++-------------------- adreno_gen7_hwsched_hfi.h | 2 ++ adreno_hfi.h | 7 ++-- adreno_hwsched.h | 5 +++ 7 files changed, 74 insertions(+), 70 deletions(-) diff --git a/adreno_a6xx_hfi.c b/adreno_a6xx_hfi.c index a0dc91ddfe..7bde0a2afe 100644 --- a/adreno_a6xx_hfi.c +++ b/adreno_a6xx_hfi.c @@ -123,8 +123,6 @@ int a6xx_hfi_queue_write(struct adreno_device *adreno_dev, uint32_t queue_idx, if (empty_space <= align_size) return -ENOSPC; - *msg = MSG_HDR_SET_SIZE(*msg, size_dwords); - for (i = 0; i < size_dwords; i++) { queue[write_idx] = msg[i]; write_idx = (write_idx + 1) % hdr->queue_size; @@ -244,7 +242,7 @@ int a6xx_receive_ack_cmd(struct a6xx_gmu_device *gmu, void *rcvd, if (ret_cmd == NULL) return -EINVAL; - if (HDR_CMP_SEQNUM(ret_cmd->sent_hdr, req_hdr)) { + if (CMP_HFI_ACK_HDR(ret_cmd->sent_hdr, req_hdr)) { memcpy(&ret_cmd->results, ack, MSG_HDR_GET_SIZE(hdr) << 2); return 0; } @@ -319,7 +317,7 @@ static int a6xx_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev, struct a6xx_hfi *hfi = &gmu->hfi; unsigned int seqnum = atomic_inc_return(&hfi->seqnum); - *cmd = MSG_HDR_SET_SEQNUM(*cmd, seqnum); + *cmd = MSG_HDR_SET_SEQNUM_SIZE(*cmd, seqnum, size_bytes >> 2); if (ret_cmd == NULL) return a6xx_hfi_cmdq_write(adreno_dev, cmd, size_bytes); diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index 9c3fc84541..02fac3497a 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -103,7 +103,7 @@ static void a6xx_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd) read_lock(&hfi->msglock); list_for_each_entry(cmd, &hfi->msglist, node) { - if (HDR_CMP_SEQNUM(cmd->sent_hdr, req_hdr)) { + if (CMP_HFI_ACK_HDR(cmd->sent_hdr, req_hdr)) { memcpy(cmd->results, ack, min_t(u32, size_bytes, 
sizeof(cmd->results))); @@ -624,11 +624,12 @@ int a6xx_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 si struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev); u32 *cmd = data; - u32 seqnum = atomic_inc_return(&gmu->hfi.seqnum); + u32 seqnum; int rc; struct pending_cmd pending_ack; - *cmd = MSG_HDR_SET_SEQNUM(*cmd, seqnum); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + *cmd = MSG_HDR_SET_SEQNUM_SIZE(*cmd, seqnum, size_bytes >> 2); add_waiter(hfi, *cmd, &pending_ack); @@ -926,6 +927,7 @@ static int mem_alloc_reply(struct adreno_device *adreno_dev, void *rcvd) struct hfi_mem_alloc_desc desc = {0}; struct hfi_mem_alloc_reply_cmd out = {0}; struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); + u32 seqnum; int ret; hfi_get_mem_alloc_desc(rcvd, &desc); @@ -937,8 +939,8 @@ static int mem_alloc_reply(struct adreno_device *adreno_dev, void *rcvd) memcpy(&out.desc, &desc, sizeof(out.desc)); out.hdr = ACK_MSG_HDR(F2H_MSG_MEM_ALLOC); - out.hdr = MSG_HDR_SET_SEQNUM(out.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + out.hdr = MSG_HDR_SET_SEQNUM_SIZE(out.hdr, seqnum, sizeof(out) >> 2); out.req_hdr = *(u32 *)rcvd; return a6xx_hfi_cmdq_write(adreno_dev, (u32 *)&out, sizeof(out)); @@ -948,7 +950,8 @@ static int gmu_cntr_register_reply(struct adreno_device *adreno_dev, void *rcvd) { struct hfi_gmu_cntr_register_cmd *in = (struct hfi_gmu_cntr_register_cmd *)rcvd; struct hfi_gmu_cntr_register_reply_cmd out = {0}; - u32 lo = 0, hi = 0; + struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); + u32 lo = 0, hi = 0, seqnum; /* * Failure to allocate counter is not fatal. 
Sending lo = 0, hi = 0 @@ -958,6 +961,8 @@ static int gmu_cntr_register_reply(struct adreno_device *adreno_dev, void *rcvd) in->group_id, in->countable, &lo, &hi, PERFCOUNTER_FLAG_KERNEL); out.hdr = ACK_MSG_HDR(F2H_MSG_GMU_CNTR_REGISTER); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + out.hdr = MSG_HDR_SET_SEQNUM_SIZE(out.hdr, seqnum, sizeof(out) >> 2); out.req_hdr = in->hdr; out.group_id = in->group_id; out.countable = in->countable; @@ -972,7 +977,7 @@ static int send_start_msg(struct adreno_device *adreno_dev) { struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - unsigned int seqnum = atomic_inc_return(&gmu->hfi.seqnum); + u32 seqnum; int rc, read_size; struct hfi_start_cmd cmd; u32 rcvd[MAX_RCVD_SIZE]; @@ -982,7 +987,8 @@ static int send_start_msg(struct adreno_device *adreno_dev) if (rc) return rc; - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); pending_ack.sent_hdr = cmd.hdr; @@ -1705,8 +1711,6 @@ static int a6xx_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, uint3 if (empty_space <= align_size) return -ENOSPC; - *msg = MSG_HDR_SET_SIZE(*msg, size_dwords); - write = hdr->write_index; for (i = 0; i < size_dwords; i++) { @@ -1749,9 +1753,8 @@ static int a6xx_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, uint3 int a6xx_hwsched_submit_drawobj(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj) { - struct a6xx_hfi *hfi = to_a6xx_hfi(adreno_dev); int ret = 0; - u32 cmd_sizebytes; + u32 cmd_sizebytes, seqnum; struct kgsl_drawobj_cmd *cmdobj = CMDOBJ(drawobj); struct hfi_submit_cmd *cmd; struct adreno_submit_time time = {0}; @@ -1815,9 +1818,9 @@ int a6xx_hwsched_submit_drawobj(struct adreno_device *adreno_dev, skipib: adreno_drawobj_set_constraint(KGSL_DEVICE(adreno_dev), drawobj); + seqnum = atomic_inc_return(&adreno_dev->hwsched.submission_seqnum); 
cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD, HFI_MSG_CMD); - cmd->hdr = MSG_HDR_SET_SEQNUM(cmd->hdr, - atomic_inc_return(&hfi->seqnum)); + cmd->hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd->hdr, seqnum, cmd_sizebytes >> 2); ret = a6xx_hfi_dispatch_queue_write(adreno_dev, HFI_DSP_ID_0 + drawobj->context->gmu_dispatch_queue, @@ -1842,7 +1845,6 @@ int a6xx_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, { struct adreno_hwsched *hwsched = &adreno_dev->hwsched; struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); - struct a6xx_hfi *hfi = to_a6xx_hfi(adreno_dev); struct hfi_submit_cmd *cmd; struct kgsl_memobj_node *ib; u32 cmd_sizebytes; @@ -1900,8 +1902,6 @@ int a6xx_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, } cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_RECURRING_CMD, HFI_MSG_CMD); - cmd->hdr = MSG_HDR_SET_SEQNUM(cmd->hdr, - atomic_inc_return(&hfi->seqnum)); ret = a6xx_hfi_send_cmd_async(adreno_dev, cmd, cmd_sizebytes); @@ -1953,7 +1953,7 @@ static int send_context_unregister_hfi(struct adreno_device *adreno_dev, cmd.ts = ts, seqnum = atomic_inc_return(&gmu->hfi.seqnum); - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); add_waiter(hfi, cmd.hdr, &pending_ack); @@ -2044,6 +2044,7 @@ u32 a6xx_hwsched_preempt_count_get(struct adreno_device *adreno_dev) struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev); struct pending_cmd pending_ack; int rc; + u32 seqnum; if (device->state != KGSL_STATE_ACTIVE) return 0; @@ -2052,8 +2053,8 @@ u32 a6xx_hwsched_preempt_count_get(struct adreno_device *adreno_dev) if (rc) return 0; - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); cmd.type = HFI_VALUE_PREEMPT_COUNT; cmd.subtype = 0; diff --git a/adreno_gen7_hfi.c b/adreno_gen7_hfi.c index f1a799dade..26363771c8 100644 --- a/adreno_gen7_hfi.c +++ 
b/adreno_gen7_hfi.c @@ -124,8 +124,6 @@ int gen7_hfi_queue_write(struct adreno_device *adreno_dev, u32 queue_idx, if (empty_space <= align_size) return -ENOSPC; - *msg = MSG_HDR_SET_SIZE(*msg, size_dwords); - for (i = 0; i < size_dwords; i++) { queue[write_idx] = msg[i]; write_idx = (write_idx + 1) % hdr->queue_size; @@ -246,7 +244,7 @@ int gen7_receive_ack_cmd(struct gen7_gmu_device *gmu, void *rcvd, if (ret_cmd == NULL) return -EINVAL; - if (HDR_CMP_SEQNUM(ret_cmd->sent_hdr, req_hdr)) { + if (CMP_HFI_ACK_HDR(ret_cmd->sent_hdr, req_hdr)) { memcpy(&ret_cmd->results, ack, MSG_HDR_GET_SIZE(hdr) << 2); return 0; } @@ -316,7 +314,8 @@ static int gen7_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev, struct gen7_hfi *hfi = &gmu->hfi; unsigned int seqnum = atomic_inc_return(&hfi->seqnum); - *cmd = MSG_HDR_SET_SEQNUM(*cmd, seqnum); + *cmd = MSG_HDR_SET_SEQNUM_SIZE(*cmd, seqnum, size_bytes >> 2); + if (ret_cmd == NULL) return gen7_hfi_cmdq_write(adreno_dev, cmd, size_bytes); diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index b4c4fb0715..cf1096eb8a 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -121,7 +121,7 @@ static void gen7_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd) read_lock(&hfi->msglock); list_for_each_entry(cmd, &hfi->msglist, node) { - if (HDR_CMP_SEQNUM(cmd->sent_hdr, req_hdr)) { + if (CMP_HFI_ACK_HDR(cmd->sent_hdr, req_hdr)) { memcpy(cmd->results, ack, min_t(u32, size_bytes, sizeof(cmd->results))); @@ -818,7 +818,6 @@ static void gen7_syncobj_query_reply(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj, struct hfi_syncobj_query_cmd *cmd) { struct hfi_syncobj_query_cmd reply = {0}; - struct gen7_hfi *hfi = to_gen7_hfi(adreno_dev); int i, j, fence_index = 0; struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); @@ -850,8 +849,6 @@ static void gen7_syncobj_query_reply(struct adreno_device *adreno_dev, } 
reply.hdr = CREATE_MSG_HDR(F2H_MSG_SYNCOBJ_QUERY, HFI_MSG_CMD); - reply.hdr = MSG_HDR_SET_SEQNUM(reply.hdr, - atomic_inc_return(&hfi->seqnum)); reply.gmu_ctxt_id = cmd->gmu_ctxt_id; reply.sync_obj_ts = cmd->sync_obj_ts; @@ -1015,11 +1012,13 @@ static void _increment_hw_fence_unack_count(struct adreno_device *adreno_dev) static int _send_hw_fence_no_ack(struct adreno_device *adreno_dev, struct adreno_hw_fence_entry *entry) { - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + u32 seqnum; int ret; - entry->cmd.hdr = MSG_HDR_SET_SEQNUM(entry->cmd.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + + seqnum = atomic_inc_return(&hfi->hw_fence.seqnum); + entry->cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(entry->cmd.hdr, seqnum, sizeof(entry->cmd) >> 2); ret = gen7_hfi_cmdq_write(adreno_dev, (u32 *)&entry->cmd, sizeof(entry->cmd)); if (!ret) @@ -1183,7 +1182,7 @@ static void process_hw_fence_ack(struct adreno_device *adreno_dev, u32 received_ spin_lock(&hfi->hw_fence.lock); /* If this ack is being waited on, we don't need to touch the unack count */ - if (hw_fence_ack.sent_hdr && HDR_CMP_SEQNUM(hw_fence_ack.sent_hdr, received_hdr)) { + if (hw_fence_ack.sent_hdr && CMP_HFI_ACK_HDR(hw_fence_ack.sent_hdr, received_hdr)) { spin_unlock(&hfi->hw_fence.lock); complete(&hw_fence_ack.complete); return; @@ -1404,11 +1403,12 @@ int gen7_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 si struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); u32 *cmd = data; - u32 seqnum = atomic_inc_return(&gmu->hfi.seqnum); + u32 seqnum; int rc; struct pending_cmd pending_ack; - *cmd = MSG_HDR_SET_SEQNUM(*cmd, seqnum); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + *cmd = MSG_HDR_SET_SEQNUM_SIZE(*cmd, seqnum, size_bytes >> 2); add_waiter(hfi, *cmd, &pending_ack); @@ -1708,6 +1708,7 @@ static int mem_alloc_reply(struct adreno_device *adreno_dev, void 
*rcvd) struct hfi_mem_alloc_desc desc = {0}; struct hfi_mem_alloc_reply_cmd out = {0}; struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + u32 seqnum; int ret; hfi_get_mem_alloc_desc(rcvd, &desc); @@ -1720,8 +1721,8 @@ static int mem_alloc_reply(struct adreno_device *adreno_dev, void *rcvd) out.hdr = ACK_MSG_HDR(F2H_MSG_MEM_ALLOC); - out.hdr = MSG_HDR_SET_SEQNUM(out.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + out.hdr = MSG_HDR_SET_SEQNUM_SIZE(out.hdr, seqnum, sizeof(out) >> 2); out.req_hdr = *(u32 *)rcvd; @@ -1732,7 +1733,8 @@ static int gmu_cntr_register_reply(struct adreno_device *adreno_dev, void *rcvd) { struct hfi_gmu_cntr_register_cmd *in = (struct hfi_gmu_cntr_register_cmd *)rcvd; struct hfi_gmu_cntr_register_reply_cmd out = {0}; - u32 lo = 0, hi = 0; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + u32 lo = 0, hi = 0, seqnum; /* * Failure to allocate counter is not fatal. Sending lo = 0, hi = 0 @@ -1742,6 +1744,8 @@ static int gmu_cntr_register_reply(struct adreno_device *adreno_dev, void *rcvd) in->group_id, in->countable, &lo, &hi, PERFCOUNTER_FLAG_KERNEL); out.hdr = ACK_MSG_HDR(F2H_MSG_GMU_CNTR_REGISTER); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + out.hdr = MSG_HDR_SET_SEQNUM_SIZE(out.hdr, seqnum, sizeof(out) >> 2); out.req_hdr = in->hdr; out.group_id = in->group_id; out.countable = in->countable; @@ -1754,8 +1758,6 @@ static int gmu_cntr_register_reply(struct adreno_device *adreno_dev, void *rcvd) static int send_warmboot_start_msg(struct adreno_device *adreno_dev) { - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - unsigned int seqnum = atomic_inc_return(&gmu->hfi.seqnum); int ret = 0; struct hfi_start_cmd cmd; @@ -1766,8 +1768,6 @@ static int send_warmboot_start_msg(struct adreno_device *adreno_dev) if (ret) return ret; - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); - cmd.hdr = RECORD_NOP_MSG_HDR(cmd.hdr); return gen7_hfi_send_generic_req(adreno_dev, &cmd, 
sizeof(cmd)); @@ -1777,7 +1777,7 @@ static int send_start_msg(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - unsigned int seqnum = atomic_inc_return(&gmu->hfi.seqnum); + u32 seqnum; int read_size, rc = 0; struct hfi_start_cmd cmd; u32 rcvd[MAX_RCVD_SIZE]; @@ -1787,7 +1787,8 @@ static int send_start_msg(struct adreno_device *adreno_dev) if (rc) return rc; - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); pending_ack.sent_hdr = cmd.hdr; @@ -2023,13 +2024,14 @@ u32 gen7_hwsched_hfi_get_value(struct adreno_device *adreno_dev, u32 prop) struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); struct pending_cmd pending_ack; int rc; + u32 seqnum; rc = CMD_MSG_HDR(cmd, H2F_MSG_GET_VALUE); if (rc) return 0; - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); cmd.type = prop; cmd.subtype = 0; @@ -2173,6 +2175,7 @@ static int gen7_hwsched_hfi_warmboot_gpu_cmd(struct adreno_device *adreno_dev, .flags = HFI_WARMBOOT_EXEC_SCRATCH, }; int ret = 0; + u32 seqnum; if (!adreno_dev->warmboot_enabled) return 0; @@ -2181,8 +2184,8 @@ static int gen7_hwsched_hfi_warmboot_gpu_cmd(struct adreno_device *adreno_dev, if (ret) return ret; - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); add_waiter(hfi, cmd.hdr, ret_cmd); @@ -3032,8 +3035,6 @@ int gen7_gmu_context_queue_write(struct adreno_device *adreno_dev, if (!IS_ALIGNED(size_bytes, sizeof(u32))) return -EINVAL; - *msg = MSG_HDR_SET_SIZE(*msg, size_dwords); - for (i = 0; i < size_dwords; i++) { queue[write_idx] = msg[i]; 
write_idx = (write_idx + 1) % hdr->queue_size; @@ -3131,12 +3132,12 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj, void *cmdbuf) { struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context); - struct gen7_hfi *hfi = to_gen7_hfi(adreno_dev); int i, j; u32 cmd_sizebytes; struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); struct hfi_submit_syncobj *cmd; struct hfi_syncobj *obj = NULL; + u32 seqnum; /* Add hfi_syncobj struct for sync object */ cmd_sizebytes = sizeof(*cmd) + @@ -3215,9 +3216,9 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, drawobj->timestamp = ++drawctxt->syncobj_timestamp; cmd->timestamp = drawobj->timestamp; + seqnum = atomic_inc_return(&adreno_dev->hwsched.submission_seqnum); cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_SYNCOBJ, HFI_MSG_CMD); - cmd->hdr = MSG_HDR_SET_SEQNUM(cmd->hdr, - atomic_inc_return(&hfi->seqnum)); + cmd->hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd->hdr, seqnum, cmd_sizebytes >> 2); return gen7_gmu_context_queue_write(adreno_dev, drawctxt, (u32 *)cmd, cmd_sizebytes, drawobj, NULL); @@ -3358,6 +3359,7 @@ int gen7_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev, struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 seqnum; int ret = 0; /* Device mutex is necessary to ensure only one hardware fence ack is being waited for */ @@ -3369,8 +3371,8 @@ int gen7_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev, init_completion(&hw_fence_ack.complete); entry->cmd.flags |= flags; - entry->cmd.hdr = MSG_HDR_SET_SEQNUM(entry->cmd.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + seqnum = atomic_inc_return(&hfi->hw_fence.seqnum); + entry->cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(entry->cmd.hdr, seqnum, sizeof(entry->cmd) >> 2); hw_fence_ack.sent_hdr = entry->cmd.hdr; @@ -3695,8 +3697,6 @@ static int gen7_hfi_dispatch_queue_write(struct 
adreno_device *adreno_dev, u32 q if (empty_space <= align_size) return -ENOSPC; - *msg = MSG_HDR_SET_SIZE(*msg, size_dwords); - write = hdr->write_index; for (i = 0; i < size_dwords; i++) { @@ -3732,9 +3732,8 @@ static int gen7_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, u32 q int gen7_hwsched_submit_drawobj(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj) { - struct gen7_hfi *hfi = to_gen7_hfi(adreno_dev); int ret = 0; - u32 cmd_sizebytes; + u32 cmd_sizebytes, seqnum; struct kgsl_drawobj_cmd *cmdobj = NULL; struct hfi_submit_cmd *cmd; struct adreno_submit_time time = {0}; @@ -3814,9 +3813,9 @@ int gen7_hwsched_submit_drawobj(struct adreno_device *adreno_dev, struct kgsl_dr skipib: adreno_drawobj_set_constraint(KGSL_DEVICE(adreno_dev), drawobj); + seqnum = atomic_inc_return(&adreno_dev->hwsched.submission_seqnum); cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD, HFI_MSG_CMD); - cmd->hdr = MSG_HDR_SET_SEQNUM(cmd->hdr, - atomic_inc_return(&hfi->seqnum)); + cmd->hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd->hdr, seqnum, cmd_sizebytes >> 2); if (adreno_hwsched_context_queue_enabled(adreno_dev)) ret = gen7_gmu_context_queue_write(adreno_dev, @@ -3849,7 +3848,6 @@ int gen7_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, { struct adreno_hwsched *hwsched = &adreno_dev->hwsched; struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); - struct gen7_hfi *hfi = to_gen7_hfi(adreno_dev); struct hfi_submit_cmd *cmd; struct kgsl_memobj_node *ib; u32 cmd_sizebytes; @@ -3907,8 +3905,6 @@ int gen7_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, } cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_RECURRING_CMD, HFI_MSG_CMD); - cmd->hdr = MSG_HDR_SET_SEQNUM(cmd->hdr, - atomic_inc_return(&hfi->seqnum)); ret = gen7_hfi_send_cmd_async(adreno_dev, cmd, sizeof(*cmd)); @@ -4015,7 +4011,7 @@ static int send_context_unregister_hfi(struct adreno_device *adreno_dev, cmd.ts = ts, seqnum = atomic_inc_return(&gmu->hfi.seqnum); - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, 
seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); add_waiter(hfi, cmd.hdr, &pending_ack); diff --git a/adreno_gen7_hwsched_hfi.h b/adreno_gen7_hwsched_hfi.h index 6a6f02f49f..695d11e2a2 100644 --- a/adreno_gen7_hwsched_hfi.h +++ b/adreno_gen7_hwsched_hfi.h @@ -83,6 +83,8 @@ struct gen7_hwsched_hfi { * @flags: Flags to control the creation of new hardware fences */ unsigned long flags; + /** @seqnum: Sequence number for hardware fence packet header */ + atomic_t seqnum; } hw_fence; /** * @hw_fence_timer: Timer to trigger fault if unack'd hardware fence count does'nt drop diff --git a/adreno_hfi.h b/adreno_hfi.h index f3a2ad011d..2760a0119b 100644 --- a/adreno_hfi.h +++ b/adreno_hfi.h @@ -414,12 +414,15 @@ struct hfi_queue_table { #define MSG_HDR_GET_TYPE(hdr) (((hdr) >> 16) & 0xF) #define MSG_HDR_GET_SEQNUM(hdr) (((hdr) >> 20) & 0xFFF) -#define HDR_CMP_SEQNUM(out_hdr, in_hdr) \ - (MSG_HDR_GET_SEQNUM(out_hdr) == MSG_HDR_GET_SEQNUM(in_hdr)) +/* Clear the HFI_MSG_RECORD bit from both headers since some acks may have it set, and some not. 
*/ +#define CMP_HFI_ACK_HDR(sent, rcvd) ((sent &= ~HFI_MSG_RECORD) == (rcvd &= ~HFI_MSG_RECORD)) #define MSG_HDR_SET_SEQNUM(hdr, num) \ (((hdr) & 0xFFFFF) | ((num) << 20)) +#define MSG_HDR_SET_SEQNUM_SIZE(hdr, seqnum, sizedwords) \ + (FIELD_PREP(GENMASK(31, 20), seqnum) | FIELD_PREP(GENMASK(15, 8), sizedwords) | hdr) + #define MSG_HDR_SET_TYPE(hdr, type) \ (((hdr) & 0xFFFFF) | ((type) << 16)) diff --git a/adreno_hwsched.h b/adreno_hwsched.h index 80b8651491..ef0c457359 100644 --- a/adreno_hwsched.h +++ b/adreno_hwsched.h @@ -110,6 +110,11 @@ struct adreno_hwsched { struct kmem_cache *hw_fence_cache; /** @hw_fence_count: Number of hardware fences that haven't yet been sent to Tx Queue */ atomic_t hw_fence_count; + /** + * @submission_seqnum: Sequence number for sending submissions to GMU context queues or + * dispatch queues + */ + atomic_t submission_seqnum; }; From a50124c22bd25b46a431b6d8f09bf667f6ed94ae Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Fri, 16 Jun 2023 16:22:07 +0530 Subject: [PATCH 0504/1016] msm: kgsl: Give hint to SMMU for skipping TLB ops during slumber Currently, TLB operation is performed irrespective of the GPU state. This results in unnecessary cx gdsc toggling. Use qcom_skip_tlb_management() API to request smmu driver to skip TLB flush operation during GPU slumber state. Moved kgsl_mmu_flush_tlb() to kgsl_mmu_send_tlb_hint(). Add kernel specific compatibility checks for older skip tlb logic and qcom_skip_tlb_management() API. 
Change-Id: Ic538e4404e8dddef56274e21eef7cf0e0f65bef6 Signed-off-by: Sanjay Yadav --- adreno_a6xx_gmu.c | 10 +++------- adreno_a6xx_hwsched.c | 7 ------- adreno_a6xx_rgmu.c | 4 ++++ adreno_gen7_gmu.c | 10 +++------- adreno_gen7_hwsched.c | 7 ------- kgsl_iommu.c | 44 ++++++++++++++++++++++++++++++++++++------- kgsl_mmu.h | 11 ++++------- kgsl_pwrctrl.c | 5 ++++- 8 files changed, 55 insertions(+), 43 deletions(-) diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index 40bbf74b4d..b8d35c663d 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -612,6 +612,7 @@ int a6xx_gmu_enable_gdsc(struct adreno_device *adreno_dev) dev_err(&gmu->pdev->dev, "Failed to enable GMU CX gdsc, error %d\n", ret); + kgsl_mmu_send_tlb_hint(&device->mmu, false); clear_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); return ret; } @@ -619,7 +620,9 @@ int a6xx_gmu_enable_gdsc(struct adreno_device *adreno_dev) void a6xx_gmu_disable_gdsc(struct adreno_device *adreno_dev) { struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + kgsl_mmu_send_tlb_hint(&device->mmu, true); reinit_completion(&gmu->gdsc_gate); if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)) @@ -2513,13 +2516,6 @@ static int a6xx_gmu_boot(struct adreno_device *adreno_dev) if (ret) goto gdsc_off; - /* - * TLB operations are skipped during slumber. Incase CX doesn't - * go down, it can result in incorrect translations due to stale - * TLB entries. Flush TLB before boot up to ensure fresh start. - */ - kgsl_mmu_flush_tlb(&device->mmu); - ret = a6xx_rscc_wakeup_sequence(adreno_dev); if (ret) goto clks_gdsc_off; diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index 32e2c7e6eb..f40182170f 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -436,13 +436,6 @@ static int a6xx_hwsched_gmu_boot(struct adreno_device *adreno_dev) if (ret) goto gdsc_off; - /* - * TLB operations are skipped during slumber. 
Incase CX doesn't - * go down, it can result in incorrect translations due to stale - * TLB entries. Flush TLB before boot up to ensure fresh start. - */ - kgsl_mmu_flush_tlb(&device->mmu); - ret = a6xx_rscc_wakeup_sequence(adreno_dev); if (ret) goto clks_gdsc_off; diff --git a/adreno_a6xx_rgmu.c b/adreno_a6xx_rgmu.c index 18358e300d..2f345ee191 100644 --- a/adreno_a6xx_rgmu.c +++ b/adreno_a6xx_rgmu.c @@ -518,6 +518,8 @@ static int a6xx_rgmu_disable_gdsc(struct adreno_device *adreno_dev) struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + kgsl_mmu_send_tlb_hint(&device->mmu, true); + /* Wait up to 5 seconds for the regulator to go off */ if (kgsl_regulator_disable_wait(rgmu->cx_gdsc, 5000)) return 0; @@ -590,6 +592,7 @@ static int a6xx_rgmu_enable_clks(struct adreno_device *adreno_dev) static int a6xx_rgmu_enable_gdsc(struct adreno_device *adreno_dev) { struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret; if (IS_ERR_OR_NULL(rgmu->cx_gdsc)) @@ -600,6 +603,7 @@ static int a6xx_rgmu_enable_gdsc(struct adreno_device *adreno_dev) dev_err(&rgmu->pdev->dev, "Fail to enable CX gdsc:%d\n", ret); + kgsl_mmu_send_tlb_hint(&device->mmu, false); return ret; } diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 86c58d0033..6194621bf9 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -364,6 +364,7 @@ int gen7_gmu_enable_gdsc(struct adreno_device *adreno_dev) dev_err(&gmu->pdev->dev, "Failed to enable GMU CX gdsc, error %d\n", ret); + kgsl_mmu_send_tlb_hint(&device->mmu, false); clear_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); return ret; } @@ -371,7 +372,9 @@ int gen7_gmu_enable_gdsc(struct adreno_device *adreno_dev) void gen7_gmu_disable_gdsc(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + kgsl_mmu_send_tlb_hint(&device->mmu, true); 
reinit_completion(&gmu->gdsc_gate); set_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); regulator_disable(gmu->cx_gdsc); @@ -2040,13 +2043,6 @@ static int gen7_gmu_boot(struct adreno_device *adreno_dev) if (ret) goto gdsc_off; - /* - * TLB operations are skipped during slumber. Incase CX doesn't - * go down, it can result in incorrect translations due to stale - * TLB entries. Flush TLB before boot up to ensure fresh start. - */ - kgsl_mmu_flush_tlb(&device->mmu); - ret = gen7_rscc_wakeup_sequence(adreno_dev); if (ret) goto clks_gdsc_off; diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 5e4ed32dee..492c3f0859 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -597,13 +597,6 @@ static int gen7_hwsched_gmu_boot(struct adreno_device *adreno_dev) if (ret) goto gdsc_off; - /* - * TLB operations are skipped during slumber. Incase CX doesn't - * go down, it can result in incorrect translations due to stale - * TLB entries. Flush TLB before boot up to ensure fresh start. - */ - kgsl_mmu_flush_tlb(&device->mmu); - ret = gen7_rscc_wakeup_sequence(adreno_dev); if (ret) goto clks_gdsc_off; diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 22a55d4d9f..33cd3301f7 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -189,7 +189,6 @@ static void kgsl_iommu_flush_tlb(struct kgsl_mmu *mmu) static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) { - struct kgsl_device *device = KGSL_MMU_DEVICE(pt->base.mmu); struct io_pgtable_ops *ops = pt->pgtbl_ops; while (size) { @@ -200,13 +199,22 @@ static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) size -= PAGE_SIZE; } - /* Skip TLB Operations if GPU is in slumber */ - if (mutex_trylock(&device->mutex)) { - if (device->state == KGSL_STATE_SLUMBER) { + /* + * Skip below logic for 6.1 kernel version and above as + * qcom_skip_tlb_management() API takes care of avoiding + * TLB operations during slumber. 
+ */ + if (KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE) { + struct kgsl_device *device = KGSL_MMU_DEVICE(pt->base.mmu); + + /* Skip TLB Operations if GPU is in slumber */ + if (mutex_trylock(&device->mutex)) { + if (device->state == KGSL_STATE_SLUMBER) { + mutex_unlock(&device->mutex); + return 0; + } mutex_unlock(&device->mutex); - return 0; } - mutex_unlock(&device->mutex); } kgsl_iommu_flush_tlb(pt->base.mmu); @@ -267,6 +275,28 @@ static size_t _iopgtbl_map_sg(struct kgsl_iommu_pt *pt, u64 gpuaddr, return mapped; } +static void kgsl_iommu_send_tlb_hint(struct kgsl_mmu *mmu, bool hint) +{ +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) + struct kgsl_iommu *iommu = &mmu->iommu; + + /* + * Send hint to SMMU driver for skipping TLB operations during slumber. + * This will help to avoid unnecessary cx gdsc toggling. + */ + qcom_skip_tlb_management(&iommu->user_context.pdev->dev, hint); + if (iommu->lpac_context.domain) + qcom_skip_tlb_management(&iommu->lpac_context.pdev->dev, hint); +#endif + + /* + * TLB operations are skipped during slumber. Incase CX doesn't + * go down, it can result in incorrect translations due to stale + * TLB entries. Flush TLB before boot up to ensure fresh start. 
+ */ + if (!hint) + kgsl_iommu_flush_tlb(mmu); +} static int kgsl_iopgtbl_map_child(struct kgsl_pagetable *pt, struct kgsl_memdesc *memdesc, @@ -2582,7 +2612,7 @@ static const struct kgsl_mmu_ops kgsl_iommu_ops = { .mmu_pagefault_resume = kgsl_iommu_pagefault_resume, .mmu_getpagetable = kgsl_iommu_getpagetable, .mmu_map_global = kgsl_iommu_map_global, - .mmu_flush_tlb = kgsl_iommu_flush_tlb, + .mmu_send_tlb_hint = kgsl_iommu_send_tlb_hint, }; static const struct kgsl_mmu_pt_ops iopgtbl_pt_ops = { diff --git a/kgsl_mmu.h b/kgsl_mmu.h index 8b5e083081..3035cdec41 100644 --- a/kgsl_mmu.h +++ b/kgsl_mmu.h @@ -115,7 +115,7 @@ struct kgsl_mmu_ops { unsigned long name); void (*mmu_map_global)(struct kgsl_mmu *mmu, struct kgsl_memdesc *memdesc, u32 padding); - void (*mmu_flush_tlb)(struct kgsl_mmu *mmu); + void (*mmu_send_tlb_hint)(struct kgsl_mmu *mmu, bool hint); }; struct kgsl_mmu_pt_ops { @@ -359,13 +359,10 @@ kgsl_mmu_pagetable_get_ttbr0(struct kgsl_pagetable *pagetable) return 0; } -static inline void kgsl_mmu_flush_tlb(struct kgsl_mmu *mmu) +static inline void kgsl_mmu_send_tlb_hint(struct kgsl_mmu *mmu, bool hint) { - if (!test_bit(KGSL_MMU_IOPGTABLE, &mmu->features)) - return; - - if (MMU_OP_VALID(mmu, mmu_flush_tlb)) - return mmu->mmu_ops->mmu_flush_tlb(mmu); + if (MMU_OP_VALID(mmu, mmu_send_tlb_hint)) + return mmu->mmu_ops->mmu_send_tlb_hint(mmu, hint); } /** diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index cf7504f625..7a99babbd6 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -1376,14 +1376,17 @@ static int kgsl_pwrctrl_pwrrail(struct kgsl_device *device, bool state) if (!state) { if (test_and_clear_bit(KGSL_PWRFLAGS_POWER_ON, &pwr->power_flags)) { + kgsl_mmu_send_tlb_hint(&device->mmu, true); trace_kgsl_rail(device, state); if (!kgsl_regulator_disable_wait(pwr->gx_gdsc, 200)) dev_err(device->dev, "Regulator vdd is stuck on\n"); if (!kgsl_regulator_disable_wait(pwr->cx_gdsc, 200)) dev_err(device->dev, "Regulator vddcx is stuck on\n"); } - } else + } else 
{ status = enable_regulators(device); + kgsl_mmu_send_tlb_hint(&device->mmu, false); + } return status; } From db6e014625ac56ef11ecc393c4ba721f79fcf9f2 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 8 Aug 2023 22:05:10 +0530 Subject: [PATCH 0505/1016] msm: kgsl: Update power state machine for rgmu Power state machine is different for RGMU and GMU targets. Update the power state machine of RGMU to make it same as GMU targets. Change-Id: I44eba52b6eab90b4686c27d84509ac9ef85def89 Signed-off-by: Kamal Agrawal --- adreno_a6xx_rgmu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/adreno_a6xx_rgmu.c b/adreno_a6xx_rgmu.c index 2f345ee191..89d290b968 100644 --- a/adreno_a6xx_rgmu.c +++ b/adreno_a6xx_rgmu.c @@ -516,7 +516,6 @@ static void a6xx_rgmu_disable_clks(struct adreno_device *adreno_dev) static int a6xx_rgmu_disable_gdsc(struct adreno_device *adreno_dev) { struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev); - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); kgsl_mmu_send_tlb_hint(&device->mmu, true); @@ -526,8 +525,6 @@ static int a6xx_rgmu_disable_gdsc(struct adreno_device *adreno_dev) dev_err(&rgmu->pdev->dev, "RGMU CX gdsc off timeout\n"); - kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE); - return -ETIMEDOUT; } @@ -556,6 +553,8 @@ static void a6xx_rgmu_suspend(struct adreno_device *adreno_dev) a6xx_rgmu_disable_clks(adreno_dev); a6xx_rgmu_disable_gdsc(adreno_dev); + + kgsl_pwrctrl_set_state(KGSL_DEVICE(adreno_dev), KGSL_STATE_NONE); } static int a6xx_rgmu_enable_clks(struct adreno_device *adreno_dev) @@ -716,6 +715,8 @@ static void a6xx_rgmu_power_off(struct adreno_device *adreno_dev) a6xx_rgmu_disable_gdsc(adreno_dev); kgsl_pwrctrl_clear_l3_vote(device); + + kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE); } static int a6xx_rgmu_clock_set(struct adreno_device *adreno_dev, From 2864d0656c051b850954bf1f2bf78f3fca4b4e47 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sat, 17 Dec 2022 20:04:52 +0530 Subject: [PATCH 
0506/1016] msm: kgsl: Add cx gdsc notifier for rgmu and non-gmu targets Extend cx gdsc notifier support to rgmu and non-gmu targets. With this, KGSL waits for cx collapse notifier event instead of polling for gdsc state. This helps to remove the CPU cycles spent for polling. Also, it addresses the corner case scenario where cx gdsc collapse event can get missed due to sleep operation during polling. Also, remove cx gdsc and gx gdsc members from gmu structure and use members in power control structure. Change-Id: I6199b612a18651dc53a46b666569742a21dda2df Signed-off-by: Kamal Agrawal --- adreno_a6xx.c | 6 ++ adreno_a6xx_gmu.c | 110 ++++------------------------------- adreno_a6xx_gmu.h | 26 --------- adreno_a6xx_hwsched.c | 4 +- adreno_a6xx_rgmu.c | 78 ++++--------------------- adreno_a6xx_rgmu.h | 5 +- adreno_a6xx_snapshot.c | 2 - adreno_gen7.c | 1 + adreno_gen7_gmu.c | 112 ++++------------------------------- adreno_gen7_gmu.h | 32 ---------- adreno_gen7_hwsched.c | 10 ++-- kgsl_gmu_core.h | 1 - kgsl_pwrctrl.c | 129 +++++++++++++++++++++++++++++++++++++---- kgsl_pwrctrl.h | 32 ++++++++++ 14 files changed, 201 insertions(+), 347 deletions(-) diff --git a/adreno_a6xx.c b/adreno_a6xx.c index 248f5364b8..7857bb72a4 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -1888,6 +1888,7 @@ int a6xx_probe_common(struct platform_device *pdev, struct adreno_device *adreno_dev, u32 chipid, const struct adreno_gpu_core *gpucore) { + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); const struct adreno_gpudev *gpudev = gpucore->gpudev; int ret; @@ -1896,6 +1897,11 @@ int a6xx_probe_common(struct platform_device *pdev, adreno_reg_offset_init(gpudev->reg_offsets); + if (gmu_core_isenabled(device) && (gpudev != &adreno_a6xx_rgmu_gpudev)) + device->pwrctrl.cx_gdsc_offset = (adreno_is_a662(adreno_dev) || + adreno_is_a621(adreno_dev)) ? 
A662_GPU_CC_CX_GDSCR : + A6XX_GPU_CC_CX_GDSCR; + adreno_dev->hwcg_enabled = true; adreno_dev->uche_client_pf = 1; diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index b8d35c663d..f139dd046e 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -6,7 +6,6 @@ #include #include -#include #include #include #include @@ -594,45 +593,18 @@ static void gmu_ao_sync_event(struct adreno_device *adreno_dev) local_irq_restore(flags); } -int a6xx_gmu_enable_gdsc(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); - int ret; - - ret = wait_for_completion_timeout(&gmu->gdsc_gate, msecs_to_jiffies(5000)); - if (!ret) { - dev_err(device->dev, "GPU CX wait timeout. Dumping CX votes:\n"); - /* Dump the cx regulator consumer list */ - qcom_clk_dump(NULL, gmu->cx_gdsc, false); - } - - ret = regulator_enable(gmu->cx_gdsc); - if (ret) - dev_err(&gmu->pdev->dev, - "Failed to enable GMU CX gdsc, error %d\n", ret); - - kgsl_mmu_send_tlb_hint(&device->mmu, false); - clear_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); - return ret; -} - void a6xx_gmu_disable_gdsc(struct adreno_device *adreno_dev) { - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - kgsl_mmu_send_tlb_hint(&device->mmu, true); - reinit_completion(&gmu->gdsc_gate); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)) - regulator_set_mode(gmu->cx_gdsc, REGULATOR_MODE_IDLE); + regulator_set_mode(pwr->cx_gdsc, REGULATOR_MODE_IDLE); - set_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); - regulator_disable(gmu->cx_gdsc); + kgsl_pwrctrl_disable_cx_gdsc(device); if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)) - regulator_set_mode(gmu->cx_gdsc, REGULATOR_MODE_NORMAL); + regulator_set_mode(pwr->cx_gdsc, REGULATOR_MODE_NORMAL); } int a6xx_gmu_device_start(struct adreno_device *adreno_dev) @@ -1866,6 +1838,7 @@ static void 
a6xx_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) int ret = 0; struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; /* If SPTP_RAC is on, turn off SPTP_RAC HS */ a6xx_gmu_sptprac_disable(adreno_dev); @@ -1910,14 +1883,14 @@ static void a6xx_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) * the GX HS. This code path is the only client voting for GX through * the regulator interface. */ - if (gmu->gx_gdsc) { + if (pwr->gx_gdsc) { if (a6xx_gmu_gx_is_on(adreno_dev)) { /* Switch gx gdsc control from GMU to CPU * force non-zero reference count in clk driver * so next disable call will turn * off the GDSC */ - ret = regulator_enable(gmu->gx_gdsc); + ret = regulator_enable(pwr->gx_gdsc); if (ret) dev_err(&gmu->pdev->dev, "suspend fail: gx enable %d\n", ret); @@ -1934,7 +1907,7 @@ static void a6xx_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) ndelay(520); } - ret = regulator_disable(gmu->gx_gdsc); + ret = regulator_disable(pwr->gx_gdsc); if (ret) dev_err(&gmu->pdev->dev, "suspend fail: gx disable %d\n", ret); @@ -2382,7 +2355,7 @@ static void a6xx_gmu_force_first_boot(struct kgsl_device *device) u32 val = 0; if (gmu->pdc_cfg_base) { - a6xx_gmu_enable_gdsc(adreno_dev); + kgsl_pwrctrl_enable_cx_gdsc(device); a6xx_gmu_enable_clks(adreno_dev, 0); val = __raw_readl(gmu->pdc_cfg_base + (PDC_GPU_ENABLE_PDC << 2)); @@ -2412,7 +2385,7 @@ static int a6xx_gmu_first_boot(struct adreno_device *adreno_dev) a6xx_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled); - ret = a6xx_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -2508,7 +2481,7 @@ static int a6xx_gmu_boot(struct adreno_device *adreno_dev) kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE); - ret = a6xx_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -2826,65 +2799,6 @@ static void 
a6xx_gmu_rdpm_probe(struct a6xx_gmu_device *gmu, res->start, resource_size(res)); } -static int gmu_cx_gdsc_event(struct notifier_block *nb, - unsigned long event, void *data) -{ - struct a6xx_gmu_device *gmu = container_of(nb, struct a6xx_gmu_device, gdsc_nb); - struct adreno_device *adreno_dev = a6xx_gmu_to_adreno(gmu); - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - u32 val, offset; - - if (!(event & REGULATOR_EVENT_DISABLE) || - !test_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags)) - return 0; - - offset = (adreno_is_a662(ADRENO_DEVICE(device)) || - adreno_is_a621(ADRENO_DEVICE(device))) ? - A662_GPU_CC_CX_GDSCR : A6XX_GPU_CC_CX_GDSCR; - - if (kgsl_regmap_read_poll_timeout(&device->regmap, offset, val, - !(val & BIT(31)), 100, 100 * 1000)) - dev_err(device->dev, "GPU CX wait timeout.\n"); - - clear_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); - complete_all(&gmu->gdsc_gate); - - return 0; -} - -static int a6xx_gmu_regulators_probe(struct a6xx_gmu_device *gmu, - struct platform_device *pdev) -{ - int ret; - - gmu->cx_gdsc = devm_regulator_get(&pdev->dev, "vddcx"); - if (IS_ERR(gmu->cx_gdsc)) { - if (PTR_ERR(gmu->cx_gdsc) != -EPROBE_DEFER) - dev_err(&pdev->dev, "Couldn't get the vddcx gdsc\n"); - return PTR_ERR(gmu->cx_gdsc); - } - - gmu->gx_gdsc = devm_regulator_get(&pdev->dev, "vdd"); - if (IS_ERR(gmu->gx_gdsc)) { - if (PTR_ERR(gmu->gx_gdsc) != -EPROBE_DEFER) - dev_err(&pdev->dev, "Couldn't get the vdd gdsc\n"); - return PTR_ERR(gmu->gx_gdsc); - } - - gmu->gdsc_nb.notifier_call = gmu_cx_gdsc_event; - ret = devm_regulator_register_notifier(gmu->cx_gdsc, &gmu->gdsc_nb); - - if (ret) { - dev_err(&pdev->dev, "Failed to register gmu cx gdsc notifier: %d\n", ret); - return ret; - } - - init_completion(&gmu->gdsc_gate); - complete_all(&gmu->gdsc_gate); - - return 0; -} - void a6xx_gmu_remove(struct kgsl_device *device) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); @@ -2990,7 +2904,7 @@ int a6xx_gmu_probe(struct kgsl_device *device, 
a6xx_gmu_rdpm_probe(gmu, device); /* Set up GMU regulators */ - ret = a6xx_gmu_regulators_probe(gmu, pdev); + ret = kgsl_pwrctrl_probe_regulators(device, pdev); if (ret) return ret; diff --git a/adreno_a6xx_gmu.h b/adreno_a6xx_gmu.h index 6060b325c0..ba29ca53c6 100644 --- a/adreno_a6xx_gmu.h +++ b/adreno_a6xx_gmu.h @@ -24,9 +24,6 @@ * @num_bwlevel: number of GPU BW levels * @num_cnocbwlevel: number CNOC BW levels * @rpmh_votes: RPMh TCS command set for GPU, GMU voltage and bw scaling - * @cx_gdsc: CX headswitch that controls power of GMU and - subsystem peripherals - * @gx_gdsc: GX headswitch that controls power of GPU subsystem * @clks: GPU subsystem clocks required for GMU functionality * @wakeup_pwrlevel: GPU wake up power/DCVS level in case different * than default power level @@ -51,9 +48,6 @@ struct a6xx_gmu_device { /** @vrb: GMU virtual register bank memory */ struct kgsl_memdesc *vrb; struct a6xx_hfi hfi; - /** @pwrlevels: Array of GMU power levels */ - struct regulator *cx_gdsc; - struct regulator *gx_gdsc; struct clk_bulk_data *clks; /** @num_clks: Number of entries in the @clks array */ int num_clks; @@ -99,10 +93,6 @@ struct a6xx_gmu_device { u32 perf_ddr_bw; /** @num_oob_perfcntr: Number of active oob_perfcntr requests */ u32 num_oob_perfcntr; - /** @gdsc_nb: Notifier block for cx gdsc regulator */ - struct notifier_block gdsc_nb; - /** @gdsc_gate: Completion to signal cx gdsc collapse status */ - struct completion gdsc_gate; /** @pdc_cfg_base: Base address of PDC cfg registers */ void __iomem *pdc_cfg_base; /** @pdc_seq_base: Base address of PDC seq registers */ @@ -267,14 +257,6 @@ int a6xx_gmu_memory_init(struct adreno_device *adreno_dev); */ void a6xx_gmu_aop_send_acd_state(struct a6xx_gmu_device *gmu, bool flag); -/** - * a6xx_gmu_enable_gdsc - Enable gmu gdsc - * @adreno_dev: Pointer to the adreno device - * - * Return: 0 on success or negative error on failure - */ -int a6xx_gmu_enable_gdsc(struct adreno_device *adreno_dev); - /** * 
a6xx_gmu_disable_gdsc - Disable gmu gdsc * @adreno_dev: Pointer to the adreno device @@ -445,14 +427,6 @@ void a6xx_gmu_remove(struct kgsl_device *device); */ int a6xx_gmu_enable_clks(struct adreno_device *adreno_dev, u32 level); -/** - * a6xx_gmu_enable_gdsc - Enable gmu gdsc - * @adreno_dev: Pointer to the adreno device - * - * Return: 0 on success or negative error on failure - */ -int a6xx_gmu_enable_gdsc(struct adreno_device *adreno_dev); - /** * a6xx_gmu_handle_watchdog - Handle watchdog interrupt * @adreno_dev: Pointer to the adreno device diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index f40182170f..a341ca6bba 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -345,7 +345,7 @@ static int a6xx_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) a6xx_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled); - ret = a6xx_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -428,7 +428,7 @@ static int a6xx_hwsched_gmu_boot(struct adreno_device *adreno_dev) kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE); - ret = a6xx_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; diff --git a/adreno_a6xx_rgmu.c b/adreno_a6xx_rgmu.c index 89d290b968..dc988f34be 100644 --- a/adreno_a6xx_rgmu.c +++ b/adreno_a6xx_rgmu.c @@ -483,12 +483,14 @@ static void a6xx_rgmu_notify_slumber(struct adreno_device *adreno_dev) static void a6xx_rgmu_disable_clks(struct adreno_device *adreno_dev) { struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; int ret; /* Check GX GDSC is status */ if (a6xx_rgmu_gx_is_on(adreno_dev)) { - if (IS_ERR_OR_NULL(rgmu->gx_gdsc)) + if (IS_ERR_OR_NULL(pwr->gx_gdsc)) return; /* @@ -496,12 +498,12 @@ static void a6xx_rgmu_disable_clks(struct adreno_device *adreno_dev) * reference count in clk driver so next disable call will * turn off 
the GDSC. */ - ret = regulator_enable(rgmu->gx_gdsc); + ret = regulator_enable(pwr->gx_gdsc); if (ret) dev_err(&rgmu->pdev->dev, "Fail to enable gx gdsc:%d\n", ret); - ret = regulator_disable(rgmu->gx_gdsc); + ret = regulator_disable(pwr->gx_gdsc); if (ret) dev_err(&rgmu->pdev->dev, "Fail to disable gx gdsc:%d\n", ret); @@ -513,21 +515,6 @@ static void a6xx_rgmu_disable_clks(struct adreno_device *adreno_dev) clk_bulk_disable_unprepare(rgmu->num_clks, rgmu->clks); } -static int a6xx_rgmu_disable_gdsc(struct adreno_device *adreno_dev) -{ - struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev); - - kgsl_mmu_send_tlb_hint(&device->mmu, true); - - /* Wait up to 5 seconds for the regulator to go off */ - if (kgsl_regulator_disable_wait(rgmu->cx_gdsc, 5000)) - return 0; - - dev_err(&rgmu->pdev->dev, "RGMU CX gdsc off timeout\n"); - - return -ETIMEDOUT; -} - void a6xx_rgmu_snapshot(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) { @@ -549,10 +536,11 @@ void a6xx_rgmu_snapshot(struct adreno_device *adreno_dev, static void a6xx_rgmu_suspend(struct adreno_device *adreno_dev) { - a6xx_rgmu_irq_disable(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + a6xx_rgmu_irq_disable(adreno_dev); a6xx_rgmu_disable_clks(adreno_dev); - a6xx_rgmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); kgsl_pwrctrl_set_state(KGSL_DEVICE(adreno_dev), KGSL_STATE_NONE); } @@ -588,24 +576,6 @@ static int a6xx_rgmu_enable_clks(struct adreno_device *adreno_dev) return 0; } -static int a6xx_rgmu_enable_gdsc(struct adreno_device *adreno_dev) -{ - struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev); - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - int ret; - - if (IS_ERR_OR_NULL(rgmu->cx_gdsc)) - return 0; - - ret = regulator_enable(rgmu->cx_gdsc); - if (ret) - dev_err(&rgmu->pdev->dev, - "Fail to enable CX gdsc:%d\n", ret); - - kgsl_mmu_send_tlb_hint(&device->mmu, false); - return ret; -} - /* * a6xx_rgmu_load_firmware() - Load the 
ucode into the RGMU TCM * @adreno_dev: Pointer to adreno device @@ -712,7 +682,7 @@ static void a6xx_rgmu_power_off(struct adreno_device *adreno_dev) a6xx_rgmu_irq_disable(adreno_dev); a6xx_rgmu_disable_clks(adreno_dev); - a6xx_rgmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); kgsl_pwrctrl_clear_l3_vote(device); @@ -814,13 +784,13 @@ static int a6xx_rgmu_boot(struct adreno_device *adreno_dev) kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE); - ret = a6xx_rgmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; ret = a6xx_rgmu_enable_clks(adreno_dev); if (ret) { - a6xx_rgmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); return ret; } @@ -1266,30 +1236,6 @@ static int a6xx_rgmu_irq_probe(struct kgsl_device *device) return 0; } -static int a6xx_rgmu_regulators_probe(struct a6xx_rgmu_device *rgmu) -{ - int ret = 0; - - rgmu->cx_gdsc = devm_regulator_get(&rgmu->pdev->dev, "vddcx"); - if (IS_ERR(rgmu->cx_gdsc)) { - ret = PTR_ERR(rgmu->cx_gdsc); - if (ret != -EPROBE_DEFER) - dev_err(&rgmu->pdev->dev, - "Couldn't get CX gdsc error:%d\n", ret); - return ret; - } - - rgmu->gx_gdsc = devm_regulator_get(&rgmu->pdev->dev, "vdd"); - if (IS_ERR(rgmu->gx_gdsc)) { - ret = PTR_ERR(rgmu->gx_gdsc); - if (ret != -EPROBE_DEFER) - dev_err(&rgmu->pdev->dev, - "Couldn't get GX gdsc error:%d\n", ret); - } - - return ret; -} - static int a6xx_rgmu_clocks_probe(struct a6xx_rgmu_device *rgmu, struct device_node *node) { @@ -1394,7 +1340,7 @@ static int a6xx_rgmu_probe(struct kgsl_device *device, rgmu->pdev = pdev; /* Set up RGMU regulators */ - ret = a6xx_rgmu_regulators_probe(rgmu); + ret = kgsl_pwrctrl_probe_regulators(device, pdev); if (ret) return ret; diff --git a/adreno_a6xx_rgmu.h b/adreno_a6xx_rgmu.h index 7f6f78b149..f34d2af7bf 100644 --- a/adreno_a6xx_rgmu.h +++ b/adreno_a6xx_rgmu.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2018-2021, The Linux Foundation. 
All rights reserved. + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef __ADRENO_A6XX_RGMU_H #define __ADRENO_A6XX_RGMU_H @@ -31,8 +32,6 @@ enum { * @oob_interrupt_num: number of RGMU asserted OOB interrupt * @fw_hostptr: Buffer which holds the RGMU firmware * @fw_size: Size of RGMU firmware buffer - * @cx_gdsc: CX headswitch that controls power of RGMU and - subsystem peripherals * @clks: RGMU clocks including the GPU * @gpu_clk: Pointer to GPU core clock * @rgmu_clk: Pointer to rgmu clock @@ -47,8 +46,6 @@ struct a6xx_rgmu_device { unsigned int oob_interrupt_num; unsigned int *fw_hostptr; uint32_t fw_size; - struct regulator *cx_gdsc; - struct regulator *gx_gdsc; struct clk_bulk_data *clks; /** @num_clks: Number of clocks in @clks */ int num_clks; diff --git a/adreno_a6xx_snapshot.c b/adreno_a6xx_snapshot.c index e7a32e7822..ef1d55f385 100644 --- a/adreno_a6xx_snapshot.c +++ b/adreno_a6xx_snapshot.c @@ -4,8 +4,6 @@ * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ -#include - #include "adreno.h" #include "adreno_a6xx.h" #include "adreno_snapshot.h" diff --git a/adreno_gen7.c b/adreno_gen7.c index 21f212a05c..733a59aca6 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -1610,6 +1610,7 @@ int gen7_probe_common(struct platform_device *pdev, device->pwrscale.avoid_ddr_stall = true; device->pwrctrl.rt_bus_hint = gen7_core->rt_bus_hint; + device->pwrctrl.cx_gdsc_offset = GEN7_GPU_CC_CX_GDSCR; ret = adreno_device_probe(pdev, adreno_dev); if (ret) diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 6194621bf9..db6e8831e1 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -6,14 +6,12 @@ #include #include -#include #include #include #include #include #include #include -#include #include #include #include @@ -346,40 +344,6 @@ static void gmu_ao_sync_event(struct adreno_device *adreno_dev) local_irq_restore(flags); } -int gen7_gmu_enable_gdsc(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - int ret; - - ret = wait_for_completion_timeout(&gmu->gdsc_gate, msecs_to_jiffies(5000)); - if (!ret) { - dev_err(device->dev, "GPU CX wait timeout. 
Dumping CX votes:\n"); - /* Dump the cx regulator consumer list */ - qcom_clk_dump(NULL, gmu->cx_gdsc, false); - } - - ret = regulator_enable(gmu->cx_gdsc); - if (ret) - dev_err(&gmu->pdev->dev, - "Failed to enable GMU CX gdsc, error %d\n", ret); - - kgsl_mmu_send_tlb_hint(&device->mmu, false); - clear_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); - return ret; -} - -void gen7_gmu_disable_gdsc(struct adreno_device *adreno_dev) -{ - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - kgsl_mmu_send_tlb_hint(&device->mmu, true); - reinit_completion(&gmu->gdsc_gate); - set_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); - regulator_disable(gmu->cx_gdsc); -} - int gen7_gmu_device_start(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -1496,6 +1460,7 @@ static void gen7_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) int ret = 0; struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; /* Disconnect GPU from BUS is not needed if CX GDSC goes off later */ @@ -1533,19 +1498,19 @@ static void gen7_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) * the GX HS. This code path is the only client voting for GX through * the regulator interface. 
*/ - if (gmu->gx_gdsc) { + if (pwr->gx_gdsc) { if (gen7_gmu_gx_is_on(adreno_dev)) { /* Switch gx gdsc control from GMU to CPU * force non-zero reference count in clk driver * so next disable call will turn * off the GDSC */ - ret = regulator_enable(gmu->gx_gdsc); + ret = regulator_enable(pwr->gx_gdsc); if (ret) dev_err(&gmu->pdev->dev, "suspend fail: gx enable %d\n", ret); - ret = regulator_disable(gmu->gx_gdsc); + ret = regulator_disable(pwr->gx_gdsc); if (ret) dev_err(&gmu->pdev->dev, "suspend fail: gx disable %d\n", ret); @@ -1600,7 +1565,7 @@ void gen7_gmu_suspend(struct adreno_device *adreno_dev) clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); - gen7_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen7_rdpm_cx_freq_update(gmu, 0); @@ -1937,7 +1902,7 @@ static int gen7_gmu_first_boot(struct adreno_device *adreno_dev) gen7_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled); - ret = gen7_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -2020,7 +1985,7 @@ clks_gdsc_off: clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); gdsc_off: - gen7_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen7_rdpm_cx_freq_update(gmu, 0); @@ -2035,7 +2000,7 @@ static int gen7_gmu_boot(struct adreno_device *adreno_dev) kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE); - ret = gen7_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -2085,7 +2050,7 @@ clks_gdsc_off: clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); gdsc_off: - gen7_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen7_rdpm_cx_freq_update(gmu, 0); @@ -2448,61 +2413,6 @@ static void gen7_gmu_rdpm_probe(struct gen7_gmu_device *gmu, res->start, resource_size(res)); } -static int gmu_cx_gdsc_event(struct notifier_block *nb, - unsigned long event, void *data) -{ - struct gen7_gmu_device *gmu = container_of(nb, struct gen7_gmu_device, gdsc_nb); - 
struct adreno_device *adreno_dev = gen7_gmu_to_adreno(gmu); - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - u32 val; - - if (!(event & REGULATOR_EVENT_DISABLE) || - !test_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags)) - return 0; - - if (kgsl_regmap_read_poll_timeout(&device->regmap, GEN7_GPU_CC_CX_GDSCR, - val, !(val & BIT(31)), 100, 100 * 1000)) - dev_err(device->dev, "GPU CX wait timeout.\n"); - - clear_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); - complete_all(&gmu->gdsc_gate); - - return 0; -} - -static int gen7_gmu_regulators_probe(struct gen7_gmu_device *gmu, - struct platform_device *pdev) -{ - int ret; - - gmu->cx_gdsc = devm_regulator_get(&pdev->dev, "vddcx"); - if (IS_ERR(gmu->cx_gdsc)) { - if (PTR_ERR(gmu->cx_gdsc) != -EPROBE_DEFER) - dev_err(&pdev->dev, "Couldn't get the vddcx gdsc\n"); - return PTR_ERR(gmu->cx_gdsc); - } - - gmu->gx_gdsc = devm_regulator_get(&pdev->dev, "vdd"); - if (IS_ERR(gmu->gx_gdsc)) { - if (PTR_ERR(gmu->gx_gdsc) != -EPROBE_DEFER) - dev_err(&pdev->dev, "Couldn't get the vdd gdsc\n"); - return PTR_ERR(gmu->gx_gdsc); - } - - init_completion(&gmu->gdsc_gate); - complete_all(&gmu->gdsc_gate); - - gmu->gdsc_nb.notifier_call = gmu_cx_gdsc_event; - ret = devm_regulator_register_notifier(gmu->cx_gdsc, &gmu->gdsc_nb); - - if (ret) { - dev_err(&pdev->dev, "Failed to register gmu cx gdsc notifier: %d\n", ret); - return ret; - } - - return 0; -} - void gen7_gmu_remove(struct kgsl_device *device) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); @@ -2615,7 +2525,7 @@ int gen7_gmu_probe(struct kgsl_device *device, gen7_gmu_rdpm_probe(gmu, device); /* Set up GMU regulators */ - ret = gen7_gmu_regulators_probe(gmu, pdev); + ret = kgsl_pwrctrl_probe_regulators(device, pdev); if (ret) return ret; @@ -2781,7 +2691,7 @@ static int gen7_gmu_power_off(struct adreno_device *adreno_dev) clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); - gen7_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); 
gen7_rdpm_cx_freq_update(gmu, 0); diff --git a/adreno_gen7_gmu.h b/adreno_gen7_gmu.h index 1487e778df..483b9f3159 100644 --- a/adreno_gen7_gmu.h +++ b/adreno_gen7_gmu.h @@ -24,9 +24,6 @@ * @num_bwlevel: number of GPU BW levels * @num_cnocbwlevel: number CNOC BW levels * @rpmh_votes: RPMh TCS command set for GPU, GMU voltage and bw scaling - * @cx_gdsc: CX headswitch that controls power of GMU and - * subsystem peripherals - * @gx_gdsc: GX headswitch that controls power of GPU subsystem * @clks: GPU subsystem clocks required for GMU functionality * @wakeup_pwrlevel: GPU wake up power/DCVS level in case different * than default power level @@ -55,9 +52,6 @@ struct gen7_gmu_device { /** @vrb: GMU virtual register bank memory */ struct kgsl_memdesc *vrb; struct gen7_hfi hfi; - /** @pwrlevels: Array of GMU power levels */ - struct regulator *cx_gdsc; - struct regulator *gx_gdsc; struct clk_bulk_data *clks; /** @num_clks: Number of entries in the @clks array */ int num_clks; @@ -104,10 +98,6 @@ struct gen7_gmu_device { u32 num_oob_perfcntr; /** @acd_debug_val: DVM value to calibrate ACD for a level */ u32 acd_debug_val; - /** @gdsc_nb: Notifier block for cx gdsc regulator */ - struct notifier_block gdsc_nb; - /** @gdsc_gate: Completion to signal cx gdsc collapse status */ - struct completion gdsc_gate; /** @stats_enable: GMU stats feature enable */ bool stats_enable; /** @stats_mask: GMU performance countables to enable */ @@ -300,14 +290,6 @@ int gen7_gmu_memory_init(struct adreno_device *adreno_dev); */ void gen7_gmu_aop_send_acd_state(struct gen7_gmu_device *gmu, bool flag); -/** - * gen7_gmu_enable_clocks - Enable gmu clocks - * @adreno_dev: Pointer to the adreno device - * - * Return: 0 on success or negative error on failure - */ -int gen7_gmu_enable_gdsc(struct adreno_device *adreno_dev); - /** * gen7_gmu_load_fw - Load gmu firmware * @adreno_dev: Pointer to the adreno device @@ -474,20 +456,6 @@ void gen7_gmu_remove(struct kgsl_device *device); */ int 
gen7_gmu_enable_clks(struct adreno_device *adreno_dev, u32 level); -/** - * gen7_gmu_enable_gdsc - Enable gmu gdsc - * @adreno_dev: Pointer to the adreno device - * - * Return: 0 on success or negative error on failure - */ -int gen7_gmu_enable_gdsc(struct adreno_device *adreno_dev); - -/** - * gen7_gmu_disable_gdsc - Disable gmu gdsc - * @adreno_dev: Pointer to the adreno device - */ -void gen7_gmu_disable_gdsc(struct adreno_device *adreno_dev); - /** * gen7_gmu_handle_watchdog - Handle watchdog interrupt * @adreno_dev: Pointer to the adreno device diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 492c3f0859..fec3079ae6 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -484,7 +484,7 @@ static int gen7_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) gen7_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled); - ret = gen7_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -574,7 +574,7 @@ clks_gdsc_off: clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); gdsc_off: - gen7_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen7_rdpm_cx_freq_update(gmu, 0); @@ -589,7 +589,7 @@ static int gen7_hwsched_gmu_boot(struct adreno_device *adreno_dev) kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE); - ret = gen7_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -641,7 +641,7 @@ clks_gdsc_off: clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); gdsc_off: - gen7_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen7_rdpm_cx_freq_update(gmu, 0); @@ -729,7 +729,7 @@ static int gen7_hwsched_gmu_power_off(struct adreno_device *adreno_dev) clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); - gen7_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen7_rdpm_cx_freq_update(gmu, 0); diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h index 98a40d0d81..197cf353c1 100644 --- a/kgsl_gmu_core.h 
+++ b/kgsl_gmu_core.h @@ -236,7 +236,6 @@ enum { GMU_PRIV_RSCC_SLEEP_DONE, GMU_PRIV_PM_SUSPEND, GMU_PRIV_PDC_RSC_LOADED, - GMU_PRIV_CX_GDSC_WAIT, /* Indicates if GMU INIT HFI messages are recorded successfully */ GMU_PRIV_WARMBOOT_GMU_INIT_DONE, /* Indicates if GPU BOOT HFI messages are recorded successfully */ diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 7a99babbd6..363e622ba9 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -4,7 +4,9 @@ * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. */ +#include #include +#include #include #include #include @@ -1316,9 +1318,34 @@ int kgsl_pwrctrl_axi(struct kgsl_device *device, bool state) return 0; } -static int enable_regulator(struct device *dev, struct regulator *regulator, - const char *name) +int kgsl_pwrctrl_enable_cx_gdsc(struct kgsl_device *device) { + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct regulator *regulator = pwr->cx_gdsc; + int ret; + + if (IS_ERR_OR_NULL(regulator)) + return 0; + + ret = wait_for_completion_timeout(&pwr->cx_gdsc_gate, msecs_to_jiffies(5000)); + if (!ret) { + dev_err(device->dev, "GPU CX wait timeout. 
Dumping CX votes:\n"); + /* Dump the cx regulator consumer list */ + qcom_clk_dump(NULL, regulator, false); + } + + ret = regulator_enable(regulator); + if (ret) + dev_err(device->dev, "Failed to enable CX regulator: %d\n", ret); + + kgsl_mmu_send_tlb_hint(&device->mmu, false); + pwr->cx_gdsc_wait = false; + return ret; +} + +static int kgsl_pwtctrl_enable_gx_gdsc(struct kgsl_device *device) +{ + struct regulator *regulator = device->pwrctrl.gx_gdsc; int ret; if (IS_ERR_OR_NULL(regulator)) @@ -1326,10 +1353,34 @@ static int enable_regulator(struct device *dev, struct regulator *regulator, ret = regulator_enable(regulator); if (ret) - dev_err(dev, "Unable to enable regulator %s: %d\n", name, ret); + dev_err(device->dev, "Failed to enable GX regulator: %d\n", ret); return ret; } +void kgsl_pwrctrl_disable_cx_gdsc(struct kgsl_device *device) +{ + struct regulator *regulator = device->pwrctrl.cx_gdsc; + + if (IS_ERR_OR_NULL(regulator)) + return; + + kgsl_mmu_send_tlb_hint(&device->mmu, true); + reinit_completion(&device->pwrctrl.cx_gdsc_gate); + device->pwrctrl.cx_gdsc_wait = true; + regulator_disable(regulator); +} + +static void kgsl_pwrctrl_disable_gx_gdsc(struct kgsl_device *device) +{ + struct regulator *regulator = device->pwrctrl.gx_gdsc; + + if (IS_ERR_OR_NULL(regulator)) + return; + + if (!kgsl_regulator_disable_wait(regulator, 200)) + dev_err(device->dev, "Regulator vdd is stuck on\n"); +} + static int enable_regulators(struct kgsl_device *device) { struct kgsl_pwrctrl *pwr = &device->pwrctrl; @@ -1338,15 +1389,14 @@ static int enable_regulators(struct kgsl_device *device) if (test_and_set_bit(KGSL_PWRFLAGS_POWER_ON, &pwr->power_flags)) return 0; - ret = enable_regulator(&device->pdev->dev, pwr->cx_gdsc, "vddcx"); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (!ret) { /* Set parent in retention voltage to power up vdd supply */ ret = kgsl_regulator_set_voltage(device->dev, pwr->gx_gdsc_parent, pwr->gx_gdsc_parent_min_corner); if (!ret) - ret = 
enable_regulator(&device->pdev->dev, - pwr->gx_gdsc, "vdd"); + ret = kgsl_pwtctrl_enable_gx_gdsc(device); } if (ret) { @@ -1358,6 +1408,58 @@ static int enable_regulators(struct kgsl_device *device) return 0; } +int kgsl_pwrctrl_probe_regulators(struct kgsl_device *device, + struct platform_device *pdev) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + pwr->cx_gdsc = devm_regulator_get(&pdev->dev, "vddcx"); + if (IS_ERR(pwr->cx_gdsc)) { + if (PTR_ERR(pwr->cx_gdsc) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Couldn't get the vddcx gdsc\n"); + return PTR_ERR(pwr->cx_gdsc); + } + + pwr->gx_gdsc = devm_regulator_get(&pdev->dev, "vdd"); + if (IS_ERR(pwr->gx_gdsc)) { + if (PTR_ERR(pwr->gx_gdsc) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Couldn't get the vdd gdsc\n"); + return PTR_ERR(pwr->gx_gdsc); + } + + return 0; +} + +static int kgsl_cx_gdsc_event(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct kgsl_pwrctrl *pwr = container_of(nb, struct kgsl_pwrctrl, cx_gdsc_nb); + struct kgsl_device *device = container_of(pwr, struct kgsl_device, pwrctrl); + u32 val; + + if (!(event & REGULATOR_EVENT_DISABLE) || !pwr->cx_gdsc_wait) + return 0; + + if (pwr->cx_gdsc_offset) { + if (kgsl_regmap_read_poll_timeout(&device->regmap, pwr->cx_gdsc_offset, + val, !(val & BIT(31)), 100, 100 * 1000)) + dev_err(device->dev, "GPU CX wait timeout.\n"); + } + + pwr->cx_gdsc_wait = false; + complete_all(&pwr->cx_gdsc_gate); + + return 0; +} + +int kgsl_register_gdsc_notifier(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + pwr->cx_gdsc_nb.notifier_call = kgsl_cx_gdsc_event; + return devm_regulator_register_notifier(pwr->cx_gdsc, &pwr->cx_gdsc_nb); +} + static int kgsl_pwrctrl_pwrrail(struct kgsl_device *device, bool state) { struct kgsl_pwrctrl *pwr = &device->pwrctrl; @@ -1378,10 +1480,8 @@ static int kgsl_pwrctrl_pwrrail(struct kgsl_device *device, bool state) &pwr->power_flags)) { kgsl_mmu_send_tlb_hint(&device->mmu, true); 
trace_kgsl_rail(device, state); - if (!kgsl_regulator_disable_wait(pwr->gx_gdsc, 200)) - dev_err(device->dev, "Regulator vdd is stuck on\n"); - if (!kgsl_regulator_disable_wait(pwr->cx_gdsc, 200)) - dev_err(device->dev, "Regulator vddcx is stuck on\n"); + kgsl_pwrctrl_disable_gx_gdsc(device); + kgsl_pwrctrl_disable_cx_gdsc(device); } } else { status = enable_regulators(device); @@ -1618,6 +1718,15 @@ int kgsl_pwrctrl_init(struct kgsl_device *device) } } + init_completion(&pwr->cx_gdsc_gate); + complete_all(&pwr->cx_gdsc_gate); + + result = kgsl_register_gdsc_notifier(device); + if (result) { + dev_err(&pdev->dev, "Failed to register gdsc notifier: %d\n", result); + return result; + } + pwr->power_flags = 0; pm_runtime_enable(&pdev->dev); diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index ee0da22d61..39e99f85b6 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -117,6 +117,14 @@ struct kgsl_pwrctrl { struct regulator *gx_gdsc_parent; /** @gx_gdsc_parent_min_corner: Minimum supply voltage for GX parent */ u32 gx_gdsc_parent_min_corner; + /** @cx_gdsc_nb: Notifier block for cx gdsc regulator */ + struct notifier_block cx_gdsc_nb; + /** @cx_gdsc_gate: Completion to signal cx gdsc collapse status */ + struct completion cx_gdsc_gate; + /** @cx_gdsc_wait: Whether to wait for cx gdsc to turn off */ + bool cx_gdsc_wait; + /** @cx_gdsc_offset: Offset of CX GDSC register */ + u32 cx_gdsc_offset; int isense_clk_indx; int isense_clk_on_level; unsigned long power_flags; @@ -278,4 +286,28 @@ void kgsl_pwrctrl_irq(struct kgsl_device *device, bool state); * Clear the l3 vote when going into slumber */ void kgsl_pwrctrl_clear_l3_vote(struct kgsl_device *device); + +/** + * kgsl_pwrctrl_enable_cx_gdsc - Enable cx gdsc + * @device: Pointer to the kgsl device + * + * Return: 0 on success or negative error on failure + */ +int kgsl_pwrctrl_enable_cx_gdsc(struct kgsl_device *device); + +/** + * kgsl_pwrctrl_disable_cx_gdsc - Disable cx gdsc + * @device: Pointer to the kgsl device + */ 
+void kgsl_pwrctrl_disable_cx_gdsc(struct kgsl_device *device); + +/** + * kgsl_pwrctrl_probe_regulators - Probe regulators + * @device: Pointer to the kgsl device + * @pdev: Pointer to the platform device + * + * Return: 0 on success or negative error on failure + */ +int kgsl_pwrctrl_probe_regulators(struct kgsl_device *device, + struct platform_device *pdev); #endif /* __KGSL_PWRCTRL_H */ From 84659689435e6091427d3df3c5941f0b032563d5 Mon Sep 17 00:00:00 2001 From: Carter Cooper Date: Wed, 10 May 2023 16:40:53 -0600 Subject: [PATCH 0507/1016] kgsl: gen7: Allow using up to 32 DCVS points Add support to allow newer gen7 devices the ability to use more than 16 DCVS points. Using more than 16 levels is dependent on the GMU FW also being able to support the new hfi_table_cmd packet which allows dynamic table support rather than hardcoded support (as was done with hfi_dcvs_table_cmd packet). This patch will detect if the GMU supports the new packet and send the correct HFI message accordingly. 
Change-Id: Ie1df5ab069c49265a61485ee00fb7958ac6eeba7 Signed-off-by: Carter Cooper --- adreno_gen7_gmu.c | 6 +-- adreno_gen7_gmu.h | 9 +++++ adreno_gen7_hfi.c | 85 ++++++++++++++++++++++++++++++++++++++- adreno_gen7_hfi.h | 10 ++++- adreno_gen7_hwsched.c | 8 ++-- adreno_gen7_hwsched_hfi.c | 3 +- adreno_gen7_rpmh.c | 35 +++++++--------- adreno_hfi.h | 34 ++++++++++++++-- kgsl_gmu_core.h | 9 ++++- kgsl_pwrctrl.h | 2 +- msm_adreno_devfreq.h | 4 +- 11 files changed, 163 insertions(+), 42 deletions(-) diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 86c58d0033..b168f457c3 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -1564,7 +1564,7 @@ static int gen7_gmu_notify_slumber(struct adreno_device *adreno_dev) struct kgsl_pwrctrl *pwr = &device->pwrctrl; struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); int bus_level = pwr->pwrlevels[pwr->default_pwrlevel].bus_freq; - int perf_idx = gmu->hfi.dcvs_table.gpu_level_num - + int perf_idx = gmu->dcvs_table.gpu_level_num - pwr->default_pwrlevel - 1; struct hfi_prep_slumber_cmd req = { .freq = perf_idx, @@ -1612,7 +1612,7 @@ static int gen7_gmu_dcvs_set(struct adreno_device *adreno_dev, struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct kgsl_pwrctrl *pwr = &device->pwrctrl; struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table; + struct gen7_dcvs_table *table = &gmu->dcvs_table; struct hfi_gx_bw_perf_vote_cmd req = { .ack_type = DCVS_ACK_BLOCK, .freq = INVALID_DCVS_IDX, @@ -1661,7 +1661,7 @@ static int gen7_gmu_dcvs_set(struct adreno_device *adreno_dev, if (req.freq != INVALID_DCVS_IDX) gen7_rdpm_mx_freq_update(gmu, - gmu->hfi.dcvs_table.gx_votes[req.freq].freq); + gmu->dcvs_table.gx_votes[req.freq].freq); return ret; } diff --git a/adreno_gen7_gmu.h b/adreno_gen7_gmu.h index 1487e778df..51f0bc4674 100644 --- a/adreno_gen7_gmu.h +++ b/adreno_gen7_gmu.h @@ -11,6 +11,13 @@ #include "adreno_gen7_hfi.h" #include "kgsl_gmu_core.h" +struct 
gen7_dcvs_table { + u32 gpu_level_num; + u32 gmu_level_num; + struct opp_gx_desc gx_votes[MAX_GX_LEVELS]; + struct opp_desc cx_votes[MAX_CX_LEVELS]; +}; + /** * struct gen7_gmu_device - GMU device structure * @ver: GMU Version information @@ -120,6 +127,8 @@ struct gen7_gmu_device { u32 cp_init_hdr; /** @switch_to_unsec_hdr: raw command header for switch to unsecure packet */ u32 switch_to_unsec_hdr; + /** @dcvs_table: Table for gpu dcvs levels */ + struct gen7_dcvs_table dcvs_table; }; /* Helper function to get to gen7 gmu device from adreno device */ diff --git a/adreno_gen7_hfi.c b/adreno_gen7_hfi.c index f1a799dade..7710da6624 100644 --- a/adreno_gen7_hfi.c +++ b/adreno_gen7_hfi.c @@ -9,6 +9,7 @@ #include "adreno.h" #include "adreno_gen7.h" +#include "adreno_gen7_gmu.h" #include "adreno_gen7_hfi.h" #include "kgsl_device.h" #include "kgsl_trace.h" @@ -659,6 +660,87 @@ static void reset_hfi_queues(struct adreno_device *adreno_dev) } } +/* Fill the entry and return the dword count written */ +static u32 _fill_table_entry(struct hfi_table_entry *entry, u32 count, + u32 stride_bytes, u32 *data) +{ + entry->count = count; + entry->stride = stride_bytes >> 2; /* entry->stride is in dwords */ + memcpy(entry->data, data, stride_bytes * count); + + /* Return total dword count of entry + data */ + return (sizeof(*entry) >> 2) + (entry->count * entry->stride); +} + +int gen7_hfi_send_gpu_perf_table(struct adreno_device *adreno_dev) +{ + /* + * Buffer to store either hfi_table_cmd or hfi_dcvstable_cmd. + * Current max size for either is 165 dwords. 
+ */ + static u32 cmd_buf[200]; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_dcvs_table *tbl = &gmu->dcvs_table; + int ret = 0; + + /* Starting with GMU HFI Version 2.6.1, use H2F_MSG_TABLE */ + if (gmu->ver.hfi >= HFI_VERSION(2, 6, 1)) { + struct hfi_table_cmd *cmd = (struct hfi_table_cmd *)&cmd_buf[0]; + u32 dword_off; + + /* Already setup, so just send cmd */ + if (cmd->hdr) + return gen7_hfi_send_generic_req(adreno_dev, cmd, + MSG_HDR_GET_SIZE(cmd->hdr) << 2); + + if (tbl->gpu_level_num > MAX_GX_LEVELS || tbl->gmu_level_num > MAX_CX_LEVELS) + return -EINVAL; + + /* CMD starts with struct hfi_table_cmd data */ + cmd->type = HFI_TABLE_GPU_PERF; + dword_off = sizeof(*cmd) >> 2; + + /* Fill in the table entry and data starting at dword_off */ + dword_off += _fill_table_entry((struct hfi_table_entry *)&cmd_buf[dword_off], + tbl->gpu_level_num, sizeof(struct opp_gx_desc), + (u32 *)tbl->gx_votes); + + /* Fill in the table entry and data starting at dword_off */ + dword_off += _fill_table_entry((struct hfi_table_entry *)&cmd_buf[dword_off], + tbl->gmu_level_num, sizeof(struct opp_desc), + (u32 *)tbl->cx_votes); + + cmd->hdr = CREATE_MSG_HDR(H2F_MSG_TABLE, HFI_MSG_CMD); + cmd->hdr = MSG_HDR_SET_SIZE(cmd->hdr, dword_off); + + ret = gen7_hfi_send_generic_req(adreno_dev, cmd, dword_off << 2); + } else { + struct hfi_dcvstable_cmd *cmd = (struct hfi_dcvstable_cmd *)&cmd_buf[0]; + + /* Already setup, so just send cmd */ + if (cmd->hdr) + return gen7_hfi_send_generic_req(adreno_dev, cmd, sizeof(*cmd)); + + if (tbl->gpu_level_num > MAX_GX_LEVELS_LEGACY || tbl->gmu_level_num > MAX_CX_LEVELS) + return -EINVAL; + + ret = CMD_MSG_HDR(*cmd, H2F_MSG_PERF_TBL); + if (ret) + return ret; + + cmd->gpu_level_num = tbl->gpu_level_num; + cmd->gmu_level_num = tbl->gmu_level_num; + memcpy(&cmd->gx_votes, tbl->gx_votes, + sizeof(struct opp_gx_desc) * cmd->gpu_level_num); + memcpy(&cmd->cx_votes, tbl->cx_votes, + sizeof(struct opp_desc) * cmd->gmu_level_num); + + ret 
= gen7_hfi_send_generic_req(adreno_dev, cmd, sizeof(*cmd)); + } + + return ret; +} + int gen7_hfi_start(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); @@ -667,8 +749,7 @@ int gen7_hfi_start(struct adreno_device *adreno_dev) reset_hfi_queues(adreno_dev); - result = gen7_hfi_send_generic_req(adreno_dev, &gmu->hfi.dcvs_table, - sizeof(gmu->hfi.dcvs_table)); + result = gen7_hfi_send_gpu_perf_table(adreno_dev); if (result) goto err; diff --git a/adreno_gen7_hfi.h b/adreno_gen7_hfi.h index c7274ae912..086aa29850 100644 --- a/adreno_gen7_hfi.h +++ b/adreno_gen7_hfi.h @@ -24,8 +24,6 @@ struct gen7_hfi { struct hfi_bwtable_cmd bw_table; /** @acd_table: HFI table for ACD data */ struct hfi_acd_table_cmd acd_table; - /** @dcvs_table: HFI table for gpu dcvs levels */ - struct hfi_dcvstable_cmd dcvs_table; /** @cmdq_lock: Spinlock for accessing the cmdq */ spinlock_t cmdq_lock; /** @@ -201,6 +199,14 @@ int gen7_hfi_send_clx_feature_ctrl(struct adreno_device *adreno_dev); */ int gen7_hfi_send_ifpc_feature_ctrl(struct adreno_device *adreno_dev); +/** + * gen7_hfi_send_gpu_perf_table - Send the gpu perf table hfi packet + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_hfi_send_gpu_perf_table(struct adreno_device *adreno_dev); + /* * gen7_hfi_process_queue - Check hfi queue for messages from gmu * @gmu: Pointer to the gen7 gmu device diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 5e4ed32dee..df5a8223f6 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -690,8 +690,7 @@ static int gen7_hwsched_notify_slumber(struct adreno_device *adreno_dev) if (ret) return ret; - req.freq = gmu->hfi.dcvs_table.gpu_level_num - - pwr->default_pwrlevel - 1; + req.freq = gmu->dcvs_table.gpu_level_num - pwr->default_pwrlevel - 1; req.bw = pwr->pwrlevels[pwr->default_pwrlevel].bus_freq; req.bw |= gen7_bus_ab_quantize(adreno_dev, 0); @@ -1328,7 +1327,7 @@ 
static int gen7_hwsched_dcvs_set(struct adreno_device *adreno_dev, struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct kgsl_pwrctrl *pwr = &device->pwrctrl; struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table; + struct gen7_dcvs_table *table = &gmu->dcvs_table; struct hfi_gx_bw_perf_vote_cmd req = { .ack_type = DCVS_ACK_BLOCK, .freq = INVALID_DCVS_IDX, @@ -1379,8 +1378,7 @@ static int gen7_hwsched_dcvs_set(struct adreno_device *adreno_dev, } if (req.freq != INVALID_DCVS_IDX) - gen7_rdpm_mx_freq_update(gmu, - gmu->hfi.dcvs_table.gx_votes[req.freq].freq); + gen7_rdpm_mx_freq_update(gmu, gmu->dcvs_table.gx_votes[req.freq].freq); return ret; } diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index b4c4fb0715..25661c8b86 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -2372,8 +2372,7 @@ int gen7_hwsched_hfi_start(struct adreno_device *adreno_dev) if (ret) goto err; - ret = gen7_hfi_send_generic_req(adreno_dev, &gmu->hfi.dcvs_table, - sizeof(gmu->hfi.dcvs_table)); + ret = gen7_hfi_send_gpu_perf_table(adreno_dev); if (ret) goto err; diff --git a/adreno_gen7_rpmh.c b/adreno_gen7_rpmh.c index 3590def0d6..62d46b703c 100644 --- a/adreno_gen7_rpmh.c +++ b/adreno_gen7_rpmh.c @@ -255,23 +255,22 @@ static struct rpmh_bw_votes *build_rpmh_bw_votes(struct bcm *bcms, /* * setup_gmu_arc_votes - Build the gmu voting table - * @hfi: Pointer to hfi device + * @gmu: Pointer to gmu device * @pri_rail: Pointer to primary power rail vlvl table * @sec_rail: Pointer to second/dependent power rail vlvl table - * @freqs: List of GMU frequencies - * @vlvls: List of GMU voltage levels * * This function initializes the cx votes for all gmu frequencies * for gmu dcvs */ -static int setup_cx_arc_votes(struct gen7_hfi *hfi, - struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail, - u32 *freqs, u32 *vlvls) +static int setup_cx_arc_votes(struct gen7_gmu_device *gmu, + struct 
rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail) { /* Hardcoded values of GMU CX voltage levels */ u16 gmu_cx_vlvl[MAX_CX_LEVELS]; u32 cx_votes[MAX_CX_LEVELS]; - struct hfi_dcvstable_cmd *table = &hfi->dcvs_table; + struct gen7_dcvs_table *table = &gmu->dcvs_table; + u32 *freqs = gmu->freqs; + u32 *vlvls = gmu->vlvls; int ret, i; gmu_cx_vlvl[0] = 0; @@ -333,21 +332,21 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct kgsl_pwrctrl *pwr = &device->pwrctrl; - struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table; + struct gen7_dcvs_table *table = &gmu->dcvs_table; u32 index; u16 vlvl_tbl[MAX_GX_LEVELS]; u32 gx_votes[MAX_GX_LEVELS]; int ret, i; - /* Add the zero powerlevel for the perf table */ - table->gpu_level_num = device->pwrctrl.num_pwrlevels + 1; - - if (table->gpu_level_num > ARRAY_SIZE(vlvl_tbl)) { - dev_err(&gmu->pdev->dev, + if (pwr->num_pwrlevels + 1 > ARRAY_SIZE(vlvl_tbl)) { + dev_err(device->dev, "Defined more GPU DCVS levels than RPMh can support\n"); return -ERANGE; } + /* Add the zero powerlevel for the perf table */ + table->gpu_level_num = pwr->num_pwrlevels + 1; + memset(vlvl_tbl, 0, sizeof(vlvl_tbl)); table->gx_votes[0].freq = 0; @@ -366,7 +365,7 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, ret = to_cx_hlvl(cx_rail, cx_vlvl, &table->gx_votes[index].cx_vote); if (ret) { - dev_err(&gmu->pdev->dev, "Unsupported cx corner: %u\n", + dev_err(device->dev, "Unsupported cx corner: %u\n", cx_vlvl); return ret; } @@ -386,14 +385,9 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, static int build_dcvs_table(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - struct gen7_hfi *hfi = &gmu->hfi; struct rpmh_arc_vals gx_arc, cx_arc, mx_arc; int ret; - ret = CMD_MSG_HDR(hfi->dcvs_table, H2F_MSG_PERF_TBL); - if (ret) - return ret; - ret = 
rpmh_arc_cmds(&gx_arc, "gfx.lvl"); if (ret) return ret; @@ -406,8 +400,7 @@ static int build_dcvs_table(struct adreno_device *adreno_dev) if (ret) return ret; - ret = setup_cx_arc_votes(hfi, &cx_arc, &mx_arc, - gmu->freqs, gmu->vlvls); + ret = setup_cx_arc_votes(gmu, &cx_arc, &mx_arc); if (ret) return ret; diff --git a/adreno_hfi.h b/adreno_hfi.h index f3a2ad011d..035da6f98f 100644 --- a/adreno_hfi.h +++ b/adreno_hfi.h @@ -79,6 +79,19 @@ #define HFI_FEATURE_DMS 27 #define HFI_FEATURE_AQE 29 +/* Types to be used with H2F_MSG_TABLE */ +enum hfi_table_type { + HFI_TABLE_BW_VOTE = 0, + HFI_TABLE_GPU_PERF = 1, + HFI_TABLE_DIDT = 2, + HFI_TABLE_ACD = 3, + HFI_TABLE_CLX_V1 = 4, + HFI_TABLE_CLX_V2 = 5, + HFI_TABLE_THERM = 6, + HFI_TABLE_DCVS_DATA = 7, + HFI_TABLE_MAX, +}; + /* A6xx uses a different value for KPROF */ #define HFI_FEATURE_A6XX_KPROF 14 @@ -445,6 +458,7 @@ enum hfi_msg_type { H2F_MSG_GET_VALUE = 12, H2F_MSG_SET_VALUE = 13, H2F_MSG_CORE_FW_START = 14, + H2F_MSG_TABLE = 15, F2H_MSG_MEM_ALLOC = 20, H2F_MSG_GX_BW_PERF_VOTE = 30, H2F_MSG_FW_HALT = 32, @@ -512,7 +526,7 @@ struct hfi_bwtable_cmd { u32 cnoc_cmd_addrs[MAX_CNOC_CMDS]; u32 cnoc_cmd_data[MAX_CNOC_LEVELS][MAX_CNOC_CMDS]; u32 ddr_cmd_addrs[MAX_BW_CMDS]; - u32 ddr_cmd_data[MAX_GX_LEVELS][MAX_BW_CMDS]; + u32 ddr_cmd_data[MAX_BW_LEVELS][MAX_BW_CMDS]; } __packed; struct opp_gx_desc { @@ -532,7 +546,7 @@ struct hfi_dcvstable_v1_cmd { u32 hdr; u32 gpu_level_num; u32 gmu_level_num; - struct opp_desc gx_votes[MAX_GX_LEVELS]; + struct opp_desc gx_votes[MAX_GX_LEVELS_LEGACY]; struct opp_desc cx_votes[MAX_CX_LEVELS]; } __packed; @@ -541,10 +555,24 @@ struct hfi_dcvstable_cmd { u32 hdr; u32 gpu_level_num; u32 gmu_level_num; - struct opp_gx_desc gx_votes[MAX_GX_LEVELS]; + struct opp_gx_desc gx_votes[MAX_GX_LEVELS_LEGACY]; struct opp_desc cx_votes[MAX_CX_LEVELS]; } __packed; +/* H2F */ +struct hfi_table_entry { + u32 count; + u32 stride; + u32 data[]; +} __packed; + +struct hfi_table_cmd { + u32 hdr; + u32 version; + 
u32 type; + struct hfi_table_entry entry[]; +} __packed; + #define MAX_ACD_STRIDE 2 #define MAX_ACD_NUM_LEVELS KGSL_MAX_PWRLEVELS diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h index 98a40d0d81..bb7805f537 100644 --- a/kgsl_gmu_core.h +++ b/kgsl_gmu_core.h @@ -12,8 +12,10 @@ /* GMU_DEVICE - Given an KGSL device return the GMU specific struct */ #define GMU_DEVICE_OPS(_a) ((_a)->gmu_core.dev_ops) -#define MAX_GX_LEVELS 16 +#define MAX_GX_LEVELS 32 +#define MAX_GX_LEVELS_LEGACY 16 #define MAX_CX_LEVELS 4 +#define MAX_BW_LEVELS 16 #define MAX_CNOC_LEVELS 2 #define MAX_CNOC_CMDS 6 #define MAX_BW_CMDS 8 @@ -99,6 +101,11 @@ enum gmu_pwrctrl_mode { #define GMU_FREQ_MIN 200000000 #define GMU_FREQ_MAX 500000000 +#define HFI_VERSION(major, minor, step) \ + (FIELD_PREP(GENMASK(31, 28), major) | \ + FIELD_PREP(GENMASK(27, 16), minor) | \ + FIELD_PREP(GENMASK(15, 0), step)) + #define GMU_VER_MAJOR(ver) (((ver) >> 28) & 0xF) #define GMU_VER_MINOR(ver) (((ver) >> 16) & 0xFFF) #define GMU_VER_STEP(ver) ((ver) & 0xFFFF) diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index ee0da22d61..15e872c795 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -14,7 +14,7 @@ ****************************************************************************/ #define KGSL_MAX_CLKS 18 -#define KGSL_MAX_PWRLEVELS 16 +#define KGSL_MAX_PWRLEVELS 32 #define KGSL_PWRFLAGS_POWER_ON 0 #define KGSL_PWRFLAGS_CLK_ON 1 diff --git a/msm_adreno_devfreq.h b/msm_adreno_devfreq.h index 40d1f9790e..9944fff384 100644 --- a/msm_adreno_devfreq.h +++ b/msm_adreno_devfreq.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2016-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef MSM_ADRENO_DEVFREQ_H @@ -18,7 +18,7 @@ struct device; /* same as KGSL_MAX_PWRLEVELS */ -#define MSM_ADRENO_MAX_PWRLEVELS 16 +#define MSM_ADRENO_MAX_PWRLEVELS 32 struct xstats { u64 ram_time; From fc47593ee72e8a43ffde338dd9a1eee0df31a3ca Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Wed, 30 Aug 2023 21:18:01 +0530 Subject: [PATCH 0508/1016] msm: kgsl: Use local variable instead of pointer Instead of accessing the structure member via pointer multiple times, use local variable if they are available. Change-Id: I46e7296d07ed6acc6fd65247f0981b2c1c449d1a Signed-off-by: Kamal Agrawal --- adreno_drawctxt.c | 2 +- kgsl.c | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/adreno_drawctxt.c b/adreno_drawctxt.c index 797e732475..fb1dd828a9 100644 --- a/adreno_drawctxt.c +++ b/adreno_drawctxt.c @@ -358,7 +358,7 @@ adreno_drawctxt_create(struct kgsl_device_private *dev_priv, } /* Make sure that our target can support secure contexts if requested */ - if (!kgsl_mmu_is_secured(&dev_priv->device->mmu) && + if (!kgsl_mmu_is_secured(&device->mmu) && (local & KGSL_CONTEXT_SECURE)) { dev_err_once(device->dev, "Secure context not supported\n"); return ERR_PTR(-EOPNOTSUPP); diff --git a/kgsl.c b/kgsl.c index f84f3e7ae6..43b6d222c2 100644 --- a/kgsl.c +++ b/kgsl.c @@ -655,7 +655,7 @@ int kgsl_context_init(struct kgsl_device_private *dev_priv, { struct kgsl_device *device = dev_priv->device; int ret = 0, id; - struct kgsl_process_private *proc_priv = dev_priv->process_priv; + struct kgsl_process_private *proc_priv = dev_priv->process_priv; /* * Read and increment the context count under lock to make sure @@ -665,7 +665,7 @@ int kgsl_context_init(struct kgsl_device_private *dev_priv, if (atomic_read(&proc_priv->ctxt_count) > KGSL_MAX_CONTEXTS_PER_PROC) { dev_err(device->dev, "Per process context limit reached for pid %u\n", - pid_nr(dev_priv->process_priv->pid)); + pid_nr(proc_priv->pid)); spin_unlock(&proc_priv->ctxt_count_lock); 
kgsl_context_debug_info(device); return -ENOSPC; @@ -707,18 +707,19 @@ int kgsl_context_init(struct kgsl_device_private *dev_priv, * the context is destroyed. This will also prevent the pagetable * from being destroyed */ - if (!kgsl_process_private_get(dev_priv->process_priv)) { + if (!kgsl_process_private_get(proc_priv)) { ret = -EBADF; goto out; } - context->device = dev_priv->device; + + context->device = device; context->dev_priv = dev_priv; - context->proc_priv = dev_priv->process_priv; + context->proc_priv = proc_priv; context->tid = task_pid_nr(current); ret = kgsl_sync_timeline_create(context); if (ret) { - kgsl_process_private_put(dev_priv->process_priv); + kgsl_process_private_put(proc_priv); goto out; } @@ -729,7 +730,7 @@ out: if (ret) { atomic_dec(&proc_priv->ctxt_count); write_lock(&device->context_lock); - idr_remove(&dev_priv->device->context_idr, id); + idr_remove(&device->context_idr, id); write_unlock(&device->context_lock); } @@ -2059,7 +2060,7 @@ long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, result = kgsl_reclaim_to_pinned_state(dev_priv->process_priv); if (result == 0) - result = dev_priv->device->ftbl->queue_cmds(dev_priv, context, + result = device->ftbl->queue_cmds(dev_priv, context, &drawobj, 1, ¶m->timestamp); /* @@ -2576,7 +2577,7 @@ long kgsl_ioctl_drawctxt_create(struct kgsl_device_private *dev_priv, result = PTR_ERR(context); goto done; } - trace_kgsl_context_create(dev_priv->device, context, param->flags); + trace_kgsl_context_create(device, context, param->flags); /* Commit the pointer to the context in context_idr */ write_lock(&device->context_lock); From 0b331c044ab4eebeafac9ceb329493453b220dc8 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 2 Feb 2023 22:36:24 +0530 Subject: [PATCH 0509/1016] msm: kgsl: Use QCOM io-pagetables Use the optimized QCOM io-pagetables to make maps/unmaps faster. 
Change-Id: I29c018083f9fb4ce40f4d52f60ed9c83c742e2c7 Signed-off-by: Harshdeep Dhatt Signed-off-by: Kamal Agrawal Signed-off-by: Hareesh Gundu --- kgsl_iommu.c | 173 ++++++++++++++++++++++++++++++++++++++++----------- kgsl_iommu.h | 8 ++- 2 files changed, 140 insertions(+), 41 deletions(-) diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 33cd3301f7..1dec807501 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -176,6 +176,92 @@ static struct page *iommu_get_guard_page(struct kgsl_memdesc *memdesc) return kgsl_guard_page; } +static size_t iommu_pgsize(unsigned long pgsize_bitmap, unsigned long iova, + phys_addr_t paddr, size_t size, size_t *count) +{ + unsigned int pgsize_idx, pgsize_idx_next; + unsigned long pgsizes; + size_t offset, pgsize, pgsize_next; + unsigned long addr_merge = paddr | iova; + + /* Page sizes supported by the hardware and small enough for @size */ + pgsizes = pgsize_bitmap & GENMASK(__fls(size), 0); + + /* Constrain the page sizes further based on the maximum alignment */ + if (likely(addr_merge)) + pgsizes &= GENMASK(__ffs(addr_merge), 0); + + /* Make sure we have at least one suitable page size */ + if (!pgsizes) + return 0; + + /* Pick the biggest page size remaining */ + pgsize_idx = __fls(pgsizes); + pgsize = BIT(pgsize_idx); + if (!count) + return pgsize; + + /* Find the next biggest support page size, if it exists */ + pgsizes = pgsize_bitmap & ~GENMASK(pgsize_idx, 0); + if (!pgsizes) + goto out_set_count; + + pgsize_idx_next = __ffs(pgsizes); + pgsize_next = BIT(pgsize_idx_next); + + /* + * There's no point trying a bigger page size unless the virtual + * and physical addresses are similarly offset within the larger page. + */ + if ((iova ^ paddr) & (pgsize_next - 1)) + goto out_set_count; + + /* Calculate the offset to the next page size alignment boundary */ + offset = pgsize_next - (addr_merge & (pgsize_next - 1)); + + /* + * If size is big enough to accommodate the larger page, reduce + * the number of smaller pages. 
+ */ + if (offset + pgsize_next <= size) + size = offset; + +out_set_count: + *count = size >> pgsize_idx; + return pgsize; +} + +static int _iopgtbl_unmap_pages(struct kgsl_iommu_pt *pt, u64 gpuaddr, + size_t size) +{ + struct io_pgtable_ops *ops = pt->pgtbl_ops; + size_t unmapped = 0; + + while (unmapped < size) { + size_t ret, size_to_unmap, remaining, pgcount; + + remaining = (size - unmapped); + size_to_unmap = iommu_pgsize(pt->info.cfg.pgsize_bitmap, + gpuaddr, gpuaddr, remaining, &pgcount); + if (size_to_unmap == 0) + break; +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) + ret = qcom_arm_lpae_unmap_pages(ops, gpuaddr, size_to_unmap, + pgcount, NULL); +#else + ret = ops->unmap_pages(ops, gpuaddr, size_to_unmap, + pgcount, NULL); +#endif + if (ret == 0) + break; + + gpuaddr += ret; + unmapped += ret; + } + + return (unmapped == size) ? 0 : -EINVAL; +} + static void kgsl_iommu_flush_tlb(struct kgsl_mmu *mmu) { struct kgsl_iommu *iommu = &mmu->iommu; @@ -190,6 +276,14 @@ static void kgsl_iommu_flush_tlb(struct kgsl_mmu *mmu) static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) { struct io_pgtable_ops *ops = pt->pgtbl_ops; + int ret = 0; + + if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_LPAE)) { + ret = _iopgtbl_unmap_pages(pt, gpuaddr, size); + if (ret) + return ret; + goto flush; + } while (size) { if ((ops->unmap(ops, gpuaddr, PAGE_SIZE, NULL)) != PAGE_SIZE) @@ -204,6 +298,7 @@ static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) * qcom_skip_tlb_management() API takes care of avoiding * TLB operations during slumber. 
*/ +flush: if (KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE) { struct kgsl_device *device = KGSL_MMU_DEVICE(pt->base.mmu); @@ -221,29 +316,6 @@ static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) return 0; } -static size_t _iopgtbl_map_pages(struct kgsl_iommu_pt *pt, u64 gpuaddr, - struct page **pages, int npages, int prot) -{ - struct io_pgtable_ops *ops = pt->pgtbl_ops; - size_t mapped = 0; - u64 addr = gpuaddr; - int ret, i; - - for (i = 0; i < npages; i++) { - ret = ops->map(ops, addr, page_to_phys(pages[i]), PAGE_SIZE, - prot, GFP_KERNEL); - if (ret) { - _iopgtbl_unmap(pt, gpuaddr, mapped); - return 0; - } - - mapped += PAGE_SIZE; - addr += PAGE_SIZE; - } - - return mapped; -} - static size_t _iopgtbl_map_sg(struct kgsl_iommu_pt *pt, u64 gpuaddr, struct sg_table *sgt, int prot) { @@ -253,6 +325,22 @@ static size_t _iopgtbl_map_sg(struct kgsl_iommu_pt *pt, u64 gpuaddr, u64 addr = gpuaddr; int ret, i; +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) + if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_LPAE)) { + ret = qcom_arm_lpae_map_sg(ops, addr, sgt->sgl, sgt->nents, prot, + GFP_KERNEL, &mapped); +#else + if (ops->map_sg) { + ret = ops->map_sg(ops, addr, sgt->sgl, sgt->nents, prot, + GFP_KERNEL, &mapped); +#endif + if (ret) { + _iopgtbl_unmap(pt, gpuaddr, mapped); + return 0; + } + return mapped; + } + for_each_sg(sgt->sgl, sg, sgt->nents, i) { size_t size = sg->length; phys_addr_t phys = sg_phys(sg); @@ -397,12 +485,20 @@ static int kgsl_iopgtbl_map(struct kgsl_pagetable *pagetable, /* Get the protection flags for the user context */ prot = _iommu_get_protection_flags(pagetable->mmu, memdesc); - if (memdesc->sgt) - mapped = _iopgtbl_map_sg(pt, memdesc->gpuaddr, - memdesc->sgt, prot); - else - mapped = _iopgtbl_map_pages(pt, memdesc->gpuaddr, - memdesc->pages, memdesc->page_count, prot); + if (!memdesc->sgt) { + struct sg_table sgt; + int ret; + + ret = sg_alloc_table_from_pages(&sgt, memdesc->pages, + memdesc->page_count, 0, memdesc->size, 
GFP_KERNEL); + if (ret) + return ret; + mapped = _iopgtbl_map_sg(pt, memdesc->gpuaddr, &sgt, prot); + sg_free_table(&sgt); + } else { + mapped = _iopgtbl_map_sg(pt, memdesc->gpuaddr, memdesc->sgt, + prot); + } if (mapped == 0) return -ENOMEM; @@ -1224,7 +1320,7 @@ static void kgsl_iommu_destroy_pagetable(struct kgsl_pagetable *pagetable) { struct kgsl_iommu_pt *pt = to_iommu_pt(pagetable); - free_io_pgtable_ops(pt->pgtbl_ops); + qcom_free_io_pgtable_ops(pt->pgtbl_ops); kfree(pt); } @@ -1284,22 +1380,23 @@ static int kgsl_iopgtbl_alloc(struct kgsl_iommu_context *ctx, struct kgsl_iommu_ { struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(&ctx->pdev->dev); const struct io_pgtable_cfg *cfg = NULL; + void *domain = (void *)adreno_smmu->cookie; if (adreno_smmu->cookie) cfg = adreno_smmu->get_ttbr1_cfg(adreno_smmu->cookie); if (!cfg) return -ENODEV; - pt->cfg = *cfg; - pt->cfg.quirks &= ~IO_PGTABLE_QUIRK_ARM_TTBR1; - pt->cfg.tlb = &kgsl_iopgtbl_tlb_ops; - - pt->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, &pt->cfg, NULL); + pt->info = adreno_smmu->pgtbl_info; + pt->info.cfg = *cfg; + pt->info.cfg.quirks &= ~IO_PGTABLE_QUIRK_ARM_TTBR1; + pt->info.cfg.tlb = &kgsl_iopgtbl_tlb_ops; + pt->pgtbl_ops = qcom_alloc_io_pgtable_ops(QCOM_ARM_64_LPAE_S1, &pt->info, domain); if (!pt->pgtbl_ops) return -ENOMEM; - pt->ttbr0 = pt->cfg.arm_lpae_s1_cfg.ttbr; + pt->ttbr0 = pt->info.cfg.arm_lpae_s1_cfg.ttbr; return 0; } @@ -2339,11 +2436,11 @@ static int iommu_probe_user_context(struct kgsl_device *device, pt = to_iommu_pt(mmu->defaultpagetable); /* Enable TTBR0 on the default and LPAC contexts */ - kgsl_iommu_set_ttbr0(&iommu->user_context, mmu, &pt->cfg); + kgsl_iommu_set_ttbr0(&iommu->user_context, mmu, &pt->info.cfg); kgsl_set_smmu_aperture(device, &iommu->user_context); - kgsl_iommu_set_ttbr0(&iommu->lpac_context, mmu, &pt->cfg); + kgsl_iommu_set_ttbr0(&iommu->lpac_context, mmu, &pt->info.cfg); ret = set_smmu_lpac_aperture(device, &iommu->lpac_context); /* LPAC is optional, 
ignore setup failures in absence of LPAC feature */ diff --git a/kgsl_iommu.h b/kgsl_iommu.h index 271043f1df..6bce555ff7 100644 --- a/kgsl_iommu.h +++ b/kgsl_iommu.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef __KGSL_IOMMU_H #define __KGSL_IOMMU_H @@ -179,15 +179,17 @@ struct kgsl_iommu { /* * struct kgsl_iommu_pt - Iommu pagetable structure private to kgsl driver - * @domain: Pointer to the iommu domain that contains the iommu pagetable + * @base: Container of the base kgsl pagetable * @ttbr0: register value to set when using this pagetable + * @pgtbl_ops: Pagetable operations for mapping/unmapping buffers + * @info: Pagetable info used to allocate pagetable operations */ struct kgsl_iommu_pt { struct kgsl_pagetable base; u64 ttbr0; struct io_pgtable_ops *pgtbl_ops; - struct io_pgtable_cfg cfg; + struct qcom_io_pgtable_info info; }; /** From dafea907d30c1a4499e11809aa78615837871a16 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Mon, 10 Jul 2023 22:08:02 -0700 Subject: [PATCH 0510/1016] msm: kgsl: Retire single page map/unmap() callbacks Starting with the kernel version 6.2.0 single page map/unmap() callbacks are deprecated. Hence use the map_pages/unmap_pages() which serves the same purpose. 
Change-Id: I74bfe58a12f15958cd7ad1db2c50202ace6b839b Signed-off-by: Hareesh Gundu --- kgsl_iommu.c | 41 +++++++++++++++++------------------------ 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 8bce09dbe7..99a6e5c372 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -277,6 +277,7 @@ static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) { struct io_pgtable_ops *ops = pt->pgtbl_ops; int ret = 0; + size_t unmapped; if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_LPAE)) { ret = _iopgtbl_unmap_pages(pt, gpuaddr, size); @@ -285,13 +286,10 @@ static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) goto flush; } - while (size) { - if ((ops->unmap(ops, gpuaddr, PAGE_SIZE, NULL)) != PAGE_SIZE) - return -EINVAL; - - gpuaddr += PAGE_SIZE; - size -= PAGE_SIZE; - } + unmapped = ops->unmap_pages(ops, gpuaddr, PAGE_SIZE, + size >> PAGE_SHIFT, NULL); + if (unmapped != size) + return -EINVAL; /* * Skip below logic for 6.1 kernel version and above as @@ -342,22 +340,17 @@ static size_t _iopgtbl_map_sg(struct kgsl_iommu_pt *pt, u64 gpuaddr, } for_each_sg(sgt->sgl, sg, sgt->nents, i) { - size_t size = sg->length; + size_t size = sg->length, map_size = 0; phys_addr_t phys = sg_phys(sg); - while (size) { - ret = ops->map(ops, addr, phys, PAGE_SIZE, prot, GFP_KERNEL); - - if (ret) { - _iopgtbl_unmap(pt, gpuaddr, mapped); - return 0; - } - - phys += PAGE_SIZE; - mapped += PAGE_SIZE; - addr += PAGE_SIZE; - size -= PAGE_SIZE; + ret = ops->map_pages(ops, addr, phys, PAGE_SIZE, size >> PAGE_SHIFT, + prot, GFP_KERNEL, &map_size); + if (ret) { + _iopgtbl_unmap(pt, gpuaddr, mapped); + return 0; } + addr += size; + mapped += map_size; } return mapped; @@ -427,19 +420,19 @@ static size_t _iopgtbl_map_page_to_range(struct kgsl_iommu_pt *pt, struct page *page, u64 gpuaddr, size_t range, int prot) { struct io_pgtable_ops *ops = pt->pgtbl_ops; - size_t mapped = 0; + size_t mapped = 0, map_size = 0; u64 addr = gpuaddr; 
int ret; while (range) { - ret = ops->map(ops, addr, page_to_phys(page), PAGE_SIZE, - prot, GFP_KERNEL); + ret = ops->map_pages(ops, addr, page_to_phys(page), PAGE_SIZE, + 1, prot, GFP_KERNEL, &map_size); if (ret) { _iopgtbl_unmap(pt, gpuaddr, mapped); return 0; } - mapped += PAGE_SIZE; + mapped += map_size; addr += PAGE_SIZE; range -= PAGE_SIZE; } From e2dc259daa3fb44c8582d8f453e90d26ffd0befb Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Mon, 28 Aug 2023 10:54:51 -0700 Subject: [PATCH 0511/1016] kgsl: hwsched: Add support for GMU tracepoints logging In hardware scheduling some events originating in GMU are not captured on host side. The preemption tracepoints for preempt trigger and preempt done are examples. Such tracepoints need to be continued to be supported with HW scheduling for backwards compatibility and profiling or debugging. We therefore need a means to be able to log these events on the GMU with the timestamp of when they occur and convey them to the host such that kgsl can log them lazily to ftrace. 
Change-Id: Ib12e2341f928091ad3918841c267a8f2e92dc766 Signed-off-by: Hareesh Gundu --- adreno_a6xx_gmu.c | 4 + adreno_a6xx_gmu.h | 2 + adreno_a6xx_gmu_snapshot.c | 2 + adreno_a6xx_hwsched.c | 36 ++++++++- adreno_a6xx_hwsched_hfi.c | 13 +++- adreno_gen7_gmu.c | 5 ++ adreno_gen7_gmu.h | 2 + adreno_gen7_gmu_snapshot.c | 2 + adreno_gen7_hwsched.c | 36 ++++++++- adreno_gen7_hwsched_hfi.c | 13 +++- adreno_hfi.h | 16 ++++ kgsl_gmu_core.c | 135 ++++++++++++++++++++++++++++++++++ kgsl_gmu_core.h | 145 +++++++++++++++++++++++++++++++++++++ kgsl_snapshot.h | 1 + kgsl_util.h | 1 + 15 files changed, 399 insertions(+), 14 deletions(-) diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index f139dd046e..5ac165f8b1 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -614,6 +614,7 @@ int a6xx_gmu_device_start(struct adreno_device *adreno_dev) u32 val = 0x00000100; u32 mask = 0x000001FF; + gmu_core_reset_trace_header(&gmu->trace); gmu_ao_sync_event(adreno_dev); /* Check for 0xBABEFACE on legacy targets */ @@ -2945,6 +2946,9 @@ int a6xx_gmu_probe(struct kgsl_device *device, set_bit(GMU_ENABLED, &device->gmu_core.flags); + /* Initialize to zero to detect trace packet loss */ + gmu->trace.seq_num = 0; + device->gmu_core.dev_ops = &a6xx_gmudev; /* Set default GMU attributes */ diff --git a/adreno_a6xx_gmu.h b/adreno_a6xx_gmu.h index ba29ca53c6..4ad298f6ca 100644 --- a/adreno_a6xx_gmu.h +++ b/adreno_a6xx_gmu.h @@ -47,6 +47,8 @@ struct a6xx_gmu_device { struct kgsl_memdesc *gmu_log; /** @vrb: GMU virtual register bank memory */ struct kgsl_memdesc *vrb; + /** @trace: gmu trace container */ + struct kgsl_gmu_trace trace; struct a6xx_hfi hfi; struct clk_bulk_data *clks; /** @num_clks: Number of entries in the @clks array */ diff --git a/adreno_a6xx_gmu_snapshot.c b/adreno_a6xx_gmu_snapshot.c index b8188a07a3..9e0c73aabe 100644 --- a/adreno_a6xx_gmu_snapshot.c +++ b/adreno_a6xx_gmu_snapshot.c @@ -261,6 +261,8 @@ static void a6xx_gmu_snapshot_memories(struct kgsl_device *device, 
desc.type = SNAPSHOT_GMU_MEM_DEBUG; else if (md == gmu->vrb) desc.type = SNAPSHOT_GMU_MEM_VRB; + else if (md == gmu->trace.md) + desc.type = SNAPSHOT_GMU_MEM_TRACE; else desc.type = SNAPSHOT_GMU_MEM_BIN_BLOCK; diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index a341ca6bba..0e7efc21f4 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -654,13 +654,32 @@ static int a6xx_hwsched_gmu_memory_init(struct adreno_device *adreno_dev) gmu->vrb = reserve_gmu_kernel_block(gmu, 0, GMU_VRB_SIZE, GMU_NONCACHED_KERNEL, 0); + if (IS_ERR(gmu->vrb)) + return PTR_ERR(gmu->vrb); + /* Populate size of the virtual register bank */ - if (!IS_ERR(gmu->vrb)) - gmu_core_set_vrb_register(gmu->vrb->hostptr, - VRB_SIZE_IDX, gmu->vrb->size >> 2); + gmu_core_set_vrb_register(gmu->vrb->hostptr, VRB_SIZE_IDX, + gmu->vrb->size >> 2); } - return PTR_ERR_OR_ZERO(gmu->vrb); + /* GMU trace log */ + if (IS_ERR_OR_NULL(gmu->trace.md)) { + gmu->trace.md = reserve_gmu_kernel_block(gmu, 0, + GMU_TRACE_SIZE, GMU_NONCACHED_KERNEL, 0); + + if (IS_ERR(gmu->trace.md)) + return PTR_ERR(gmu->trace.md); + + /* Pass trace buffer address to GMU through the VRB */ + gmu_core_set_vrb_register(gmu->vrb->hostptr, + VRB_TRACE_BUFFER_ADDR_IDX, + gmu->trace.md->gmuaddr); + + /* Initialize the GMU trace buffer header */ + gmu_core_trace_header_init(&gmu->trace); + } + + return 0; } static int a6xx_hwsched_gmu_init(struct adreno_device *adreno_dev) @@ -1334,6 +1353,15 @@ int a6xx_hwsched_add_to_minidump(struct adreno_device *adreno_dev) return ret; } + if (!IS_ERR_OR_NULL(a6xx_dev->gmu.trace.md)) { + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, + KGSL_GMU_TRACE_ENTRY, + a6xx_dev->gmu.trace.md->hostptr, + a6xx_dev->gmu.trace.md->size); + if (ret) + return ret; + } + /* Dump HFI hwsched global mem alloc entries */ for (i = 0; i < hw_hfi->mem_alloc_entries; i++) { struct hfi_mem_alloc_entry *entry = &hw_hfi->mem_alloc_table[i]; diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c 
index 02fac3497a..ea3750413e 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -1433,17 +1433,24 @@ static int hfi_f2h_main(void *arg) { struct adreno_device *adreno_dev = arg; struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev); + struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); while (!kthread_should_stop()) { wait_event_interruptible(hfi->f2h_wq, kthread_should_stop() || - (!(is_queue_empty(adreno_dev, HFI_MSG_ID) && - is_queue_empty(adreno_dev, HFI_DBG_ID)) && - (hfi->irq_mask & HFI_IRQ_MSGQ_MASK))); + /* If msgq irq is enabled and msgq has messages to process */ + (((hfi->irq_mask & HFI_IRQ_MSGQ_MASK) && + !is_queue_empty(adreno_dev, HFI_MSG_ID)) || + /* Trace buffer has messages to process */ + !gmu_core_is_trace_empty(gmu->trace.md->hostptr) || + /* Dbgq has messages to process */ + !is_queue_empty(adreno_dev, HFI_DBG_ID))); if (kthread_should_stop()) break; a6xx_hwsched_process_msgq(adreno_dev); + gmu_core_process_trace_data(KGSL_DEVICE(adreno_dev), + &gmu->pdev->dev, &gmu->trace); a6xx_hwsched_process_dbgq(adreno_dev, true); } diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index db6e8831e1..439d0018fc 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -349,6 +349,8 @@ int gen7_gmu_device_start(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + gmu_core_reset_trace_header(&gmu->trace); + gmu_ao_sync_event(adreno_dev); /* Bring GMU out of reset */ @@ -2575,6 +2577,9 @@ int gen7_gmu_probe(struct kgsl_device *device, gmu->log_stream_enable = false; gmu->log_group_mask = 0x3; + /* Initialize to zero to detect trace packet loss */ + gmu->trace.seq_num = 0; + /* Disabled by default */ gmu->stats_enable = false; /* Set default to CM3 busy cycles countable */ diff --git a/adreno_gen7_gmu.h b/adreno_gen7_gmu.h index 483b9f3159..9291683eda 100644 --- a/adreno_gen7_gmu.h +++ b/adreno_gen7_gmu.h @@ -51,6 +51,8 @@ struct 
gen7_gmu_device { struct kgsl_memdesc *gpu_boot_scratch; /** @vrb: GMU virtual register bank memory */ struct kgsl_memdesc *vrb; + /** @trace: gmu trace container */ + struct kgsl_gmu_trace trace; struct gen7_hfi hfi; struct clk_bulk_data *clks; /** @num_clks: Number of entries in the @clks array */ diff --git a/adreno_gen7_gmu_snapshot.c b/adreno_gen7_gmu_snapshot.c index 0a2c04d6d7..bd4df95184 100644 --- a/adreno_gen7_gmu_snapshot.c +++ b/adreno_gen7_gmu_snapshot.c @@ -140,6 +140,8 @@ static void gen7_gmu_snapshot_memories(struct kgsl_device *device, desc.type = SNAPSHOT_GMU_MEM_WARMBOOT; else if (md == gmu->vrb) desc.type = SNAPSHOT_GMU_MEM_VRB; + else if (md == gmu->trace.md) + desc.type = SNAPSHOT_GMU_MEM_TRACE; else desc.type = SNAPSHOT_GMU_MEM_BIN_BLOCK; diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index fec3079ae6..cbc0e1439f 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -879,13 +879,32 @@ static int gen7_hwsched_gmu_memory_init(struct adreno_device *adreno_dev) gmu->vrb = gen7_reserve_gmu_kernel_block(gmu, 0, GMU_VRB_SIZE, GMU_NONCACHED_KERNEL, 0); + if (IS_ERR(gmu->vrb)) + return PTR_ERR(gmu->vrb); + /* Populate size of the virtual register bank */ - if (!IS_ERR(gmu->vrb)) - gmu_core_set_vrb_register(gmu->vrb->hostptr, - VRB_SIZE_IDX, gmu->vrb->size >> 2); + gmu_core_set_vrb_register(gmu->vrb->hostptr, VRB_SIZE_IDX, + gmu->vrb->size >> 2); } - return PTR_ERR_OR_ZERO(gmu->vrb); + /* GMU trace log */ + if (IS_ERR_OR_NULL(gmu->trace.md)) { + gmu->trace.md = gen7_reserve_gmu_kernel_block(gmu, 0, + GMU_TRACE_SIZE, GMU_NONCACHED_KERNEL, 0); + + if (IS_ERR(gmu->trace.md)) + return PTR_ERR(gmu->trace.md); + + /* Pass trace buffer address to GMU through the VRB */ + gmu_core_set_vrb_register(gmu->vrb->hostptr, + VRB_TRACE_BUFFER_ADDR_IDX, + gmu->trace.md->gmuaddr); + + /* Initialize the GMU trace buffer header */ + gmu_core_trace_header_init(&gmu->trace); + } + + return 0; } static int gen7_hwsched_gmu_init(struct adreno_device 
*adreno_dev) @@ -1865,6 +1884,15 @@ int gen7_hwsched_add_to_minidump(struct adreno_device *adreno_dev) return ret; } + if (!IS_ERR_OR_NULL(gen7_dev->gmu.trace.md)) { + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, + KGSL_GMU_TRACE_ENTRY, + gen7_dev->gmu.trace.md->hostptr, + gen7_dev->gmu.trace.md->size); + if (ret) + return ret; + } + /* Dump HFI hwsched global mem alloc entries */ for (i = 0; i < hw_hfi->mem_alloc_entries; i++) { struct hfi_mem_alloc_entry *entry = &hw_hfi->mem_alloc_table[i]; diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index cf1096eb8a..4af8910fa9 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -2637,17 +2637,24 @@ static int hfi_f2h_main(void *arg) { struct adreno_device *adreno_dev = arg; struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); while (!kthread_should_stop()) { wait_event_interruptible(hfi->f2h_wq, kthread_should_stop() || - (!(is_queue_empty(adreno_dev, HFI_MSG_ID) && - is_queue_empty(adreno_dev, HFI_DBG_ID)) && - (hfi->irq_mask & HFI_IRQ_MSGQ_MASK))); + /* If msgq irq is enabled and msgq has messages to process */ + (((hfi->irq_mask & HFI_IRQ_MSGQ_MASK) && + !is_queue_empty(adreno_dev, HFI_MSG_ID)) || + /* Trace buffer has messages to process */ + !gmu_core_is_trace_empty(gmu->trace.md->hostptr) || + /* Dbgq has messages to process */ + !is_queue_empty(adreno_dev, HFI_DBG_ID))); if (kthread_should_stop()) break; gen7_hwsched_process_msgq(adreno_dev); + gmu_core_process_trace_data(KGSL_DEVICE(adreno_dev), + &gmu->pdev->dev, &gmu->trace); gen7_hwsched_process_dbgq(adreno_dev, true); } diff --git a/adreno_hfi.h b/adreno_hfi.h index 2760a0119b..e1843b429f 100644 --- a/adreno_hfi.h +++ b/adreno_hfi.h @@ -474,6 +474,7 @@ enum hfi_msg_type { H2F_MSG_ISSUE_SYNCOBJ = 152, F2H_MSG_SYNCOBJ_QUERY = 153, H2F_MSG_WARMBOOT_CMD = 154, + F2H_MSG_PROCESS_TRACE = 155, HFI_MAX_ID, }; @@ -733,6 +734,21 @@ struct hfi_debug_cmd { u32 
data; } __packed; +/* F2H */ +struct hfi_trace_cmd { + u32 hdr; + u32 version; + u64 identifier; +} __packed; + +/* Trace packet definition */ +struct gmu_trace_packet { + u32 hdr; + u32 trace_id; + u64 ticks; + u32 payload[]; +} __packed; + /* F2H */ struct hfi_gmu_cntr_register_cmd { u32 hdr; diff --git a/kgsl_gmu_core.c b/kgsl_gmu_core.c index 7a5507546f..482ad65299 100644 --- a/kgsl_gmu_core.c +++ b/kgsl_gmu_core.c @@ -6,9 +6,11 @@ #include #include +#include #include #include "adreno.h" +#include "adreno_trace.h" #include "kgsl_device.h" #include "kgsl_gmu_core.h" #include "kgsl_trace.h" @@ -225,3 +227,136 @@ void gmu_core_set_vrb_register(void *ptr, u32 index, u32 val) /* Make sure the vrb write is posted before moving ahead */ wmb(); } + +static void stream_trace_data(struct gmu_trace_packet *pkt) +{ + switch (pkt->trace_id) { + case GMU_TRACE_PREEMPT_TRIGGER: { + struct trace_preempt_trigger *data = + (struct trace_preempt_trigger *)pkt->payload; + + trace_adreno_preempt_trigger(data->cur_rb, data->next_rb, + data->ctx_switch_cntl, pkt->ticks); + break; + } + case GMU_TRACE_PREEMPT_DONE: { + struct trace_preempt_done *data = + (struct trace_preempt_done *)pkt->payload; + + trace_adreno_preempt_done(data->prev_rb, data->next_rb, + data->ctx_switch_cntl, pkt->ticks); + break; + } + default: { + char str[64]; + + snprintf(str, sizeof(str), + "Unsupported GMU trace id %d\n", pkt->trace_id); + trace_kgsl_msg(str); + } + } +} + +void gmu_core_process_trace_data(struct kgsl_device *device, + struct device *dev, struct kgsl_gmu_trace *trace) +{ + struct gmu_trace_header *trace_hdr = trace->md->hostptr; + u32 size, *buffer = trace->md->hostptr; + struct gmu_trace_packet *pkt; + u16 seq_num, num_pkts = 0; + u32 ridx = readl(&trace_hdr->read_index); + u32 widx = readl(&trace_hdr->write_index); + + if (ridx == widx) + return; + + /* + * Don't process any traces and force set read_index to write_index if + * previously encountered invalid trace packet + */ + if 
(trace->reset_hdr) { + /* update read index to let f2h daemon to go to sleep */ + writel(trace_hdr->write_index, &trace_hdr->read_index); + return; + } + + /* start reading trace buffer data */ + pkt = (struct gmu_trace_packet *)&buffer[trace_hdr->payload_offset + ridx]; + + /* Validate packet header */ + if (TRACE_PKT_GET_VALID_FIELD(pkt->hdr) != TRACE_PKT_VALID) { + char str[128]; + + snprintf(str, sizeof(str), + "Invalid trace packet found at read index: %d resetting trace header\n", + trace_hdr->read_index); + /* + * GMU is not expected to write an invalid trace packet. This + * condition can be true in case there is memory corruption. In + * such scenario fastforward readindex to writeindex so the we + * don't process any trace packets until we reset the trace + * header in next slumber exit. + */ + dev_err_ratelimited(device->dev, "%s\n", str); + trace_kgsl_msg(str); + writel(trace_hdr->write_index, &trace_hdr->read_index); + trace->reset_hdr = true; + return; + } + + size = TRACE_PKT_GET_SIZE(pkt->hdr); + + if (TRACE_PKT_GET_SKIP_FIELD(pkt->hdr)) + goto done; + + seq_num = TRACE_PKT_GET_SEQNUM(pkt->hdr); + num_pkts = seq_num - trace->seq_num; + + /* Detect trace packet loss by tracking any gaps in the sequence number */ + if (num_pkts > 1) { + char str[128]; + + snprintf(str, sizeof(str), + "%d GMU trace packets dropped from sequence number: %d\n", + num_pkts - 1, trace->seq_num); + trace_kgsl_msg(str); + } + + trace->seq_num = seq_num; + stream_trace_data(pkt); +done: + ridx = (ridx + size) % trace_hdr->payload_size; + writel(ridx, &trace_hdr->read_index); +} + +bool gmu_core_is_trace_empty(struct gmu_trace_header *hdr) +{ + return (readl(&hdr->read_index) == readl(&hdr->write_index)) ? 
true : false; +} + +void gmu_core_trace_header_init(struct kgsl_gmu_trace *trace) +{ + struct gmu_trace_header *hdr = trace->md->hostptr; + + hdr->threshold = TRACE_BUFFER_THRESHOLD; + hdr->timeout = TRACE_TIMEOUT_MSEC; + hdr->metadata = FIELD_PREP(GENMASK(31, 30), TRACE_MODE_DROP) | + FIELD_PREP(GENMASK(3, 0), TRACE_HEADER_VERSION_1); + hdr->cookie = trace->md->gmuaddr; + hdr->size = trace->md->size; + hdr->log_type = TRACE_LOGTYPE_HWSCHED; +} + +void gmu_core_reset_trace_header(struct kgsl_gmu_trace *trace) +{ + struct gmu_trace_header *hdr = trace->md->hostptr; + + if (!trace->reset_hdr) + return; + + memset(hdr, 0, sizeof(struct gmu_trace_header)); + /* Reset sequence number to detect trace packet loss */ + trace->seq_num = 0; + gmu_core_trace_header_init(trace); + trace->reset_hdr = false; +} diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h index 197cf353c1..e39e8d8571 100644 --- a/kgsl_gmu_core.h +++ b/kgsl_gmu_core.h @@ -174,6 +174,122 @@ enum gmu_vrb_idx { VRB_TRACE_BUFFER_ADDR_IDX = 2, }; +/* For GMU Trace */ +#define GMU_TRACE_SIZE SZ_16K + +/* Trace header defines */ +/* Logtype to decode the trace pkt data */ +#define TRACE_LOGTYPE_HWSCHED 1 +/* Trace buffer threshold for GMU to send F2H message */ +#define TRACE_BUFFER_THRESHOLD 80 +/* + * GMU Trace timer value to check trace packet consumption. GMU timer handler tracks the + * readindex, If it's not moved since last timer fired, GMU will send the f2h message to + * drain trace packets. GMU Trace Timer will be restarted if the readindex is moving. 
+ */ +#define TRACE_TIMEOUT_MSEC 5 + +/* Trace metadata defines */ +/* Trace drop mode hint for GMU to drop trace packets when trace buffer is full */ +#define TRACE_MODE_DROP 1 +/* Trace buffer header version */ +#define TRACE_HEADER_VERSION_1 1 + +/* Trace packet defines */ +#define TRACE_PKT_VALID 1 +#define TRACE_PKT_SEQ_MASK GENMASK(15, 0) +#define TRACE_PKT_SZ_MASK GENMASK(27, 16) +#define TRACE_PKT_SZ_SHIFT 16 +#define TRACE_PKT_VALID_MASK GENMASK(31, 31) +#define TRACE_PKT_SKIP_MASK GENMASK(30, 30) +#define TRACE_PKT_VALID_SHIFT 31 +#define TRACE_PKT_SKIP_SHIFT 30 + +#define TRACE_PKT_GET_SEQNUM(hdr) ((hdr) & TRACE_PKT_SEQ_MASK) +#define TRACE_PKT_GET_SIZE(hdr) (((hdr) & TRACE_PKT_SZ_MASK) >> TRACE_PKT_SZ_SHIFT) +#define TRACE_PKT_GET_VALID_FIELD(hdr) (((hdr) & TRACE_PKT_VALID_MASK) >> TRACE_PKT_VALID_SHIFT) +#define TRACE_PKT_GET_SKIP_FIELD(hdr) (((hdr) & TRACE_PKT_SKIP_MASK) >> TRACE_PKT_SKIP_SHIFT) + +/* + * Trace buffer header definition + * Trace buffer header fields initialized/updated by KGSL and GMU + * GMU input: Following header fields are initialized by KGSL + * - @metadata, @threshold, @size, @cookie, @timeout, @log_type + * - @readIndex updated by kgsl when traces messages are consumed. + * GMU output: Following header fields are initialized by GMU only + * - @magic, @payload_offset, @payload_size + * - @write_index updated by GMU upon filling the trace messages + */ +struct gmu_trace_header { + /** @magic: Initialized by GMU to check header is valid or not */ + u32 magic; + /** + * @metadata: Trace buffer metadata.Bit(31) Trace Mode to log tracepoints + * messages, Bits [3:0] Version for header format changes. + */ + u32 metadata; + /** + * @threshold: % at which GMU to send f2h message to wakeup KMD to consume + * tracepoints data. Set it to zero to disable thresholding. Threshold is % + * of buffer full condition not the trace packet count. 
If GMU is continuously + * writing to trace buffer makes it buffer full condition when KMD is not + * consuming it. So GMU check the how much trace buffer % space is full based + * on the threshold % value.If the trace packets are filling over % buffer full + * condition GMU will send the f2h message for KMD to drain the trace messages. + */ + u32 threshold; + /** @size: trace buffer allocation size in bytes */ + u32 size; + /** @read_index: trace buffer read index in dwords */ + u32 read_index; + /** @write_index: trace buffer write index in dwords */ + u32 write_index; + /** @payload_offset: trace buffer payload dword offset */ + u32 payload_offset; + /** @payload_size: trace buffer payload size in dword */ + u32 payload_size; + /** cookie: cookie data sent through F2H_PROCESS_MESSAGE */ + u64 cookie; + /** + * timeout: GMU Trace Timer value in msec - zero to disable trace timer else + * value for GMU trace timerhandler to send HFI msg. + */ + u32 timeout; + /** @log_type: To decode the trace buffer data */ + u32 log_type; +} __packed; + +/* Trace ID definition */ +enum gmu_trace_id { + GMU_TRACE_PREEMPT_TRIGGER = 1, + GMU_TRACE_PREEMPT_DONE = 2, + GMU_TRACE_MAX, +}; + +struct trace_preempt_trigger { + u32 cur_rb; + u32 next_rb; + u32 ctx_switch_cntl; +} __packed; + +struct trace_preempt_done { + u32 prev_rb; + u32 next_rb; + u32 ctx_switch_cntl; +} __packed; + +/** + * struct kgsl_gmu_trace - wrapper for gmu trace memory object + */ +struct kgsl_gmu_trace { + /** @md: gmu trace memory descriptor */ + struct kgsl_memdesc *md; + /* @seq_num: GMU trace packet sequence number to detect drop packet count */ + u16 seq_num; + /* @reset_hdr: To reset trace buffer header incase of invalid packet */ + bool reset_hdr; +}; + /* GMU memdesc entries */ #define GMU_KERNEL_ENTRIES 16 @@ -371,4 +487,33 @@ void gmu_core_dev_force_first_boot(struct kgsl_device *device); */ void gmu_core_set_vrb_register(void *ptr, u32 index, u32 val); +/** + * gmu_core_process_trace_data - Process 
gmu trace buffer data writes to default linux trace buffer + * @device: Pointer to KGSL device + * @dev: GMU device instance + * @trace: GMU trace memory pointer + */ +void gmu_core_process_trace_data(struct kgsl_device *device, + struct device *dev, struct kgsl_gmu_trace *trace); + +/** + * gmu_core_is_trace_empty - Check for trace buffer empty/full status + * @hdr: Pointer to gmu trace header + * + * Return: true if readidex equl to writeindex else false + */ +bool gmu_core_is_trace_empty(struct gmu_trace_header *hdr); + +/** + * gmu_core_trace_header_init - Initialize the GMU trace buffer header + * @trace: Pointer to kgsl gmu trace + */ +void gmu_core_trace_header_init(struct kgsl_gmu_trace *trace); + +/** + * gmu_core_reset_trace_header - Reset GMU trace buffer header + * @trace: Pointer to kgsl gmu trace + */ +void gmu_core_reset_trace_header(struct kgsl_gmu_trace *trace); + #endif /* __KGSL_GMU_CORE_H */ diff --git a/kgsl_snapshot.h b/kgsl_snapshot.h index c45dd2804d..50f245dbf6 100644 --- a/kgsl_snapshot.h +++ b/kgsl_snapshot.h @@ -217,6 +217,7 @@ struct kgsl_snapshot_ib_v2 { #define SNAPSHOT_GMU_MEM_HW_FENCE 0x07 #define SNAPSHOT_GMU_MEM_WARMBOOT 0x08 #define SNAPSHOT_GMU_MEM_VRB 0x09 +#define SNAPSHOT_GMU_MEM_TRACE 0x0a /* GMU memory section data */ struct kgsl_snapshot_gmu_mem { diff --git a/kgsl_util.h b/kgsl_util.h index 72c3e8986b..24e041f720 100644 --- a/kgsl_util.h +++ b/kgsl_util.h @@ -18,6 +18,7 @@ #define KGSL_SCRATCH_ENTRY "kgsl_scratch" #define KGSL_MEMSTORE_ENTRY "kgsl_memstore" #define KGSL_GMU_LOG_ENTRY "kgsl_gmu_log" +#define KGSL_GMU_TRACE_ENTRY "kgsl_gmu_trace" #define KGSL_HFIMEM_ENTRY "kgsl_hfi_mem" #define KGSL_GMU_DUMPMEM_ENTRY "kgsl_gmu_dump_mem" #define KGSL_GMU_RB_ENTRY "kgsl_gmu_rb" From b081c3cbb1f1bf7bed52aa0221c2e313f53ab352 Mon Sep 17 00:00:00 2001 From: Abhishek Barman Date: Fri, 18 Aug 2023 14:04:44 +0530 Subject: [PATCH 0512/1016] msm: kgsl: Skip SMMU PT switch when using default PT If per-process pagetable is not enabled 
then current process pagetable points to default global pt and per-process pagetable ttbr0 config is set to 0x0. No CP SMMU UPDATE command is required to be submitted to ringbuffer. So,skip process pagetable switch if current process pagetable is using default pt. Change-Id: I85cdfbac704705cd4cb1c5e8a964231a8e66fe88 Signed-off-by: Abhishek Barman --- adreno_a3xx_ringbuffer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/adreno_a3xx_ringbuffer.c b/adreno_a3xx_ringbuffer.c index 9222af6b7c..3fbc91b8b5 100644 --- a/adreno_a3xx_ringbuffer.c +++ b/adreno_a3xx_ringbuffer.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. */ #include "adreno.h" @@ -109,6 +110,9 @@ static int a3xx_rb_pagetable_switch(struct adreno_device *adreno_dev, struct kgsl_iommu *iommu = KGSL_IOMMU(device); int count = 0; + /* Skip pagetable switch if current context is using default PT. */ + if (pagetable == device->mmu.defaultpagetable) + return 0; /* * Adding an indirect buffer ensures that the prefetch stalls until * the commands in indirect buffer have completed. We need to stall From 4a3e6e27dbc15ecbee8fc2f6e0566997fef9224d Mon Sep 17 00:00:00 2001 From: Abhishek Barman Date: Wed, 23 Aug 2023 23:38:40 +0530 Subject: [PATCH 0513/1016] msm: kgsl: Fix GPU microcode load for A3xx For A3xx kgsl_bulkwrite didn't load the firmware properly. Hence firmware is loaded by explicitly passing the index for the dword where the firmware load need to be started. 
Change-Id: Iaad3f3e205a5b6ba30e5166fc2bb1a2c1eded4a9 Signed-off-by: Abhishek Barman --- adreno_a3xx.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/adreno_a3xx.c b/adreno_a3xx.c index 264f31d8ea..fc9c6ab644 100644 --- a/adreno_a3xx.c +++ b/adreno_a3xx.c @@ -1318,17 +1318,20 @@ static void a3xx_microcode_load(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); size_t pm4_size = adreno_dev->fw[ADRENO_FW_PM4].size; size_t pfp_size = adreno_dev->fw[ADRENO_FW_PFP].size; + int i; /* load the CP ucode using AHB writes */ kgsl_regwrite(device, A3XX_CP_ME_RAM_WADDR, 0); - kgsl_regmap_bulk_write(&device->regmap, A3XX_CP_ME_RAM_DATA, - &adreno_dev->fw[ADRENO_FW_PM4].fwvirt[1], pm4_size - 1); + for (i = 1; i < pm4_size; i++) + kgsl_regwrite(device, A3XX_CP_ME_RAM_DATA, + adreno_dev->fw[ADRENO_FW_PM4].fwvirt[i]); kgsl_regwrite(device, A3XX_CP_PFP_UCODE_ADDR, 0); - kgsl_regmap_bulk_write(&device->regmap, A3XX_CP_PFP_UCODE_DATA, - &adreno_dev->fw[ADRENO_FW_PFP].fwvirt[1], pfp_size - 1); + for (i = 1; i < pfp_size; i++) + kgsl_regwrite(device, A3XX_CP_PFP_UCODE_DATA, + adreno_dev->fw[ADRENO_FW_PFP].fwvirt[i]); } static u64 a3xx_read_alwayson(struct adreno_device *adreno_dev) From 93276e4df6f58f9af7370e6023bcc0d252a58886 Mon Sep 17 00:00:00 2001 From: Mohammed Mirza Mandayappurath Manzoor Date: Fri, 8 Sep 2023 15:46:15 -0700 Subject: [PATCH 0514/1016] msm: kgsl: Prevent wrap around during user address mapping When setting svm region during the gpuobj import ioctl call for a usermem address, there is a possibility of a very large input size causing the region's 64-bit end address to wrap around. This can cause the region to incorrectly be considered valid, ultimately allowing a use after free scenario. To prevent this, detect the occurrence of a wrap and reject the import. 
Change-Id: I4a88f56c58b830d4342e47dc1d1f6290c78ab6b4 Signed-off-by: Mohammed Mirza Mandayappurath Manzoor --- kgsl_iommu.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 8bce09dbe7..e80af601b0 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -2165,14 +2165,20 @@ static uint64_t kgsl_iommu_find_svm_region(struct kgsl_pagetable *pagetable, static bool iommu_addr_in_svm_ranges(struct kgsl_pagetable *pagetable, u64 gpuaddr, u64 size) { + u64 end = gpuaddr + size; + + /* Make sure size is not zero and we don't wrap around */ + if (end <= gpuaddr) + return false; + if ((gpuaddr >= pagetable->compat_va_start && gpuaddr < pagetable->compat_va_end) && - ((gpuaddr + size) > pagetable->compat_va_start && - (gpuaddr + size) <= pagetable->compat_va_end)) + (end > pagetable->compat_va_start && + end <= pagetable->compat_va_end)) return true; if ((gpuaddr >= pagetable->svm_start && gpuaddr < pagetable->svm_end) && - ((gpuaddr + size) > pagetable->svm_start && - (gpuaddr + size) <= pagetable->svm_end)) + (end > pagetable->svm_start && + end <= pagetable->svm_end)) return true; return false; From 1d6e3cd2e9b4291bbfe9723f709ee629a0be9340 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Mon, 11 Sep 2023 13:45:04 -0700 Subject: [PATCH 0515/1016] msm: kgsl: Limit the syncpoint count for AUX commands KGSL internally has a limit on the length of the list of syncpoints submitted in a single AUX command. Enforce this limit so we don't overwrite memory beyond the structures that track these syncpoints. 
Change-Id: I261bfd4f786ff7e4fbe07e8bca9e9b8d8b87c950 Signed-off-by: Lynus Vaz --- kgsl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kgsl.c b/kgsl.c index f84f3e7ae6..242df6c150 100644 --- a/kgsl.c +++ b/kgsl.c @@ -2309,6 +2309,10 @@ long kgsl_ioctl_gpu_aux_command(struct kgsl_device_private *dev_priv, (KGSL_GPU_AUX_COMMAND_BIND | KGSL_GPU_AUX_COMMAND_TIMELINE))) return -EINVAL; + if ((param->flags & KGSL_GPU_AUX_COMMAND_SYNC) && + (param->numsyncs > KGSL_MAX_SYNCPOINTS)) + return -EINVAL; + context = kgsl_context_get_owner(dev_priv, param->context_id); if (!context) return -EINVAL; From 5b2295772d727f32b0a69cc3d1ca8b4897f75dc0 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Fri, 1 Sep 2023 12:27:10 -0600 Subject: [PATCH 0516/1016] kgsl: hwsched: Don't cross dereference kgsl_mem_entry pointer The passed in pointer in kgsl_count_hw_fences() can be a kgsl_mem_entry pointer. This gets cross dereferenced to a kgsl_drawobj_sync_event pointer and causes a NULL pointer dereference. To avoid this cross dereference, decouple the two paths and call kgsl_count_hw_fences() only in the appropriate path. 
Change-Id: I1088a0b67f1f82a20ddc94c94cbdd31a44b18da6 Signed-off-by: Harshdeep Dhatt --- kgsl.c | 3 +-- kgsl_drawobj.c | 5 +++-- kgsl_sync.c | 14 ++++++-------- kgsl_sync.h | 13 ++++++++----- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/kgsl.c b/kgsl.c index f84f3e7ae6..34cbf79f2d 100644 --- a/kgsl.c +++ b/kgsl.c @@ -2760,8 +2760,7 @@ static long gpuobj_free_on_fence(struct kgsl_device_private *dev_priv, return -EINVAL; } - handle = kgsl_sync_fence_async_wait(event.fd, - gpuobj_free_fence_func, entry, NULL); + handle = kgsl_sync_fence_async_wait(event.fd, gpuobj_free_fence_func, entry); if (IS_ERR(handle)) { kgsl_mem_entry_unset_pend(entry); diff --git a/kgsl_drawobj.c b/kgsl_drawobj.c index d9ecc6e56a..a30e33ab34 100644 --- a/kgsl_drawobj.c +++ b/kgsl_drawobj.c @@ -609,8 +609,7 @@ static int drawobj_add_sync_fence(struct kgsl_device *device, set_bit(event->id, &syncobj->pending); - event->handle = kgsl_sync_fence_async_wait(sync.fd, - drawobj_sync_fence_func, event, priv); + event->handle = kgsl_sync_fence_async_wait(sync.fd, drawobj_sync_fence_func, event); event->priv = priv; @@ -635,6 +634,8 @@ static int drawobj_add_sync_fence(struct kgsl_device *device, return ret; } + kgsl_get_fence_info(event); + for (i = 0; priv && i < priv->num_fences; i++) { trace_syncpoint_fence(syncobj, priv->fences[i].name); log_kgsl_syncpoint_fence_event(syncobj->base.context->id, diff --git a/kgsl_sync.c b/kgsl_sync.c index 46cc03ad18..51ea268261 100644 --- a/kgsl_sync.c +++ b/kgsl_sync.c @@ -451,17 +451,17 @@ static void kgsl_count_hw_fences(struct kgsl_drawobj_sync_event *event, struct d } } -static void kgsl_get_fence_info(struct dma_fence *fence, - struct event_fence_info *info_ptr, void *priv) +void kgsl_get_fence_info(struct kgsl_drawobj_sync_event *event) { unsigned int num_fences; - struct dma_fence **fences; + struct dma_fence *fence, **fences; struct dma_fence_array *array; - struct kgsl_drawobj_sync_event *event = priv; + struct event_fence_info 
*info_ptr = event->priv; int i; - array = to_dma_fence_array(fence); + fence = event->handle->fence; + array = to_dma_fence_array(fence); if (array != NULL) { num_fences = array->num_fences; fences = array->fences; @@ -506,7 +506,7 @@ count: } struct kgsl_sync_fence_cb *kgsl_sync_fence_async_wait(int fd, - bool (*func)(void *priv), void *priv, struct event_fence_info *info_ptr) + bool (*func)(void *priv), void *priv) { struct kgsl_sync_fence_cb *kcb; struct dma_fence *fence; @@ -527,8 +527,6 @@ struct kgsl_sync_fence_cb *kgsl_sync_fence_async_wait(int fd, kcb->priv = priv; kcb->func = func; - kgsl_get_fence_info(fence, info_ptr, priv); - /* if status then error or signaled */ status = dma_fence_add_callback(fence, &kcb->fence_cb, kgsl_sync_fence_callback); diff --git a/kgsl_sync.h b/kgsl_sync.h index f2535fbf1f..5962318ce0 100644 --- a/kgsl_sync.h +++ b/kgsl_sync.h @@ -90,9 +90,9 @@ void kgsl_sync_timeline_detach(struct kgsl_sync_timeline *ktimeline); void kgsl_sync_timeline_put(struct kgsl_sync_timeline *ktimeline); -struct kgsl_sync_fence_cb *kgsl_sync_fence_async_wait(int fd, - bool (*func)(void *priv), void *priv, - struct event_fence_info *info_ptr); +struct kgsl_sync_fence_cb *kgsl_sync_fence_async_wait(int fd, bool (*func)(void *priv), void *priv); + +void kgsl_get_fence_info(struct kgsl_drawobj_sync_event *event); void kgsl_sync_fence_async_cancel(struct kgsl_sync_fence_cb *kcb); @@ -138,9 +138,12 @@ static inline void kgsl_sync_timeline_put(struct kgsl_sync_timeline *ktimeline) } +static inline void kgsl_get_fence_info(struct kgsl_drawobj_sync_event *event) +{ +} + static inline struct kgsl_sync_fence_cb *kgsl_sync_fence_async_wait(int fd, - bool (*func)(void *priv), void *priv, - struct event_fence_info *info_ptr) + bool (*func)(void *priv), void *priv); { return NULL; } From 326e7f94442415a196438891482ea650527d5f87 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Mon, 18 Sep 2023 07:11:01 -0700 Subject: [PATCH 0517/1016] kgsl: gen7: Program the aperture 
when enabling PC, VPC perfcounters When enabling perfcounters in the PC and VPC blocks, the aperture requires to be programmed with the appropriate pipe. Use the correct select procedure for perfcounters in these blocks. Change-Id: I9b58ced5a273f7a3265f4e928bd875a63bd0e5a5 Signed-off-by: Lynus Vaz --- adreno_gen7_perfcounter.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/adreno_gen7_perfcounter.c b/adreno_gen7_perfcounter.c index f882a6fa87..4d2cf20d1b 100644 --- a/adreno_gen7_perfcounter.c +++ b/adreno_gen7_perfcounter.c @@ -1071,10 +1071,10 @@ static const struct adreno_perfcount_group gen7_hwsched_perfcounter_groups GEN7_REGULAR_PERFCOUNTER_GROUP(CP, cp), GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, RBBM, rbbm, 0, gen7_counter_enable, gen7_counter_read), - GEN7_REGULAR_PERFCOUNTER_GROUP(PC, pc), + GEN7_PERFCOUNTER_GROUP(PC, pc, gen7_counter_br_enable, gen7_counter_read), GEN7_PERFCOUNTER_GROUP(VFD, vfd, gen7_hwsched_counter_enable, gen7_counter_read), GEN7_PERFCOUNTER_GROUP(HLSQ, hlsq, gen7_counter_br_enable, gen7_counter_read), - GEN7_REGULAR_PERFCOUNTER_GROUP(VPC, vpc), + GEN7_PERFCOUNTER_GROUP(VPC, vpc, gen7_counter_br_enable, gen7_counter_read), GEN7_REGULAR_PERFCOUNTER_GROUP(CCU, ccu), GEN7_REGULAR_PERFCOUNTER_GROUP(CMP, cmp), GEN7_PERFCOUNTER_GROUP(TSE, tse, gen7_counter_br_enable, gen7_counter_read), @@ -1101,9 +1101,9 @@ static const struct adreno_perfcount_group gen7_hwsched_perfcounter_groups gen7_counter_gmu_perf_enable, gen7_counter_read_norestore), GEN7_REGULAR_PERFCOUNTER_GROUP(UFC, ufc), GEN7_BV_REGULAR_PERFCOUNTER_GROUP(CP, cp), - GEN7_BV_REGULAR_PERFCOUNTER_GROUP(PC, pc), + GEN7_BV_PERFCOUNTER_GROUP(PC, pc, gen7_counter_bv_enable, gen7_counter_read), GEN7_BV_REGULAR_PERFCOUNTER_GROUP(VFD, vfd), - GEN7_BV_REGULAR_PERFCOUNTER_GROUP(VPC, vpc), + GEN7_BV_PERFCOUNTER_GROUP(VPC, vpc, gen7_counter_bv_enable, gen7_counter_read), GEN7_BV_REGULAR_PERFCOUNTER_GROUP(TP, tp), GEN7_BV_REGULAR_PERFCOUNTER_GROUP(SP, sp), 
GEN7_BV_REGULAR_PERFCOUNTER_GROUP(UFC, ufc), @@ -1118,10 +1118,10 @@ static const struct adreno_perfcount_group gen7_9_0_hwsched_perfcounter_groups GEN7_REGULAR_PERFCOUNTER_GROUP(CP, cp), GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, RBBM, rbbm, 0, gen7_counter_enable, gen7_counter_read), - GEN7_REGULAR_PERFCOUNTER_GROUP(PC, pc), + GEN7_PERFCOUNTER_GROUP(PC, pc, gen7_counter_br_enable, gen7_counter_read), GEN7_PERFCOUNTER_GROUP(VFD, vfd, gen7_hwsched_counter_enable, gen7_counter_read), GEN7_PERFCOUNTER_GROUP(HLSQ, hlsq, gen7_counter_br_enable, gen7_counter_read), - GEN7_REGULAR_PERFCOUNTER_GROUP(VPC, vpc), + GEN7_PERFCOUNTER_GROUP(VPC, vpc, gen7_counter_br_enable, gen7_counter_read), GEN7_REGULAR_PERFCOUNTER_GROUP(CCU, ccu), GEN7_REGULAR_PERFCOUNTER_GROUP(CMP, cmp), GEN7_PERFCOUNTER_GROUP(TSE, tse, gen7_counter_br_enable, gen7_counter_read), @@ -1149,8 +1149,9 @@ static const struct adreno_perfcount_group gen7_9_0_hwsched_perfcounter_groups GEN7_REGULAR_PERFCOUNTER_GROUP(UFC, ufc), GEN7_BV_REGULAR_PERFCOUNTER_GROUP(CP, cp), GEN7_BV_REGULAR_PERFCOUNTER_GROUP(PC, pc), + GEN7_BV_PERFCOUNTER_GROUP(PC, pc, gen7_counter_bv_enable, gen7_counter_read), GEN7_BV_REGULAR_PERFCOUNTER_GROUP(VFD, vfd), - GEN7_BV_REGULAR_PERFCOUNTER_GROUP(VPC, vpc), + GEN7_BV_PERFCOUNTER_GROUP(VPC, vpc, gen7_counter_bv_enable, gen7_counter_read), GEN7_BV_REGULAR_PERFCOUNTER_GROUP(TP, tp), GEN7_BV_REGULAR_PERFCOUNTER_GROUP(SP, sp), GEN7_BV_REGULAR_PERFCOUNTER_GROUP(UFC, ufc), @@ -1165,10 +1166,10 @@ static const struct adreno_perfcount_group gen7_perfcounter_groups GEN7_REGULAR_PERFCOUNTER_GROUP(CP, cp), GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, RBBM, rbbm, 0, gen7_counter_enable, gen7_counter_read), - GEN7_REGULAR_PERFCOUNTER_GROUP(PC, pc), + GEN7_PERFCOUNTER_GROUP(PC, pc, gen7_counter_br_enable, gen7_counter_read), GEN7_PERFCOUNTER_GROUP(VFD, vfd, gen7_counter_inline_enable, gen7_counter_read), GEN7_PERFCOUNTER_GROUP(HLSQ, hlsq, gen7_counter_br_enable, gen7_counter_read), - GEN7_REGULAR_PERFCOUNTER_GROUP(VPC, 
vpc), + GEN7_PERFCOUNTER_GROUP(VPC, vpc, gen7_counter_br_enable, gen7_counter_read), GEN7_REGULAR_PERFCOUNTER_GROUP(CCU, ccu), GEN7_REGULAR_PERFCOUNTER_GROUP(CMP, cmp), GEN7_PERFCOUNTER_GROUP(TSE, tse, gen7_counter_br_enable, gen7_counter_read), @@ -1195,9 +1196,9 @@ static const struct adreno_perfcount_group gen7_perfcounter_groups gen7_counter_gmu_perf_enable, gen7_counter_read_norestore), GEN7_REGULAR_PERFCOUNTER_GROUP(UFC, ufc), GEN7_BV_REGULAR_PERFCOUNTER_GROUP(CP, cp), - GEN7_BV_REGULAR_PERFCOUNTER_GROUP(PC, pc), + GEN7_BV_PERFCOUNTER_GROUP(PC, pc, gen7_counter_bv_enable, gen7_counter_read), GEN7_BV_REGULAR_PERFCOUNTER_GROUP(VFD, vfd), - GEN7_BV_REGULAR_PERFCOUNTER_GROUP(VPC, vpc), + GEN7_BV_PERFCOUNTER_GROUP(VPC, vpc, gen7_counter_bv_enable, gen7_counter_read), GEN7_BV_REGULAR_PERFCOUNTER_GROUP(TP, tp), GEN7_BV_REGULAR_PERFCOUNTER_GROUP(SP, sp), GEN7_BV_REGULAR_PERFCOUNTER_GROUP(UFC, ufc), From 88071754f39490be2f4b39f7456677d65c682c5e Mon Sep 17 00:00:00 2001 From: Rakesh Naidu Bhaviripudi Date: Tue, 22 Aug 2023 12:32:30 +0530 Subject: [PATCH 0518/1016] kgsl: build: Add change to compile graphics-kernel for holi Add change to compile graphics kernel code for holi. 
Change-Id: I56906f3c8437733126624bb76b6ac5393977c120 Signed-off-by: Rakesh Naidu Bhaviripudi --- Kbuild | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Kbuild b/Kbuild index 762f33ef94..933d63212b 100644 --- a/Kbuild +++ b/Kbuild @@ -52,6 +52,9 @@ endif ifeq ($(CONFIG_ARCH_QCS405), y) include $(KGSL_PATH)/config/gki_qcs405.conf endif +ifeq ($(CONFIG_ARCH_HOLI), y) + include $(KGSL_PATH)/config/gki_blair.conf +endif ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERNEL_SRC)/drivers/devfreq From 358f22702b9cfdfb93fd1c06b3aae0f0f7d46707 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 5 Sep 2023 16:17:20 +0530 Subject: [PATCH 0519/1016] kgsl: hwsched: Use global context for inline perfcounter select Raw commands are designed to be used during boot up only. Register a global context (managed by KGSL) with GMU to submit perfcounter select PM4 packets for SP/TP/VFD blocks. Change-Id: Ie6cb8ac5325d323b1e9a48ce8e22af0f019fa1eb Signed-off-by: Kamal Agrawal --- adreno_a6xx.h | 3 - adreno_a6xx_hwsched_hfi.c | 106 +++++++++++++++++++++++++++--- adreno_a6xx_hwsched_hfi.h | 2 + adreno_gen7.h | 3 - adreno_gen7_hwsched_hfi.c | 133 ++++++++++++++++++++++++++++++++++---- adreno_gen7_hwsched_hfi.h | 2 + adreno_hwsched.c | 14 ++++ adreno_hwsched.h | 5 +- kgsl.c | 2 +- kgsl.h | 4 ++ 10 files changed, 244 insertions(+), 30 deletions(-) diff --git a/adreno_a6xx.h b/adreno_a6xx.h index a00f389ebe..48f6345846 100644 --- a/adreno_a6xx.h +++ b/adreno_a6xx.h @@ -167,9 +167,6 @@ struct a6xx_cp_smmu_info { /* Size of the CP_INIT pm4 stream in dwords */ #define A6XX_CP_INIT_DWORDS 11 -/* Size of the perf counter enable pm4 stream in dwords */ -#define A6XX_PERF_COUNTER_ENABLE_DWORDS 3 - #define A6XX_INT_MASK \ ((1 << A6XX_INT_CP_AHB_ERROR) | \ (1 << A6XX_INT_ATB_ASYNCFIFO_OVERFLOW) | \ diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index ea3750413e..ef2ffeef07 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ 
b/adreno_a6xx_hwsched_hfi.c @@ -53,6 +53,10 @@ static struct dq_info { { 3, 11, }, /* RB3 */ }; +static int a6xx_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, uint32_t queue_idx, + uint32_t *msg, u32 size_bytes, struct kgsl_drawobj_cmd *cmdobj, + struct adreno_submit_time *time); + struct a6xx_hwsched_hfi *to_a6xx_hwsched_hfi( struct adreno_device *adreno_dev) { @@ -1377,25 +1381,107 @@ int a6xx_hwsched_cp_init(struct adreno_device *adreno_dev) return ret; } +static int register_global_ctxt(struct adreno_device *adreno_dev) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct hfi_register_ctxt_cmd rcmd = {0}; + struct hfi_context_pointers_cmd pcmd = {0}; + int ret; + + if (hwsched->global_ctxt_gmu_registered) + return 0; + + ret = CMD_MSG_HDR(rcmd, H2F_MSG_REGISTER_CONTEXT); + if (ret) + return ret; + + rcmd.ctxt_id = KGSL_GLOBAL_CTXT_ID; + rcmd.flags = (KGSL_CONTEXT_PRIORITY_HIGH << KGSL_CONTEXT_PRIORITY_SHIFT); + + ret = a6xx_hfi_send_cmd_async(adreno_dev, &rcmd, sizeof(rcmd)); + if (ret) + return ret; + + ret = CMD_MSG_HDR(pcmd, H2F_MSG_CONTEXT_POINTERS); + if (ret) + return ret; + + pcmd.ctxt_id = KGSL_GLOBAL_CTXT_ID; + pcmd.sop_addr = MEMSTORE_ID_GPU_ADDR(device, KGSL_GLOBAL_CTXT_ID, soptimestamp); + pcmd.eop_addr = MEMSTORE_ID_GPU_ADDR(device, KGSL_GLOBAL_CTXT_ID, eoptimestamp); + + ret = a6xx_hfi_send_cmd_async(adreno_dev, &pcmd, sizeof(pcmd)); + if (!ret) + hwsched->global_ctxt_gmu_registered = true; + + return ret; +} + +#define HFI_DSP_IRQ_BASE 2 + +#define DISPQ_IRQ_BIT(_idx) BIT((_idx) + HFI_DSP_IRQ_BASE) + +static int submit_global_ctxt_cmd(struct adreno_device *adreno_dev, u64 gpuaddr, u32 size) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct { + struct hfi_submit_cmd submit_cmd; + struct hfi_issue_ib issue_ib; + } cmd = {0}; + u32 seqnum, cmd_size = sizeof(cmd); + static u32 ts; + int ret = 0; + + cmd.submit_cmd.ctxt_id = KGSL_GLOBAL_CTXT_ID; + 
cmd.submit_cmd.ts = ++ts; + cmd.submit_cmd.numibs = 1; + + cmd.issue_ib.addr = gpuaddr; + cmd.issue_ib.size = size; + + seqnum = atomic_inc_return(&adreno_dev->hwsched.submission_seqnum); + cmd.submit_cmd.hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD, HFI_MSG_CMD); + cmd.submit_cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.submit_cmd.hdr, seqnum, cmd_size >> 2); + + ret = a6xx_hfi_dispatch_queue_write(adreno_dev, HFI_DSP_ID_0, + (u32 *)&cmd, cmd_size, NULL, NULL); + /* Send interrupt to GMU to receive the message */ + if (!ret) + gmu_core_regwrite(device, A6XX_GMU_HOST2GMU_INTR_SET, DISPQ_IRQ_BIT(0)); + + return ret; +} + int a6xx_hwsched_counter_inline_enable(struct adreno_device *adreno_dev, const struct adreno_perfcount_group *group, u32 counter, u32 countable) { + struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct adreno_perfcount_register *reg = &group->regs[counter]; - u32 val, cmds[A6XX_PERF_COUNTER_ENABLE_DWORDS + 1]; + u32 val, *cmds, count = 0; int ret; + ret = register_global_ctxt(adreno_dev); + if (ret) + goto err; + + ret = adreno_allocate_global(device, &hfi->perfctr_scratch, + PAGE_SIZE, 0, KGSL_MEMFLAGS_GPUREADONLY, 0, "perfctr_scratch"); + if (ret) + goto err; + if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) a6xx_perfcounter_update(adreno_dev, reg, false); - cmds[0] = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD_RAW, HFI_MSG_CMD); + cmds = hfi->perfctr_scratch->hostptr; - cmds[1] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); - cmds[2] = cp_type4_packet(reg->select, 1); - cmds[3] = countable; + cmds[count++] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); + cmds[count++] = cp_type4_packet(reg->select, 1); + cmds[count++] = countable; - ret = a6xx_hfi_send_cmd_async(adreno_dev, cmds, sizeof(cmds)); + ret = submit_global_ctxt_cmd(adreno_dev, hfi->perfctr_scratch->gpuaddr, count << 2); if (ret) goto err; @@ -1736,6 +1822,9 @@ static int a6xx_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, uint3 /* 
Ensure packet is written out before proceeding */ wmb(); + if (!cmdobj) + goto done; + a6xx_add_profile_events(adreno_dev, cmdobj, time); /* @@ -1746,6 +1835,7 @@ static int a6xx_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, uint3 */ adreno_profile_submit_time(time); +done: trace_kgsl_hfi_send(id, size_dwords, MSG_HDR_GET_SEQNUM(*msg)); hfi_update_write_idx(&hdr->write_index, write); @@ -1753,10 +1843,6 @@ static int a6xx_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, uint3 return 0; } -#define HFI_DSP_IRQ_BASE 2 - -#define DISPQ_IRQ_BIT(_idx) BIT((_idx) + HFI_DSP_IRQ_BASE) - int a6xx_hwsched_submit_drawobj(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj) { diff --git a/adreno_a6xx_hwsched_hfi.h b/adreno_a6xx_hwsched_hfi.h index 56cc8a5937..8e508723fc 100644 --- a/adreno_a6xx_hwsched_hfi.h +++ b/adreno_a6xx_hwsched_hfi.h @@ -24,6 +24,8 @@ struct a6xx_hwsched_hfi { struct kgsl_memdesc *big_ib; /** @big_ib_recurring: GMU buffer to hold big recurring IBs */ struct kgsl_memdesc *big_ib_recurring; + /** @perfctr_scratch: Buffer to hold perfcounter PM4 commands */ + struct kgsl_memdesc *perfctr_scratch; /** @msg_mutex: Mutex for accessing the msgq */ struct mutex msgq_mutex; }; diff --git a/adreno_gen7.h b/adreno_gen7.h index 74f08831f4..0a4b03ff50 100644 --- a/adreno_gen7.h +++ b/adreno_gen7.h @@ -196,9 +196,6 @@ struct gen7_cp_smmu_info { /* Size of the CP_INIT pm4 stream in dwords */ #define GEN7_CP_INIT_DWORDS 10 -/* Size of the perf counter enable pm4 stream in dwords */ -#define GEN7_PERF_COUNTER_ENABLE_DWORDS 3 - #define GEN7_INT_MASK \ ((1 << GEN7_INT_AHBERROR) | \ (1 << GEN7_INT_ATBASYNCFIFOOVERFLOW) | \ diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index f6378fc718..19d3a4f9e3 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -3019,12 +3019,12 @@ static void populate_ibs(struct adreno_device *adreno_dev, #define DISPQ_IRQ_BIT(_idx) BIT((_idx) + HFI_DSP_IRQ_BASE) int 
gen7_gmu_context_queue_write(struct adreno_device *adreno_dev, - struct adreno_context *drawctxt, u32 *msg, u32 size_bytes, + struct kgsl_memdesc *gmu_context_queue, u32 *msg, u32 size_bytes, struct kgsl_drawobj *drawobj, struct adreno_submit_time *time) { - struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; + struct gmu_context_queue_header *hdr = gmu_context_queue->hostptr; const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); - u32 *queue = drawctxt->gmu_context_queue.hostptr + sizeof(*hdr); + u32 *queue = gmu_context_queue->hostptr + sizeof(*hdr); u32 i, empty_space, write_idx = hdr->write_index, read_idx = hdr->read_index; u32 size_dwords = size_bytes >> 2; u32 align_size = ALIGN(size_dwords, SZ_4); @@ -3055,6 +3055,9 @@ int gen7_gmu_context_queue_write(struct adreno_device *adreno_dev, /* Ensure packet is written out before proceeding */ wmb(); + if (!drawobj) + goto done; + if (drawobj->type & SYNCOBJ_TYPE) { struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); @@ -3226,8 +3229,8 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_SYNCOBJ, HFI_MSG_CMD); cmd->hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd->hdr, seqnum, cmd_sizebytes >> 2); - return gen7_gmu_context_queue_write(adreno_dev, drawctxt, (u32 *)cmd, cmd_sizebytes, - drawobj, NULL); + return gen7_gmu_context_queue_write(adreno_dev, &drawctxt->gmu_context_queue, + (u32 *)cmd, cmd_sizebytes, drawobj, NULL); } int gen7_hwsched_check_context_inflight_hw_fences(struct adreno_device *adreno_dev, @@ -3719,6 +3722,9 @@ static int gen7_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, u32 q /* Ensure packet is written out before proceeding */ wmb(); + if (!cmdobj) + goto done; + gen7_add_profile_events(adreno_dev, cmdobj, time); /* @@ -3729,6 +3735,7 @@ static int gen7_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, u32 q */ adreno_profile_submit_time(time); +done: trace_kgsl_hfi_send(id, size_dwords, 
MSG_HDR_GET_SEQNUM(*msg)); hfi_update_write_idx(&hdr->write_index, write); @@ -3825,7 +3832,7 @@ skipib: if (adreno_hwsched_context_queue_enabled(adreno_dev)) ret = gen7_gmu_context_queue_write(adreno_dev, - drawctxt, (u32 *)cmd, cmd_sizebytes, drawobj, &time); + &drawctxt->gmu_context_queue, (u32 *)cmd, cmd_sizebytes, drawobj, &time); else ret = gen7_hfi_dispatch_queue_write(adreno_dev, HFI_DSP_ID_0 + drawobj->context->gmu_dispatch_queue, @@ -4124,26 +4131,128 @@ void gen7_hwsched_context_destroy(struct adreno_device *adreno_dev, gen7_free_gmu_block(to_gen7_gmu(adreno_dev), &drawctxt->gmu_hw_fence_queue); } +static int register_global_ctxt(struct adreno_device *adreno_dev) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct hfi_register_ctxt_cmd rcmd = {0}; + struct hfi_context_pointers_cmd pcmd = {0}; + int ret; + + if (hwsched->global_ctxt_gmu_registered) + return 0; + + if (adreno_hwsched_context_queue_enabled(adreno_dev) && !hwsched->global_ctxtq.hostptr) { + struct gmu_context_queue_header *hdr; + + ret = gen7_alloc_gmu_kernel_block(to_gen7_gmu(adreno_dev), &hwsched->global_ctxtq, + SZ_4K, GMU_NONCACHED_KERNEL, IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV); + if (ret) { + memset(&hwsched->global_ctxtq, 0x0, sizeof(hwsched->global_ctxtq)); + return ret; + } + + hdr = hwsched->global_ctxtq.hostptr; + hdr->start_addr = hwsched->global_ctxtq.gmuaddr + sizeof(*hdr); + hdr->queue_size = (hwsched->global_ctxtq.size - sizeof(*hdr)) >> 2; + } + + ret = CMD_MSG_HDR(rcmd, H2F_MSG_REGISTER_CONTEXT); + if (ret) + return ret; + + rcmd.ctxt_id = KGSL_GLOBAL_CTXT_ID; + rcmd.flags = (KGSL_CONTEXT_PRIORITY_HIGH << KGSL_CONTEXT_PRIORITY_SHIFT); + + ret = gen7_hfi_send_cmd_async(adreno_dev, &rcmd, sizeof(rcmd)); + if (ret) + return ret; + + ret = CMD_MSG_HDR(pcmd, H2F_MSG_CONTEXT_POINTERS); + if (ret) + return ret; + + pcmd.ctxt_id = KGSL_GLOBAL_CTXT_ID; + pcmd.sop_addr = MEMSTORE_ID_GPU_ADDR(device, 
KGSL_GLOBAL_CTXT_ID, soptimestamp); + pcmd.eop_addr = MEMSTORE_ID_GPU_ADDR(device, KGSL_GLOBAL_CTXT_ID, eoptimestamp); + + if (adreno_hwsched_context_queue_enabled(adreno_dev)) + pcmd.gmu_context_queue_addr = hwsched->global_ctxtq.gmuaddr; + + ret = gen7_hfi_send_cmd_async(adreno_dev, &pcmd, sizeof(pcmd)); + if (!ret) + hwsched->global_ctxt_gmu_registered = true; + + return ret; +} + +static int submit_global_ctxt_cmd(struct adreno_device *adreno_dev, u64 gpuaddr, u32 size) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct { + struct hfi_submit_cmd submit_cmd; + struct hfi_issue_ib issue_ib; + } cmd = {0}; + u32 seqnum, cmd_size = sizeof(cmd); + static u32 ts; + int ret; + + cmd.submit_cmd.ctxt_id = KGSL_GLOBAL_CTXT_ID; + cmd.submit_cmd.ts = ++ts; + cmd.submit_cmd.numibs = 1; + + cmd.issue_ib.addr = gpuaddr; + cmd.issue_ib.size = size; + + seqnum = atomic_inc_return(&hwsched->submission_seqnum); + cmd.submit_cmd.hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD, HFI_MSG_CMD); + cmd.submit_cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.submit_cmd.hdr, seqnum, cmd_size >> 2); + + if (adreno_hwsched_context_queue_enabled(adreno_dev)) + ret = gen7_gmu_context_queue_write(adreno_dev, + &hwsched->global_ctxtq, (u32 *)&cmd, cmd_size, NULL, NULL); + else + ret = gen7_hfi_dispatch_queue_write(adreno_dev, HFI_DSP_ID_0, + (u32 *)&cmd, cmd_size, NULL, NULL); + + /* Send interrupt to GMU to receive the message */ + if (!ret) + gmu_core_regwrite(device, GEN7_GMU_HOST2GMU_INTR_SET, DISPQ_IRQ_BIT(0)); + + return ret; +} + int gen7_hwsched_counter_inline_enable(struct adreno_device *adreno_dev, const struct adreno_perfcount_group *group, u32 counter, u32 countable) { + struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct adreno_perfcount_register *reg = &group->regs[counter]; - u32 val, cmds[GEN7_PERF_COUNTER_ENABLE_DWORDS + 1]; + u32 val, *cmds, count = 0; 
int ret; + ret = register_global_ctxt(adreno_dev); + if (ret) + goto err; + + ret = adreno_allocate_global(device, &hfi->perfctr_scratch, + PAGE_SIZE, 0, KGSL_MEMFLAGS_GPUREADONLY, 0, "perfctr_scratch"); + if (ret) + goto err; + if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) gen7_perfcounter_update(adreno_dev, reg, false, FIELD_PREP(GENMASK(13, 12), PIPE_NONE)); - cmds[0] = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD_RAW, HFI_MSG_CMD); + cmds = hfi->perfctr_scratch->hostptr; - cmds[1] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); - cmds[2] = cp_type4_packet(reg->select, 1); - cmds[3] = countable; + cmds[count++] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); + cmds[count++] = cp_type4_packet(reg->select, 1); + cmds[count++] = countable; - ret = gen7_hfi_send_cmd_async(adreno_dev, cmds, sizeof(cmds)); + ret = submit_global_ctxt_cmd(adreno_dev, hfi->perfctr_scratch->gpuaddr, count << 2); if (ret) goto err; diff --git a/adreno_gen7_hwsched_hfi.h b/adreno_gen7_hwsched_hfi.h index 695d11e2a2..80afb5798c 100644 --- a/adreno_gen7_hwsched_hfi.h +++ b/adreno_gen7_hwsched_hfi.h @@ -55,6 +55,8 @@ struct gen7_hwsched_hfi { struct kgsl_memdesc *big_ib; /** @big_ib_recurring: GMU buffer to hold big recurring IBs */ struct kgsl_memdesc *big_ib_recurring; + /** @perfctr_scratch: Buffer to hold perfcounter PM4 commands */ + struct kgsl_memdesc *perfctr_scratch; /** @msg_mutex: Mutex for accessing the msgq */ struct mutex msgq_mutex; struct { diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 5703eb94f7..48660a2476 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -1325,6 +1325,9 @@ static void adreno_hwsched_dispatcher_close(struct adreno_device *adreno_dev) kfree(hwsched->ctxt_bad); adreno_hwsched_deregister_hw_fence(adreno_dev); + + if (hwsched->global_ctxtq.hostptr) + kgsl_sharedmem_free(&hwsched->global_ctxtq); } static void force_retire_timestamp(struct kgsl_device *device, @@ -2170,10 +2173,21 @@ static int unregister_context(int id, void *ptr, void *data) void 
adreno_hwsched_unregister_contexts(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; read_lock(&device->context_lock); idr_for_each(&device->context_idr, unregister_context, NULL); read_unlock(&device->context_lock); + + if (hwsched->global_ctxtq.hostptr) { + struct gmu_context_queue_header *header = hwsched->global_ctxtq.hostptr; + + header->read_index = header->write_index; + /* This is to make sure GMU sees the correct indices after recovery */ + mb(); + } + + hwsched->global_ctxt_gmu_registered = false; } static int hwsched_idle(struct adreno_device *adreno_dev) diff --git a/adreno_hwsched.h b/adreno_hwsched.h index ef0c457359..378c95ae11 100644 --- a/adreno_hwsched.h +++ b/adreno_hwsched.h @@ -115,7 +115,10 @@ struct adreno_hwsched { * dispatch queues */ atomic_t submission_seqnum; - + /** @global_ctxtq: Memory descriptor for global context queue */ + struct kgsl_memdesc global_ctxtq; + /** @global_ctxt_gmu_registered: Whether global context is registered with gmu */ + bool global_ctxt_gmu_registered; }; /* diff --git a/kgsl.c b/kgsl.c index f84f3e7ae6..ea20ed7e06 100644 --- a/kgsl.c +++ b/kgsl.c @@ -630,7 +630,7 @@ static int _kgsl_get_context_id(struct kgsl_device *device) write_lock(&device->context_lock); /* Allocate the slot but don't put a pointer in it yet */ id = idr_alloc(&device->context_idr, NULL, 1, - KGSL_MEMSTORE_MAX, GFP_NOWAIT); + KGSL_GLOBAL_CTXT_ID, GFP_NOWAIT); write_unlock(&device->context_lock); idr_preload_end(); diff --git a/kgsl.h b/kgsl.h index d854ca6cc1..293225dae0 100644 --- a/kgsl.h +++ b/kgsl.h @@ -70,6 +70,10 @@ ((dev)->memstore->gpuaddr + \ KGSL_MEMSTORE_OFFSET(((rb)->id + KGSL_MEMSTORE_MAX), field)) +#define KGSL_CONTEXT_PRIORITY_HIGH 0 +/* Last context id is reserved for global context */ +#define KGSL_GLOBAL_CTXT_ID (KGSL_MEMSTORE_MAX - 1) + /* * SCRATCH MEMORY: The scratch memory is one page worth of data that * is mapped into 
the GPU. This allows for some 'shared' data between From 26cb5e1f9dd29aef7f12d7b3cd4099857d6553f6 Mon Sep 17 00:00:00 2001 From: Amit Kushwaha Date: Thu, 21 Sep 2023 15:28:51 +0530 Subject: [PATCH 0520/1016] msm: kgsl: Skip CX GDSC notifier register for unsupported targets Check CX GDSC support before registering the notifier for it. Change-Id: If96f7e31c9b6d11e976473a04a23896fc4e95b63 Signed-off-by: Amit Kushwaha --- kgsl_pwrctrl.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index d23236a16f..2099e9b2ae 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -1438,8 +1438,12 @@ int kgsl_register_gdsc_notifier(struct kgsl_device *device) { struct kgsl_pwrctrl *pwr = &device->pwrctrl; - pwr->cx_gdsc_nb.notifier_call = kgsl_cx_gdsc_event; - return devm_regulator_register_notifier(pwr->cx_gdsc, &pwr->cx_gdsc_nb); + if (!IS_ERR_OR_NULL(pwr->cx_gdsc)) { + pwr->cx_gdsc_nb.notifier_call = kgsl_cx_gdsc_event; + return devm_regulator_register_notifier(pwr->cx_gdsc, &pwr->cx_gdsc_nb); + } + + return 0; } static int kgsl_pwrctrl_pwrrail(struct kgsl_device *device, bool state) From 412ecebdd9569e79e244ca66dfde45580db40b9f Mon Sep 17 00:00:00 2001 From: Abhishek Barman Date: Thu, 31 Aug 2023 17:25:43 +0800 Subject: [PATCH 0521/1016] msm: kgsl: Enable hibernation for kalama Enable QCOM_KGSL_HIBERNATION config to support hibernation in kgsl for kalama chipset. 
Change-Id: I63f83a45084aa33272c7e3ca78d2c48e0c6e49d0 Signed-off-by: zhezhe song Signed-off-by: Abhishek Barman --- config/gki_kalama.conf | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/config/gki_kalama.conf b/config/gki_kalama.conf index 8314658770..538e8f760c 100644 --- a/config/gki_kalama.conf +++ b/config/gki_kalama.conf @@ -6,6 +6,10 @@ CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT = y CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR = \"msm-adreno-tz\" +ifneq ($(CONFIG_HIBERNATION),) + CONFIG_QCOM_KGSL_HIBERNATION = y +endif + ifneq ($(CONFIG_CORESIGHT),) CONFIG_QCOM_KGSL_CORESIGHT = y endif @@ -18,6 +22,10 @@ ccflags-y += -DCONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=1 \ -DCONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=1 \ -DCONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR=\"msm-adreno-tz\" +ifneq ($(CONFIG_HIBERNATION),) + ccflags-y += -DCONFIG_QCOM_KGSL_HIBERNATION=1 +endif + ifneq ($(CONFIG_CORESIGHT),) ccflags-y += -DCONFIG_QCOM_KGSL_CORESIGHT=1 endif From f7acc6c74ddb12fd3cf9d944061b9f372785cc96 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Wed, 2 Feb 2022 20:04:25 +0530 Subject: [PATCH 0522/1016] msm: kgsl: Add freq limiter interrupt handler Add functionality to handle freq limiter interrupt. Since this interrupt is not available on some devices, use the platform_get_irq_byname_optional API to suppress error messages if it is not present. 
Change-Id: Idcff660c5fca9b64930fbf35911210934f661c96 Signed-off-by: Pankaj Gupta Signed-off-by: Harshitha Sai Neelati Signed-off-by: Mohammed Mirza Mandayappurath Manzoor Signed-off-by: Lynus Vaz --- adreno.c | 32 ++++++++++++++++++++++++++++---- kgsl.c | 21 +++++++++++++++++++++ kgsl.h | 3 +++ kgsl_device.h | 6 ++++++ kgsl_pwrctrl.c | 4 ++++ 5 files changed, 62 insertions(+), 4 deletions(-) diff --git a/adreno.c b/adreno.c index fb8b120ce4..1603294836 100644 --- a/adreno.c +++ b/adreno.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -419,6 +420,23 @@ static irqreturn_t adreno_irq_handler(int irq, void *data) return ret; } +static irqreturn_t adreno_freq_limiter_irq_handler(int irq, void *data) +{ + struct kgsl_device *device = data; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + dev_err_ratelimited(device->dev, + "GPU req freq %u from prev freq %u unsupported for speed_bin: %d, soc_code: 0x%x\n", + pwr->pwrlevels[pwr->active_pwrlevel].gpu_freq, + pwr->pwrlevels[pwr->previous_pwrlevel].gpu_freq, + device->speed_bin, + device->soc_code); + + reset_control_reset(device->freq_limiter_irq_clear); + + return IRQ_HANDLED; +} + irqreturn_t adreno_irq_callbacks(struct adreno_device *adreno_dev, const struct adreno_irq_funcs *funcs, u32 status) { @@ -766,7 +784,6 @@ static int adreno_of_get_pwrlevels(struct adreno_device *adreno_dev, struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct device_node *node, *child; int feature_code, pcode; - u32 soc_code; node = of_find_node_by_name(parent, "qcom,gpu-pwrlevel-bins"); if (node == NULL) @@ -776,7 +793,8 @@ static int adreno_of_get_pwrlevels(struct adreno_device *adreno_dev, pcode = (feature_code >= SOCINFO_FC_Y0 && feature_code < SOCINFO_FC_INT_RESERVE) ? 
max_t(int, socinfo_get_pcode(), SOCINFO_PCODE_UNKNOWN) : SOCINFO_PCODE_UNKNOWN; - soc_code = FIELD_PREP(GENMASK(31, 16), pcode) | FIELD_PREP(GENMASK(15, 0), feature_code); + device->soc_code = FIELD_PREP(GENMASK(31, 16), pcode) | + FIELD_PREP(GENMASK(15, 0), feature_code); for_each_child_of_node(node, child) { bool match = false; @@ -805,7 +823,7 @@ static int adreno_of_get_pwrlevels(struct adreno_device *adreno_dev, for (i = 0; i < num_codes; i++) { if (!of_property_read_u32_index(child, "qcom,sku-codes", i, &sku_code) && - (sku_code == 0 || soc_code == sku_code)) { + (sku_code == 0 || device->soc_code == sku_code)) { match = true; break; } @@ -837,7 +855,7 @@ static int adreno_of_get_pwrlevels(struct adreno_device *adreno_dev, dev_err(&device->pdev->dev, "No match for speed_bin:%d and soc_code:0x%x\n", - device->speed_bin, soc_code); + device->speed_bin, device->soc_code); return -ENODEV; } @@ -1290,6 +1308,12 @@ int adreno_device_probe(struct platform_device *pdev, device->pwrctrl.interrupt_num = status; + device->freq_limiter_intr_num = kgsl_request_irq_optional(pdev, "freq_limiter_irq", + adreno_freq_limiter_irq_handler, device); + + device->freq_limiter_irq_clear = + devm_reset_control_get(&pdev->dev, "freq_limiter_irq_clear"); + status = kgsl_device_platform_probe(device); if (status) goto err_unbind; diff --git a/kgsl.c b/kgsl.c index 242df6c150..388ff6e136 100644 --- a/kgsl.c +++ b/kgsl.c @@ -5047,6 +5047,27 @@ int kgsl_request_irq(struct platform_device *pdev, const char *name, return num; } +int kgsl_request_irq_optional(struct platform_device *pdev, const char *name, + irq_handler_t handler, void *data) +{ + int ret, num = platform_get_irq_byname_optional(pdev, name); + + if (num < 0) + return num; + + ret = devm_request_irq(&pdev->dev, num, handler, IRQF_TRIGGER_HIGH, + name, data); + + if (ret) { + dev_err(&pdev->dev, "Unable to get interrupt %s: %d\n", + name, ret); + return ret; + } + + disable_irq(num); + return num; +} + int 
kgsl_of_property_read_ddrtype(struct device_node *node, const char *base, u32 *ptr) { diff --git a/kgsl.h b/kgsl.h index d854ca6cc1..243279f349 100644 --- a/kgsl.h +++ b/kgsl.h @@ -561,6 +561,9 @@ enum kgsl_mmutype kgsl_mmu_get_mmutype(struct kgsl_device *device); int kgsl_request_irq(struct platform_device *pdev, const char *name, irq_handler_t handler, void *data); +int kgsl_request_irq_optional(struct platform_device *pdev, const char *name, + irq_handler_t handler, void *data); + int __init kgsl_core_init(void); void kgsl_core_exit(void); diff --git a/kgsl_device.h b/kgsl_device.h index 519979e0e1..f1687b305e 100644 --- a/kgsl_device.h +++ b/kgsl_device.h @@ -302,6 +302,8 @@ struct kgsl_device { rwlock_t event_groups_lock; /** @speed_bin: Speed bin for the GPU device if applicable */ u32 speed_bin; + /** @soc_code: Identifier containing product and feature code */ + u32 soc_code; /** @gmu_fault: Set when a gmu or rgmu fault is encountered */ bool gmu_fault; /** @regmap: GPU register map */ @@ -336,6 +338,10 @@ struct kgsl_device { unsigned long idle_jiffies; /** @dump_all_ibs: Whether to dump all ibs in snapshot */ bool dump_all_ibs; + /** @freq_limiter_irq_clear: reset controller to clear freq limiter irq */ + struct reset_control *freq_limiter_irq_clear; + /** @freq_limiter_intr_num: The interrupt number for freq limiter */ + int freq_limiter_intr_num; }; #define KGSL_MMU_DEVICE(_mmu) \ diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index d23236a16f..9059cab200 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -1482,11 +1482,15 @@ void kgsl_pwrctrl_irq(struct kgsl_device *device, bool state) &pwr->power_flags)) { trace_kgsl_irq(device, state); enable_irq(pwr->interrupt_num); + if (device->freq_limiter_intr_num > 0) + enable_irq(device->freq_limiter_intr_num); } } else { if (test_and_clear_bit(KGSL_PWRFLAGS_IRQ_ON, &pwr->power_flags)) { trace_kgsl_irq(device, state); + if (device->freq_limiter_intr_num > 0) + disable_irq(device->freq_limiter_intr_num); if 
(in_interrupt()) disable_irq_nosync(pwr->interrupt_num); else From dc459fb6b45365cf18cc4da4bc387332839238ff Mon Sep 17 00:00:00 2001 From: Mohammed Mirza Mandayappurath Manzoor Date: Thu, 7 Sep 2023 11:49:03 -0700 Subject: [PATCH 0523/1016] Revert "msm: kgsl: Loop through the msg queue for start msg" This reverts commit 5886928b1a2123fcddd54ac768e4d683a026fa72. The irq clear write in the loop could potentially be posted after a new valid new interrupt by GMU, causing us to timeout waiting for interrupt. Timeout could also occur if GMU raises the interrupt a bit late or if cpu is running slower. Change-Id: I30e8a75a06d71d70cdeb129b577b6efb1ef9ee2c Signed-off-by: Mohammed Mirza Mandayappurath Manzoor --- adreno_a6xx_hwsched_hfi.c | 111 +++++++++++++++++------------------ adreno_gen7_hwsched_hfi.c | 118 ++++++++++++++++++-------------------- 2 files changed, 107 insertions(+), 122 deletions(-) diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index ea3750413e..364e3d90a8 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -978,7 +978,7 @@ static int send_start_msg(struct adreno_device *adreno_dev) struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 seqnum; - int rc, read_size; + int rc; struct hfi_start_cmd cmd; u32 rcvd[MAX_RCVD_SIZE]; struct pending_cmd pending_ack = {0}; @@ -996,66 +996,59 @@ static int send_start_msg(struct adreno_device *adreno_dev) if (rc) return rc; - /* - * This will never be an infinite loop. We break out either when we receive the - * HFI_MSG_ACK or when we timeout waiting for the ack. 
- */ - while (true) { - rc = gmu_core_timed_poll_check(device, A6XX_GMU_GMU2HOST_INTR_INFO, - HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT, HFI_IRQ_MSGQ_MASK); +poll: + rc = gmu_core_timed_poll_check(device, A6XX_GMU_GMU2HOST_INTR_INFO, + HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT, HFI_IRQ_MSGQ_MASK); - if (rc) { - dev_err(&gmu->pdev->dev, - "Timed out processing MSG_START seqnum: %d\n", - seqnum); - gmu_core_fault_snapshot(device); - return rc; - } - - /* Clear the interrupt */ - gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_CLR, HFI_IRQ_MSGQ_MASK); - - read_size = a6xx_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)); - - if (read_size <= 0) { - dev_err(&gmu->pdev->dev, "MSG_START: no payload\n"); - gmu_core_fault_snapshot(device); - return -EINVAL; - } - /* Loop through the msg queue to read all messages */ - while (read_size > 0) { - switch (MSG_HDR_GET_ID(rcvd[0])) { - case F2H_MSG_MEM_ALLOC: - rc = mem_alloc_reply(adreno_dev, rcvd); - break; - case F2H_MSG_GMU_CNTR_REGISTER: - rc = gmu_cntr_register_reply(adreno_dev, rcvd); - break; - default: - if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) { - rc = a6xx_receive_ack_cmd(gmu, rcvd, &pending_ack); - if (rc) - return rc; - - return check_ack_failure(adreno_dev, &pending_ack); - } - - dev_err(&gmu->pdev->dev, - "MSG_START: unexpected response id:%d, type:%d\n", - MSG_HDR_GET_ID(rcvd[0]), - MSG_HDR_GET_TYPE(rcvd[0])); - gmu_core_fault_snapshot(device); - return -EINVAL; - } - - if (rc) - return rc; - - /* Clear the interrupt before checking the queue again */ - gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_CLR, HFI_IRQ_MSGQ_MASK); - read_size = a6xx_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)); - } + if (rc) { + dev_err(&gmu->pdev->dev, + "Timed out processing MSG_START seqnum: %d\n", + seqnum); + gmu_core_fault_snapshot(device); + return rc; } + + /* Clear the interrupt */ + gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_CLR, + HFI_IRQ_MSGQ_MASK); + + if (a6xx_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)) 
<= 0) { + dev_err(&gmu->pdev->dev, "MSG_START: no payload\n"); + gmu_core_fault_snapshot(device); + return -EINVAL; + } + + if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) { + rc = a6xx_receive_ack_cmd(gmu, rcvd, &pending_ack); + if (rc) + return rc; + + return check_ack_failure(adreno_dev, &pending_ack); + } + + if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_MEM_ALLOC) { + rc = mem_alloc_reply(adreno_dev, rcvd); + if (rc) + return rc; + + goto poll; + } + + if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_GMU_CNTR_REGISTER) { + rc = gmu_cntr_register_reply(adreno_dev, rcvd); + if (rc) + return rc; + goto poll; + } + + dev_err(&gmu->pdev->dev, + "MSG_START: unexpected response id:%d, type:%d\n", + MSG_HDR_GET_ID(rcvd[0]), + MSG_HDR_GET_TYPE(rcvd[0])); + + gmu_core_fault_snapshot(device); + + return rc; } static void reset_hfi_mem_records(struct adreno_device *adreno_dev) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index f6378fc718..099eed72fe 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -1778,14 +1778,14 @@ static int send_start_msg(struct adreno_device *adreno_dev) struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 seqnum; - int read_size, rc = 0; + int ret, rc = 0; struct hfi_start_cmd cmd; u32 rcvd[MAX_RCVD_SIZE]; struct pending_cmd pending_ack = {0}; - rc = CMD_MSG_HDR(cmd, H2F_MSG_START); - if (rc) - return rc; + ret = CMD_MSG_HDR(cmd, H2F_MSG_START); + if (ret) + return ret; seqnum = atomic_inc_return(&gmu->hfi.seqnum); cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); @@ -1796,67 +1796,59 @@ static int send_start_msg(struct adreno_device *adreno_dev) if (rc) return rc; - /* - * This will never be an infinite loop. We break out either when we receive the - * HFI_MSG_ACK or when we timeout waiting for the ack. 
- */ - while (true) { - rc = gmu_core_timed_poll_check(device, GEN7_GMU_GMU2HOST_INTR_INFO, - HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT, HFI_IRQ_MSGQ_MASK); +poll: + rc = gmu_core_timed_poll_check(device, GEN7_GMU_GMU2HOST_INTR_INFO, + HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT, HFI_IRQ_MSGQ_MASK); - if (rc) { - dev_err(&gmu->pdev->dev, - "Timed out processing MSG_START seqnum: %d\n", - seqnum); - gmu_core_fault_snapshot(device); - return rc; - } - - /* Clear the interrupt */ - gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, HFI_IRQ_MSGQ_MASK); - - read_size = gen7_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)); - - if (read_size <= 0) { - dev_err(&gmu->pdev->dev, "MSG_START: no payload\n"); - gmu_core_fault_snapshot(device); - return -EINVAL; - } - - /* Loop through the msg queue to read all messages */ - while (read_size > 0) { - switch (MSG_HDR_GET_ID(rcvd[0])) { - case F2H_MSG_MEM_ALLOC: - rc = mem_alloc_reply(adreno_dev, rcvd); - break; - case F2H_MSG_GMU_CNTR_REGISTER: - rc = gmu_cntr_register_reply(adreno_dev, rcvd); - break; - default: - if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) { - rc = gen7_receive_ack_cmd(gmu, rcvd, &pending_ack); - if (rc) - return rc; - return check_ack_failure(adreno_dev, &pending_ack); - } - - dev_err(&gmu->pdev->dev, - "MSG_START: unexpected response id:%d, type:%d\n", - MSG_HDR_GET_ID(rcvd[0]), - MSG_HDR_GET_TYPE(rcvd[0])); - - gmu_core_fault_snapshot(device); - return -EINVAL; - } - - if (rc) - return rc; - - /* Clear the interrupt before checking the queue again */ - gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, HFI_IRQ_MSGQ_MASK); - read_size = gen7_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)); - } + if (rc) { + dev_err(&gmu->pdev->dev, + "Timed out processing MSG_START seqnum: %d\n", + seqnum); + gmu_core_fault_snapshot(device); + return rc; } + + /* Clear the interrupt */ + gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, + HFI_IRQ_MSGQ_MASK); + + if (gen7_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)) 
<= 0) { + dev_err(&gmu->pdev->dev, "MSG_START: no payload\n"); + gmu_core_fault_snapshot(device); + return -EINVAL; + } + + if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) { + rc = gen7_receive_ack_cmd(gmu, rcvd, &pending_ack); + if (rc) + return rc; + + return check_ack_failure(adreno_dev, &pending_ack); + } + + if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_MEM_ALLOC) { + rc = mem_alloc_reply(adreno_dev, rcvd); + if (rc) + return rc; + + goto poll; + } + + if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_GMU_CNTR_REGISTER) { + rc = gmu_cntr_register_reply(adreno_dev, rcvd); + if (rc) + return rc; + goto poll; + } + + dev_err(&gmu->pdev->dev, + "MSG_START: unexpected response id:%d, type:%d\n", + MSG_HDR_GET_ID(rcvd[0]), + MSG_HDR_GET_TYPE(rcvd[0])); + + gmu_core_fault_snapshot(device); + + return rc; } static void reset_hfi_mem_records(struct adreno_device *adreno_dev) From 95e38c214c5895c1018fb85848bfb05c25e49aa7 Mon Sep 17 00:00:00 2001 From: Abhishek Barman Date: Thu, 5 Oct 2023 14:16:53 +0530 Subject: [PATCH 0524/1016] msm: kgsl: Update hardcoded path to a BOARD_COMMON_DIR build variable Some targets use a different location for the build scripts and set the BOARD_COMMON_DIR build variable accordingly. If this variable is set, pick up the Build_external_kernelmodule.mk makefile from this location. 
Change-Id: I632cd5b8e3a603c163118220e2f0938da8f788a8 Signed-off-by: Abhishek Barman --- Android.mk | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Android.mk b/Android.mk index c466510fcd..1511d9f38f 100644 --- a/Android.mk +++ b/Android.mk @@ -23,7 +23,11 @@ include $(CLEAR_VARS) # This makefile is only for DLKM ifneq ($(findstring vendor,$(LOCAL_PATH)),) -DLKM_DIR := device/qcom/common/dlkm +ifeq ($(BOARD_COMMON_DIR),) + BOARD_COMMON_DIR := device/qcom/common +endif + +DLKM_DIR := $(BOARD_COMMON_DIR)/dlkm KBUILD_OPTIONS += BOARD_PLATFORM=$(TARGET_BOARD_PLATFORM) KBUILD_OPTIONS += $(KGSL_SELECT) From 7739a43e536745d96b188087afa5b0ae9c82203f Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Tue, 3 Oct 2023 15:15:10 -0700 Subject: [PATCH 0525/1016] kgsl: Disable QCOM_KGSL_USE_SHMEM for pineapple QCOM_KGSL_PROCESS_RECLAIM dependencies are not present in kernel-6.3. Hence disable QCOM_KGSL_USE_SHMEM and QCOM_KGSL_PROCESS_RECLAIM. Change-Id: I99d97bb544e1c2f9c3f25e61ec8c4fb9148c4b71 Signed-off-by: Hareesh Gundu --- config/gki_pineapple.conf | 10 ---------- config/pineapple_consolidate_gpuconf | 2 -- config/pineapple_gki_gpuconf | 2 -- 3 files changed, 14 deletions(-) diff --git a/config/gki_pineapple.conf b/config/gki_pineapple.conf index 3acecdd5ae..882ef0e387 100644 --- a/config/gki_pineapple.conf +++ b/config/gki_pineapple.conf @@ -11,11 +11,6 @@ ifneq ($(CONFIG_CORESIGHT),) CONFIG_QCOM_KGSL_CORESIGHT = y endif -ifneq ($(CONFIG_SHMEM),) - CONFIG_QCOM_KGSL_USE_SHMEM = y - CONFIG_QCOM_KGSL_PROCESS_RECLAIM = y -endif - ccflags-y += -DCONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=1 \ -DCONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=1 \ -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ @@ -28,8 +23,3 @@ ccflags-y += -DCONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=1 \ ifneq ($(CONFIG_CORESIGHT),) ccflags-y += -DCONFIG_QCOM_KGSL_CORESIGHT=1 endif - -ifneq ($(CONFIG_SHMEM),) - ccflags-y += -DCONFIG_QCOM_KGSL_USE_SHMEM=1 \ - -DCONFIG_QCOM_KGSL_PROCESS_RECLAIM=1 -endif diff --git 
a/config/pineapple_consolidate_gpuconf b/config/pineapple_consolidate_gpuconf index dc86e13fdb..d58575d748 100644 --- a/config/pineapple_consolidate_gpuconf +++ b/config/pineapple_consolidate_gpuconf @@ -9,5 +9,3 @@ CONFIG_QCOM_KGSL_SORT_POOL=y CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=y CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR="msm-adreno-tz" -CONFIG_QCOM_KGSL_USE_SHMEM=y -CONFIG_QCOM_KGSL_PROCESS_RECLAIM=y diff --git a/config/pineapple_gki_gpuconf b/config/pineapple_gki_gpuconf index dc86e13fdb..d58575d748 100644 --- a/config/pineapple_gki_gpuconf +++ b/config/pineapple_gki_gpuconf @@ -9,5 +9,3 @@ CONFIG_QCOM_KGSL_SORT_POOL=y CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=y CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR="msm-adreno-tz" -CONFIG_QCOM_KGSL_USE_SHMEM=y -CONFIG_QCOM_KGSL_PROCESS_RECLAIM=y From 281fff49e4f6c55e956b0ca052015d3410f25330 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Thu, 5 Oct 2023 16:09:05 -0700 Subject: [PATCH 0526/1016] kgsl: build: Fix makefile syntax issue Replace tabs with the sapces to fix makefile syntax. 
Change-Id: Iaa9dc3e67d374bcf735cf913e76aa65facdd3366 Signed-off-by: Hareesh Gundu --- Android.mk | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Android.mk b/Android.mk index c466510fcd..06f87671ed 100644 --- a/Android.mk +++ b/Android.mk @@ -1,10 +1,10 @@ ifeq ($(TARGET_USES_QMAA),true) - KGSL_ENABLED := false - ifeq ($(TARGET_USES_QMAA_OVERRIDE_GFX),true) - KGSL_ENABLED := true - endif # TARGET_USES_QMAA_OVERRIDE_GFX + KGSL_ENABLED := false + ifeq ($(TARGET_USES_QMAA_OVERRIDE_GFX),true) + KGSL_ENABLED := true + endif # TARGET_USES_QMAA_OVERRIDE_GFX else - KGSL_ENABLED := true + KGSL_ENABLED := true endif # TARGET_USES_QMAA ifeq ($(ENABLE_HYP), true) @@ -29,7 +29,7 @@ KBUILD_OPTIONS += BOARD_PLATFORM=$(TARGET_BOARD_PLATFORM) KBUILD_OPTIONS += $(KGSL_SELECT) KBUILD_OPTIONS += MODNAME=msm_kgsl ifeq ($(TARGET_BOARD_PLATFORM), pineapple) - KBUILD_OPTIONS += KBUILD_EXTRA_SYMBOLS+=$(PWD)/$(call intermediates-dir-for,DLKM,hw-fence-module-symvers)/Module.symvers + KBUILD_OPTIONS += KBUILD_EXTRA_SYMBOLS+=$(PWD)/$(call intermediates-dir-for,DLKM,hw-fence-module-symvers)/Module.symvers endif include $(CLEAR_VARS) @@ -42,8 +42,8 @@ LOCAL_MODULE_DEBUG_ENABLE := true LOCAL_MODULE_PATH := $(KERNEL_MODULES_OUT) ifeq ($(TARGET_BOARD_PLATFORM), pineapple) - LOCAL_REQUIRED_MODULES := hw-fence-module-symvers - LOCAL_ADDITIONAL_DEPENDENCIES := $(call intermediates-dir-for,DLKM,hw-fence-module-symvers)/Module.symvers + LOCAL_REQUIRED_MODULES := hw-fence-module-symvers + LOCAL_ADDITIONAL_DEPENDENCIES := $(call intermediates-dir-for,DLKM,hw-fence-module-symvers)/Module.symvers endif # Include msm_kgsl.ko in the /vendor/lib/modules (vendor.img) BOARD_VENDOR_KERNEL_MODULES += $(LOCAL_MODULE_PATH)/$(LOCAL_MODULE) From 1d341f8f306f902020529a421d43e7ed8800f568 Mon Sep 17 00:00:00 2001 From: Harshitha Sai Neelati Date: Wed, 9 Aug 2023 23:49:55 +0530 Subject: [PATCH 0527/1016] msm: kgsl: Update API for cache invalidate APIs for CMO (Cache Maintenance 
Operations) are updated in msm-6.1 kernel. Prior to msm-6.1, dma_sync_sg_for_device() with DMA_FROM_DEVICE as direction triggers cache invalidate and clean whereas in msm-6.1, it triggers only cache clean. Hence use dma_sync_sg_for_cpu() for cache invalidate. Change-Id: I1f2f5c155cfdda655b32f39a29128617c3448bcb Signed-off-by: Harshitha Sai Neelati --- kgsl_sharedmem.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/kgsl_sharedmem.c b/kgsl_sharedmem.c index 011dbbd8ec..b4f31327e1 100644 --- a/kgsl_sharedmem.c +++ b/kgsl_sharedmem.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "kgsl_device.h" #include "kgsl_pool.h" @@ -706,16 +707,32 @@ static void _dma_cache_op(struct device *dev, struct page *page, sg_set_page(&sgl, page, PAGE_SIZE, 0); sg_dma_address(&sgl) = page_to_phys(page); + /* + * APIs for Cache Maintenance Operations are updated in kernel + * version 6.1. Prior to 6.1, dma_sync_sg_for_device() with + * DMA_FROM_DEVICE as direction triggers cache invalidate and + * clean whereas in kernel version 6.1, it triggers only cache + * clean. Hence use dma_sync_sg_for_cpu() for cache invalidate + * for kernel version 6.1 and above. 
+ */ + switch (op) { case KGSL_CACHE_OP_FLUSH: dma_sync_sg_for_device(dev, &sgl, 1, DMA_TO_DEVICE); +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) + dma_sync_sg_for_cpu(dev, &sgl, 1, DMA_FROM_DEVICE); +#else dma_sync_sg_for_device(dev, &sgl, 1, DMA_FROM_DEVICE); +#endif break; case KGSL_CACHE_OP_CLEAN: dma_sync_sg_for_device(dev, &sgl, 1, DMA_TO_DEVICE); break; case KGSL_CACHE_OP_INV: dma_sync_sg_for_device(dev, &sgl, 1, DMA_FROM_DEVICE); +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) + dma_sync_sg_for_cpu(dev, &sgl, 1, DMA_FROM_DEVICE); +#endif break; } } @@ -1218,7 +1235,21 @@ void kgsl_page_sync(struct device *dev, struct page *page, sg_set_page(&sg, page, size, 0); sg_dma_address(&sg) = page_to_phys(page); - dma_sync_sg_for_device(dev, &sg, 1, dir); + /* + * APIs for Cache Maintenance Operations are updated in kernel + * version 6.1. Prior to 6.1, dma_sync_sg_for_device() with + * DMA_BIDIRECTIONAL as direction triggers cache invalidate and + * clean whereas in kernel version 6.1, it triggers only cache + * clean. Hence use dma_sync_sg_for_cpu() for cache invalidate + * for kernel version 6.1 and above. + */ + + if ((dir == DMA_BIDIRECTIONAL) && + KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) { + dma_sync_sg_for_device(dev, &sg, 1, DMA_TO_DEVICE); + dma_sync_sg_for_cpu(dev, &sg, 1, DMA_FROM_DEVICE); + } else + dma_sync_sg_for_device(dev, &sg, 1, dir); } void kgsl_zero_page(struct page *p, unsigned int order, From 8294c5b08f64930564f94ef9967022fd9d7d7e69 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Thu, 28 Sep 2023 15:01:14 -0700 Subject: [PATCH 0528/1016] msm: kgsl: Do not free sharedmem if it cannot be unmapped If sharedmem cannot be unmapped from the mmu, it can still be accessed by the GPU. Therefore it is not safe to free the backing memory. In the case that unmap fails, do not free it or return it to the system. 
Change-Id: Iad3e86d043f129a4d71cf862865d9033d4a315e3 Signed-off-by: Lynus Vaz --- kgsl_mmu.c | 5 +++- kgsl_sharedmem.c | 22 ++++++++++++++++-- kgsl_vbo.c | 60 +++++++++++++++++++++++++++++++++++++----------- 3 files changed, 71 insertions(+), 16 deletions(-) diff --git a/kgsl_mmu.c b/kgsl_mmu.c index 7bf6f45c5e..32858bf6fc 100644 --- a/kgsl_mmu.c +++ b/kgsl_mmu.c @@ -458,6 +458,8 @@ kgsl_mmu_unmap(struct kgsl_pagetable *pagetable, size = kgsl_memdesc_footprint(memdesc); ret = pagetable->pt_ops->mmu_unmap(pagetable, memdesc); + if (ret) + return ret; atomic_dec(&pagetable->stats.entries); atomic_long_sub(size, &pagetable->stats.mapped); @@ -487,7 +489,8 @@ kgsl_mmu_unmap_range(struct kgsl_pagetable *pagetable, ret = pagetable->pt_ops->mmu_unmap_range(pagetable, memdesc, offset, length); - atomic_long_sub(length, &pagetable->stats.mapped); + if (!ret) + atomic_long_sub(length, &pagetable->stats.mapped); } return ret; diff --git a/kgsl_sharedmem.c b/kgsl_sharedmem.c index 011dbbd8ec..1fc878aabe 100644 --- a/kgsl_sharedmem.c +++ b/kgsl_sharedmem.c @@ -993,6 +993,9 @@ static void kgsl_contiguous_free(struct kgsl_memdesc *memdesc) if (!memdesc->hostptr) return; + if (memdesc->priv & KGSL_MEMDESC_MAPPED) + return; + atomic_long_sub(memdesc->size, &kgsl_driver.stats.coherent); _kgsl_contiguous_free(memdesc); @@ -1327,6 +1330,9 @@ static void kgsl_free_pages(struct kgsl_memdesc *memdesc) kgsl_paged_unmap_kernel(memdesc); WARN_ON(memdesc->hostptr); + if (memdesc->priv & KGSL_MEMDESC_MAPPED) + return; + atomic_long_sub(memdesc->size, &kgsl_driver.stats.page_alloc); _kgsl_free_pages(memdesc); @@ -1344,6 +1350,9 @@ static void kgsl_free_system_pages(struct kgsl_memdesc *memdesc) kgsl_paged_unmap_kernel(memdesc); WARN_ON(memdesc->hostptr); + if (memdesc->priv & KGSL_MEMDESC_MAPPED) + return; + atomic_long_sub(memdesc->size, &kgsl_driver.stats.page_alloc); for (i = 0; i < memdesc->page_count; i++) @@ -1420,7 +1429,12 @@ static void kgsl_free_secure_system_pages(struct 
kgsl_memdesc *memdesc) { int i; struct scatterlist *sg; - int ret = kgsl_unlock_sgt(memdesc->sgt); + int ret; + + if (memdesc->priv & KGSL_MEMDESC_MAPPED) + return; + + ret = kgsl_unlock_sgt(memdesc->sgt); if (ret) { /* @@ -1450,8 +1464,12 @@ static void kgsl_free_secure_system_pages(struct kgsl_memdesc *memdesc) static void kgsl_free_secure_pages(struct kgsl_memdesc *memdesc) { - int ret = kgsl_unlock_sgt(memdesc->sgt); + int ret; + if (memdesc->priv & KGSL_MEMDESC_MAPPED) + return; + + ret = kgsl_unlock_sgt(memdesc->sgt); if (ret) { /* * Unlock of the secure buffer failed. This buffer will diff --git a/kgsl_vbo.c b/kgsl_vbo.c index dd4129f3b8..c7ef7d11e2 100644 --- a/kgsl_vbo.c +++ b/kgsl_vbo.c @@ -101,14 +101,15 @@ static void kgsl_memdesc_remove_range(struct kgsl_mem_entry *target, * the entire range between start and last in this case. */ if (!entry || range->entry->id == entry->id) { + if (kgsl_mmu_unmap_range(memdesc->pagetable, + memdesc, range->range.start, bind_range_len(range))) + continue; + interval_tree_remove(node, &memdesc->ranges); trace_kgsl_mem_remove_bind_range(target, range->range.start, range->entry, bind_range_len(range)); - kgsl_mmu_unmap_range(memdesc->pagetable, - memdesc, range->range.start, bind_range_len(range)); - if (!(memdesc->flags & KGSL_MEMFLAGS_VBO_NO_MAP_ZERO)) kgsl_mmu_map_zero_page_to_range(memdesc->pagetable, memdesc, range->range.start, bind_range_len(range)); @@ -128,6 +129,7 @@ static int kgsl_memdesc_add_range(struct kgsl_mem_entry *target, struct kgsl_memdesc *memdesc = &target->memdesc; struct kgsl_memdesc_bind_range *range = bind_range_create(start, last, entry); + int ret = 0; if (IS_ERR(range)) return PTR_ERR(range); @@ -139,9 +141,12 @@ static int kgsl_memdesc_add_range(struct kgsl_mem_entry *target, * in one call. Otherwise we have to figure out what ranges to unmap * while walking the interval tree. 
*/ - if (!(memdesc->flags & KGSL_MEMFLAGS_VBO_NO_MAP_ZERO)) - kgsl_mmu_unmap_range(memdesc->pagetable, memdesc, start, + if (!(memdesc->flags & KGSL_MEMFLAGS_VBO_NO_MAP_ZERO)) { + ret = kgsl_mmu_unmap_range(memdesc->pagetable, memdesc, start, last - start + 1); + if (ret) + goto error; + } next = interval_tree_iter_first(&memdesc->ranges, start, last); @@ -160,10 +165,15 @@ static int kgsl_memdesc_add_range(struct kgsl_mem_entry *target, if (start <= cur->range.start) { if (last >= cur->range.last) { /* Unmap the entire cur range */ - if (memdesc->flags & KGSL_MEMFLAGS_VBO_NO_MAP_ZERO) - kgsl_mmu_unmap_range(memdesc->pagetable, memdesc, + if (memdesc->flags & KGSL_MEMFLAGS_VBO_NO_MAP_ZERO) { + ret = kgsl_mmu_unmap_range(memdesc->pagetable, memdesc, cur->range.start, cur->range.last - cur->range.start + 1); + if (ret) { + interval_tree_insert(node, &memdesc->ranges); + goto error; + } + } kgsl_mem_entry_put(cur->entry); kfree(cur); @@ -171,10 +181,15 @@ static int kgsl_memdesc_add_range(struct kgsl_mem_entry *target, } /* Unmap the range overlapping cur */ - if (memdesc->flags & KGSL_MEMFLAGS_VBO_NO_MAP_ZERO) - kgsl_mmu_unmap_range(memdesc->pagetable, memdesc, + if (memdesc->flags & KGSL_MEMFLAGS_VBO_NO_MAP_ZERO) { + ret = kgsl_mmu_unmap_range(memdesc->pagetable, memdesc, cur->range.start, last - cur->range.start + 1); + if (ret) { + interval_tree_insert(node, &memdesc->ranges); + goto error; + } + } /* Adjust the start of the mapping */ cur->range.start = last + 1; @@ -205,10 +220,15 @@ static int kgsl_memdesc_add_range(struct kgsl_mem_entry *target, } /* Unmap the range overlapping cur */ - if (memdesc->flags & KGSL_MEMFLAGS_VBO_NO_MAP_ZERO) - kgsl_mmu_unmap_range(memdesc->pagetable, memdesc, + if (memdesc->flags & KGSL_MEMFLAGS_VBO_NO_MAP_ZERO) { + ret = kgsl_mmu_unmap_range(memdesc->pagetable, memdesc, start, min_t(u64, cur->range.last, last) - start + 1); + if (ret) { + interval_tree_insert(node, &memdesc->ranges); + goto error; + } + } cur->range.last = start 
- 1; interval_tree_insert(node, &memdesc->ranges); @@ -227,19 +247,26 @@ static int kgsl_memdesc_add_range(struct kgsl_mem_entry *target, return kgsl_mmu_map_child(memdesc->pagetable, memdesc, start, &entry->memdesc, offset, last - start + 1); + +error: + kgsl_mem_entry_put(range->entry); + kfree(range); + mutex_unlock(&memdesc->ranges_lock); + return ret; } static void kgsl_sharedmem_vbo_put_gpuaddr(struct kgsl_memdesc *memdesc) { struct interval_tree_node *node, *next; struct kgsl_memdesc_bind_range *range; + int ret = 0; /* * If the VBO maps the zero range then we can unmap the entire * pagetable region in one call. */ if (!(memdesc->flags & KGSL_MEMFLAGS_VBO_NO_MAP_ZERO)) - kgsl_mmu_unmap_range(memdesc->pagetable, memdesc, + ret = kgsl_mmu_unmap_range(memdesc->pagetable, memdesc, 0, memdesc->size); /* @@ -259,14 +286,21 @@ static void kgsl_sharedmem_vbo_put_gpuaddr(struct kgsl_memdesc *memdesc) /* Unmap this range */ if (memdesc->flags & KGSL_MEMFLAGS_VBO_NO_MAP_ZERO) - kgsl_mmu_unmap_range(memdesc->pagetable, memdesc, + ret = kgsl_mmu_unmap_range(memdesc->pagetable, memdesc, range->range.start, range->range.last - range->range.start + 1); + /* If unmap failed, mark the child memdesc as still mapped */ + if (ret) + range->entry->memdesc.priv |= KGSL_MEMDESC_MAPPED; + kgsl_mem_entry_put(range->entry); kfree(range); } + if (ret) + return; + /* Put back the GPU address */ kgsl_mmu_put_gpuaddr(memdesc->pagetable, memdesc); From c0f6341f235407ca1436b5bee2224a218144bcdb Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Fri, 13 Oct 2023 14:15:45 +0530 Subject: [PATCH 0529/1016] kgsl: gen7: Remove CP_AHB_CNTL from power up register list KGSL doesn't override CP_AHB_CNTL register. Hence, there is no need to save and restore it in power up register list. 
Change-Id: I319d21aebb0322be8bec661e4e1f7fdc3187b021 Signed-off-by: Kamal Agrawal --- adreno_gen7.c | 2 -- gen7_reg.h | 1 - 2 files changed, 3 deletions(-) diff --git a/adreno_gen7.c b/adreno_gen7.c index 2c7bef7501..1c75334723 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -116,7 +116,6 @@ static const u32 gen7_ifpc_pwrup_reglist[] = { GEN7_CP_PROTECT_REG+45, GEN7_CP_PROTECT_REG+46, GEN7_CP_PROTECT_REG+47, - GEN7_CP_AHB_CNTL, }; static const u32 gen7_0_0_ifpc_pwrup_reglist[] = { @@ -171,7 +170,6 @@ static const u32 gen7_0_0_ifpc_pwrup_reglist[] = { GEN7_CP_PROTECT_REG+45, GEN7_CP_PROTECT_REG+46, GEN7_CP_PROTECT_REG+47, - GEN7_CP_AHB_CNTL, }; /* Gen7_9_x IFPC only static powerup restore list */ diff --git a/gen7_reg.h b/gen7_reg.h index 67e9ba45bd..f7ac2feae1 100644 --- a/gen7_reg.h +++ b/gen7_reg.h @@ -112,7 +112,6 @@ #define GEN7_CP_ALWAYS_ON_COUNTER_HI 0x981 #define GEN7_CP_ALWAYS_ON_CONTEXT_LO 0x982 #define GEN7_CP_ALWAYS_ON_CONTEXT_HI 0x983 -#define GEN7_CP_AHB_CNTL 0x98d #define GEN7_CP_RL_ERROR_DETAILS_0 0x9b4 #define GEN7_CP_RL_ERROR_DETAILS_1 0x9cf #define GEN7_CP_APERTURE_CNTL_HOST 0xa00 From 25f3b4e2ac9085f2fcb01bfeb8194135bc89bbbe Mon Sep 17 00:00:00 2001 From: NISARG SHETH Date: Tue, 3 Oct 2023 18:34:24 +0530 Subject: [PATCH 0530/1016] msm: kgsl: Update dynamic reg list for all perfcounter blocks RBBM_PERFCTR_CNTL register needs to be restored across IFPC. Currently it is added/removed from dynamic list only for perfcounters that have ADRENO_PERFCOUNTER_GROUP_RESTORE flag set. Make sure dynamic reg list is updated for all perfcounter blocks. 
Change-Id: Ia05954c63651c1c73ee10fd751c39fd7ba6d6f70 Signed-off-by: NISARG SHETH --- adreno.h | 5 +++ adreno_gen7.c | 74 +++++++++++++++++++++++++-------------- adreno_gen7.h | 4 ++- adreno_gen7_hwsched_hfi.c | 2 +- adreno_gen7_perfcounter.c | 30 +++++----------- adreno_perfcounter.c | 16 ++++----- 6 files changed, 72 insertions(+), 59 deletions(-) diff --git a/adreno.h b/adreno.h index ee9ac6093f..026a8ae2dd 100644 --- a/adreno.h +++ b/adreno.h @@ -725,6 +725,11 @@ struct adreno_device { u32 ifpc_hyst_floor; /** @cx_misc_base: CX MISC register block base offset */ u32 cx_misc_base; + /* + * @no_restore_count: Keep track of perfcounter requests that don't have + * ADRENO_PERFCOUNTER_GROUP_RESTORE flag set + */ + u32 no_restore_count; }; /** diff --git a/adreno_gen7.c b/adreno_gen7.c index 2c7bef7501..18f2c805d4 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -1678,6 +1678,8 @@ static u32 _get_pipeid(u32 groupid) int gen7_perfcounter_remove(struct adreno_device *adreno_dev, struct adreno_perfcount_register *reg, u32 groupid) { + const struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; void *ptr = adreno_dev->pwrup_reglist->hostptr; struct cpu_gpu_lock *lock = ptr; u32 *data = ptr + sizeof(*lock); @@ -1686,9 +1688,22 @@ int gen7_perfcounter_remove(struct adreno_device *adreno_dev, bool remove_counter = false; u32 pipe = FIELD_PREP(GENMASK(13, 12), _get_pipeid(groupid)); - if (lock->dynamic_list_len < 2) + if (!lock->dynamic_list_len) return -EINVAL; + group = &(counters->groups[groupid]); + + if (!(group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE)) { + if (lock->dynamic_list_len != 1) + return 0; + + if (kgsl_hwlock(lock)) { + kgsl_hwunlock(lock); + return -EBUSY; + } + goto disable_perfcounter; + } + second_last_offset = offset + (lock->dynamic_list_len - 2) * 3; last_offset = second_last_offset + 3; @@ -1727,11 +1742,12 @@ int gen7_perfcounter_remove(struct adreno_device *adreno_dev, 
lock->dynamic_list_len--; +disable_perfcounter: /* - * If dynamic list length is 1, the only entry in the list is the GEN7_RBBM_PERFCTR_CNTL. - * Remove the same. + * If dynamic list length is 1 and no_restore_count is 0, then we can remove the + * only entry in the list, which is the GEN7_RBBM_PERFCTRL_CNTL. */ - if (lock->dynamic_list_len == 1) { + if (lock->dynamic_list_len == 1 && !adreno_dev->no_restore_count) { memset(&data[offset], 0, 3 * sizeof(u32)); lock->dynamic_list_len = 0; } @@ -1741,7 +1757,7 @@ int gen7_perfcounter_remove(struct adreno_device *adreno_dev, } int gen7_perfcounter_update(struct adreno_device *adreno_dev, - struct adreno_perfcount_register *reg, bool update_reg, u32 pipe) + struct adreno_perfcount_register *reg, bool update_reg, u32 pipe, unsigned long flags) { void *ptr = adreno_dev->pwrup_reglist->hostptr; struct cpu_gpu_lock *lock = ptr; @@ -1749,16 +1765,20 @@ int gen7_perfcounter_update(struct adreno_device *adreno_dev, int i, offset = (lock->ifpc_list_len + lock->preemption_list_len) * 2; bool select_reg_present = false; - for (i = 0; i < lock->dynamic_list_len; i++) { - if ((data[offset + 1] == reg->select) && (data[offset] == pipe)) { - select_reg_present = true; - break; + if (flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) { + for (i = 0; i < lock->dynamic_list_len; i++) { + if ((data[offset + 1] == reg->select) && (data[offset] == pipe)) { + select_reg_present = true; + break; + } + + if (data[offset + 1] == GEN7_RBBM_PERFCTR_CNTL) + break; + + offset += 3; } - - if (data[offset + 1] == GEN7_RBBM_PERFCTR_CNTL) - break; - - offset += 3; + } else if (lock->dynamic_list_len) { + goto update; } if (kgsl_hwlock(lock)) { @@ -1773,36 +1793,36 @@ int gen7_perfcounter_update(struct adreno_device *adreno_dev, */ if (select_reg_present) { data[offset + 2] = reg->countable; + kgsl_hwunlock(lock); goto update; } + /* Initialize the lock->dynamic_list_len to account for GEN7_RBBM_PERFCTR_CNTL */ + if (!lock->dynamic_list_len) + 
lock->dynamic_list_len = 1; + /* * For all targets GEN7_RBBM_PERFCTR_CNTL needs to be the last entry, - * so overwrite the existing GEN7_RBBM_PERFCNTL_CTRL and add it back to + * so overwrite the existing GEN7_RBBM_PERFCTR_CNTL and add it back to * the end. */ - data[offset++] = pipe; - data[offset++] = reg->select; - data[offset++] = reg->countable; + if (flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) { + data[offset++] = pipe; + data[offset++] = reg->select; + data[offset++] = reg->countable; + lock->dynamic_list_len++; + } data[offset++] = FIELD_PREP(GENMASK(13, 12), PIPE_NONE); data[offset++] = GEN7_RBBM_PERFCTR_CNTL; data[offset++] = 1; - lock->dynamic_list_len++; - - /* If this is the first entry, enable perfcounters */ - if (lock->dynamic_list_len == 1) { - lock->dynamic_list_len++; - kgsl_regwrite(KGSL_DEVICE(adreno_dev), GEN7_RBBM_PERFCTR_CNTL, 0x1); - } + kgsl_hwunlock(lock); update: if (update_reg) kgsl_regwrite(KGSL_DEVICE(adreno_dev), reg->select, reg->countable); - - kgsl_hwunlock(lock); return 0; } diff --git a/adreno_gen7.h b/adreno_gen7.h index 0a4b03ff50..2a2384e678 100644 --- a/adreno_gen7.h +++ b/adreno_gen7.h @@ -385,11 +385,13 @@ void gen7_spin_idle_debug(struct adreno_device *adreno_dev, * @reg: Perfcounter reg struct to add/remove to the list * @update_reg: true if the perfcounter needs to be programmed by the CPU * @pipe: pipe id for CP aperture control + * @flags: Flags set for requested perfcounter group * * Return: 0 on success or -EBUSY if the lock couldn't be taken */ int gen7_perfcounter_update(struct adreno_device *adreno_dev, - struct adreno_perfcount_register *reg, bool update_reg, u32 pipe); + struct adreno_perfcount_register *reg, bool update_reg, u32 pipe, + unsigned long flags); /* * gen7_ringbuffer_init - Initialize the ringbuffers diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 19d3a4f9e3..09cea057fa 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -4244,7 +4244,7 @@ int 
gen7_hwsched_counter_inline_enable(struct adreno_device *adreno_dev, if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) gen7_perfcounter_update(adreno_dev, reg, false, - FIELD_PREP(GENMASK(13, 12), PIPE_NONE)); + FIELD_PREP(GENMASK(13, 12), PIPE_NONE), group->flags); cmds = hfi->perfctr_scratch->hostptr; diff --git a/adreno_gen7_perfcounter.c b/adreno_gen7_perfcounter.c index f882a6fa87..aba1c5b87c 100644 --- a/adreno_gen7_perfcounter.c +++ b/adreno_gen7_perfcounter.c @@ -41,12 +41,8 @@ static int gen7_counter_br_enable(struct adreno_device *adreno_dev, kgsl_regread(device, GEN7_CP_APERTURE_CNTL_HOST, &val); kgsl_regwrite(device, GEN7_CP_APERTURE_CNTL_HOST, FIELD_PREP(GENMASK(13, 12), PIPE_BR)); - if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) - ret = gen7_perfcounter_update(adreno_dev, reg, true, - FIELD_PREP(GENMASK(13, 12), PIPE_BR)); - else - kgsl_regwrite(device, reg->select, countable); - + ret = gen7_perfcounter_update(adreno_dev, reg, true, + FIELD_PREP(GENMASK(13, 12), PIPE_BR), group->flags); kgsl_regwrite(device, GEN7_CP_APERTURE_CNTL_HOST, val); if (!ret) @@ -67,12 +63,8 @@ static int gen7_counter_bv_enable(struct adreno_device *adreno_dev, kgsl_regread(device, GEN7_CP_APERTURE_CNTL_HOST, &val); kgsl_regwrite(device, GEN7_CP_APERTURE_CNTL_HOST, FIELD_PREP(GENMASK(13, 12), PIPE_BV)); - if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) - ret = gen7_perfcounter_update(adreno_dev, reg, true, - FIELD_PREP(GENMASK(13, 12), PIPE_BV)); - else - kgsl_regwrite(device, reg->select, countable); - + ret = gen7_perfcounter_update(adreno_dev, reg, true, + FIELD_PREP(GENMASK(13, 12), PIPE_BV), group->flags); kgsl_regwrite(device, GEN7_CP_APERTURE_CNTL_HOST, val); if (!ret) @@ -85,16 +77,11 @@ static int gen7_counter_enable(struct adreno_device *adreno_dev, const struct adreno_perfcount_group *group, unsigned int counter, unsigned int countable) { - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct adreno_perfcount_register *reg = 
&group->regs[counter]; int ret = 0; - if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) - ret = gen7_perfcounter_update(adreno_dev, reg, true, - FIELD_PREP(GENMASK(13, 12), PIPE_NONE)); - else - kgsl_regwrite(device, reg->select, countable); - + ret = gen7_perfcounter_update(adreno_dev, reg, true, + FIELD_PREP(GENMASK(13, 12), PIPE_NONE), group->flags); if (!ret) reg->value = 0; @@ -127,9 +114,8 @@ static int gen7_counter_inline_enable(struct adreno_device *adreno_dev, return gen7_counter_enable(adreno_dev, group, counter, countable); - if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) - gen7_perfcounter_update(adreno_dev, reg, false, - FIELD_PREP(GENMASK(13, 12), PIPE_NONE)); + gen7_perfcounter_update(adreno_dev, reg, false, + FIELD_PREP(GENMASK(13, 12), PIPE_NONE), group->flags); cmds[0] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); cmds[1] = cp_type4_packet(reg->select, 1); diff --git a/adreno_perfcounter.c b/adreno_perfcounter.c index 23b40df882..9d38128cc3 100644 --- a/adreno_perfcounter.c +++ b/adreno_perfcounter.c @@ -449,6 +449,9 @@ int adreno_perfcounter_get(struct adreno_device *adreno_dev, return ret; } + if (!(group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE)) + adreno_dev->no_restore_count++; + /* set initial kernel and user count */ if (flags & PERFCOUNTER_FLAG_KERNEL) { group->regs[empty].kernelcount = 1; @@ -511,14 +514,11 @@ int adreno_perfcounter_put(struct adreno_device *adreno_dev, /* mark available if not used anymore */ if (group->regs[i].kernelcount == 0 && group->regs[i].usercount == 0) { - /* - * Perfcounter register is added to the power - * up reglist only if group_restore flag is set. - * Hence check the flag before removing the entry - * from the reglist. 
- */ - if ((group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) && - gpudev->perfcounter_remove) + + if (!(group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE)) + adreno_dev->no_restore_count--; + + if (gpudev->perfcounter_remove) ret = gpudev->perfcounter_remove(adreno_dev, &group->regs[i], groupid); if (!ret) From 93a3dbcbd98eb12d713fc35668e7fd3dcb8760cc Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Tue, 3 Oct 2023 15:52:25 -0700 Subject: [PATCH 0531/1016] kgsl: bazel: Fix bazel rule for perf variant build kernel-6.3 onwards "gki" variant was renamed to "perf". Hence fix bazel rule to load correct config for perf variant build. Change-Id: Ia573591bf94c094710459c4abaf126daf6e6f2bf Signed-off-by: Hareesh Gundu --- BUILD.bazel | 6 ++---- build/kgsl_defs.bzl | 7 ++++--- config/pineapple_perf_gpuconf | 1 + config/sun_gki_gpuconf | 11 ----------- config/sun_perf_gpuconf | 1 + 5 files changed, 8 insertions(+), 18 deletions(-) create mode 120000 config/pineapple_perf_gpuconf delete mode 100644 config/sun_gki_gpuconf create mode 120000 config/sun_perf_gpuconf diff --git a/BUILD.bazel b/BUILD.bazel index ff33593528..b466e29c0f 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -1,5 +1,3 @@ -load(":build/kgsl_defs.bzl", "define_target_module") +load(":build/kgsl_defs.bzl", "define_target_modules") -define_target_module("pineapple") -define_target_module("sun") -define_target_module("blair") +define_target_modules() diff --git a/build/kgsl_defs.bzl b/build/kgsl_defs.bzl index 46cb6e89d4..b0c8bb5382 100644 --- a/build/kgsl_defs.bzl +++ b/build/kgsl_defs.bzl @@ -1,5 +1,6 @@ load("//build/kernel/kleaf:kernel.bzl", "ddk_module") load("//build/bazel_common_rules/dist:dist.bzl", "copy_to_dist_dir") +load("//msm-kernel:target_variants.bzl", "get_all_la_variants") msm_kgsl_includes = [ "include/linux/msm_kgsl.h", @@ -143,6 +144,6 @@ def define_target_variant_module(target, variant): log = "info", ) -def define_target_module(target): - define_target_variant_module(target, "gki") - 
define_target_variant_module(target, "consolidate") +def define_target_modules(): + for target, variant in get_all_la_variants(): + define_target_variant_module(target, variant) diff --git a/config/pineapple_perf_gpuconf b/config/pineapple_perf_gpuconf new file mode 120000 index 0000000000..454a84cbc0 --- /dev/null +++ b/config/pineapple_perf_gpuconf @@ -0,0 +1 @@ +pineapple_consolidate_gpuconf \ No newline at end of file diff --git a/config/sun_gki_gpuconf b/config/sun_gki_gpuconf deleted file mode 100644 index d58575d748..0000000000 --- a/config/sun_gki_gpuconf +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. - -CONFIG_QCOM_KGSL=m -CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=y -CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=y -CONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 -CONFIG_QCOM_KGSL_SORT_POOL=y -CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y -CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=y -CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR="msm-adreno-tz" diff --git a/config/sun_perf_gpuconf b/config/sun_perf_gpuconf new file mode 120000 index 0000000000..65b03f783f --- /dev/null +++ b/config/sun_perf_gpuconf @@ -0,0 +1 @@ +sun_consolidate_gpuconf \ No newline at end of file From e2a38778088ca80ea1dc542d976238a6fb92d39a Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Fri, 1 Sep 2023 13:47:32 -0600 Subject: [PATCH 0532/1016] msm: kgsl: Check if underlying vma is cached When importing a buffer via its user address, the user may not specify (via passed in flags) that whether this mapping needs to be io-coherent or not. This can cause the gpu mapping to be not marked as io-coherent even though the underlying vma is cached. This can lead to coherency issues. Hence, explicitly check if the underlying vma is cached. If so, then mark the GPU side mapping as io-coherent. 
Change-Id: Icd849902a403b02d3eec438d992d00c0a7fe23ee Signed-off-by: Harshdeep Dhatt --- kgsl.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 10 deletions(-) diff --git a/kgsl.c b/kgsl.c index 0351b9cd3b..fce962f303 100644 --- a/kgsl.c +++ b/kgsl.c @@ -2837,27 +2837,72 @@ long kgsl_ioctl_cmdstream_freememontimestamp_ctxtid( return ret; } -static int check_vma(unsigned long hostptr, u64 size) +static bool _vma_is_cached(struct vm_area_struct *vma) +{ + pteval_t pgprot_val = pgprot_val(vma->vm_page_prot); + + /* + * An uncached cpu mapping can either be marked as writecombine or noncached. If it isn't + * either, then it means it is cached. + */ + if ((pgprot_val != pgprot_val(pgprot_writecombine((vma->vm_page_prot)))) && + (pgprot_val != pgprot_val(pgprot_noncached(vma->vm_page_prot)))) + return true; + + return false; +} + +static bool check_vma(struct kgsl_device *device, struct kgsl_memdesc *memdesc, + unsigned long hostptr) { struct vm_area_struct *vma; unsigned long cur = hostptr; + bool cached; - while (cur < (hostptr + size)) { + vma = find_vma(current->mm, hostptr); + if (!vma) + return false; + + /* Don't remap memory that we already own */ + if (vma->vm_file && (vma->vm_ops == &kgsl_gpumem_vm_ops)) + return false; + + cached = _vma_is_cached(vma); + + cur = vma->vm_end; + + while (cur < (hostptr + memdesc->size)) { vma = find_vma(current->mm, cur); if (!vma) return false; /* Don't remap memory that we already own */ - if (vma->vm_file && vma->vm_ops == &kgsl_gpumem_vm_ops) + if (vma->vm_file && (vma->vm_ops == &kgsl_gpumem_vm_ops)) + return false; + + /* + * Make sure the entire memdesc is either cached or noncached. Bail out if there is + * a mismatch as it can lead to coherency issues. + */ + if (cached != _vma_is_cached(vma)) return false; cur = vma->vm_end; } + /* + * If cpu side mapping is cached (and io-coherency is enabled), the gpu mapping should be + * marked io-coherent to avoid coherency issues. 
+ */ + if (cached && kgsl_mmu_has_feature(device, KGSL_MMU_IO_COHERENT) && + IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT)) + memdesc->flags |= KGSL_MEMFLAGS_IOCOHERENT; + return true; } -static int memdesc_sg_virt(struct kgsl_memdesc *memdesc, unsigned long useraddr) +static int memdesc_sg_virt(struct kgsl_device *device, struct kgsl_memdesc *memdesc, + unsigned long useraddr) { int ret = 0; long npages = 0, i; @@ -2880,7 +2925,7 @@ static int memdesc_sg_virt(struct kgsl_memdesc *memdesc, unsigned long useraddr) } mmap_read_lock(current->mm); - if (!check_vma(useraddr, memdesc->size)) { + if (!check_vma(device, memdesc, useraddr)) { mmap_read_unlock(current->mm); ret = -EFAULT; goto out; @@ -2926,9 +2971,8 @@ static const struct kgsl_memdesc_ops kgsl_usermem_ops = { .put_gpuaddr = kgsl_unmap_and_put_gpuaddr, }; -static int kgsl_setup_anon_useraddr(struct kgsl_pagetable *pagetable, - struct kgsl_mem_entry *entry, unsigned long hostptr, - size_t offset, size_t size) +static int kgsl_setup_anon_useraddr(struct kgsl_device *device, struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, unsigned long hostptr, size_t offset, size_t size) { /* Map an anonymous memory chunk */ @@ -2962,7 +3006,7 @@ static int kgsl_setup_anon_useraddr(struct kgsl_pagetable *pagetable, entry->memdesc.gpuaddr = (uint64_t) hostptr; } - ret = memdesc_sg_virt(&entry->memdesc, hostptr); + ret = memdesc_sg_virt(device, &entry->memdesc, hostptr); if (ret && kgsl_memdesc_use_cpu_map(&entry->memdesc)) kgsl_mmu_put_gpuaddr(pagetable, &entry->memdesc); @@ -2978,7 +3022,7 @@ static int kgsl_setup_useraddr(struct kgsl_device *device, if (hostptr == 0 || !IS_ALIGNED(hostptr, PAGE_SIZE)) return -EINVAL; - return kgsl_setup_anon_useraddr(pagetable, entry, + return kgsl_setup_anon_useraddr(device, pagetable, entry, hostptr, offset, size); } From 5f63c96425e9a1aa84c99db0bcadd04dfd272fd9 Mon Sep 17 00:00:00 2001 From: Akhil P Oommen Date: Fri, 15 Sep 2023 11:19:41 +0530 Subject: [PATCH 
0533/1016] kgsl: gen7: Update GBIF OT size for Gen7_9_x Update the GBIF OT size to the recommended value for Gen7_9_x GPUs. Change-Id: I5123b2f8b16d35edc75717de796e4de16de2b06e Signed-off-by: Akhil P Oommen --- adreno_gen7.c | 4 ++++ gen7_reg.h | 1 + 2 files changed, 5 insertions(+) diff --git a/adreno_gen7.c b/adreno_gen7.c index 18f2c805d4..8b7c09a644 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -828,6 +828,10 @@ int gen7_start(struct adreno_device *adreno_dev) if (ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) kgsl_regwrite(device, GEN7_CP_AQE_APRIV_CNTL, BIT(0)); + if (adreno_is_gen7_9_x(adreno_dev)) + kgsl_regrmw(device, GEN7_GBIF_CX_CONFIG, GENMASK(31, 29), + FIELD_PREP(GENMASK(31, 29), 1)); + /* * CP Icache prefetch brings no benefit on few gen7 variants because of * the prefetch granularity size. diff --git a/gen7_reg.h b/gen7_reg.h index 67e9ba45bd..bc78f6bc5c 100644 --- a/gen7_reg.h +++ b/gen7_reg.h @@ -1013,6 +1013,7 @@ #define GBIF_AXI1_WRITE_DATA_TOTAL_BEATS 47 /* GBIF registers */ +#define GEN7_GBIF_CX_CONFIG 0x3c00 #define GEN7_GBIF_SCACHE_CNTL0 0x3c01 #define GEN7_GBIF_SCACHE_CNTL1 0x3c02 #define GEN7_GBIF_QSB_SIDE0 0x3c03 From d0c83a5ccfb4c6765be7b4b6e457b6b31d8bba64 Mon Sep 17 00:00:00 2001 From: Sushmita Susheelendra Date: Fri, 21 Jul 2023 13:39:54 -0400 Subject: [PATCH 0534/1016] msm: kgsl: Set default LLCC allocation policy to 0 Set the global LLCC allocation policy to 0 unless the mmu feature for no-write-allocate is set for the target. Force no write allocate for A3x, A5x, A6x and all gen7 targets except gen_7_9_x. gen_7_9_x uses 0. Using 0 for llcc flags maps to using write allocate for cached buffers. 
Change-Id: I72222872728c390932601f0fdf96c30d16deba01 Signed-off-by: Sushmita Susheelendra --- adreno.c | 9 +++++++++ kgsl_iommu.c | 10 ++++++---- kgsl_mmu.h | 2 ++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/adreno.c b/adreno.c index 1603294836..c251fa9025 100644 --- a/adreno.c +++ b/adreno.c @@ -1297,6 +1297,15 @@ int adreno_device_probe(struct platform_device *pdev, if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) kgsl_mmu_set_feature(device, KGSL_MMU_LLCC_ENABLE); + /* + * Force no write allocate for A3x, A5x, A6x and all gen7 targets + * except gen_7_9_x. gen_7_9_x uses write allocate + */ + if (adreno_is_a3xx(adreno_dev) || adreno_is_a5xx(adreno_dev) || + adreno_is_a6xx(adreno_dev) || + (adreno_is_gen7(adreno_dev) && !adreno_is_gen7_9_x(adreno_dev))) + kgsl_mmu_set_feature(device, KGSL_MMU_FORCE_LLCC_NWA); + /* Bind the components before doing the KGSL platform probe. */ status = component_bind_all(dev, NULL); if (status) diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 7b7c8f9992..86cc71ed3b 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -86,10 +86,12 @@ static u32 get_llcc_flags(struct kgsl_mmu *mmu) if (!test_bit(KGSL_MMU_LLCC_ENABLE, &mmu->features)) return 0; - if (mmu->subtype == KGSL_IOMMU_SMMU_V500) - return 0; - else - return IOMMU_USE_UPSTREAM_HINT; + /* Return no-write-allocate if mmu feature for no-write-allocate is set */ + if (test_bit(KGSL_MMU_FORCE_LLCC_NWA, &mmu->features)) + return IOMMU_SYS_CACHE_NWA; + + /* Return 0 as default llcc allocation policy */ + return 0; } static int _iommu_get_protection_flags(struct kgsl_mmu *mmu, diff --git a/kgsl_mmu.h b/kgsl_mmu.h index 3035cdec41..a5c8be7385 100644 --- a/kgsl_mmu.h +++ b/kgsl_mmu.h @@ -173,6 +173,8 @@ enum kgsl_mmu_feature { KGSL_MMU_SUPPORT_VBO, /** @KGSL_MMU_PAGEFAULT_TERMINATE: Set to make pagefaults fatal */ KGSL_MMU_PAGEFAULT_TERMINATE, + /** @KGSL_MMU_LLCC_NWA: Set to make no write allocate the default LLCC policy */ + KGSL_MMU_FORCE_LLCC_NWA, }; #include 
"kgsl_iommu.h" From dc7790fca93ac77d006373474a3d637253bb7c58 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Mon, 2 Oct 2023 15:14:15 -0700 Subject: [PATCH 0535/1016] kgsl: Fix compilation errors on latest kernel-6.3 Following errors fixed: i) qcom_scm.h header include path. ii) class_create() argument list. iii) coresight trace_id undefined reference. Change-Id: Ie091110c3b4c9066558c7b4277853015ad0750d3 Signed-off-by: Hareesh Gundu --- adreno_a5xx.c | 5 +++++ adreno_coresight.c | 6 +++++- governor_msm_adreno_tz.c | 7 ++++++- kgsl.c | 5 ++++- kgsl_iommu.c | 5 +++++ kgsl_util.c | 5 +++++ 6 files changed, 30 insertions(+), 3 deletions(-) diff --git a/adreno_a5xx.c b/adreno_a5xx.c index 242b4cffd2..e6d0e577a7 100644 --- a/adreno_a5xx.c +++ b/adreno_a5xx.c @@ -9,7 +9,12 @@ #include #include #include +#include +#if (KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE) +#include +#else #include +#endif #include #include "adreno.h" diff --git a/adreno_coresight.c b/adreno_coresight.c index 30c84d08c8..b2b2399ad8 100644 --- a/adreno_coresight.c +++ b/adreno_coresight.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013-2020, The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -187,15 +187,19 @@ void adreno_coresight_start(struct adreno_device *adreno_dev) _adreno_coresight_set(adreno_dev, &adreno_dev->cx_coresight); } +#if (KERNEL_VERSION(6, 3, 0) > LINUX_VERSION_CODE) static int adreno_coresight_trace_id(struct coresight_device *csdev) { struct adreno_coresight_device *adreno_csdev = dev_get_drvdata(&csdev->dev); return adreno_csdev->atid; } +#endif static const struct coresight_ops_source adreno_coresight_source_ops = { +#if (KERNEL_VERSION(6, 3, 0) > LINUX_VERSION_CODE) .trace_id = adreno_coresight_trace_id, +#endif .enable = adreno_coresight_enable, .disable = adreno_coresight_disable, }; diff --git a/governor_msm_adreno_tz.c b/governor_msm_adreno_tz.c index b6e60064e8..2c4f9f07df 100644 --- a/governor_msm_adreno_tz.c +++ b/governor_msm_adreno_tz.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2010-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include #include @@ -13,7 +13,12 @@ #include #include #include +#include +#if (KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE) +#include +#else #include +#endif #include #include diff --git a/kgsl.c b/kgsl.c index 0351b9cd3b..c411eecb13 100644 --- a/kgsl.c +++ b/kgsl.c @@ -5248,8 +5248,11 @@ int __init kgsl_core_init(void) goto err; } +#if (KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE) + kgsl_driver.class = class_create("kgsl"); +#else kgsl_driver.class = class_create(THIS_MODULE, "kgsl"); - +#endif if (IS_ERR(kgsl_driver.class)) { result = PTR_ERR(kgsl_driver.class); pr_err("kgsl: failed to create class for kgsl\n"); diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 7b7c8f9992..dabcbc9001 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -14,7 +14,12 @@ #include #include #include +#include +#if (KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE) +#include +#else #include +#endif #include #include #include diff --git a/kgsl_util.c b/kgsl_util.c index cb65817507..a6ba2ae922 100644 --- a/kgsl_util.c +++ b/kgsl_util.c @@ -12,7 +12,12 @@ #include #include #include +#include +#if (KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE) +#include +#else #include +#endif #include #include #include From a0d37cc390ee0c9e7294673d232d87a32110b8ba Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Mon, 16 Oct 2023 13:55:07 -0700 Subject: [PATCH 0536/1016] msm: kgsl: Update the API calls to kernel 6.4 The recent kernel version 6.4 changed the prototypes for some APIs. Update the function calls where needed. 
Change-Id: Icf1fbfb2f1a52f368c140f5a26a3aa750bec8110 Signed-off-by: Lynus Vaz --- kgsl.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kgsl.c b/kgsl.c index c411eecb13..cdbb116ae2 100644 --- a/kgsl.c +++ b/kgsl.c @@ -2886,7 +2886,12 @@ static int memdesc_sg_virt(struct kgsl_memdesc *memdesc, unsigned long useraddr) goto out; } +#if (KERNEL_VERSION(6, 4, 0) <= LINUX_VERSION_CODE) + npages = get_user_pages(useraddr, sglen, write, pages); +#else npages = get_user_pages(useraddr, sglen, write, pages, NULL); +#endif + mmap_read_unlock(current->mm); ret = (npages < 0) ? (int)npages : 0; @@ -5016,7 +5021,7 @@ static int _register_device(struct kgsl_device *device) device->dev->dma_mask = &dma_mask; device->dev->dma_parms = &dma_parms; - dma_set_max_seg_size(device->dev, DMA_BIT_MASK(32)); + dma_set_max_seg_size(device->dev, (u32)DMA_BIT_MASK(32)); set_dma_ops(device->dev, NULL); From 31e83c743aed92e4ad7a4dbb6258a8a6e7d5e79f Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Fri, 13 Oct 2023 12:08:03 -0600 Subject: [PATCH 0537/1016] kgsl: build: Update msm_hw_fence.h header path Beginning with kernel 6.3, the header is no longer part of the kernel since it has been moved to the hw_fence driver package. Hence, use the updated path for newer kernels. 
Change-Id: Ic3004c11843da71215734f00fbeddce3a3f9373a Signed-off-by: Harshdeep Dhatt --- adreno_gen7_hwsched_hfi.c | 7 ++++++- adreno_hwsched.h | 4 ++++ build/kgsl_defs.bzl | 3 ++- kgsl_sync.c | 7 ++++++- 4 files changed, 18 insertions(+), 3 deletions(-) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 8b072ef946..d9ceea8d24 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include "adreno.h" @@ -23,6 +22,12 @@ #include "kgsl_trace.h" #include "kgsl_util.h" +#if (KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE) +#include +#else +#include +#endif + #define HFI_QUEUE_MAX (HFI_QUEUE_DEFAULT_CNT + HFI_QUEUE_DISPATCH_MAX_CNT) #define DEFINE_QHDR(gmuaddr, id, prio) \ diff --git a/adreno_hwsched.h b/adreno_hwsched.h index 378c95ae11..14610f5f52 100644 --- a/adreno_hwsched.h +++ b/adreno_hwsched.h @@ -7,7 +7,11 @@ #ifndef _ADRENO_HWSCHED_H_ #define _ADRENO_HWSCHED_H_ +#if (KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE) +#include +#else #include +#endif #include "kgsl_sync.h" diff --git a/build/kgsl_defs.bzl b/build/kgsl_defs.bzl index b0c8bb5382..0bad6ff389 100644 --- a/build/kgsl_defs.bzl +++ b/build/kgsl_defs.bzl @@ -85,7 +85,8 @@ def external_deps(target, variant): # Add msm_hw_fence in the dependency and defconfig lists for targets that use it if target in [ "pineapple" ]: deplist = deplist + [ - "//vendor/qcom/opensource/mm-drivers/hw_fence:{}_msm_hw_fence".format(tv) + "//vendor/qcom/opensource/mm-drivers/hw_fence:{}_msm_hw_fence".format(tv), + "//vendor/qcom/opensource/mm-drivers/hw_fence:hw_fence_headers".format(tv) ] defconfigs = defconfigs + [ "//vendor/qcom/opensource/mm-drivers/hw_fence:defconfig" diff --git a/kgsl_sync.c b/kgsl_sync.c index 51ea268261..ff9aa4c375 100644 --- a/kgsl_sync.c +++ b/kgsl_sync.c @@ -6,12 +6,17 @@ #include #include -#include #include #include "kgsl_device.h" #include "kgsl_sync.h" +#if (KERNEL_VERSION(6, 3, 0) <= 
LINUX_VERSION_CODE) +#include +#else +#include +#endif + static const struct dma_fence_ops kgsl_sync_fence_ops; static struct kgsl_sync_fence *kgsl_sync_fence_create( From 1757dda6406035ac4cf52dfc55687e99371927ef Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Fri, 22 Sep 2023 16:17:12 +0530 Subject: [PATCH 0538/1016] kgsl: Optimize device mutex usage in perfcounter group query Groups and the maximum counters available per group is static. Thus, there is no need to take device mutex while reading it. Change-Id: Ib0eafae90f40697fc8bf57e60e7f720b9fdf6801 Signed-off-by: Kamal Agrawal --- adreno_perfcounter.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/adreno_perfcounter.c b/adreno_perfcounter.c index 9d38128cc3..e4d52db27d 100644 --- a/adreno_perfcounter.c +++ b/adreno_perfcounter.c @@ -303,8 +303,6 @@ int adreno_perfcounter_query_group(struct adreno_device *adreno_dev, if (counters == NULL || groupid >= counters->group_count) return -EINVAL; - mutex_lock(&device->mutex); - group = &(counters->groups[groupid]); *max_counters = group->reg_count; @@ -312,18 +310,16 @@ int adreno_perfcounter_query_group(struct adreno_device *adreno_dev, * if NULL countable or *count of zero, return max reg_count in * *max_counters and return success */ - if (countables == NULL || count == 0) { - mutex_unlock(&device->mutex); + if (countables == NULL || count == 0) return 0; - } t = min_t(unsigned int, group->reg_count, count); buf = kmalloc_array(t, sizeof(unsigned int), GFP_KERNEL); - if (buf == NULL) { - mutex_unlock(&device->mutex); + if (buf == NULL) return -ENOMEM; - } + + mutex_lock(&device->mutex); for (i = 0; i < t; i++) buf[i] = group->regs[i].countable; From 4abff240b27a7152fbf243b7ab8f971ff1e67530 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Tue, 18 Jul 2023 18:14:30 -0700 Subject: [PATCH 0539/1016] msm: kgsl: Remove a3xx gpu support Remove legacy a3xx gpu support from kgsl driver. 
Change-Id: I13d010ba6695d5eea01ac2eada4f7043d269ace9 Signed-off-by: Hareesh Gundu --- Kbuild | 5 - a3xx_reg.h | 564 -------------- a5xx_reg.h | 4 + adreno-gpulist.h | 76 +- adreno.c | 27 +- adreno.h | 35 +- adreno_a3xx.c | 1498 ------------------------------------- adreno_a3xx.h | 76 -- adreno_a3xx_coresight.c | 65 -- adreno_a3xx_perfcounter.c | 411 ---------- adreno_a3xx_ringbuffer.c | 458 ------------ adreno_a3xx_snapshot.c | 449 ----------- adreno_cp_parser.h | 23 +- adreno_dispatch.c | 112 +-- adreno_pm4types.h | 6 +- adreno_ringbuffer.c | 2 +- adreno_trace.h | 59 -- build/kgsl_defs.bzl | 5 - 18 files changed, 35 insertions(+), 3840 deletions(-) delete mode 100644 a3xx_reg.h delete mode 100644 adreno_a3xx.c delete mode 100644 adreno_a3xx.h delete mode 100644 adreno_a3xx_coresight.c delete mode 100644 adreno_a3xx_perfcounter.c delete mode 100644 adreno_a3xx_ringbuffer.c delete mode 100644 adreno_a3xx_snapshot.c diff --git a/Kbuild b/Kbuild index 933d63212b..96c1545086 100644 --- a/Kbuild +++ b/Kbuild @@ -91,10 +91,6 @@ endif msm_kgsl-y += \ adreno.o \ - adreno_a3xx.o \ - adreno_a3xx_perfcounter.o \ - adreno_a3xx_ringbuffer.o \ - adreno_a3xx_snapshot.o \ adreno_a5xx.o \ adreno_a5xx_perfcounter.o \ adreno_a5xx_preempt.o \ @@ -138,7 +134,6 @@ msm_kgsl-y += \ msm_kgsl-$(CONFIG_COMPAT) += adreno_compat.o msm_kgsl-$(CONFIG_QCOM_KGSL_CORESIGHT) += adreno_coresight.o -msm_kgsl-$(CONFIG_QCOM_KGSL_CORESIGHT) += adreno_a3xx_coresight.o msm_kgsl-$(CONFIG_QCOM_KGSL_CORESIGHT) += adreno_a5xx_coresight.o msm_kgsl-$(CONFIG_QCOM_KGSL_CORESIGHT) += adreno_a6xx_coresight.o msm_kgsl-$(CONFIG_QCOM_KGSL_CORESIGHT) += adreno_gen7_coresight.o diff --git a/a3xx_reg.h b/a3xx_reg.h deleted file mode 100644 index ab5079aa45..0000000000 --- a/a3xx_reg.h +++ /dev/null @@ -1,564 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2012-2017,2019-2020, The Linux Foundation. All rights reserved. 
- */ - -#ifndef _A300_REG_H -#define _A300_REG_H - -/* Interrupt bit positions within RBBM_INT_0 */ - -#define A3XX_INT_RBBM_GPU_IDLE 0 -#define A3XX_INT_RBBM_AHB_ERROR 1 -#define A3XX_INT_RBBM_REG_TIMEOUT 2 -#define A3XX_INT_RBBM_ME_MS_TIMEOUT 3 -#define A3XX_INT_RBBM_PFP_MS_TIMEOUT 4 -#define A3XX_INT_RBBM_ATB_BUS_OVERFLOW 5 -#define A3XX_INT_VFD_ERROR 6 -#define A3XX_INT_CP_SW_INT 7 -#define A3XX_INT_CP_T0_PACKET_IN_IB 8 -#define A3XX_INT_CP_OPCODE_ERROR 9 -#define A3XX_INT_CP_RESERVED_BIT_ERROR 10 -#define A3XX_INT_CP_HW_FAULT 11 -#define A3XX_INT_CP_DMA 12 -#define A3XX_INT_CP_IB2_INT 13 -#define A3XX_INT_CP_IB1_INT 14 -#define A3XX_INT_CP_RB_INT 15 -#define A3XX_INT_CP_REG_PROTECT_FAULT 16 -#define A3XX_INT_CP_RB_DONE_TS 17 -#define A3XX_INT_CP_VS_DONE_TS 18 -#define A3XX_INT_CP_PS_DONE_TS 19 -#define A3XX_INT_CACHE_FLUSH_TS 20 -#define A3XX_INT_CP_AHB_ERROR_HALT 21 -#define A3XX_INT_MISC_HANG_DETECT 24 -#define A3XX_INT_UCHE_OOB_ACCESS 25 - -/* Register definitions */ - -#define A3XX_RBBM_CLOCK_CTL 0x010 -#define A3XX_RBBM_SP_HYST_CNT 0x012 -#define A3XX_RBBM_SW_RESET_CMD 0x018 -#define A3XX_RBBM_AHB_CTL0 0x020 -#define A3XX_RBBM_AHB_CTL1 0x021 -#define A3XX_RBBM_AHB_CMD 0x022 -#define A3XX_RBBM_AHB_ERROR_STATUS 0x027 -#define A3XX_RBBM_GPR0_CTL 0x02E -/* This the same register as on A2XX, just in a different place */ -#define A3XX_RBBM_STATUS 0x030 -#define A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x33 -#define A3XX_RBBM_INTERFACE_HANG_INT_CTL 0x50 -#define A3XX_RBBM_INT_CLEAR_CMD 0x061 -#define A3XX_RBBM_INT_0_MASK 0x063 -#define A3XX_RBBM_INT_0_STATUS 0x064 -#define A3XX_RBBM_PERFCTR_CTL 0x80 -#define A3XX_RBBM_PERFCTR_LOAD_CMD0 0x81 -#define A3XX_RBBM_PERFCTR_LOAD_CMD1 0x82 -#define A3XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x84 -#define A3XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x85 -#define A3XX_RBBM_PERFCOUNTER0_SELECT 0x86 -#define A3XX_RBBM_PERFCOUNTER1_SELECT 0x87 -#define A3XX_RBBM_GPU_BUSY_MASKED 0x88 -#define A3XX_RBBM_PERFCTR_CP_0_LO 0x90 -#define A3XX_RBBM_PERFCTR_CP_0_HI 
0x91 -#define A3XX_RBBM_PERFCTR_RBBM_0_LO 0x92 -#define A3XX_RBBM_PERFCTR_RBBM_0_HI 0x93 -#define A3XX_RBBM_PERFCTR_RBBM_1_LO 0x94 -#define A3XX_RBBM_PERFCTR_RBBM_1_HI 0x95 -#define A3XX_RBBM_PERFCTR_PC_0_LO 0x96 -#define A3XX_RBBM_PERFCTR_PC_0_HI 0x97 -#define A3XX_RBBM_PERFCTR_PC_1_LO 0x98 -#define A3XX_RBBM_PERFCTR_PC_1_HI 0x99 -#define A3XX_RBBM_PERFCTR_PC_2_LO 0x9A -#define A3XX_RBBM_PERFCTR_PC_2_HI 0x9B -#define A3XX_RBBM_PERFCTR_PC_3_LO 0x9C -#define A3XX_RBBM_PERFCTR_PC_3_HI 0x9D -#define A3XX_RBBM_PERFCTR_VFD_0_LO 0x9E -#define A3XX_RBBM_PERFCTR_VFD_0_HI 0x9F -#define A3XX_RBBM_PERFCTR_VFD_1_LO 0xA0 -#define A3XX_RBBM_PERFCTR_VFD_1_HI 0xA1 -#define A3XX_RBBM_PERFCTR_HLSQ_0_LO 0xA2 -#define A3XX_RBBM_PERFCTR_HLSQ_0_HI 0xA3 -#define A3XX_RBBM_PERFCTR_HLSQ_1_LO 0xA4 -#define A3XX_RBBM_PERFCTR_HLSQ_1_HI 0xA5 -#define A3XX_RBBM_PERFCTR_HLSQ_2_LO 0xA6 -#define A3XX_RBBM_PERFCTR_HLSQ_2_HI 0xA7 -#define A3XX_RBBM_PERFCTR_HLSQ_3_LO 0xA8 -#define A3XX_RBBM_PERFCTR_HLSQ_3_HI 0xA9 -#define A3XX_RBBM_PERFCTR_HLSQ_4_LO 0xAA -#define A3XX_RBBM_PERFCTR_HLSQ_4_HI 0xAB -#define A3XX_RBBM_PERFCTR_HLSQ_5_LO 0xAC -#define A3XX_RBBM_PERFCTR_HLSQ_5_HI 0xAD -#define A3XX_RBBM_PERFCTR_VPC_0_LO 0xAE -#define A3XX_RBBM_PERFCTR_VPC_0_HI 0xAF -#define A3XX_RBBM_PERFCTR_VPC_1_LO 0xB0 -#define A3XX_RBBM_PERFCTR_VPC_1_HI 0xB1 -#define A3XX_RBBM_PERFCTR_TSE_0_LO 0xB2 -#define A3XX_RBBM_PERFCTR_TSE_0_HI 0xB3 -#define A3XX_RBBM_PERFCTR_TSE_1_LO 0xB4 -#define A3XX_RBBM_PERFCTR_TSE_1_HI 0xB5 -#define A3XX_RBBM_PERFCTR_RAS_0_LO 0xB6 -#define A3XX_RBBM_PERFCTR_RAS_0_HI 0xB7 -#define A3XX_RBBM_PERFCTR_RAS_1_LO 0xB8 -#define A3XX_RBBM_PERFCTR_RAS_1_HI 0xB9 -#define A3XX_RBBM_PERFCTR_UCHE_0_LO 0xBA -#define A3XX_RBBM_PERFCTR_UCHE_0_HI 0xBB -#define A3XX_RBBM_PERFCTR_UCHE_1_LO 0xBC -#define A3XX_RBBM_PERFCTR_UCHE_1_HI 0xBD -#define A3XX_RBBM_PERFCTR_UCHE_2_LO 0xBE -#define A3XX_RBBM_PERFCTR_UCHE_2_HI 0xBF -#define A3XX_RBBM_PERFCTR_UCHE_3_LO 0xC0 -#define A3XX_RBBM_PERFCTR_UCHE_3_HI 0xC1 -#define 
A3XX_RBBM_PERFCTR_UCHE_4_LO 0xC2 -#define A3XX_RBBM_PERFCTR_UCHE_4_HI 0xC3 -#define A3XX_RBBM_PERFCTR_UCHE_5_LO 0xC4 -#define A3XX_RBBM_PERFCTR_UCHE_5_HI 0xC5 -#define A3XX_RBBM_PERFCTR_TP_0_LO 0xC6 -#define A3XX_RBBM_PERFCTR_TP_0_HI 0xC7 -#define A3XX_RBBM_PERFCTR_TP_1_LO 0xC8 -#define A3XX_RBBM_PERFCTR_TP_1_HI 0xC9 -#define A3XX_RBBM_PERFCTR_TP_2_LO 0xCA -#define A3XX_RBBM_PERFCTR_TP_2_HI 0xCB -#define A3XX_RBBM_PERFCTR_TP_3_LO 0xCC -#define A3XX_RBBM_PERFCTR_TP_3_HI 0xCD -#define A3XX_RBBM_PERFCTR_TP_4_LO 0xCE -#define A3XX_RBBM_PERFCTR_TP_4_HI 0xCF -#define A3XX_RBBM_PERFCTR_TP_5_LO 0xD0 -#define A3XX_RBBM_PERFCTR_TP_5_HI 0xD1 -#define A3XX_RBBM_PERFCTR_SP_0_LO 0xD2 -#define A3XX_RBBM_PERFCTR_SP_0_HI 0xD3 -#define A3XX_RBBM_PERFCTR_SP_1_LO 0xD4 -#define A3XX_RBBM_PERFCTR_SP_1_HI 0xD5 -#define A3XX_RBBM_PERFCTR_SP_2_LO 0xD6 -#define A3XX_RBBM_PERFCTR_SP_2_HI 0xD7 -#define A3XX_RBBM_PERFCTR_SP_3_LO 0xD8 -#define A3XX_RBBM_PERFCTR_SP_3_HI 0xD9 -#define A3XX_RBBM_PERFCTR_SP_4_LO 0xDA -#define A3XX_RBBM_PERFCTR_SP_4_HI 0xDB -#define A3XX_RBBM_PERFCTR_SP_5_LO 0xDC -#define A3XX_RBBM_PERFCTR_SP_5_HI 0xDD -#define A3XX_RBBM_PERFCTR_SP_6_LO 0xDE -#define A3XX_RBBM_PERFCTR_SP_6_HI 0xDF -#define A3XX_RBBM_PERFCTR_SP_7_LO 0xE0 -#define A3XX_RBBM_PERFCTR_SP_7_HI 0xE1 -#define A3XX_RBBM_PERFCTR_RB_0_LO 0xE2 -#define A3XX_RBBM_PERFCTR_RB_0_HI 0xE3 -#define A3XX_RBBM_PERFCTR_RB_1_LO 0xE4 -#define A3XX_RBBM_PERFCTR_RB_1_HI 0xE5 - -#define A3XX_RBBM_RBBM_CTL 0x100 -#define A3XX_RBBM_PERFCTR_PWR_0_LO 0x0EA -#define A3XX_RBBM_PERFCTR_PWR_0_HI 0x0EB -#define A3XX_RBBM_PERFCTR_PWR_1_LO 0x0EC -#define A3XX_RBBM_PERFCTR_PWR_1_HI 0x0ED -#define A3XX_RBBM_DEBUG_BUS_CTL 0x111 -#define A3XX_RBBM_DEBUG_BUS_DATA_STATUS 0x112 -#define A3XX_RBBM_DEBUG_BUS_STB_CTL0 0x11B -#define A3XX_RBBM_DEBUG_BUS_STB_CTL1 0x11C -#define A3XX_RBBM_INT_TRACE_BUS_CTL 0x11D -#define A3XX_RBBM_EXT_TRACE_BUS_CTL 0x11E -#define A3XX_RBBM_EXT_TRACE_STOP_CNT 0x11F -#define A3XX_RBBM_EXT_TRACE_START_CNT 0x120 
-#define A3XX_RBBM_EXT_TRACE_PERIOD_CNT 0x121 -#define A3XX_RBBM_EXT_TRACE_CMD 0x122 -#define A3XX_CP_RB_BASE 0x01C0 -#define A3XX_CP_RB_CNTL 0x01C1 -#define A3XX_CP_RB_RPTR 0x01C4 -#define A3XX_CP_RB_WPTR 0x01C5 -/* Following two are same as on A2XX, just in a different place */ -#define A3XX_CP_PFP_UCODE_ADDR 0x1C9 -#define A3XX_CP_PFP_UCODE_DATA 0x1CA -#define A3XX_CP_ROQ_ADDR 0x1CC -#define A3XX_CP_ROQ_DATA 0x1CD -#define A3XX_CP_MERCIU_ADDR 0x1D1 -#define A3XX_CP_MERCIU_DATA 0x1D2 -#define A3XX_CP_MERCIU_DATA2 0x1D3 -#define A3XX_CP_QUEUE_THRESHOLDS 0x01D5 -#define A3XX_CP_MEQ_ADDR 0x1DA -#define A3XX_CP_MEQ_DATA 0x1DB -#define A3XX_CP_STATE_DEBUG_INDEX 0x01EC -#define A3XX_CP_STATE_DEBUG_DATA 0x01ED -#define A3XX_CP_CNTL 0x01F4 -#define A3XX_CP_WFI_PEND_CTR 0x01F5 -#define A3XX_CP_ME_CNTL 0x01F6 -#define A3XX_CP_ME_STATUS 0x01F7 -#define A3XX_CP_ME_RAM_WADDR 0x01F8 -#define A3XX_CP_ME_RAM_RADDR 0x01F9 -#define A3XX_CP_ME_RAM_DATA 0x01FA -#define A3XX_CP_DEBUG 0x01FC - -#define A3XX_RBBM_PM_OVERRIDE2 0x039D - -#define A3XX_CP_PERFCOUNTER_SELECT 0x445 -#define A3XX_CP_IB1_BASE 0x0458 -#define A3XX_CP_IB1_BUFSZ 0x0459 -#define A3XX_CP_IB2_BASE 0x045A -#define A3XX_CP_IB2_BUFSZ 0x045B - -#define A3XX_CP_HW_FAULT 0x45C -#define A3XX_CP_PROTECT_CTRL 0x45E -#define A3XX_CP_PROTECT_STATUS 0x45F -#define A3XX_CP_PROTECT_REG_0 0x460 -#define A3XX_CP_STAT 0x047F -#define A3XX_CP_SCRATCH_REG0 0x578 -#define A3XX_CP_SCRATCH_REG6 0x57E -#define A3XX_CP_SCRATCH_REG7 0x57F -#define A3XX_VSC_SIZE_ADDRESS 0xC02 -#define A3XX_VSC_PIPE_DATA_ADDRESS_0 0xC07 -#define A3XX_VSC_PIPE_DATA_LENGTH_0 0xC08 -#define A3XX_VSC_PIPE_DATA_ADDRESS_1 0xC0A -#define A3XX_VSC_PIPE_DATA_LENGTH_1 0xC0B -#define A3XX_VSC_PIPE_DATA_ADDRESS_2 0xC0D -#define A3XX_VSC_PIPE_DATA_LENGTH_2 0xC0E -#define A3XX_VSC_PIPE_DATA_ADDRESS_3 0xC10 -#define A3XX_VSC_PIPE_DATA_LENGTH_3 0xC11 -#define A3XX_VSC_PIPE_DATA_ADDRESS_4 0xC13 -#define A3XX_VSC_PIPE_DATA_LENGTH_4 0xC14 -#define A3XX_VSC_PIPE_DATA_ADDRESS_5 
0xC16 -#define A3XX_VSC_PIPE_DATA_LENGTH_5 0xC17 -#define A3XX_VSC_PIPE_DATA_ADDRESS_6 0xC19 -#define A3XX_VSC_PIPE_DATA_LENGTH_6 0xC1A -#define A3XX_VSC_PIPE_DATA_ADDRESS_7 0xC1C -#define A3XX_VSC_PIPE_DATA_LENGTH_7 0xC1D -#define A3XX_PC_PERFCOUNTER0_SELECT 0xC48 -#define A3XX_PC_PERFCOUNTER1_SELECT 0xC49 -#define A3XX_PC_PERFCOUNTER2_SELECT 0xC4A -#define A3XX_PC_PERFCOUNTER3_SELECT 0xC4B -#define A3XX_GRAS_TSE_DEBUG_ECO 0xC81 -#define A3XX_GRAS_PERFCOUNTER0_SELECT 0xC88 -#define A3XX_GRAS_PERFCOUNTER1_SELECT 0xC89 -#define A3XX_GRAS_PERFCOUNTER2_SELECT 0xC8A -#define A3XX_GRAS_PERFCOUNTER3_SELECT 0xC8B -#define A3XX_GRAS_CL_USER_PLANE_X0 0xCA0 -#define A3XX_GRAS_CL_USER_PLANE_Y0 0xCA1 -#define A3XX_GRAS_CL_USER_PLANE_Z0 0xCA2 -#define A3XX_GRAS_CL_USER_PLANE_W0 0xCA3 -#define A3XX_GRAS_CL_USER_PLANE_X1 0xCA4 -#define A3XX_GRAS_CL_USER_PLANE_Y1 0xCA5 -#define A3XX_GRAS_CL_USER_PLANE_Z1 0xCA6 -#define A3XX_GRAS_CL_USER_PLANE_W1 0xCA7 -#define A3XX_GRAS_CL_USER_PLANE_X2 0xCA8 -#define A3XX_GRAS_CL_USER_PLANE_Y2 0xCA9 -#define A3XX_GRAS_CL_USER_PLANE_Z2 0xCAA -#define A3XX_GRAS_CL_USER_PLANE_W2 0xCAB -#define A3XX_GRAS_CL_USER_PLANE_X3 0xCAC -#define A3XX_GRAS_CL_USER_PLANE_Y3 0xCAD -#define A3XX_GRAS_CL_USER_PLANE_Z3 0xCAE -#define A3XX_GRAS_CL_USER_PLANE_W3 0xCAF -#define A3XX_GRAS_CL_USER_PLANE_X4 0xCB0 -#define A3XX_GRAS_CL_USER_PLANE_Y4 0xCB1 -#define A3XX_GRAS_CL_USER_PLANE_Z4 0xCB2 -#define A3XX_GRAS_CL_USER_PLANE_W4 0xCB3 -#define A3XX_GRAS_CL_USER_PLANE_X5 0xCB4 -#define A3XX_GRAS_CL_USER_PLANE_Y5 0xCB5 -#define A3XX_GRAS_CL_USER_PLANE_Z5 0xCB6 -#define A3XX_GRAS_CL_USER_PLANE_W5 0xCB7 -#define A3XX_RB_GMEM_BASE_ADDR 0xCC0 -#define A3XX_RB_DEBUG_ECO_CONTROLS_ADDR 0xCC1 -#define A3XX_RB_PERFCOUNTER0_SELECT 0xCC6 -#define A3XX_RB_PERFCOUNTER1_SELECT 0xCC7 -#define A3XX_RB_FRAME_BUFFER_DIMENSION 0xCE0 -#define A3XX_SQ_GPR_MANAGEMENT 0x0D00 -#define A3XX_SQ_INST_STORE_MANAGEMENT 0x0D02 -#define A3XX_HLSQ_PERFCOUNTER0_SELECT 0xE00 -#define 
A3XX_HLSQ_PERFCOUNTER1_SELECT 0xE01 -#define A3XX_HLSQ_PERFCOUNTER2_SELECT 0xE02 -#define A3XX_HLSQ_PERFCOUNTER3_SELECT 0xE03 -#define A3XX_HLSQ_PERFCOUNTER4_SELECT 0xE04 -#define A3XX_HLSQ_PERFCOUNTER5_SELECT 0xE05 -#define A3XX_TP0_CHICKEN 0x0E1E -#define A3XX_VFD_PERFCOUNTER0_SELECT 0xE44 -#define A3XX_VFD_PERFCOUNTER1_SELECT 0xE45 -#define A3XX_VPC_VPC_DEBUG_RAM_SEL 0xE61 -#define A3XX_VPC_VPC_DEBUG_RAM_READ 0xE62 -#define A3XX_VPC_PERFCOUNTER0_SELECT 0xE64 -#define A3XX_VPC_PERFCOUNTER1_SELECT 0xE65 -#define A3XX_UCHE_CACHE_MODE_CONTROL_REG 0xE82 -#define A3XX_UCHE_PERFCOUNTER0_SELECT 0xE84 -#define A3XX_UCHE_PERFCOUNTER1_SELECT 0xE85 -#define A3XX_UCHE_PERFCOUNTER2_SELECT 0xE86 -#define A3XX_UCHE_PERFCOUNTER3_SELECT 0xE87 -#define A3XX_UCHE_PERFCOUNTER4_SELECT 0xE88 -#define A3XX_UCHE_PERFCOUNTER5_SELECT 0xE89 -#define A3XX_UCHE_CACHE_INVALIDATE0_REG 0xEA0 -#define A3XX_UCHE_CACHE_INVALIDATE1_REG 0xEA1 -#define A3XX_UCHE_CACHE_WAYS_VFD 0xEA6 -#define A3XX_SP_PERFCOUNTER0_SELECT 0xEC4 -#define A3XX_SP_PERFCOUNTER1_SELECT 0xEC5 -#define A3XX_SP_PERFCOUNTER2_SELECT 0xEC6 -#define A3XX_SP_PERFCOUNTER3_SELECT 0xEC7 -#define A3XX_SP_PERFCOUNTER4_SELECT 0xEC8 -#define A3XX_SP_PERFCOUNTER5_SELECT 0xEC9 -#define A3XX_SP_PERFCOUNTER6_SELECT 0xECA -#define A3XX_SP_PERFCOUNTER7_SELECT 0xECB -#define A3XX_TP_PERFCOUNTER0_SELECT 0xF04 -#define A3XX_TP_PERFCOUNTER1_SELECT 0xF05 -#define A3XX_TP_PERFCOUNTER2_SELECT 0xF06 -#define A3XX_TP_PERFCOUNTER3_SELECT 0xF07 -#define A3XX_TP_PERFCOUNTER4_SELECT 0xF08 -#define A3XX_TP_PERFCOUNTER5_SELECT 0xF09 -#define A3XX_GRAS_CL_CLIP_CNTL 0x2040 -#define A3XX_GRAS_CL_GB_CLIP_ADJ 0x2044 -#define A3XX_GRAS_CL_VPORT_XOFFSET 0x2048 -#define A3XX_GRAS_CL_VPORT_XSCALE 0x2049 -#define A3XX_GRAS_CL_VPORT_YOFFSET 0x204A -#define A3XX_GRAS_CL_VPORT_YSCALE 0x204B -#define A3XX_GRAS_CL_VPORT_ZOFFSET 0x204C -#define A3XX_GRAS_CL_VPORT_ZSCALE 0x204D -#define A3XX_GRAS_SU_POINT_MINMAX 0x2068 -#define A3XX_GRAS_SU_POINT_SIZE 0x2069 -#define 
A3XX_GRAS_SU_POLY_OFFSET_SCALE 0x206C -#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET 0x206D -#define A3XX_GRAS_SU_MODE_CONTROL 0x2070 -#define A3XX_GRAS_SC_CONTROL 0x2072 -#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL 0x2074 -#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR 0x2075 -#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL 0x2079 -#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR 0x207A -#define A3XX_RB_MODE_CONTROL 0x20C0 -#define A3XX_RB_RENDER_CONTROL 0x20C1 -#define A3XX_RB_MSAA_CONTROL 0x20C2 -#define A3XX_RB_ALPHA_REFERENCE 0x20C3 -#define A3XX_RB_MRT_CONTROL0 0x20C4 -#define A3XX_RB_MRT_BUF_INFO0 0x20C5 -#define A3XX_RB_MRT_BUF_BASE0 0x20C6 -#define A3XX_RB_MRT_BLEND_CONTROL0 0x20C7 -#define A3XX_RB_MRT_CONTROL1 0x20C8 -#define A3XX_RB_MRT_BUF_INFO1 0x20C9 -#define A3XX_RB_MRT_BUF_BASE1 0x20CA -#define A3XX_RB_MRT_BLEND_CONTROL1 0x20CB -#define A3XX_RB_MRT_CONTROL2 0x20CC -#define A3XX_RB_MRT_BUF_INFO2 0x20CD -#define A3XX_RB_MRT_BUF_BASE2 0x20CE -#define A3XX_RB_MRT_BLEND_CONTROL2 0x20CF -#define A3XX_RB_MRT_CONTROL3 0x20D0 -#define A3XX_RB_MRT_BUF_INFO3 0x20D1 -#define A3XX_RB_MRT_BUF_BASE3 0x20D2 -#define A3XX_RB_MRT_BLEND_CONTROL3 0x20D3 -#define A3XX_RB_BLEND_RED 0x20E4 -#define A3XX_RB_BLEND_GREEN 0x20E5 -#define A3XX_RB_BLEND_BLUE 0x20E6 -#define A3XX_RB_BLEND_ALPHA 0x20E7 -#define A3XX_RB_CLEAR_COLOR_DW0 0x20E8 -#define A3XX_RB_CLEAR_COLOR_DW1 0x20E9 -#define A3XX_RB_CLEAR_COLOR_DW2 0x20EA -#define A3XX_RB_CLEAR_COLOR_DW3 0x20EB -#define A3XX_RB_COPY_CONTROL 0x20EC -#define A3XX_RB_COPY_DEST_BASE 0x20ED -#define A3XX_RB_COPY_DEST_PITCH 0x20EE -#define A3XX_RB_COPY_DEST_INFO 0x20EF -#define A3XX_RB_DEPTH_CONTROL 0x2100 -#define A3XX_RB_DEPTH_CLEAR 0x2101 -#define A3XX_RB_DEPTH_BUF_INFO 0x2102 -#define A3XX_RB_DEPTH_BUF_PITCH 0x2103 -#define A3XX_RB_STENCIL_CONTROL 0x2104 -#define A3XX_RB_STENCIL_CLEAR 0x2105 -#define A3XX_RB_STENCIL_BUF_INFO 0x2106 -#define A3XX_RB_STENCIL_BUF_PITCH 0x2107 -#define A3XX_RB_STENCIL_REF_MASK 0x2108 -#define A3XX_RB_STENCIL_REF_MASK_BF 0x2109 -#define 
A3XX_RB_LRZ_VSC_CONTROL 0x210C -#define A3XX_RB_WINDOW_OFFSET 0x210E -#define A3XX_RB_SAMPLE_COUNT_CONTROL 0x2110 -#define A3XX_RB_SAMPLE_COUNT_ADDR 0x2111 -#define A3XX_RB_Z_CLAMP_MIN 0x2114 -#define A3XX_RB_Z_CLAMP_MAX 0x2115 -#define A3XX_HLSQ_CONTROL_0_REG 0x2200 -#define A3XX_HLSQ_CONTROL_1_REG 0x2201 -#define A3XX_HLSQ_CONTROL_2_REG 0x2202 -#define A3XX_HLSQ_CONTROL_3_REG 0x2203 -#define A3XX_HLSQ_VS_CONTROL_REG 0x2204 -#define A3XX_HLSQ_FS_CONTROL_REG 0x2205 -#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG 0x2206 -#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG 0x2207 -#define A3XX_HLSQ_CL_NDRANGE_0_REG 0x220A -#define A3XX_HLSQ_CL_NDRANGE_1_REG 0x220B -#define A3XX_HLSQ_CL_NDRANGE_2_REG 0x220C -#define A3XX_HLSQ_CL_NDRANGE_3_REG 0x220D -#define A3XX_HLSQ_CL_NDRANGE_4_REG 0x220E -#define A3XX_HLSQ_CL_NDRANGE_5_REG 0x220F -#define A3XX_HLSQ_CL_NDRANGE_6_REG 0x2210 -#define A3XX_HLSQ_CL_CONTROL_0_REG 0x2211 -#define A3XX_HLSQ_CL_CONTROL_1_REG 0x2212 -#define A3XX_HLSQ_CL_KERNEL_CONST_REG 0x2214 -#define A3XX_HLSQ_CL_KERNEL_GROUP_X_REG 0x2215 -#define A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG 0x2216 -#define A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG 0x2217 -#define A3XX_HLSQ_CL_WG_OFFSET_REG 0x221A -#define A3XX_VFD_FETCH_INSTR_1_0 0x2247 -#define A3XX_VFD_FETCH_INSTR_1_1 0x2249 -#define A3XX_VFD_FETCH_INSTR_1_2 0x224B -#define A3XX_VFD_FETCH_INSTR_1_3 0x224D -#define A3XX_VFD_FETCH_INSTR_1_4 0x224F -#define A3XX_VFD_FETCH_INSTR_1_5 0x2251 -#define A3XX_VFD_FETCH_INSTR_1_6 0x2253 -#define A3XX_VFD_FETCH_INSTR_1_7 0x2255 -#define A3XX_VFD_FETCH_INSTR_1_8 0x2257 -#define A3XX_VFD_FETCH_INSTR_1_9 0x2259 -#define A3XX_VFD_FETCH_INSTR_1_A 0x225B -#define A3XX_VFD_FETCH_INSTR_1_B 0x225D -#define A3XX_VFD_FETCH_INSTR_1_C 0x225F -#define A3XX_VFD_FETCH_INSTR_1_D 0x2261 -#define A3XX_VFD_FETCH_INSTR_1_E 0x2263 -#define A3XX_VFD_FETCH_INSTR_1_F 0x2265 -#define A3XX_SP_SP_CTRL_REG 0x22C0 -#define A3XX_SP_VS_CTRL_REG0 0x22C4 -#define A3XX_SP_VS_CTRL_REG1 0x22C5 -#define A3XX_SP_VS_PARAM_REG 0x22C6 
-#define A3XX_SP_VS_OUT_REG_0 0x22C7 -#define A3XX_SP_VS_OUT_REG_1 0x22C8 -#define A3XX_SP_VS_OUT_REG_2 0x22C9 -#define A3XX_SP_VS_OUT_REG_3 0x22CA -#define A3XX_SP_VS_OUT_REG_4 0x22CB -#define A3XX_SP_VS_OUT_REG_5 0x22CC -#define A3XX_SP_VS_OUT_REG_6 0x22CD -#define A3XX_SP_VS_OUT_REG_7 0x22CE -#define A3XX_SP_VS_VPC_DST_REG_0 0x22D0 -#define A3XX_SP_VS_VPC_DST_REG_1 0x22D1 -#define A3XX_SP_VS_VPC_DST_REG_2 0x22D2 -#define A3XX_SP_VS_VPC_DST_REG_3 0x22D3 -#define A3XX_SP_VS_OBJ_OFFSET_REG 0x22D4 -#define A3XX_SP_VS_OBJ_START_REG 0x22D5 -#define A3XX_SP_VS_PVT_MEM_PARAM_REG 0x22D6 -#define A3XX_SP_VS_PVT_MEM_ADDR_REG 0x22D7 -#define A3XX_SP_VS_PVT_MEM_SIZE_REG 0x22D8 -#define A3XX_SP_VS_LENGTH_REG 0x22DF -#define A3XX_SP_FS_CTRL_REG0 0x22E0 -#define A3XX_SP_FS_CTRL_REG1 0x22E1 -#define A3XX_SP_FS_OBJ_OFFSET_REG 0x22E2 -#define A3XX_SP_FS_OBJ_START_REG 0x22E3 -#define A3XX_SP_FS_PVT_MEM_PARAM_REG 0x22E4 -#define A3XX_SP_FS_PVT_MEM_ADDR_REG 0x22E5 -#define A3XX_SP_FS_PVT_MEM_SIZE_REG 0x22E6 -#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x22E8 -#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_1 0x22E9 -#define A3XX_SP_FS_OUTPUT_REG 0x22EC -#define A3XX_SP_FS_MRT_REG_0 0x22F0 -#define A3XX_SP_FS_MRT_REG_1 0x22F1 -#define A3XX_SP_FS_MRT_REG_2 0x22F2 -#define A3XX_SP_FS_MRT_REG_3 0x22F3 -#define A3XX_SP_FS_IMAGE_OUTPUT_REG_0 0x22F4 -#define A3XX_SP_FS_IMAGE_OUTPUT_REG_1 0x22F5 -#define A3XX_SP_FS_IMAGE_OUTPUT_REG_2 0x22F6 -#define A3XX_SP_FS_IMAGE_OUTPUT_REG_3 0x22F7 -#define A3XX_SP_FS_LENGTH_REG 0x22FF -#define A3XX_PA_SC_AA_CONFIG 0x2301 -#define A3XX_VBIF_CLKON 0x3001 -#define A3XX_VBIF_ABIT_SORT 0x301C -#define A3XX_VBIF_ABIT_SORT_CONF 0x301D -#define A3XX_VBIF_GATE_OFF_WRREQ_EN 0x302A -#define A3XX_VBIF_IN_RD_LIM_CONF0 0x302C -#define A3XX_VBIF_IN_RD_LIM_CONF1 0x302D -#define A3XX_VBIF_IN_WR_LIM_CONF0 0x3030 -#define A3XX_VBIF_IN_WR_LIM_CONF1 0x3031 -#define A3XX_VBIF_OUT_RD_LIM_CONF0 0x3034 -#define A3XX_VBIF_OUT_WR_LIM_CONF0 0x3035 -#define A3XX_VBIF_DDR_OUT_MAX_BURST 0x3036 
-#define A3XX_VBIF_ARB_CTL 0x303C -#define A3XX_VBIF_ROUND_ROBIN_QOS_ARB 0x3049 -#define A3XX_VBIF_OUT_AXI_AOOO_EN 0x305E -#define A3XX_VBIF_OUT_AXI_AOOO 0x305F -#define A3XX_VBIF_PERF_CNT0_LO 0x3073 -#define A3XX_VBIF_PERF_CNT0_HI 0x3074 -#define A3XX_VBIF_PERF_CNT1_LO 0x3075 -#define A3XX_VBIF_PERF_CNT1_HI 0x3076 -#define A3XX_VBIF_PERF_PWR_CNT0_LO 0x3077 -#define A3XX_VBIF_PERF_PWR_CNT0_HI 0x3078 -#define A3XX_VBIF_PERF_PWR_CNT1_LO 0x3079 -#define A3XX_VBIF_PERF_PWR_CNT1_HI 0x307a -#define A3XX_VBIF_PERF_PWR_CNT2_LO 0x307b -#define A3XX_VBIF_PERF_PWR_CNT2_HI 0x307c - -#define A3XX_VBIF_XIN_HALT_CTRL0 0x3080 -#define A3XX_VBIF_XIN_HALT_CTRL0_MASK 0x3F -#define A30X_VBIF_XIN_HALT_CTRL0_MASK 0x7 - -#define A3XX_VBIF_XIN_HALT_CTRL1 0x3081 - -/* VBIF register offsets for A306 */ -#define A3XX_VBIF2_PERF_CNT_SEL0 0x30d0 -#define A3XX_VBIF2_PERF_CNT_SEL1 0x30d1 -#define A3XX_VBIF2_PERF_CNT_SEL2 0x30d2 -#define A3XX_VBIF2_PERF_CNT_SEL3 0x30d3 -#define A3XX_VBIF2_PERF_CNT_LOW0 0x30d8 -#define A3XX_VBIF2_PERF_CNT_LOW1 0x30d9 -#define A3XX_VBIF2_PERF_CNT_LOW2 0x30da -#define A3XX_VBIF2_PERF_CNT_LOW3 0x30db -#define A3XX_VBIF2_PERF_CNT_HIGH0 0x30e0 -#define A3XX_VBIF2_PERF_CNT_HIGH1 0x30e1 -#define A3XX_VBIF2_PERF_CNT_HIGH2 0x30e2 -#define A3XX_VBIF2_PERF_CNT_HIGH3 0x30e3 - -#define A3XX_VBIF2_PERF_PWR_CNT_EN0 0x3100 -#define A3XX_VBIF2_PERF_PWR_CNT_EN1 0x3101 -#define A3XX_VBIF2_PERF_PWR_CNT_EN2 0x3102 -#define A3XX_VBIF2_PERF_PWR_CNT_LOW0 0x3110 -#define A3XX_VBIF2_PERF_PWR_CNT_LOW1 0x3111 -#define A3XX_VBIF2_PERF_PWR_CNT_LOW2 0x3112 -#define A3XX_VBIF2_PERF_PWR_CNT_HIGH0 0x3118 -#define A3XX_VBIF2_PERF_PWR_CNT_HIGH1 0x3119 -#define A3XX_VBIF2_PERF_PWR_CNT_HIGH2 0x311a - -#define A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0 0x3800 -#define A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL1 0x3801 - -/* RBBM Debug bus block IDs */ -#define RBBM_BLOCK_ID_CP 0x1 -#define RBBM_BLOCK_ID_RBBM 0x2 -#define RBBM_BLOCK_ID_VBIF 0x3 -#define RBBM_BLOCK_ID_HLSQ 0x4 -#define 
RBBM_BLOCK_ID_UCHE 0x5 -#define RBBM_BLOCK_ID_PC 0x8 -#define RBBM_BLOCK_ID_VFD 0x9 -#define RBBM_BLOCK_ID_VPC 0xa -#define RBBM_BLOCK_ID_TSE 0xb -#define RBBM_BLOCK_ID_RAS 0xc -#define RBBM_BLOCK_ID_VSC 0xd -#define RBBM_BLOCK_ID_SP_0 0x10 -#define RBBM_BLOCK_ID_SP_1 0x11 -#define RBBM_BLOCK_ID_SP_2 0x12 -#define RBBM_BLOCK_ID_SP_3 0x13 -#define RBBM_BLOCK_ID_TPL1_0 0x18 -#define RBBM_BLOCK_ID_TPL1_1 0x19 -#define RBBM_BLOCK_ID_TPL1_2 0x1a -#define RBBM_BLOCK_ID_TPL1_3 0x1b -#define RBBM_BLOCK_ID_RB_0 0x20 -#define RBBM_BLOCK_ID_RB_1 0x21 -#define RBBM_BLOCK_ID_RB_2 0x22 -#define RBBM_BLOCK_ID_RB_3 0x23 -#define RBBM_BLOCK_ID_MARB_0 0x28 -#define RBBM_BLOCK_ID_MARB_1 0x29 -#define RBBM_BLOCK_ID_MARB_2 0x2a -#define RBBM_BLOCK_ID_MARB_3 0x2b - -/* RBBM_CLOCK_CTL default value */ -#define A3XX_RBBM_CLOCK_CTL_DEFAULT 0xAAAAAAAA -#define A320_RBBM_CLOCK_CTL_DEFAULT 0xBFFFFFFF -#define A330_RBBM_CLOCK_CTL_DEFAULT 0xBFFCFFFF - -#define A330_RBBM_GPR0_CTL_DEFAULT 0x00000000 -#define A330v2_RBBM_GPR0_CTL_DEFAULT 0x05515455 -#define A310_RBBM_GPR0_CTL_DEFAULT 0x000000AA - -/* COUNTABLE FOR SP PERFCOUNTER */ -#define SP_ALU_ACTIVE_CYCLES 0x1D -#define SP0_ICL1_MISSES 0x1A -#define SP_FS_CFLOW_INSTRUCTIONS 0x0C - -/* COUNTABLE FOR TSE PERFCOUNTER */ -#define TSE_INPUT_PRIM_NUM 0x0 - -/* VBIF countables */ -#define VBIF_AXI_TOTAL_BEATS 85 - -/* VBIF Recoverable HALT bit value */ -#define VBIF_RECOVERABLE_HALT_CTRL 0x1 - -/* - * CP DEBUG settings for A3XX core: - * DYNAMIC_CLK_DISABLE [27] - turn off the dynamic clock control - * MIU_128BIT_WRITE_ENABLE [25] - Allow 128 bit writes to the VBIF - */ -#define A3XX_CP_DEBUG_DEFAULT ((1 << 27) | (1 << 25)) - - -#endif diff --git a/a5xx_reg.h b/a5xx_reg.h index 137a11c3d9..6d36d76980 100644 --- a/a5xx_reg.h +++ b/a5xx_reg.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2014-2016,2019, The Linux Foundation. All rights reserved. + * Copyright (c) 2023, Qualcomm Innovation Center, Inc. 
All rights reserved. */ #ifndef _A5XX_REG_H @@ -885,6 +886,9 @@ #define A5XX_GDPM_INT_MASK 0xB811 #define A5XX_GPMU_BEC_ENABLE 0xB9A0 +/* VBIF countables */ +#define VBIF_AXI_TOTAL_BEATS 85 + /* ISENSE registers */ #define A5XX_GPU_CS_DECIMAL_ALIGN 0xC16A #define A5XX_GPU_CS_SENSOR_PARAM_CORE_1 0xC126 diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 7b0fdac348..1e3f59e2a4 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -16,73 +16,9 @@ static const struct adreno_gpu_core adreno_gpu_core_##_name = { \ .features = ADRENO_DEPRECATED, \ } -static const struct kgsl_regmap_list a306_vbif_regs[] = { - { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 }, - { A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000A }, - { A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000A }, -}; - -static const struct adreno_a3xx_core adreno_gpu_core_a306 = { - .base = { - DEFINE_ADRENO_REV(ADRENO_REV_A306, 3, 0, 6, 0), - .features = ADRENO_SOFT_FAULT_DETECT, - .gpudev = &adreno_a3xx_gpudev, - .perfcounters = &adreno_a3xx_perfcounters, - .uche_gmem_alignment = 0, - .gmem_size = SZ_128K, - .bus_width = 0, - .snapshot_size = 600 * SZ_1K, - }, - .pm4fw_name = "a300_pm4.fw", - .pfpfw_name = "a300_pfp.fw", - .vbif = a306_vbif_regs, - .vbif_count = ARRAY_SIZE(a306_vbif_regs), -}; - -static const struct kgsl_regmap_list a306a_vbif_regs[] = { - { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 }, - { A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00000010 }, - { A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00000010 }, -}; - -static const struct adreno_a3xx_core adreno_gpu_core_a306a = { - .base = { - DEFINE_ADRENO_REV(ADRENO_REV_A306A, 3, 0, 6, 0x20), - .features = ADRENO_SOFT_FAULT_DETECT, - .gpudev = &adreno_a3xx_gpudev, - .perfcounters = &adreno_a3xx_perfcounters, - .uche_gmem_alignment = 0, - .gmem_size = SZ_128K, - .bus_width = 16, - .snapshot_size = 600 * SZ_1K, - }, - .pm4fw_name = "a300_pm4.fw", - .pfpfw_name = "a300_pfp.fw", - .vbif = a306a_vbif_regs, - .vbif_count = ARRAY_SIZE(a306a_vbif_regs), -}; - -static const struct kgsl_regmap_list a304_vbif_regs[] = 
{ - { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 }, -}; - -static const struct adreno_a3xx_core adreno_gpu_core_a304 = { - .base = { - DEFINE_ADRENO_REV(ADRENO_REV_A304, 3, 0, 4, 0), - .features = ADRENO_SOFT_FAULT_DETECT, - .gpudev = &adreno_a3xx_gpudev, - .perfcounters = &adreno_a3xx_perfcounters, - .uche_gmem_alignment = 0, - .gmem_size = (SZ_64K + SZ_32K), - .bus_width = 0, - .snapshot_size = 600 * SZ_1K, - }, - .pm4fw_name = "a300_pm4.fw", - .pfpfw_name = "a300_pfp.fw", - .vbif = a304_vbif_regs, - .vbif_count = ARRAY_SIZE(a304_vbif_regs), -}; - +DEFINE_DEPRECATED_CORE(a304, ADRENO_REV_A304, 4, 0, 5, ANY_ID); +DEFINE_DEPRECATED_CORE(a306, ADRENO_REV_A306, 4, 0, 5, ANY_ID); +DEFINE_DEPRECATED_CORE(a306a, ADRENO_REV_A306A, 4, 0, 5, ANY_ID); DEFINE_DEPRECATED_CORE(a405, ADRENO_REV_A405, 4, 0, 5, ANY_ID); DEFINE_DEPRECATED_CORE(a418, ADRENO_REV_A418, 4, 1, 8, ANY_ID); DEFINE_DEPRECATED_CORE(a420, ADRENO_REV_A420, 4, 2, 0, ANY_ID); @@ -2558,9 +2494,9 @@ static const struct adreno_a6xx_core adreno_gpu_core_a663 = { }; static const struct adreno_gpu_core *adreno_gpulist[] = { - &adreno_gpu_core_a306.base, - &adreno_gpu_core_a306a.base, - &adreno_gpu_core_a304.base, + &adreno_gpu_core_a306, /* Deprecated */ + &adreno_gpu_core_a306a, /* Deprecated */ + &adreno_gpu_core_a304, /* Deprecated */ &adreno_gpu_core_a405, /* Deprecated */ &adreno_gpu_core_a418, /* Deprecated */ &adreno_gpu_core_a420, /* Deprecated */ diff --git a/adreno.c b/adreno.c index c251fa9025..28f3e6aa10 100644 --- a/adreno.c +++ b/adreno.c @@ -25,7 +25,6 @@ #include #include "adreno.h" -#include "adreno_a3xx.h" #include "adreno_a5xx.h" #include "adreno_a6xx.h" #include "adreno_compat.h" @@ -168,11 +167,8 @@ unsigned int adreno_get_rptr(struct adreno_ringbuffer *rb) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 rptr = 0; - if (adreno_is_a3xx(adreno_dev)) - kgsl_regread(device, A3XX_CP_RB_RPTR, &rptr); - else - kgsl_sharedmem_readl(device->scratch, &rptr, - SCRATCH_RB_OFFSET(rb->id, rptr)); + 
kgsl_sharedmem_readl(device->scratch, &rptr, + SCRATCH_RB_OFFSET(rb->id, rptr)); return rptr; } @@ -1298,11 +1294,10 @@ int adreno_device_probe(struct platform_device *pdev, kgsl_mmu_set_feature(device, KGSL_MMU_LLCC_ENABLE); /* - * Force no write allocate for A3x, A5x, A6x and all gen7 targets + * Force no write allocate for A5x, A6x and all gen7 targets * except gen_7_9_x. gen_7_9_x uses write allocate */ - if (adreno_is_a3xx(adreno_dev) || adreno_is_a5xx(adreno_dev) || - adreno_is_a6xx(adreno_dev) || + if (adreno_is_a5xx(adreno_dev) || adreno_is_a6xx(adreno_dev) || (adreno_is_gen7(adreno_dev) && !adreno_is_gen7_9_x(adreno_dev))) kgsl_mmu_set_feature(device, KGSL_MMU_FORCE_LLCC_NWA); @@ -2487,13 +2482,6 @@ static int adreno_soft_reset(struct kgsl_device *device) const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); int ret; - /* - * Don't allow a soft reset for a304 because the SMMU needs to be hard - * reset - */ - if (adreno_is_a304(adreno_dev)) - return -ENODEV; - if (gpudev->clear_pending_transactions) { ret = gpudev->clear_pending_transactions(adreno_dev); if (ret) @@ -3019,8 +3007,6 @@ static bool adreno_is_hw_collapsible(struct kgsl_device *device) if (gpudev->clear_pending_transactions(adreno_dev)) return false; - adreno_dispatcher_stop_fault_timer(device); - return true; } @@ -3114,11 +3100,6 @@ int adreno_verify_cmdobj(struct kgsl_device_private *dev_priv, */ ADRENO_DEVICE(device)->wake_on_touch = false; } - - /* A3XX does not have support for drawobj profiling */ - if (adreno_is_a3xx(ADRENO_DEVICE(device)) && - (drawobj[i]->flags & KGSL_DRAWOBJ_PROFILING)) - return -EOPNOTSUPP; } return 0; diff --git a/adreno.h b/adreno.h index 026a8ae2dd..ee6342dc48 100644 --- a/adreno.h +++ b/adreno.h @@ -507,7 +507,6 @@ struct adreno_dispatch_ops { * @cur_rb: Pointer to the current ringbuffer * @next_rb: Ringbuffer we are switching to during preemption * @prev_rb: Ringbuffer we are switching from during preemption - * @fast_hang_detect: Software fault 
detection availability * @ft_policy: Defines the fault tolerance policy * @long_ib_detect: Long IB detection availability * @cooperative_reset: Indicates if graceful death handshake is enabled @@ -575,7 +574,6 @@ struct adreno_device { struct adreno_ringbuffer *cur_rb; struct adreno_ringbuffer *next_rb; struct adreno_ringbuffer *prev_rb; - unsigned int fast_hang_detect; unsigned long ft_policy; bool long_ib_detect; bool cooperative_reset; @@ -662,20 +660,6 @@ struct adreno_device { struct kgsl_memdesc *critpkts_secure; /** @irq_mask: The current interrupt mask for the GPU device */ u32 irq_mask; - /* - * @soft_ft_regs: an array of registers for soft fault detection on a3xx - * targets - */ - u32 *soft_ft_regs; - /* - * @soft_ft_vals: an array of register values for soft fault detection - * on a3xx targets - */ - u32 *soft_ft_vals; - /* - * @soft_ft_vals: number of elements in @soft_ft_regs and @soft_ft_vals - */ - int soft_ft_count; /** @wake_on_touch: If true our last wakeup was due to a touch event */ bool wake_on_touch; /* @dispatch_ops: A pointer to a set of adreno dispatch ops */ @@ -1014,7 +998,6 @@ enum kgsl_ft_policy_bits { extern const struct adreno_power_ops adreno_power_operations; -extern const struct adreno_gpudev adreno_a3xx_gpudev; extern const struct adreno_gpudev adreno_a5xx_gpudev; extern const struct adreno_gpudev adreno_a6xx_gpudev; extern const struct adreno_gpudev adreno_a6xx_rgmu_gpudev; @@ -1104,16 +1087,6 @@ static inline int adreno_is_##_name(struct adreno_device *adreno_dev) \ return (ADRENO_GPUREV(adreno_dev) == (_id)); \ } -static inline int adreno_is_a3xx(struct adreno_device *adreno_dev) -{ - return ((ADRENO_GPUREV(adreno_dev) >= 300) && - (ADRENO_GPUREV(adreno_dev) < 400)); -} - -ADRENO_TARGET(a304, ADRENO_REV_A304) -ADRENO_TARGET(a306, ADRENO_REV_A306) -ADRENO_TARGET(a306a, ADRENO_REV_A306A) - static inline int adreno_is_a5xx(struct adreno_device *adreno_dev) { return ADRENO_GPUREV(adreno_dev) >= 500 && @@ -1293,7 +1266,7 @@ static 
inline bool adreno_checkreg_off(struct adreno_device *adreno_dev, * programming needs to be skipped for certain GPU cores. * Example: Certain registers on a5xx like IB1_BASE are 64 bit. * Common programming programs 64bit register but upper 32 bits - * are skipped in a3xx using ADRENO_REG_SKIP. + * are skipped in a5xx using ADRENO_REG_SKIP. */ if (gpudev->reg_offsets[offset_name] == ADRENO_REG_SKIP) return false; @@ -1587,12 +1560,6 @@ static inline bool adreno_rb_empty(struct adreno_ringbuffer *rb) return (adreno_get_rptr(rb) == rb->wptr); } -static inline bool adreno_soft_fault_detect(struct adreno_device *adreno_dev) -{ - return adreno_dev->fast_hang_detect && - !test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv); -} - static inline bool adreno_long_ib_detect(struct adreno_device *adreno_dev) { return adreno_dev->long_ib_detect && diff --git a/adreno_a3xx.c b/adreno_a3xx.c deleted file mode 100644 index fc9c6ab644..0000000000 --- a/adreno_a3xx.c +++ /dev/null @@ -1,1498 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. 
- */ - -#include -#include -#include -#include -#include - -#include "adreno.h" -#include "adreno_cp_parser.h" -#include "adreno_a3xx.h" -#include "adreno_pm4types.h" -#include "adreno_snapshot.h" -#include "adreno_trace.h" - -/* - * Define registers for a3xx that contain addresses used by the - * cp parser logic - */ -const unsigned int a3xx_cp_addr_regs[ADRENO_CP_ADDR_MAX] = { - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0, - A3XX_VSC_PIPE_DATA_ADDRESS_0), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_0, - A3XX_VSC_PIPE_DATA_LENGTH_0), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_1, - A3XX_VSC_PIPE_DATA_ADDRESS_1), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_1, - A3XX_VSC_PIPE_DATA_LENGTH_1), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_2, - A3XX_VSC_PIPE_DATA_ADDRESS_2), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_2, - A3XX_VSC_PIPE_DATA_LENGTH_2), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_3, - A3XX_VSC_PIPE_DATA_ADDRESS_3), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_3, - A3XX_VSC_PIPE_DATA_LENGTH_3), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_4, - A3XX_VSC_PIPE_DATA_ADDRESS_4), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_4, - A3XX_VSC_PIPE_DATA_LENGTH_4), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_5, - A3XX_VSC_PIPE_DATA_ADDRESS_5), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_5, - A3XX_VSC_PIPE_DATA_LENGTH_5), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_6, - A3XX_VSC_PIPE_DATA_ADDRESS_6), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_6, - A3XX_VSC_PIPE_DATA_LENGTH_6), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_7, - A3XX_VSC_PIPE_DATA_ADDRESS_7), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7, - A3XX_VSC_PIPE_DATA_LENGTH_7), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0, - A3XX_VFD_FETCH_INSTR_1_0), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_1, - 
A3XX_VFD_FETCH_INSTR_1_1), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_2, - A3XX_VFD_FETCH_INSTR_1_2), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_3, - A3XX_VFD_FETCH_INSTR_1_3), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_4, - A3XX_VFD_FETCH_INSTR_1_4), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_5, - A3XX_VFD_FETCH_INSTR_1_5), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_6, - A3XX_VFD_FETCH_INSTR_1_6), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_7, - A3XX_VFD_FETCH_INSTR_1_7), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_8, - A3XX_VFD_FETCH_INSTR_1_8), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_9, - A3XX_VFD_FETCH_INSTR_1_9), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_10, - A3XX_VFD_FETCH_INSTR_1_A), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_11, - A3XX_VFD_FETCH_INSTR_1_B), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_12, - A3XX_VFD_FETCH_INSTR_1_C), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_13, - A3XX_VFD_FETCH_INSTR_1_D), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_14, - A3XX_VFD_FETCH_INSTR_1_E), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15, - A3XX_VFD_FETCH_INSTR_1_F), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_SIZE_ADDRESS, - A3XX_VSC_SIZE_ADDRESS), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR, - A3XX_SP_VS_PVT_MEM_ADDR_REG), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR, - A3XX_SP_FS_PVT_MEM_ADDR_REG), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_VS_OBJ_START_REG, - A3XX_SP_VS_OBJ_START_REG), - ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_FS_OBJ_START_REG, - A3XX_SP_FS_OBJ_START_REG), -}; - -static const unsigned int _a3xx_pwron_fixup_fs_instructions[] = { - 0x00000000, 0x302CC300, 0x00000000, 0x302CC304, - 0x00000000, 0x302CC308, 0x00000000, 0x302CC30C, - 0x00000000, 0x302CC310, 0x00000000, 0x302CC314, - 0x00000000, 0x302CC318, 0x00000000, 0x302CC31C, - 0x00000000, 0x302CC320, 0x00000000, 0x302CC324, - 0x00000000, 0x302CC328, 
0x00000000, 0x302CC32C, - 0x00000000, 0x302CC330, 0x00000000, 0x302CC334, - 0x00000000, 0x302CC338, 0x00000000, 0x302CC33C, - 0x00000000, 0x00000400, 0x00020000, 0x63808003, - 0x00060004, 0x63828007, 0x000A0008, 0x6384800B, - 0x000E000C, 0x6386800F, 0x00120010, 0x63888013, - 0x00160014, 0x638A8017, 0x001A0018, 0x638C801B, - 0x001E001C, 0x638E801F, 0x00220020, 0x63908023, - 0x00260024, 0x63928027, 0x002A0028, 0x6394802B, - 0x002E002C, 0x6396802F, 0x00320030, 0x63988033, - 0x00360034, 0x639A8037, 0x003A0038, 0x639C803B, - 0x003E003C, 0x639E803F, 0x00000000, 0x00000400, - 0x00000003, 0x80D60003, 0x00000007, 0x80D60007, - 0x0000000B, 0x80D6000B, 0x0000000F, 0x80D6000F, - 0x00000013, 0x80D60013, 0x00000017, 0x80D60017, - 0x0000001B, 0x80D6001B, 0x0000001F, 0x80D6001F, - 0x00000023, 0x80D60023, 0x00000027, 0x80D60027, - 0x0000002B, 0x80D6002B, 0x0000002F, 0x80D6002F, - 0x00000033, 0x80D60033, 0x00000037, 0x80D60037, - 0x0000003B, 0x80D6003B, 0x0000003F, 0x80D6003F, - 0x00000000, 0x03000000, 0x00000000, 0x00000000, -}; - -/** - * _a3xx_pwron_fixup() - Initialize a special command buffer to run a - * post-power collapse shader workaround - * @adreno_dev: Pointer to a adreno_device struct - * - * Some targets require a special workaround shader to be executed after - * power-collapse. 
Construct the IB once at init time and keep it - * handy - * - * Returns: 0 on success or negative on error - */ -static int _a3xx_pwron_fixup(struct adreno_device *adreno_dev) -{ - unsigned int *cmds; - int count = ARRAY_SIZE(_a3xx_pwron_fixup_fs_instructions); - - /* Return if the fixup is already in place */ - if (test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv)) - return 0; - - adreno_dev->pwron_fixup = kgsl_allocate_global(KGSL_DEVICE(adreno_dev), - PAGE_SIZE, 0, KGSL_MEMFLAGS_GPUREADONLY, 0, "pwron_fixup"); - - if (IS_ERR(adreno_dev->pwron_fixup)) - return PTR_ERR(adreno_dev->pwron_fixup); - - cmds = adreno_dev->pwron_fixup->hostptr; - - *cmds++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2); - *cmds++ = 0x00000000; - *cmds++ = 0x90000000; - *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type3_packet(CP_REG_RMW, 3); - *cmds++ = A3XX_RBBM_CLOCK_CTL; - *cmds++ = 0xFFFCFFFF; - *cmds++ = 0x00010000; - *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1); - *cmds++ = 0x1E000150; - *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); - *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); - *cmds++ = 0x1E000150; - *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1); - *cmds++ = 0x1E000150; - *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_1_REG, 1); - *cmds++ = 0x00000040; - *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_2_REG, 1); - *cmds++ = 0x80000000; - *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_3_REG, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_HLSQ_VS_CONTROL_REG, 1); - *cmds++ = 0x00000001; - *cmds++ = cp_type0_packet(A3XX_HLSQ_FS_CONTROL_REG, 1); - *cmds++ = 0x0D001002; - *cmds++ = cp_type0_packet(A3XX_HLSQ_CONST_VSPRESV_RANGE_REG, 1); - *cmds++ = 0x00000000; - *cmds++ = 
cp_type0_packet(A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_0_REG, 1); - *cmds++ = 0x00401101; - *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_1_REG, 1); - *cmds++ = 0x00000400; - *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_2_REG, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_3_REG, 1); - *cmds++ = 0x00000001; - *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_4_REG, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_5_REG, 1); - *cmds++ = 0x00000001; - *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_6_REG, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_0_REG, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_1_REG, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_CONST_REG, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_X_REG, 1); - *cmds++ = 0x00000010; - *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG, 1); - *cmds++ = 0x00000001; - *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG, 1); - *cmds++ = 0x00000001; - *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_WG_OFFSET_REG, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_SP_CTRL_REG, 1); - *cmds++ = 0x00040000; - *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1); - *cmds++ = 0x0000000A; - *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG1, 1); - *cmds++ = 0x00000001; - *cmds++ = cp_type0_packet(A3XX_SP_VS_PARAM_REG, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_0, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_1, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_2, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_3, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_4, 1); - *cmds++ = 0x00000000; - *cmds++ = 
cp_type0_packet(A3XX_SP_VS_OUT_REG_5, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_6, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_7, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_0, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_1, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_2, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_3, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_VS_OBJ_OFFSET_REG, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_VS_OBJ_START_REG, 1); - *cmds++ = 0x00000004; - *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_PARAM_REG, 1); - *cmds++ = 0x04008001; - *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_ADDR_REG, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_VS_LENGTH_REG, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG0, 1); - *cmds++ = 0x0DB0400A; - *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG1, 1); - *cmds++ = 0x00300402; - *cmds++ = cp_type0_packet(A3XX_SP_FS_OBJ_OFFSET_REG, 1); - *cmds++ = 0x00010000; - *cmds++ = cp_type0_packet(A3XX_SP_FS_OBJ_START_REG, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_PARAM_REG, 1); - *cmds++ = 0x04008001; - *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_ADDR_REG, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_FS_FLAT_SHAD_MODE_REG_1, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_FS_OUTPUT_REG, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_0, 1); - *cmds++ = 0x00000000; - *cmds++ = 
cp_type0_packet(A3XX_SP_FS_MRT_REG_1, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_2, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_3, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_0, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_1, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_2, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_3, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_SP_FS_LENGTH_REG, 1); - *cmds++ = 0x0000000D; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_CLIP_CNTL, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_GB_CLIP_ADJ, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_XOFFSET, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_XSCALE, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_YOFFSET, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_YSCALE, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_ZOFFSET, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_ZSCALE, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X0, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y0, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z0, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W0, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X1, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y1, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z1, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W1, 1); - *cmds++ = 0x00000000; - *cmds++ = 
cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X2, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y2, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z2, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W2, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X3, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y3, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z3, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W3, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X4, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y4, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z4, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W4, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X5, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y5, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z5, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W5, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_SU_POINT_MINMAX, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_SU_POINT_SIZE, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_SU_POLY_OFFSET_OFFSET, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_SU_POLY_OFFSET_SCALE, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_SU_MODE_CONTROL, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_SC_CONTROL, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_SC_SCREEN_SCISSOR_BR, 1); - *cmds++ = 
0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_SC_WINDOW_SCISSOR_BR, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_TSE_DEBUG_ECO, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER0_SELECT, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER1_SELECT, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER2_SELECT, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER3_SELECT, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_MODE_CONTROL, 1); - *cmds++ = 0x00008000; - *cmds++ = cp_type0_packet(A3XX_RB_RENDER_CONTROL, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_MSAA_CONTROL, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_ALPHA_REFERENCE, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL0, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL1, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL2, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL3, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO0, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO1, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO2, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO3, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE0, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE1, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE2, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE3, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL0, 1); - *cmds++ = 0x00000000; - *cmds++ = 
cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL1, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL2, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL3, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_BLEND_RED, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_BLEND_GREEN, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_BLEND_BLUE, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_BLEND_ALPHA, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW0, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW1, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW2, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW3, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_COPY_CONTROL, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_COPY_DEST_BASE, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_COPY_DEST_PITCH, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_COPY_DEST_INFO, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_DEPTH_CONTROL, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_DEPTH_CLEAR, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_DEPTH_BUF_INFO, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_DEPTH_BUF_PITCH, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_CONTROL, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_CLEAR, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_BUF_INFO, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_BUF_PITCH, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_REF_MASK, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_REF_MASK_BF, 1); - *cmds++ = 0x00000000; - 
*cmds++ = cp_type0_packet(A3XX_RB_LRZ_VSC_CONTROL, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_WINDOW_OFFSET, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_SAMPLE_COUNT_CONTROL, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_SAMPLE_COUNT_ADDR, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_Z_CLAMP_MIN, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_Z_CLAMP_MAX, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_GMEM_BASE_ADDR, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_DEBUG_ECO_CONTROLS_ADDR, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_PERFCOUNTER0_SELECT, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_PERFCOUNTER1_SELECT, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_RB_FRAME_BUFFER_DIMENSION, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4); - *cmds++ = (1 << CP_LOADSTATE_DSTOFFSET_SHIFT) | - (0 << CP_LOADSTATE_STATESRC_SHIFT) | - (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) | - (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); - *cmds++ = (1 << CP_LOADSTATE_STATETYPE_SHIFT) | - (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); - *cmds++ = 0x00400000; - *cmds++ = 0x00000000; - *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4); - *cmds++ = (2 << CP_LOADSTATE_DSTOFFSET_SHIFT) | - (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) | - (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); - *cmds++ = (1 << CP_LOADSTATE_STATETYPE_SHIFT); - *cmds++ = 0x00400220; - *cmds++ = 0x00000000; - *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4); - *cmds++ = (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) | - (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); - *cmds++ = (1 << CP_LOADSTATE_STATETYPE_SHIFT); - *cmds++ = 0x00000000; - *cmds++ = 0x00000000; - *cmds++ = cp_type3_packet(CP_LOAD_STATE, 2 + count); - *cmds++ = (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) | - (13 << CP_LOADSTATE_NUMOFUNITS_SHIFT); - *cmds++ = 0x00000000; - - memcpy(cmds, 
_a3xx_pwron_fixup_fs_instructions, count << 2); - - cmds += count; - - *cmds++ = cp_type3_packet(CP_EXEC_CL, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_0_REG, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1); - *cmds++ = 0x1E000150; - *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); - *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); - *cmds++ = 0x1E000050; - *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); - *cmds++ = 0x00000000; - *cmds++ = cp_type3_packet(CP_REG_RMW, 3); - *cmds++ = A3XX_RBBM_CLOCK_CTL; - *cmds++ = 0xFFFCFFFF; - *cmds++ = 0x00000000; - *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); - *cmds++ = 0x00000000; - - /* - * Remember the number of dwords in the command buffer for when we - * program the indirect buffer call in the ringbuffer - */ - adreno_dev->pwron_fixup_dwords = - (cmds - (unsigned int *) adreno_dev->pwron_fixup->hostptr); - - /* Mark the flag in ->priv to show that we have the fix */ - set_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv); - return 0; -} - -static int a3xx_probe(struct platform_device *pdev, - u32 chipid, const struct adreno_gpu_core *gpucore) -{ - struct adreno_device *adreno_dev; - struct kgsl_device *device; - int ret; - - adreno_dev = (struct adreno_device *) - of_device_get_match_data(&pdev->dev); - - memset(adreno_dev, 0, sizeof(*adreno_dev)); - - adreno_dev->gpucore = gpucore; - adreno_dev->chipid = chipid; - - adreno_reg_offset_init(gpucore->gpudev->reg_offsets); - - - device = KGSL_DEVICE(adreno_dev); - - timer_setup(&device->idle_timer, kgsl_timer, 0); - - INIT_WORK(&device->idle_check_ws, kgsl_idle_check); - - ret = adreno_device_probe(pdev, adreno_dev); - if (ret) - return ret; - - a3xx_coresight_init(adreno_dev); - - return 
adreno_dispatcher_init(adreno_dev); -} - -static int a3xx_send_me_init(struct adreno_device *adreno_dev, - struct adreno_ringbuffer *rb) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - unsigned int *cmds; - int ret; - - cmds = adreno_ringbuffer_allocspace(rb, 18); - if (IS_ERR(cmds)) - return PTR_ERR(cmds); - - *cmds++ = cp_type3_packet(CP_ME_INIT, 17); - - *cmds++ = 0x000003f7; - *cmds++ = 0x00000000; - *cmds++ = 0x00000000; - *cmds++ = 0x00000000; - *cmds++ = 0x00000080; - *cmds++ = 0x00000100; - *cmds++ = 0x00000180; - *cmds++ = 0x00006600; - *cmds++ = 0x00000150; - *cmds++ = 0x0000014e; - *cmds++ = 0x00000154; - *cmds++ = 0x00000001; - *cmds++ = 0x00000000; - *cmds++ = 0x00000000; - - /* Enable protected mode registers for A3XX */ - *cmds++ = 0x20000000; - - *cmds++ = 0x00000000; - *cmds++ = 0x00000000; - - /* Submit the command to the ringbuffer */ - kgsl_pwrscale_busy(device); - kgsl_regwrite(device, A3XX_CP_RB_WPTR, rb->_wptr); - rb->wptr = rb->_wptr; - - ret = adreno_spin_idle(adreno_dev, 2000); - if (ret) { - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - dev_err(device->dev, "CP initialization failed to idle\n"); - kgsl_device_snapshot(device, NULL, NULL, false); - } - - return ret; -} - -static void a3xx_microcode_load(struct adreno_device *adreno_dev); - -static int a3xx_rb_start(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); - - memset(rb->buffer_desc->hostptr, 0xaa, KGSL_RB_SIZE); - rb->wptr = 0; - rb->_wptr = 0; - rb->wptr_preempt_end = ~0; - - /* - * The size of the ringbuffer in the hardware is the log2 - * representation of the size in quadwords (sizedwords / 2). - * Also disable the host RPTR shadow register as it might be unreliable - * in certain circumstances. 
- */ - - kgsl_regwrite(device, A3XX_CP_RB_CNTL, - (ilog2(KGSL_RB_DWORDS >> 1) & 0x3F) | - (1 << 27)); - - kgsl_regwrite(device, A3XX_CP_RB_BASE, rb->buffer_desc->gpuaddr); - - a3xx_microcode_load(adreno_dev); - - /* clear ME_HALT to start micro engine */ - kgsl_regwrite(device, A3XX_CP_ME_CNTL, 0); - - return a3xx_send_me_init(adreno_dev, rb); -} - -/* - * a3xx soft fault detection - * - * a3xx targets do not have hardware fault detection so we need to do it the old - * fashioned way by periodically reading a set of registers and counters and - * checking that they are advancing. There are 6 registers and four 64 bit - * counters that we keep an eye on. - */ - -#define A3XX_SOFT_FAULT_DETECT_REGS 6 -#define A3XX_SOFT_FAULT_DETECT_COUNTERS 4 -#define A3XX_SOFT_FAULT_DETECT_COUNT \ - (A3XX_SOFT_FAULT_DETECT_REGS + (A3XX_SOFT_FAULT_DETECT_COUNTERS * 2)) - -static bool a3xx_soft_fault_detect_isidle(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - u32 reg; - - if (kgsl_state_is_awake(device)) { - if (!adreno_rb_empty(adreno_dev->cur_rb)) - return false; - - /* only check rbbm status to determine if GPU is idle */ - kgsl_regread(device, A3XX_RBBM_STATUS, ®); - - if (reg & 0x7ffffffe) - return false; - } - - memset(adreno_dev->soft_ft_vals, 0, A3XX_SOFT_FAULT_DETECT_COUNT << 2); - return true; -} - -/* Read the fault detect registers and compare them to the stored version */ -static int a3xx_soft_fault_detect_read_compare(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); - int i, ret = 0; - unsigned int ts; - - if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv)) - return 1; - - /* Check to see if the device is idle - if so report no hang */ - if (a3xx_soft_fault_detect_isidle(adreno_dev)) - ret = 1; - - for (i = 0; i < A3XX_SOFT_FAULT_DETECT_COUNT; i++) { - unsigned int val; - - if 
(!adreno_dev->soft_ft_regs[i]) - continue; - - kgsl_regread(device, adreno_dev->soft_ft_regs[i], &val); - if (val != adreno_dev->soft_ft_vals[i]) - ret = 1; - adreno_dev->soft_ft_vals[i] = val; - } - - if (!adreno_rb_readtimestamp(adreno_dev, adreno_dev->cur_rb, - KGSL_TIMESTAMP_RETIRED, &ts)) { - if (ts != rb->fault_detect_ts) - ret = 1; - - rb->fault_detect_ts = ts; - } - - return ret; -} - -/* - * This is called on a regular basis while cmdobjs are inflight. Fault - * detection registers are read and compared to the existing values - if they - * changed then the GPU is still running. If they are the same between - * subsequent calls then the GPU may have faulted - */ -static void a3xx_soft_fault_timer(struct timer_list *t) -{ - struct adreno_dispatcher *dispatcher = from_timer(dispatcher, - t, fault_timer); - struct adreno_device *adreno_dev = container_of(dispatcher, - struct adreno_device, dispatcher); - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - /* Leave if the user decided to turn off fast hang detection */ - if (!adreno_soft_fault_detect(adreno_dev)) - return; - - if (adreno_gpu_fault(adreno_dev)) { - adreno_dispatcher_schedule(device); - return; - } - - /* - * Read the fault registers - if it returns 0 then they haven't changed - * so mark the dispatcher as faulted and schedule the work loop. - */ - - if (!a3xx_soft_fault_detect_read_compare(adreno_dev)) - adreno_dispatcher_fault(adreno_dev, ADRENO_SOFT_FAULT); - else if (dispatcher->inflight > 0) - adreno_dispatcher_start_fault_timer(adreno_dev); -} - -/* - * Start fault detection. 
The counters are only assigned while fault detection - * is running so that they can be used for other purposes if fault detection is - * disabled - */ -static void a3xx_soft_fault_detect_start(struct adreno_device *adreno_dev) -{ - u32 *regs = &adreno_dev->soft_ft_regs[A3XX_SOFT_FAULT_DETECT_COUNTERS]; - int ret = 0; - - if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv)) - return; - - if (adreno_dev->fast_hang_detect == 1) - return; - - ret |= adreno_perfcounter_kernel_get(adreno_dev, - KGSL_PERFCOUNTER_GROUP_SP, SP_ALU_ACTIVE_CYCLES, - ®s[0], ®s[1]); - - ret |= adreno_perfcounter_kernel_get(adreno_dev, - KGSL_PERFCOUNTER_GROUP_SP, SP0_ICL1_MISSES, - ®s[2], ®s[3]); - - ret |= adreno_perfcounter_kernel_get(adreno_dev, - KGSL_PERFCOUNTER_GROUP_SP, SP_FS_CFLOW_INSTRUCTIONS, - ®s[4], ®s[5]); - - ret |= adreno_perfcounter_kernel_get(adreno_dev, - KGSL_PERFCOUNTER_GROUP_TSE, TSE_INPUT_PRIM_NUM, - ®s[6], ®s[7]); - - WARN(ret, "Unable to allocate one or more fault detect counters\n"); - adreno_dev->fast_hang_detect = 1; -} - -/* Helper function to put back a counter */ -static void put_counter(struct adreno_device *adreno_dev, - int group, int countable, u32 *lo, u32 *hi) -{ - adreno_perfcounter_put(adreno_dev, group, countable, - PERFCOUNTER_FLAG_KERNEL); - - *lo = 0; - *hi = 0; -} - -/* Stop fault detection and return the counters */ -static void a3xx_soft_fault_detect_stop(struct adreno_device *adreno_dev) -{ - u32 *regs = &adreno_dev->soft_ft_regs[A3XX_SOFT_FAULT_DETECT_COUNTERS]; - - if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv)) - return; - - if (!adreno_dev->fast_hang_detect) - return; - - put_counter(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP, SP_ALU_ACTIVE_CYCLES, - ®s[0], ®s[1]); - - put_counter(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP, SP0_ICL1_MISSES, - ®s[2], ®s[3]); - - put_counter(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP, - SP_FS_CFLOW_INSTRUCTIONS, ®s[4], ®s[5]); - - put_counter(adreno_dev, KGSL_PERFCOUNTER_GROUP_TSE, 
TSE_INPUT_PRIM_NUM, - ®s[6], ®s[7]); - - adreno_dev->fast_hang_detect = 0; -} - -/* Initialize the registers and set up the data structures */ -static void a3xx_soft_fault_detect_init(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - if (!ADRENO_FEATURE(adreno_dev, ADRENO_SOFT_FAULT_DETECT)) - return; - - /* Disable the fast hang detect bit until we know its a go */ - adreno_dev->fast_hang_detect = 0; - - adreno_dev->soft_ft_regs = devm_kcalloc(&device->pdev->dev, - A3XX_SOFT_FAULT_DETECT_COUNT, sizeof(u32), GFP_KERNEL); - - adreno_dev->soft_ft_vals = devm_kcalloc(&device->pdev->dev, - A3XX_SOFT_FAULT_DETECT_COUNT, sizeof(u32), GFP_KERNEL); - - if (!adreno_dev->soft_ft_regs || !adreno_dev->soft_ft_vals) - return; - - adreno_dev->soft_ft_count = A3XX_SOFT_FAULT_DETECT_COUNT; - - adreno_dev->soft_ft_regs[0] = A3XX_RBBM_STATUS; - adreno_dev->soft_ft_regs[1] = A3XX_CP_RB_RPTR; - adreno_dev->soft_ft_regs[2] = A3XX_CP_IB1_BASE; - adreno_dev->soft_ft_regs[3] = A3XX_CP_IB1_BUFSZ; - adreno_dev->soft_ft_regs[4] = A3XX_CP_IB2_BASE; - adreno_dev->soft_ft_regs[5] = A3XX_CP_IB2_BUFSZ; - - set_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv); - - a3xx_soft_fault_detect_start(adreno_dev); -} - -static void a3xx_remove(struct adreno_device *adreno_dev) -{ - a3xx_soft_fault_detect_stop(adreno_dev); -} - -static int a3xx_microcode_read(struct adreno_device *adreno_dev); - -/* - * a3xx_init() - Initialize gpu specific data - * @adreno_dev: Pointer to adreno device - */ -static int a3xx_init(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct kgsl_iommu *iommu = KGSL_IOMMU(device); - struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; - int ret; - - /* - * Set up the a3xx only soft fault timer before heading into the generic - * dispatcher setup - */ - if (ADRENO_FEATURE(adreno_dev, ADRENO_SOFT_FAULT_DETECT)) - timer_setup(&dispatcher->fault_timer, a3xx_soft_fault_timer, 
0); - - ret = a3xx_ringbuffer_init(adreno_dev); - if (ret) - return ret; - - ret = a3xx_microcode_read(adreno_dev); - if (ret) - return ret; - - _a3xx_pwron_fixup(adreno_dev); - - ret = adreno_allocate_global(device, &iommu->setstate, PAGE_SIZE, - 0, KGSL_MEMFLAGS_GPUREADONLY, 0, "setstate"); - - if (!ret) - kgsl_sharedmem_writel(iommu->setstate, - KGSL_IOMMU_SETSTATE_NOP_OFFSET, - cp_type3_packet(CP_NOP, 1)); - - kgsl_mmu_set_feature(device, KGSL_MMU_NEED_GUARD_PAGE); - - /* Put the hardware in a responsive state to set up fault detection*/ - ret = kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE); - if (ret) - return ret; - - a3xx_soft_fault_detect_init(adreno_dev); - - kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER); - return 0; -} - -/* - * a3xx_err_callback() - Call back for a3xx error interrupts - * @adreno_dev: Pointer to device - * @bit: Interrupt bit - */ -static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - unsigned int reg; - - switch (bit) { - case A3XX_INT_RBBM_AHB_ERROR: { - kgsl_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, ®); - - /* - * Return the word address of the erroring register so that it - * matches the register specification - */ - dev_crit_ratelimited(device->dev, - "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n", - reg & (1 << 28) ? 
"WRITE" : "READ", - (reg & 0xFFFFF) >> 2, - (reg >> 20) & 0x3, - (reg >> 24) & 0xF); - - /* Clear the error */ - kgsl_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3)); - break; - } - case A3XX_INT_RBBM_ATB_BUS_OVERFLOW: - dev_crit_ratelimited(device->dev, - "RBBM: ATB bus oveflow\n"); - break; - case A3XX_INT_CP_T0_PACKET_IN_IB: - dev_crit_ratelimited(device->dev, - "ringbuffer TO packet in IB interrupt\n"); - break; - case A3XX_INT_CP_OPCODE_ERROR: - dev_crit_ratelimited(device->dev, - "ringbuffer opcode error interrupt\n"); - break; - case A3XX_INT_CP_RESERVED_BIT_ERROR: - dev_crit_ratelimited(device->dev, - "ringbuffer reserved bit error interrupt\n"); - break; - case A3XX_INT_CP_HW_FAULT: - kgsl_regread(device, A3XX_CP_HW_FAULT, ®); - dev_crit_ratelimited(device->dev, - "CP | Ringbuffer HW fault | status=%x\n", - reg); - break; - case A3XX_INT_CP_REG_PROTECT_FAULT: - kgsl_regread(device, A3XX_CP_PROTECT_STATUS, ®); - dev_crit_ratelimited(device->dev, - "CP | Protected mode error| %s | addr=%x\n", - reg & (1 << 24) ? 
"WRITE" : "READ", - (reg & 0xFFFFF) >> 2); - break; - case A3XX_INT_CP_AHB_ERROR_HALT: - dev_crit_ratelimited(device->dev, - "ringbuffer AHB error interrupt\n"); - break; - case A3XX_INT_UCHE_OOB_ACCESS: - dev_crit_ratelimited(device->dev, - "UCHE: Out of bounds access\n"); - break; - default: - dev_crit_ratelimited(device->dev, "Unknown interrupt\n"); - } -} - -#define A3XX_INT_MASK \ - ((1 << A3XX_INT_RBBM_AHB_ERROR) | \ - (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \ - (1 << A3XX_INT_CP_T0_PACKET_IN_IB) | \ - (1 << A3XX_INT_CP_OPCODE_ERROR) | \ - (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \ - (1 << A3XX_INT_CP_HW_FAULT) | \ - (1 << A3XX_INT_CP_IB1_INT) | \ - (1 << A3XX_INT_CP_IB2_INT) | \ - (1 << A3XX_INT_CP_RB_INT) | \ - (1 << A3XX_INT_CACHE_FLUSH_TS) | \ - (1 << A3XX_INT_CP_REG_PROTECT_FAULT) | \ - (1 << A3XX_INT_CP_AHB_ERROR_HALT) | \ - (1 << A3XX_INT_UCHE_OOB_ACCESS)) - -static const struct adreno_irq_funcs a3xx_irq_funcs[32] = { - ADRENO_IRQ_CALLBACK(NULL), /* 0 - RBBM_GPU_IDLE */ - ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 1 - RBBM_AHB_ERROR */ - ADRENO_IRQ_CALLBACK(NULL), /* 2 - RBBM_REG_TIMEOUT */ - ADRENO_IRQ_CALLBACK(NULL), /* 3 - RBBM_ME_MS_TIMEOUT */ - ADRENO_IRQ_CALLBACK(NULL), /* 4 - RBBM_PFP_MS_TIMEOUT */ - ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 5 - RBBM_ATB_BUS_OVERFLOW */ - ADRENO_IRQ_CALLBACK(NULL), /* 6 - RBBM_VFD_ERROR */ - ADRENO_IRQ_CALLBACK(NULL), /* 7 - CP_SW */ - ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 8 - CP_T0_PACKET_IN_IB */ - ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 9 - CP_OPCODE_ERROR */ - /* 10 - CP_RESERVED_BIT_ERROR */ - ADRENO_IRQ_CALLBACK(a3xx_err_callback), - ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 11 - CP_HW_FAULT */ - ADRENO_IRQ_CALLBACK(NULL), /* 12 - CP_DMA */ - ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 13 - CP_IB2_INT */ - ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 14 - CP_IB1_INT */ - ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 15 - CP_RB_INT */ - /* 16 - CP_REG_PROTECT_FAULT */ - 
ADRENO_IRQ_CALLBACK(a3xx_err_callback), - ADRENO_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */ - ADRENO_IRQ_CALLBACK(NULL), /* 18 - CP_VS_DONE_TS */ - ADRENO_IRQ_CALLBACK(NULL), /* 19 - CP_PS_DONE_TS */ - ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */ - /* 21 - CP_AHB_ERROR_FAULT */ - ADRENO_IRQ_CALLBACK(a3xx_err_callback), - ADRENO_IRQ_CALLBACK(NULL), /* 22 - Unused */ - ADRENO_IRQ_CALLBACK(NULL), /* 23 - Unused */ - /* 24 - MISC_HANG_DETECT */ - ADRENO_IRQ_CALLBACK(adreno_hang_int_callback), - ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 25 - UCHE_OOB_ACCESS */ -}; - -static struct { - u32 reg; - u32 base; - u32 count; -} a3xx_protected_blocks[] = { - /* RBBM */ - { A3XX_CP_PROTECT_REG_0, 0x0018, 0 }, - { A3XX_CP_PROTECT_REG_0 + 1, 0x0020, 2 }, - { A3XX_CP_PROTECT_REG_0 + 2, 0x0033, 0 }, - { A3XX_CP_PROTECT_REG_0 + 3, 0x0042, 0 }, - { A3XX_CP_PROTECT_REG_0 + 4, 0x0050, 4 }, - { A3XX_CP_PROTECT_REG_0 + 5, 0x0063, 0 }, - { A3XX_CP_PROTECT_REG_0 + 6, 0x0100, 4 }, - /* CP */ - { A3XX_CP_PROTECT_REG_0 + 7, 0x01c0, 5 }, - { A3XX_CP_PROTECT_REG_0 + 8, 0x01ec, 1 }, - { A3XX_CP_PROTECT_REG_0 + 9, 0x01f6, 1 }, - { A3XX_CP_PROTECT_REG_0 + 10, 0x01f8, 2 }, - { A3XX_CP_PROTECT_REG_0 + 11, 0x045e, 2 }, - { A3XX_CP_PROTECT_REG_0 + 12, 0x0460, 4 }, - /* RB */ - { A3XX_CP_PROTECT_REG_0 + 13, 0x0cc0, 0 }, - /* VBIF */ - { A3XX_CP_PROTECT_REG_0 + 14, 0x3000, 6 }, - /* - * SMMU - * For A3xx, base offset for smmu region is 0xa000 and length is - * 0x1000 bytes. Offset must be in dword and length of the block - * must be ilog2(dword length). - * 0xa000 >> 2 = 0x2800, ilog2(0x1000 >> 2) = 10. 
- */ - { A3XX_CP_PROTECT_REG_0 + 15, 0x2800, 10 }, - /* There are no remaining protected mode registers for a3xx */ -}; - -static void a3xx_protect_init(struct kgsl_device *device) -{ - int i; - - kgsl_regwrite(device, A3XX_CP_PROTECT_CTRL, 0x00000007); - - for (i = 0; i < ARRAY_SIZE(a3xx_protected_blocks); i++) { - u32 val = 0x60000000 | - (a3xx_protected_blocks[i].count << 24) | - (a3xx_protected_blocks[i].base << 2); - - kgsl_regwrite(device, a3xx_protected_blocks[i].reg, val); - } -} - -static int a3xx_start(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - const struct adreno_a3xx_core *a3xx_core = to_a3xx_core(adreno_dev); - int ret; - - ret = kgsl_mmu_start(device); - if (ret) - return ret; - - adreno_get_bus_counters(adreno_dev); - adreno_perfcounter_restore(adreno_dev); - - if (adreno_dev->soft_ft_regs) - memset(adreno_dev->soft_ft_regs, 0, - adreno_dev->soft_ft_count << 2); - - adreno_dev->irq_mask = A3XX_INT_MASK; - - /* Set up VBIF registers from the GPU core definition */ - kgsl_regmap_multi_write(&device->regmap, a3xx_core->vbif, - a3xx_core->vbif_count); - - /* Make all blocks contribute to the GPU BUSY perf counter */ - kgsl_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF); - - /* Tune the hystersis counters for SP and CP idle detection */ - kgsl_regwrite(device, A3XX_RBBM_SP_HYST_CNT, 0x10); - kgsl_regwrite(device, A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10); - - /* - * Enable the RBBM error reporting bits. 
This lets us get - * useful information on failure - */ - - kgsl_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001); - - /* Enable AHB error reporting */ - kgsl_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF); - - /* Turn on the power counters */ - kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00030000); - - /* - * Turn on hang detection - this spews a lot of useful information - * into the RBBM registers on a hang - */ - kgsl_regwrite(device, A3XX_RBBM_INTERFACE_HANG_INT_CTL, - (1 << 16) | 0xFFF); - - /* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0). */ - kgsl_regwrite(device, A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001); - - /* Enable VFD to access most of the UCHE (7 ways out of 8) */ - kgsl_regwrite(device, A3XX_UCHE_CACHE_WAYS_VFD, 0x07); - - /* Enable Clock gating */ - kgsl_regwrite(device, A3XX_RBBM_CLOCK_CTL, A3XX_RBBM_CLOCK_CTL_DEFAULT); - - /* Turn on protection */ - a3xx_protect_init(device); - - /* Turn on performance counters */ - kgsl_regwrite(device, A3XX_RBBM_PERFCTR_CTL, 0x01); - - kgsl_regwrite(device, A3XX_CP_DEBUG, A3XX_CP_DEBUG_DEFAULT); - - /* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */ - kgsl_regwrite(device, A3XX_CP_QUEUE_THRESHOLDS, 0x000E0602); - return 0; -} - -/* Register offset defines for A3XX */ -static unsigned int a3xx_register_offsets[ADRENO_REG_REGISTER_MAX] = { - ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A3XX_CP_RB_BASE), - ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, ADRENO_REG_SKIP), - ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A3XX_CP_RB_RPTR), - ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A3XX_CP_RB_WPTR), - ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, A3XX_CP_ME_CNTL), - ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_CNTL, A3XX_CP_RB_CNTL), - ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, A3XX_CP_IB1_BASE), - ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, ADRENO_REG_SKIP), - ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BUFSZ, A3XX_CP_IB1_BUFSZ), - ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, A3XX_CP_IB2_BASE), - 
ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, ADRENO_REG_SKIP), - ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, A3XX_CP_IB2_BUFSZ), - ADRENO_REG_DEFINE(ADRENO_REG_CP_TIMESTAMP, A3XX_CP_SCRATCH_REG0), - ADRENO_REG_DEFINE(ADRENO_REG_CP_SCRATCH_REG6, A3XX_CP_SCRATCH_REG6), - ADRENO_REG_DEFINE(ADRENO_REG_CP_SCRATCH_REG7, A3XX_CP_SCRATCH_REG7), - ADRENO_REG_DEFINE(ADRENO_REG_CP_PROTECT_REG_0, A3XX_CP_PROTECT_REG_0), - ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, A3XX_RBBM_STATUS), - ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_PWR_1_LO, - A3XX_RBBM_PERFCTR_PWR_1_LO), - ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, A3XX_RBBM_INT_0_MASK), - ADRENO_REG_DEFINE(ADRENO_REG_RBBM_CLOCK_CTL, A3XX_RBBM_CLOCK_CTL), - ADRENO_REG_DEFINE(ADRENO_REG_PA_SC_AA_CONFIG, A3XX_PA_SC_AA_CONFIG), - ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PM_OVERRIDE2, A3XX_RBBM_PM_OVERRIDE2), - ADRENO_REG_DEFINE(ADRENO_REG_SQ_GPR_MANAGEMENT, A3XX_SQ_GPR_MANAGEMENT), - ADRENO_REG_DEFINE(ADRENO_REG_SQ_INST_STORE_MANAGEMENT, - A3XX_SQ_INST_STORE_MANAGEMENT), - ADRENO_REG_DEFINE(ADRENO_REG_TP0_CHICKEN, A3XX_TP0_CHICKEN), - ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, A3XX_RBBM_SW_RESET_CMD), -}; - -static int _load_firmware(struct kgsl_device *device, const char *fwfile, - void **buf, int *len) -{ - const struct firmware *fw = NULL; - int ret; - - ret = request_firmware(&fw, fwfile, &device->pdev->dev); - - if (ret) { - dev_err(&device->pdev->dev, "request_firmware(%s) failed: %d\n", - fwfile, ret); - return ret; - } - - if (!fw) - return -EINVAL; - - *buf = devm_kmemdup(&device->pdev->dev, fw->data, fw->size, GFP_KERNEL); - *len = fw->size; - - release_firmware(fw); - return (*buf) ? 
0 : -ENOMEM; -} - -static int a3xx_microcode_read(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct adreno_firmware *pm4_fw = ADRENO_FW(adreno_dev, ADRENO_FW_PM4); - struct adreno_firmware *pfp_fw = ADRENO_FW(adreno_dev, ADRENO_FW_PFP); - const struct adreno_a3xx_core *a3xx_core = to_a3xx_core(adreno_dev); - - if (pm4_fw->fwvirt == NULL) { - int len; - void *ptr; - - int ret = _load_firmware(device, - a3xx_core->pm4fw_name, &ptr, &len); - - if (ret) { - dev_err(device->dev, "Failed to read pm4 ucode %s\n", - a3xx_core->pm4fw_name); - return ret; - } - - /* PM4 size is 3 dword aligned plus 1 dword of version */ - if (len % ((sizeof(uint32_t) * 3)) != sizeof(uint32_t)) { - dev_err(device->dev, - "Bad pm4 microcode size: %d\n", - len); - kfree(ptr); - return -ENOMEM; - } - - pm4_fw->size = len / sizeof(uint32_t); - pm4_fw->fwvirt = ptr; - pm4_fw->version = pm4_fw->fwvirt[1]; - } - - if (pfp_fw->fwvirt == NULL) { - int len; - void *ptr; - - int ret = _load_firmware(device, - a3xx_core->pfpfw_name, &ptr, &len); - if (ret) { - dev_err(device->dev, "Failed to read pfp ucode %s\n", - a3xx_core->pfpfw_name); - return ret; - } - - /* PFP size shold be dword aligned */ - if (len % sizeof(uint32_t) != 0) { - dev_err(device->dev, - "Bad PFP microcode size: %d\n", - len); - kfree(ptr); - return -ENOMEM; - } - - pfp_fw->size = len / sizeof(uint32_t); - pfp_fw->fwvirt = ptr; - pfp_fw->version = pfp_fw->fwvirt[1]; - } - - return 0; -} - -static void a3xx_microcode_load(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - size_t pm4_size = adreno_dev->fw[ADRENO_FW_PM4].size; - size_t pfp_size = adreno_dev->fw[ADRENO_FW_PFP].size; - int i; - - /* load the CP ucode using AHB writes */ - kgsl_regwrite(device, A3XX_CP_ME_RAM_WADDR, 0); - - for (i = 1; i < pm4_size; i++) - kgsl_regwrite(device, A3XX_CP_ME_RAM_DATA, - adreno_dev->fw[ADRENO_FW_PM4].fwvirt[i]); - - kgsl_regwrite(device, 
A3XX_CP_PFP_UCODE_ADDR, 0); - - for (i = 1; i < pfp_size; i++) - kgsl_regwrite(device, A3XX_CP_PFP_UCODE_DATA, - adreno_dev->fw[ADRENO_FW_PFP].fwvirt[i]); -} - -static u64 a3xx_read_alwayson(struct adreno_device *adreno_dev) -{ - /* A3XX does not have a always on timer */ - return 0; -} - -static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - irqreturn_t ret; - u32 status; - - /* Get the current interrupt status */ - kgsl_regread(device, A3XX_RBBM_INT_0_STATUS, &status); - - /* - * Clear all the interrupt bits except A3XX_INT_RBBM_AHB_ERROR. - * The interrupt will stay asserted until it is cleared by the handler - * so don't touch it yet to avoid a storm - */ - - kgsl_regwrite(device, A3XX_RBBM_INT_CLEAR_CMD, - status & ~A3XX_INT_RBBM_AHB_ERROR); - - /* Call the helper to execute the callbacks */ - ret = adreno_irq_callbacks(adreno_dev, a3xx_irq_funcs, status); - - trace_kgsl_a3xx_irq_status(adreno_dev, status); - - /* Now clear AHB_ERROR if it was set */ - if (status & A3XX_INT_RBBM_AHB_ERROR) - kgsl_regwrite(device, A3XX_RBBM_INT_CLEAR_CMD, - A3XX_INT_RBBM_AHB_ERROR); - - return ret; -} - -static bool a3xx_hw_isidle(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - u32 status; - - kgsl_regread(device, A3XX_RBBM_STATUS, &status); - - if (status & 0x7ffffffe) - return false; - - kgsl_regread(device, A3XX_RBBM_INT_0_STATUS, &status); - - /* Return busy if a interrupt is pending */ - return !((status & adreno_dev->irq_mask) || - atomic_read(&adreno_dev->pending_irq_refcnt)); -} - -static int a3xx_clear_pending_transactions(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - u32 mask = A30X_VBIF_XIN_HALT_CTRL0_MASK; - int ret; - - kgsl_regwrite(device, A3XX_VBIF_XIN_HALT_CTRL0, mask); - ret = adreno_wait_for_halt_ack(device, A3XX_VBIF_XIN_HALT_CTRL1, mask); - kgsl_regwrite(device, A3XX_VBIF_XIN_HALT_CTRL0, 
0); - - return ret; -} - -static bool a3xx_is_hw_collapsible(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - /* - * Skip power collapse for A304, if power ctrl flag is set to - * non zero. As A304 soft_reset will not work, power collapse - * needs to disable to avoid soft_reset. - */ - if (adreno_is_a304(adreno_dev) && device->pwrctrl.ctrl_flags) - return false; - - return adreno_isidle(adreno_dev); -} - -static void a3xx_power_stats(struct adreno_device *adreno_dev, - struct kgsl_power_stats *stats) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct adreno_busy_data *busy = &adreno_dev->busy_data; - s64 freq = kgsl_pwrctrl_active_freq(&device->pwrctrl) / 1000000; - u64 gpu_busy; - - /* Set the GPU busy counter for frequency scaling */ - gpu_busy = counter_delta(device, A3XX_RBBM_PERFCTR_PWR_1_LO, - &busy->gpu_busy); - - stats->busy_time = gpu_busy / freq; - - if (!device->pwrctrl.bus_control) - return; - - stats->ram_time = counter_delta(device, adreno_dev->ram_cycles_lo, - &busy->bif_ram_cycles); - - stats->ram_wait = counter_delta(device, adreno_dev->starved_ram_lo, - &busy->bif_starved_ram); -} - -static int a3xx_setproperty(struct kgsl_device_private *dev_priv, - u32 type, void __user *value, u32 sizebytes) -{ - struct kgsl_device *device = dev_priv->device; - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - u32 enable; - - if (type != KGSL_PROP_PWRCTRL) - return -ENODEV; - - if (sizebytes != sizeof(enable)) - return -EINVAL; - - if (copy_from_user(&enable, value, sizeof(enable))) - return -EFAULT; - - mutex_lock(&device->mutex); - if (enable) { - device->pwrctrl.ctrl_flags = 0; - - if (!adreno_active_count_get(adreno_dev)) { - a3xx_soft_fault_detect_start(adreno_dev); - adreno_active_count_put(adreno_dev); - } - - kgsl_pwrscale_enable(device); - } else { - kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); - device->pwrctrl.ctrl_flags = KGSL_PWR_ON; - - 
a3xx_soft_fault_detect_stop(adreno_dev); - kgsl_pwrscale_disable(device, true); - } - mutex_unlock(&device->mutex); - - return 0; -} - -const struct adreno_gpudev adreno_a3xx_gpudev = { - .reg_offsets = a3xx_register_offsets, - .irq_handler = a3xx_irq_handler, - .probe = a3xx_probe, - .rb_start = a3xx_rb_start, - .init = a3xx_init, - .start = a3xx_start, - .snapshot = a3xx_snapshot, - .read_alwayson = a3xx_read_alwayson, - .hw_isidle = a3xx_hw_isidle, - .power_ops = &adreno_power_operations, - .clear_pending_transactions = a3xx_clear_pending_transactions, - .ringbuffer_submitcmd = a3xx_ringbuffer_submitcmd, - .is_hw_collapsible = a3xx_is_hw_collapsible, - .power_stats = a3xx_power_stats, - .setproperty = a3xx_setproperty, - .remove = a3xx_remove, -}; diff --git a/adreno_a3xx.h b/adreno_a3xx.h deleted file mode 100644 index 5a3a902bd3..0000000000 --- a/adreno_a3xx.h +++ /dev/null @@ -1,76 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2013-2016, 2019-2020, The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. 
- */ -#ifndef __A3XX_H -#define __A3XX_H - -#include "a3xx_reg.h" -/** - * struct adreno_a3xx_core - a3xx specific GPU core definitions - */ -struct adreno_a3xx_core { - /** @base: Container for the generic &struct adreno_gpu_core */ - struct adreno_gpu_core base; - /** pm4fw_name: Name of the PM4 microcode file */ - const char *pm4fw_name; - /** pfpfw_name: Name of the PFP microcode file */ - const char *pfpfw_name; - /** @vbif: List of registers and values to write for VBIF */ - const struct kgsl_regmap_list *vbif; - /** @vbif_count: Number of registers in @vbif */ - u32 vbif_count; -}; - -struct adreno_device; - -/** - * to_a3xx_core - return the a3xx specific GPU core struct - * @adreno_dev: An Adreno GPU device handle - * - * Returns: - * A pointer to the a3xx specific GPU core struct - */ -static inline const struct adreno_a3xx_core * -to_a3xx_core(struct adreno_device *adreno_dev) -{ - const struct adreno_gpu_core *core = adreno_dev->gpucore; - - return container_of(core, struct adreno_a3xx_core, base); -} - -void a3xx_snapshot(struct adreno_device *adreno_dev, - struct kgsl_snapshot *snapshot); - -extern const struct adreno_perfcounters adreno_a3xx_perfcounters; - -/** - * a3xx_ringbuffer_init - Initialize the ringbuffer - * @adreno_dev: An Adreno GPU handle - * - * Initialize the ringbuffer for a3xx. 
- * Return: 0 on success or negative on failure - */ -int a3xx_ringbuffer_init(struct adreno_device *adreno_dev); - -/** - * a3xx_ringbuffer_submitcmd - Submit a user command to the ringbuffer - * @adreno_dev: An Adreno GPU handle - * @cmdobj: Pointer to a user command object - * @flags: Internal submit flags - * @time: Optional pointer to a adreno_submit_time container - * - * Return: 0 on success or negative on failure - */ -int a3xx_ringbuffer_submitcmd(struct adreno_device *adreno_dev, - struct kgsl_drawobj_cmd *cmdobj, u32 flags, - struct adreno_submit_time *time); - -#ifdef CONFIG_QCOM_KGSL_CORESIGHT -void a3xx_coresight_init(struct adreno_device *device); -#else -static inline void a3xx_coresight_init(struct adreno_device *device) { } -#endif - -#endif /*__A3XX_H */ diff --git a/adreno_a3xx_coresight.c b/adreno_a3xx_coresight.c deleted file mode 100644 index 728792ebe9..0000000000 --- a/adreno_a3xx_coresight.c +++ /dev/null @@ -1,65 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* -* Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. -* Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. 
-*/ - -#include "adreno.h" -#include "adreno_a3xx.h" -#include "adreno_coresight.h" - -static struct adreno_coresight_register a3xx_coresight_registers[] = { - { A3XX_RBBM_DEBUG_BUS_CTL, 0x0001093F }, - { A3XX_RBBM_EXT_TRACE_STOP_CNT, 0x00017fff }, - { A3XX_RBBM_EXT_TRACE_START_CNT, 0x0001000f }, - { A3XX_RBBM_EXT_TRACE_PERIOD_CNT, 0x0001ffff }, - { A3XX_RBBM_EXT_TRACE_CMD, 0x00000001 }, - { A3XX_RBBM_EXT_TRACE_BUS_CTL, 0x89100010 }, - { A3XX_RBBM_DEBUG_BUS_STB_CTL0, 0x00000000 }, - { A3XX_RBBM_DEBUG_BUS_STB_CTL1, 0xFFFFFFFE }, - { A3XX_RBBM_INT_TRACE_BUS_CTL, 0x00201111 }, -}; - -static ADRENO_CORESIGHT_ATTR(config_debug_bus, - &a3xx_coresight_registers[0]); -static ADRENO_CORESIGHT_ATTR(config_trace_stop_cnt, - &a3xx_coresight_registers[1]); -static ADRENO_CORESIGHT_ATTR(config_trace_start_cnt, - &a3xx_coresight_registers[2]); -static ADRENO_CORESIGHT_ATTR(config_trace_period_cnt, - &a3xx_coresight_registers[3]); -static ADRENO_CORESIGHT_ATTR(config_trace_cmd, - &a3xx_coresight_registers[4]); -static ADRENO_CORESIGHT_ATTR(config_trace_bus_ctl, - &a3xx_coresight_registers[5]); - -static struct attribute *a3xx_coresight_attrs[] = { - &coresight_attr_config_debug_bus.attr.attr, - &coresight_attr_config_trace_start_cnt.attr.attr, - &coresight_attr_config_trace_stop_cnt.attr.attr, - &coresight_attr_config_trace_period_cnt.attr.attr, - &coresight_attr_config_trace_cmd.attr.attr, - &coresight_attr_config_trace_bus_ctl.attr.attr, - NULL, -}; - -static const struct attribute_group a3xx_coresight_group = { - .attrs = a3xx_coresight_attrs, -}; - -static const struct attribute_group *a3xx_coresight_groups[] = { - &a3xx_coresight_group, - NULL, -}; - -static const struct adreno_coresight a3xx_coresight = { - .registers = a3xx_coresight_registers, - .count = ARRAY_SIZE(a3xx_coresight_registers), - .groups = a3xx_coresight_groups, -}; - -void a3xx_coresight_init(struct adreno_device *adreno_dev) -{ - adreno_coresight_add_device(adreno_dev, "coresight-gfx", - &a3xx_coresight, 
&adreno_dev->gx_coresight); -} diff --git a/adreno_a3xx_perfcounter.c b/adreno_a3xx_perfcounter.c deleted file mode 100644 index a525fef97b..0000000000 --- a/adreno_a3xx_perfcounter.c +++ /dev/null @@ -1,411 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (c) 2020, The Linux Foundation. All rights reserved. - */ - -#include "adreno.h" -#include "adreno_a3xx.h" -#include "adreno_perfcounter.h" -#include "kgsl_device.h" - -/* Bit flag for RBMM_PERFCTR_CTL */ -#define RBBM_PERFCTR_CTL_ENABLE 0x00000001 -#define VBIF2_PERF_CNT_SEL_MASK 0x7F -/* offset of clear register from select register */ -#define VBIF2_PERF_CLR_REG_SEL_OFF 8 -/* offset of enable register from select register */ -#define VBIF2_PERF_EN_REG_SEL_OFF 16 -/* offset of clear register from the enable register */ -#define VBIF2_PERF_PWR_CLR_REG_EN_OFF 8 - -static void a3xx_counter_load(struct adreno_device *adreno_dev, - struct adreno_perfcount_register *reg) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - int index = reg->load_bit / 32; - u32 enable = BIT(reg->load_bit & 31); - - kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_VALUE_LO, - lower_32_bits(reg->value)); - - kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_VALUE_HI, - upper_32_bits(reg->value)); - - if (index == 0) - kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD0, enable); - else - kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD1, enable); -} - -static int a3xx_counter_enable(struct adreno_device *adreno_dev, - const struct adreno_perfcount_group *group, - unsigned int counter, unsigned int countable) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct adreno_perfcount_register *reg = &group->regs[counter]; - - kgsl_regwrite(device, reg->select, countable); - reg->value = 0; - - return 0; -} - -static u64 a3xx_counter_read(struct adreno_device *adreno_dev, - const struct adreno_perfcount_group *group, - unsigned int counter) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct 
adreno_perfcount_register *reg = &group->regs[counter]; - u32 val, hi, lo; - - kgsl_regread(device, A3XX_RBBM_PERFCTR_CTL, &val); - kgsl_regwrite(device, A3XX_RBBM_PERFCTR_CTL, - val & ~RBBM_PERFCTR_CTL_ENABLE); - - kgsl_regread(device, reg->offset, &lo); - kgsl_regread(device, reg->offset_hi, &hi); - - kgsl_regwrite(device, A3XX_RBBM_PERFCTR_CTL, val); - - return (((u64) hi) << 32) | lo; -} - -static int a3xx_counter_pwr_enable(struct adreno_device *adreno_dev, - const struct adreno_perfcount_group *group, - unsigned int counter, unsigned int countable) -{ - return 0; -} - -static u64 a3xx_counter_pwr_read(struct adreno_device *adreno_dev, - const struct adreno_perfcount_group *group, - unsigned int counter) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct adreno_perfcount_register *reg = &group->regs[counter]; - u32 val, hi, lo; - - kgsl_regread(device, A3XX_RBBM_RBBM_CTL, &val); - - /* Freeze the counter so we can read it */ - if (!counter) - kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, val & ~0x10000); - else - kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, val & ~0x20000); - - kgsl_regread(device, reg->offset, &lo); - kgsl_regread(device, reg->offset_hi, &hi); - - kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, val); - - return ((((u64) hi) << 32) | lo) + reg->value; -} - -static int a3xx_counter_vbif_enable(struct adreno_device *adreno_dev, - const struct adreno_perfcount_group *group, - unsigned int counter, unsigned int countable) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct adreno_perfcount_register *reg = &group->regs[counter]; - - if (countable > VBIF2_PERF_CNT_SEL_MASK) - return -EINVAL; - - /* - * Write 1, followed by 0 to CLR register for - * clearing the counter - */ - kgsl_regwrite(device, - reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 1); - kgsl_regwrite(device, - reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 0); - kgsl_regwrite(device, - reg->select, countable & VBIF2_PERF_CNT_SEL_MASK); - /* enable reg is 8 DWORDS before 
select reg */ - kgsl_regwrite(device, - reg->select - VBIF2_PERF_EN_REG_SEL_OFF, 1); - - kgsl_regwrite(device, reg->select, countable); - - reg->value = 0; - return 0; -} - -static u64 a3xx_counter_vbif_read(struct adreno_device *adreno_dev, - const struct adreno_perfcount_group *group, - unsigned int counter) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct adreno_perfcount_register *reg = &group->regs[counter]; - u32 hi, lo; - - /* freeze counter */ - kgsl_regwrite(device, reg->select - VBIF2_PERF_EN_REG_SEL_OFF, 0); - - kgsl_regread(device, reg->offset, &lo); - kgsl_regread(device, reg->offset_hi, &hi); - - /* un-freeze counter */ - kgsl_regwrite(device, reg->select - VBIF2_PERF_EN_REG_SEL_OFF, 1); - - return ((((u64) hi) << 32) | lo) + reg->value; -} - -static int a3xx_counter_vbif_pwr_enable(struct adreno_device *adreno_dev, - const struct adreno_perfcount_group *group, - unsigned int counter, unsigned int countable) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct adreno_perfcount_register *reg = &group->regs[counter]; - - /* - * Write 1, followed by 0 to CLR register for - * clearing the counter - */ - kgsl_regwrite(device, reg->select + - VBIF2_PERF_PWR_CLR_REG_EN_OFF, 1); - kgsl_regwrite(device, reg->select + - VBIF2_PERF_PWR_CLR_REG_EN_OFF, 0); - kgsl_regwrite(device, reg->select, 1); - - reg->value = 0; - return 0; -} - -static u64 a3xx_counter_vbif_pwr_read(struct adreno_device *adreno_dev, - const struct adreno_perfcount_group *group, - unsigned int counter) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct adreno_perfcount_register *reg = &group->regs[counter]; - u32 hi, lo; - - /* freeze counter */ - kgsl_regwrite(device, reg->select, 0); - - kgsl_regread(device, reg->offset, &lo); - kgsl_regread(device, reg->offset_hi, &hi); - - /* un-freeze counter */ - kgsl_regwrite(device, reg->select, 1); - - return ((((u64) hi) << 32) | lo) + reg->value; -} - -/* - * Define the available perfcounter groups 
- these get used by - * adreno_perfcounter_get and adreno_perfcounter_put - */ - -static struct adreno_perfcount_register a3xx_perfcounters_cp[] = { - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_CP_0_LO, - A3XX_RBBM_PERFCTR_CP_0_HI, 0, A3XX_CP_PERFCOUNTER_SELECT }, -}; - -static struct adreno_perfcount_register a3xx_perfcounters_rbbm[] = { - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RBBM_0_LO, - A3XX_RBBM_PERFCTR_RBBM_0_HI, 1, A3XX_RBBM_PERFCOUNTER0_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RBBM_1_LO, - A3XX_RBBM_PERFCTR_RBBM_1_HI, 2, A3XX_RBBM_PERFCOUNTER1_SELECT }, -}; - -static struct adreno_perfcount_register a3xx_perfcounters_pc[] = { - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_0_LO, - A3XX_RBBM_PERFCTR_PC_0_HI, 3, A3XX_PC_PERFCOUNTER0_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_1_LO, - A3XX_RBBM_PERFCTR_PC_1_HI, 4, A3XX_PC_PERFCOUNTER1_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_2_LO, - A3XX_RBBM_PERFCTR_PC_2_HI, 5, A3XX_PC_PERFCOUNTER2_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_3_LO, - A3XX_RBBM_PERFCTR_PC_3_HI, 6, A3XX_PC_PERFCOUNTER3_SELECT }, -}; - -static struct adreno_perfcount_register a3xx_perfcounters_vfd[] = { - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VFD_0_LO, - A3XX_RBBM_PERFCTR_VFD_0_HI, 7, A3XX_VFD_PERFCOUNTER0_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VFD_1_LO, - A3XX_RBBM_PERFCTR_VFD_1_HI, 8, A3XX_VFD_PERFCOUNTER1_SELECT }, -}; - -static struct adreno_perfcount_register a3xx_perfcounters_hlsq[] = { - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_0_LO, - A3XX_RBBM_PERFCTR_HLSQ_0_HI, 9, - A3XX_HLSQ_PERFCOUNTER0_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_1_LO, - A3XX_RBBM_PERFCTR_HLSQ_1_HI, 10, - A3XX_HLSQ_PERFCOUNTER1_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_2_LO, - A3XX_RBBM_PERFCTR_HLSQ_2_HI, 11, - 
A3XX_HLSQ_PERFCOUNTER2_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_3_LO, - A3XX_RBBM_PERFCTR_HLSQ_3_HI, 12, - A3XX_HLSQ_PERFCOUNTER3_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_4_LO, - A3XX_RBBM_PERFCTR_HLSQ_4_HI, 13, - A3XX_HLSQ_PERFCOUNTER4_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_5_LO, - A3XX_RBBM_PERFCTR_HLSQ_5_HI, 14, - A3XX_HLSQ_PERFCOUNTER5_SELECT }, -}; - -static struct adreno_perfcount_register a3xx_perfcounters_vpc[] = { - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VPC_0_LO, - A3XX_RBBM_PERFCTR_VPC_0_HI, 15, A3XX_VPC_PERFCOUNTER0_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VPC_1_LO, - A3XX_RBBM_PERFCTR_VPC_1_HI, 16, A3XX_VPC_PERFCOUNTER1_SELECT }, -}; - -static struct adreno_perfcount_register a3xx_perfcounters_tse[] = { - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TSE_0_LO, - A3XX_RBBM_PERFCTR_TSE_0_HI, 17, A3XX_GRAS_PERFCOUNTER0_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TSE_1_LO, - A3XX_RBBM_PERFCTR_TSE_1_HI, 18, A3XX_GRAS_PERFCOUNTER1_SELECT }, -}; - -static struct adreno_perfcount_register a3xx_perfcounters_ras[] = { - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RAS_0_LO, - A3XX_RBBM_PERFCTR_RAS_0_HI, 19, A3XX_GRAS_PERFCOUNTER2_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RAS_1_LO, - A3XX_RBBM_PERFCTR_RAS_1_HI, 20, A3XX_GRAS_PERFCOUNTER3_SELECT }, -}; - -static struct adreno_perfcount_register a3xx_perfcounters_uche[] = { - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_0_LO, - A3XX_RBBM_PERFCTR_UCHE_0_HI, 21, - A3XX_UCHE_PERFCOUNTER0_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_1_LO, - A3XX_RBBM_PERFCTR_UCHE_1_HI, 22, - A3XX_UCHE_PERFCOUNTER1_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_2_LO, - A3XX_RBBM_PERFCTR_UCHE_2_HI, 23, - A3XX_UCHE_PERFCOUNTER2_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, 
A3XX_RBBM_PERFCTR_UCHE_3_LO, - A3XX_RBBM_PERFCTR_UCHE_3_HI, 24, - A3XX_UCHE_PERFCOUNTER3_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_4_LO, - A3XX_RBBM_PERFCTR_UCHE_4_HI, 25, - A3XX_UCHE_PERFCOUNTER4_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_5_LO, - A3XX_RBBM_PERFCTR_UCHE_5_HI, 26, - A3XX_UCHE_PERFCOUNTER5_SELECT }, -}; - -static struct adreno_perfcount_register a3xx_perfcounters_tp[] = { - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_0_LO, - A3XX_RBBM_PERFCTR_TP_0_HI, 27, A3XX_TP_PERFCOUNTER0_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_1_LO, - A3XX_RBBM_PERFCTR_TP_1_HI, 28, A3XX_TP_PERFCOUNTER1_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_2_LO, - A3XX_RBBM_PERFCTR_TP_2_HI, 29, A3XX_TP_PERFCOUNTER2_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_3_LO, - A3XX_RBBM_PERFCTR_TP_3_HI, 30, A3XX_TP_PERFCOUNTER3_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_4_LO, - A3XX_RBBM_PERFCTR_TP_4_HI, 31, A3XX_TP_PERFCOUNTER4_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_5_LO, - A3XX_RBBM_PERFCTR_TP_5_HI, 32, A3XX_TP_PERFCOUNTER5_SELECT }, -}; - -static struct adreno_perfcount_register a3xx_perfcounters_sp[] = { - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_0_LO, - A3XX_RBBM_PERFCTR_SP_0_HI, 33, A3XX_SP_PERFCOUNTER0_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_1_LO, - A3XX_RBBM_PERFCTR_SP_1_HI, 34, A3XX_SP_PERFCOUNTER1_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_2_LO, - A3XX_RBBM_PERFCTR_SP_2_HI, 35, A3XX_SP_PERFCOUNTER2_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_3_LO, - A3XX_RBBM_PERFCTR_SP_3_HI, 36, A3XX_SP_PERFCOUNTER3_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_4_LO, - A3XX_RBBM_PERFCTR_SP_4_HI, 37, A3XX_SP_PERFCOUNTER4_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_5_LO, - 
A3XX_RBBM_PERFCTR_SP_5_HI, 38, A3XX_SP_PERFCOUNTER5_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_6_LO, - A3XX_RBBM_PERFCTR_SP_6_HI, 39, A3XX_SP_PERFCOUNTER6_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_7_LO, - A3XX_RBBM_PERFCTR_SP_7_HI, 40, A3XX_SP_PERFCOUNTER7_SELECT }, -}; - -static struct adreno_perfcount_register a3xx_perfcounters_rb[] = { - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RB_0_LO, - A3XX_RBBM_PERFCTR_RB_0_HI, 41, A3XX_RB_PERFCOUNTER0_SELECT }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RB_1_LO, - A3XX_RBBM_PERFCTR_RB_1_HI, 42, A3XX_RB_PERFCOUNTER1_SELECT }, -}; - -static struct adreno_perfcount_register a3xx_perfcounters_pwr[] = { - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PWR_0_LO, - A3XX_RBBM_PERFCTR_PWR_0_HI, -1, 0 }, - /* - * A3XX_RBBM_PERFCTR_PWR_1_LO is used for frequency scaling and removed - * from the pool of available counters - */ -}; - -static struct adreno_perfcount_register a3xx_perfcounters_vbif2[] = { - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW0, - A3XX_VBIF2_PERF_CNT_HIGH0, -1, A3XX_VBIF2_PERF_CNT_SEL0 }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW1, - A3XX_VBIF2_PERF_CNT_HIGH1, -1, A3XX_VBIF2_PERF_CNT_SEL1 }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW2, - A3XX_VBIF2_PERF_CNT_HIGH2, -1, A3XX_VBIF2_PERF_CNT_SEL2 }, - { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW3, - A3XX_VBIF2_PERF_CNT_HIGH3, -1, A3XX_VBIF2_PERF_CNT_SEL3 }, -}; -/* - * Placing EN register in select field since vbif perf counters - * don't have select register to program - */ -static struct adreno_perfcount_register a3xx_perfcounters_vbif2_pwr[] = { - { KGSL_PERFCOUNTER_NOT_USED, 0, - 0, A3XX_VBIF2_PERF_PWR_CNT_LOW0, - A3XX_VBIF2_PERF_PWR_CNT_HIGH0, -1, - A3XX_VBIF2_PERF_PWR_CNT_EN0 }, - { KGSL_PERFCOUNTER_NOT_USED, 0, - 0, A3XX_VBIF2_PERF_PWR_CNT_LOW1, - A3XX_VBIF2_PERF_PWR_CNT_HIGH1, -1, - A3XX_VBIF2_PERF_PWR_CNT_EN1 }, - { 
KGSL_PERFCOUNTER_NOT_USED, 0, - 0, A3XX_VBIF2_PERF_PWR_CNT_LOW2, - A3XX_VBIF2_PERF_PWR_CNT_HIGH2, -1, - A3XX_VBIF2_PERF_PWR_CNT_EN2 }, -}; - -#define A3XX_PERFCOUNTER_GROUP(offset, name, enable, read, load) \ - ADRENO_PERFCOUNTER_GROUP(a3xx, offset, name, enable, read, load) - -#define A3XX_PERFCOUNTER_GROUP_FLAGS(offset, name, flags, enable, read, load) \ - ADRENO_PERFCOUNTER_GROUP_FLAGS(a3xx, offset, name, flags, enable, read, load) - -#define A3XX_REGULAR_PERFCOUNTER_GROUP(offset, name) \ - A3XX_PERFCOUNTER_GROUP(offset, name, a3xx_counter_enable,\ - a3xx_counter_read, a3xx_counter_load) - -static const struct adreno_perfcount_group -a3xx_perfcounter_groups[KGSL_PERFCOUNTER_GROUP_MAX] = { - A3XX_REGULAR_PERFCOUNTER_GROUP(CP, cp), - A3XX_REGULAR_PERFCOUNTER_GROUP(RBBM, rbbm), - A3XX_REGULAR_PERFCOUNTER_GROUP(PC, pc), - A3XX_REGULAR_PERFCOUNTER_GROUP(VFD, vfd), - A3XX_REGULAR_PERFCOUNTER_GROUP(HLSQ, hlsq), - A3XX_REGULAR_PERFCOUNTER_GROUP(VPC, vpc), - A3XX_REGULAR_PERFCOUNTER_GROUP(TSE, tse), - A3XX_REGULAR_PERFCOUNTER_GROUP(RAS, ras), - A3XX_REGULAR_PERFCOUNTER_GROUP(UCHE, uche), - A3XX_REGULAR_PERFCOUNTER_GROUP(TP, tp), - A3XX_REGULAR_PERFCOUNTER_GROUP(SP, sp), - A3XX_REGULAR_PERFCOUNTER_GROUP(RB, rb), - A3XX_PERFCOUNTER_GROUP_FLAGS(PWR, pwr, - ADRENO_PERFCOUNTER_GROUP_FIXED, - a3xx_counter_pwr_enable, a3xx_counter_pwr_read, NULL), - A3XX_PERFCOUNTER_GROUP(VBIF, vbif2, - a3xx_counter_vbif_enable, a3xx_counter_vbif_read, NULL), - A3XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, vbif2_pwr, - ADRENO_PERFCOUNTER_GROUP_FIXED, - a3xx_counter_vbif_pwr_enable, a3xx_counter_vbif_pwr_read, - NULL), - -}; - -const struct adreno_perfcounters adreno_a3xx_perfcounters = { - a3xx_perfcounter_groups, - ARRAY_SIZE(a3xx_perfcounter_groups), -}; diff --git a/adreno_a3xx_ringbuffer.c b/adreno_a3xx_ringbuffer.c deleted file mode 100644 index 3fbc91b8b5..0000000000 --- a/adreno_a3xx_ringbuffer.c +++ /dev/null @@ -1,458 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (c) 
2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. - */ - -#include "adreno.h" -#include "adreno_a3xx.h" -#include "adreno_pm4types.h" -#include "adreno_ringbuffer.h" -#include "adreno_trace.h" -#include "kgsl_trace.h" - -static int a3xx_wait_reg(unsigned int *cmds, unsigned int addr, - unsigned int val, unsigned int mask, - unsigned int interval) -{ - cmds[0] = cp_type3_packet(CP_WAIT_REG_EQ, 4); - cmds[1] = addr; - cmds[2] = val; - cmds[3] = mask; - cmds[4] = interval; - - return 5; -} - -static int a3xx_vbif_lock(unsigned int *cmds) -{ - int count; - - /* - * glue commands together until next - * WAIT_FOR_ME - */ - count = a3xx_wait_reg(cmds, A3XX_CP_WFI_PEND_CTR, - 1, 0xFFFFFFFF, 0xF); - - /* MMU-500 VBIF stall */ - cmds[count++] = cp_type3_packet(CP_REG_RMW, 3); - cmds[count++] = A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0; - /* AND to unmask the HALT bit */ - cmds[count++] = ~(VBIF_RECOVERABLE_HALT_CTRL); - /* OR to set the HALT bit */ - cmds[count++] = 0x1; - - /* Wait for acknowledgment */ - count += a3xx_wait_reg(&cmds[count], - A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL1, - 1, 0xFFFFFFFF, 0xF); - - return count; -} - -static int a3xx_vbif_unlock(unsigned int *cmds) -{ - /* MMU-500 VBIF unstall */ - cmds[0] = cp_type3_packet(CP_REG_RMW, 3); - cmds[1] = A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0; - /* AND to unmask the HALT bit */ - cmds[2] = ~(VBIF_RECOVERABLE_HALT_CTRL); - /* OR to reset the HALT bit */ - cmds[3] = 0; - - /* release all commands since _vbif_lock() with wait_for_me */ - cmds[4] = cp_type3_packet(CP_WAIT_FOR_ME, 1); - cmds[5] = 0; - - return 6; -} - -#define A3XX_GPU_OFFSET 0xa000 - -static int a3xx_cp_smmu_reg(unsigned int *cmds, - u32 reg, - unsigned int num) -{ - cmds[0] = cp_type3_packet(CP_REG_WR_NO_CTXT, num + 1); - cmds[1] = (A3XX_GPU_OFFSET + reg) >> 2; - - return 2; -} - -/* This function is only needed for A3xx targets */ -static int 
a3xx_tlbiall(unsigned int *cmds) -{ - unsigned int tlbstatus = (A3XX_GPU_OFFSET + - KGSL_IOMMU_CTX_TLBSTATUS) >> 2; - int count; - - count = a3xx_cp_smmu_reg(cmds, KGSL_IOMMU_CTX_TLBIALL, 1); - cmds[count++] = 1; - - count += a3xx_cp_smmu_reg(&cmds[count], KGSL_IOMMU_CTX_TLBSYNC, 1); - cmds[count++] = 0; - - count += a3xx_wait_reg(&cmds[count], tlbstatus, 0, - KGSL_IOMMU_CTX_TLBSTATUS_SACTIVE, 0xF); - - return count; -} - -/* offset at which a nop command is placed in setstate */ -#define KGSL_IOMMU_SETSTATE_NOP_OFFSET 1024 - -static int a3xx_rb_pagetable_switch(struct adreno_device *adreno_dev, - struct kgsl_pagetable *pagetable, u32 *cmds) -{ - u64 ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pagetable); - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct kgsl_iommu *iommu = KGSL_IOMMU(device); - int count = 0; - - /* Skip pagetable switch if current context is using default PT. */ - if (pagetable == device->mmu.defaultpagetable) - return 0; - /* - * Adding an indirect buffer ensures that the prefetch stalls until - * the commands in indirect buffer have completed. We need to stall - * prefetch with a nop indirect buffer when updating pagetables - * because it provides stabler synchronization. 
- */ - cmds[count++] = cp_type3_packet(CP_WAIT_FOR_ME, 1); - cmds[count++] = 0; - - cmds[count++] = cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2); - cmds[count++] = lower_32_bits(iommu->setstate->gpuaddr); - cmds[count++] = 2; - - cmds[count++] = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); - cmds[count++] = 0; - - cmds[count++] = cp_type3_packet(CP_WAIT_FOR_ME, 1); - cmds[count++] = 0; - - count += a3xx_vbif_lock(&cmds[count]); - - count += a3xx_cp_smmu_reg(&cmds[count], KGSL_IOMMU_CTX_TTBR0, 2); - cmds[count++] = lower_32_bits(ttbr0); - cmds[count++] = upper_32_bits(ttbr0); - - count += a3xx_vbif_unlock(&cmds[count]); - - count += a3xx_tlbiall(&cmds[count]); - - /* wait for me to finish the TLBI */ - cmds[count++] = cp_type3_packet(CP_WAIT_FOR_ME, 1); - cmds[count++] = 0; - cmds[count++] = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); - cmds[count++] = 0; - - /* Invalidate the state */ - cmds[count++] = cp_type3_packet(CP_INVALIDATE_STATE, 1); - cmds[count++] = 0x7ffff; - - return count; -} - -#define RB_SOPTIMESTAMP(device, rb) \ - MEMSTORE_RB_GPU_ADDR(device, rb, soptimestamp) -#define CTXT_SOPTIMESTAMP(device, drawctxt) \ - MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, soptimestamp) - -#define RB_EOPTIMESTAMP(device, rb) \ - MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp) -#define CTXT_EOPTIMESTAMP(device, drawctxt) \ - MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, eoptimestamp) - -int a3xx_ringbuffer_init(struct adreno_device *adreno_dev) -{ - adreno_dev->num_ringbuffers = 1; - - adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]); - - return adreno_ringbuffer_setup(adreno_dev, - &adreno_dev->ringbuffers[0], 0); -} - -#define A3XX_SUBMIT_MAX 55 - -static int a3xx_ringbuffer_addcmds(struct adreno_device *adreno_dev, - struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, - u32 flags, u32 *in, u32 dwords, u32 timestamp, - struct adreno_submit_time *time) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - u32 size = A3XX_SUBMIT_MAX + dwords; - u32 *cmds, 
index = 0; - u64 profile_gpuaddr; - u32 profile_dwords; - - if (adreno_drawctxt_detached(drawctxt)) - return -ENOENT; - - if (adreno_gpu_fault(adreno_dev) != 0) - return -EPROTO; - - rb->timestamp++; - - if (drawctxt) - drawctxt->internal_timestamp = rb->timestamp; - - cmds = adreno_ringbuffer_allocspace(rb, size); - if (IS_ERR(cmds)) - return PTR_ERR(cmds); - - /* Identify the start of a command */ - cmds[index++] = cp_type3_packet(CP_NOP, 1); - cmds[index++] = drawctxt ? CMD_IDENTIFIER : CMD_INTERNAL_IDENTIFIER; - - if (IS_PWRON_FIXUP(flags)) { - cmds[index++] = cp_type3_packet(CP_SET_PROTECTED_MODE, 1); - cmds[index++] = 0; - - cmds[index++] = cp_type3_packet(CP_NOP, 1); - cmds[index++] = PWRON_FIXUP_IDENTIFIER; - - cmds[index++] = cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2); - cmds[index++] = lower_32_bits(adreno_dev->pwron_fixup->gpuaddr); - cmds[index++] = adreno_dev->pwron_fixup_dwords; - - cmds[index++] = cp_type3_packet(CP_SET_PROTECTED_MODE, 1); - cmds[index++] = 0; - } - - profile_gpuaddr = adreno_profile_preib_processing(adreno_dev, - drawctxt, &profile_dwords); - - if (profile_gpuaddr) { - cmds[index++] = cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2); - cmds[index++] = lower_32_bits(profile_gpuaddr); - cmds[index++] = profile_dwords; - } - - if (drawctxt) { - cmds[index++] = cp_type3_packet(CP_MEM_WRITE, 2); - cmds[index++] = lower_32_bits(CTXT_SOPTIMESTAMP(device, - drawctxt)); - cmds[index++] = timestamp; - } - - cmds[index++] = cp_type3_packet(CP_MEM_WRITE, 2); - cmds[index++] = lower_32_bits(RB_SOPTIMESTAMP(device, rb)); - cmds[index++] = rb->timestamp; - - if (IS_NOTPROTECTED(flags)) { - cmds[index++] = cp_type3_packet(CP_SET_PROTECTED_MODE, 1); - cmds[index++] = 0; - } - - memcpy(&cmds[index], in, dwords << 2); - index += dwords; - - if (IS_NOTPROTECTED(flags)) { - cmds[index++] = cp_type3_packet(CP_SET_PROTECTED_MODE, 1); - cmds[index++] = 1; - } - - /* - * Flush HLSQ lazy updates to make sure there are no resourses pending - * for indirect loads 
after the timestamp - */ - - cmds[index++] = cp_type3_packet(CP_EVENT_WRITE, 1); - cmds[index++] = 0x07; /* HLSQ FLUSH */ - cmds[index++] = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); - cmds[index++] = 0; - - profile_gpuaddr = adreno_profile_postib_processing(adreno_dev, - drawctxt, &profile_dwords); - - if (profile_gpuaddr) { - cmds[index++] = cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2); - cmds[index++] = lower_32_bits(profile_gpuaddr); - cmds[index++] = profile_dwords; - } - - /* - * If this is an internal command, just write the ringbuffer timestamp, - * otherwise, write both - */ - if (!drawctxt) { - cmds[index++] = cp_type3_packet(CP_EVENT_WRITE, 3); - cmds[index++] = CACHE_FLUSH_TS | (1 << 31); - cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb)); - cmds[index++] = rb->timestamp; - } else { - cmds[index++] = cp_type3_packet(CP_EVENT_WRITE, 3); - cmds[index++] = CACHE_FLUSH_TS | (1 << 31); - cmds[index++] = lower_32_bits(CTXT_EOPTIMESTAMP(device, - drawctxt)); - cmds[index++] = timestamp; - - cmds[index++] = cp_type3_packet(CP_EVENT_WRITE, 3); - cmds[index++] = CACHE_FLUSH_TS; - cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb)); - cmds[index++] = rb->timestamp; - } - - /* Trigger a context rollover */ - cmds[index++] = cp_type3_packet(CP_SET_CONSTANT, 2); - cmds[index++] = (4 << 16) | (A3XX_HLSQ_CL_KERNEL_GROUP_X_REG - 0x2000); - cmds[index++] = 0; - - if (IS_WFI(flags)) { - cmds[index++] = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); - cmds[index++] = 0; - } - - /* Adjust the thing for the number of bytes we actually wrote */ - rb->_wptr -= (size - index); - - kgsl_pwrscale_busy(device); - kgsl_regwrite(device, A3XX_CP_RB_WPTR, rb->_wptr); - rb->wptr = rb->_wptr; - - return 0; -} - -static int a3xx_rb_context_switch(struct adreno_device *adreno_dev, - struct adreno_ringbuffer *rb, - struct adreno_context *drawctxt) -{ - struct kgsl_pagetable *pagetable = - adreno_drawctxt_get_pagetable(drawctxt); - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - 
int count = 0; - u32 cmds[64]; - - if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable) - count += a3xx_rb_pagetable_switch(adreno_dev, pagetable, cmds); - - cmds[count++] = cp_type3_packet(CP_NOP, 1); - cmds[count++] = CONTEXT_TO_MEM_IDENTIFIER; - - cmds[count++] = cp_type3_packet(CP_MEM_WRITE, 2); - cmds[count++] = lower_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb, - current_context)); - cmds[count++] = drawctxt->base.id; - - cmds[count++] = cp_type3_packet(CP_MEM_WRITE, 2); - cmds[count++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device, - KGSL_MEMSTORE_GLOBAL, current_context)); - cmds[count++] = drawctxt->base.id; - - cmds[count++] = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2); - cmds[count++] = 0; - cmds[count++] = 0x90000000; - - return a3xx_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED, - cmds, count, 0, NULL); -} - -static int a3xx_drawctxt_switch(struct adreno_device *adreno_dev, - struct adreno_ringbuffer *rb, - struct adreno_context *drawctxt) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - if (rb->drawctxt_active == drawctxt) - return 0; - - if (kgsl_context_detached(&drawctxt->base)) - return -ENOENT; - - if (!_kgsl_context_get(&drawctxt->base)) - return -ENOENT; - - trace_adreno_drawctxt_switch(rb, drawctxt); - - a3xx_rb_context_switch(adreno_dev, rb, drawctxt); - - /* Release the current drawctxt as soon as the new one is switched */ - adreno_put_drawctxt_on_timestamp(device, rb->drawctxt_active, - rb, rb->timestamp); - - rb->drawctxt_active = drawctxt; - return 0; -} - -#define A3XX_COMMAND_DWORDS 4 - -int a3xx_ringbuffer_submitcmd(struct adreno_device *adreno_dev, - struct kgsl_drawobj_cmd *cmdobj, u32 flags, - struct adreno_submit_time *time) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); - struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context); - struct adreno_ringbuffer *rb = drawctxt->rb; - int ret = 0, numibs = 0, index = 0; - 
u32 *cmds; - - /* Count the number of IBs (if we are not skipping) */ - if (!IS_SKIP(flags)) { - struct list_head *tmp; - - list_for_each(tmp, &cmdobj->cmdlist) - numibs++; - } - - cmds = kmalloc((A3XX_COMMAND_DWORDS + (numibs * 4)) << 2, GFP_KERNEL); - if (!cmds) { - ret = -ENOMEM; - goto done; - } - - cmds[index++] = cp_type3_packet(CP_NOP, 1); - cmds[index++] = START_IB_IDENTIFIER; - - if (numibs) { - struct kgsl_memobj_node *ib; - - list_for_each_entry(ib, &cmdobj->cmdlist, node) { - if (ib->priv & MEMOBJ_SKIP || - (ib->flags & KGSL_CMDLIST_CTXTSWITCH_PREAMBLE - && !IS_PREAMBLE(flags))) - cmds[index++] = cp_type3_packet(CP_NOP, 3); - - cmds[index++] = - cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2); - cmds[index++] = lower_32_bits(ib->gpuaddr); - cmds[index++] = ib->size >> 2; - } - } - - cmds[index++] = cp_type3_packet(CP_NOP, 1); - cmds[index++] = END_IB_IDENTIFIER; - - ret = a3xx_drawctxt_switch(adreno_dev, rb, drawctxt); - - /* - * In the unlikely event of an error in the drawctxt switch, - * treat it like a hang - */ - if (ret) { - /* - * It is "normal" to get a -ENOSPC or a -ENOENT. Don't log it, - * the upper layers know how to handle it - */ - if (ret != -ENOSPC && ret != -ENOENT) - dev_err(device->dev, - "Unable to switch draw context: %d\n", - ret); - goto done; - } - - adreno_drawobj_set_constraint(device, drawobj); - - ret = a3xx_ringbuffer_addcmds(adreno_dev, drawctxt->rb, drawctxt, - flags, cmds, index, drawobj->timestamp, NULL); - -done: - trace_kgsl_issueibcmds(device, drawctxt->base.id, numibs, - drawobj->timestamp, drawobj->flags, ret, drawctxt->type); - - kfree(cmds); - return ret; -} diff --git a/adreno_a3xx_snapshot.c b/adreno_a3xx_snapshot.c deleted file mode 100644 index 4b4d4184d6..0000000000 --- a/adreno_a3xx_snapshot.c +++ /dev/null @@ -1,449 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (c) 2012-2017,2019-2020, The Linux Foundation. All rights reserved. - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. 
All rights reserved. - */ - -#include - -#include "adreno.h" -#include "adreno_a3xx.h" -#include "adreno_snapshot.h" -#include "kgsl_device.h" - -/* - * Set of registers to dump for A3XX on snapshot. - * Registers in pairs - first value is the start offset, second - * is the stop offset (inclusive) - */ - -static const unsigned int a3xx_registers[] = { - 0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027, - 0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c, - 0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5, - 0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1, - 0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd, - 0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f6, 0x01f8, 0x01f9, - 0x01fc, 0x01ff, - 0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f, - 0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f, - 0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e, - 0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f, - 0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7, - 0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, - 0x0e41, 0x0e45, 0x0e64, 0x0e65, - 0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7, - 0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09, - 0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069, - 0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075, - 0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109, - 0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115, - 0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0, - 0x2240, 0x227e, - 0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8, - 0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7, - 0x22ff, 0x22ff, 0x2340, 0x2343, - 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d, - 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472, - 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 
0x24e4, 0x24ef, - 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511, - 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed, - 0x25f0, 0x25f0, - 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce, - 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec, - 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, - 0x300C, 0x300E, 0x301C, 0x301D, - 0x302A, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, 0x3034, 0x3036, - 0x303C, 0x303C, 0x305E, 0x305F, -}; - -/* Removed the following HLSQ register ranges from being read during - * fault tolerance since reading the registers may cause the device to hang: - */ -static const unsigned int a3xx_hlsq_registers[] = { - 0x0e00, 0x0e05, 0x0e0c, 0x0e0c, 0x0e22, 0x0e23, - 0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, - 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a, -}; - -/* Shader memory size in words */ -#define SHADER_MEMORY_SIZE 0x4000 - -/** - * _rbbm_debug_bus_read - Helper function to read data from the RBBM - * debug bus. - * @device - GPU device to read/write registers - * @block_id - Debug bus block to read from - * @index - Index in the debug bus block to read - * @ret - Value of the register read - */ -static void _rbbm_debug_bus_read(struct kgsl_device *device, - unsigned int block_id, unsigned int index, unsigned int *val) -{ - unsigned int block = (block_id << 8) | 1 << 16; - - kgsl_regwrite(device, A3XX_RBBM_DEBUG_BUS_CTL, block | index); - kgsl_regread(device, A3XX_RBBM_DEBUG_BUS_DATA_STATUS, val); -} - -/** - * a3xx_snapshot_shader_memory - Helper function to dump the GPU shader - * memory to the snapshot buffer. 
- * @device: GPU device whose shader memory is to be dumped - * @buf: Pointer to binary snapshot data blob being made - * @remain: Number of remaining bytes in the snapshot blob - * @priv: Unused parameter - * - */ -static size_t a3xx_snapshot_shader_memory(struct kgsl_device *device, - u8 *buf, size_t remain, void *priv) -{ - struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; - void *data = buf + sizeof(*header); - unsigned int shader_read_len = SHADER_MEMORY_SIZE; - - if (remain < DEBUG_SECTION_SZ(shader_read_len)) { - SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY"); - return 0; - } - - header->type = SNAPSHOT_DEBUG_SHADER_MEMORY; - header->size = shader_read_len; - - /* Map shader memory to kernel, for dumping */ - if (IS_ERR_OR_NULL(device->shader_mem_virt)) { - struct resource *res; - - res = platform_get_resource_byname(device->pdev, - IORESOURCE_MEM, "kgsl_3d0_shader_memory"); - - if (res) - device->shader_mem_virt = - devm_ioremap_resource(&device->pdev->dev, res); - } - - if (IS_ERR_OR_NULL(device->shader_mem_virt)) { - dev_err(device->dev, "Unable to map the shader memory\n"); - return 0; - } - - memcpy_fromio(data, device->shader_mem_virt, shader_read_len << 2); - - return DEBUG_SECTION_SZ(shader_read_len); -} - -static size_t a3xx_snapshot_debugbus_block(struct kgsl_device *device, - u8 *buf, size_t remain, void *priv) -{ - struct kgsl_snapshot_debugbus *header - = (struct kgsl_snapshot_debugbus *)buf; - struct adreno_debugbus_block *block = priv; - int i; - unsigned int *data = (unsigned int *)(buf + sizeof(*header)); - size_t size; - - size = (0x40 * sizeof(unsigned int)) + sizeof(*header); - - if (remain < size) { - SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); - return 0; - } - - header->id = block->block_id; - header->count = 0x40; - - for (i = 0; i < 0x40; i++) - _rbbm_debug_bus_read(device, block->block_id, i, &data[i]); - - return size; -} - -static struct adreno_debugbus_block debugbus_blocks[] = { - { RBBM_BLOCK_ID_CP, 0x52, }, - { 
RBBM_BLOCK_ID_RBBM, 0x40, }, - { RBBM_BLOCK_ID_VBIF, 0x40, }, - { RBBM_BLOCK_ID_HLSQ, 0x40, }, - { RBBM_BLOCK_ID_UCHE, 0x40, }, - { RBBM_BLOCK_ID_PC, 0x40, }, - { RBBM_BLOCK_ID_VFD, 0x40, }, - { RBBM_BLOCK_ID_VPC, 0x40, }, - { RBBM_BLOCK_ID_TSE, 0x40, }, - { RBBM_BLOCK_ID_RAS, 0x40, }, - { RBBM_BLOCK_ID_VSC, 0x40, }, - { RBBM_BLOCK_ID_SP_0, 0x40, }, - { RBBM_BLOCK_ID_SP_1, 0x40, }, - { RBBM_BLOCK_ID_SP_2, 0x40, }, - { RBBM_BLOCK_ID_SP_3, 0x40, }, - { RBBM_BLOCK_ID_TPL1_0, 0x40, }, - { RBBM_BLOCK_ID_TPL1_1, 0x40, }, - { RBBM_BLOCK_ID_TPL1_2, 0x40, }, - { RBBM_BLOCK_ID_TPL1_3, 0x40, }, - { RBBM_BLOCK_ID_RB_0, 0x40, }, - { RBBM_BLOCK_ID_RB_1, 0x40, }, - { RBBM_BLOCK_ID_RB_2, 0x40, }, - { RBBM_BLOCK_ID_RB_3, 0x40, }, - { RBBM_BLOCK_ID_MARB_0, 0x40, }, - { RBBM_BLOCK_ID_MARB_1, 0x40, }, - { RBBM_BLOCK_ID_MARB_2, 0x40, }, - { RBBM_BLOCK_ID_MARB_3, 0x40, }, -}; - -static void a3xx_snapshot_debugbus(struct kgsl_device *device, - struct kgsl_snapshot *snapshot) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(debugbus_blocks); i++) { - kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_DEBUGBUS, snapshot, - a3xx_snapshot_debugbus_block, - (void *) &debugbus_blocks[i]); - } -} - -static void _snapshot_hlsq_regs(struct kgsl_device *device, - struct kgsl_snapshot *snapshot) -{ - unsigned int next_pif = 0; - - /* - * Trying to read HLSQ registers when the HLSQ block is busy - * will cause the device to hang. The RBBM_DEBUG_BUS has information - * that will tell us if the HLSQ block is busy or not. Read values - * from the debug bus to ensure the HLSQ block is not busy (this - * is hardware dependent). If the HLSQ block is busy do not - * dump the registers, otherwise dump the HLSQ registers. 
- */ - - /* - * tpif status bits: RBBM_BLOCK_ID_HLSQ index 4 [4:0] - * spif status bits: RBBM_BLOCK_ID_HLSQ index 7 [5:0] - * - * if ((tpif == 0, 1, 28) && (spif == 0, 1, 10)) - * then dump HLSQ registers - */ - - /* check tpif */ - _rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 4, &next_pif); - next_pif &= 0x1f; - if (next_pif != 0 && next_pif != 1 && next_pif != 28) - return; - - /* check spif */ - _rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 7, &next_pif); - next_pif &= 0x3f; - if (next_pif != 0 && next_pif != 1 && next_pif != 10) - return; - - SNAPSHOT_REGISTERS(device, snapshot, a3xx_hlsq_registers); -} - -#define VPC_MEM_SIZE 512 - -static size_t a3xx_snapshot_vpc_memory(struct kgsl_device *device, u8 *buf, - size_t remain, void *priv) -{ - struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; - unsigned int *data = (unsigned int *)(buf + sizeof(*header)); - size_t size = 4 * VPC_MEM_SIZE; - int bank, addr, i = 0; - - if (remain < DEBUG_SECTION_SZ(size)) { - SNAPSHOT_ERR_NOMEM(device, "VPC MEMORY"); - return 0; - } - - header->type = SNAPSHOT_DEBUG_VPC_MEMORY; - header->size = size; - - for (bank = 0; bank < 4; bank++) { - for (addr = 0; addr < VPC_MEM_SIZE; addr++) { - unsigned int val = bank | (addr << 4); - - kgsl_regwrite(device, A3XX_VPC_VPC_DEBUG_RAM_SEL, val); - kgsl_regread(device, A3XX_VPC_VPC_DEBUG_RAM_READ, - &data[i++]); - } - } - - return DEBUG_SECTION_SZ(size); -} - -static size_t a3xx_snapshot_cp_pm4_ram(struct kgsl_device *device, u8 *buf, - size_t remain, void *priv) -{ - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; - unsigned int *data = (unsigned int *)(buf + sizeof(*header)); - struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_PM4); - size_t size = fw->size - 1; - - if (remain < DEBUG_SECTION_SZ(size)) { - SNAPSHOT_ERR_NOMEM(device, "CP PM4 RAM DEBUG"); - return 0; - } - - header->type = SNAPSHOT_DEBUG_CP_PM4_RAM; - 
header->size = size; - - /* - * Read the firmware from the GPU rather than use our cache in order to - * try to catch mis-programming or corruption in the hardware. We do - * use the cached version of the size, however, instead of trying to - * maintain always changing hardcoded constants - */ - kgsl_regmap_read_indexed(&device->regmap, A3XX_CP_ME_RAM_RADDR, - A3XX_CP_ME_RAM_DATA, data, size); - - return DEBUG_SECTION_SZ(size); -} - -static size_t a3xx_snapshot_cp_pfp_ram(struct kgsl_device *device, u8 *buf, - size_t remain, void *priv) -{ - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; - unsigned int *data = (unsigned int *)(buf + sizeof(*header)); - struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_PFP); - int size = fw->size - 1; - - if (remain < DEBUG_SECTION_SZ(size)) { - SNAPSHOT_ERR_NOMEM(device, "CP PFP RAM DEBUG"); - return 0; - } - - header->type = SNAPSHOT_DEBUG_CP_PFP_RAM; - header->size = size; - - /* - * Read the firmware from the GPU rather than use our cache in order to - * try to catch mis-programming or corruption in the hardware. 
We do - * use the cached version of the size, however, instead of trying to - * maintain always changing hardcoded constants - */ - kgsl_regmap_read_indexed(&device->regmap, A3XX_CP_PFP_UCODE_ADDR, - A3XX_CP_PFP_UCODE_DATA, data, size); - - return DEBUG_SECTION_SZ(size); -} - -static size_t a3xx_snapshot_cp_roq(struct kgsl_device *device, u8 *buf, - size_t remain, void *priv) -{ - struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *) buf; - u32 *data = (u32 *) (buf + sizeof(*header)); - - if (remain < DEBUG_SECTION_SZ(128)) { - SNAPSHOT_ERR_NOMEM(device, "CP ROQ DEBUG"); - return 0; - } - - header->type = SNAPSHOT_DEBUG_CP_ROQ; - header->size = 128; - - kgsl_regmap_read_indexed(&device->regmap, A3XX_CP_ROQ_ADDR, - A3XX_CP_ROQ_DATA, data, 128); - - return DEBUG_SECTION_SZ(128); -} - -static size_t a3xx_snapshot_cp_meq(struct kgsl_device *device, u8 *buf, - size_t remain, void *priv) -{ - struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *) buf; - u32 *data = (u32 *) (buf + sizeof(*header)); - - if (remain < DEBUG_SECTION_SZ(16)) { - SNAPSHOT_ERR_NOMEM(device, "CP MEQ DEBUG"); - return 0; - } - - header->type = SNAPSHOT_DEBUG_CP_MEQ; - header->size = 16; - - kgsl_regmap_read_indexed(&device->regmap, A3XX_CP_MEQ_ADDR, - A3XX_CP_MEQ_DATA, data, 16); - - return DEBUG_SECTION_SZ(16); -} - -/* - * a3xx_snapshot() - A3XX GPU snapshot function - * @adreno_dev: Device being snapshotted - * @snapshot: Snapshot metadata - * @remain: Amount of space left in snapshot memory - * - * This is where all of the A3XX specific bits and pieces are grabbed - * into the snapshot memory - */ -void a3xx_snapshot(struct adreno_device *adreno_dev, - struct kgsl_snapshot *snapshot) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - unsigned int reg; - - /* Disable Clock gating temporarily for the debug bus to work */ - kgsl_regwrite(device, A3XX_RBBM_CLOCK_CTL, 0x0); - - /* Save some CP information that the generic snapshot uses */ - kgsl_regread(device, 
A3XX_CP_IB1_BASE, ®); - snapshot->ib1base = (u64) reg; - - kgsl_regread(device, A3XX_CP_IB2_BASE, ®); - snapshot->ib2base = (u64) reg; - - kgsl_regread(device, A3XX_CP_IB1_BUFSZ, &snapshot->ib1size); - kgsl_regread(device, A3XX_CP_IB2_BUFSZ, &snapshot->ib2size); - - SNAPSHOT_REGISTERS(device, snapshot, a3xx_registers); - - _snapshot_hlsq_regs(device, snapshot); - - kgsl_snapshot_indexed_registers(device, snapshot, - A3XX_CP_STATE_DEBUG_INDEX, A3XX_CP_STATE_DEBUG_DATA, 0, 0x14); - - /* CP_ME indexed registers */ - kgsl_snapshot_indexed_registers(device, snapshot, - A3XX_CP_ME_CNTL, A3XX_CP_ME_STATUS, 64, 44); - - /* VPC memory */ - kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, - snapshot, a3xx_snapshot_vpc_memory, NULL); - - /* CP MEQ */ - kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, - a3xx_snapshot_cp_meq, NULL); - - /* Shader working/shadow memory */ - kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, - snapshot, a3xx_snapshot_shader_memory, NULL); - - - /* CP PFP and PM4 */ - - /* - * Reading the microcode while the CP is running will - * basically move the CP instruction pointer to - * whatever address we read. Big badaboom ensues. Stop the CP - * (if it isn't already stopped) to ensure that we are safe. - * We do this here and not earlier to avoid corrupting the RBBM - * status and CP registers - by the time we get here we don't - * care about the contents of the CP anymore. 
- */ - - kgsl_regread(device, A3XX_CP_ME_CNTL, ®); - reg |= (1 << 27) | (1 << 28); - kgsl_regwrite(device, A3XX_CP_ME_CNTL, reg); - - kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, - snapshot, a3xx_snapshot_cp_pfp_ram, NULL); - - kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, - snapshot, a3xx_snapshot_cp_pm4_ram, NULL); - - /* CP ROQ */ - kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, - snapshot, a3xx_snapshot_cp_roq, NULL); - - a3xx_snapshot_debugbus(device, snapshot); -} diff --git a/adreno_cp_parser.h b/adreno_cp_parser.h index 903381904b..52b8ddfe05 100644 --- a/adreno_cp_parser.h +++ b/adreno_cp_parser.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2013-2014, 2017, 2019, The Linux Foundation. All rights reserved. + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef __ADRENO_IB_PARSER__ @@ -8,9 +9,6 @@ #include "adreno.h" -extern const unsigned int a3xx_cp_addr_regs[]; -extern const unsigned int a4xx_cp_addr_regs[]; - /* * struct adreno_ib_object - Structure containing information about an * address range found in an IB @@ -127,12 +125,7 @@ static inline void adreno_ib_init_ib_obj(uint64_t gpuaddr, static inline int adreno_cp_parser_getreg(struct adreno_device *adreno_dev, enum adreno_cp_addr_regs reg_enum) { - if (reg_enum == ADRENO_CP_ADDR_MAX) - return -EEXIST; - - if (!adreno_is_a3xx(adreno_dev)) - return -EEXIST; - return a3xx_cp_addr_regs[reg_enum]; + return -EEXIST; } /* @@ -150,18 +143,8 @@ static inline int adreno_cp_parser_regindex(struct adreno_device *adreno_dev, enum adreno_cp_addr_regs start, enum adreno_cp_addr_regs end) { - int i; - const unsigned int *regs; - - if (!adreno_is_a3xx(adreno_dev)) - return -EEXIST; - - regs = a3xx_cp_addr_regs; - - for (i = start; i <= end && i < ADRENO_CP_ADDR_MAX; i++) - if (regs[i] == offset) - return i; return -EEXIST; + } int adreno_ib_create_object_list( diff --git a/adreno_dispatch.c 
b/adreno_dispatch.c index bf922832e1..eacb294dce 100644 --- a/adreno_dispatch.c +++ b/adreno_dispatch.c @@ -94,50 +94,6 @@ _drawqueue_inflight(struct adreno_dispatcher_drawqueue *drawqueue) ? _dispatcher_q_inflight_lo : _dispatcher_q_inflight_hi; } -static void fault_detect_read(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - int i; - - if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv)) - return; - - for (i = 0; i < adreno_dev->num_ringbuffers; i++) { - struct adreno_ringbuffer *rb = &(adreno_dev->ringbuffers[i]); - - adreno_rb_readtimestamp(adreno_dev, rb, - KGSL_TIMESTAMP_RETIRED, &(rb->fault_detect_ts)); - } - - for (i = 0; i < adreno_dev->soft_ft_count; i++) { - if (adreno_dev->soft_ft_regs[i]) - kgsl_regread(device, adreno_dev->soft_ft_regs[i], - &adreno_dev->soft_ft_vals[i]); - } -} - -void adreno_dispatcher_start_fault_timer(struct adreno_device *adreno_dev) -{ - struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; - - if (adreno_soft_fault_detect(adreno_dev)) - mod_timer(&dispatcher->fault_timer, - jiffies + msecs_to_jiffies(_fault_timer_interval)); -} - -/* - * This takes a kgsl_device pointer so that it can be used for the function - * hook in adreno.c too - */ -void adreno_dispatcher_stop_fault_timer(struct kgsl_device *device) -{ - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; - - if (ADRENO_FEATURE(adreno_dev, ADRENO_SOFT_FAULT_DETECT)) - del_timer_sync(&dispatcher->fault_timer); -} - /** * _retire_timestamp() - Retire object without sending it * to the hardware @@ -188,20 +144,10 @@ static void _retire_timestamp(struct kgsl_drawobj *drawobj) atomic_inc(&context->proc_priv->period->frames); } - /* - * For A3xx we still get the rptr from the CP_RB_RPTR instead of - * rptr scratch out address. At this point GPU clocks turned off. - * So avoid reading GPU register directly for A3xx. 
- */ - if (adreno_is_a3xx(ADRENO_DEVICE(device))) { - trace_adreno_cmdbatch_retired(context, &info, - drawobj->flags, rb->dispatch_q.inflight, 0); - } else { - info.rptr = adreno_get_rptr(rb); + info.rptr = adreno_get_rptr(rb); - trace_adreno_cmdbatch_retired(context, &info, - drawobj->flags, rb->dispatch_q.inflight, 0); - } + trace_adreno_cmdbatch_retired(context, &info, + drawobj->flags, rb->dispatch_q.inflight, 0); log_kgsl_cmdbatch_retired_event(context->id, drawobj->timestamp, context->priority, drawobj->flags, 0, 0); @@ -562,15 +508,6 @@ static int sendcmd(struct adreno_device *adreno_dev, if (dispatcher->inflight == 1) { if (ret == 0) { - - /* Stop fault timer before reading fault registers */ - adreno_dispatcher_stop_fault_timer(device); - - fault_detect_read(adreno_dev); - - /* Start the fault timer on first submission */ - adreno_dispatcher_start_fault_timer(adreno_dev); - if (!test_and_set_bit(ADRENO_DISPATCHER_ACTIVE, &dispatcher->priv)) reinit_completion(&dispatcher->idle_gate); @@ -1976,13 +1913,12 @@ static int dispatcher_do_fault(struct adreno_device *adreno_dev) gx_on = adreno_gx_is_on(adreno_dev); /* - * On non-A3xx, read RBBM_STATUS3:SMMU_STALLED_ON_FAULT (BIT 24) - * to tell if this function was entered after a pagefault. If so, only - * proceed if the fault handler has already run in the IRQ thread, - * else return early to give the fault handler a chance to run. + * Read RBBM_STATUS3:SMMU_STALLED_ON_FAULT (BIT 24) to tell if this + * function was entered after a pagefault. If so, only proceed if the + * fault handler has already run in the IRQ thread, else return early + * to give the fault handler a chance to run. 
*/ - if (!(fault & ADRENO_IOMMU_PAGE_FAULT) && - !adreno_is_a3xx(adreno_dev) && gx_on) { + if (!(fault & ADRENO_IOMMU_PAGE_FAULT) && gx_on) { unsigned int val; adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS3, &val); @@ -1997,8 +1933,6 @@ static int dispatcher_do_fault(struct adreno_device *adreno_dev) /* Turn off all the timers */ del_timer_sync(&dispatcher->timer); - adreno_dispatcher_stop_fault_timer(device); - /* * Deleting uninitialized timer will block for ever on kernel debug * disable build. Hence skip del timer if it is not initialized. @@ -2016,12 +1950,12 @@ static int dispatcher_do_fault(struct adreno_device *adreno_dev) */ if (!(fault & ADRENO_HARD_FAULT) && gx_on) { adreno_readreg(adreno_dev, ADRENO_REG_CP_ME_CNTL, ®); - if (adreno_is_a3xx(adreno_dev)) - reg |= (1 << 27) | (1 << 28); - else if (adreno_is_a5xx(adreno_dev) || adreno_is_a6xx(adreno_dev)) + + if (adreno_is_a5xx(adreno_dev) || adreno_is_a6xx(adreno_dev)) reg |= 1 | (1 << 1); else reg = 0x0; + adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, reg); } /* @@ -2221,21 +2155,10 @@ static void retire_cmdobj(struct adreno_device *adreno_dev, atomic_inc(&context->proc_priv->period->frames); } - /* - * For A3xx we still get the rptr from the CP_RB_RPTR instead of - * rptr scratch out address. At this point GPU clocks turned off. - * So avoid reading GPU register directly for A3xx. 
- */ - if (adreno_is_a3xx(adreno_dev)) { - trace_adreno_cmdbatch_retired(drawobj->context, &info, - drawobj->flags, rb->dispatch_q.inflight, - cmdobj->fault_recovery); - } else { - info.rptr = adreno_get_rptr(rb); - trace_adreno_cmdbatch_retired(drawobj->context, &info, - drawobj->flags, rb->dispatch_q.inflight, - cmdobj->fault_recovery); - } + info.rptr = adreno_get_rptr(rb); + trace_adreno_cmdbatch_retired(drawobj->context, &info, + drawobj->flags, rb->dispatch_q.inflight, + cmdobj->fault_recovery); log_kgsl_cmdbatch_retired_event(context->id, drawobj->timestamp, context->priority, drawobj->flags, start, end); @@ -2358,7 +2281,6 @@ static void _dispatcher_power_down(struct adreno_device *adreno_dev) if (test_and_clear_bit(ADRENO_DISPATCHER_ACTIVE, &dispatcher->priv)) complete_all(&dispatcher->idle_gate); - adreno_dispatcher_stop_fault_timer(device); process_rt_bus_hint(device, false); if (test_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv)) { @@ -2488,8 +2410,6 @@ void adreno_dispatcher_stop(struct adreno_device *adreno_dev) struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; del_timer_sync(&dispatcher->timer); - - adreno_dispatcher_stop_fault_timer(KGSL_DEVICE(adreno_dev)); } /* Return the ringbuffer that matches the draw context priority */ @@ -2609,8 +2529,6 @@ static void adreno_dispatcher_close(struct adreno_device *adreno_dev) mutex_lock(&dispatcher->mutex); del_timer_sync(&dispatcher->timer); - adreno_dispatcher_stop_fault_timer(KGSL_DEVICE(adreno_dev)); - FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { struct adreno_dispatcher_drawqueue *dispatch_q = &(rb->dispatch_q); diff --git a/adreno_pm4types.h b/adreno_pm4types.h index 033a025e5b..7e2dc23003 100644 --- a/adreno_pm4types.h +++ b/adreno_pm4types.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. 
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef __ADRENO_PM4TYPES_H #define __ADRENO_PM4TYPES_H @@ -139,10 +139,6 @@ #define CP_BOOTSTRAP_UCODE 0x6f /* bootstraps microcode */ -/* - * for a3xx - */ - #define CP_LOAD_STATE 0x30 /* load high level sequencer command */ /* Conditionally load a IB based on a flag */ diff --git a/adreno_ringbuffer.c b/adreno_ringbuffer.c index 3d8af5a086..a502393d0e 100644 --- a/adreno_ringbuffer.c +++ b/adreno_ringbuffer.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -8,7 +9,6 @@ #include #include -#include "a3xx_reg.h" #include "a5xx_reg.h" #include "a6xx_reg.h" #include "adreno.h" diff --git a/adreno_trace.h b/adreno_trace.h index 81d3dd5b5f..5f8dc88bf7 100644 --- a/adreno_trace.h +++ b/adreno_trace.h @@ -15,7 +15,6 @@ #define TRACE_INCLUDE_FILE adreno_trace #include -#include "adreno_a3xx.h" #include "adreno_a5xx.h" #include "adreno_gen7.h" #include "adreno_hfi.h" @@ -588,64 +587,6 @@ TRACE_EVENT(adreno_sp_tp, ) ); -/* - * Tracepoint for a3xx irq. Includes status info - */ -TRACE_EVENT(kgsl_a3xx_irq_status, - - TP_PROTO(struct adreno_device *adreno_dev, unsigned int status), - - TP_ARGS(adreno_dev, status), - - TP_STRUCT__entry( - __string(device_name, adreno_dev->dev.name) - __field(unsigned int, status) - ), - - TP_fast_assign( - __assign_str(device_name, adreno_dev->dev.name); - __entry->status = status; - ), - - TP_printk( - "d_name=%s status=%s", - __get_str(device_name), - __entry->status ? 
__print_flags(__entry->status, "|", - { BIT(A3XX_INT_RBBM_GPU_IDLE), "RBBM_GPU_IDLE" }, - { BIT(A3XX_INT_RBBM_AHB_ERROR), "RBBM_AHB_ERR" }, - { BIT(A3XX_INT_RBBM_REG_TIMEOUT), "RBBM_REG_TIMEOUT" }, - { BIT(A3XX_INT_RBBM_ME_MS_TIMEOUT), - "RBBM_ME_MS_TIMEOUT" }, - { BIT(A3XX_INT_RBBM_PFP_MS_TIMEOUT), - "RBBM_PFP_MS_TIMEOUT" }, - { BIT(A3XX_INT_RBBM_ATB_BUS_OVERFLOW), - "RBBM_ATB_BUS_OVERFLOW" }, - { BIT(A3XX_INT_VFD_ERROR), "RBBM_VFD_ERROR" }, - { BIT(A3XX_INT_CP_SW_INT), "CP_SW" }, - { BIT(A3XX_INT_CP_T0_PACKET_IN_IB), - "CP_T0_PACKET_IN_IB" }, - { BIT(A3XX_INT_CP_OPCODE_ERROR), "CP_OPCODE_ERROR" }, - { BIT(A3XX_INT_CP_RESERVED_BIT_ERROR), - "CP_RESERVED_BIT_ERROR" }, - { BIT(A3XX_INT_CP_HW_FAULT), "CP_HW_FAULT" }, - { BIT(A3XX_INT_CP_DMA), "CP_DMA" }, - { BIT(A3XX_INT_CP_IB2_INT), "CP_IB2_INT" }, - { BIT(A3XX_INT_CP_IB1_INT), "CP_IB1_INT" }, - { BIT(A3XX_INT_CP_RB_INT), "CP_RB_INT" }, - { BIT(A3XX_INT_CP_REG_PROTECT_FAULT), - "CP_REG_PROTECT_FAULT" }, - { BIT(A3XX_INT_CP_RB_DONE_TS), "CP_RB_DONE_TS" }, - { BIT(A3XX_INT_CP_VS_DONE_TS), "CP_VS_DONE_TS" }, - { BIT(A3XX_INT_CP_PS_DONE_TS), "CP_PS_DONE_TS" }, - { BIT(A3XX_INT_CACHE_FLUSH_TS), "CACHE_FLUSH_TS" }, - { BIT(A3XX_INT_CP_AHB_ERROR_HALT), - "CP_AHB_ERROR_HALT" }, - { BIT(A3XX_INT_MISC_HANG_DETECT), "MISC_HANG_DETECT" }, - { BIT(A3XX_INT_UCHE_OOB_ACCESS), "UCHE_OOB_ACCESS" }) - : "None" - ) -); - /* * Tracepoint for a5xx irq. 
Includes status info */ diff --git a/build/kgsl_defs.bzl b/build/kgsl_defs.bzl index 0bad6ff389..d16037095f 100644 --- a/build/kgsl_defs.bzl +++ b/build/kgsl_defs.bzl @@ -10,10 +10,6 @@ msm_kgsl_includes = [ def kgsl_get_srcs(): srcs = [ "adreno.c", - "adreno_a3xx.c", - "adreno_a3xx_perfcounter.c", - "adreno_a3xx_ringbuffer.c", - "adreno_a3xx_snapshot.c", "adreno_a5xx.c", "adreno_a5xx_perfcounter.c", "adreno_a5xx_preempt.c", @@ -120,7 +116,6 @@ def define_target_variant_module(target, variant): "CONFIG_DEBUG_FS": { True: [ "kgsl_debugfs.c", "adreno_debugfs.c", "adreno_profile.c" ] }, "CONFIG_QCOM_KGSL_CORESIGHT": { True: [ "adreno_coresight.c", - "adreno_a3xx_coresight.c", "adreno_a5xx_coresight.c", "adreno_a6xx_coresight.c", "adreno_gen7_coresight.c"] }, From 3ca8900b70646b2f4a2e5580716621246d4a1f03 Mon Sep 17 00:00:00 2001 From: Harshitha Sai Neelati Date: Wed, 18 Oct 2023 23:25:15 +0530 Subject: [PATCH 0540/1016] msm: kgsl: Set LPAC smmu aperture based on LPAC feature enablement Setting aperture for LPAC when the feature is disabled can lead to improper aperture configuration. Hence, set LPAC aperture only when LPAC feature is enabled. 
Change-Id: I2c8385cf727d2b6ad798c5be384ba3202160231d Signed-off-by: Harshitha Sai Neelati --- kgsl_iommu.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 33c63c8be6..8750b946a7 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -2448,11 +2448,12 @@ static int iommu_probe_user_context(struct kgsl_device *device, kgsl_iommu_set_ttbr0(&iommu->lpac_context, mmu, &pt->info.cfg); - ret = set_smmu_lpac_aperture(device, &iommu->lpac_context); - /* LPAC is optional, ignore setup failures in absence of LPAC feature */ - if ((ret < 0) && ADRENO_FEATURE(adreno_dev, ADRENO_LPAC)) { - kgsl_iommu_detach_context(&iommu->lpac_context); - goto err; + if (ADRENO_FEATURE(adreno_dev, ADRENO_LPAC)) { + ret = set_smmu_lpac_aperture(device, &iommu->lpac_context); + if (ret < 0) { + kgsl_iommu_detach_context(&iommu->lpac_context); + goto err; + } } return 0; From c92706b1f39b3999a2a82be55cdbb0d1fb272517 Mon Sep 17 00:00:00 2001 From: Mohammed Mirza Mandayappurath Manzoor Date: Mon, 9 Oct 2023 11:26:10 -0700 Subject: [PATCH 0541/1016] kgsl: hwsched: Force coldboot after a perfcounter release GMU requests for perfcounters during init process. The responses are recorded in scratch for warmboot execution. When the perfcounters are released, we need to force a coldboot to prevent GMU from reading the scratch and marking the counters as active. 
Change-Id: Ie0669b9ea1e4f3fba2226196ae43603c3ffbdf8f Signed-off-by: Mohammed Mirza Mandayappurath Manzoor --- adreno_gen7_hwsched_hfi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index d9ceea8d24..6daf926268 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -1278,6 +1278,8 @@ void gen7_hwsched_process_msgq(struct adreno_device *adreno_dev) adreno_perfcounter_put(adreno_dev, cmd->group_id, cmd->countable, PERFCOUNTER_FLAG_KERNEL); + + adreno_mark_for_coldboot(adreno_dev); } break; } From 1c714aca33f9c94e2b09abd904f1af17dc035bd3 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Fri, 20 Oct 2023 10:22:53 -0700 Subject: [PATCH 0542/1016] kgsl: build: Remove internal config options The DEVFREQ_GOV_QCOM_ADRENO_TZ and DEVFREQ_GOV_QCOM_GPUBW_MON config options do not make any difference in the code. Remove these extraneous configs. Change-Id: Ib9304ebd22f216f8c03f6c71fbb9084d2b74a81a Signed-off-by: Lynus Vaz --- Kconfig | 18 ------------------ config/blair_consolidate_gpuconf | 2 -- config/blair_gki_gpuconf | 2 -- config/gki_blair.conf | 6 +----- config/gki_kalama.conf | 6 +----- config/gki_khajedisp.conf | 6 +----- config/gki_kona.conf | 6 +----- config/gki_lemans.conf | 6 +----- config/gki_monaco.conf | 6 +----- config/gki_pineapple.conf | 6 +----- config/gki_qcs405.conf | 6 +----- config/gki_sa8155.conf | 6 +----- config/gki_trinket.conf | 6 +----- config/gki_waipiodisp.conf | 6 +----- config/pineapple_consolidate_gpuconf | 2 -- config/pineapple_gki_gpuconf | 2 -- config/sun_consolidate_gpuconf | 2 -- 17 files changed, 11 insertions(+), 83 deletions(-) diff --git a/Kconfig b/Kconfig index 1d8d7d865e..5dca817d92 100644 --- a/Kconfig +++ b/Kconfig @@ -12,24 +12,6 @@ config QCOM_KGSL on QTI targets. This includes power management, memory management, and scheduling for the Adreno GPUs. -config DEVFREQ_GOV_QCOM_ADRENO_TZ - tristate "Qualcomm Technologies, Inc. 
GPU frequency governor" - depends on PM_DEVFREQ - help - GPU frequency governor for the Adreno GPU. Sets the frequency - using an "on demand" algorithm in conjunction with other - components on Adreno platforms. This is not useful for non-Adreno - devices. - -config DEVFREQ_GOV_QCOM_GPUBW_MON - tristate "Qualcomm Technologies, Inc. GPU bandwidth governor" - depends on DEVFREQ_GOV_QCOM_ADRENO_TZ - help - This governor works together with the Adreno GPU governor to - select bus frequency votes using an "on-demand" algorithm. - This governor will not be useful for non-Adreno based - targets. - config QCOM_KGSL_FENCE_TRACE bool "Enable built-in tracing for adreno fence timeouts" help diff --git a/config/blair_consolidate_gpuconf b/config/blair_consolidate_gpuconf index 8319734c76..03c2dd6a85 100644 --- a/config/blair_consolidate_gpuconf +++ b/config/blair_consolidate_gpuconf @@ -2,8 +2,6 @@ # Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. CONFIG_QCOM_KGSL=m -CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=y -CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=y CONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 CONFIG_QCOM_KGSL_SORT_POOL=y CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y diff --git a/config/blair_gki_gpuconf b/config/blair_gki_gpuconf index 8319734c76..03c2dd6a85 100644 --- a/config/blair_gki_gpuconf +++ b/config/blair_gki_gpuconf @@ -2,8 +2,6 @@ # Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. 
CONFIG_QCOM_KGSL=m -CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=y -CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=y CONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 CONFIG_QCOM_KGSL_SORT_POOL=y CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y diff --git a/config/gki_blair.conf b/config/gki_blair.conf index f656a91b1d..b26c8f15cd 100644 --- a/config/gki_blair.conf +++ b/config/gki_blair.conf @@ -1,5 +1,3 @@ -CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ = y -CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON = y CONFIG_QCOM_KGSL_SORT_POOL = y CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y @@ -10,9 +8,7 @@ ifneq ($(CONFIG_CORESIGHT),) CONFIG_QCOM_KGSL_CORESIGHT = y endif -ccflags-y += -DCONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=1 \ - -DCONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=1 \ - -DCONFIG_QCOM_KGSL_SORT_POOL=1 \ +ccflags-y += -DCONFIG_QCOM_KGSL_SORT_POOL=1 \ -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ -DCONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR=\"msm-adreno-tz\" diff --git a/config/gki_kalama.conf b/config/gki_kalama.conf index 8314658770..fb326b4a30 100644 --- a/config/gki_kalama.conf +++ b/config/gki_kalama.conf @@ -1,5 +1,3 @@ -CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ = y -CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON = y CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 CONFIG_QCOM_KGSL_SORT_POOL = y CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y @@ -10,9 +8,7 @@ ifneq ($(CONFIG_CORESIGHT),) CONFIG_QCOM_KGSL_CORESIGHT = y endif -ccflags-y += -DCONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=1 \ - -DCONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=1 \ - -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ +ccflags-y += -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ -DCONFIG_QCOM_KGSL_SORT_POOL=1 \ -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ -DCONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=1 \ diff --git a/config/gki_khajedisp.conf b/config/gki_khajedisp.conf index ddc889c9d1..ad78222067 100644 --- a/config/gki_khajedisp.conf +++ b/config/gki_khajedisp.conf @@ -1,5 +1,3 @@ -CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ = y -CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON = y CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y 
CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR = \"msm-adreno-tz\" @@ -9,9 +7,7 @@ ifneq ($(CONFIG_SHMEM),) CONFIG_QCOM_KGSL_PROCESS_RECLAIM = y endif -ccflags-y += -DCONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=1 \ - -DCONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=1 \ - -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ +ccflags-y += -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ -DCONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR=\"msm-adreno-tz\" diff --git a/config/gki_kona.conf b/config/gki_kona.conf index cf041dcbe8..8807b7f1c5 100644 --- a/config/gki_kona.conf +++ b/config/gki_kona.conf @@ -1,16 +1,12 @@ # SPDX-License-Identifier: GPL-2.0-only -CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ = y -CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON = y CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 CONFIG_QCOM_KGSL_SORT_POOL = y CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y # CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT is not set CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR = \"msm-adreno-tz\" -ccflags-y += -DCONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=1 \ - -DCONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=1 \ - -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ +ccflags-y += -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ -DCONFIG_QCOM_KGSL_SORT_POOL=1 \ -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ -DCONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=0 \ diff --git a/config/gki_lemans.conf b/config/gki_lemans.conf index 87097eadd1..fedad204a1 100644 --- a/config/gki_lemans.conf +++ b/config/gki_lemans.conf @@ -1,14 +1,10 @@ -CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ = y -CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON = y CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 CONFIG_QCOM_KGSL_SORT_POOL = y CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT = y CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR = \"msm-adreno-tz\" -ccflags-y += -DCONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=1 \ - -DCONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=1 \ - -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ +ccflags-y += -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ -DCONFIG_QCOM_KGSL_SORT_POOL=1 \ -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ -DCONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=1 \ diff --git a/config/gki_monaco.conf 
b/config/gki_monaco.conf index cf041dcbe8..8807b7f1c5 100644 --- a/config/gki_monaco.conf +++ b/config/gki_monaco.conf @@ -1,16 +1,12 @@ # SPDX-License-Identifier: GPL-2.0-only -CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ = y -CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON = y CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 CONFIG_QCOM_KGSL_SORT_POOL = y CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y # CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT is not set CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR = \"msm-adreno-tz\" -ccflags-y += -DCONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=1 \ - -DCONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=1 \ - -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ +ccflags-y += -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ -DCONFIG_QCOM_KGSL_SORT_POOL=1 \ -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ -DCONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=0 \ diff --git a/config/gki_pineapple.conf b/config/gki_pineapple.conf index 882ef0e387..895ca7b89f 100644 --- a/config/gki_pineapple.conf +++ b/config/gki_pineapple.conf @@ -1,5 +1,3 @@ -CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ = y -CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON = y CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 CONFIG_QCOM_KGSL_SORT_POOL = y CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y @@ -11,9 +9,7 @@ ifneq ($(CONFIG_CORESIGHT),) CONFIG_QCOM_KGSL_CORESIGHT = y endif -ccflags-y += -DCONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=1 \ - -DCONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=1 \ - -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ +ccflags-y += -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ -DCONFIG_QCOM_KGSL_SORT_POOL=1 \ -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ -DCONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=1 \ diff --git a/config/gki_qcs405.conf b/config/gki_qcs405.conf index b9a6982a8a..93d423a284 100644 --- a/config/gki_qcs405.conf +++ b/config/gki_qcs405.conf @@ -1,14 +1,10 @@ -CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ = y -CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON = y CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 CONFIG_QCOM_KGSL_SORT_POOL = y CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y # CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT is not set CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR = \"msm-adreno-tz\" -ccflags-y += 
-DCONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=1 \ - -DCONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=1 \ - -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ +ccflags-y += -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ -DCONFIG_QCOM_KGSL_SORT_POOL=1 \ -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ -DCONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=0 \ diff --git a/config/gki_sa8155.conf b/config/gki_sa8155.conf index 259aa10b05..8bd108460c 100644 --- a/config/gki_sa8155.conf +++ b/config/gki_sa8155.conf @@ -1,5 +1,3 @@ -CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ = y -CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON = y CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 CONFIG_QCOM_KGSL_SORT_POOL = y CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y @@ -7,9 +5,7 @@ CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT = y CONFIG_QCOM_KGSL_HIBERNATION = y CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR = \"msm-adreno-tz\" -ccflags-y += -DCONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=1 \ - -DCONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=1 \ - -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ +ccflags-y += -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ -DCONFIG_QCOM_KGSL_SORT_POOL=1 \ -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ -DCONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=1 \ diff --git a/config/gki_trinket.conf b/config/gki_trinket.conf index cf041dcbe8..8807b7f1c5 100644 --- a/config/gki_trinket.conf +++ b/config/gki_trinket.conf @@ -1,16 +1,12 @@ # SPDX-License-Identifier: GPL-2.0-only -CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ = y -CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON = y CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 CONFIG_QCOM_KGSL_SORT_POOL = y CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y # CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT is not set CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR = \"msm-adreno-tz\" -ccflags-y += -DCONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=1 \ - -DCONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=1 \ - -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ +ccflags-y += -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ -DCONFIG_QCOM_KGSL_SORT_POOL=1 \ -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ -DCONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=0 \ diff --git a/config/gki_waipiodisp.conf b/config/gki_waipiodisp.conf index 87097eadd1..fedad204a1 100644 --- 
a/config/gki_waipiodisp.conf +++ b/config/gki_waipiodisp.conf @@ -1,14 +1,10 @@ -CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ = y -CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON = y CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 CONFIG_QCOM_KGSL_SORT_POOL = y CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT = y CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR = \"msm-adreno-tz\" -ccflags-y += -DCONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=1 \ - -DCONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=1 \ - -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ +ccflags-y += -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ -DCONFIG_QCOM_KGSL_SORT_POOL=1 \ -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ -DCONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=1 \ diff --git a/config/pineapple_consolidate_gpuconf b/config/pineapple_consolidate_gpuconf index d58575d748..837c2df56d 100644 --- a/config/pineapple_consolidate_gpuconf +++ b/config/pineapple_consolidate_gpuconf @@ -2,8 +2,6 @@ # Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. CONFIG_QCOM_KGSL=m -CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=y -CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=y CONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 CONFIG_QCOM_KGSL_SORT_POOL=y CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y diff --git a/config/pineapple_gki_gpuconf b/config/pineapple_gki_gpuconf index d58575d748..837c2df56d 100644 --- a/config/pineapple_gki_gpuconf +++ b/config/pineapple_gki_gpuconf @@ -2,8 +2,6 @@ # Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. CONFIG_QCOM_KGSL=m -CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=y -CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=y CONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 CONFIG_QCOM_KGSL_SORT_POOL=y CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y diff --git a/config/sun_consolidate_gpuconf b/config/sun_consolidate_gpuconf index d58575d748..837c2df56d 100644 --- a/config/sun_consolidate_gpuconf +++ b/config/sun_consolidate_gpuconf @@ -2,8 +2,6 @@ # Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. 
CONFIG_QCOM_KGSL=m -CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=y -CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=y CONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 CONFIG_QCOM_KGSL_SORT_POOL=y CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y From caa9d669eac512ee5c45592979b59c2d2a2d6000 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Fri, 20 Oct 2023 10:26:28 -0700 Subject: [PATCH 0543/1016] msm: kgsl: Use the devfreq governors inside the kernel tree If the kernel enables in-tree versions of the Adreno GPU and bandwidth governors, use these instead of the kgsl code. Change-Id: I996f0d50acf88dbbd95a9b7f33221c6748052310 Signed-off-by: Lynus Vaz --- adreno.c | 2 +- build/kgsl_defs.bzl | 4 ++-- kgsl_pwrscale.h | 32 +++++++++++++++++++++++++++++--- msm_adreno_devfreq.h | 2 ++ 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/adreno.c b/adreno.c index 28f3e6aa10..5c6224f844 100644 --- a/adreno.c +++ b/adreno.c @@ -3732,7 +3732,7 @@ module_exit(kgsl_3d_exit); MODULE_DESCRIPTION("3D Graphics driver"); MODULE_LICENSE("GPL v2"); -MODULE_SOFTDEP("pre: arm_smmu nvmem_qfprom socinfo"); +MODULE_SOFTDEP("pre: arm_smmu nvmem_qfprom socinfo governor_msm_adreno_tz governor_gpubw_mon"); #if (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 18, 0)) MODULE_IMPORT_NS(DMA_BUF); #endif diff --git a/build/kgsl_defs.bzl b/build/kgsl_defs.bzl index d16037095f..b68d10f814 100644 --- a/build/kgsl_defs.bzl +++ b/build/kgsl_defs.bzl @@ -48,8 +48,6 @@ def kgsl_get_srcs(): "adreno_snapshot.c", "adreno_sysfs.c", "adreno_trace.c", - "governor_msm_adreno_tz.c", - "governor_gpubw_mon.c", "kgsl.c", "kgsl_bus.c", "kgsl_drawobj.c", @@ -122,6 +120,8 @@ def define_target_variant_module(target, variant): "CONFIG_QCOM_KGSL_PROCESS_RECLAIM": { True: [ "kgsl_reclaim.c" ] }, "CONFIG_QCOM_KGSL_USE_SHMEM": { False: [ "kgsl_pool.c" ] }, "CONFIG_SYNC_FILE": { True: [ "kgsl_sync.c" ] }, + "CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ": { False: [ "governor_msm_adreno_tz.c" ] }, + "CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON": { False: [ "governor_gpubw_mon.c" ] } }, deps = [ 
"//msm-kernel:all_headers" ] + ext_deps, includes = ["include", "."], diff --git a/kgsl_pwrscale.h b/kgsl_pwrscale.h index f5a2b8501f..46a76ffd6c 100644 --- a/kgsl_pwrscale.h +++ b/kgsl_pwrscale.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2010-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef __KGSL_PWRSCALE_H @@ -9,7 +9,12 @@ #include "governor.h" #include "kgsl_pwrctrl.h" + +#if IS_ENABLED(CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ) +#include +#else #include "msm_adreno_devfreq.h" +#endif /* devfreq governor call window in usec */ #define KGSL_GOVERNOR_CALL_INTERVAL 10000 @@ -107,13 +112,34 @@ int kgsl_busmon_get_dev_status(struct device *dev, struct devfreq_dev_status *stat); int kgsl_busmon_get_cur_freq(struct device *dev, unsigned long *freq); +#if IS_ENABLED(CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ) +static inline int msm_adreno_tz_init(void) +{ + return 0; +} + +static inline void msm_adreno_tz_exit(void) +{ +} +#else int msm_adreno_tz_init(void); -int msm_adreno_tz_reinit(struct devfreq *devfreq); - void msm_adreno_tz_exit(void); +#endif +#if IS_ENABLED(CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON) +static inline int devfreq_gpubw_init(void) +{ + return 0; +} + +static inline void devfreq_gpubw_exit(void) +{ +} +#else int devfreq_gpubw_init(void); void devfreq_gpubw_exit(void); #endif + +#endif diff --git a/msm_adreno_devfreq.h b/msm_adreno_devfreq.h index 9944fff384..75b74da4b1 100644 --- a/msm_adreno_devfreq.h +++ b/msm_adreno_devfreq.h @@ -79,4 +79,6 @@ typedef void(*getbw_func)(unsigned long *, unsigned long *, void *); int devfreq_vbif_update_bw(void); void devfreq_vbif_register_callback(getbw_func func, void *data); +int msm_adreno_tz_reinit(struct devfreq *devfreq); + #endif From af70cb17c7c89c0ed66459884cd31118fff8d64f Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Tue, 
10 Oct 2023 17:14:41 -0700 Subject: [PATCH 0544/1016] kgsl: Update firmware dump sequence in snapshot SQE and AQE firmware can be dumped irrespective of GX status. Hence move the firmware dumping before GX ON check so that firmware details are captured correctly in snapshot even for GX OFF case. Change-Id: Ia9e26a313ecd0a7b287a820cf4ff6d8e9714e8c0 Signed-off-by: Hareesh Gundu --- adreno_a6xx_snapshot.c | 8 ++++---- adreno_gen7_snapshot.c | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/adreno_a6xx_snapshot.c b/adreno_a6xx_snapshot.c index ef1d55f385..d6fe0ecab5 100644 --- a/adreno_a6xx_snapshot.c +++ b/adreno_a6xx_snapshot.c @@ -1799,6 +1799,10 @@ void a6xx_snapshot(struct adreno_device *adreno_dev, sptprac_on = a6xx_gmu_sptprac_is_on(adreno_dev); + /* SQE Firmware */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a6xx_snapshot_sqe, NULL); + if (!adreno_gx_is_on(adreno_dev)) return; @@ -1895,10 +1899,6 @@ void a6xx_snapshot(struct adreno_device *adreno_dev, kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, a6xx_snapshot_cp_roq, NULL); - /* SQE Firmware */ - kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, - snapshot, a6xx_snapshot_sqe, NULL); - /* Mempool debug data */ if (adreno_is_a650_family(adreno_dev)) a650_snapshot_mempool(device, snapshot); diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c index 99097ad0fb..51aa8fd31a 100644 --- a/adreno_gen7_snapshot.c +++ b/adreno_gen7_snapshot.c @@ -1643,6 +1643,14 @@ void gen7_snapshot(struct adreno_device *adreno_dev, kgsl_regwrite(device, GEN7_RBBM_CLOCK_CNTL3_TP0, cgc2); } + /* SQE Firmware */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, gen7_snapshot_sqe, NULL); + + /* AQE Firmware */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, gen7_snapshot_aqe, NULL); + if (!adreno_gx_is_on(adreno_dev)) return; @@ -1720,14 +1728,6 @@ void 
gen7_snapshot(struct adreno_device *adreno_dev, gen7_snapshot_lpac_roq(device, snapshot); } - /* SQE Firmware */ - kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, - snapshot, gen7_snapshot_sqe, NULL); - - /* AQE Firmware */ - kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, - snapshot, gen7_snapshot_aqe, NULL); - /* Mempool debug data */ gen7_snapshot_mempool(device, snapshot); From 8c33420b1932693ff835c4559040479c9c6fe6f4 Mon Sep 17 00:00:00 2001 From: Akhil P Oommen Date: Thu, 7 Jul 2022 17:36:44 +0530 Subject: [PATCH 0545/1016] msm: kgsl: Make avoid_ddr_stall flag target specific Make avoid_ddr_stall flag target specific and convert it to 'fast_bus_hint' flag. This would allow us to control the IB vote on high DDR stall for each target separately. Change-Id: Ied9ce988e2058b2631c80b4a66964a6a88acc6b1 Signed-off-by: Akhil P Oommen Signed-off-by: Kaushal Sanadhya --- adreno-gpulist.h | 7 +++++++ adreno_gen7.c | 3 ++- adreno_gen7.h | 2 ++ governor_gpubw_mon.c | 2 +- kgsl_pwrscale.c | 23 +++++++++++++++-------- kgsl_pwrscale.h | 7 ++----- msm_adreno_devfreq.h | 2 +- 7 files changed, 30 insertions(+), 16 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 1e3f59e2a4..e87232c5d9 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2006,6 +2006,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_0_0 = { .gen7_snapshot_block_list = &gen7_0_0_snapshot_block_list, .preempt_level = 1, .ctxt_record_size = (2860 * SZ_1K), + .fast_bus_hint = true, }; static const struct adreno_gen7_core adreno_gpu_core_gen7_0_1 = { @@ -2041,6 +2042,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_0_1 = { .gen7_snapshot_block_list = &gen7_0_0_snapshot_block_list, .preempt_level = 1, .ctxt_record_size = (2860 * SZ_1K), + .fast_bus_hint = true, }; extern const struct gen7_snapshot_block_list gen7_2_0_snapshot_block_list; @@ -2151,6 +2153,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_2_0 = { .bcl_data = 1, 
.preempt_level = 1, .ctxt_record_size = (4192 * SZ_1K), + .fast_bus_hint = true, }; static const struct adreno_gen7_core adreno_gpu_core_gen7_2_1 = { @@ -2187,6 +2190,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_2_1 = { .bcl_data = 1, .preempt_level = 1, .ctxt_record_size = (4192 * SZ_1K), + .fast_bus_hint = true, }; static const struct adreno_gen7_core adreno_gpu_core_gen7_4_0 = { @@ -2222,6 +2226,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_4_0 = { .gen7_snapshot_block_list = &gen7_0_0_snapshot_block_list, .preempt_level = 1, .ctxt_record_size = (2860 * SZ_1K), + .fast_bus_hint = true, }; extern const struct gen7_snapshot_block_list gen7_9_0_snapshot_block_list; @@ -2313,6 +2318,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_9_0 = { .acv_perfmode_vote = BIT(2), .ctxt_record_size = (3572 * SZ_1K), .preempt_level = 1, + .fast_bus_hint = true, }; static const struct adreno_gen7_core adreno_gpu_core_gen7_9_1 = { @@ -2349,6 +2355,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_9_1 = { .acv_perfmode_vote = BIT(2), .ctxt_record_size = (3572 * SZ_1K), .preempt_level = 1, + .fast_bus_hint = true, }; static const struct kgsl_regmap_list a663_hwcg_regs[] = { diff --git a/adreno_gen7.c b/adreno_gen7.c index 8b7c09a644..e7cc2dd2f4 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -18,6 +18,7 @@ #include "adreno_gen7_hwsched.h" #include "adreno_pm4types.h" #include "adreno_trace.h" +#include "kgsl_pwrscale.h" #include "kgsl_trace.h" #include "kgsl_util.h" @@ -1611,7 +1612,7 @@ int gen7_probe_common(struct platform_device *pdev, adreno_dev->hwcg_enabled = true; adreno_dev->uche_client_pf = 1; - device->pwrscale.avoid_ddr_stall = true; + kgsl_pwrscale_fast_bus_hint(gen7_core->fast_bus_hint); device->pwrctrl.rt_bus_hint = gen7_core->rt_bus_hint; device->pwrctrl.cx_gdsc_offset = GEN7_GPU_CC_CX_GDSCR; diff --git a/adreno_gen7.h b/adreno_gen7.h index 2a2384e678..1c38de9b47 100644 --- a/adreno_gen7.h +++ 
b/adreno_gen7.h @@ -123,6 +123,8 @@ struct adreno_gen7_core { u32 acv_perfmode_vote; /** @rt_bus_hint: IB level hint for real time clients i.e. RB-0 */ const u32 rt_bus_hint; + /** @fast_bus_hint: Whether or not to increase IB vote on high ddr stall */ + bool fast_bus_hint; }; /** diff --git a/governor_gpubw_mon.c b/governor_gpubw_mon.c index c4cc0e00cd..88fc1c723a 100644 --- a/governor_gpubw_mon.c +++ b/governor_gpubw_mon.c @@ -116,7 +116,7 @@ static u32 generate_hint(struct devfreq_msm_adreno_tz_data *priv, int buslevel, } /* Increase BW vote to avoid starving GPU for BW if required */ - if (priv->avoid_ddr_stall && minfreq == freq) { + if (priv->fast_bus_hint && minfreq == freq) { if (wait_active_percent > 95) return BUSMON_FLAG_SUPER_FAST_HINT; diff --git a/kgsl_pwrscale.c b/kgsl_pwrscale.c index 2a810f7781..cd0d70aa38 100644 --- a/kgsl_pwrscale.c +++ b/kgsl_pwrscale.c @@ -31,6 +31,18 @@ static void do_devfreq_notify(struct work_struct *work); static struct xstats last_xstats; static struct devfreq_dev_status last_status = { .private_data = &last_xstats }; +/* + * kgsl_pwrscale_fast_bus_hint - enable fast_bus_hint feature in + * adreno_tz governer + * @on: boolean flag to ON/OFF fast_bus_hint + * + * Called when fast_bus_hint feature should be enabled. 
+ */ +void kgsl_pwrscale_fast_bus_hint(bool on) +{ + adreno_tz_data.fast_bus_hint = on; +} + /* * kgsl_pwrscale_sleep - notify governor that device is going off * @device: The device @@ -320,6 +332,7 @@ int kgsl_devfreq_get_dev_status(struct device *dev, * to be (re)used by kgsl_busmon_get_dev_status() */ if (pwrctrl->bus_control) { + struct kgsl_pwrlevel *pwrlevel; struct xstats *last_b = (struct xstats *)last_status.private_data; @@ -331,12 +344,8 @@ int kgsl_devfreq_get_dev_status(struct device *dev, last_b->ram_wait = device->pwrscale.accum_stats.ram_wait; last_b->buslevel = device->pwrctrl.cur_dcvs_buslevel; - if (pwrscale->avoid_ddr_stall) { - struct kgsl_pwrlevel *pwrlevel; - - pwrlevel = &pwrctrl->pwrlevels[pwrctrl->min_pwrlevel]; - last_b->gpu_minfreq = pwrlevel->gpu_freq; - } + pwrlevel = &pwrctrl->pwrlevels[pwrctrl->min_pwrlevel]; + last_b->gpu_minfreq = pwrlevel->gpu_freq; } kgsl_pwrctrl_busy_time(device, stat->total_time, stat->busy_time); @@ -691,8 +700,6 @@ int kgsl_pwrscale_init(struct kgsl_device *device, struct platform_device *pdev, struct msm_adreno_extended_profile *gpu_profile; int i, ret; - adreno_tz_data.avoid_ddr_stall = pwrscale->avoid_ddr_stall; - gpu_profile = &pwrscale->gpu_profile; gpu_profile->private_data = &adreno_tz_data; diff --git a/kgsl_pwrscale.h b/kgsl_pwrscale.h index 46a76ffd6c..fd24760901 100644 --- a/kgsl_pwrscale.h +++ b/kgsl_pwrscale.h @@ -74,11 +74,6 @@ struct kgsl_pwrscale { struct devfreq *bus_devfreq; /** @devfreq_enabled: Whether or not devfreq is enabled */ bool devfreq_enabled; - /** - * @avoid_ddr_stall: Whether or not to increase IB vote on high - * ddr stall - */ - bool avoid_ddr_stall; }; /** @@ -140,6 +135,8 @@ static inline void devfreq_gpubw_exit(void) int devfreq_gpubw_init(void); void devfreq_gpubw_exit(void); + +void kgsl_pwrscale_fast_bus_hint(bool on); #endif #endif diff --git a/msm_adreno_devfreq.h b/msm_adreno_devfreq.h index 75b74da4b1..6754c65ec1 100644 --- a/msm_adreno_devfreq.h +++ 
b/msm_adreno_devfreq.h @@ -57,7 +57,7 @@ struct devfreq_msm_adreno_tz_data { /* Multiplier to change gpu busy status */ u32 mod_percent; /* Increase IB vote on high ddr stall */ - bool avoid_ddr_stall; + bool fast_bus_hint; }; struct msm_adreno_extended_profile { From 4665e0be355c390855e29e246b99e5e5f095ff6b Mon Sep 17 00:00:00 2001 From: Rakesh Naidu Bhaviripudi Date: Thu, 5 Oct 2023 21:46:15 +0530 Subject: [PATCH 0546/1016] kgsl: Remove vote on vdd parent supply voltage during slumber Add changes to remove vote during slumber on vdd parent supply for nogmu targets. Change-Id: I05eb3a4fadd6cbc44f7efc52f95244d44629e6b9 Signed-off-by: Rakesh Naidu Bhaviripudi --- kgsl_pwrctrl.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index aefb6c533c..dc289a926d 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -1466,7 +1466,16 @@ static int kgsl_pwrctrl_pwrrail(struct kgsl_device *device, bool state) &pwr->power_flags)) { kgsl_mmu_send_tlb_hint(&device->mmu, true); trace_kgsl_rail(device, state); + + /* Set the parent in retention voltage to disable CPR interrupts */ + kgsl_regulator_set_voltage(device->dev, pwr->gx_gdsc_parent, + pwr->gx_gdsc_parent_min_corner); + kgsl_pwrctrl_disable_gx_gdsc(device); + + /* Remove the vote for the vdd parent supply */ + kgsl_regulator_set_voltage(device->dev, pwr->gx_gdsc_parent, 0); + kgsl_pwrctrl_disable_cx_gdsc(device); } } else { From 4de7b3816b9f9ff895b9cf2cd18f92bb0b8bf075 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Fri, 20 Oct 2023 13:17:21 -0700 Subject: [PATCH 0547/1016] msm: kgsl: Add Gen8 support Add Gen8 support for KGSL code base. 
Change-Id: Ic9fcf51cc1fe01e8fad4a04c4a3ef11dd2517869 Signed-off-by: Hareesh Gundu --- Kbuild | 8 + adreno-gpulist.h | 97 + adreno.c | 3 +- adreno.h | 18 + adreno_dispatch.c | 3 + adreno_gen7.c | 2 - adreno_gen7.h | 5 - adreno_gen8.c | 2128 +++++++++++++++++ adreno_gen8.h | 490 ++++ adreno_gen8_gmu.c | 3425 +++++++++++++++++++++++++++ adreno_gen8_gmu.h | 530 +++++ adreno_gen8_hfi.c | 717 ++++++ adreno_gen8_hfi.h | 221 ++ adreno_gen8_hwsched.c | 1468 ++++++++++++ adreno_gen8_hwsched.h | 96 + adreno_gen8_hwsched_hfi.c | 4185 +++++++++++++++++++++++++++++++++ adreno_gen8_hwsched_hfi.h | 371 +++ adreno_gen8_preempt.c | 807 +++++++ adreno_gen8_ringbuffer.c | 649 +++++ adreno_gen8_rpmh.c | 527 +++++ adreno_hfi.h | 3 + adreno_trace.h | 65 + build/kgsl_defs.bzl | 8 + gen8_reg.h | 1196 ++++++++++ include/uapi/linux/msm_kgsl.h | 1 + kgsl_gmu_core.c | 1 + kgsl_gmu_core.h | 3 +- kgsl_util.h | 1 + 28 files changed, 17018 insertions(+), 10 deletions(-) create mode 100644 adreno_gen8.c create mode 100644 adreno_gen8.h create mode 100644 adreno_gen8_gmu.c create mode 100644 adreno_gen8_gmu.h create mode 100644 adreno_gen8_hfi.c create mode 100644 adreno_gen8_hfi.h create mode 100644 adreno_gen8_hwsched.c create mode 100644 adreno_gen8_hwsched.h create mode 100644 adreno_gen8_hwsched_hfi.c create mode 100644 adreno_gen8_hwsched_hfi.h create mode 100644 adreno_gen8_preempt.c create mode 100644 adreno_gen8_ringbuffer.c create mode 100644 adreno_gen8_rpmh.c create mode 100644 gen8_reg.h diff --git a/Kbuild b/Kbuild index 96c1545086..befd8d9e0d 100644 --- a/Kbuild +++ b/Kbuild @@ -122,6 +122,14 @@ msm_kgsl-y += \ adreno_gen7_ringbuffer.o \ adreno_gen7_rpmh.o \ adreno_gen7_snapshot.o \ + adreno_gen8.o \ + adreno_gen8_gmu.o \ + adreno_gen8_hfi.o \ + adreno_gen8_hwsched.o \ + adreno_gen8_hwsched_hfi.o \ + adreno_gen8_preempt.o \ + adreno_gen8_ringbuffer.o \ + adreno_gen8_rpmh.o \ adreno_hwsched.o \ adreno_ioctl.o \ adreno_perfcounter.o \ diff --git a/adreno-gpulist.h b/adreno-gpulist.h 
index e87232c5d9..a2e730d6a8 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2500,6 +2500,102 @@ static const struct adreno_a6xx_core adreno_gpu_core_a663 = { .ctxt_record_size = 2496 * 1024, }; +extern const struct gen8_snapshot_block_list gen8_0_0_snapshot_block_list; + +static const struct kgsl_regmap_list gen8_0_0_gbif_cx_regs[] = { + { GEN8_GBIF_QSB_SIDE0, 0x00071e20 }, + { GEN8_GBIF_QSB_SIDE1, 0x00071e20 }, + { GEN8_GBIF_QSB_SIDE2, 0x00071e20 }, + { GEN8_GBIF_QSB_SIDE3, 0x00071e20 }, + { GEN8_GBIF_CX_CONFIG, 0x20023000 }, + { GEN8_GMUCX_MRC_GBIF_QOS_CTRL, 0x33 }, +}; + +/* GEN8_0_0 protected register list */ +static const struct gen8_protected_regs gen8_0_0_protected_regs[] = { + { GEN8_CP_PROTECT_REG_GLOBAL + 0, 0x00000, 0x003a3, 0 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 1, 0x003b4, 0x0043f, 0 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 2, 0x00440, 0x0045f, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 3, 0x00580, 0x005df, 0 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 4, 0x005e0, 0x006ff, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 5, 0x0074a, 0x0074f, 0 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 6, 0x00759, 0x0077f, 0 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 7, 0x00789, 0x00789, 0 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 8, 0x0078c, 0x0079f, 0 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 9, 0x00800, 0x00829, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 10, 0x00837, 0x008e6, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 11, 0x008e7, 0x009b0, 0 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 12, 0x008ec, 0x009af, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 13, 0x009b1, 0x00c01, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 14, 0x00ce0, 0x00ce1, 0 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 15, 0x00df0, 0x00df0, 0 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 16, 0x00df1, 0x00df1, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 17, 0x00e01, 0x00e01, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 18, 0x00e03, 0x02e02, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 19, 0x03c00, 0x03cc5, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 20, 0x03cc6, 0x05cc5, 0 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 21, 0x08600, 
0x087ff, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 22, 0x08e00, 0x08eff, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 23, 0x08f00, 0x08f00, 0 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 24, 0x08f01, 0x090bf, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 25, 0x09600, 0x097ff, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 26, 0x0981a, 0x09aff, 0 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 27, 0x09e00, 0x09fff, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 28, 0x0a600, 0x0a7ff, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 29, 0x0ae00, 0x0ae06, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 30, 0x0ae08, 0x0ae0e, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 31, 0x0ae10, 0x0b17f, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 32, 0x0b600, 0x0d5ff, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 33, 0x0dc00, 0x0fbff, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 34, 0x0fc00, 0x11bff, 0 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 35, 0x18400, 0x1843f, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 36, 0x18440, 0x1857f, 0 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 37, 0x18580, 0x1a57f, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 38, 0x1b400, 0x1d3ff, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 39, 0x1f400, 0x1f877, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 40, 0x1f878, 0x1ffff, 0 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 41, 0x1f930, 0x1fc59, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 42, 0x20000, 0x21fff, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 43, 0x27800, 0x2787f, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 44, 0x27880, 0x27c01, 0 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 45, 0x27882, 0x27883, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 63, 0x27c02, 0x27c02, 1 }, + { 0 }, +}; + +static const struct kgsl_regmap_list gen8_ao_hwcg_regs[] = { + { GEN8_GMUAO_CGC_MODE_CNTL, 0x00020000 }, + { GEN8_GMUAO_CGC_DELAY_CNTL, 0x00010111 }, + { GEN8_GMUAO_CGC_HYST_CNTL, 0x00005555 }, +}; + +static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_GEN8_0_0, + UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), + .compatible = "qcom,adreno-gpu-gen8-0-0", + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | + 
ADRENO_CONTENT_PROTECTION, + .gpudev = &adreno_gen8_hwsched_gpudev.base, + .uche_gmem_alignment = SZ_16M, + .gmem_size = 12 * SZ_1M, + .bus_width = 32, + .snapshot_size = SZ_8M, + }, + .aqefw_name = "gen80000_aqe.fw", + .sqefw_name = "gen80000_sqe.fw", + .gmufw_name = "gen80000_gmu.bin", + .zap_name = "gen80000_zap.mbn", + .ao_hwcg = gen8_ao_hwcg_regs, + .ao_hwcg_count = ARRAY_SIZE(gen8_ao_hwcg_regs), + .gbif = gen8_0_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen8_0_0_gbif_cx_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = gen8_0_0_protected_regs, + .highest_bank_bit = 16, + .gmu_hub_clk_freq = 200000000, +}; + static const struct adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_a306, /* Deprecated */ &adreno_gpu_core_a306a, /* Deprecated */ @@ -2547,5 +2643,6 @@ static const struct adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_gen7_4_0.base, &adreno_gpu_core_gen7_9_0.base, &adreno_gpu_core_gen7_9_1.base, + &adreno_gpu_core_gen8_0_0.base, }; diff --git a/adreno.c b/adreno.c index 5c6224f844..b3e6325e84 100644 --- a/adreno.c +++ b/adreno.c @@ -1201,6 +1201,7 @@ static void adreno_setup_device(struct adreno_device *adreno_dev) } static const struct of_device_id adreno_component_match[] = { + { .compatible = "qcom,gen8-gmu" }, { .compatible = "qcom,gen7-gmu" }, { .compatible = "qcom,gpu-gmu" }, { .compatible = "qcom,gpu-rgmu" }, @@ -1826,7 +1827,7 @@ void adreno_get_bus_counters(struct adreno_device *adreno_dev) &adreno_dev->starved_ram_lo, NULL); /* Target has GBIF */ - if (adreno_is_gen7(adreno_dev) || + if (adreno_is_gen8(adreno_dev) || adreno_is_gen7(adreno_dev) || (adreno_is_a6xx(adreno_dev) && !adreno_is_a630(adreno_dev))) { ret |= adreno_perfcounter_kernel_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_VBIF_PWR, 1, diff --git a/adreno.h b/adreno.h index ee6342dc48..6e4d17ee41 100644 --- a/adreno.h +++ b/adreno.h @@ -236,6 +236,7 @@ enum adreno_gpurev { ADRENO_REV_GEN7_4_0 = 0x070400, ADRENO_REV_GEN7_9_0 = 0x070900, ADRENO_REV_GEN7_9_1 = 
0x070901, + ADRENO_REV_GEN8_0_0 = 0x080000, }; #define ADRENO_SOFT_FAULT BIT(0) @@ -247,6 +248,17 @@ enum adreno_gpurev { #define ADRENO_CTX_DETATCH_TIMEOUT_FAULT BIT(6) #define ADRENO_GMU_FAULT_SKIP_SNAPSHOT BIT(7) +enum adreno_pipe_type { + PIPE_NONE = 0, + PIPE_BR = 1, + PIPE_BV = 2, + PIPE_LPAC = 3, + PIPE_AQE0 = 4, + PIPE_AQE1 = 5, + PIPE_DDE_BR = 6, + PIPE_DDE_BV = 7, +}; + /* number of throttle counters for DCVS adjustment */ #define ADRENO_GPMU_THROTTLE_COUNTERS 4 @@ -1221,6 +1233,12 @@ static inline int adreno_is_gen7(struct adreno_device *adreno_dev) ADRENO_GPUREV(adreno_dev) < 0x080000; } +static inline int adreno_is_gen8(struct adreno_device *adreno_dev) +{ + return ADRENO_GPUREV(adreno_dev) >= 0x080000 && + ADRENO_GPUREV(adreno_dev) < 0x090000; +} + ADRENO_TARGET(gen7_0_0, ADRENO_REV_GEN7_0_0) ADRENO_TARGET(gen7_0_1, ADRENO_REV_GEN7_0_1) ADRENO_TARGET(gen7_2_0, ADRENO_REV_GEN7_2_0) diff --git a/adreno_dispatch.c b/adreno_dispatch.c index eacb294dce..834e2291eb 100644 --- a/adreno_dispatch.c +++ b/adreno_dispatch.c @@ -1502,6 +1502,8 @@ static void adreno_fault_header(struct kgsl_device *device, ADRENO_REG_CP_IB2_BASE_HI, &ib2base); adreno_readreg(adreno_dev, ADRENO_REG_CP_IB2_BUFSZ, &ib2sz); + /* FIXME Add slice and unslice busy status for Gen8 */ + if (drawobj != NULL) { drawctxt->base.total_fault_count++; drawctxt->base.last_faulted_cmd_ts = drawobj->timestamp; @@ -1921,6 +1923,7 @@ static int dispatcher_do_fault(struct adreno_device *adreno_dev) if (!(fault & ADRENO_IOMMU_PAGE_FAULT) && gx_on) { unsigned int val; + /* FIXME: Use adreno_is_smmu_stalled() for Gen8 */ adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS3, &val); if (val & BIT(24)) { mutex_unlock(&device->mutex); diff --git a/adreno_gen7.c b/adreno_gen7.c index e7cc2dd2f4..58b7d8224b 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -183,8 +183,6 @@ static const u32 gen7_9_x_ifpc_pwrup_reglist[] = { GEN7_TPL1_BICUBIC_WEIGHTS_TABLE_4, }; -#define F_PWR_ACD_CALIBRATE 78 - static int 
acd_calibrate_set(void *data, u64 val) { struct kgsl_device *device = data; diff --git a/adreno_gen7.h b/adreno_gen7.h index 1c38de9b47..31111f0116 100644 --- a/adreno_gen7.h +++ b/adreno_gen7.h @@ -12,11 +12,6 @@ #include "gen7_reg.h" #include "adreno_gen7_gmu.h" -#define PIPE_NONE 0 -#define PIPE_BR 1 -#define PIPE_BV 2 -#define PIPE_LPAC 3 - /* Forward struct declaration */ struct gen7_snapshot_block_list; diff --git a/adreno_gen8.c b/adreno_gen8.c new file mode 100644 index 0000000000..cb6d54712b --- /dev/null +++ b/adreno_gen8.c @@ -0,0 +1,2128 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "adreno.h" +#include "adreno_gen8.h" +#include "adreno_gen8_hwsched.h" +#include "adreno_pm4types.h" +#include "adreno_trace.h" +#include "kgsl_pwrscale.h" +#include "kgsl_trace.h" +#include "kgsl_util.h" + +/* IFPC & Preemption static powerup restore list */ +static const u32 gen8_pwrup_reglist[] = { + GEN8_UCHE_MODE_CNTL, + GEN8_UCHE_CACHE_WAYS, + GEN8_UCHE_VARB_IDLE_TIMEOUT, + GEN8_UCHE_GBIF_GX_CONFIG, + GEN8_UCHE_CCHE_MODE_CNTL, + GEN8_UCHE_CCHE_CACHE_WAYS, + GEN8_UCHE_CCHE_GC_GMEM_RANGE_MIN_LO, + GEN8_UCHE_CCHE_GC_GMEM_RANGE_MIN_HI, + GEN8_UCHE_CCHE_LPAC_GMEM_RANGE_MIN_LO, + GEN8_UCHE_CCHE_LPAC_GMEM_RANGE_MIN_HI, + GEN8_UCHE_WRITE_THRU_BASE_LO, + GEN8_UCHE_WRITE_THRU_BASE_HI, + GEN8_UCHE_TRAP_BASE_LO, + GEN8_UCHE_TRAP_BASE_HI, +}; + +/* IFPC only static powerup restore list */ +static const u32 gen8_ifpc_pwrup_reglist[] = { + GEN8_CP_PROTECT_CNTL_PIPE, + GEN8_RBBM_NC_MODE_CNTL, + GEN8_RBBM_SLICE_NC_MODE_CNTL, + GEN8_SP_NC_MODE_CNTL, + GEN8_SP_CHICKEN_BITS_2, + GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, + GEN8_SP_READ_SEL, + GEN8_TPL1_DBG_ECO_CNTL1, + GEN8_TPL1_NC_MODE_CNTL, + GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_LO, + 
GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_HI, + GEN8_SP_HLSQ_LPAC_GMEM_RANGE_MIN_LO, + GEN8_SP_HLSQ_LPAC_GMEM_RANGE_MIN_HI, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_1, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_2, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_3, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_4, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_5, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_6, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_7, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_8, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_9, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_10, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_11, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_12, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_13, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_14, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_15, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_16, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_17, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_18, + GEN8_CP_PROTECT_REG_GLOBAL, + GEN8_CP_PROTECT_REG_GLOBAL + 1, + GEN8_CP_PROTECT_REG_GLOBAL + 2, + GEN8_CP_PROTECT_REG_GLOBAL + 3, + GEN8_CP_PROTECT_REG_GLOBAL + 4, + GEN8_CP_PROTECT_REG_GLOBAL + 5, + GEN8_CP_PROTECT_REG_GLOBAL + 6, + GEN8_CP_PROTECT_REG_GLOBAL + 7, + GEN8_CP_PROTECT_REG_GLOBAL + 8, + GEN8_CP_PROTECT_REG_GLOBAL + 9, + GEN8_CP_PROTECT_REG_GLOBAL + 10, + GEN8_CP_PROTECT_REG_GLOBAL + 11, + GEN8_CP_PROTECT_REG_GLOBAL + 12, + GEN8_CP_PROTECT_REG_GLOBAL + 13, + GEN8_CP_PROTECT_REG_GLOBAL + 14, + GEN8_CP_PROTECT_REG_GLOBAL + 15, + GEN8_CP_PROTECT_REG_GLOBAL + 16, + GEN8_CP_PROTECT_REG_GLOBAL + 17, + GEN8_CP_PROTECT_REG_GLOBAL + 18, + GEN8_CP_PROTECT_REG_GLOBAL + 19, + GEN8_CP_PROTECT_REG_GLOBAL + 20, + GEN8_CP_PROTECT_REG_GLOBAL + 21, + GEN8_CP_PROTECT_REG_GLOBAL + 22, + GEN8_CP_PROTECT_REG_GLOBAL + 23, + GEN8_CP_PROTECT_REG_GLOBAL + 24, + GEN8_CP_PROTECT_REG_GLOBAL + 25, + GEN8_CP_PROTECT_REG_GLOBAL + 26, + GEN8_CP_PROTECT_REG_GLOBAL + 27, + GEN8_CP_PROTECT_REG_GLOBAL + 28, + GEN8_CP_PROTECT_REG_GLOBAL + 29, + GEN8_CP_PROTECT_REG_GLOBAL + 30, + GEN8_CP_PROTECT_REG_GLOBAL + 31, + GEN8_CP_PROTECT_REG_GLOBAL + 32, + GEN8_CP_PROTECT_REG_GLOBAL + 33, + GEN8_CP_PROTECT_REG_GLOBAL + 34, 
+ GEN8_CP_PROTECT_REG_GLOBAL + 35, + GEN8_CP_PROTECT_REG_GLOBAL + 36, + GEN8_CP_PROTECT_REG_GLOBAL + 37, + GEN8_CP_PROTECT_REG_GLOBAL + 38, + GEN8_CP_PROTECT_REG_GLOBAL + 39, + GEN8_CP_PROTECT_REG_GLOBAL + 40, + GEN8_CP_PROTECT_REG_GLOBAL + 41, + GEN8_CP_PROTECT_REG_GLOBAL + 42, + GEN8_CP_PROTECT_REG_GLOBAL + 43, + GEN8_CP_PROTECT_REG_GLOBAL + 44, + GEN8_CP_PROTECT_REG_GLOBAL + 45, + GEN8_CP_PROTECT_REG_GLOBAL + 63, + GEN8_CP_PROTECT_REG_PIPE + 15, +}; + +static int acd_calibrate_set(void *data, u64 val) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + u32 debug_val = (u32) val; + int ret; + + mutex_lock(&device->mutex); + ret = adreno_active_count_get(adreno_dev); + if (ret) + goto err; + + ret = gen8_hfi_send_set_value(adreno_dev, HFI_VALUE_DBG, + F_PWR_ACD_CALIBRATE, debug_val); + if (!ret) + gmu->acd_debug_val = debug_val; + + adreno_active_count_put(adreno_dev); +err: + mutex_unlock(&device->mutex); + return ret; +} + +static int acd_calibrate_get(void *data, u64 *val) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + + *val = (u64) gmu->acd_debug_val; + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(acd_cal_fops, acd_calibrate_get, acd_calibrate_set, "%llu\n"); + +void gen8_cp_init_cmds(struct adreno_device *adreno_dev, u32 *cmds) +{ + u32 i = 0, mask = 0; + + /* Disable concurrent binning before sending CP init */ + cmds[i++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[i++] = BIT(27); + + /* Use multiple HW contexts */ + mask |= BIT(0); + + /* Enable error detection */ + mask |= BIT(1); + + /* Set default reset state */ + mask |= BIT(3); + + /* Disable save/restore of performance counters across preemption */ + mask |= BIT(6); + + /* Enable the register init list with the spinlock */ + mask |= BIT(8); + + /* By default DMS is enabled from CP 
side, disable it if not supported */ + if (!adreno_dev->dms_enabled) + mask |= BIT(11); + + cmds[i++] = cp_type7_packet(CP_ME_INIT, 7); + + /* Enabled ordinal mask */ + cmds[i++] = mask; + cmds[i++] = 0x00000003; /* Set number of HW contexts */ + cmds[i++] = 0x20000000; /* Enable error detection */ + cmds[i++] = 0x00000002; /* Operation mode mask */ + + /* Register initialization list with spinlock */ + cmds[i++] = lower_32_bits(adreno_dev->pwrup_reglist->gpuaddr); + cmds[i++] = upper_32_bits(adreno_dev->pwrup_reglist->gpuaddr); + /* + * Gen8 targets with concurrent binning are expected to have a dynamic + * power up list with triplets which contains the pipe id in it. + * Bit 31 of POWER_UP_REGISTER_LIST_LENGTH is reused here to let CP + * know if the power up contains the triplets. If + * REGISTER_INIT_LIST_WITH_SPINLOCK is set and bit 31 below is set, + * CP expects a dynamic list with triplets. + */ + cmds[i++] = BIT(31); +} + +int gen8_fenced_write(struct adreno_device *adreno_dev, u32 offset, + u32 value, u32 mask) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + u32 status, i; + u64 ts1, ts2; + + kgsl_regwrite(device, offset, value); + ts1 = gpudev->read_alwayson(adreno_dev); + for (i = 0; i < GMU_CORE_LONG_WAKEUP_RETRY_LIMIT; i++) { + /* + * Make sure the previous register write is posted before + * checking the fence status + */ + mb(); + + gmu_core_regread(device, GEN8_GMUAO_AHB_FENCE_STATUS, &status); + + /* + * If !writedropped0/1, then the write to fenced register + * was successful + */ + if (!(status & mask)) + break; + + /* Wait a small amount of time before trying again */ + udelay(GMU_CORE_WAKEUP_DELAY_US); + + /* Try to write the fenced register again */ + kgsl_regwrite(device, offset, value); + } + + if (i < GMU_CORE_SHORT_WAKEUP_RETRY_LIMIT) + return 0; + + if (i == GMU_CORE_LONG_WAKEUP_RETRY_LIMIT) { + ts2 = gpudev->read_alwayson(adreno_dev); + 
dev_err(device->dev, + "Timed out waiting %d usecs to write fenced register 0x%x, timestamps: %llx %llx\n", + i * GMU_CORE_WAKEUP_DELAY_US, offset, ts1, ts2); + return -ETIMEDOUT; + } + + dev_info(device->dev, + "Waited %d usecs to write fenced register 0x%x\n", + i * GMU_CORE_WAKEUP_DELAY_US, offset); + + return 0; +} + +int gen8_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); + u64 freq = gen8_core->gmu_hub_clk_freq; + + adreno_dev->highest_bank_bit = gen8_core->highest_bank_bit; + adreno_dev->gmu_hub_clk_freq = freq ? freq : 150000000; + adreno_dev->bcl_data = gen8_core->bcl_data; + + adreno_dev->cooperative_reset = ADRENO_FEATURE(adreno_dev, + ADRENO_COOP_RESET); + + /* If the memory type is DDR 4, override the existing configuration */ + if (of_fdt_get_ddrtype() == 0x7) + adreno_dev->highest_bank_bit = 14; + + return adreno_allocate_global(device, &adreno_dev->pwrup_reglist, + PAGE_SIZE, 0, 0, KGSL_MEMDESC_PRIVILEGED, + "powerup_register_list"); +} + +void gen8_get_gpu_feature_info(struct adreno_device *adreno_dev) +{ + u32 feature_fuse = 0; + + /* Get HW feature soft fuse value */ + adreno_cx_misc_regread(adreno_dev, GEN8_GPU_CX_MISC_SW_FUSE_VALUE, + &feature_fuse); + + adreno_dev->fastblend_enabled = feature_fuse & BIT(GEN8_FASTBLEND_SW_FUSE); + adreno_dev->raytracing_enabled = feature_fuse & BIT(GEN8_RAYTRACING_SW_FUSE); + + /* If software enables LPAC without HW support, disable it */ + if (ADRENO_FEATURE(adreno_dev, ADRENO_LPAC)) + adreno_dev->lpac_enabled = feature_fuse & BIT(GEN8_LPAC_SW_FUSE); + + adreno_dev->feature_fuse = feature_fuse; +} + +static void gen8_host_aperture_set(struct adreno_device *adreno_dev, u32 pipe_id, + u32 slice_id, u32 use_slice_id) +{ + struct gen8_device *gen8_dev = container_of(adreno_dev, + struct gen8_device, adreno_dev); + u32 aperture_val = (FIELD_PREP(GENMASK(15, 12), pipe_id) | + 
FIELD_PREP(GENMASK(18, 16), slice_id) | + FIELD_PREP(GENMASK(23, 23), use_slice_id)); + + /* Check if we already set the aperture */ + if (gen8_dev->aperture == aperture_val) + return; + + kgsl_regwrite(KGSL_DEVICE(adreno_dev), GEN8_CP_APERTURE_CNTL_HOST, aperture_val); + + gen8_dev->aperture = aperture_val; +} + +static inline void gen8_regread_aperture(struct kgsl_device *device, + u32 offsetwords, u32 *value, u32 pipe, u32 slice_id, u32 use_slice_id) +{ + gen8_host_aperture_set(ADRENO_DEVICE(device), pipe, slice_id, use_slice_id); + + *value = kgsl_regmap_read(&device->regmap, offsetwords); +} + +static inline void gen8_regwrite_aperture(struct kgsl_device *device, + u32 offsetwords, u32 value, u32 pipe, u32 slice_id, u32 use_slice_id) +{ + gen8_host_aperture_set(ADRENO_DEVICE(device), pipe, slice_id, use_slice_id); + + kgsl_regmap_write(&device->regmap, value, offsetwords); +} + +#define GEN8_CP_PROTECT_DEFAULT (FIELD_PREP(GENMASK(31, 16), 0xffff) | BIT(0) | BIT(1) | BIT(3)) +static void gen8_protect_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); + const struct gen8_protected_regs *regs = gen8_core->protected_regs; + u32 count; + int i; + + /* + * Enable access protection to privileged registers, fault on an access + * protect violation and select the last span to protect from the start + * address all the way to the end of the register address space + */ + gen8_regwrite_aperture(device, GEN8_CP_PROTECT_CNTL_PIPE, + GEN8_CP_PROTECT_DEFAULT, PIPE_BR, 0, 0); + gen8_regwrite_aperture(device, GEN8_CP_PROTECT_CNTL_PIPE, + GEN8_CP_PROTECT_DEFAULT, PIPE_BV, 0, 0); + if (adreno_dev->lpac_enabled) + gen8_regwrite_aperture(device, GEN8_CP_PROTECT_CNTL_PIPE, + GEN8_CP_PROTECT_DEFAULT, PIPE_LPAC, 0, 0); + + /* Clear aperture register */ + gen8_host_aperture_set(adreno_dev, 0, 0, 0); + + /* Program each register defined by the core definition */ + for (i 
= 0; regs[i].reg; i++) { + /* + * This is the offset of the end register as counted from the + * start, i.e. # of registers in the range - 1 + */ + count = regs[i].end - regs[i].start; + + kgsl_regwrite(device, regs[i].reg, + FIELD_PREP(GENMASK(17, 0), regs[i].start) | + FIELD_PREP(GENMASK(30, 18), count) | + FIELD_PREP(BIT(31), regs[i].noaccess)); + } + + /* + * Last span setting is only being applied to the last pipe specific + * register. Hence duplicate the last span from protect reg into the + * BR, BV and LPAC protect reg pipe 15. + */ + i--; + gen8_regwrite_aperture(device, GEN8_CP_PROTECT_REG_PIPE + 15, + FIELD_PREP(GENMASK(17, 0), regs[i].start) | + FIELD_PREP(GENMASK(30, 18), count) | + FIELD_PREP(BIT(31), regs[i].noaccess), + PIPE_BR, 0, 0); + + gen8_regwrite_aperture(device, GEN8_CP_PROTECT_REG_PIPE + 15, + FIELD_PREP(GENMASK(17, 0), regs[i].start) | + FIELD_PREP(GENMASK(30, 18), count) | + FIELD_PREP(BIT(31), regs[i].noaccess), + PIPE_BV, 0, 0); + + if (adreno_dev->lpac_enabled) + gen8_regwrite_aperture(device, GEN8_CP_PROTECT_REG_PIPE + 15, + FIELD_PREP(GENMASK(17, 0), regs[i].start) | + FIELD_PREP(GENMASK(30, 18), count) | + FIELD_PREP(BIT(31), regs[i].noaccess), + PIPE_LPAC, 0, 0); +} + +#define RBBM_CLOCK_CNTL_ON 0x8aa8aa82 + +static void gen8_hwcg_set(struct adreno_device *adreno_dev, bool on) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); + u32 value; + int i; + + /* Increase clock keep-on hysteresis from 5 cycles to 8 cycles */ + if (on) + kgsl_regwrite(device, GEN8_RBBM_CGC_0_PC, 0x00000702); + + if (!adreno_dev->hwcg_enabled) + on = false; + + for (i = 0; i < gen8_core->ao_hwcg_count; i++) + gmu_core_regwrite(device, gen8_core->ao_hwcg[i].offset, + on ? gen8_core->ao_hwcg[i].val : 0); + + kgsl_regwrite(device, GEN8_RBBM_CLOCK_CNTL_GLOBAL, 1); + kgsl_regwrite(device, GEN8_RBBM_CGC_GLOBAL_LOAD_CMD, on ? 
1 : 0); + + if (on) { + u32 retry = 3; + + kgsl_regwrite(device, GEN8_RBBM_CGC_P2S_TRIG_CMD, 1); + /* Poll for the TXDONE:BIT(0) status */ + do { + /* Wait for small amount of time for TXDONE status*/ + udelay(1); + kgsl_regread(device, GEN8_RBBM_CGC_P2S_STATUS, &value); + } while (!(value & BIT(0)) && --retry); + + if (!(value & BIT(0))) { + dev_err(device->dev, "RBBM_CGC_P2S_STATUS:TXDONE Poll failed\n"); + kgsl_device_snapshot(device, NULL, NULL, false); + return; + } + kgsl_regwrite(device, GEN8_RBBM_CLOCK_CNTL_GLOBAL, 0); + } +} + +static void gen8_patch_pwrup_reglist(struct adreno_device *adreno_dev) +{ + struct adreno_reglist_list reglist[3]; + void *ptr = adreno_dev->pwrup_reglist->hostptr; + struct cpu_gpu_lock *lock = ptr; + u32 items = 0, i, j; + u32 *dest = ptr + sizeof(*lock); + + /* Static IFPC-only registers */ + reglist[items].regs = gen8_ifpc_pwrup_reglist; + reglist[items].count = ARRAY_SIZE(gen8_ifpc_pwrup_reglist); + lock->ifpc_list_len = reglist[items].count; + items++; + + /* Static IFPC + preemption registers */ + reglist[items].regs = gen8_pwrup_reglist; + reglist[items].count = ARRAY_SIZE(gen8_pwrup_reglist); + lock->preemption_list_len = reglist[items].count; + items++; + + /* + * For each entry in each of the lists, write the offset and the current + * register value into the GPU buffer + */ + for (i = 0; i < items; i++) { + const u32 *r = reglist[i].regs; + + for (j = 0; j < reglist[i].count; j++) { + *dest++ = r[j]; + kgsl_regread(KGSL_DEVICE(adreno_dev), r[j], dest++); + } + } + + /* + * The overall register list is composed of + * 1. Static IFPC-only registers + * 2. Static IFPC + preemption registers + * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects) + * + * The first two lists are static. Size of these lists are stored as + * number of pairs in ifpc_list_len and preemption_list_len + * respectively. 
With concurrent binning, Some of the perfcounter + * registers being virtualized, CP needs to know the pipe id to program + * the aperture inorder to restore the same. Thus, third list is a + * dynamic list with triplets as + * (
), and the length is + * stored as number for triplets in dynamic_list_len. + */ + lock->dynamic_list_len = 0; +} + +/* _llc_configure_gpu_scid() - Program the sub-cache ID for all GPU blocks */ +static void _llc_configure_gpu_scid(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 gpu_scid; + + if (IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice) || + !adreno_dev->gpu_llc_slice_enable) + return; + + if (llcc_slice_activate(adreno_dev->gpu_llc_slice)) + return; + + gpu_scid = llcc_get_slice_id(adreno_dev->gpu_llc_slice); + + /* 5 blocks at 6 bits per block */ + kgsl_regwrite(device, GEN8_GBIF_SCACHE_CNTL1, + FIELD_PREP(GENMASK(29, 24), gpu_scid) | + FIELD_PREP(GENMASK(23, 18), gpu_scid) | + FIELD_PREP(GENMASK(17, 12), gpu_scid) | + FIELD_PREP(GENMASK(11, 6), gpu_scid) | + FIELD_PREP(GENMASK(5, 0), gpu_scid)); + + kgsl_regwrite(device, GEN8_GBIF_SCACHE_CNTL0, + FIELD_PREP(GENMASK(15, 10), gpu_scid) | + FIELD_PREP(GENMASK(21, 16), gpu_scid) | + FIELD_PREP(GENMASK(27, 22), gpu_scid) | BIT(8)); +} + +static void _llc_gpuhtw_slice_activate(struct adreno_device *adreno_dev) +{ + if (IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice) || + !adreno_dev->gpuhtw_llc_slice_enable) + return; + + llcc_slice_activate(adreno_dev->gpuhtw_llc_slice); +} + +static void _set_secvid(struct kgsl_device *device) +{ + kgsl_regwrite(device, GEN8_RBBM_SECVID_TSB_CNTL, 0x0); + kgsl_regwrite(device, GEN8_RBBM_SECVID_TSB_TRUSTED_BASE_LO, + lower_32_bits(KGSL_IOMMU_SECURE_BASE32)); + kgsl_regwrite(device, GEN8_RBBM_SECVID_TSB_TRUSTED_BASE_HI, + upper_32_bits(KGSL_IOMMU_SECURE_BASE32)); + kgsl_regwrite(device, GEN8_RBBM_SECVID_TSB_TRUSTED_SIZE, + FIELD_PREP(GENMASK(31, 12), + (KGSL_IOMMU_SECURE_SIZE(&device->mmu) / SZ_4K))); +} + +/* Set UCHE_TRAP_BASE to a page below the top of the memory space */ +#define GEN8_UCHE_TRAP_BASE 0x1FFFFFFFFF000ULL + +static u64 gen8_get_uche_trap_base(void) +{ + return GEN8_UCHE_TRAP_BASE; +} + +/* + * All Gen8 targets support 
marking certain transactions as always privileged + * which allows us to mark more memory as privileged without having to + * explicitly set the APRIV bit. Choose the following transactions to be + * privileged by default: + * CDWRITE [6:6] - Crashdumper writes + * CDREAD [5:5] - Crashdumper reads + * RBRPWB [3:3] - RPTR shadow writes + * RBPRIVLEVEL [2:2] - Memory accesses from PM4 packets in the ringbuffer + * RBFETCH [1:1] - Ringbuffer reads + * ICACHE [0:0] - Instruction cache fetches + */ + +#define GEN8_APRIV_DEFAULT (BIT(3) | BIT(2) | BIT(1) | BIT(0)) +/* Add crashdumper permissions for the BR APRIV */ +#define GEN8_BR_APRIV_DEFAULT (GEN8_APRIV_DEFAULT | BIT(6) | BIT(5)) + +static const struct kgsl_regmap_list gen8_0_0_bicubic_regs[] = { + /*GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_0 default and recomended values are same */ + { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_1, 0x3fe05ff4 }, + { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_2, 0x3fa0ebee }, + { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_3, 0x3f5193ed }, + { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_4, 0x3f0243f0 }, + { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_5, 0x00000000 }, + { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_6, 0x3fd093e8 }, + { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_7, 0x3f4133dc }, + { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_8, 0x3ea1dfdb }, + { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_9, 0x3e0283e0 }, + { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_10, 0x0000ac2b }, + { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_11, 0x0000f01d }, + { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_12, 0x00114412 }, + { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_13, 0x0021980a }, + { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_14, 0x0051ec05 }, + { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_15, 0x0000380e }, + { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_16, 0x3ff09001 }, + { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_17, 0x3fc10bfa }, + { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_18, 0x3f9193f7 }, + { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_19, 0x3f7227f7 }, +}; + +#define MIN_HBB 13 +int gen8_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const 
struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); + u32 mal, mode = 0, mode2 = 0, rgb565_predicator = 0, amsbc = 0; + struct gen8_device *gen8_dev = container_of(adreno_dev, + struct gen8_device, adreno_dev); + /* + * HBB values 13 to 16 can represented LSB of HBB from 0 to 3. + * Any HBB value beyond 16 needs programming MSB of HBB. + * By default highest bank bit is 14, Hence set default HBB LSB + * to "1" and MSB to "0". + */ + u32 hbb_lo = 1, hbb_hi = 0, hbb = 1; + struct cpu_gpu_lock *pwrup_lock = adreno_dev->pwrup_reglist->hostptr; + u64 uche_trap_base = gen8_get_uche_trap_base(); + u32 rgba8888_lossless = 0; + + /* Reset aperture fields to go through first aperture write check */ + gen8_dev->aperture = UINT_MAX; + + /* Configure GBIF GX registers */ + kgsl_regwrite(device, GEN8_UCHE_GBIF_GX_CONFIG, 0x010240e0); + kgsl_regwrite(device, GEN8_RBBM_GBIF_CLIENT_QOS_CNTL, 0x22122212); + + /* Make all blocks contribute to the GPU BUSY perf counter */ + kgsl_regwrite(device, GEN8_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff); + + kgsl_regwrite(device, GEN8_UCHE_CCHE_GC_GMEM_RANGE_MIN_LO, + lower_32_bits(adreno_dev->uche_gmem_base)); + kgsl_regwrite(device, GEN8_UCHE_CCHE_GC_GMEM_RANGE_MIN_HI, + upper_32_bits(adreno_dev->uche_gmem_base)); + kgsl_regwrite(device, GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_LO, + lower_32_bits(adreno_dev->uche_gmem_base)); + kgsl_regwrite(device, GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_HI, + upper_32_bits(adreno_dev->uche_gmem_base)); + + if (adreno_dev->lpac_enabled) { + gen8_regwrite_aperture(device, GEN8_RB_LPAC_GMEM_PROTECT, + 0x0c000000, PIPE_BR, 0, 0); + + /* Clear aperture register */ + gen8_host_aperture_set(adreno_dev, 0, 0, 0); + + kgsl_regwrite(device, GEN8_UCHE_CCHE_LPAC_GMEM_RANGE_MIN_LO, + lower_32_bits(adreno_dev->uche_gmem_base)); + kgsl_regwrite(device, GEN8_UCHE_CCHE_LPAC_GMEM_RANGE_MIN_HI, + upper_32_bits(adreno_dev->uche_gmem_base)); + kgsl_regwrite(device, GEN8_SP_HLSQ_LPAC_GMEM_RANGE_MIN_LO, + 
lower_32_bits(adreno_dev->uche_gmem_base)); + kgsl_regwrite(device, GEN8_SP_HLSQ_LPAC_GMEM_RANGE_MIN_HI, + upper_32_bits(adreno_dev->uche_gmem_base)); + } + + /* + * Set UCHE_WRITE_THRU_BASE to the UCHE_TRAP_BASE effectively + * disabling L2 bypass + */ + kgsl_regwrite(device, GEN8_UCHE_TRAP_BASE_LO, lower_32_bits(uche_trap_base)); + kgsl_regwrite(device, GEN8_UCHE_TRAP_BASE_HI, upper_32_bits(uche_trap_base)); + kgsl_regwrite(device, GEN8_UCHE_WRITE_THRU_BASE_LO, lower_32_bits(uche_trap_base)); + kgsl_regwrite(device, GEN8_UCHE_WRITE_THRU_BASE_HI, upper_32_bits(uche_trap_base)); + + + /* + * CP takes care of the restore during IFPC exit. We need to restore at slumber + * boundary as well + */ + if (pwrup_lock->dynamic_list_len > 0) { + kgsl_regwrite(device, GEN8_RBBM_PERFCTR_CNTL, 0x1); + kgsl_regwrite(device, GEN8_RBBM_SLICE_PERFCTR_CNTL, 0x1); + } + + /* Turn on the IFPC counter (countable 4 on XOCLK4) */ + kgsl_regwrite(device, GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_1, + FIELD_PREP(GENMASK(7, 0), 0x4)); + + /* Turn on counter to count total time spent in BCL throttle */ + if (adreno_dev->bcl_enabled) + kgsl_regrmw(device, GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_1, GENMASK(15, 8), + FIELD_PREP(GENMASK(15, 8), 0x26)); + + if (of_property_read_u32(device->pdev->dev.of_node, "qcom,min-access-length", &mal)) + mal = 32; + + of_property_read_u32(device->pdev->dev.of_node, "qcom,ubwc-mode", &mode); + + switch (mode) { + case KGSL_UBWC_5_0: + amsbc = 1; + rgb565_predicator = 1; + mode2 = 4; + break; + case KGSL_UBWC_4_0: + amsbc = 1; + rgb565_predicator = 1; + rgba8888_lossless = 1; + mode2 = 2; + break; + case KGSL_UBWC_3_0: + amsbc = 1; + mode2 = 1; + break; + default: + break; + } + + if (!WARN_ON(!adreno_dev->highest_bank_bit)) { + hbb = adreno_dev->highest_bank_bit - MIN_HBB; + hbb_lo = hbb & 3; + hbb_hi = (hbb >> 2) & 1; + } + + mal = (mal == 64) ? 
1 : 0; + + gen8_regwrite_aperture(device, GEN8_GRAS_NC_MODE_CNTL, + FIELD_PREP(GENMASK(8, 5), hbb), PIPE_BV, 0, 0); + gen8_regwrite_aperture(device, GEN8_GRAS_NC_MODE_CNTL, + FIELD_PREP(GENMASK(8, 5), hbb), PIPE_BR, 0, 0); + gen8_regwrite_aperture(device, GEN8_RB_CCU_NC_MODE_CNTL, + FIELD_PREP(GENMASK(3, 3), hbb_hi) | + FIELD_PREP(GENMASK(2, 1), hbb_lo), + PIPE_BR, 0, 0); + gen8_regwrite_aperture(device, GEN8_RB_CMP_NC_MODE_CNTL, + FIELD_PREP(GENMASK(17, 15), mode2) | + FIELD_PREP(GENMASK(4, 4), rgba8888_lossless) | + FIELD_PREP(GENMASK(2, 2), rgb565_predicator) | + FIELD_PREP(GENMASK(1, 1), amsbc) | + FIELD_PREP(GENMASK(0, 0), mal), + PIPE_BR, 0, 0); + + /* Clear aperture register */ + gen8_host_aperture_set(adreno_dev, 0, 0, 0); + + kgsl_regwrite(device, GEN8_SP_NC_MODE_CNTL, + FIELD_PREP(GENMASK(11, 10), hbb_hi) | + FIELD_PREP(GENMASK(3, 3), mal) | + FIELD_PREP(GENMASK(2, 1), hbb_lo)); + + kgsl_regwrite(device, GEN8_TPL1_NC_MODE_CNTL, + FIELD_PREP(GENMASK(4, 4), hbb_hi) | + FIELD_PREP(GENMASK(3, 3), mal) | + FIELD_PREP(GENMASK(2, 1), hbb_lo)); + + /* Configure TP bicubic registers */ + kgsl_regmap_multi_write(&device->regmap, gen8_0_0_bicubic_regs, + ARRAY_SIZE(gen8_0_0_bicubic_regs)); + + /* Enable hardware hang detection */ + kgsl_regwrite(device, GEN8_RBBM_INTERFACE_HANG_INT_CNTL, BIT(30) | + FIELD_PREP(GENMASK(27, 0), gen8_core->hang_detect_cycles)); + kgsl_regwrite(device, GEN8_RBBM_SLICE_INTERFACE_HANG_INT_CNTL, BIT(30)); + + kgsl_regwrite(device, GEN8_UCHE_CLIENT_PF, BIT(7) | + FIELD_PREP(GENMASK(3, 0), adreno_dev->uche_client_pf)); + + /* Enable the GMEM save/restore feature for preemption */ + if (adreno_is_preemption_enabled(adreno_dev)) { + gen8_regwrite_aperture(device, + GEN8_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE, + 0x1, PIPE_BR, 0, 0); + /* Clear aperture register */ + gen8_host_aperture_set(adreno_dev, 0, 0, 0); + } + + /* Enable GMU power counter 0 to count GPU busy */ + kgsl_regwrite(device, GEN8_GMUAO_GPU_CX_BUSY_MASK, 0xff000000); + 
kgsl_regrmw(device, GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_1, 0xFF, 0x20); + kgsl_regwrite(device, GEN8_GMUCX_POWER_COUNTER_ENABLE, 0x1); + + gen8_protect_init(adreno_dev); + + /* Configure LLCC */ + _llc_configure_gpu_scid(adreno_dev); + _llc_gpuhtw_slice_activate(adreno_dev); + + gen8_regwrite_aperture(device, GEN8_CP_APRIV_CNTL_PIPE, + GEN8_BR_APRIV_DEFAULT, PIPE_BR, 0, 0); + gen8_regwrite_aperture(device, GEN8_CP_APRIV_CNTL_PIPE, + GEN8_APRIV_DEFAULT, PIPE_BV, 0, 0); + + if (adreno_dev->lpac_enabled) + gen8_regwrite_aperture(device, GEN8_CP_APRIV_CNTL_PIPE, + GEN8_APRIV_DEFAULT, PIPE_LPAC, 0, 0); + + if (ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) { + gen8_regwrite_aperture(device, GEN8_CP_APRIV_CNTL_PIPE, + GEN8_APRIV_DEFAULT, PIPE_AQE0, 0, 0); + gen8_regwrite_aperture(device, GEN8_CP_APRIV_CNTL_PIPE, + GEN8_APRIV_DEFAULT, PIPE_AQE1, 0, 0); + } + + /* Clear aperture register */ + gen8_host_aperture_set(adreno_dev, 0, 0, 0); + + _set_secvid(device); + + /* + * Enable hardware clock gating here to prevent any register access + * issue due to internal clock gating. + */ + gen8_hwcg_set(adreno_dev, true); + + /* + * All registers must be written before this point so that we don't + * miss any register programming when we patch the power up register + * list. + */ + if (!adreno_dev->patch_reglist && + (adreno_dev->pwrup_reglist->gpuaddr != 0)) { + gen8_patch_pwrup_reglist(adreno_dev); + adreno_dev->patch_reglist = true; + } + + return 0; +} + +/* Offsets into the MX/CX mapped register regions */ +#define GEN8_RDPM_MX_OFFSET 0xf00 +#define GEN8_RDPM_CX_OFFSET 0xf14 + +void gen8_rdpm_mx_freq_update(struct gen8_gmu_device *gmu, u32 freq) +{ + if (gmu->rdpm_mx_virt) { + writel_relaxed(freq/1000, (gmu->rdpm_mx_virt + GEN8_RDPM_MX_OFFSET)); + + /* + * ensure previous writes post before this one, + * i.e. 
act like normal writel() + */ + wmb(); + } +} + +void gen8_rdpm_cx_freq_update(struct gen8_gmu_device *gmu, u32 freq) +{ + if (gmu->rdpm_cx_virt) { + writel_relaxed(freq/1000, (gmu->rdpm_cx_virt + GEN8_RDPM_CX_OFFSET)); + + /* + * ensure previous writes post before this one, + * i.e. act like normal writel() + */ + wmb(); + } +} + +int gen8_scm_gpu_init_cx_regs(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 gpu_req = GPU_ALWAYS_EN_REQ; + int ret; + + if (ADRENO_FEATURE(adreno_dev, ADRENO_BCL)) + gpu_req |= GPU_BCL_EN_REQ; + + gpu_req |= GPU_TSENSE_EN_REQ; + + ret = kgsl_scm_gpu_init_regs(&device->pdev->dev, gpu_req); + + /* + * For targets that support this scm call to program BCL id , enable BCL. + * For other targets, BCL is enabled after first GMU boot. + */ + if (!ret && ADRENO_FEATURE(adreno_dev, ADRENO_BCL)) + adreno_dev->bcl_enabled = true; + + /* + * If scm call returned EOPNOTSUPP, either we are on a kernel version + * lesser than 6.1 where scm call is not supported or we are sending an + * empty request. Ignore the error in such cases. + */ + return (ret == -EOPNOTSUPP) ? 0 : ret; +} + +void gen8_spin_idle_debug(struct adreno_device *adreno_dev, + const char *str) +{ + struct kgsl_device *device = &adreno_dev->dev; + u32 rptr, wptr, status, intstatus, global_status; + + dev_err(device->dev, str); + + kgsl_regread(device, GEN8_CP_RB_RPTR_BR, &rptr); + kgsl_regread(device, GEN8_CP_RB_WPTR_GC, &wptr); + + kgsl_regread(device, GEN8_RBBM_STATUS, &status); + kgsl_regread(device, GEN8_RBBM_INT_0_STATUS, &intstatus); + kgsl_regread(device, GEN8_CP_INTERRUPT_STATUS_GLOBAL, &global_status); + + dev_err(device->dev, + "rb=%d pos=%X/%X rbbm_status=%8.8X int_0_status=%8.8X global_status=%8.8X\n", + adreno_dev->cur_rb ? 
adreno_dev->cur_rb->id : -1, rptr, wptr, + status, intstatus, global_status); + + kgsl_device_snapshot(device, NULL, NULL, false); +} + +/* + * gen8_send_cp_init() - Initialize ringbuffer + * @adreno_dev: Pointer to adreno device + * @rb: Pointer to the ringbuffer of device + * + * Submit commands for ME initialization, + */ +static int gen8_send_cp_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + u32 *cmds; + int ret; + + cmds = adreno_ringbuffer_allocspace(rb, GEN8_CP_INIT_DWORDS); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + + gen8_cp_init_cmds(adreno_dev, cmds); + + ret = gen8_ringbuffer_submit(rb, NULL); + if (ret) + return ret; + + ret = adreno_spin_idle(adreno_dev, 2000); + if (ret) { + gen8_spin_idle_debug(adreno_dev, + "CP initialization failed to idle\n"); + rb->wptr = 0; + rb->_wptr = 0; + } + + return ret; +} + +static int gen8_post_start(struct adreno_device *adreno_dev) +{ + int ret; + u32 *cmds; + struct adreno_ringbuffer *rb = adreno_dev->cur_rb; + struct adreno_preemption *preempt = &adreno_dev->preempt; + u64 kmd_postamble_addr; + + if (!adreno_is_preemption_enabled(adreno_dev)) + return 0; + + kmd_postamble_addr = SCRATCH_POSTAMBLE_ADDR(KGSL_DEVICE(adreno_dev)); + gen8_preemption_prepare_postamble(adreno_dev); + + cmds = adreno_ringbuffer_allocspace(rb, + (preempt->postamble_bootup_len ? 
16 : 12)); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + + *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 6); + *cmds++ = SET_PSEUDO_PRIV_NON_SECURE_SAVE_ADDR; + *cmds++ = lower_32_bits(rb->preemption_desc->gpuaddr); + *cmds++ = upper_32_bits(rb->preemption_desc->gpuaddr); + + *cmds++ = SET_PSEUDO_PRIV_SECURE_SAVE_ADDR; + *cmds++ = lower_32_bits(rb->secure_preemption_desc->gpuaddr); + *cmds++ = upper_32_bits(rb->secure_preemption_desc->gpuaddr); + + if (preempt->postamble_bootup_len) { + *cmds++ = cp_type7_packet(CP_SET_AMBLE, 3); + *cmds++ = lower_32_bits(kmd_postamble_addr); + *cmds++ = upper_32_bits(kmd_postamble_addr); + *cmds++ = FIELD_PREP(GENMASK(22, 20), CP_KMD_AMBLE_TYPE) + | (FIELD_PREP(GENMASK(19, 0), + adreno_dev->preempt.postamble_bootup_len)); + } + + *cmds++ = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4); + *cmds++ = 0; + *cmds++ = 0; + *cmds++ = 0; + /* generate interrupt on preemption completion */ + *cmds++ = 0; + + ret = gen8_ringbuffer_submit(rb, NULL); + if (!ret) { + ret = adreno_spin_idle(adreno_dev, 2000); + if (ret) + gen8_spin_idle_debug(adreno_dev, + "hw preemption initialization failed to idle\n"); + } + + return ret; +} + +int gen8_rb_start(struct adreno_device *adreno_dev) +{ + const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); + struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_ringbuffer *rb; + u64 addr; + int ret, i; + u32 *cmds; + + /* Clear all the ringbuffers */ + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + memset(rb->buffer_desc->hostptr, 0xaa, KGSL_RB_SIZE); + kgsl_sharedmem_writel(device->scratch, + SCRATCH_RB_OFFSET(rb->id, rptr), 0); + kgsl_sharedmem_writel(device->scratch, + SCRATCH_RB_OFFSET(rb->id, bv_rptr), 0); + + rb->wptr = 0; + rb->_wptr = 0; + rb->wptr_preempt_end = UINT_MAX; + } + + gen8_preemption_start(adreno_dev); + + /* Set up the current ringbuffer */ + rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); + + addr 
= SCRATCH_RB_GPU_ADDR(device, rb->id, rptr); + kgsl_regwrite(device, GEN8_CP_RB_RPTR_ADDR_LO_BR, lower_32_bits(addr)); + kgsl_regwrite(device, GEN8_CP_RB_RPTR_ADDR_HI_BR, upper_32_bits(addr)); + + addr = SCRATCH_RB_GPU_ADDR(device, rb->id, bv_rptr); + kgsl_regwrite(device, GEN8_CP_RB_RPTR_ADDR_LO_BV, lower_32_bits(addr)); + kgsl_regwrite(device, GEN8_CP_RB_RPTR_ADDR_HI_BV, upper_32_bits(addr)); + + kgsl_regwrite(device, GEN8_CP_RB_CNTL_GC, GEN8_CP_RB_CNTL_DEFAULT); + + kgsl_regwrite(device, GEN8_CP_RB_BASE_LO_GC, + lower_32_bits(rb->buffer_desc->gpuaddr)); + kgsl_regwrite(device, GEN8_CP_RB_BASE_HI_GC, + upper_32_bits(rb->buffer_desc->gpuaddr)); + + /* Program the ucode base for CP */ + kgsl_regwrite(device, GEN8_CP_SQE_INSTR_BASE_LO, + lower_32_bits(fw->memdesc->gpuaddr)); + kgsl_regwrite(device, GEN8_CP_SQE_INSTR_BASE_HI, + upper_32_bits(fw->memdesc->gpuaddr)); + + /* Clear the SQE_HALT to start the CP engine */ + kgsl_regwrite(device, GEN8_CP_SQE_CNTL, 1); + + ret = gen8_send_cp_init(adreno_dev, rb); + if (ret) + return ret; + + ret = adreno_zap_shader_load(adreno_dev, gen8_core->zap_name); + if (ret) + return ret; + + /* + * Take the GPU out of secure mode. 
Try the zap shader if it is loaded, + * otherwise just try to write directly to the secure control register + */ + if (!adreno_dev->zap_loaded) + kgsl_regwrite(device, GEN8_RBBM_SECVID_TRUST_CNTL, 0); + else { + cmds = adreno_ringbuffer_allocspace(rb, 2); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + + *cmds++ = cp_type7_packet(CP_SET_SECURE_MODE, 1); + *cmds++ = 0; + + ret = gen8_ringbuffer_submit(rb, NULL); + if (!ret) { + ret = adreno_spin_idle(adreno_dev, 2000); + if (ret) { + gen8_spin_idle_debug(adreno_dev, + "Switch to unsecure failed to idle\n"); + return ret; + } + } + } + + return gen8_post_start(adreno_dev); +} + +/* + * gen8_gpu_keepalive() - GMU reg write to request GPU stays on + * @adreno_dev: Pointer to the adreno device that has the GMU + * @state: State to set: true is ON, false is OFF + */ +static void gen8_gpu_keepalive(struct adreno_device *adreno_dev, + bool state) +{ + gmu_core_regwrite(KGSL_DEVICE(adreno_dev), + GEN8_GMUCX_PWR_COL_KEEPALIVE, state); +} + +bool gen8_hw_isidle(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 reg; + + gmu_core_regread(device, GEN8_GMUAO_GPU_CX_BUSY_STATUS, ®); + + /* Bit 23 is GPUBUSYIGNAHB */ + return (reg & BIT(23)) ? 
false : true; +} + +int gen8_microcode_read(struct adreno_device *adreno_dev) +{ + struct adreno_firmware *sqe_fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE); + const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); + + return adreno_get_firmware(adreno_dev, gen8_core->sqefw_name, sqe_fw); +} + +/* CP Interrupt bits */ +#define GEN8_CP_GLOBAL_INT_HWFAULTBR 0 +#define GEN8_CP_GLOBAL_INT_HWFAULTBV 1 +#define GEN8_CP_GLOBAL_INT_HWFAULTLPAC 2 +#define GEN8_CP_GLOBAL_INT_HWFAULTAQE0 3 +#define GEN8_CP_GLOBAL_INT_HWFAULTAQE1 4 +#define GEN8_CP_GLOBAL_INT_HWFAULTDDEBR 5 +#define GEN8_CP_GLOBAL_INT_HWFAULTDDEBV 6 +#define GEN8_CP_GLOBAL_INT_SWFAULTBR 16 +#define GEN8_CP_GLOBAL_INT_SWFAULTBV 17 +#define GEN8_CP_GLOBAL_INT_SWFAULTLPAC 18 +#define GEN8_CP_GLOBAL_INT_SWFAULTAQE0 19 +#define GEN8_CP_GLOBAL_INT_SWFAULTAQE1 20 +#define GEN8_CP_GLOBAL_INT_SWFAULTDDEBR 21 +#define GEN8_CP_GLOBAL_INT_SWFAULTDDEBV 22 + +/* CP HW Fault status bits */ +#define CP_HW_RBFAULT 0 +#define CP_HW_IB1FAULT 1 +#define CP_HW_IB2FAULT 2 +#define CP_HW_IB3FAULT 3 +#define CP_HW_SDSFAULT 4 +#define CP_HW_MRBFAULT 5 +#define CP_HW_VSDFAULT 6 +#define CP_HW_SQEREADBRUSTOVF 8 +#define CP_HW_EVENTENGINEOVF 9 +#define CP_HW_UCODEERROR 10 + +/* CP SW Fault status bits */ +#define CP_SW_CSFRBWRAP 0 +#define CP_SW_CSFIB1WRAP 1 +#define CP_SW_CSFIB2WRAP 2 +#define CP_SW_CSFIB3WRAP 3 +#define CP_SW_SDSWRAP 4 +#define CP_SW_MRBWRAP 5 +#define CP_SW_VSDWRAP 6 +#define CP_SW_OPCODEERROR 8 +#define CP_SW_VSDPARITYERROR 9 +#define CP_SW_REGISTERPROTECTIONERROR 10 +#define CP_SW_ILLEGALINSTRUCTION 11 +#define CP_SW_SMMUFAULT 12 +#define CP_SW_VBIFRESPCLIENT 13 +#define CP_SW_VBIFRESPTYPE 19 +#define CP_SW_VBIFRESPREAD 21 +#define CP_SW_VBIFRESP 22 +#define CP_SW_RTWROVF 23 +#define CP_SW_LRZRTWROVF 24 +#define CP_SW_LRZRTREFCNTOVF 25 +#define CP_SW_LRZRTCLRRESMISS 26 + +static void gen8_get_cp_hwfault_status(struct adreno_device *adreno_dev, u32 status) +{ + struct kgsl_device *device = 
KGSL_DEVICE(adreno_dev); + u32 hw_status; + u32 pipe_id = PIPE_NONE; + const char * const table[] = { + [CP_HW_RBFAULT] = "RBFAULT", + [CP_HW_IB1FAULT] = "IB1FAULT", + [CP_HW_IB2FAULT] = "IB2FAULT", + [CP_HW_SDSFAULT] = "SDSFAULT", + [CP_HW_MRBFAULT] = "MRGFAULT", + [CP_HW_VSDFAULT] = "VSDFAULT", + [CP_HW_SQEREADBRUSTOVF] = "SQEREADBRUSTOVF", + [CP_HW_EVENTENGINEOVF] = "EVENTENGINEOVF", + [CP_HW_UCODEERROR] = "UCODEERROR", + }; + + switch (status) { + case BIT(GEN8_CP_GLOBAL_INT_HWFAULTBR): + pipe_id = PIPE_BR; + break; + case BIT(GEN8_CP_GLOBAL_INT_HWFAULTBV): + pipe_id = PIPE_BV; + break; + case BIT(GEN8_CP_GLOBAL_INT_HWFAULTLPAC): + pipe_id = PIPE_LPAC; + break; + case BIT(GEN8_CP_GLOBAL_INT_HWFAULTAQE0): + pipe_id = PIPE_AQE0; + break; + case BIT(GEN8_CP_GLOBAL_INT_HWFAULTAQE1): + pipe_id = PIPE_AQE1; + break; + case BIT(GEN8_CP_GLOBAL_INT_HWFAULTDDEBR): + pipe_id = PIPE_DDE_BR; + break; + case BIT(GEN8_CP_GLOBAL_INT_HWFAULTDDEBV): + pipe_id = PIPE_DDE_BV; + break; + } + + gen8_regread_aperture(device, GEN8_CP_HW_FAULT_STATUS_PIPE, &hw_status, + pipe_id, 0, 0); + /* Clear aperture register */ + gen8_host_aperture_set(adreno_dev, 0, 0, 0); + + dev_crit_ratelimited(device->dev, "CP HW Fault pipe_id:%u %s\n", pipe_id, + hw_status < ARRAY_SIZE(table) ? 
table[hw_status] : "UNKNOWN"); +} + +static void gen8_get_cp_swfault_status(struct adreno_device *adreno_dev, u32 status) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 sw_status, status1; + u32 opcode, pipe_id = PIPE_NONE; + const char * const table[] = { + [CP_SW_CSFRBWRAP] = "CSFRBWRAP", + [CP_SW_CSFIB1WRAP] = "CSFIB1WRAP", + [CP_SW_CSFIB2WRAP] = "CSFIB2WRAP", + [CP_SW_CSFIB3WRAP] = "CSFIB3WRAP", + [CP_SW_SDSWRAP] = "SDSWRAP", + [CP_SW_MRBWRAP] = "MRBWRAP", + [CP_SW_VSDWRAP] = "VSDWRAP", + [CP_SW_OPCODEERROR] = "OPCODEERROR", + [CP_SW_VSDPARITYERROR] = "VSDPARITYERROR", + [CP_SW_REGISTERPROTECTIONERROR] = "REGISTERPROTECTIONERROR", + [CP_SW_ILLEGALINSTRUCTION] = "ILLEGALINSTRUCTION", + [CP_SW_SMMUFAULT] = "SMMUFAULT", + [CP_SW_VBIFRESPCLIENT] = "VBIFRESPCLIENT", + [CP_SW_VBIFRESPTYPE] = "VBIFRESPTYPE", + [CP_SW_VBIFRESPREAD] = "VBIFRESPREAD", + [CP_SW_VBIFRESP] = "VBIFRESP", + [CP_SW_RTWROVF] = "RTWROVF", + [CP_SW_LRZRTWROVF] = "LRZRTWROVF", + [CP_SW_LRZRTREFCNTOVF] = "LRZRTREFCNTOVF", + [CP_SW_LRZRTCLRRESMISS] = "LRZRTCLRRESMISS", + }; + + switch (status) { + case BIT(GEN8_CP_GLOBAL_INT_SWFAULTBR): + pipe_id = PIPE_BR; + break; + case BIT(GEN8_CP_GLOBAL_INT_SWFAULTBV): + pipe_id = PIPE_BV; + break; + case BIT(GEN8_CP_GLOBAL_INT_SWFAULTLPAC): + pipe_id = PIPE_LPAC; + break; + case BIT(GEN8_CP_GLOBAL_INT_SWFAULTAQE0): + pipe_id = PIPE_AQE0; + break; + case BIT(GEN8_CP_GLOBAL_INT_SWFAULTAQE1): + pipe_id = PIPE_AQE1; + break; + case BIT(GEN8_CP_GLOBAL_INT_SWFAULTDDEBR): + pipe_id = PIPE_DDE_BR; + break; + case BIT(GEN8_CP_GLOBAL_INT_SWFAULTDDEBV): + pipe_id = PIPE_DDE_BV; + break; + } + + gen8_regread_aperture(device, GEN8_CP_INTERRUPT_STATUS_PIPE, &sw_status, + pipe_id, 0, 0); + + dev_crit_ratelimited(device->dev, "CP SW Fault pipe_id: %u %s\n", pipe_id, + sw_status < ARRAY_SIZE(table) ? 
table[sw_status] : "UNKNOWN"); + + if (sw_status & BIT(CP_SW_OPCODEERROR)) { + gen8_regwrite_aperture(device, GEN8_CP_SQE_STAT_ADDR_PIPE, 1, + pipe_id, 0, 0); + gen8_regread_aperture(device, GEN8_CP_SQE_STAT_DATA_PIPE, &opcode, + pipe_id, 0, 0); + dev_crit_ratelimited(device->dev, + "CP opcode error interrupt | opcode=0x%8.8x\n", opcode); + } + + if (sw_status & BIT(CP_SW_REGISTERPROTECTIONERROR)) { + gen8_regread_aperture(device, GEN8_CP_PROTECT_STATUS_PIPE, &status1, + pipe_id, 0, 0); + dev_crit_ratelimited(device->dev, + "CP | Protected mode error | %s | addr=%lx | status=%x\n", + FIELD_GET(GENMASK(20, 20), status1) ? "READ" : "WRITE", + FIELD_GET(GENMASK(17, 0), status1), status1); + } + + /* Clear aperture register */ + gen8_host_aperture_set(adreno_dev, 0, 0, 0); +} + +static void gen8_cp_hw_err_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 global_status; + u32 hw_fault, sw_fault; + + kgsl_regread(device, GEN8_CP_INTERRUPT_STATUS_GLOBAL, &global_status); + + dev_crit_ratelimited(device->dev, "CP fault int_status_global=0x%x\n", global_status); + + hw_fault = FIELD_GET(GENMASK(6, 0), global_status); + sw_fault = FIELD_GET(GENMASK(22, 16), global_status); + + if (hw_fault) + gen8_get_cp_hwfault_status(adreno_dev, hw_fault); + else if (sw_fault) + gen8_get_cp_swfault_status(adreno_dev, sw_fault); +} + +static void gen8_err_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + switch (bit) { + case GEN8_INT_AHBERROR: + { + u32 err_details_0, err_details_1; + + kgsl_regread(device, GEN8_CP_RL_ERROR_DETAILS_0, &err_details_0); + kgsl_regread(device, GEN8_CP_RL_ERROR_DETAILS_1, &err_details_1); + dev_crit_ratelimited(device->dev, + "CP: AHB bus error, CP_RL_ERROR_DETAILS_0:0x%x CP_RL_ERROR_DETAILS_1:0x%x\n", + err_details_0, err_details_1); + break; + } + case GEN8_INT_ATBASYNCFIFOOVERFLOW: + dev_crit_ratelimited(device->dev, "RBBM: ATB 
ASYNC overflow\n"); + break; + case GEN8_INT_ATBBUSOVERFLOW: + dev_crit_ratelimited(device->dev, "RBBM: ATB bus overflow\n"); + break; + case GEN8_INT_OUTOFBOUNDACCESS: + dev_crit_ratelimited(device->dev, "UCHE: Out of bounds access\n"); + break; + case GEN8_INT_UCHETRAPINTERRUPT: + dev_crit_ratelimited(device->dev, "UCHE: Trap interrupt\n"); + break; + case GEN8_INT_TSBWRITEERROR: + { + u32 lo, hi; + + kgsl_regread(device, GEN8_RBBM_SECVID_TSB_STATUS_LO, &lo); + kgsl_regread(device, GEN8_RBBM_SECVID_TSB_STATUS_HI, &hi); + + dev_crit_ratelimited(device->dev, "TSB: Write error interrupt: Address: 0x%lx MID: %lu\n", + FIELD_GET(GENMASK(16, 0), hi) << 32 | lo, + FIELD_GET(GENMASK(31, 23), hi)); + break; + } + default: + dev_crit_ratelimited(device->dev, "Unknown interrupt %d\n", bit); + } +} + +static const char *const uche_client[] = { + "BR_VFD", "BR_SP", "BR_VSC", "BR_VPC", + "BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP", + "BV_VFD", "BV_SP", "BV_VSC", "BV_VPC", + "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP" +}; + +static const char *const uche_lpac_client[] = { + "-", "SP_LPAC", "-", "-", "HLSQ_LPAC", "-", "-", "TP_LPAC" +}; + +#define SCOOBYDOO 0x5c00bd00 + +static const char *gen8_fault_block_uche(struct kgsl_device *device, + char *str, int size, bool lpac) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + u32 uche_client_id = adreno_dev->uche_client_pf; + const char *uche_client_str, *fault_block; + + /* + * Smmu driver takes a vote on CX gdsc before calling the kgsl + * pagefault handler. If there is contention for device mutex in this + * path and the dispatcher fault handler is holding this lock, trying + * to turn off CX gdsc will fail during the reset. So to avoid blocking + * here, try to lock device mutex and return if it fails. 
+ */ + if (!mutex_trylock(&device->mutex)) + goto regread_fail; + + if (!kgsl_state_is_awake(device)) { + mutex_unlock(&device->mutex); + goto regread_fail; + } + + kgsl_regread(device, GEN8_UCHE_CLIENT_PF, &uche_client_id); + mutex_unlock(&device->mutex); + + /* Ignore the value if the gpu is in IFPC */ + if (uche_client_id == SCOOBYDOO) { + uche_client_id = adreno_dev->uche_client_pf; + goto regread_fail; + } + + /* UCHE client id mask is bits [6:0] */ + uche_client_id &= GENMASK(6, 0); + +regread_fail: + if (lpac) { + fault_block = "UCHE_LPAC"; + if (uche_client_id >= ARRAY_SIZE(uche_lpac_client)) + goto fail; + uche_client_str = uche_lpac_client[uche_client_id]; + } else { + fault_block = "UCHE"; + if (uche_client_id >= ARRAY_SIZE(uche_client)) + goto fail; + uche_client_str = uche_client[uche_client_id]; + } + + snprintf(str, size, "%s: %s", fault_block, uche_client_str); + return str; + +fail: + snprintf(str, size, "%s: Unknown (client_id: %u)", + fault_block, uche_client_id); + return str; +} + +static const char *gen8_iommu_fault_block(struct kgsl_device *device, + u32 fsynr1) +{ + u32 mid = fsynr1 & 0xff; + static char str[36]; + + switch (mid) { + case 0x0: + return "CP"; + case 0x1: + return "UCHE: Unknown"; + case 0x2: + return "UCHE_LPAC: Unknown"; + case 0x3: + return gen8_fault_block_uche(device, str, sizeof(str), false); + case 0x4: + return "CCU"; + case 0x5: + return "Flag cache"; + case 0x6: + return "PREFETCH"; + case 0x7: + return "GMU"; + case 0x8: + return gen8_fault_block_uche(device, str, sizeof(str), true); + case 0x9: + return "UCHE_HPAC"; + } + + snprintf(str, sizeof(str), "Unknown (mid: %u)", mid); + return str; +} + +static void gen8_cp_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (adreno_is_preemption_enabled(adreno_dev)) + gen8_preemption_trigger(adreno_dev, true); + + adreno_dispatcher_schedule(device); +} + +/* + * gen8_gpc_err_int_callback() - Isr for GPC 
error interrupts + * @adreno_dev: Pointer to device + * @bit: Interrupt bit + */ +static void gen8_gpc_err_int_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* + * GPC error is typically the result of mistake SW programming. + * Force GPU fault for this interrupt so that we can debug it + * with help of register dump. + */ + + dev_crit(device->dev, "RBBM: GPC error\n"); + adreno_irqctrl(adreno_dev, 0); + + /* Trigger a fault in the dispatcher - this will effect a restart */ + adreno_dispatcher_fault(adreno_dev, ADRENO_SOFT_FAULT); +} + +/* + * gen8_swfuse_violation_callback() - ISR for software fuse violation interrupt + * @adreno_dev: Pointer to device + * @bit: Interrupt bit + */ +static void gen8_swfuse_violation_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 status; + + /* + * SWFUSEVIOLATION error is typically the result of enabling software + * feature which is not supported by the hardware. Following are the + * Feature violation will be reported + * 1) FASTBLEND (BIT:0): NO Fault, RB will send the workload to legacy + * blender HW pipeline. + * 2) LPAC (BIT:1): Fault + * 3) RAYTRACING (BIT:2): Fault + */ + kgsl_regread(device, GEN8_RBBM_SW_FUSE_INT_STATUS, &status); + + /* + * RBBM_INT_CLEAR_CMD will not clear SWFUSEVIOLATION interrupt. Hence + * do explicit swfuse irq clear. 
+ */ + kgsl_regwrite(device, GEN8_RBBM_SW_FUSE_INT_MASK, 0); + + dev_crit_ratelimited(device->dev, + "RBBM: SW Feature Fuse violation status=0x%8.8x\n", status); + + /* Trigger a fault in the dispatcher for LPAC and RAYTRACING violation */ + if (status & GENMASK(GEN8_RAYTRACING_SW_FUSE, GEN8_LPAC_SW_FUSE)) { + adreno_irqctrl(adreno_dev, 0); + adreno_dispatcher_fault(adreno_dev, ADRENO_HARD_FAULT); + } +} + +static const struct adreno_irq_funcs gen8_irq_funcs[32] = { + ADRENO_IRQ_CALLBACK(NULL), /* 0 - RBBM_GPU_IDLE */ + ADRENO_IRQ_CALLBACK(gen8_err_callback), /* 1 - RBBM_AHB_ERROR */ + ADRENO_IRQ_CALLBACK(NULL), /* 2 - UNUSED */ + ADRENO_IRQ_CALLBACK(NULL), /* 3 - UNUSED */ + ADRENO_IRQ_CALLBACK(NULL), /* 4 - CPIPCINT0 */ + ADRENO_IRQ_CALLBACK(NULL), /* 5 - CPIPCINT1 */ + ADRENO_IRQ_CALLBACK(gen8_err_callback), /* 6 - ATBASYNCOVERFLOW */ + ADRENO_IRQ_CALLBACK(gen8_gpc_err_int_callback), /* 7 - GPC_ERR */ + ADRENO_IRQ_CALLBACK(gen8_preemption_callback),/* 8 - CP_SW */ + ADRENO_IRQ_CALLBACK(gen8_cp_hw_err_callback), /* 9 - CP_HW_ERROR */ + ADRENO_IRQ_CALLBACK(NULL), /* 10 - CP_CCU_FLUSH_DEPTH_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 11 - CP_CCU_FLUSH_COLOR_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 12 - CP_CCU_RESOLVE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 13 - UNUSED */ + ADRENO_IRQ_CALLBACK(NULL), /* 14 - UNUSED */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 15 - CP_RB_INT */ + ADRENO_IRQ_CALLBACK(NULL), /* 16 - CP_RB_INT_LPAC*/ + ADRENO_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 18 - UNUSED */ + ADRENO_IRQ_CALLBACK(NULL), /* 19 - UNUSED */ + ADRENO_IRQ_CALLBACK(gen8_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 21 - CP_CACHE_TS_LPAC */ + ADRENO_IRQ_CALLBACK(gen8_err_callback), /* 22 - RBBM_ATB_BUS_OVERFLOW */ + ADRENO_IRQ_CALLBACK(adreno_hang_int_callback), /* 23 - MISHANGDETECT */ + ADRENO_IRQ_CALLBACK(gen8_err_callback), /* 24 - UCHE_OOB_ACCESS */ + ADRENO_IRQ_CALLBACK(gen8_err_callback), /* 25 - 
UCHE_TRAP_INTR */ + ADRENO_IRQ_CALLBACK(NULL), /* 26 - DEBBUS_INTR_0 */ + ADRENO_IRQ_CALLBACK(NULL), /* 27 - DEBBUS_INTR_1 */ + ADRENO_IRQ_CALLBACK(gen8_err_callback), /* 28 - TSBWRITEERROR */ + ADRENO_IRQ_CALLBACK(gen8_swfuse_violation_callback), /* 29 - SWFUSEVIOLATION */ + ADRENO_IRQ_CALLBACK(NULL), /* 30 - ISDB_CPU_IRQ */ + ADRENO_IRQ_CALLBACK(NULL), /* 31 - ISDB_UNDER_DEBUG */ +}; + +/* + * If the AHB fence is not in ALLOW mode when we receive an RBBM + * interrupt, something went wrong. This means that we cannot proceed + * since the IRQ status and clear registers are not accessible. + * This is usually harmless because the GMU will abort power collapse + * and change the fence back to ALLOW. Poll so that this can happen. + */ +static int gen8_irq_poll_fence(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + u32 status, fence, fence_retries = 0; + u64 a, b, c; + + a = gpudev->read_alwayson(adreno_dev); + + kgsl_regread(device, GEN8_GMUAO_AHB_FENCE_CTRL, &fence); + + while (fence != 0) { + b = gpudev->read_alwayson(adreno_dev); + + /* Wait for small time before trying again */ + udelay(1); + kgsl_regread(device, GEN8_GMUAO_AHB_FENCE_CTRL, &fence); + + if (fence_retries == 100 && fence != 0) { + c = gpudev->read_alwayson(adreno_dev); + + kgsl_regread(device, GEN8_GMUAO_RBBM_INT_UNMASKED_STATUS_SHADOW, + &status); + + dev_crit_ratelimited(device->dev, + "status=0x%x Unmasked status=0x%x Mask=0x%x timestamps: %llx %llx %llx\n", + status & adreno_dev->irq_mask, status, + adreno_dev->irq_mask, a, b, c); + return -ETIMEDOUT; + } + + fence_retries++; + } + + return 0; +} + +static irqreturn_t gen8_irq_handler(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + irqreturn_t ret = IRQ_NONE; + u32 status; + + /* + * GPU can power down once the INT_0_STATUS is read below. 
+ * But there still might be some register reads required so + * force the GMU/GPU into KEEPALIVE mode until done with the ISR. + */ + gen8_gpu_keepalive(adreno_dev, true); + + if (gen8_irq_poll_fence(adreno_dev)) { + adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT); + goto done; + } + + kgsl_regread(device, GEN8_RBBM_INT_0_STATUS, &status); + + kgsl_regwrite(device, GEN8_RBBM_INT_CLEAR_CMD, status); + + ret = adreno_irq_callbacks(adreno_dev, gen8_irq_funcs, status); + + trace_kgsl_gen8_irq_status(adreno_dev, status); + +done: + /* If hard fault, then let snapshot turn off the keepalive */ + if (!(adreno_gpu_fault(adreno_dev) & ADRENO_HARD_FAULT)) + gen8_gpu_keepalive(adreno_dev, false); + + return ret; +} + +int gen8_probe_common(struct platform_device *pdev, + struct adreno_device *adreno_dev, u32 chipid, + const struct adreno_gpu_core *gpucore) +{ + const struct adreno_gpudev *gpudev = gpucore->gpudev; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_gen8_core *gen8_core = container_of(gpucore, + struct adreno_gen8_core, base); + int ret; + + adreno_dev->gpucore = gpucore; + adreno_dev->chipid = chipid; + + adreno_reg_offset_init(gpudev->reg_offsets); + + adreno_dev->hwcg_enabled = true; + adreno_dev->uche_client_pf = 1; + + kgsl_pwrscale_fast_bus_hint(gen8_core->fast_bus_hint); + + ret = adreno_device_probe(pdev, adreno_dev); + if (ret) + return ret; + + if (adreno_preemption_feature_set(adreno_dev)) { + adreno_dev->preempt.preempt_level = gen8_core->preempt_level; + adreno_dev->preempt.skipsaverestore = true; + adreno_dev->preempt.usesgmem = true; + set_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv); + } + + /* debugfs node for ACD calibration */ + debugfs_create_file("acd_calibrate", 0644, device->d_debugfs, device, &acd_cal_fops); + + /* Dump additional AQE 16KB data on top of default 96KB(48(BR)+48(BV)) */ + device->snapshot_ctxt_record_size = ADRENO_FEATURE(adreno_dev, ADRENO_AQE) ? 
+ 112 * SZ_1K : 96 * SZ_1K; + + return 0; +} + +/* Register offset defines for Gen8, in order of enum adreno_regs */ +static u32 gen8_register_offsets[ADRENO_REG_REGISTER_MAX] = { + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, GEN8_CP_RB_BASE_LO_GC), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, GEN8_CP_RB_BASE_HI_GC), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, GEN8_CP_RB_RPTR_BR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, GEN8_CP_RB_WPTR_GC), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, GEN8_CP_SQE_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, GEN8_CP_IB1_BASE_LO_PIPE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, GEN8_CP_IB1_BASE_HI_PIPE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BUFSZ, GEN8_CP_IB1_REM_SIZE_PIPE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, GEN8_CP_IB2_BASE_LO_PIPE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, GEN8_CP_IB2_BASE_HI_PIPE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, GEN8_CP_IB2_REM_SIZE_PIPE), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, GEN8_RBBM_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, GEN8_RBBM_INT_0_MASK), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, GEN8_RBBM_SW_RESET_CMD), + ADRENO_REG_DEFINE(ADRENO_REG_GMU_AO_HOST_INTERRUPT_MASK, + GEN8_GMUAO_AO_HOST_INTERRUPT_MASK), + ADRENO_REG_DEFINE(ADRENO_REG_GMU_GMU2HOST_INTR_MASK, + GEN8_GMUCX_GMU2HOST_INTR_MASK), +}; + +static u32 _get_pipeid(u32 groupid) +{ + u32 pipe; + + switch (groupid) { + case KGSL_PERFCOUNTER_GROUP_BV_TSE: + fallthrough; + case KGSL_PERFCOUNTER_GROUP_BV_RAS: + fallthrough; + case KGSL_PERFCOUNTER_GROUP_BV_LRZ: + fallthrough; + case KGSL_PERFCOUNTER_GROUP_BV_HLSQ: + pipe = PIPE_BV; + break; + case KGSL_PERFCOUNTER_GROUP_TSE: + fallthrough; + case KGSL_PERFCOUNTER_GROUP_RAS: + fallthrough; + case KGSL_PERFCOUNTER_GROUP_LRZ: + fallthrough; + case KGSL_PERFCOUNTER_GROUP_HLSQ: + pipe = PIPE_BR; + break; + default: + pipe = PIPE_NONE; + } + + return pipe; +} + +int gen8_perfcounter_remove(struct adreno_device *adreno_dev, + struct 
adreno_perfcount_register *reg, u32 groupid) +{ + void *ptr = adreno_dev->pwrup_reglist->hostptr; + struct cpu_gpu_lock *lock = ptr; + u32 *data = ptr + sizeof(*lock); + int offset = (lock->ifpc_list_len + lock->preemption_list_len) * 2; + int i, second_last_offset, last_offset; + bool remove_counter = false; + u32 pipe = FIELD_PREP(GENMASK(13, 12), _get_pipeid(groupid)); + + if (kgsl_hwlock(lock)) { + kgsl_hwunlock(lock); + return -EBUSY; + } + + if (lock->dynamic_list_len < 3) { + kgsl_hwunlock(lock); + return -EINVAL; + } + + second_last_offset = offset + (lock->dynamic_list_len - 3) * 3; + last_offset = second_last_offset + 3; + + /* Look for the perfcounter to remove in the list */ + for (i = 0; i < lock->dynamic_list_len - 2; i++) { + if ((data[offset + 1] == reg->select) && (data[offset] == pipe)) { + remove_counter = true; + break; + } + offset += 3; + } + + if (!remove_counter) { + kgsl_hwunlock(lock); + return -ENOENT; + } + + /* + * If the entry is found, remove it from the list by overwriting with second last + * entry. Skip this if data at offset is already second last entry + */ + if (offset != second_last_offset) + memcpy(&data[offset], &data[second_last_offset], 6 * sizeof(u32)); + + /* + * Overwrite the second last entry with last entry as last entry always has to be + * GEN8_RBBM_SLICE_PERFCTR_CNTL. + */ + memcpy(&data[second_last_offset], &data[last_offset], 6 * sizeof(u32)); + + /* Clear the last entry */ + memset(&data[last_offset], 0, 6 * sizeof(u32)); + + lock->dynamic_list_len--; + + /* + * If dynamic list length is 2, the only entry in the list is the GEN8_RBBM_PERFCTR_CNTL. + * Remove the same as we can disable perfcounters now. 
+ */ + if (lock->dynamic_list_len == 2) { + memset(&data[offset], 0, 6 * sizeof(u32)); + lock->dynamic_list_len = 0; + kgsl_regwrite(KGSL_DEVICE(adreno_dev), GEN8_RBBM_PERFCTR_CNTL, 0x0); + kgsl_regwrite(KGSL_DEVICE(adreno_dev), GEN8_RBBM_SLICE_PERFCTR_CNTL, 0x0); + } + + kgsl_hwunlock(lock); + return 0; +} + +int gen8_perfcounter_update(struct adreno_device *adreno_dev, + struct adreno_perfcount_register *reg, bool update_reg, u32 pipe) +{ + void *ptr = adreno_dev->pwrup_reglist->hostptr; + struct cpu_gpu_lock *lock = ptr; + u32 *data = ptr + sizeof(*lock); + int i, offset = (lock->ifpc_list_len + lock->preemption_list_len) * 2; + + if (kgsl_hwlock(lock)) { + kgsl_hwunlock(lock); + return -EBUSY; + } + + /* + * If the perfcounter select register is already present in reglist + * update it, otherwise append the + * triplet to the end of the list. + */ + for (i = 0; i < lock->dynamic_list_len; i++) { + if ((data[offset + 1] == reg->select) && (data[offset] == pipe)) { + data[offset + 2] = reg->countable; + goto update; + } + + if (data[offset + 1] == GEN8_RBBM_PERFCTR_CNTL) + break; + + offset += 3; + } + + /* + * For all targets GEN8_RBBM_PERFCTR_CNTL needs to be the last entry, + * so overwrite the existing GEN8_RBBM_PERFCNTL_CTRL and add it back to + * the end. 
	 */
	data[offset++] = pipe;
	data[offset++] = reg->select;
	data[offset++] = reg->countable;

	/* Re-append the global perfcounter enable as the terminating entry */
	data[offset++] = FIELD_PREP(GENMASK(13, 12), PIPE_NONE);
	data[offset++] = GEN8_RBBM_PERFCTR_CNTL;
	data[offset++] = 1;
	lock->dynamic_list_len++;

	/* ... followed by the slice perfcounter enable */
	data[offset++] = FIELD_PREP(GENMASK(13, 12), PIPE_NONE);
	data[offset++] = GEN8_RBBM_SLICE_PERFCTR_CNTL;
	data[offset++] = 1;
	lock->dynamic_list_len++;

	/*
	 * NOTE(review): the counter triplet itself is appended without a
	 * matching dynamic_list_len increment, so len == 2 here implies
	 * "first counter added" — verify this accounting matches the CP
	 * microcode's and gen8_perfcounter_remove()'s expectations.
	 */
	/* If this is the first entry, enable perfcounters */
	if (lock->dynamic_list_len == 2) {
		kgsl_regwrite(KGSL_DEVICE(adreno_dev), GEN8_RBBM_PERFCTR_CNTL, 0x1);
		kgsl_regwrite(KGSL_DEVICE(adreno_dev), GEN8_RBBM_SLICE_PERFCTR_CNTL, 0x1);
	}

update:
	/* Optionally program the select register immediately from the CPU */
	if (update_reg)
		kgsl_regwrite(KGSL_DEVICE(adreno_dev), reg->select,
			reg->countable);

	kgsl_hwunlock(lock);
	return 0;
}

/*
 * gen8_read_alwayson() - Read the 64-bit GMU always-on counter over AHB.
 * Reads HI, LO, then HI again; if HI changed, LO wrapped between the two
 * reads, so LO is re-read against the newer HI value.
 */
static u64 gen8_read_alwayson(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	u32 lo = 0, hi = 0, tmp = 0;

	/* Always use the GMU AO counter when doing a AHB read */
	gmu_core_regread(device, GEN8_GMUCX_AO_COUNTER_HI, &hi);
	gmu_core_regread(device, GEN8_GMUCX_AO_COUNTER_LO, &lo);

	/* Check for overflow */
	gmu_core_regread(device, GEN8_GMUCX_AO_COUNTER_HI, &tmp);

	if (hi != tmp) {
		gmu_core_regread(device, GEN8_GMUCX_AO_COUNTER_LO,
				&lo);
		hi = tmp;
	}

	return (((u64) hi) << 32) | lo;
}

/*
 * gen8_lpac_store() - Enable or disable LPAC via sysfs.
 * Returns -EINVAL when the target lacks the LPAC feature; silently
 * succeeds when the SW fuse disallows it or the value is unchanged.
 * Otherwise power-cycles the GPU so the new setting takes effect.
 */
static int gen8_lpac_store(struct adreno_device *adreno_dev, bool enable)
{
	if (!ADRENO_FEATURE(adreno_dev, ADRENO_LPAC))
		return -EINVAL;

	if (!(adreno_dev->feature_fuse & BIT(GEN8_LPAC_SW_FUSE)) ||
		(adreno_dev->lpac_enabled == enable))
		return 0;

	/* Power down the GPU before changing the lpac setting */
	return adreno_power_cycle_bool(adreno_dev, &adreno_dev->lpac_enabled, enable);
}

/* Tear down gen8-specific state; only the preemption timer needs stopping */
static void gen8_remove(struct adreno_device *adreno_dev)
{
	if (adreno_preemption_feature_set(adreno_dev))
		del_timer(&adreno_dev->preempt.timer);
}

/*
 * gen8_read_bus_stats() - Accumulate DDR cycle/starvation deltas for the
 * bus scaling governor from the BIF performance counters.
 */
static void gen8_read_bus_stats(struct kgsl_device *device,
		struct
kgsl_power_stats *stats, + struct adreno_busy_data *busy) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + u64 ram_cycles, starved_ram; + + ram_cycles = counter_delta(device, adreno_dev->ram_cycles_lo, + &busy->bif_ram_cycles); + + starved_ram = counter_delta(device, adreno_dev->starved_ram_lo, + &busy->bif_starved_ram); + + ram_cycles += counter_delta(device, + adreno_dev->ram_cycles_lo_ch1_read, + &busy->bif_ram_cycles_read_ch1); + + ram_cycles += counter_delta(device, + adreno_dev->ram_cycles_lo_ch0_write, + &busy->bif_ram_cycles_write_ch0); + + ram_cycles += counter_delta(device, + adreno_dev->ram_cycles_lo_ch1_write, + &busy->bif_ram_cycles_write_ch1); + + starved_ram += counter_delta(device, + adreno_dev->starved_ram_lo_ch1, + &busy->bif_starved_ram_ch1); + + stats->ram_time = ram_cycles; + stats->ram_wait = starved_ram; +} + +static void gen8_power_stats(struct adreno_device *adreno_dev, + struct kgsl_power_stats *stats) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_busy_data *busy = &adreno_dev->busy_data; + u64 gpu_busy; + + /* Set the GPU busy counter for frequency scaling */ + gpu_busy = counter_delta(device, GEN8_GMUCX_POWER_COUNTER_XOCLK_L_0, + &busy->gpu_busy); + + stats->busy_time = gpu_busy * 10; + do_div(stats->busy_time, 192); + + if (ADRENO_FEATURE(adreno_dev, ADRENO_IFPC)) { + u32 ifpc = counter_delta(device, + GEN8_GMUCX_POWER_COUNTER_XOCLK_L_4, + &busy->num_ifpc); + + adreno_dev->ifpc_count += ifpc; + if (ifpc > 0) + trace_adreno_ifpc_count(adreno_dev->ifpc_count); + } + + if (device->pwrctrl.bus_control) + gen8_read_bus_stats(device, stats, busy); + + if (adreno_dev->bcl_enabled) { + u32 a, b, c, bcl_throttle; + + a = counter_delta(device, GEN8_GMUCX_POWER_COUNTER_XOCLK_L_1, + &busy->throttle_cycles[0]); + + b = counter_delta(device, GEN8_GMUCX_POWER_COUNTER_XOCLK_L_2, + &busy->throttle_cycles[1]); + + c = counter_delta(device, GEN8_GMUCX_POWER_COUNTER_XOCLK_L_3, + &busy->throttle_cycles[2]); + + 
if (a || b || c) + trace_kgsl_bcl_clock_throttling(a, b, c); + + bcl_throttle = counter_delta(device, + GEN8_GMUCX_POWER_COUNTER_XOCLK_L_5, &busy->bcl_throttle); + /* + * This counts number of cycles throttled in XO cycles. Convert it to + * micro seconds by dividing by XO freq which is 19.2MHz. + */ + adreno_dev->bcl_throttle_time_us += ((bcl_throttle * 10) / 192); + } +} + +static int gen8_setproperty(struct kgsl_device_private *dev_priv, + u32 type, void __user *value, u32 sizebytes) +{ + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + u32 enable; + + if (type != KGSL_PROP_PWRCTRL) + return -ENODEV; + + if (sizebytes != sizeof(enable)) + return -EINVAL; + + if (copy_from_user(&enable, value, sizeof(enable))) + return -EFAULT; + + mutex_lock(&device->mutex); + + if (enable) { + clear_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags); + + kgsl_pwrscale_enable(device); + } else { + set_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags); + + if (!adreno_active_count_get(adreno_dev)) + adreno_active_count_put(adreno_dev); + + kgsl_pwrscale_disable(device, true); + } + + mutex_unlock(&device->mutex); + + return 0; +} + +static void gen8_set_isdb_breakpoint_registers(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct clk *clk; + int ret; + + if (!device->set_isdb_breakpoint || device->ftbl->is_hwcg_on(device) + || device->qdss_gfx_virt == NULL || !device->force_panic) + return; + + clk = clk_get(&device->pdev->dev, "apb_pclk"); + + if (IS_ERR(clk)) { + dev_err(device->dev, "Unable to get QDSS clock\n"); + goto err; + } + + ret = clk_prepare_enable(clk); + + if (ret) { + dev_err(device->dev, "QDSS Clock enable error: %d\n", ret); + clk_put(clk); + goto err; + } + + /* Issue break command for SPs */ + isdb_write(device->qdss_gfx_virt, 0x0000); + isdb_write(device->qdss_gfx_virt, 0x1000); + isdb_write(device->qdss_gfx_virt, 0x2000); + 
isdb_write(device->qdss_gfx_virt, 0x3000); + isdb_write(device->qdss_gfx_virt, 0x4000); + isdb_write(device->qdss_gfx_virt, 0x5000); + isdb_write(device->qdss_gfx_virt, 0x6000); + isdb_write(device->qdss_gfx_virt, 0x7000); + isdb_write(device->qdss_gfx_virt, 0x8000); + isdb_write(device->qdss_gfx_virt, 0x9000); + isdb_write(device->qdss_gfx_virt, 0xa000); + isdb_write(device->qdss_gfx_virt, 0xb000); + + clk_disable_unprepare(clk); + clk_put(clk); + + return; + +err: + /* Do not force kernel panic if isdb writes did not go through */ + device->force_panic = false; +} + +static void gen8_swfuse_irqctrl(struct adreno_device *adreno_dev, bool state) +{ + kgsl_regwrite(KGSL_DEVICE(adreno_dev), GEN8_RBBM_SW_FUSE_INT_MASK, + state ? GEN8_SW_FUSE_INT_MASK : 0); +} + +const struct gen8_gpudev adreno_gen8_hwsched_gpudev = { + .base = { + .reg_offsets = gen8_register_offsets, + .probe = gen8_hwsched_probe, + .irq_handler = gen8_irq_handler, + .iommu_fault_block = gen8_iommu_fault_block, + .preemption_context_init = gen8_preemption_context_init, + .context_detach = gen8_hwsched_context_detach, + .read_alwayson = gen8_read_alwayson, + .reset = gen8_hwsched_reset_replay, + .power_ops = &gen8_hwsched_power_ops, + .power_stats = gen8_power_stats, + .setproperty = gen8_setproperty, + .hw_isidle = gen8_hw_isidle, + .add_to_va_minidump = gen8_hwsched_add_to_minidump, + .gx_is_on = gen8_gmu_gx_is_on, + .send_recurring_cmdobj = gen8_hwsched_send_recurring_cmdobj, + .perfcounter_remove = gen8_perfcounter_remove, + .set_isdb_breakpoint_registers = gen8_set_isdb_breakpoint_registers, + .context_destroy = gen8_hwsched_context_destroy, + .lpac_store = gen8_lpac_store, + .get_uche_trap_base = gen8_get_uche_trap_base, + }, + .hfi_probe = gen8_hwsched_hfi_probe, + .hfi_remove = gen8_hwsched_hfi_remove, + .handle_watchdog = gen8_hwsched_handle_watchdog, +}; + +const struct gen8_gpudev adreno_gen8_gmu_gpudev = { + .base = { + .reg_offsets = gen8_register_offsets, + .probe = 
gen8_gmu_device_probe, + .irq_handler = gen8_irq_handler, + .rb_start = gen8_rb_start, + .gpu_keepalive = gen8_gpu_keepalive, + .hw_isidle = gen8_hw_isidle, + .iommu_fault_block = gen8_iommu_fault_block, + .reset = gen8_gmu_reset, + .preemption_schedule = gen8_preemption_schedule, + .preemption_context_init = gen8_preemption_context_init, + .read_alwayson = gen8_read_alwayson, + .power_ops = &gen8_gmu_power_ops, + .remove = gen8_remove, + .ringbuffer_submitcmd = gen8_ringbuffer_submitcmd, + .power_stats = gen8_power_stats, + .setproperty = gen8_setproperty, + .add_to_va_minidump = gen8_gmu_add_to_minidump, + .gx_is_on = gen8_gmu_gx_is_on, + .perfcounter_remove = gen8_perfcounter_remove, + .set_isdb_breakpoint_registers = gen8_set_isdb_breakpoint_registers, + .swfuse_irqctrl = gen8_swfuse_irqctrl, + .get_uche_trap_base = gen8_get_uche_trap_base, + }, + .hfi_probe = gen8_gmu_hfi_probe, + .handle_watchdog = gen8_gmu_handle_watchdog, +}; diff --git a/adreno_gen8.h b/adreno_gen8.h new file mode 100644 index 0000000000..9cd8daa2b5 --- /dev/null +++ b/adreno_gen8.h @@ -0,0 +1,490 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ + +#ifndef _ADRENO_GEN8_H_ +#define _ADRENO_GEN8_H_ + +#include + +#include "adreno_gen8_gmu.h" +#include "gen8_reg.h" + +/* Forward struct declaration */ +struct gen8_snapshot_block_list; + +extern const struct adreno_power_ops gen8_gmu_power_ops; +extern const struct adreno_power_ops gen8_hwsched_power_ops; +extern const struct adreno_perfcounters adreno_gen8_perfcounters; + +struct gen8_gpudev { + struct adreno_gpudev base; + int (*hfi_probe)(struct adreno_device *adreno_dev); + void (*hfi_remove)(struct adreno_device *adreno_dev); + void (*handle_watchdog)(struct adreno_device *adreno_dev); +}; + +extern const struct gen8_gpudev adreno_gen8_gmu_gpudev; +extern const struct gen8_gpudev adreno_gen8_hwsched_gpudev; + +/** + * struct gen8_device - Container for the gen8_device + */ +struct gen8_device { + /** @gmu: Container for the gen8 GMU device */ + struct gen8_gmu_device gmu; + /** @adreno_dev: Container for the generic adreno device */ + struct adreno_device adreno_dev; + /** @aperture: The last value that the host aperture register was programmed to */ + u32 aperture; +}; + +/** + * struct gen8_protected_regs - container for a protect register span + */ +struct gen8_protected_regs { + /** @reg: Physical protected mode register to write to */ + u32 reg; + /** @start: Dword offset of the starting register in the range */ + u32 start; + /** @end: Dword offset of the ending register in the range (inclusive) */ + u32 end; + /** + * @noaccess: 1 if the register should not be accessible from + * userspace, 0 if it can be read (but not written) + */ + u32 noaccess; +}; + +/** + * struct adreno_gen8_core - gen8 specific GPU core definitions + */ +struct adreno_gen8_core { + /** @base: Container for the generic GPU definitions */ + struct adreno_gpu_core base; + /** @gmu_fw_version: Minimum firmware version required to support this core */ + u32 gmu_fw_version; + /** @sqefw_name: Name of the SQE microcode file */ + const char *sqefw_name; + /** @aqefw_name: Name 
of the AQE microcode file */ + const char *aqefw_name; + /** @gmufw_name: Name of the GMU firmware file */ + const char *gmufw_name; + /** @zap_name: Name of the CPZ zap file */ + const char *zap_name; + /** @ao_hwcg: List of registers and values to write for HWCG in AO block */ + const struct kgsl_regmap_list *ao_hwcg; + /** @ao_hwcg_count: Number of registers in @ao_hwcg */ + u32 ao_hwcg_count; + /** @gbif: List of registers and values to write for GBIF */ + const struct kgsl_regmap_list *gbif; + /** @gbif_count: Number of registers in @gbif */ + u32 gbif_count; + /** @hang_detect_cycles: Hang detect counter timeout value */ + u32 hang_detect_cycles; + /** @protected_regs: Array of protected registers for the target */ + const struct gen8_protected_regs *protected_regs; + /** @ctxt_record_size: Size of the preemption record in bytes */ + u64 ctxt_record_size; + /** @highest_bank_bit: Highest bank bit value */ + u32 highest_bank_bit; + /** @gen8_snapshot_block_list: Device-specific blocks dumped in the snapshot */ + const struct gen8_snapshot_block_list *gen8_snapshot_block_list; + /** @gmu_hub_clk_freq: Gmu hub interface clock frequency */ + u64 gmu_hub_clk_freq; + /** + * @bcl_data: bit 0 contains response type for bcl alarms and bits 1:21 controls sid vals + * to configure throttle levels for bcl alarm levels 0-2. If sid vals are not set, + * gmu fw sets default throttle levels. + */ + u32 bcl_data; + /** @preempt_level: Preemption level valid ranges [0 to 2] */ + u32 preempt_level; + /** @qos_value: GPU qos value to set for each RB. */ + const u32 *qos_value; + /** @acv_perfmode_vote: ACV vote for GPU perfmode */ + u32 acv_perfmode_vote; + /** @fast_bus_hint: Whether or not to increase IB vote on high ddr stall */ + bool fast_bus_hint; +}; + +/** + * struct gen8_cp_preemption_record - CP context record for + * preemption. + * @magic: (00) Value at this offset must be equal to + * GEN8_CP_CTXRECORD_MAGIC_REF. + * @info: (04) Type of record. 
Written non-zero (usually) by CP. + * we must set to zero for all ringbuffers. + * @errno: (08) Error code. Initialize this to GEN8_CP_CTXRECORD_ERROR_NONE. + * CP will update to another value if a preemption error occurs. + * @data: (12) DATA field in YIELD and SET_MARKER packets. + * Written by CP when switching out. Not used on switch-in. Initialized to 0. + * @cntl: (16) RB_CNTL, saved and restored by CP. We must initialize this. + * @rptr: (20) RB_RPTR, saved and restored by CP. We must initialize this. + * @wptr: (24) RB_WPTR, saved and restored by CP. We must initialize this. + * @_pad28: (28) Reserved/padding. + * @rptr_addr: (32) RB_RPTR_ADDR_LO|HI saved and restored. We must initialize. + * rbase: (40) RB_BASE_LO|HI saved and restored. + * counter: (48) Pointer to preemption counter. + * @bv_rptr_addr: (56) BV_RB_RPTR_ADDR_LO|HI save and restored. We must initialize. + */ +struct gen8_cp_preemption_record { + u32 magic; + u32 info; + u32 errno; + u32 data; + u32 cntl; + u32 rptr; + u32 wptr; + u32 _pad28; + u64 rptr_addr; + u64 rbase; + u64 counter; + u64 bv_rptr_addr; +}; + +/** + * struct gen8_cp_smmu_info - CP preemption SMMU info. 
+ * @magic: (00) The value at this offset must be equal to + * GEN8_CP_SMMU_INFO_MAGIC_REF + * @_pad4: (04) Reserved/padding + * @ttbr0: (08) Base address of the page table for the * incoming context + * @asid: (16) Address Space IDentifier (ASID) of the incoming context + * @context_idr: (20) Context Identification Register value + * @context_bank: (24) Which Context Bank in SMMU to update + */ +struct gen8_cp_smmu_info { + u32 magic; + u32 _pad4; + u64 ttbr0; + u32 asid; + u32 context_idr; + u32 context_bank; +}; + +#define GEN8_CP_SMMU_INFO_MAGIC_REF 0x241350d5UL + +#define GEN8_CP_CTXRECORD_MAGIC_REF 0xae399d6eUL +/* Size of each CP preemption record */ +#define GEN8_CP_CTXRECORD_SIZE_IN_BYTES (4192 * 1024) +/* Size of the user context record block (in bytes) */ +#define GEN8_CP_CTXRECORD_USER_RESTORE_SIZE (192 * 1024) +/* Size of the performance counter save/restore block (in bytes) */ +#define GEN8_CP_PERFCOUNTER_SAVE_RESTORE_SIZE (4 * 1024) + +#define GEN8_CP_RB_CNTL_DEFAULT \ + (FIELD_PREP(GENMASK(7, 0), ilog2(KGSL_RB_DWORDS >> 1)) | \ + FIELD_PREP(GENMASK(12, 8), ilog2(4))) + +/* Size of the CP_INIT pm4 stream in dwords */ +#define GEN8_CP_INIT_DWORDS 10 + +/* Size of the perf counter enable pm4 stream in dwords */ +#define GEN8_PERF_COUNTER_ENABLE_DWORDS 3 + +#define GEN8_INT_MASK \ + ((1 << GEN8_INT_AHBERROR) | \ + (1 << GEN8_INT_ATBASYNCFIFOOVERFLOW) | \ + (1 << GEN8_INT_GPCERROR) | \ + (1 << GEN8_INT_SWINTERRUPT) | \ + (1 << GEN8_INT_HWERROR) | \ + (1 << GEN8_INT_PM4CPINTERRUPT) | \ + (1 << GEN8_INT_RB_DONE_TS) | \ + (1 << GEN8_INT_CACHE_CLEAN_TS) | \ + (1 << GEN8_INT_ATBBUSOVERFLOW) | \ + (1 << GEN8_INT_HANGDETECTINTERRUPT) | \ + (1 << GEN8_INT_OUTOFBOUNDACCESS) | \ + (1 << GEN8_INT_UCHETRAPINTERRUPT) | \ + (1 << GEN8_INT_TSBWRITEERROR) | \ + (1 << GEN8_INT_SWFUSEVIOLATION)) + +#define GEN8_HWSCHED_INT_MASK \ + ((1 << GEN8_INT_AHBERROR) | \ + (1 << GEN8_INT_ATBASYNCFIFOOVERFLOW) | \ + (1 << GEN8_INT_ATBBUSOVERFLOW) | \ + (1 << 
GEN8_INT_OUTOFBOUNDACCESS) | \ + (1 << GEN8_INT_UCHETRAPINTERRUPT)) + +/** + * to_gen8_core - return the gen8 specific GPU core struct + * @adreno_dev: An Adreno GPU device handle + * + * Returns: + * A pointer to the gen8 specific GPU core struct + */ +static inline const struct adreno_gen8_core * +to_gen8_core(struct adreno_device *adreno_dev) +{ + const struct adreno_gpu_core *core = adreno_dev->gpucore; + + return container_of(core, struct adreno_gen8_core, base); +} + +/** + * gen8_is_smmu_stalled() - Check whether smmu is stalled or not + * @device: Pointer to KGSL device + * + * Return - True if smmu is stalled or false otherwise + */ +static inline bool gen8_is_smmu_stalled(struct kgsl_device *device) +{ + + /* FIXME: Implement SW smmu stall check */ + return false; +} + +/* Preemption functions */ +void gen8_preemption_trigger(struct adreno_device *adreno_dev, bool atomic); +void gen8_preemption_schedule(struct adreno_device *adreno_dev); +void gen8_preemption_start(struct adreno_device *adreno_dev); +int gen8_preemption_init(struct adreno_device *adreno_dev); + +u32 gen8_preemption_post_ibsubmit(struct adreno_device *adreno_dev, + u32 *cmds); +u32 gen8_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, + u32 *cmds); + +u32 gen8_set_marker(u32 *cmds, enum adreno_cp_marker_type type); + +void gen8_preemption_callback(struct adreno_device *adreno_dev, int bit); + +int gen8_preemption_context_init(struct kgsl_context *context); + +void gen8_preemption_context_destroy(struct kgsl_context *context); + +void gen8_preemption_prepare_postamble(struct adreno_device *adreno_dev); + +void gen8_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); +void gen8_crashdump_init(struct adreno_device *adreno_dev); + +/** + * gen8_snapshot_external_core_regs - Dump external registers into snapshot + * @device: Pointer to KGSL device + * @snapshot: Pointer to the snapshot + * + * 
Dump external core registers like GPUCC, CPR into GPU snapshot. + */ +void gen8_snapshot_external_core_regs(struct kgsl_device *device, + struct kgsl_snapshot *snapshot); + +/** + * gen8_start - Program gen8 registers + * @adreno_dev: An Adreno GPU handle + * + * This function does all gen8 register programming every + * time we boot the gpu + * + * Return: 0 on success or negative on failure + */ +int gen8_start(struct adreno_device *adreno_dev); + +/** + * gen8_init - Initialize gen8 resources + * @adreno_dev: An Adreno GPU handle + * + * This function does gen8 specific one time initialization + * and is invoked when the very first client opens a + * kgsl instance + * + * Return: Zero on success and negative error on failure + */ +int gen8_init(struct adreno_device *adreno_dev); + +/** + * gen8_get_gpu_feature_info - Get hardware supported feature info + * @adreno_dev: Pointer to the adreno device + * + * Get HW supported feature info and update software feature configuration + */ +void gen8_get_gpu_feature_info(struct adreno_device *adreno_dev); + +/** + * gen8_rb_start - Gen8 specific ringbuffer setup + * @adreno_dev: An Adreno GPU handle + * + * This function does gen8 specific ringbuffer setup and + * attempts to submit CP INIT and bring GPU out of secure mode + * + * Return: Zero on success and negative error on failure + */ +int gen8_rb_start(struct adreno_device *adreno_dev); + +/** + * gen8_microcode_read - Get the cp microcode from the filesystem + * @adreno_dev: An Adreno GPU handle + * + * This function gets the firmware from filesystem and sets up + * the microcode global buffer + * + * Return: Zero on success and negative error on failure + */ +int gen8_microcode_read(struct adreno_device *adreno_dev); + +/** + * gen8_probe_common - Probe common gen8 resources + * @pdev: Pointer to the platform device + * @adreno_dev: Pointer to the adreno device + * @chipid: Chipid of the target + * @gpucore: Pointer to the gpucore structure + * + * This function 
sets up the gen8 resources common across all + * gen8 targets + */ +int gen8_probe_common(struct platform_device *pdev, + struct adreno_device *adreno_dev, u32 chipid, + const struct adreno_gpu_core *gpucore); + +/** + * gen8_hw_isidle - Check whether gen8 gpu is idle or not + * @adreno_dev: An Adreno GPU handle + * + * Return: True if gpu is idle, otherwise false + */ +bool gen8_hw_isidle(struct adreno_device *adreno_dev); + +/** + * gen8_spin_idle_debug - Debug logging used when gpu fails to idle + * @adreno_dev: An Adreno GPU handle + * + * This function logs interesting registers and triggers a snapshot + */ +void gen8_spin_idle_debug(struct adreno_device *adreno_dev, + const char *str); + +/** + * gen8_perfcounter_update - Update the IFPC perfcounter list + * @adreno_dev: An Adreno GPU handle + * @reg: Perfcounter reg struct to add/remove to the list + * @update_reg: true if the perfcounter needs to be programmed by the CPU + * @pipe: pipe id for CP aperture control + * + * Return: 0 on success or -EBUSY if the lock couldn't be taken + */ +int gen8_perfcounter_update(struct adreno_device *adreno_dev, + struct adreno_perfcount_register *reg, bool update_reg, u32 pipe); + +/* + * gen8_ringbuffer_init - Initialize the ringbuffers + * @adreno_dev: An Adreno GPU handle + * + * Initialize the ringbuffer(s) for gen8. 
+ * Return: 0 on success or negative on failure + */ +int gen8_ringbuffer_init(struct adreno_device *adreno_dev); + +/** + * gen8_ringbuffer_submitcmd - Submit a user command to the ringbuffer + * @adreno_dev: An Adreno GPU handle + * @cmdobj: Pointer to a user command object + * @flags: Internal submit flags + * @time: Optional pointer to a adreno_submit_time container + * + * Return: 0 on success or negative on failure + */ +int gen8_ringbuffer_submitcmd(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj, u32 flags, + struct adreno_submit_time *time); + +/** + * gen8_ringbuffer_submit - Submit a command to the ringbuffer + * @rb: Ringbuffer pointer + * @time: Optional pointer to a adreno_submit_time container + * + * Return: 0 on success or negative on failure + */ +int gen8_ringbuffer_submit(struct adreno_ringbuffer *rb, + struct adreno_submit_time *time); + +/** + * gen8_fenced_write - Write to a fenced register + * @adreno_dev: An Adreno GPU handle + * @offset: Register offset + * @value: Value to write + * @mask: Expected FENCE_STATUS for successful write + * + * Return: 0 on success or negative on failure + */ +int gen8_fenced_write(struct adreno_device *adreno_dev, u32 offset, + u32 value, u32 mask); + +/** + * gen8_ringbuffer_addcmds - Wrap and submit commands to the ringbuffer + * @adreno_dev: An Adreno GPU handle + * @rb: Ringbuffer pointer + * @drawctxt: Draw context submitting the commands + * @flags: Submission flags + * @in: Input buffer to write to ringbuffer + * @dwords: Dword length of @in + * @timestamp: Draw context timestamp for the submission + * @time: Optional pointer to a adreno_submit_time container + * + * Return: 0 on success or negative on failure + */ +int gen8_ringbuffer_addcmds(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, + u32 flags, u32 *in, u32 dwords, u32 timestamp, + struct adreno_submit_time *time); + +/** + * gen8_cp_init_cmds - Create the CP_INIT commands 
+ * @adreno_dev: An Adreno GPU handle + * @cmd: Buffer to write the CP_INIT commands into + */ +void gen8_cp_init_cmds(struct adreno_device *adreno_dev, u32 *cmds); + +/** + * gen8_gmu_hfi_probe - Probe Gen8 HFI specific data + * @adreno_dev: An Adreno GPU handle + * + * Return: 0 on success or negative on failure + */ +int gen8_gmu_hfi_probe(struct adreno_device *adreno_dev); + +static inline const struct gen8_gpudev * +to_gen8_gpudev(const struct adreno_gpudev *gpudev) +{ + return container_of(gpudev, struct gen8_gpudev, base); +} + +/** + * gen8_reset_preempt_records - Reset the preemption buffers + * @adreno_dev: Handle to the adreno device + * + * Reset the preemption records at the time of hard reset + */ +void gen8_reset_preempt_records(struct adreno_device *adreno_dev); + +/** + * gen8_rdpm_mx_freq_update - Update the mx frequency + * @gmu: An Adreno GMU handle + * @freq: Frequency in KHz + * + * This function communicates GPU mx frequency(in Mhz) changes to rdpm. + */ +void gen8_rdpm_mx_freq_update(struct gen8_gmu_device *gmu, u32 freq); + +/** + * gen8_rdpm_cx_freq_update - Update the cx frequency + * @gmu: An Adreno GMU handle + * @freq: Frequency in KHz + * + * This function communicates GPU cx frequency(in Mhz) changes to rdpm. + */ +void gen8_rdpm_cx_freq_update(struct gen8_gmu_device *gmu, u32 freq); + +/** + * gen8_scm_gpu_init_cx_regs - Program gpu regs for feature support + * @adreno_dev: Handle to the adreno device + * + * Program gpu regs for feature support. Scm call for the same + * is added from kernel version 6.0 onwards. + * + * Return: 0 on success or negative on failure + */ +int gen8_scm_gpu_init_cx_regs(struct adreno_device *adreno_dev); + +#endif diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c new file mode 100644 index 0000000000..fcea42776e --- /dev/null +++ b/adreno_gen8_gmu.c @@ -0,0 +1,3425 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
+ * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "adreno.h" +#include "adreno_gen8.h" +#include "adreno_trace.h" +#include "kgsl_bus.h" +#include "kgsl_device.h" +#include "kgsl_trace.h" +#include "kgsl_util.h" + +static struct gmu_vma_entry gen8_gmu_vma[] = { + [GMU_ITCM] = { + .start = 0x00000000, + .size = SZ_16K, + }, + [GMU_CACHE] = { + .start = SZ_16K, + .size = (SZ_16M - SZ_16K), + .next_va = SZ_16K, + }, + [GMU_DTCM] = { + .start = SZ_256M + SZ_16K, + .size = SZ_16K, + }, + [GMU_DCACHE] = { + .start = 0x0, + .size = 0x0, + }, + [GMU_NONCACHED_KERNEL] = { + .start = 0x60000000, + .size = SZ_512M, + .next_va = 0x60000000, + }, + [GMU_NONCACHED_KERNEL_EXTENDED] = { + .start = 0xc0000000, + .size = SZ_512M, + .next_va = 0xc0000000, + }, +}; + +static ssize_t log_stream_enable_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct gen8_gmu_device *gmu = container_of(kobj, struct gen8_gmu_device, log_kobj); + bool val; + int ret; + + ret = kstrtobool(buf, &val); + if (ret) + return ret; + + gmu->log_stream_enable = val; + adreno_mark_for_coldboot(gen8_gmu_to_adreno(gmu)); + return count; +} + +static ssize_t log_stream_enable_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct gen8_gmu_device *gmu = container_of(kobj, struct gen8_gmu_device, log_kobj); + + return scnprintf(buf, PAGE_SIZE, "%d\n", gmu->log_stream_enable); +} + +static ssize_t log_group_mask_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct gen8_gmu_device *gmu = container_of(kobj, struct gen8_gmu_device, log_kobj); + u32 val; + int ret; + + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; + + gmu->log_group_mask = val; + 
adreno_mark_for_coldboot(gen8_gmu_to_adreno(gmu)); + return count; +} + +static ssize_t log_group_mask_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct gen8_gmu_device *gmu = container_of(kobj, struct gen8_gmu_device, log_kobj); + + return scnprintf(buf, PAGE_SIZE, "%x\n", gmu->log_group_mask); +} + +static struct kobj_attribute log_stream_enable_attr = + __ATTR(log_stream_enable, 0644, log_stream_enable_show, log_stream_enable_store); + +static struct kobj_attribute log_group_mask_attr = + __ATTR(log_group_mask, 0644, log_group_mask_show, log_group_mask_store); + +static struct attribute *log_attrs[] = { + &log_stream_enable_attr.attr, + &log_group_mask_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(log); + +static struct kobj_type log_kobj_type = { + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = log_groups, +}; + +static ssize_t stats_enable_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct gen8_gmu_device *gmu = container_of(kobj, struct gen8_gmu_device, stats_kobj); + bool val; + int ret; + + ret = kstrtobool(buf, &val); + if (ret) + return ret; + + gmu->stats_enable = val; + adreno_mark_for_coldboot(gen8_gmu_to_adreno(gmu)); + return count; +} + +static ssize_t stats_enable_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct gen8_gmu_device *gmu = container_of(kobj, struct gen8_gmu_device, stats_kobj); + + return scnprintf(buf, PAGE_SIZE, "%d\n", gmu->stats_enable); +} + +static ssize_t stats_mask_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct gen8_gmu_device *gmu = container_of(kobj, struct gen8_gmu_device, stats_kobj); + u32 val; + int ret; + + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; + + gmu->stats_mask = val; + adreno_mark_for_coldboot(gen8_gmu_to_adreno(gmu)); + return count; +} + +static ssize_t stats_mask_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + 
struct gen8_gmu_device *gmu = container_of(kobj, struct gen8_gmu_device, stats_kobj); + + return scnprintf(buf, PAGE_SIZE, "%x\n", gmu->stats_mask); +} + +static ssize_t stats_interval_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct gen8_gmu_device *gmu = container_of(kobj, struct gen8_gmu_device, stats_kobj); + u32 val; + int ret; + + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; + + gmu->stats_interval = val; + adreno_mark_for_coldboot(gen8_gmu_to_adreno(gmu)); + return count; +} + +static ssize_t stats_interval_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct gen8_gmu_device *gmu = container_of(kobj, struct gen8_gmu_device, stats_kobj); + + return scnprintf(buf, PAGE_SIZE, "%x\n", gmu->stats_interval); +} + +static struct kobj_attribute stats_enable_attr = + __ATTR(stats_enable, 0644, stats_enable_show, stats_enable_store); + +static struct kobj_attribute stats_mask_attr = + __ATTR(stats_mask, 0644, stats_mask_show, stats_mask_store); + +static struct kobj_attribute stats_interval_attr = + __ATTR(stats_interval, 0644, stats_interval_show, stats_interval_store); + +static struct attribute *stats_attrs[] = { + &stats_enable_attr.attr, + &stats_mask_attr.attr, + &stats_interval_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(stats); + +static struct kobj_type stats_kobj_type = { + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = stats_groups, +}; + +static int gen8_timed_poll_check_rscc(struct gen8_gmu_device *gmu, + u32 offset, u32 expected_ret, + u32 timeout, u32 mask) +{ + u32 value; + + return readl_poll_timeout(gmu->rscc_virt + (offset << 2), value, + (value & mask) == expected_ret, 100, timeout * 1000); +} + +struct gen8_gmu_device *to_gen8_gmu(struct adreno_device *adreno_dev) +{ + struct gen8_device *gen8_dev = container_of(adreno_dev, + struct gen8_device, adreno_dev); + + return &gen8_dev->gmu; +} + +struct adreno_device *gen8_gmu_to_adreno(struct gen8_gmu_device *gmu) 
+{ + struct gen8_device *gen8_dev = + container_of(gmu, struct gen8_device, gmu); + + return &gen8_dev->adreno_dev; +} + +#define RSC_CMD_OFFSET 2 + +static void _regwrite(void __iomem *regbase, + u32 offsetwords, u32 value) +{ + void __iomem *reg; + + reg = regbase + (offsetwords << 2); + __raw_writel(value, reg); +} + +void gen8_load_rsc_ucode(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + void __iomem *rscc = gmu->rscc_virt; + u32 seq_offset = GEN8_RSCC_SEQ_MEM_0_DRV0; + + /* Disable SDE clock gating */ + _regwrite(rscc, GEN8_GPU_RSCC_RSC_STATUS0_DRV0, BIT(24)); + + /* Setup RSC PDC handshake for sleep and wakeup */ + _regwrite(rscc, GEN8_RSCC_PDC_SLAVE_ID_DRV0, 1); + _regwrite(rscc, GEN8_RSCC_HIDDEN_TCS_CMD0_DATA, 0); + _regwrite(rscc, GEN8_RSCC_HIDDEN_TCS_CMD0_ADDR, 0); + _regwrite(rscc, GEN8_RSCC_HIDDEN_TCS_CMD0_DATA + RSC_CMD_OFFSET, 0); + _regwrite(rscc, GEN8_RSCC_HIDDEN_TCS_CMD0_ADDR + RSC_CMD_OFFSET, 0); + _regwrite(rscc, GEN8_RSCC_HIDDEN_TCS_CMD0_DATA + RSC_CMD_OFFSET * 2, 0x80000021); + _regwrite(rscc, GEN8_RSCC_HIDDEN_TCS_CMD0_ADDR + RSC_CMD_OFFSET * 2, 0); + + /* Load RSC sequencer uCode for sleep and wakeup */ + _regwrite(rscc, seq_offset, 0xeaaae5a0); + _regwrite(rscc, seq_offset + 1, 0xe1a1ebab); + _regwrite(rscc, seq_offset + 2, 0xa2e0a581); + _regwrite(rscc, seq_offset + 3, 0xecac82e2); + _regwrite(rscc, seq_offset + 4, 0x0020edad); +} + +int gen8_load_pdc_ucode(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct resource *res_cfg; + void __iomem *cfg = NULL; + + res_cfg = platform_get_resource_byname(gmu->pdev, IORESOURCE_MEM, + "gmu_pdc"); + if (res_cfg) + cfg = ioremap(res_cfg->start, resource_size(res_cfg)); + + if (!cfg) { + dev_err(&gmu->pdev->dev, "Failed to map PDC CFG\n"); + return -ENODEV; + } + + /* Setup GPU PDC */ + _regwrite(cfg, GEN8_PDC_GPU_ENABLE_PDC, 0x80000001); + + iounmap(cfg); + + return 0; +} + +/* Configure and enable GMU 
low power mode */ +static void gen8_gmu_power_config(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* Disable GMU WB/RB buffer and caches at boot */ + gmu_core_regwrite(device, GEN8_GMU_SYS_BUS_CONFIG, 0x1); + gmu_core_regwrite(device, GEN8_GMU_ICACHE_CONFIG, 0x1); + gmu_core_regwrite(device, GEN8_GMU_DCACHE_CONFIG, 0x1); +} + +static void gmu_ao_sync_event(struct adreno_device *adreno_dev) +{ + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned long flags; + u64 ticks; + + /* + * Get the GMU always on ticks and log it in a trace message. This + * will be used to map GMU ticks to ftrace time. Do this in atomic + * context to ensure nothing happens between reading the always + * on ticks and doing the trace. + */ + + local_irq_save(flags); + + ticks = gpudev->read_alwayson(adreno_dev); + + trace_gmu_ao_sync(ticks); + + local_irq_restore(flags); +} + +int gen8_gmu_enable_gdsc(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret; + + ret = wait_for_completion_timeout(&gmu->gdsc_gate, msecs_to_jiffies(5000)); + if (!ret) { + dev_err(device->dev, "GPU CX wait timeout. 
Dumping CX votes:\n"); + /* Dump the cx regulator consumer list */ + qcom_clk_dump(NULL, gmu->cx_gdsc, false); + } + + ret = regulator_enable(gmu->cx_gdsc); + if (ret) + dev_err(&gmu->pdev->dev, + "Failed to enable GMU CX gdsc, error %d\n", ret); + + kgsl_mmu_send_tlb_hint(&device->mmu, false); + clear_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); + return ret; +} + +void gen8_gmu_disable_gdsc(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + kgsl_mmu_send_tlb_hint(&device->mmu, true); + reinit_completion(&gmu->gdsc_gate); + set_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); + regulator_disable(gmu->cx_gdsc); +} + +int gen8_gmu_device_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + + gmu_ao_sync_event(adreno_dev); + + /* Bring GMU out of reset */ + gmu_core_regwrite(device, GEN8_GMUCX_CM3_SYSRESET, 0); + + /* Make sure the write is posted before moving ahead */ + wmb(); + + if (gmu_core_timed_poll_check(device, GEN8_GMUCX_CM3_FW_INIT_RESULT, + BIT(8), 100, GENMASK(8, 0))) { + dev_err(&gmu->pdev->dev, "GMU failed to come out of reset\n"); + gmu_core_fault_snapshot(device); + return -ETIMEDOUT; + } + + return 0; +} + +/* + * gen8_gmu_hfi_start() - Write registers and start HFI. 
+ * @device: Pointer to KGSL device + */ +int gen8_gmu_hfi_start(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + gmu_core_regwrite(device, GEN8_GMUCX_HFI_CTRL_INIT, 1); + + if (gmu_core_timed_poll_check(device, GEN8_GMUCX_HFI_CTRL_STATUS, + BIT(0), 100, BIT(0))) { + dev_err(&gmu->pdev->dev, "GMU HFI init failed\n"); + gmu_core_fault_snapshot(device); + return -ETIMEDOUT; + } + + return 0; +} + +int gen8_rscc_wakeup_sequence(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct device *dev = &gmu->pdev->dev; + + /* Skip wakeup sequence if we didn't do the sleep sequence */ + if (!test_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags)) + return 0; + + /* RSC wake sequence */ + gmu_core_regwrite(device, GEN8_GMUAO_RSCC_CONTROL_REQ, BIT(1)); + + /* Write request before polling */ + wmb(); + + if (gmu_core_timed_poll_check(device, GEN8_GMUAO_RSCC_CONTROL_ACK, + BIT(1), 100, BIT(1))) { + dev_err(dev, "Failed to do GPU RSC power on\n"); + return -ETIMEDOUT; + } + + if (gen8_timed_poll_check_rscc(gmu, GEN8_RSCC_SEQ_BUSY_DRV0, + 0x0, 100, UINT_MAX)) { + dev_err(dev, "GPU RSC sequence stuck in waking up GPU\n"); + return -ETIMEDOUT; + } + + gmu_core_regwrite(device, GEN8_GMUAO_RSCC_CONTROL_REQ, 0); + + clear_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags); + + return 0; +} + +int gen8_rscc_sleep_sequence(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret; + + if (!test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) + return 0; + + if (test_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags)) + return 0; + + gmu_core_regwrite(device, GEN8_GMUCX_CM3_SYSRESET, 1); + /* Make sure M3 is in reset before going on */ + wmb(); + + gmu_core_regread(device, GEN8_GMUCX_GENERAL_9, 
&gmu->log_wptr_retention); + + gmu_core_regwrite(device, GEN8_GMUAO_RSCC_CONTROL_REQ, BIT(0)); + /* Make sure the request completes before continuing */ + wmb(); + + ret = gen8_timed_poll_check_rscc(gmu, GEN8_GPU_RSCC_RSC_STATUS0_DRV0, + BIT(16), 100, BIT(16)); + if (ret) { + dev_err(&gmu->pdev->dev, "GPU RSC power off fail\n"); + return -ETIMEDOUT; + } + + gmu_core_regwrite(device, GEN8_GMUAO_RSCC_CONTROL_REQ, 0); + + if (adreno_dev->lm_enabled) + gmu_core_regwrite(device, GEN8_GMUAO_AO_SPARE_CNTL, 0); + + set_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags); + + return 0; +} + +static struct kgsl_memdesc *find_gmu_memdesc(struct gen8_gmu_device *gmu, + u32 addr, u32 size) +{ + int i; + + for (i = 0; i < gmu->global_entries; i++) { + struct kgsl_memdesc *md = &gmu->gmu_globals[i]; + + if ((addr >= md->gmuaddr) && + (((addr + size) <= (md->gmuaddr + md->size)))) + return md; + } + + return NULL; +} + +static int find_vma_block(struct gen8_gmu_device *gmu, u32 addr, u32 size) +{ + int i; + + for (i = 0; i < GMU_MEM_TYPE_MAX; i++) { + struct gmu_vma_entry *vma = &gmu->vma[i]; + + if ((addr >= vma->start) && + ((addr + size) <= (vma->start + vma->size))) + return i; + } + + return -ENOENT; +} + +static void load_tcm(struct adreno_device *adreno_dev, const u8 *src, + u32 tcm_start, u32 base, const struct gmu_block_header *blk) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 tcm_offset = tcm_start + ((blk->addr - base)/sizeof(u32)); + + kgsl_regmap_bulk_write(&device->regmap, tcm_offset, src, + blk->size >> 2); +} + +int gen8_gmu_load_fw(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + const u8 *fw = (const u8 *)gmu->fw_image->data; + + while (fw < gmu->fw_image->data + gmu->fw_image->size) { + const struct gmu_block_header *blk = + (const struct gmu_block_header *)fw; + int id; + + fw += sizeof(*blk); + + /* Don't deal with zero size blocks */ + if (blk->size == 0) + continue; + + id = find_vma_block(gmu, 
blk->addr, blk->size); + + if (id < 0) { + dev_err(&gmu->pdev->dev, + "Unknown block in GMU FW addr:0x%x size:0x%x\n", + blk->addr, blk->size); + return -EINVAL; + } + + if (id == GMU_ITCM) { + load_tcm(adreno_dev, fw, + GEN8_GMU_CM3_ITCM_START, + gmu->vma[GMU_ITCM].start, blk); + } else if (id == GMU_DTCM) { + load_tcm(adreno_dev, fw, + GEN8_GMU_CM3_DTCM_START, + gmu->vma[GMU_DTCM].start, blk); + } else { + struct kgsl_memdesc *md = + find_gmu_memdesc(gmu, blk->addr, blk->size); + + if (!md) { + dev_err(&gmu->pdev->dev, + "No backing memory for GMU FW block addr:0x%x size:0x%x\n", + blk->addr, blk->size); + return -EINVAL; + } + + memcpy(md->hostptr + (blk->addr - md->gmuaddr), fw, + blk->size); + } + + fw += blk->size; + } + + /* Proceed only after the FW is written */ + wmb(); + return 0; +} + +static const char *oob_to_str(enum oob_request req) +{ + switch (req) { + case oob_gpu: + return "oob_gpu"; + case oob_perfcntr: + return "oob_perfcntr"; + case oob_boot_slumber: + return "oob_boot_slumber"; + case oob_dcvs: + return "oob_dcvs"; + default: + return "unknown"; + } +} + +static void trigger_reset_recovery(struct adreno_device *adreno_dev, + enum oob_request req) +{ + /* + * Trigger recovery for perfcounter oob only since only + * perfcounter oob can happen alongside an actively rendering gpu. 
+ */ + if (req != oob_perfcntr) + return; + + if (adreno_dev->dispatch_ops && adreno_dev->dispatch_ops->fault) + adreno_dev->dispatch_ops->fault(adreno_dev, + ADRENO_GMU_FAULT_SKIP_SNAPSHOT); +} + +int gen8_gmu_oob_set(struct kgsl_device *device, + enum oob_request req) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret = 0; + int set, check; + + if (req == oob_perfcntr && gmu->num_oob_perfcntr++) + return 0; + + if (req >= oob_boot_slumber) { + dev_err(&gmu->pdev->dev, + "Unsupported OOB request %s\n", + oob_to_str(req)); + return -EINVAL; + } + + set = BIT(30 - req * 2); + check = BIT(31 - req); + + gmu_core_regwrite(device, GEN8_GMUCX_HOST2GMU_INTR_SET, set); + + if (gmu_core_timed_poll_check(device, GEN8_GMUCX_GMU2HOST_INTR_INFO, check, + 100, check)) { + if (req == oob_perfcntr) + gmu->num_oob_perfcntr--; + gmu_core_fault_snapshot(device); + ret = -ETIMEDOUT; + WARN(1, "OOB request %s timed out\n", oob_to_str(req)); + trigger_reset_recovery(adreno_dev, req); + } + + gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR, check); + + trace_kgsl_gmu_oob_set(set); + return ret; +} + +void gen8_gmu_oob_clear(struct kgsl_device *device, + enum oob_request req) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int clear = BIT(31 - req * 2); + + if (req == oob_perfcntr && --gmu->num_oob_perfcntr) + return; + + if (req >= oob_boot_slumber) { + dev_err(&gmu->pdev->dev, "Unsupported OOB clear %s\n", + oob_to_str(req)); + return; + } + + gmu_core_regwrite(device, GEN8_GMUCX_HOST2GMU_INTR_SET, clear); + trace_kgsl_gmu_oob_clear(clear); +} + +void gen8_gmu_irq_enable(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct gen8_hfi *hfi = &gmu->hfi; + + /* Clear pending IRQs and Unmask needed IRQs */ + 
gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR, UINT_MAX); + gmu_core_regwrite(device, GEN8_GMUAO_AO_HOST_INTERRUPT_CLR, UINT_MAX); + + gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_MASK, + (u32)~HFI_IRQ_MASK); + gmu_core_regwrite(device, GEN8_GMUAO_AO_HOST_INTERRUPT_MASK, + (u32)~GMU_AO_INT_MASK); + + /* Enable all IRQs on host */ + enable_irq(hfi->irq); + enable_irq(gmu->irq); +} + +void gen8_gmu_irq_disable(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct gen8_hfi *hfi = &gmu->hfi; + + /* Disable all IRQs on host */ + disable_irq(gmu->irq); + disable_irq(hfi->irq); + + /* Mask all IRQs and clear pending IRQs */ + gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_MASK, UINT_MAX); + gmu_core_regwrite(device, GEN8_GMUAO_AO_HOST_INTERRUPT_MASK, UINT_MAX); + + gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR, UINT_MAX); + gmu_core_regwrite(device, GEN8_GMUAO_AO_HOST_INTERRUPT_CLR, UINT_MAX); +} + +static int gen8_gmu_hfi_start_msg(struct adreno_device *adreno_dev) +{ + struct hfi_start_cmd req; + int ret; + + ret = CMD_MSG_HDR(req, H2F_MSG_START); + if (ret) + return ret; + + return gen8_hfi_send_generic_req(adreno_dev, &req, sizeof(req)); +} + +static u32 gen8_rscc_tcsm_drv0_status_reglist[] = { + GEN8_RSCC_TCS0_DRV0_STATUS, + GEN8_RSCC_TCS1_DRV0_STATUS, + GEN8_RSCC_TCS2_DRV0_STATUS, + GEN8_RSCC_TCS3_DRV0_STATUS, + GEN8_RSCC_TCS4_DRV0_STATUS, + GEN8_RSCC_TCS5_DRV0_STATUS, + GEN8_RSCC_TCS6_DRV0_STATUS, + GEN8_RSCC_TCS7_DRV0_STATUS, + GEN8_RSCC_TCS8_DRV0_STATUS, + GEN8_RSCC_TCS9_DRV0_STATUS, +}; + +static int gen8_complete_rpmh_votes(struct gen8_gmu_device *gmu, + u32 timeout) +{ + int i, ret = 0; + + for (i = 0; i < ARRAY_SIZE(gen8_rscc_tcsm_drv0_status_reglist); i++) + ret |= gen8_timed_poll_check_rscc(gmu, + gen8_rscc_tcsm_drv0_status_reglist[i], BIT(0), timeout, + BIT(0)); + + if (ret) + dev_err(&gmu->pdev->dev, "RPMH votes timedout: 
%d\n", ret); + + return ret; +} + +#define GX_GDSC_POWER_OFF BIT(0) +#define GX_CLK_OFF BIT(1) +#define is_on(val) (!(val & (GX_GDSC_POWER_OFF | GX_CLK_OFF))) + +bool gen8_gmu_gx_is_on(struct adreno_device *adreno_dev) +{ + u32 val; + + gmu_core_regread(KGSL_DEVICE(adreno_dev), + GEN8_GMUCX_GFX_PWR_CLK_STATUS, &val); + return is_on(val); +} + +static const char *idle_level_name(int level) +{ + if (level == GPU_HW_ACTIVE) + return "GPU_HW_ACTIVE"; + else if (level == GPU_HW_IFPC) + return "GPU_HW_IFPC"; + + return "(Unknown)"; +} + +int gen8_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + u32 reg, reg1, reg2, reg3, reg4, reg5,/* reg6,*/ reg7, reg8; + unsigned long t; + u64 ts1, ts2; + + ts1 = gpudev->read_alwayson(adreno_dev); + + t = jiffies + msecs_to_jiffies(100); + do { + gmu_core_regread(device, + GEN8_GMUCX_RPMH_POWER_STATE, &reg); + gmu_core_regread(device, GEN8_GMUCX_GFX_PWR_CLK_STATUS, &reg1); + + /* + * Check that we are at lowest level. If lowest level is IFPC + * double check that GFX clock is off. + */ + if (gmu->idle_level == reg) + if (!(gmu->idle_level == GPU_HW_IFPC && is_on(reg1))) + return 0; + + /* Wait 100us to reduce unnecessary AHB bus traffic */ + usleep_range(10, 100); + } while (!time_after(jiffies, t)); + + /* Check one last time */ + gmu_core_regread(device, GEN8_GMUCX_RPMH_POWER_STATE, &reg); + gmu_core_regread(device, GEN8_GMUCX_GFX_PWR_CLK_STATUS, &reg1); + + /* + * Check that we are at lowest level. If lowest level is IFPC + * double check that GFX clock is off. 
+ */ + if (gmu->idle_level == reg) + if (!(gmu->idle_level == GPU_HW_IFPC && is_on(reg1))) + return 0; + + ts2 = gpudev->read_alwayson(adreno_dev); + + /* Collect abort data to help with debugging */ + gmu_core_regread(device, GEN8_GMUAO_GPU_CX_BUSY_STATUS, &reg2); + gmu_core_regread(device, GEN8_GMUAO_RBBM_INT_UNMASKED_STATUS_SHADOW, &reg3); + gmu_core_regread(device, GEN8_GMUCX_PWR_COL_KEEPALIVE, &reg4); + gmu_core_regread(device, GEN8_GMUAO_AO_SPARE_CNTL, &reg5); + + dev_err(&gmu->pdev->dev, + "----------------------[ GMU error ]----------------------\n"); + dev_err(&gmu->pdev->dev, + "Timeout waiting for lowest idle level %s\n", + idle_level_name(gmu->idle_level)); + dev_err(&gmu->pdev->dev, "Start: %llx (absolute ticks)\n", ts1); + dev_err(&gmu->pdev->dev, "Poll: %llx (ticks relative to start)\n", + ts2-ts1); + dev_err(&gmu->pdev->dev, + "RPMH_POWER_STATE=%x GFX_PWR_CLK_STATUS=%x\n", reg, reg1); + dev_err(&gmu->pdev->dev, "CX_BUSY_STATUS=%x\n", reg2); + dev_err(&gmu->pdev->dev, + "RBBM_INT_UNMASKED_STATUS=%x PWR_COL_KEEPALIVE=%x\n", + reg3, reg4); + dev_err(&gmu->pdev->dev, "GMUAO_AO_SPARE_CNTL=%x\n", reg5); + + /* Access GX registers only when GX is ON */ + if (is_on(reg1)) { + //kgsl_regread(device, GEN8_CP_STATUS_1, &reg6);// FIXME + kgsl_regread(device, GEN8_CP_CP2GMU_STATUS, &reg7); + kgsl_regread(device, GEN8_CP_CONTEXT_SWITCH_CNTL, &reg8); + + //dev_err(&gmu->pdev->dev, "GEN8_CP_STATUS_1=%x\n", reg6); + dev_err(&gmu->pdev->dev, + "CP2GMU_STATUS=%x CONTEXT_SWITCH_CNTL=%x\n", + reg7, reg8); + } + + WARN_ON(1); + gmu_core_fault_snapshot(device); + return -ETIMEDOUT; +} + +/* Bitmask for GPU idle status check */ +#define CXGXCPUBUSYIGNAHB BIT(30) +int gen8_gmu_wait_for_idle(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + u32 status2; + u64 ts1; + + ts1 = gpudev->read_alwayson(adreno_dev); + if
(gmu_core_timed_poll_check(device, GEN8_GMUAO_GPU_CX_BUSY_STATUS, + 0, 100, CXGXCPUBUSYIGNAHB)) { + gmu_core_regread(device, + GEN8_GMUAO_GPU_CX_BUSY_STATUS2, &status2); + dev_err(&gmu->pdev->dev, + "GMU not idling: status2=0x%x %llx %llx\n", + status2, ts1, + gpudev->read_alwayson(adreno_dev)); + gmu_core_fault_snapshot(device); + return -ETIMEDOUT; + } + + return 0; +} + +int gen8_gmu_version_info(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); + + /* GMU version info is at a fixed offset in the DTCM */ + gmu_core_regread(device, GEN8_GMU_CM3_DTCM_START + 0xff8, + &gmu->ver.core); + gmu_core_regread(device, GEN8_GMU_CM3_DTCM_START + 0xff9, + &gmu->ver.core_dev); + gmu_core_regread(device, GEN8_GMU_CM3_DTCM_START + 0xffa, + &gmu->ver.pwr); + gmu_core_regread(device, GEN8_GMU_CM3_DTCM_START + 0xffb, + &gmu->ver.pwr_dev); + gmu_core_regread(device, GEN8_GMU_CM3_DTCM_START + 0xffc, + &gmu->ver.hfi); + + /* Check if gmu fw version on device is compatible with kgsl driver */ + if (gmu->ver.core < gen8_core->gmu_fw_version) { + dev_err_once(&gmu->pdev->dev, + "GMU FW version 0x%x error (expected 0x%x)\n", + gmu->ver.core, gen8_core->gmu_fw_version); + return -EINVAL; + } + return 0; +} + +int gen8_gmu_itcm_shadow(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + u32 i, *dest; + + if (gmu->itcm_shadow) + return 0; + + gmu->itcm_shadow = vzalloc(gmu->vma[GMU_ITCM].size); + if (!gmu->itcm_shadow) + return -ENOMEM; + + dest = (u32 *)gmu->itcm_shadow; + + for (i = 0; i < (gmu->vma[GMU_ITCM].size >> 2); i++) + gmu_core_regread(KGSL_DEVICE(adreno_dev), + GEN8_GMU_CM3_ITCM_START + i, dest++); + + return 0; +} + +void gen8_gmu_register_config(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + const struct 
adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 val; + + /* Clear any previously set cm3 fault */ + atomic_set(&gmu->cm3_fault, 0); + + /* Vote veto for FAL10 */ + gmu_core_regwrite(device, GEN8_GMUCX_CX_FALNEXT_INTF, 0x1); + gmu_core_regwrite(device, GEN8_GMUCX_CX_FAL_INTF, 0x1); + + /* Clear init result to make sure we are getting fresh value */ + gmu_core_regwrite(device, GEN8_GMUCX_CM3_FW_INIT_RESULT, 0); + gmu_core_regwrite(device, GEN8_GMUCX_CM3_BOOT_CONFIG, 0x2); + + gmu_core_regwrite(device, GEN8_GMUCX_HFI_QTBL_ADDR, + gmu->hfi.hfi_mem->gmuaddr); + gmu_core_regwrite(device, GEN8_GMUCX_HFI_QTBL_INFO, 1); + + gmu_core_regwrite(device, GEN8_GMUAO_AHB_FENCE_RANGE_0, BIT(31) | + FIELD_PREP(GENMASK(30, 18), 0x32) | + FIELD_PREP(GENMASK(17, 0), 0x8a0)); + + /* + * Make sure that CM3 state is at reset value. Snapshot is changing + * NMI bit and if we boot up GMU with NMI bit set GMU will boot + * straight in to NMI handler without executing __main code + */ + gmu_core_regwrite(device, GEN8_GMUCX_CM3_CFG, 0x4052); + + /* Set up GBIF registers from the GPU core definition */ + kgsl_regmap_multi_write(&device->regmap, gen8_core->gbif, + gen8_core->gbif_count); + + /** + * We may have asserted gbif halt as part of reset sequence which may + * not get cleared if the gdsc was not reset. So clear it before + * attempting GMU boot. 
+ */ + kgsl_regwrite(device, GEN8_GBIF_HALT, BIT(3)); + + /* Set the log wptr index */ + gmu_core_regwrite(device, GEN8_GMUCX_GENERAL_9, + gmu->log_wptr_retention); + + /* Pass chipid to GMU FW, must happen before starting GMU */ + gmu_core_regwrite(device, GEN8_GMUCX_GENERAL_10, + ADRENO_GMU_REV(ADRENO_GPUREV(adreno_dev))); + + /* Log size is encoded in (number of 4K units - 1) */ + val = (gmu->gmu_log->gmuaddr & GENMASK(31, 12)) | + ((GMU_LOG_SIZE/SZ_4K - 1) & GENMASK(7, 0)); + gmu_core_regwrite(device, GEN8_GMUCX_GENERAL_8, val); + + /* Configure power control and bring the GMU out of reset */ + gen8_gmu_power_config(adreno_dev); + + /* + * Enable BCL throttling - + * XOCLK1: countable: 0x13 (25% throttle) + * XOCLK2: countable: 0x17 (58% throttle) + * XOCLK3: countable: 0x19 (75% throttle) + * POWER_CONTROL_SELECT_0 controls counters 0 - 3, each selector + * is 8 bits wide. + */ + if (adreno_dev->bcl_enabled) + gmu_core_regrmw(device, GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_0, + 0xffffff00, FIELD_PREP(GENMASK(31, 24), 0x19) | + FIELD_PREP(GENMASK(23, 16), 0x17) | + FIELD_PREP(GENMASK(15, 8), 0x13)); + +} + +static struct gmu_vma_node *find_va(struct gmu_vma_entry *vma, u32 addr, u32 size) +{ + struct rb_node *node = vma->vma_root.rb_node; + + while (node != NULL) { + struct gmu_vma_node *data = rb_entry(node, struct gmu_vma_node, node); + + if (addr + size <= data->va) + node = node->rb_left; + else if (addr >= data->va + data->size) + node = node->rb_right; + else + return data; + } + return NULL; +} + +/* Return true if VMA supports dynamic allocations */ +static bool vma_is_dynamic(int vma_id) +{ + /* Dynamic allocations are done in the GMU_NONCACHED_KERNEL space */ + return vma_id == GMU_NONCACHED_KERNEL; +} + +static int insert_va(struct gmu_vma_entry *vma, u32 addr, u32 size) +{ + struct rb_node **node, *parent = NULL; + struct gmu_vma_node *new = kzalloc(sizeof(*new), GFP_NOWAIT); + + if (new == NULL) + return -ENOMEM; + + new->va = addr; + new->size = 
size; + + node = &vma->vma_root.rb_node; + while (*node != NULL) { + struct gmu_vma_node *this; + + parent = *node; + this = rb_entry(parent, struct gmu_vma_node, node); + + if (addr + size <= this->va) + node = &parent->rb_left; + else if (addr >= this->va + this->size) + node = &parent->rb_right; + else { + kfree(new); + return -EEXIST; + } + } + + /* Add new node and rebalance tree */ + rb_link_node(&new->node, parent, node); + rb_insert_color(&new->node, &vma->vma_root); + + return 0; +} + +static u32 find_unmapped_va(struct gmu_vma_entry *vma, u32 size, u32 va_align) +{ + struct rb_node *node = rb_first(&vma->vma_root); + u32 cur = vma->start; + bool found = false; + + cur = ALIGN(cur, va_align); + + while (node) { + struct gmu_vma_node *data = rb_entry(node, struct gmu_vma_node, node); + + if (cur + size <= data->va) { + found = true; + break; + } + + cur = ALIGN(data->va + data->size, va_align); + node = rb_next(node); + } + + /* Do we have space after the last node? */ + if (!found && (cur + size <= vma->start + vma->size)) + found = true; + return found ? 
cur : 0; +} + +static int _map_gmu_dynamic(struct gen8_gmu_device *gmu, + struct kgsl_memdesc *md, + u32 addr, u32 vma_id, int attrs, u32 align) +{ + int ret; + struct gmu_vma_entry *vma = &gmu->vma[vma_id]; + struct gmu_vma_node *vma_node = NULL; + u32 size = ALIGN(md->size, hfi_get_gmu_sz_alignment(align)); + + spin_lock(&vma->lock); + if (!addr) { + /* + * We will end up with a hole (GMU VA range not backed by physical mapping) if + * the aligned size is greater than the size of the physical mapping + */ + addr = find_unmapped_va(vma, size, hfi_get_gmu_va_alignment(align)); + if (addr == 0) { + spin_unlock(&vma->lock); + dev_err(&gmu->pdev->dev, + "Insufficient VA space size: %x\n", size); + return -ENOMEM; + } + } + + ret = insert_va(vma, addr, size); + spin_unlock(&vma->lock); + if (ret < 0) { + dev_err(&gmu->pdev->dev, + "Could not insert va: %x size %x\n", addr, size); + return ret; + } + + ret = gmu_core_map_memdesc(gmu->domain, md, addr, attrs); + if (!ret) { + md->gmuaddr = addr; + return 0; + } + + /* Failed to map to GMU */ + dev_err(&gmu->pdev->dev, + "Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n", + addr, md->size, ret); + + spin_lock(&vma->lock); + vma_node = find_va(vma, md->gmuaddr, size); + if (vma_node) + rb_erase(&vma_node->node, &vma->vma_root); + spin_unlock(&vma->lock); + kfree(vma_node); + + return ret; +} + +static int _map_gmu_static(struct gen8_gmu_device *gmu, + struct kgsl_memdesc *md, + u32 addr, u32 vma_id, int attrs, u32 align) +{ + int ret; + struct gmu_vma_entry *vma = &gmu->vma[vma_id]; + u32 size = ALIGN(md->size, hfi_get_gmu_sz_alignment(align)); + + if (!addr) + addr = ALIGN(vma->next_va, hfi_get_gmu_va_alignment(align)); + + ret = gmu_core_map_memdesc(gmu->domain, md, addr, attrs); + if (ret) { + dev_err(&gmu->pdev->dev, + "Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n", + addr, md->size, ret); + return ret; + } + md->gmuaddr = addr; + /* + * We will end up with a hole (GMU VA range not backed 
by physical mapping) if the aligned + * size is greater than the size of the physical mapping + */ + vma->next_va = md->gmuaddr + size; + return 0; +} + +static int _map_gmu(struct gen8_gmu_device *gmu, + struct kgsl_memdesc *md, + u32 addr, u32 vma_id, int attrs, u32 align) +{ + return vma_is_dynamic(vma_id) ? + _map_gmu_dynamic(gmu, md, addr, vma_id, attrs, align) : + _map_gmu_static(gmu, md, addr, vma_id, attrs, align); +} + +int gen8_gmu_import_buffer(struct gen8_gmu_device *gmu, u32 vma_id, + struct kgsl_memdesc *md, u32 attrs, u32 align) +{ + return _map_gmu(gmu, md, 0, vma_id, attrs, align); +} + +struct kgsl_memdesc *gen8_reserve_gmu_kernel_block(struct gen8_gmu_device *gmu, + u32 addr, u32 size, u32 vma_id, u32 align) +{ + int ret; + struct kgsl_memdesc *md; + struct kgsl_device *device = KGSL_DEVICE(gen8_gmu_to_adreno(gmu)); + int attrs = IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV; + + if (gmu->global_entries == ARRAY_SIZE(gmu->gmu_globals)) + return ERR_PTR(-ENOMEM); + + md = &gmu->gmu_globals[gmu->global_entries]; + + ret = kgsl_allocate_kernel(device, md, size, 0, KGSL_MEMDESC_SYSMEM); + if (ret) { + memset(md, 0x0, sizeof(*md)); + return ERR_PTR(-ENOMEM); + } + + ret = _map_gmu(gmu, md, addr, vma_id, attrs, align); + if (ret) { + kgsl_sharedmem_free(md); + memset(md, 0x0, sizeof(*md)); + return ERR_PTR(ret); + } + + gmu->global_entries++; + + return md; +} + +struct kgsl_memdesc *gen8_reserve_gmu_kernel_block_fixed(struct gen8_gmu_device *gmu, + u32 addr, u32 size, u32 vma_id, const char *resource, int attrs, u32 align) +{ + int ret; + struct kgsl_memdesc *md; + struct kgsl_device *device = KGSL_DEVICE(gen8_gmu_to_adreno(gmu)); + + if (gmu->global_entries == ARRAY_SIZE(gmu->gmu_globals)) + return ERR_PTR(-ENOMEM); + + md = &gmu->gmu_globals[gmu->global_entries]; + + ret = kgsl_memdesc_init_fixed(device, gmu->pdev, resource, md); + if (ret) + return ERR_PTR(ret); + + ret = _map_gmu(gmu, md, addr, vma_id, attrs, align); + + sg_free_table(md->sgt); + 
kfree(md->sgt); + md->sgt = NULL; + + if (!ret) + gmu->global_entries++; + else { + dev_err(&gmu->pdev->dev, + "Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n", + addr, md->size, ret); + memset(md, 0x0, sizeof(*md)); + md = ERR_PTR(ret); + } + return md; +} + +int gen8_alloc_gmu_kernel_block(struct gen8_gmu_device *gmu, + struct kgsl_memdesc *md, u32 size, u32 vma_id, int attrs) +{ + int ret; + struct kgsl_device *device = KGSL_DEVICE(gen8_gmu_to_adreno(gmu)); + + ret = kgsl_allocate_kernel(device, md, size, 0, KGSL_MEMDESC_SYSMEM); + if (ret) + return ret; + + ret = _map_gmu(gmu, md, 0, vma_id, attrs, 0); + if (ret) + kgsl_sharedmem_free(md); + + return ret; +} + +void gen8_free_gmu_block(struct gen8_gmu_device *gmu, struct kgsl_memdesc *md) +{ + int vma_id = find_vma_block(gmu, md->gmuaddr, md->size); + struct gmu_vma_entry *vma; + struct gmu_vma_node *vma_node; + + if ((vma_id < 0) || !vma_is_dynamic(vma_id)) + return; + + vma = &gmu->vma[vma_id]; + + /* + * Do not remove the vma node if we failed to unmap the entire buffer. This is because the + * iommu driver considers remapping an already mapped iova as fatal. 
+ */ + if (md->size != iommu_unmap(gmu->domain, md->gmuaddr, md->size)) + goto free; + + spin_lock(&vma->lock); + vma_node = find_va(vma, md->gmuaddr, md->size); + if (vma_node) + rb_erase(&vma_node->node, &vma->vma_root); + spin_unlock(&vma->lock); + kfree(vma_node); +free: + kgsl_sharedmem_free(md); +} + +static int gen8_gmu_process_prealloc(struct gen8_gmu_device *gmu, + struct gmu_block_header *blk) +{ + struct kgsl_memdesc *md; + + int id = find_vma_block(gmu, blk->addr, blk->value); + + if (id < 0) { + dev_err(&gmu->pdev->dev, + "Invalid prealloc block addr: 0x%x value:%d\n", + blk->addr, blk->value); + return id; + } + + /* Nothing to do for TCM blocks or user uncached */ + if (id == GMU_ITCM || id == GMU_DTCM || id == GMU_NONCACHED_USER) + return 0; + + /* Check if the block is already allocated */ + md = find_gmu_memdesc(gmu, blk->addr, blk->value); + if (md != NULL) + return 0; + + md = gen8_reserve_gmu_kernel_block(gmu, blk->addr, blk->value, id, 0); + + return PTR_ERR_OR_ZERO(md); +} + +int gen8_gmu_parse_fw(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); + struct gmu_block_header *blk; + int ret, offset = 0; + const char *gmufw_name = gen8_core->gmufw_name; + + /* + * If GMU fw already saved and verified, do nothing new. + * Skip only request_firmware and allow preallocation to + * ensure in scenario where GMU request firmware succeeded + * but preallocation fails, we don't return early without + * successful preallocations on next open call. + */ + if (!gmu->fw_image) { + + if (gen8_core->gmufw_name == NULL) + return -EINVAL; + + ret = request_firmware(&gmu->fw_image, gmufw_name, + &gmu->pdev->dev); + if (ret) { + dev_err(&gmu->pdev->dev, "request_firmware (%s) failed: %d\n", + gmufw_name, ret); + return ret; + } + } + + /* + * Zero payload fw blocks contain metadata and are + * guaranteed to precede fw load data. 
Parse the + * metadata blocks. + */ + while (offset < gmu->fw_image->size) { + blk = (struct gmu_block_header *)&gmu->fw_image->data[offset]; + + if (offset + sizeof(*blk) > gmu->fw_image->size) { + dev_err(&gmu->pdev->dev, "Invalid FW Block\n"); + return -EINVAL; + } + + /* Done with zero length blocks so return */ + if (blk->size) + break; + + offset += sizeof(*blk); + + if (blk->type == GMU_BLK_TYPE_PREALLOC_REQ || + blk->type == GMU_BLK_TYPE_PREALLOC_PERSIST_REQ) { + ret = gen8_gmu_process_prealloc(gmu, blk); + + if (ret) + return ret; + } + } + + return 0; +} + +int gen8_gmu_memory_init(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + + /* GMU master log */ + if (IS_ERR_OR_NULL(gmu->gmu_log)) + gmu->gmu_log = gen8_reserve_gmu_kernel_block(gmu, 0, + GMU_LOG_SIZE, GMU_NONCACHED_KERNEL, 0); + + return PTR_ERR_OR_ZERO(gmu->gmu_log); +} + +static int gen8_gmu_init(struct adreno_device *adreno_dev) +{ + int ret; + + ret = gen8_gmu_parse_fw(adreno_dev); + if (ret) + return ret; + + ret = gen8_gmu_memory_init(adreno_dev); + if (ret) + return ret; + + return gen8_hfi_init(adreno_dev); +} + +static void _do_gbif_halt(struct kgsl_device *device, u32 reg, u32 ack_reg, + u32 mask, const char *client) +{ + u32 ack; + unsigned long t; + + kgsl_regwrite(device, reg, mask); + + t = jiffies + msecs_to_jiffies(100); + do { + kgsl_regread(device, ack_reg, &ack); + if ((ack & mask) == mask) + return; + + /* + * If we are attempting recovery in case of stall-on-fault + * then the halt sequence will not complete as long as SMMU + * is stalled. 
+ */ + kgsl_mmu_pagefault_resume(&device->mmu, false); + + usleep_range(10, 100); + } while (!time_after(jiffies, t)); + + /* Check one last time */ + kgsl_mmu_pagefault_resume(&device->mmu, false); + + kgsl_regread(device, ack_reg, &ack); + if ((ack & mask) == mask) + return; + + dev_err(device->dev, "%s GBIF halt timed out\n", client); +} + +static void gen8_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) +{ + int ret = 0; + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* Disconnect GPU from BUS is not needed if CX GDSC goes off later */ + + /* Check no outstanding RPMh voting */ + gen8_complete_rpmh_votes(gmu, 1); + + /* Clear the WRITEDROPPED fields and set fence to allow mode */ + gmu_core_regwrite(device, GEN8_GMUAO_AHB_FENCE_STATUS_CLR, 0x7); + gmu_core_regwrite(device, GEN8_GMUAO_AHB_FENCE_CTRL, 0); + + /* Make sure above writes are committed before we proceed to recovery */ + wmb(); + + gmu_core_regwrite(device, GEN8_GMUCX_CM3_SYSRESET, 1); + + /* Halt GX traffic */ + if (gen8_gmu_gx_is_on(adreno_dev)) + _do_gbif_halt(device, GEN8_RBBM_GBIF_HALT, + GEN8_RBBM_GBIF_HALT_ACK, + GEN8_GBIF_GX_HALT_MASK, + "GX"); + + /* Halt CX traffic */ + _do_gbif_halt(device, GEN8_GBIF_HALT, GEN8_GBIF_HALT_ACK, + GEN8_GBIF_ARB_HALT_MASK, "CX"); + + if (gen8_gmu_gx_is_on(adreno_dev)) + kgsl_regwrite(device, GEN8_RBBM_SW_RESET_CMD, 0x1); + + /* Allow the software reset to complete */ + udelay(100); + + /* + * This is based on the assumption that GMU is the only one controlling + * the GX HS. This code path is the only client voting for GX through + * the regulator interface. 
+ */ + if (gmu->gx_gdsc) { + if (gen8_gmu_gx_is_on(adreno_dev)) { + /* Switch gx gdsc control from GMU to CPU + * force non-zero reference count in clk driver + * so next disable call will turn + * off the GDSC + */ + ret = regulator_enable(gmu->gx_gdsc); + if (ret) + dev_err(&gmu->pdev->dev, + "suspend fail: gx enable %d\n", ret); + + ret = regulator_disable(gmu->gx_gdsc); + if (ret) + dev_err(&gmu->pdev->dev, + "suspend fail: gx disable %d\n", ret); + + if (gen8_gmu_gx_is_on(adreno_dev)) + dev_err(&gmu->pdev->dev, + "gx is stuck on\n"); + } + } +} + +/* + * gen8_gmu_notify_slumber() - initiate request to GMU to prepare to slumber + * @device: Pointer to KGSL device + */ +static int gen8_gmu_notify_slumber(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int bus_level = pwr->pwrlevels[pwr->default_pwrlevel].bus_freq; + int perf_idx = gmu->hfi.dcvs_table.gpu_level_num - + pwr->default_pwrlevel - 1; + struct hfi_prep_slumber_cmd req = { + .freq = perf_idx, + .bw = bus_level, + }; + int ret; + + req.bw |= gen8_bus_ab_quantize(adreno_dev, 0); + + /* Disable the power counter so that the GMU is not busy */ + gmu_core_regwrite(device, GEN8_GMUCX_POWER_COUNTER_ENABLE, 0); + + ret = CMD_MSG_HDR(req, H2F_MSG_PREPARE_SLUMBER); + if (ret) + return ret; + + ret = gen8_hfi_send_generic_req(adreno_dev, &req, sizeof(req)); + + /* Make sure the fence is in ALLOW mode */ + gmu_core_regwrite(device, GEN8_GMUAO_AHB_FENCE_CTRL, 0); + return ret; +} + +void gen8_gmu_suspend(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + gen8_gmu_pwrctrl_suspend(adreno_dev); + + clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); + + gen8_gmu_disable_gdsc(adreno_dev); + + gen8_rdpm_cx_freq_update(gmu, 0); + + dev_err(&gmu->pdev->dev, "Suspended 
GMU\n"); + + kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE); +} + +static int gen8_gmu_dcvs_set(struct adreno_device *adreno_dev, + int gpu_pwrlevel, int bus_level, u32 ab) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table; + struct hfi_gx_bw_perf_vote_cmd req = { + .ack_type = DCVS_ACK_BLOCK, + .freq = INVALID_DCVS_IDX, + .bw = INVALID_DCVS_IDX, + }; + int ret = 0; + + if (!test_bit(GMU_PRIV_HFI_STARTED, &gmu->flags)) + return 0; + + /* Do not set to XO and lower GPU clock vote from GMU */ + if ((gpu_pwrlevel != INVALID_DCVS_IDX) && + (gpu_pwrlevel >= table->gpu_level_num - 1)) + return -EINVAL; + + if (gpu_pwrlevel < table->gpu_level_num - 1) + req.freq = table->gpu_level_num - gpu_pwrlevel - 1; + + if (bus_level < pwr->ddr_table_count && bus_level > 0) + req.bw = bus_level; + + req.bw |= gen8_bus_ab_quantize(adreno_dev, ab); + + /* GMU will vote for slumber levels through the sleep sequence */ + if ((req.freq == INVALID_DCVS_IDX) && (req.bw == INVALID_BW_VOTE)) + return 0; + + ret = CMD_MSG_HDR(req, H2F_MSG_GX_BW_PERF_VOTE); + if (ret) + return ret; + + ret = gen8_hfi_send_generic_req(adreno_dev, &req, sizeof(req)); + if (ret) { + dev_err_ratelimited(&gmu->pdev->dev, + "Failed to set GPU perf idx %d, bw idx %d\n", + req.freq, req.bw); + + /* + * If this was a dcvs request along side an active gpu, request + * dispatcher based reset and recovery. 
+ */ + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT | + ADRENO_GMU_FAULT_SKIP_SNAPSHOT); + } + + if (req.freq != INVALID_DCVS_IDX) + gen8_rdpm_mx_freq_update(gmu, + gmu->hfi.dcvs_table.gx_votes[req.freq].freq); + + return ret; +} + +static int gen8_gmu_clock_set(struct adreno_device *adreno_dev, u32 pwrlevel) +{ + return gen8_gmu_dcvs_set(adreno_dev, pwrlevel, INVALID_DCVS_IDX, INVALID_AB_VALUE); +} + +static int gen8_gmu_ifpc_store(struct kgsl_device *device, + u32 val) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + u32 requested_idle_level; + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_IFPC)) + return -EINVAL; + + if (val) + requested_idle_level = GPU_HW_IFPC; + else + requested_idle_level = GPU_HW_ACTIVE; + + if (gmu->idle_level == requested_idle_level) + return 0; + + /* Power down the GPU before changing the idle level */ + return adreno_power_cycle_u32(adreno_dev, &gmu->idle_level, + requested_idle_level); +} + +static u32 gen8_gmu_ifpc_isenabled(struct kgsl_device *device) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(ADRENO_DEVICE(device)); + + return gmu->idle_level == GPU_HW_IFPC; +} + +/* Send an NMI to the GMU */ +void gen8_gmu_send_nmi(struct kgsl_device *device, bool force) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + u32 result; + + /* + * Do not send NMI if the SMMU is stalled because GMU will not be able + * to save cm3 state to DDR. + */ + if (gen8_gmu_gx_is_on(adreno_dev) && gen8_is_smmu_stalled(device)) { + dev_err(&gmu->pdev->dev, + "Skipping NMI because SMMU is stalled\n"); + return; + } + + if (force) + goto nmi; + + /* + * We should not send NMI if there was a CM3 fault reported because we + * don't want to overwrite the critical CM3 state captured by gmu before + * it sent the CM3 fault interrupt. 
Also don't send NMI if GMU reset is + * already active. We could have hit a GMU assert and NMI might have + * already been triggered. + */ + + /* make sure we're reading the latest cm3_fault */ + smp_rmb(); + + if (atomic_read(&gmu->cm3_fault)) + return; + + gmu_core_regread(device, GEN8_GMUCX_CM3_FW_INIT_RESULT, &result); + + if (result & 0xE00) + return; + +nmi: + /* Mask so there's no interrupt caused by NMI */ + gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_MASK, UINT_MAX); + + /* Make sure the interrupt is masked before causing it */ + wmb(); + + /* This will cause the GMU to save it's internal state to ddr */ + gmu_core_regrmw(device, GEN8_GMUCX_CM3_CFG, BIT(9), BIT(9)); + + /* Make sure the NMI is invoked before we proceed*/ + wmb(); + + /* Wait for the NMI to be handled */ + udelay(200); +} + +static void gen8_gmu_cooperative_reset(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + u32 result; + + gmu_core_regwrite(device, GEN8_GMUCX_WDOG_CTRL, 0); + gmu_core_regwrite(device, GEN8_GMUCX_HOST2GMU_INTR_SET, BIT(17)); + + /* + * After triggering graceful death wait for snapshot ready + * indication from GMU. + */ + if (!gmu_core_timed_poll_check(device, GEN8_GMUCX_CM3_FW_INIT_RESULT, + 0x800, 2, 0x800)) + return; + + gmu_core_regread(device, GEN8_GMUCX_CM3_FW_INIT_RESULT, &result); + dev_err(&gmu->pdev->dev, + "GMU cooperative reset timed out 0x%x\n", result); + /* + * If we dont get a snapshot ready from GMU, trigger NMI + * and if we still timeout then we just continue with reset. 
+ */ + gen8_gmu_send_nmi(device, true); + + gmu_core_regread(device, GEN8_GMUCX_CM3_FW_INIT_RESULT, &result); + if ((result & 0x800) != 0x800) + dev_err(&gmu->pdev->dev, + "GMU cooperative reset NMI timed out 0x%x\n", result); +} + +static int gen8_gmu_wait_for_active_transition(struct kgsl_device *device) +{ + u32 reg; + struct gen8_gmu_device *gmu = to_gen8_gmu(ADRENO_DEVICE(device)); + + if (gmu_core_timed_poll_check(device, GEN8_GMUCX_RPMH_POWER_STATE, + GPU_HW_ACTIVE, 100, GENMASK(3, 0))) { + gmu_core_regread(device, GEN8_GMUCX_RPMH_POWER_STATE, ®); + dev_err(&gmu->pdev->dev, + "GMU failed to move to ACTIVE state, Current state: 0x%x\n", + reg); + + return -ETIMEDOUT; + } + + return 0; +} + +static bool gen8_gmu_scales_bandwidth(struct kgsl_device *device) +{ + return true; +} + +void gen8_gmu_handle_watchdog(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 mask; + + /* Temporarily mask the watchdog interrupt to prevent a storm */ + gmu_core_regread(device, GEN8_GMUAO_AO_HOST_INTERRUPT_MASK, &mask); + gmu_core_regwrite(device, GEN8_GMUAO_AO_HOST_INTERRUPT_MASK, + (mask | GMU_INT_WDOG_BITE)); + + gen8_gmu_send_nmi(device, false); + + dev_err_ratelimited(&gmu->pdev->dev, + "GMU watchdog expired interrupt received\n"); +} + +static irqreturn_t gen8_gmu_irq_handler(int irq, void *data) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + const struct gen8_gpudev *gen8_gpudev = + to_gen8_gpudev(ADRENO_GPU_DEVICE(adreno_dev)); + u32 status = 0; + + gmu_core_regread(device, GEN8_GMUAO_AO_HOST_INTERRUPT_STATUS, &status); + gmu_core_regwrite(device, GEN8_GMUAO_AO_HOST_INTERRUPT_CLR, status); + + if (status & GMU_INT_HOST_AHB_BUS_ERR) + dev_err_ratelimited(&gmu->pdev->dev, + "AHB bus error interrupt received\n"); + + if (status & GMU_INT_WDOG_BITE) + 
gen8_gpudev->handle_watchdog(adreno_dev); + + if (status & GMU_INT_FENCE_ERR) { + u32 fence_status; + + gmu_core_regread(device, GEN8_GMUAO_AHB_FENCE_STATUS, + &fence_status); + dev_err_ratelimited(&gmu->pdev->dev, + "FENCE error interrupt received %x\n", fence_status); + } + + if (status & ~GMU_AO_INT_MASK) + dev_err_ratelimited(&gmu->pdev->dev, + "Unhandled GMU interrupts 0x%lx\n", + status & ~GMU_AO_INT_MASK); + + return IRQ_HANDLED; +} + +void gen8_gmu_aop_send_acd_state(struct gen8_gmu_device *gmu, bool flag) +{ + struct qmp_pkt msg; + char msg_buf[36]; + u32 size; + int ret; + + if (IS_ERR_OR_NULL(gmu->mailbox.channel)) + return; + + size = scnprintf(msg_buf, sizeof(msg_buf), + "{class: gpu, res: acd, val: %d}", flag); + + /* mailbox controller expects 4-byte aligned buffer */ + msg.size = ALIGN((size + 1), SZ_4); + msg.data = msg_buf; + + ret = mbox_send_message(gmu->mailbox.channel, &msg); + + if (ret < 0) + dev_err(&gmu->pdev->dev, + "AOP mbox send message failed: %d\n", ret); +} + +int gen8_gmu_enable_clks(struct adreno_device *adreno_dev, u32 level) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + gen8_rdpm_cx_freq_update(gmu, gmu->freqs[level] / 1000); + + ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", + gmu->freqs[level]); + if (ret) { + dev_err(&gmu->pdev->dev, "GMU clock:%d set failed:%d\n", + gmu->freqs[level], ret); + return ret; + } + + ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "hub_clk", + adreno_dev->gmu_hub_clk_freq); + if (ret && ret != -ENODEV) { + dev_err(&gmu->pdev->dev, "Unable to set the HUB clock\n"); + return ret; + } + + ret = clk_bulk_prepare_enable(gmu->num_clks, gmu->clks); + if (ret) { + dev_err(&gmu->pdev->dev, "Cannot enable GMU clocks\n"); + return ret; + } + + device->state = KGSL_STATE_AWARE; + + return 0; +} + +static int gen8_gmu_first_boot(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = 
KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int level, ret; + + kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE); + + gen8_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled); + + ret = gen8_gmu_enable_gdsc(adreno_dev); + if (ret) + return ret; + + ret = gen8_gmu_enable_clks(adreno_dev, 0); + if (ret) + goto gdsc_off; + + ret = gen8_gmu_load_fw(adreno_dev); + if (ret) + goto clks_gdsc_off; + + ret = gen8_gmu_version_info(adreno_dev); + if (ret) + goto clks_gdsc_off; + + ret = gen8_gmu_itcm_shadow(adreno_dev); + if (ret) + goto clks_gdsc_off; + + ret = gen8_scm_gpu_init_cx_regs(adreno_dev); + if (ret) + goto clks_gdsc_off; + + gen8_gmu_register_config(adreno_dev); + + gen8_gmu_irq_enable(adreno_dev); + + /* Vote for minimal DDR BW for GMU to init */ + level = pwr->pwrlevels[pwr->default_pwrlevel].bus_min; + icc_set_bw(pwr->icc_path, 0, kBps_to_icc(pwr->ddr_table[level])); + + ret = gen8_gmu_device_start(adreno_dev); + if (ret) + goto err; + + if (!test_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags)) { + ret = gen8_load_pdc_ucode(adreno_dev); + if (ret) + goto err; + + gen8_load_rsc_ucode(adreno_dev); + set_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags); + } + + ret = gen8_gmu_hfi_start(adreno_dev); + if (ret) + goto err; + + gen8_get_gpu_feature_info(adreno_dev); + + ret = gen8_hfi_start(adreno_dev); + if (ret) + goto err; + + if (gen8_hfi_send_get_value(adreno_dev, HFI_VALUE_GMU_AB_VOTE, 0) == 1) { + adreno_dev->gmu_ab = true; + set_bit(ADRENO_DEVICE_GMU_AB, &adreno_dev->priv); + } + + icc_set_bw(pwr->icc_path, 0, 0); + + device->gmu_fault = false; + + kgsl_pwrctrl_set_state(device, KGSL_STATE_AWARE); + + return 0; + +err: + gen8_gmu_irq_disable(adreno_dev); + + if (device->gmu_fault) { + gen8_gmu_suspend(adreno_dev); + return ret; + } + +clks_gdsc_off: + clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); + +gdsc_off: + gen8_gmu_disable_gdsc(adreno_dev); + + 
gen8_rdpm_cx_freq_update(gmu, 0); + + return ret; +} + +static int gen8_gmu_boot(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret = 0; + + kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE); + + ret = gen8_gmu_enable_gdsc(adreno_dev); + if (ret) + return ret; + + ret = gen8_gmu_enable_clks(adreno_dev, 0); + if (ret) + goto gdsc_off; + + ret = gen8_rscc_wakeup_sequence(adreno_dev); + if (ret) + goto clks_gdsc_off; + + ret = gen8_gmu_load_fw(adreno_dev); + if (ret) + goto clks_gdsc_off; + + gen8_gmu_register_config(adreno_dev); + + gen8_gmu_irq_enable(adreno_dev); + + ret = gen8_gmu_device_start(adreno_dev); + if (ret) + goto err; + + ret = gen8_gmu_hfi_start(adreno_dev); + if (ret) + goto err; + + ret = gen8_hfi_start(adreno_dev); + if (ret) + goto err; + + device->gmu_fault = false; + + kgsl_pwrctrl_set_state(device, KGSL_STATE_AWARE); + + return 0; + +err: + gen8_gmu_irq_disable(adreno_dev); + + if (device->gmu_fault) { + gen8_gmu_suspend(adreno_dev); + return ret; + } + +clks_gdsc_off: + clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); + +gdsc_off: + gen8_gmu_disable_gdsc(adreno_dev); + + gen8_rdpm_cx_freq_update(gmu, 0); + + return ret; +} + +static void set_acd(struct adreno_device *adreno_dev, void *priv) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + + adreno_dev->acd_enabled = *((bool *)priv); + gen8_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled); +} + +static int gen8_gmu_acd_set(struct kgsl_device *device, bool val) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + + if (IS_ERR_OR_NULL(gmu->mailbox.channel)) + return -EINVAL; + + /* Don't do any unneeded work if ACD is already in the correct state */ + if (adreno_dev->acd_enabled == val) + return 0; + + /* Power cycle the GPU for changes to take effect */ + return adreno_power_cycle(adreno_dev, 
set_acd, &val); +} + +#define BCL_RESP_TYPE_MASK BIT(0) +#define BCL_SID0_MASK GENMASK(7, 1) +#define BCL_SID1_MASK GENMASK(14, 8) +#define BCL_SID2_MASK GENMASK(21, 15) + +static int gen8_bcl_sid_set(struct kgsl_device *device, u32 sid_id, u64 sid_val) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + u32 bcl_data, val = (u32) sid_val; + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_BCL) || + !FIELD_GET(BCL_RESP_TYPE_MASK, adreno_dev->bcl_data)) + return -EINVAL; + + switch (sid_id) { + case 0: + adreno_dev->bcl_data &= ~BCL_SID0_MASK; + bcl_data = adreno_dev->bcl_data | FIELD_PREP(BCL_SID0_MASK, val); + break; + case 1: + adreno_dev->bcl_data &= ~BCL_SID1_MASK; + bcl_data = adreno_dev->bcl_data | FIELD_PREP(BCL_SID1_MASK, val); + break; + case 2: + adreno_dev->bcl_data &= ~BCL_SID2_MASK; + bcl_data = adreno_dev->bcl_data | FIELD_PREP(BCL_SID2_MASK, val); + break; + default: + return -EINVAL; + } + + return adreno_power_cycle_u32(adreno_dev, &adreno_dev->bcl_data, bcl_data); +} + +static u64 gen8_bcl_sid_get(struct kgsl_device *device, u32 sid_id) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_BCL) || + !FIELD_GET(BCL_RESP_TYPE_MASK, adreno_dev->bcl_data)) + return 0; + + switch (sid_id) { + case 0: + return ((u64) FIELD_GET(BCL_SID0_MASK, adreno_dev->bcl_data)); + case 1: + return ((u64) FIELD_GET(BCL_SID1_MASK, adreno_dev->bcl_data)); + case 2: + return ((u64) FIELD_GET(BCL_SID2_MASK, adreno_dev->bcl_data)); + default: + return 0; + } +} + +static const struct gmu_dev_ops gen8_gmudev = { + .oob_set = gen8_gmu_oob_set, + .oob_clear = gen8_gmu_oob_clear, + .ifpc_store = gen8_gmu_ifpc_store, + .ifpc_isenabled = gen8_gmu_ifpc_isenabled, + .cooperative_reset = gen8_gmu_cooperative_reset, + .wait_for_active_transition = gen8_gmu_wait_for_active_transition, + .scales_bandwidth = gen8_gmu_scales_bandwidth, + .acd_set = gen8_gmu_acd_set, + .bcl_sid_set = gen8_bcl_sid_set, + .bcl_sid_get = 
gen8_bcl_sid_get, + .send_nmi = gen8_gmu_send_nmi, +}; + +static int gen8_gmu_bus_set(struct adreno_device *adreno_dev, int buslevel, + u32 ab) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int ret = 0; + + if (buslevel == pwr->cur_buslevel) + buslevel = INVALID_DCVS_IDX; + + if ((ab == pwr->cur_ab) || (ab == 0)) + ab = INVALID_AB_VALUE; + + if ((ab == INVALID_AB_VALUE) && (buslevel == INVALID_DCVS_IDX)) + return 0; + + ret = gen8_gmu_dcvs_set(adreno_dev, INVALID_DCVS_IDX, + buslevel, ab); + if (ret) + return ret; + + if (buslevel != INVALID_DCVS_IDX) + pwr->cur_buslevel = buslevel; + + if (ab != INVALID_AB_VALUE) { + if (!adreno_dev->gmu_ab) + icc_set_bw(pwr->icc_path, MBps_to_icc(ab), 0); + pwr->cur_ab = ab; + } + + trace_kgsl_buslevel(device, pwr->active_pwrlevel, buslevel, ab); + return ret; +} + +#define NUM_CHANNELS 4 + +u32 gen8_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab) +{ + u16 vote = 0; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + if (!adreno_dev->gmu_ab || (ab == INVALID_AB_VALUE)) + return (FIELD_PREP(GENMASK(31, 16), INVALID_AB_VALUE)); + + if (pwr->ddr_table[pwr->ddr_table_count - 1]) { + /* + * if ab is calculated as higher than theoretical max bandwidth, set ab as + * theoretical max to prevent truncation during quantization. + * + * max ddr bandwidth (kbps) = (Max bw in kbps per channel * number of channel) + * max ab (Mbps) = max ddr bandwidth (kbps) / 1000 + */ + u32 max_bw = pwr->ddr_table[pwr->ddr_table_count - 1] * NUM_CHANNELS; + u32 max_ab = max_bw / 1000; + + ab = min_t(u32, ab, max_ab); + + /* + * Power FW supports a 16 bit AB BW level. We can quantize the entire vote-able BW + * range to a 16 bit space and the quantized value can be used to vote for AB though + * GMU. Quantization can be performed as below. 
+ * + * quantized_vote = (ab vote (kbps) * 2^16) / max ddr bandwidth (kbps) + */ + vote = (u16)(((u64)ab * 1000 * (1 << 16)) / max_bw); + } + + /* + * Set ab enable mask and valid AB vote. req.bw is 32 bit value 0xABABENIB + * and with this return we want to set the upper 16 bits and EN field specifies + * if the AB vote is valid or not. + */ + return (FIELD_PREP(GENMASK(31, 16), vote) | FIELD_PREP(GENMASK(15, 8), 1)); +} + +static void gen8_free_gmu_globals(struct gen8_gmu_device *gmu) +{ + int i; + + for (i = 0; i < gmu->global_entries && i < ARRAY_SIZE(gmu->gmu_globals); i++) { + struct kgsl_memdesc *md = &gmu->gmu_globals[i]; + + if (!md->gmuaddr) + continue; + + iommu_unmap(gmu->domain, md->gmuaddr, md->size); + + if (md->priv & KGSL_MEMDESC_SYSMEM) + kgsl_sharedmem_free(md); + + memset(md, 0, sizeof(*md)); + } + + if (gmu->domain) { + iommu_detach_device(gmu->domain, &gmu->pdev->dev); + iommu_domain_free(gmu->domain); + gmu->domain = NULL; + } + + gmu->global_entries = 0; +} + +static int gen8_gmu_aop_mailbox_init(struct adreno_device *adreno_dev, + struct gen8_gmu_device *gmu) +{ + struct kgsl_mailbox *mailbox = &gmu->mailbox; + + mailbox->client.dev = &gmu->pdev->dev; + mailbox->client.tx_block = true; + mailbox->client.tx_tout = 1000; + mailbox->client.knows_txdone = false; + + mailbox->channel = mbox_request_channel(&mailbox->client, 0); + if (IS_ERR(mailbox->channel)) + return PTR_ERR(mailbox->channel); + + adreno_dev->acd_enabled = true; + return 0; +} + +static void gen8_gmu_acd_probe(struct kgsl_device *device, + struct gen8_gmu_device *gmu, struct device_node *node) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct kgsl_pwrlevel *pwrlevel = + &pwr->pwrlevels[pwr->num_pwrlevels - 1]; + struct hfi_acd_table_cmd *cmd = &gmu->hfi.acd_table; + int ret, i, cmd_idx = 0; + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_ACD)) + return; + + cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ACD_TBL, HFI_MSG_CMD); + 
+ cmd->version = 1; + cmd->stride = 1; + cmd->enable_by_level = 0; + + /* + * Iterate through each gpu power level and generate a mask for GMU + * firmware for ACD enabled levels and store the corresponding control + * register configurations to the acd_table structure. + */ + for (i = 0; i < pwr->num_pwrlevels; i++) { + if (pwrlevel->acd_level) { + cmd->enable_by_level |= (1 << (i + 1)); + cmd->data[cmd_idx++] = pwrlevel->acd_level; + } + pwrlevel--; + } + + if (!cmd->enable_by_level) + return; + + cmd->num_levels = cmd_idx; + + ret = gen8_gmu_aop_mailbox_init(adreno_dev, gmu); + if (ret) + dev_err(&gmu->pdev->dev, + "AOP mailbox init failed: %d\n", ret); +} + +static int gen8_gmu_reg_probe(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret; + + ret = kgsl_regmap_add_region(&device->regmap, gmu->pdev, "gmu", NULL, NULL); + + if (ret) + dev_err(&gmu->pdev->dev, "Unable to map the GMU registers\n"); + /* + * gmu_ao_blk_dec1 and gmu_ao_blk_dec2 are contiguous and contained within the gmu region + * mapped above. gmu_ao_blk_dec0 is not within the gmu region and is mapped separately. + */ + kgsl_regmap_add_region(&device->regmap, gmu->pdev, "gmu_ao_blk_dec0", NULL, NULL); + + return ret; +} + +static int gen8_gmu_clk_probe(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret, i; + int tbl_size; + int num_freqs; + int offset; + + ret = devm_clk_bulk_get_all(&gmu->pdev->dev, &gmu->clks); + if (ret < 0) + return ret; + + /* + * Voting for apb_pclk will enable power and clocks required for + * QDSS path to function. However, if QCOM_KGSL_QDSS_STM is not enabled, + * QDSS is essentially unusable. Hence, if QDSS cannot be used, + * don't vote for this clock. 
+ */ + if (!IS_ENABLED(CONFIG_QCOM_KGSL_QDSS_STM)) { + for (i = 0; i < ret; i++) { + if (!strcmp(gmu->clks[i].id, "apb_pclk")) { + gmu->clks[i].clk = NULL; + break; + } + } + } + + gmu->num_clks = ret; + + /* Read the optional list of GMU frequencies */ + if (of_get_property(gmu->pdev->dev.of_node, + "qcom,gmu-freq-table", &tbl_size) == NULL) + goto default_gmu_freq; + + num_freqs = (tbl_size / sizeof(u32)) / 2; + if (num_freqs != ARRAY_SIZE(gmu->freqs)) + goto default_gmu_freq; + + for (i = 0; i < num_freqs; i++) { + offset = i * 2; + ret = of_property_read_u32_index(gmu->pdev->dev.of_node, + "qcom,gmu-freq-table", offset, &gmu->freqs[i]); + if (ret) + goto default_gmu_freq; + ret = of_property_read_u32_index(gmu->pdev->dev.of_node, + "qcom,gmu-freq-table", offset + 1, &gmu->vlvls[i]); + if (ret) + goto default_gmu_freq; + } + return 0; + +default_gmu_freq: + /* The GMU frequency table is missing or invalid. Go with a default */ + gmu->freqs[0] = GMU_FREQ_MIN; + gmu->vlvls[0] = RPMH_REGULATOR_LEVEL_LOW_SVS; + gmu->freqs[1] = GMU_FREQ_MAX; + gmu->vlvls[1] = RPMH_REGULATOR_LEVEL_SVS; + + return 0; +} + +static void gen8_gmu_rdpm_probe(struct gen8_gmu_device *gmu, + struct kgsl_device *device) +{ + struct resource *res; + + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, "rdpm_cx"); + if (res) + gmu->rdpm_cx_virt = devm_ioremap(&device->pdev->dev, + res->start, resource_size(res)); + + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, "rdpm_mx"); + if (res) + gmu->rdpm_mx_virt = devm_ioremap(&device->pdev->dev, + res->start, resource_size(res)); +} + +static int gmu_cx_gdsc_event(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct gen8_gmu_device *gmu = container_of(nb, struct gen8_gmu_device, gdsc_nb); + struct adreno_device *adreno_dev = gen8_gmu_to_adreno(gmu); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 val; + + if (!(event & REGULATOR_EVENT_DISABLE) || + !test_bit(GMU_PRIV_CX_GDSC_WAIT, 
&gmu->flags)) + return 0; + + if (kgsl_regmap_read_poll_timeout(&device->regmap, GEN8_GPU_CC_CX_GDSCR, + val, !(val & BIT(31)), 100, 100 * 1000)) + dev_err(device->dev, "GPU CX wait timeout.\n"); + + clear_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); + complete_all(&gmu->gdsc_gate); + + return 0; +} + +static int gen8_gmu_regulators_probe(struct gen8_gmu_device *gmu, + struct platform_device *pdev) +{ + int ret; + + gmu->cx_gdsc = devm_regulator_get(&pdev->dev, "vddcx"); + if (IS_ERR(gmu->cx_gdsc)) { + if (PTR_ERR(gmu->cx_gdsc) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Couldn't get the vddcx gdsc\n"); + return PTR_ERR(gmu->cx_gdsc); + } + + gmu->gx_gdsc = devm_regulator_get(&pdev->dev, "vdd"); + if (IS_ERR(gmu->gx_gdsc)) { + if (PTR_ERR(gmu->gx_gdsc) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Couldn't get the vdd gdsc\n"); + return PTR_ERR(gmu->gx_gdsc); + } + + init_completion(&gmu->gdsc_gate); + complete_all(&gmu->gdsc_gate); + + gmu->gdsc_nb.notifier_call = gmu_cx_gdsc_event; + ret = devm_regulator_register_notifier(gmu->cx_gdsc, &gmu->gdsc_nb); + + if (ret) { + dev_err(&pdev->dev, "Failed to register gmu cx gdsc notifier: %d\n", ret); + return ret; + } + + return 0; +} + +void gen8_gmu_remove(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + + if (!IS_ERR_OR_NULL(gmu->mailbox.channel)) + mbox_free_channel(gmu->mailbox.channel); + + adreno_dev->acd_enabled = false; + + if (gmu->fw_image) + release_firmware(gmu->fw_image); + + gen8_free_gmu_globals(gmu); + + vfree(gmu->itcm_shadow); + kobject_put(&gmu->log_kobj); + kobject_put(&gmu->stats_kobj); +} + +static int gen8_gmu_iommu_fault_handler(struct iommu_domain *domain, + struct device *dev, unsigned long addr, int flags, void *token) +{ + char *fault_type = "unknown"; + + if (flags & IOMMU_FAULT_TRANSLATION) + fault_type = "translation"; + else if (flags & IOMMU_FAULT_PERMISSION) + fault_type = "permission"; + else if 
(flags & IOMMU_FAULT_EXTERNAL) + fault_type = "external"; + else if (flags & IOMMU_FAULT_TRANSACTION_STALLED) + fault_type = "transaction stalled"; + + dev_err(dev, "GMU fault addr = %lX, context=kernel (%s %s fault)\n", + addr, + (flags & IOMMU_FAULT_WRITE) ? "write" : "read", + fault_type); + + return 0; +} + +static int gen8_gmu_iommu_init(struct gen8_gmu_device *gmu) +{ + int ret; + + gmu->domain = iommu_domain_alloc(&platform_bus_type); + if (gmu->domain == NULL) { + dev_err(&gmu->pdev->dev, "Unable to allocate GMU IOMMU domain\n"); + return -ENODEV; + } + + /* + * Disable stall on fault for the GMU context bank. + * This sets SCTLR.CFCFG = 0. + * Also note that, the smmu driver sets SCTLR.HUPCF = 0 by default. + */ + qcom_iommu_set_fault_model(gmu->domain, QCOM_IOMMU_FAULT_MODEL_NO_STALL); + + ret = iommu_attach_device(gmu->domain, &gmu->pdev->dev); + if (!ret) { + iommu_set_fault_handler(gmu->domain, + gen8_gmu_iommu_fault_handler, gmu); + return 0; + } + + dev_err(&gmu->pdev->dev, + "Unable to attach GMU IOMMU domain: %d\n", ret); + iommu_domain_free(gmu->domain); + gmu->domain = NULL; + + return ret; +} + +/* Default IFPC timer (300usec) value */ +#define GEN8_GMU_LONG_IFPC_HYST FIELD_PREP(GENMASK(15, 0), 0x1680) + +/* Minimum IFPC timer (200usec) allowed to override default value */ +#define GEN8_GMU_LONG_IFPC_HYST_FLOOR FIELD_PREP(GENMASK(15, 0), 0x0F00) + +int gen8_gmu_probe(struct kgsl_device *device, + struct platform_device *pdev) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct device *dev = &pdev->dev; + struct resource *res; + int ret, i; + + gmu->pdev = pdev; + + dma_set_coherent_mask(&gmu->pdev->dev, DMA_BIT_MASK(64)); + gmu->pdev->dev.dma_mask = &gmu->pdev->dev.coherent_dma_mask; + set_dma_ops(&gmu->pdev->dev, NULL); + + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, + "rscc"); + if (res) { + gmu->rscc_virt = devm_ioremap(&device->pdev->dev, 
res->start, + resource_size(res)); + if (!gmu->rscc_virt) { + dev_err(&gmu->pdev->dev, "rscc ioremap failed\n"); + return -ENOMEM; + } + } + + /* Setup any rdpm register ranges */ + gen8_gmu_rdpm_probe(gmu, device); + + /* Set up GMU regulators */ + ret = gen8_gmu_regulators_probe(gmu, pdev); + if (ret) + return ret; + + ret = gen8_gmu_clk_probe(adreno_dev); + if (ret) + return ret; + + /* Set up GMU IOMMU and shared memory with GMU */ + ret = gen8_gmu_iommu_init(gmu); + if (ret) + goto error; + + gmu->vma = gen8_gmu_vma; + for (i = 0; i < ARRAY_SIZE(gen8_gmu_vma); i++) { + struct gmu_vma_entry *vma = &gen8_gmu_vma[i]; + + vma->vma_root = RB_ROOT; + spin_lock_init(&vma->lock); + } + + /* Map and reserve GMU CSRs registers */ + ret = gen8_gmu_reg_probe(adreno_dev); + if (ret) + goto error; + + /* Populates RPMh configurations */ + ret = gen8_build_rpmh_tables(adreno_dev); + if (ret) + goto error; + + /* Set up GMU idle state */ + if (ADRENO_FEATURE(adreno_dev, ADRENO_IFPC)) { + gmu->idle_level = GPU_HW_IFPC; + adreno_dev->ifpc_hyst = GEN8_GMU_LONG_IFPC_HYST; + adreno_dev->ifpc_hyst_floor = GEN8_GMU_LONG_IFPC_HYST_FLOOR; + } else { + gmu->idle_level = GPU_HW_ACTIVE; + } + + gen8_gmu_acd_probe(device, gmu, pdev->dev.of_node); + + set_bit(GMU_ENABLED, &device->gmu_core.flags); + + device->gmu_core.dev_ops = &gen8_gmudev; + + /* Set default GMU attributes */ + gmu->log_stream_enable = false; + gmu->log_group_mask = 0x3; + + /* Disabled by default */ + gmu->stats_enable = false; + /* Set default to CM3 busy cycles countable */ + gmu->stats_mask = BIT(GEN8_GMU_CM3_BUSY_CYCLES); + /* Interval is in 50 us units. 
Set default sampling frequency to 4x50 us */ + gmu->stats_interval = HFI_FEATURE_GMU_STATS_INTERVAL; + + /* GMU sysfs nodes setup */ + (void) kobject_init_and_add(&gmu->log_kobj, &log_kobj_type, &dev->kobj, "log"); + (void) kobject_init_and_add(&gmu->stats_kobj, &stats_kobj_type, &dev->kobj, "stats"); + + of_property_read_u32(gmu->pdev->dev.of_node, "qcom,gmu-perf-ddr-bw", + &gmu->perf_ddr_bw); + + spin_lock_init(&gmu->hfi.cmdq_lock); + + gmu->irq = kgsl_request_irq(gmu->pdev, "gmu", + gen8_gmu_irq_handler, device); + + if (gmu->irq >= 0) + return 0; + + ret = gmu->irq; + +error: + gen8_gmu_remove(device); + return ret; +} + +static void gen8_gmu_active_count_put(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return; + + if (WARN(atomic_read(&device->active_cnt) == 0, + "Unbalanced get/put calls to KGSL active count\n")) + return; + + if (atomic_dec_and_test(&device->active_cnt)) { + kgsl_pwrscale_update_stats(device); + kgsl_pwrscale_update(device); + kgsl_start_idle_timer(device); + } + + trace_kgsl_active_count(device, + (unsigned long) __builtin_return_address(0)); + + wake_up(&device->active_cnt_wq); +} + +int gen8_halt_gbif(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + /* Halt new client requests */ + kgsl_regwrite(device, GEN8_GBIF_HALT, GEN8_GBIF_CLIENT_HALT_MASK); + ret = adreno_wait_for_halt_ack(device, + GEN8_GBIF_HALT_ACK, GEN8_GBIF_CLIENT_HALT_MASK); + + /* Halt all AXI requests */ + kgsl_regwrite(device, GEN8_GBIF_HALT, GEN8_GBIF_ARB_HALT_MASK); + ret = adreno_wait_for_halt_ack(device, + GEN8_GBIF_HALT_ACK, GEN8_GBIF_ARB_HALT_MASK); + + /* De-assert the halts */ + kgsl_regwrite(device, GEN8_GBIF_HALT, 0x0); + + return ret; +} + +static int gen8_gmu_power_off(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct kgsl_device *device = 
KGSL_DEVICE(adreno_dev); + int ret = 0; + + if (device->gmu_fault) + goto error; + + /* Wait for the lowest idle level we requested */ + ret = gen8_gmu_wait_for_lowest_idle(adreno_dev); + if (ret) + goto error; + + ret = gen8_complete_rpmh_votes(gmu, 2); + if (ret) + goto error; + + ret = gen8_gmu_notify_slumber(adreno_dev); + if (ret) + goto error; + + ret = gen8_gmu_wait_for_idle(adreno_dev); + if (ret) + goto error; + + ret = gen8_rscc_sleep_sequence(adreno_dev); + if (ret) + goto error; + + gen8_rdpm_mx_freq_update(gmu, 0); + + /* Now that we are done with GMU and GPU, Clear the GBIF */ + ret = gen8_halt_gbif(adreno_dev); + if (ret) + goto error; + + gen8_gmu_irq_disable(adreno_dev); + + gen8_hfi_stop(adreno_dev); + + clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); + + gen8_gmu_disable_gdsc(adreno_dev); + + gen8_rdpm_cx_freq_update(gmu, 0); + + kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE); + + return 0; + +error: + gen8_gmu_irq_disable(adreno_dev); + gen8_hfi_stop(adreno_dev); + gen8_gmu_suspend(adreno_dev); + + return ret; +} + +void gen8_enable_gpu_irq(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + kgsl_pwrctrl_irq(device, true); + + adreno_irqctrl(adreno_dev, 1); +} + +void gen8_disable_gpu_irq(struct adreno_device *adreno_dev) +{ + kgsl_pwrctrl_irq(KGSL_DEVICE(adreno_dev), false); + + if (gen8_gmu_gx_is_on(adreno_dev)) + adreno_irqctrl(adreno_dev, 0); +} + +static int gen8_gpu_boot(struct adreno_device *adreno_dev) +{ + const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + /* Clear any GPU faults that might have been left over */ + adreno_clear_gpu_fault(adreno_dev); + + adreno_set_active_ctxs_null(adreno_dev); + + ret = kgsl_mmu_start(device); + if (ret) + goto err; + + ret = gen8_gmu_oob_set(device, oob_gpu); + if (ret) + goto oob_clear; + + ret = gen8_gmu_hfi_start_msg(adreno_dev); + if (ret) + goto oob_clear; + 
+ /* Clear the busy_data stats - we're starting over from scratch */ + memset(&adreno_dev->busy_data, 0, sizeof(adreno_dev->busy_data)); + + gen8_start(adreno_dev); + + if (gen8_core->qos_value && adreno_is_preemption_enabled(adreno_dev)) + kgsl_regwrite(device, GEN8_RBBM_GBIF_CLIENT_QOS_CNTL, + gen8_core->qos_value[adreno_dev->cur_rb->id]); + + /* Re-initialize the coresight registers if applicable */ + adreno_coresight_start(adreno_dev); + + adreno_perfcounter_start(adreno_dev); + + /* Clear FSR here in case it is set from a previous pagefault */ + kgsl_mmu_clear_fsr(&device->mmu); + + gen8_enable_gpu_irq(adreno_dev); + + ret = gen8_rb_start(adreno_dev); + if (ret) { + gen8_disable_gpu_irq(adreno_dev); + goto oob_clear; + } + + /* + * At this point it is safe to assume that we recovered. Setting + * this field allows us to take a new snapshot for the next failure + * if we are prioritizing the first unrecoverable snapshot. + */ + if (device->snapshot) + device->snapshot->recovered = true; + + /* Start the dispatcher */ + adreno_dispatcher_start(device); + + device->reset_counter++; + + gen8_gmu_oob_clear(device, oob_gpu); + + return 0; + +oob_clear: + gen8_gmu_oob_clear(device, oob_gpu); + +err: + gen8_gmu_power_off(adreno_dev); + + return ret; +} + +static void gmu_idle_timer(struct timer_list *t) +{ + struct kgsl_device *device = container_of(t, struct kgsl_device, + idle_timer); + + kgsl_schedule_work(&device->idle_check_ws); +} + +static int gen8_boot(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + if (WARN_ON(test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))) + return 0; + + kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE); + + ret = gen8_gmu_boot(adreno_dev); + if (ret) + return ret; + + ret = gen8_gpu_boot(adreno_dev); + if (ret) + return ret; + + kgsl_start_idle_timer(device); + kgsl_pwrscale_wake(device); + + set_bit(GMU_PRIV_GPU_STARTED, 
&gmu->flags); + + device->pwrctrl.last_stat_updated = ktime_get(); + + kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE); + + return ret; +} + +static int gen8_first_boot(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret; + + if (test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) { + if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + return gen8_boot(adreno_dev); + + return 0; + } + + ret = gen8_ringbuffer_init(adreno_dev); + if (ret) + return ret; + + ret = gen8_microcode_read(adreno_dev); + if (ret) + return ret; + + ret = gen8_init(adreno_dev); + if (ret) + return ret; + + ret = gen8_gmu_init(adreno_dev); + if (ret) + return ret; + + kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE); + + ret = gen8_gmu_first_boot(adreno_dev); + if (ret) + return ret; + + ret = gen8_gpu_boot(adreno_dev); + if (ret) + return ret; + + adreno_get_bus_counters(adreno_dev); + + adreno_dev->cooperative_reset = ADRENO_FEATURE(adreno_dev, + ADRENO_COOP_RESET); + + adreno_create_profile_buffer(adreno_dev); + + set_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags); + set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + /* + * BCL needs respective Central Broadcast register to + * be programed from TZ. For kernel version prior to 6.1, this + * programing happens only when zap shader firmware load is successful. + * Zap firmware load can fail in boot up path hence enable BCL only + * after we successfully complete first boot to ensure that Central + * Broadcast register was programed before enabling BCL. + */ + if (ADRENO_FEATURE(adreno_dev, ADRENO_BCL)) + adreno_dev->bcl_enabled = true; + + /* + * There is a possible deadlock scenario during kgsl firmware reading + * (request_firmware) and devfreq update calls. During first boot, kgsl + * device mutex is held and then request_firmware is called for reading + * firmware. request_firmware internally takes dev_pm_qos_mtx lock. 
+ * Whereas in case of devfreq update calls triggered by thermal/bcl or + * devfreq sysfs, it first takes the same dev_pm_qos_mtx lock and then + * tries to take kgsl device mutex as part of get_dev_status/target + * calls. This results in deadlock when both thread are unable to acquire + * the mutex held by other thread. Enable devfreq updates now as we are + * done reading all firmware files. + */ + device->pwrscale.devfreq_enabled = true; + + device->pwrctrl.last_stat_updated = ktime_get(); + + kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE); + + return 0; +} + +static bool gen8_irq_pending(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 status; + + kgsl_regread(device, GEN8_RBBM_INT_0_STATUS, &status); + + /* Return busy if a interrupt is pending */ + return ((status & adreno_dev->irq_mask) || + atomic_read(&adreno_dev->pending_irq_refcnt)); +} + +static int gen8_power_off(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret; + + WARN_ON(!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)); + + adreno_suspend_context(device); + + /* + * adreno_suspend_context() unlocks the device mutex, which + * could allow a concurrent thread to attempt SLUMBER sequence. + * Hence, check the flags again before proceeding with SLUMBER. 
+ */ + if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + return 0; + + kgsl_pwrctrl_request_state(device, KGSL_STATE_SLUMBER); + + ret = gen8_gmu_oob_set(device, oob_gpu); + if (ret) + goto no_gx_power; + + if (gen8_irq_pending(adreno_dev)) { + gen8_gmu_oob_clear(device, oob_gpu); + return -EBUSY; + } + + kgsl_pwrscale_update_stats(device); + + /* Save active coresight registers if applicable */ + adreno_coresight_stop(adreno_dev); + + adreno_irqctrl(adreno_dev, 0); + +no_gx_power: + gen8_gmu_oob_clear(device, oob_gpu); + + kgsl_pwrctrl_irq(device, false); + + gen8_gmu_power_off(adreno_dev); + + adreno_set_active_ctxs_null(adreno_dev); + + adreno_dispatcher_stop(adreno_dev); + + adreno_ringbuffer_stop(adreno_dev); + + if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice)) + llcc_slice_deactivate(adreno_dev->gpu_llc_slice); + + if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) + llcc_slice_deactivate(adreno_dev->gpuhtw_llc_slice); + + clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + del_timer_sync(&device->idle_timer); + + kgsl_pwrscale_sleep(device); + + kgsl_pwrctrl_clear_l3_vote(device); + + /* + * Reset the context records so that CP can start + * at the correct read pointer for BV thread after + * coming out of slumber. 
+ */ + gen8_reset_preempt_records(adreno_dev); + + kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER); + + return ret; +} + +static void gmu_idle_check(struct work_struct *work) +{ + struct kgsl_device *device = container_of(work, + struct kgsl_device, idle_check_ws); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret; + + mutex_lock(&device->mutex); + + if (test_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags)) + goto done; + + if (atomic_read(&device->active_cnt) || time_is_after_jiffies(device->idle_jiffies)) { + kgsl_pwrscale_update(device); + kgsl_start_idle_timer(device); + goto done; + } + + if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + goto done; + + spin_lock(&device->submit_lock); + + if (device->submit_now) { + spin_unlock(&device->submit_lock); + kgsl_pwrscale_update(device); + kgsl_start_idle_timer(device); + goto done; + } + + device->skip_inline_submit = true; + spin_unlock(&device->submit_lock); + + ret = gen8_power_off(adreno_dev); + if (ret == -EBUSY) { + kgsl_pwrscale_update(device); + kgsl_start_idle_timer(device); + } + +done: + mutex_unlock(&device->mutex); +} + +static int gen8_gmu_first_open(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + /* + * Do the one time settings that need to happen when we + * attempt to boot the gpu the very first time + */ + ret = gen8_first_boot(adreno_dev); + if (ret) + return ret; + + /* + * A client that does a first_open but never closes the device + * may prevent us from going back to SLUMBER. So trigger the idle + * check by incrementing the active count and immediately releasing it. 
+ */ + atomic_inc(&device->active_cnt); + gen8_gmu_active_count_put(adreno_dev); + + return 0; +} + +static int gen8_gmu_last_close(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + return gen8_power_off(adreno_dev); + + return 0; +} + +static int gen8_gmu_active_count_get(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret = 0; + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return -EINVAL; + + if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags)) + return -EINVAL; + + if ((atomic_read(&device->active_cnt) == 0) && + !test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + ret = gen8_boot(adreno_dev); + + if (ret == 0) + atomic_inc(&device->active_cnt); + + trace_kgsl_active_count(device, + (unsigned long) __builtin_return_address(0)); + + return ret; +} + +static int gen8_gmu_pm_suspend(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret; + + if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags)) + return 0; + + kgsl_pwrctrl_request_state(device, KGSL_STATE_SUSPEND); + + /* Halt any new submissions */ + reinit_completion(&device->halt_gate); + + /* wait for active count so device can be put in slumber */ + ret = kgsl_active_count_wait(device, 0, HZ); + if (ret) { + dev_err(device->dev, + "Timed out waiting for the active count\n"); + goto err; + } + + ret = adreno_idle(device); + if (ret) + goto err; + + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + gen8_power_off(adreno_dev); + + set_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags); + + adreno_get_gpu_halt(adreno_dev); + + kgsl_pwrctrl_set_state(device, KGSL_STATE_SUSPEND); + + return 0; +err: + adreno_dispatcher_start(device); + return ret; +} + +static void gen8_gmu_pm_resume(struct adreno_device *adreno_dev) +{ + struct 
kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + + if (WARN(!test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags), + "resume invoked without a suspend\n")) + return; + + adreno_put_gpu_halt(adreno_dev); + + adreno_dispatcher_start(device); + + clear_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags); +} + +static void gen8_gmu_touch_wakeup(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret; + + /* + * Do not wake up a suspended device or until the first boot sequence + * has been completed. + */ + if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags) || + !test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) + return; + + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + goto done; + + kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE); + + ret = gen8_gmu_boot(adreno_dev); + if (ret) + return; + + ret = gen8_gpu_boot(adreno_dev); + if (ret) + return; + + kgsl_pwrscale_wake(device); + + set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + device->pwrctrl.last_stat_updated = ktime_get(); + + kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE); + +done: + /* + * When waking up from a touch event we want to stay active long enough + * for the user to send a draw command. 
The default idle timer timeout + * is shorter than we want so go ahead and push the idle timer out + * further for this special case + */ + mod_timer(&device->idle_timer, jiffies + + msecs_to_jiffies(adreno_wake_timeout)); +} + +const struct adreno_power_ops gen8_gmu_power_ops = { + .first_open = gen8_gmu_first_open, + .last_close = gen8_gmu_last_close, + .active_count_get = gen8_gmu_active_count_get, + .active_count_put = gen8_gmu_active_count_put, + .pm_suspend = gen8_gmu_pm_suspend, + .pm_resume = gen8_gmu_pm_resume, + .touch_wakeup = gen8_gmu_touch_wakeup, + .gpu_clock_set = gen8_gmu_clock_set, + .gpu_bus_set = gen8_gmu_bus_set, +}; + +int gen8_gmu_device_probe(struct platform_device *pdev, + u32 chipid, const struct adreno_gpu_core *gpucore) +{ + struct adreno_device *adreno_dev; + struct kgsl_device *device; + struct gen8_device *gen8_dev; + int ret; + + gen8_dev = devm_kzalloc(&pdev->dev, sizeof(*gen8_dev), + GFP_KERNEL); + if (!gen8_dev) + return -ENOMEM; + + adreno_dev = &gen8_dev->adreno_dev; + + ret = gen8_probe_common(pdev, adreno_dev, chipid, gpucore); + if (ret) + return ret; + + ret = adreno_dispatcher_init(adreno_dev); + if (ret) + return ret; + + device = KGSL_DEVICE(adreno_dev); + + INIT_WORK(&device->idle_check_ws, gmu_idle_check); + + timer_setup(&device->idle_timer, gmu_idle_timer, 0); + + if (ADRENO_FEATURE(adreno_dev, ADRENO_DMS)) { + set_bit(ADRENO_DEVICE_DMS, &adreno_dev->priv); + adreno_dev->dms_enabled = true; + } + + adreno_dev->irq_mask = GEN8_INT_MASK; + + return 0; +} + +int gen8_gmu_reset(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + + gen8_disable_gpu_irq(adreno_dev); + + gen8_gmu_irq_disable(adreno_dev); + + gen8_hfi_stop(adreno_dev); + + /* Hard reset the gmu and gpu */ + gen8_gmu_suspend(adreno_dev); + + gen8_reset_preempt_records(adreno_dev); + + clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + /* Attempt to reboot the gmu and gpu */ + return gen8_boot(adreno_dev); +} + +int 
gen8_gmu_hfi_probe(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct gen8_hfi *hfi = &gmu->hfi; + + hfi->irq = kgsl_request_irq(gmu->pdev, "hfi", + gen8_hfi_irq_handler, KGSL_DEVICE(adreno_dev)); + + return hfi->irq < 0 ? hfi->irq : 0; +} + +int gen8_gmu_add_to_minidump(struct adreno_device *adreno_dev) +{ + struct gen8_device *gen8_dev = container_of(adreno_dev, + struct gen8_device, adreno_dev); + int ret; + + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_GEN8_DEVICE, + (void *)(gen8_dev), sizeof(struct gen8_device)); + if (ret) + return ret; + + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_GMU_LOG_ENTRY, + gen8_dev->gmu.gmu_log->hostptr, gen8_dev->gmu.gmu_log->size); + if (ret) + return ret; + + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_HFIMEM_ENTRY, + gen8_dev->gmu.hfi.hfi_mem->hostptr, gen8_dev->gmu.hfi.hfi_mem->size); + + return ret; +} + +static int gen8_gmu_bind(struct device *dev, struct device *master, void *data) +{ + struct kgsl_device *device = dev_get_drvdata(master); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + const struct gen8_gpudev *gen8_gpudev = to_gen8_gpudev(gpudev); + int ret; + + ret = gen8_gmu_probe(device, to_platform_device(dev)); + if (ret) + return ret; + + if (gen8_gpudev->hfi_probe) { + ret = gen8_gpudev->hfi_probe(adreno_dev); + + if (ret) { + gen8_gmu_remove(device); + return ret; + } + } + + return 0; +} + +static void gen8_gmu_unbind(struct device *dev, struct device *master, + void *data) +{ + struct kgsl_device *device = dev_get_drvdata(master); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + const struct gen8_gpudev *gen8_gpudev = to_gen8_gpudev(gpudev); + + if (gen8_gpudev->hfi_remove) + gen8_gpudev->hfi_remove(adreno_dev); + + gen8_gmu_remove(device); +} + +static const 
struct component_ops gen8_gmu_component_ops = { + .bind = gen8_gmu_bind, + .unbind = gen8_gmu_unbind, +}; + +static int gen8_gmu_probe_dev(struct platform_device *pdev) +{ + return component_add(&pdev->dev, &gen8_gmu_component_ops); +} + +static int gen8_gmu_remove_dev(struct platform_device *pdev) +{ + component_del(&pdev->dev, &gen8_gmu_component_ops); + return 0; +} + +static const struct of_device_id gen8_gmu_match_table[] = { + { .compatible = "qcom,gen8-gmu" }, + { }, +}; + +struct platform_driver gen8_gmu_driver = { + .probe = gen8_gmu_probe_dev, + .remove = gen8_gmu_remove_dev, + .driver = { + .name = "adreno-gen8-gmu", + .of_match_table = gen8_gmu_match_table, + }, +}; diff --git a/adreno_gen8_gmu.h b/adreno_gen8_gmu.h new file mode 100644 index 0000000000..1ff5aa23d2 --- /dev/null +++ b/adreno_gen8_gmu.h @@ -0,0 +1,530 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ + +#ifndef __ADRENO_GEN8_GMU_H +#define __ADRENO_GEN8_GMU_H + +#include + +#include "adreno_gen8_hfi.h" +#include "kgsl_gmu_core.h" + +/** + * struct gen8_gmu_device - GMU device structure + * @ver: GMU Version information + * @irq: GMU interrupt number + * @fw_image: GMU FW image + * @hfi_mem: pointer to HFI shared memory + * @dump_mem: pointer to GMU debug dump memory + * @gmu_log: gmu event log memory + * @hfi: HFI controller + * @num_gpupwrlevels: number GPU frequencies in GPU freq table + * @num_bwlevel: number of GPU BW levels + * @num_cnocbwlevel: number CNOC BW levels + * @rpmh_votes: RPMh TCS command set for GPU, GMU voltage and bw scaling + * @cx_gdsc: CX headswitch that controls power of GMU and + * subsystem peripherals + * @gx_gdsc: GX headswitch that controls power of GPU subsystem + * @clks: GPU subsystem clocks required for GMU functionality + * @wakeup_pwrlevel: GPU wake up power/DCVS level in case different + * than default power level + * @idle_level: Minimal GPU idle power level + * @fault_count: GMU fault count + * @mailbox: Messages to AOP for ACD enable/disable go through this + * @log_wptr_retention: Store the log wptr offset on slumber + */ +struct gen8_gmu_device { + struct { + u32 core; + u32 core_dev; + u32 pwr; + u32 pwr_dev; + u32 hfi; + } ver; + struct platform_device *pdev; + int irq; + const struct firmware *fw_image; + struct kgsl_memdesc *dump_mem; + struct kgsl_memdesc *gmu_log; + /** @gmu_init_scratch: Memory to store the initial HFI messages */ + struct kgsl_memdesc *gmu_init_scratch; + /** @gpu_boot_scratch: Memory to store the bootup HFI messages */ + struct kgsl_memdesc *gpu_boot_scratch; + struct gen8_hfi hfi; + /** @pwrlevels: Array of GMU power levels */ + struct regulator *cx_gdsc; + struct regulator *gx_gdsc; + struct clk_bulk_data *clks; + /** @num_clks: Number of entries in the @clks array */ + int num_clks; + u32 idle_level; + /** @freqs: Array of GMU frequencies */ + u32 freqs[GMU_MAX_PWRLEVELS]; + /** @vlvls: 
Array of GMU voltage levels */ + u32 vlvls[GMU_MAX_PWRLEVELS]; + struct kgsl_mailbox mailbox; + /** @gmu_globals: Array to store gmu global buffers */ + struct kgsl_memdesc gmu_globals[GMU_KERNEL_ENTRIES]; + /** @global_entries: To keep track of number of gmu buffers */ + u32 global_entries; + struct gmu_vma_entry *vma; + u32 log_wptr_retention; + /** @cm3_fault: whether gmu received a cm3 fault interrupt */ + atomic_t cm3_fault; + /** + * @itcm_shadow: Copy of the itcm block in firmware binary used for + * snapshot + */ + void *itcm_shadow; + /** @flags: Internal gmu flags */ + unsigned long flags; + /** @rscc_virt: Pointer where RSCC block is mapped */ + void __iomem *rscc_virt; + /** @domain: IOMMU domain for the kernel context */ + struct iommu_domain *domain; + /** @log_stream_enable: GMU log streaming enable. Disabled by default */ + bool log_stream_enable; + /** @log_group_mask: Allows overriding default GMU log group mask */ + u32 log_group_mask; + struct kobject log_kobj; + /* + * @perf_ddr_bw: The lowest ddr bandwidth that puts CX at a corner at + * which GMU can run at higher frequency. 
+ */ + u32 perf_ddr_bw; + /** @rdpm_cx_virt: Pointer where the RDPM CX block is mapped */ + void __iomem *rdpm_cx_virt; + /** @rdpm_mx_virt: Pointer where the RDPM MX block is mapped */ + void __iomem *rdpm_mx_virt; + /** @num_oob_perfcntr: Number of active oob_perfcntr requests */ + u32 num_oob_perfcntr; + /** @acd_debug_val: DVM value to calibrate ACD for a level */ + u32 acd_debug_val; + /** @gdsc_nb: Notifier block for cx gdsc regulator */ + struct notifier_block gdsc_nb; + /** @gdsc_gate: Completion to signal cx gdsc collapse status */ + struct completion gdsc_gate; + /** @stats_enable: GMU stats feature enable */ + bool stats_enable; + /** @stats_mask: GMU performance countables to enable */ + u32 stats_mask; + /** @stats_interval: GMU performance counters sampling interval */ + u32 stats_interval; + /** @stats_kobj: kernel object for GMU stats directory in sysfs */ + struct kobject stats_kobj; + /** @cp_init_hdr: raw command header for cp_init */ + u32 cp_init_hdr; + /** @switch_to_unsec_hdr: raw command header for switch to unsecure packet */ + u32 switch_to_unsec_hdr; +}; + +/* Helper function to get to gen8 gmu device from adreno device */ +struct gen8_gmu_device *to_gen8_gmu(struct adreno_device *adreno_dev); + +/* Helper function to get to adreno device from gen8 gmu device */ +struct adreno_device *gen8_gmu_to_adreno(struct gen8_gmu_device *gmu); + +/** + * gen8_reserve_gmu_kernel_block() - Allocate a global gmu buffer + * @gmu: Pointer to the gen8 gmu device + * @addr: Desired gmu virtual address + * @size: Size of the buffer in bytes + * @vma_id: Target gmu vma where this buffer should be mapped + * @align: Alignment for the GMU VA and GMU mapping size + * + * This function allocates a global gmu buffer and maps it in + * the desired gmu vma + * + * Return: Pointer to the memory descriptor or error pointer on failure + */ +struct kgsl_memdesc *gen8_reserve_gmu_kernel_block(struct gen8_gmu_device *gmu, + u32 addr, u32 size, u32 vma_id, u32 align); + 
+/** + * gen8_reserve_gmu_kernel_block_fixed() - Maps phyical resource address to gmu + * @gmu: Pointer to the gen8 gmu device + * @addr: Desired gmu virtual address + * @size: Size of the buffer in bytes + * @vma_id: Target gmu vma where this buffer should be mapped + * @resource: Name of the resource to get the size and address to allocate + * @attrs: Attributes for the mapping + * @align: Alignment for the GMU VA and GMU mapping size + * + * This function maps the physcial resource address to desired gmu vma + * + * Return: Pointer to the memory descriptor or error pointer on failure + */ +struct kgsl_memdesc *gen8_reserve_gmu_kernel_block_fixed(struct gen8_gmu_device *gmu, + u32 addr, u32 size, u32 vma_id, const char *resource, int attrs, u32 align); + +/** + * gen8_alloc_gmu_kernel_block() - Allocate a gmu buffer + * @gmu: Pointer to the gen8 gmu device + * @md: Pointer to the memdesc + * @size: Size of the buffer in bytes + * @vma_id: Target gmu vma where this buffer should be mapped + * @attrs: Attributes for the mapping + * + * This function allocates a buffer and maps it in the desired gmu vma + * + * Return: 0 on success or error code on failure + */ +int gen8_alloc_gmu_kernel_block(struct gen8_gmu_device *gmu, + struct kgsl_memdesc *md, u32 size, u32 vma_id, int attrs); + +/** + * gen8_gmu_import_buffer() - Import a gmu buffer + * @gmu: Pointer to the gen8 gmu device + * @vma_id: Target gmu vma where this buffer should be mapped + * @md: Pointer to the memdesc to be mapped + * @attrs: Attributes for the mapping + * @align: Alignment for the GMU VA and GMU mapping size + * + * This function imports and maps a buffer to a gmu vma + * + * Return: 0 on success or error code on failure + */ +int gen8_gmu_import_buffer(struct gen8_gmu_device *gmu, u32 vma_id, + struct kgsl_memdesc *md, u32 attrs, u32 align); + +/** + * gen8_free_gmu_block() - Free a gmu buffer + * @gmu: Pointer to the gen8 gmu device + * @md: Pointer to the memdesc that is to be freed + * + * 
This function frees a gmu block allocated by gen8_reserve_gmu_kernel_block() + */ +void gen8_free_gmu_block(struct gen8_gmu_device *gmu, struct kgsl_memdesc *md); + +/** + * gen8_build_rpmh_tables - Build the rpmh tables + * @adreno_dev: Pointer to the adreno device + * + * This function creates the gpu dcvs and bw tables + * + * Return: 0 on success and negative error on failure + */ +int gen8_build_rpmh_tables(struct adreno_device *adreno_dev); + +/** + * gen8_gmu_gx_is_on - Check if GX is on + * @adreno_dev: Pointer to the adreno device + * + * This function reads pwr status registers to check if GX + * is on or off + */ +bool gen8_gmu_gx_is_on(struct adreno_device *adreno_dev); + +/** + * gen8_gmu_device_probe - GEN8 GMU snapshot function + * @pdev: Pointer to the platform device + * @chipid: Chipid of the target + * @gpucore: Pointer to the gpucore + * + * The target specific probe function for gmu based gen8 targets. + */ +int gen8_gmu_device_probe(struct platform_device *pdev, + u32 chipid, const struct adreno_gpu_core *gpucore); + +/** + * gen8_gmu_reset - Reset and restart the gmu + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen8_gmu_reset(struct adreno_device *adreno_dev); + +/** + * gen8_enable_gpu_irq - Enable gpu interrupt + * @adreno_dev: Pointer to the adreno device + */ +void gen8_enable_gpu_irq(struct adreno_device *adreno_dev); + +/** + * gen8_disable_gpu_irq - Disable gpu interrupt + * @adreno_dev: Pointer to the adreno device + */ +void gen8_disable_gpu_irq(struct adreno_device *adreno_dev); + +/** + * gen8_gmu_snapshot- Take snapshot for gmu targets + * @adreno_dev: Pointer to the adreno device + * @snapshot: Pointer to the snapshot structure + * + * Send an NMI to gmu if we hit a gmu fault. 
 * Then take gmu
 * snapshot and carry on with rest of the gen8 snapshot
 */
void gen8_gmu_snapshot(struct adreno_device *adreno_dev,
	struct kgsl_snapshot *snapshot);

/**
 * gen8_gmu_probe - Probe gen8 gmu resources
 * @device: Pointer to the kgsl device
 * @pdev: Pointer to the gmu platform device
 *
 * Probe the gmu and hfi resources
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_gmu_probe(struct kgsl_device *device,
	struct platform_device *pdev);

/**
 * gen8_gmu_parse_fw - Parse the gmu fw binary
 * @adreno_dev: Pointer to the adreno device
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_gmu_parse_fw(struct adreno_device *adreno_dev);

/**
 * gen8_gmu_memory_init - Allocate gmu memory
 * @adreno_dev: Pointer to the adreno device
 *
 * Allocates the gmu log buffer and others if needed.
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_gmu_memory_init(struct adreno_device *adreno_dev);

/**
 * gen8_gmu_aop_send_acd_state - Enable or disable acd feature in aop
 * @gmu: Pointer to the gen8 gmu device
 * @flag: Boolean to enable or disable acd in aop
 *
 * This function enables or disables gpu acd feature using mailbox
 */
void gen8_gmu_aop_send_acd_state(struct gen8_gmu_device *gmu, bool flag);

/**
 * gen8_gmu_enable_gdsc - Enable gmu gdsc
 * @adreno_dev: Pointer to the adreno device
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_gmu_enable_gdsc(struct adreno_device *adreno_dev);

/**
 * gen8_gmu_load_fw - Load gmu firmware
 * @adreno_dev: Pointer to the adreno device
 *
 * Loads the gmu firmware binary into TCMs and memory
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_gmu_load_fw(struct adreno_device *adreno_dev);

/**
 * gen8_gmu_device_start - Bring gmu out of reset
 * @adreno_dev: Pointer to the adreno device
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_gmu_device_start(struct adreno_device *adreno_dev);

/**
 * gen8_gmu_hfi_start - Indicate hfi start to gmu
 * @adreno_dev: Pointer to the adreno device
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_gmu_hfi_start(struct adreno_device *adreno_dev);

/**
 * gen8_gmu_itcm_shadow - Create itcm shadow copy for snapshot
 * @adreno_dev: Pointer to the adreno device
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_gmu_itcm_shadow(struct adreno_device *adreno_dev);

/**
 * gen8_gmu_register_config - gmu register configuration
 * @adreno_dev: Pointer to the adreno device
 *
 * Program gmu registers based on features
 */
void gen8_gmu_register_config(struct adreno_device *adreno_dev);

/**
 * gen8_gmu_version_info - Get gmu firmware version
 * @adreno_dev: Pointer to the adreno device
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_gmu_version_info(struct adreno_device *adreno_dev);

/**
 * gen8_gmu_irq_enable - Enable gmu interrupts
 * @adreno_dev: Pointer to the adreno device
 */
void gen8_gmu_irq_enable(struct adreno_device *adreno_dev);

/**
 * gen8_gmu_irq_disable - Disable gmu interrupts
 * @adreno_dev: Pointer to the adreno device
 */
void gen8_gmu_irq_disable(struct adreno_device *adreno_dev);

/**
 * gen8_gmu_suspend - Hard reset the gpu and gmu
 * @adreno_dev: Pointer to the adreno device
 *
 * In case we hit a gmu fault, hard reset the gpu and gmu
 * to recover from the fault
 */
void gen8_gmu_suspend(struct adreno_device *adreno_dev);

/**
 * gen8_gmu_oob_set - send gmu oob request
 * @device: Pointer to the kgsl device
 * @oob: Type of oob request as defined in enum oob_request
 *
 * Request gmu to keep gpu powered up till the oob is cleared
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_gmu_oob_set(struct kgsl_device *device, enum oob_request oob);

/**
 * gen8_gmu_oob_clear - clear an asserted oob request
 * @device: Pointer to the kgsl device
 * @oob: Type of oob request as defined in enum oob_request
 *
 * Clear a previously requested oob so that gmu can power
 * collapse the gpu
 */
void gen8_gmu_oob_clear(struct kgsl_device *device, enum oob_request oob);

/**
 * gen8_gmu_wait_for_lowest_idle - wait for gmu to complete ifpc
 * @adreno_dev: Pointer to the adreno device
 *
 * If ifpc is enabled, wait for gmu to put gpu into ifpc.
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev);

/**
 * gen8_gmu_wait_for_idle - Wait for gmu to become idle
 * @adreno_dev: Pointer to the adreno device
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_gmu_wait_for_idle(struct adreno_device *adreno_dev);

/**
 * gen8_rscc_sleep_sequence - Trigger rscc sleep sequence
 * @adreno_dev: Pointer to the adreno device
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_rscc_sleep_sequence(struct adreno_device *adreno_dev);

/**
 * gen8_rscc_wakeup_sequence - Trigger rscc wakeup sequence
 * @adreno_dev: Pointer to the adreno device
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_rscc_wakeup_sequence(struct adreno_device *adreno_dev);

/**
 * gen8_halt_gbif - Halt CX and GX requests in GBIF
 * @adreno_dev: Pointer to the adreno device
 *
 * Clear any pending GX or CX transactions in GBIF and
 * deassert GBIF halt
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_halt_gbif(struct adreno_device *adreno_dev);

/**
 * gen8_load_pdc_ucode - Load and enable pdc sequence
 * @adreno_dev: Pointer to the adreno device
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_load_pdc_ucode(struct adreno_device *adreno_dev);

/**
 * gen8_load_rsc_ucode - Load rscc sequence
 * @adreno_dev: Pointer to the adreno device
 */
void gen8_load_rsc_ucode(struct adreno_device *adreno_dev);

/**
 * gen8_gmu_remove - Clean up gmu probed resources
 * @device: Pointer to the kgsl device
 */
void gen8_gmu_remove(struct kgsl_device *device);

/**
 * gen8_gmu_enable_clks - Enable gmu clocks
 * @adreno_dev: Pointer to the adreno device
 * @level: GMU frequency level
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_gmu_enable_clks(struct adreno_device *adreno_dev, u32 level);

/**
 * gen8_gmu_enable_gdsc - Enable gmu gdsc
 * @adreno_dev: Pointer to the adreno device
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_gmu_enable_gdsc(struct adreno_device *adreno_dev);

/**
 * gen8_gmu_disable_gdsc - Disable gmu gdsc
 * @adreno_dev: Pointer to the adreno device
 */
void gen8_gmu_disable_gdsc(struct adreno_device *adreno_dev);

/**
 * gen8_gmu_handle_watchdog - Handle watchdog interrupt
 * @adreno_dev: Pointer to the adreno device
 */
void gen8_gmu_handle_watchdog(struct adreno_device *adreno_dev);

/**
 * gen8_gmu_send_nmi - Send NMI to GMU
 * @device: Pointer to the kgsl device
 * @force: Boolean to forcefully send NMI irrespective of GMU state
 */
void gen8_gmu_send_nmi(struct kgsl_device *device, bool force);

/**
 * gen8_gmu_add_to_minidump - Register gen8_device with va minidump
 * @adreno_dev: Pointer to the adreno device
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_gmu_add_to_minidump(struct adreno_device *adreno_dev);

/**
 * gen8_snapshot_gmu_mem - Snapshot a GMU memory descriptor
 * @device: Pointer to the kgsl device
 * @buf: Destination snapshot buffer
 * @remain: Remaining size of the snapshot buffer
 * @priv: Opaque handle
 *
 * Return: Number of bytes written to snapshot buffer
 */
size_t gen8_snapshot_gmu_mem(struct kgsl_device *device,
	u8 *buf, size_t remain, void *priv);

/**
 * gen8_bus_ab_quantize - Calculate the AB vote that needs to be sent to GMU
 * @adreno_dev: Handle to the adreno device
 * @ab: ab request that needs to be scaled in MBps
 *
 * Returns the AB value that needs to be prefixed to bandwidth vote in kbps
 */
u32 gen8_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab);

#endif
diff --git a/adreno_gen8_hfi.c b/adreno_gen8_hfi.c
new file mode 100644
index 0000000000..b289ad076a
--- /dev/null
+++ b/adreno_gen8_hfi.c
@@ -0,0 +1,717 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2021, The Linux Foundation. All rights reserved.
 * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
 */

/*
 * NOTE(review): the angle-bracket targets of the two includes below were
 * lost when this patch was extracted — restore from the upstream commit.
 */
#include
#include

#include "adreno.h"
#include "adreno_gen8.h"
#include "adreno_gen8_hfi.h"
#include "kgsl_device.h"
#include "kgsl_trace.h"

/* Below section is for all structures related to HFI queues */
#define HFI_QUEUE_MAX HFI_QUEUE_DEFAULT_CNT

/* Total header sizes + queue sizes + 16 for alignment */
#define HFIMEM_SIZE (sizeof(struct hfi_queue_table) + 16 + \
	(HFI_QUEUE_SIZE * HFI_QUEUE_MAX))

#define HOST_QUEUE_START_ADDR(hfi_mem, i) \
	((hfi_mem)->hostptr + HFI_QUEUE_OFFSET(i))

/* Return the hfi struct embedded in the gen8 gmu device */
struct gen8_hfi *to_gen8_hfi(struct adreno_device *adreno_dev)
{
	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);

	return &gmu->hfi;
}

/*
 * Size in below functions are in unit of dwords.
 *
 * Read one HFI packet from @queue_idx into @output (@max_size is in bytes).
 * Returns the packet size in dwords on success, -ENODATA when the queue is
 * empty, -EMSGSIZE when the packet does not fit, -EINVAL when disabled.
 */
int gen8_hfi_queue_read(struct gen8_gmu_device *gmu, u32 queue_idx,
	u32 *output, u32 max_size)
{
	struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
	struct hfi_queue_table *tbl = mem_addr->hostptr;
	struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx];
	u32 *queue;
	u32 msg_hdr;
	u32 i, read;
	u32 size;
	int result = 0;

	if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
		return -EINVAL;

	if (hdr->read_index == hdr->write_index)
		return -ENODATA;

	/* Clear the output data before populating */
	memset(output, 0, max_size);

	queue = HOST_QUEUE_START_ADDR(mem_addr, queue_idx);
	msg_hdr = queue[hdr->read_index];
	size = MSG_HDR_GET_SIZE(msg_hdr);

	if (size > (max_size >> 2)) {
		dev_err(&gmu->pdev->dev,
			"HFI message too big: hdr:0x%x rd idx=%d\n",
			msg_hdr, hdr->read_index);
		result = -EMSGSIZE;
		goto done;
	}

	read = hdr->read_index;

	if (read < hdr->queue_size) {
		for (i = 0; i < size && i < (max_size >> 2); i++) {
			output[i] = queue[read];
			read = (read + 1)%hdr->queue_size;
		}
		result = size;
	} else {
		/* In case FW messed up */
		dev_err(&gmu->pdev->dev,
			"Read index %d greater than queue size %d\n",
			hdr->read_index, hdr->queue_size);
		result = -ENODATA;
	}

	/* Packets are written 4-dword aligned; advance past the padding */
	read = ALIGN(read, SZ_4) % hdr->queue_size;

	hfi_update_read_idx(hdr, read);

	/* For acks, trace the packet for which this ack was sent */
	if (MSG_HDR_GET_TYPE(msg_hdr) == HFI_MSG_ACK)
		trace_kgsl_hfi_receive(MSG_HDR_GET_ID(output[1]),
			MSG_HDR_GET_SIZE(output[1]),
			MSG_HDR_GET_SEQNUM(output[1]));
	else
		trace_kgsl_hfi_receive(MSG_HDR_GET_ID(msg_hdr),
			MSG_HDR_GET_SIZE(msg_hdr), MSG_HDR_GET_SEQNUM(msg_hdr));

done:
	return result;
}

/*
 * Write @msg (@size_bytes, must be dword aligned) into @queue_idx, padding
 * the packet to a 4-dword boundary with the 0xfafafafa cookie. Returns 0,
 * -ENOSPC when the queue is full, or -EINVAL on a disabled/unaligned write.
 * No lock is taken here; cmdq writers serialize via gen8_hfi_cmdq_write().
 */
int gen8_hfi_queue_write(struct adreno_device *adreno_dev, u32 queue_idx,
	u32 *msg, u32 size_bytes)
{
	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
	struct hfi_queue_table *tbl = gmu->hfi.hfi_mem->hostptr;
	struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx];
	u32 *queue;
	u32 i, write_idx, read_idx, empty_space;
	u32 size_dwords = size_bytes >> 2;
	u32 align_size = ALIGN(size_dwords, SZ_4);
	u32 id = MSG_HDR_GET_ID(*msg);

	if (hdr->status == HFI_QUEUE_STATUS_DISABLED || !IS_ALIGNED(size_bytes, sizeof(u32)))
		return -EINVAL;

	queue = HOST_QUEUE_START_ADDR(gmu->hfi.hfi_mem, queue_idx);

	trace_kgsl_hfi_send(id, size_dwords, MSG_HDR_GET_SEQNUM(*msg));

	write_idx = hdr->write_index;
	read_idx = hdr->read_index;

	empty_space = (write_idx >= read_idx) ?
			(hdr->queue_size - (write_idx - read_idx))
			: (read_idx - write_idx);

	if (empty_space <= align_size)
		return -ENOSPC;

	*msg = MSG_HDR_SET_SIZE(*msg, size_dwords);

	for (i = 0; i < size_dwords; i++) {
		queue[write_idx] = msg[i];
		write_idx = (write_idx + 1) % hdr->queue_size;
	}

	/* Cookify any non used data at the end of the write buffer */
	for (; i < align_size; i++) {
		queue[write_idx] = 0xfafafafa;
		write_idx = (write_idx + 1) % hdr->queue_size;
	}

	hfi_update_write_idx(&hdr->write_index, write_idx);

	return 0;
}

/*
 * Serialized write to the command queue: takes cmdq_lock, applies the
 * warmboot record bit when enabled for this message id, and rings the
 * host-to-gmu doorbell on success.
 */
int gen8_hfi_cmdq_write(struct adreno_device *adreno_dev, u32 *msg, u32 size_bytes)
{
	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
	struct gen8_hfi *hfi = &gmu->hfi;
	int ret;

	spin_lock(&hfi->cmdq_lock);

	if (test_bit(MSG_HDR_GET_ID(msg[0]), hfi->wb_set_record_bitmask))
		*msg = RECORD_MSG_HDR(*msg);

	ret = gen8_hfi_queue_write(adreno_dev, HFI_CMD_ID, msg, size_bytes);

	/*
	 * Some messages like ACD table and perf table are saved in memory, so we need
	 * to reset the header to make sure we do not send a record enabled bit incase
	 * we change the warmboot setting from debugfs
	 */
	*msg = CLEAR_RECORD_MSG_HDR(*msg);
	/*
	 * Memory barrier to make sure packet and write index are written before
	 * an interrupt is raised
	 */
	wmb();

	/* Send interrupt to GMU to receive the message */
	if (!ret)
		gmu_core_regwrite(KGSL_DEVICE(adreno_dev),
			GEN8_GMUCX_HOST2GMU_INTR_SET, 0x1);

	spin_unlock(&hfi->cmdq_lock);

	return ret;
}

/* Sizes of the queue and message are in unit of dwords */
static void init_queues(struct adreno_device *adreno_dev)
{
	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
	struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
	int i;
	struct hfi_queue_table *tbl;
	struct hfi_queue_header *hdr;
	struct {
		u32 idx;
		u32 pri;
		u32 status;
	} queue[HFI_QUEUE_MAX] = {
		{ HFI_CMD_ID, HFI_CMD_PRI, HFI_QUEUE_STATUS_ENABLED },
		{ HFI_MSG_ID, HFI_MSG_PRI, HFI_QUEUE_STATUS_ENABLED },
		{ HFI_DBG_ID, HFI_DBG_PRI, HFI_QUEUE_STATUS_ENABLED },
	};

	/* Fill Table Header */
	tbl = mem_addr->hostptr;
	tbl->qtbl_hdr.version = 0;
	tbl->qtbl_hdr.size = sizeof(struct hfi_queue_table) >> 2;
	tbl->qtbl_hdr.qhdr0_offset = sizeof(struct hfi_queue_table_header) >> 2;
	tbl->qtbl_hdr.qhdr_size = sizeof(struct hfi_queue_header) >> 2;
	tbl->qtbl_hdr.num_q = HFI_QUEUE_MAX;
	tbl->qtbl_hdr.num_active_q = HFI_QUEUE_MAX;

	memset(&tbl->qhdr[0], 0, sizeof(tbl->qhdr));

	/* Fill Individual Queue Headers */
	for (i = 0; i < HFI_QUEUE_MAX; i++) {
		hdr = &tbl->qhdr[i];
		hdr->start_addr = GMU_QUEUE_START_ADDR(mem_addr->gmuaddr, i);
		hdr->type = QUEUE_HDR_TYPE(queue[i].idx, queue[i].pri, 0, 0);
		hdr->status = queue[i].status;
		hdr->queue_size = HFI_QUEUE_SIZE >> 2; /* convert to dwords */
	}
}

/* One-time allocation and setup of the HFI queue memory (idempotent) */
int gen8_hfi_init(struct adreno_device *adreno_dev)
{
	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
	struct gen8_hfi *hfi = &gmu->hfi;

	/* Allocates & maps memory for HFI */
	if (IS_ERR_OR_NULL(hfi->hfi_mem)) {
		hfi->hfi_mem = gen8_reserve_gmu_kernel_block(gmu, 0,
			HFIMEM_SIZE, GMU_NONCACHED_KERNEL, 0);
		if (!IS_ERR(hfi->hfi_mem))
			init_queues(adreno_dev);
	}

	return PTR_ERR_OR_ZERO(hfi->hfi_mem);
}

/*
 * Match an incoming ack (@rcvd) against the request recorded in @ret_cmd
 * by sequence number and copy the results out; -ENODEV if the ack does
 * not belong to the waiter.
 */
int gen8_receive_ack_cmd(struct gen8_gmu_device *gmu, void *rcvd,
	struct pending_cmd *ret_cmd)
{
	struct adreno_device *adreno_dev = gen8_gmu_to_adreno(gmu);
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	u32 *ack = rcvd;
	u32 hdr = ack[0];
	u32 req_hdr = ack[1];

	if (ret_cmd == NULL)
		return -EINVAL;

	if (HDR_CMP_SEQNUM(ret_cmd->sent_hdr, req_hdr)) {
		memcpy(&ret_cmd->results, ack, MSG_HDR_GET_SIZE(hdr) << 2);
		return 0;
	}

	/* Didn't find the sender, list the waiter */
	dev_err_ratelimited(&gmu->pdev->dev,
		"HFI ACK: Cannot find sender for 0x%8.8x Waiter: 0x%8.8x\n",
		req_hdr, ret_cmd->sent_hdr);

	gmu_core_fault_snapshot(device);

	return -ENODEV;
}

/*
 * Poll @offsetdwords until (val & mask) == expected_val or @timeout_ms
 * expires; aborts early if the GMU reports an NMI/reset in progress.
 */
static int poll_gmu_reg(struct adreno_device *adreno_dev,
	u32 offsetdwords, u32 expected_val,
	u32 mask, u32 timeout_ms)
{
	u32 val;
	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
	bool nmi = false;

	while (time_is_after_jiffies(timeout)) {
		gmu_core_regread(device, offsetdwords, &val);
		if ((val & mask) == expected_val)
			return 0;

		/*
		 * If GMU firmware fails any assertion, error message is sent
		 * to KMD and NMI is triggered. So check if GMU is in NMI and
		 * timeout early. Bits [11:9] of GEN8_GMUCX_CM3_FW_INIT_RESULT
		 * contain GMU reset status. Non zero value here indicates that
		 * GMU reset is active, NMI handler would eventually complete
		 * and GMU would wait for recovery.
		 */
		gmu_core_regread(device, GEN8_GMUCX_CM3_FW_INIT_RESULT, &val);
		if (val & 0xE00) {
			nmi = true;
			break;
		}

		usleep_range(10, 100);
	}

	/* Check one last time */
	gmu_core_regread(device, offsetdwords, &val);
	if ((val & mask) == expected_val)
		return 0;

	dev_err(&gmu->pdev->dev,
		"Reg poll %s: offset 0x%x, want 0x%x, got 0x%x\n",
		nmi ? "abort" : "timeout", offsetdwords, expected_val,
		val & mask);

	return -ETIMEDOUT;
}

/*
 * Stamp a sequence number on @data, write it to the command queue and,
 * when @ret_cmd is non-NULL, poll for the MSGQ interrupt and process the
 * message queue to collect the matching ack.
 */
static int gen8_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev,
	void *data, u32 size_bytes, struct pending_cmd *ret_cmd)
{
	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	int rc;
	u32 *cmd = data;
	struct gen8_hfi *hfi = &gmu->hfi;
	u32 seqnum = atomic_inc_return(&hfi->seqnum);

	*cmd = MSG_HDR_SET_SEQNUM(*cmd, seqnum);
	if (ret_cmd == NULL)
		return gen8_hfi_cmdq_write(adreno_dev, cmd, size_bytes);

	ret_cmd->sent_hdr = cmd[0];

	rc = gen8_hfi_cmdq_write(adreno_dev, cmd, size_bytes);
	if (rc)
		return rc;

	rc = poll_gmu_reg(adreno_dev, GEN8_GMUCX_GMU2HOST_INTR_INFO,
		HFI_IRQ_MSGQ_MASK, HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT);

	if (rc) {
		gmu_core_fault_snapshot(device);
		dev_err(&gmu->pdev->dev,
			"Timed out waiting on ack for 0x%8.8x (id %d, sequence %d)\n",
			cmd[0], MSG_HDR_GET_ID(*cmd), MSG_HDR_GET_SEQNUM(*cmd));
		return rc;
	}

	/* Clear the interrupt */
	gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR,
		HFI_IRQ_MSGQ_MASK);

	rc = gen8_hfi_process_queue(gmu, HFI_MSG_ID, ret_cmd);

	return rc;
}

/* Send a packet and fail with -EINVAL if the ack carries a non-zero result */
int gen8_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd, u32 size_bytes)
{
	struct pending_cmd ret_cmd;
	int rc;

	memset(&ret_cmd, 0, sizeof(ret_cmd));

	rc = gen8_hfi_send_cmd_wait_inline(adreno_dev, cmd, size_bytes, &ret_cmd);
	if (rc)
		return rc;

	if (ret_cmd.results[2]) {
		struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
		struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

		gmu_core_fault_snapshot(device);
		dev_err(&gmu->pdev->dev,
			"HFI ACK failure: Req=0x%8.8X, Result=0x%8.8X\n",
			ret_cmd.results[1],
			ret_cmd.results[2]);
		return -EINVAL;
	}

	return 0;
}

/* Tell the GMU firmware the core is ready to start */
int gen8_hfi_send_core_fw_start(struct adreno_device *adreno_dev)
{
	struct hfi_core_fw_start_cmd cmd = {
		.handle = 0x0,
	};
	int ret;

	ret = CMD_MSG_HDR(cmd, H2F_MSG_CORE_FW_START);
	if (ret)
		return ret;

	return gen8_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
}

/* Human-readable name for an HFI feature id (error messages only) */
static const char *feature_to_string(u32 feature)
{
	if (feature == HFI_FEATURE_ACD)
		return "ACD";
	else if (feature == HFI_FEATURE_LM)
		return "LM";

	return "unknown";
}

/* For sending hfi message inline to handle GMU return type error */
int gen8_hfi_send_generic_req_v5(struct adreno_device *adreno_dev, void *cmd,
	struct pending_cmd *ret_cmd, u32 size_bytes)
{
	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
	int rc;

	/* Pre-v5 firmware has no return-type field; fall back to the old path */
	if (GMU_VER_MINOR(gmu->ver.hfi) <= 4)
		return gen8_hfi_send_generic_req(adreno_dev, cmd, size_bytes);

	rc = gen8_hfi_send_cmd_wait_inline(adreno_dev, cmd, size_bytes, ret_cmd);
	if (rc)
		return rc;

	switch (ret_cmd->results[3]) {
	case GMU_SUCCESS:
		rc = ret_cmd->results[2];
		break;
	case GMU_ERROR_NO_ENTRY:
		/* Unique error to handle undefined HFI msgs by caller */
		rc = -ENOENT;
		break;
	case GMU_ERROR_TIMEOUT:
		rc = -EINVAL;
		break;
	default:
		gmu_core_fault_snapshot(KGSL_DEVICE(adreno_dev));
		dev_err(&gmu->pdev->dev,
			"HFI ACK: Req=0x%8.8X, Result=0x%8.8X Error:0x%8.8X\n",
			ret_cmd->results[1], ret_cmd->results[2], ret_cmd->results[3]);
		rc = -EINVAL;
		break;
	}

	return rc;
}

/* Enable/disable a GMU feature; @data is the feature-specific payload */
int gen8_hfi_send_feature_ctrl(struct adreno_device *adreno_dev,
	u32 feature, u32 enable, u32 data)
{
	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
	struct pending_cmd ret_cmd = {0};
	struct hfi_feature_ctrl_cmd cmd = {
		.feature = feature,
		.enable = enable,
		.data = data,
	};
	int ret;

	ret = CMD_MSG_HDR(cmd, H2F_MSG_FEATURE_CTRL);
	if (ret)
		return ret;

	ret = gen8_hfi_send_generic_req_v5(adreno_dev, &cmd, &ret_cmd, sizeof(cmd));
	if (ret < 0)
		dev_err(&gmu->pdev->dev,
			"Unable to %s feature %s (%d)\n",
			enable ? "enable" : "disable",
			feature_to_string(feature),
			feature);
	return ret;
}

/* Query a value from the GMU; result (or error) is the return value */
int gen8_hfi_send_get_value(struct adreno_device *adreno_dev, u32 type, u32 subtype)
{
	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
	struct pending_cmd ret_cmd = {0};
	struct hfi_get_value_cmd cmd = {
		.type = type,
		.subtype = subtype,
	};
	int ret;

	ret = CMD_MSG_HDR(cmd, H2F_MSG_GET_VALUE);
	if (ret)
		return ret;

	ret = gen8_hfi_send_generic_req_v5(adreno_dev, &cmd, &ret_cmd, sizeof(cmd));
	if (ret < 0)
		dev_err(&gmu->pdev->dev,
			"Unable to get HFI Value type: %d, subtype: %d, error = %d\n",
			type, subtype, ret);

	return ret;
}

/* Set a GMU value identified by @type/@subtype to @data */
int gen8_hfi_send_set_value(struct adreno_device *adreno_dev,
	u32 type, u32 subtype, u32 data)
{
	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
	struct pending_cmd ret_cmd = {0};
	struct hfi_set_value_cmd cmd = {
		.type = type,
		.subtype = subtype,
		.data = data,
	};
	int ret;

	ret = CMD_MSG_HDR(cmd, H2F_MSG_SET_VALUE);
	if (ret)
		return ret;

	ret = gen8_hfi_send_generic_req_v5(adreno_dev, &cmd, &ret_cmd, sizeof(cmd));
	if (ret < 0)
		dev_err(&gmu->pdev->dev,
			"Unable to set HFI Value %d, %d to %d, error = %d\n",
			type, subtype, data, ret);
	return ret;
}

/* Log an unsolicited F2H error packet from the GMU */
void adreno_gen8_receive_err_req(struct gen8_gmu_device *gmu, void *rcvd)
{
	struct hfi_err_cmd *cmd = rcvd;

	dev_err(&gmu->pdev->dev, "HFI Error Received: %d %d %.16s\n",
			((cmd->error_code >> 16) & 0xffff),
			(cmd->error_code & 0xffff),
			(char *) cmd->data);
}

/* Log an unsolicited F2H debug packet from the GMU */
void adreno_gen8_receive_debug_req(struct gen8_gmu_device *gmu, void *rcvd)
{
	struct hfi_debug_cmd *cmd = rcvd;

	dev_dbg(&gmu->pdev->dev, "HFI Debug Received: %d %d %d\n",
			cmd->type, cmd->timestamp, cmd->data);
}

/*
 * Drain @queue_idx: acks are matched against @ret_cmd, error/debug
 * requests are logged, everything else is reported as unsupported.
 */
int gen8_hfi_process_queue(struct gen8_gmu_device *gmu,
	u32 queue_idx, struct pending_cmd *ret_cmd)
{
	u32 rcvd[MAX_RCVD_SIZE];

	while (gen8_hfi_queue_read(gmu, queue_idx, rcvd, sizeof(rcvd)) > 0) {
		/* ACK Handler */
		if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) {
			int ret = gen8_receive_ack_cmd(gmu, rcvd, ret_cmd);

			if (ret)
				return ret;
			continue;
		}

		/* Request Handler */
		switch (MSG_HDR_GET_ID(rcvd[0])) {
		case F2H_MSG_ERR: /* No Reply */
			adreno_gen8_receive_err_req(gmu, rcvd);
			break;
		case F2H_MSG_DEBUG: /* No Reply */
			adreno_gen8_receive_debug_req(gmu, rcvd);
			break;
		default: /* No Reply */
			dev_err(&gmu->pdev->dev,
				"HFI request %d not supported\n",
				MSG_HDR_GET_ID(rcvd[0]));
			break;
		}
	}

	return 0;
}

/* Send the BCL feature packet when BCL is enabled on this device */
int gen8_hfi_send_bcl_feature_ctrl(struct adreno_device *adreno_dev)
{
	if (!adreno_dev->bcl_enabled)
		return 0;

	/*
	 * BCL data is expected by gmu in below format
	 * BIT[0] - response type
	 * BIT[1:7] - Throttle level 1 (optional)
	 * BIT[8:14] - Throttle level 2 (optional)
	 * BIT[15:21] - Throttle level 3 (optional)
	 */
	return gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_BCL, 1, adreno_dev->bcl_data);
}

#define EVENT_PWR_ACD_THROTTLE_PROF 44

/*
 * When ACD is enabled: turn on the feature, send the ACD table and
 * (best effort — failure ignored) enable the throttle-profile log event.
 */
int gen8_hfi_send_acd_feature_ctrl(struct adreno_device *adreno_dev)
{
	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
	int ret = 0;

	if (adreno_dev->acd_enabled) {
		ret = gen8_hfi_send_feature_ctrl(adreno_dev,
			HFI_FEATURE_ACD, 1, 0);
		if (ret)
			return ret;

		ret = gen8_hfi_send_generic_req(adreno_dev,
			&gmu->hfi.acd_table, sizeof(gmu->hfi.acd_table));
		if (ret)
			return ret;

		gen8_hfi_send_set_value(adreno_dev, HFI_VALUE_LOG_EVENT_ON,
			EVENT_PWR_ACD_THROTTLE_PROF, 0);
	}

	return 0;
}

/* Enable IFPC in the GMU when the idle level asks for it */
int gen8_hfi_send_ifpc_feature_ctrl(struct adreno_device *adreno_dev)
{
	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);

	if (gmu->idle_level == GPU_HW_IFPC)
		return gen8_hfi_send_feature_ctrl(adreno_dev,
			HFI_FEATURE_IFPC, 1, adreno_dev->ifpc_hyst);
	return 0;
}

/* Discard any stale packets by snapping read indices to write indices */
static void reset_hfi_queues(struct adreno_device *adreno_dev)
{
	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
	struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
	struct hfi_queue_table *tbl = mem_addr->hostptr;
	struct hfi_queue_header *hdr;
	u32 i;

	/* Flush HFI queues */
	for (i = 0; i < HFI_QUEUE_MAX; i++) {
		hdr = &tbl->qhdr[i];
		if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
			continue;

		hdr->read_index = hdr->write_index;
	}
}

/*
 * Boot-time HFI sequence: tables, feature packets, core fw start, then the
 * default power level and bus vote. Any failure tears HFI back down.
 */
int gen8_hfi_start(struct adreno_device *adreno_dev)
{
	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	int result;

	reset_hfi_queues(adreno_dev);

	result = gen8_hfi_send_generic_req(adreno_dev, &gmu->hfi.dcvs_table,
		sizeof(gmu->hfi.dcvs_table));
	if (result)
		goto err;

	result = gen8_hfi_send_generic_req(adreno_dev, &gmu->hfi.bw_table,
		sizeof(gmu->hfi.bw_table));
	if (result)
		goto err;

	result = gen8_hfi_send_acd_feature_ctrl(adreno_dev);
	if (result)
		goto err;

	result = gen8_hfi_send_bcl_feature_ctrl(adreno_dev);
	if (result)
		goto err;

	result = gen8_hfi_send_ifpc_feature_ctrl(adreno_dev);
	if (result)
		goto err;

	result = gen8_hfi_send_core_fw_start(adreno_dev);
	if (result)
		goto err;

	set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);

	/* Request default DCVS level */
	result = kgsl_pwrctrl_set_default_gpu_pwrlevel(device);
	if (result)
		goto err;

	/* Request default BW vote */
	result = kgsl_pwrctrl_axi(device, true);

err:
	if (result)
		gen8_hfi_stop(adreno_dev);

	return result;

}

/* Undo gen8_hfi_start(): drop the bus vote and clear the started flag */
void gen8_hfi_stop(struct adreno_device *adreno_dev)
{
	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	kgsl_pwrctrl_axi(device, false);

	clear_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);
}

/* HFI interrupt handler */
irqreturn_t gen8_hfi_irq_handler(int irq, void *data)
{
	struct kgsl_device *device = data;
	struct gen8_gmu_device *gmu = to_gen8_gmu(ADRENO_DEVICE(device));
	u32 status = 0;

	gmu_core_regread(device, GEN8_GMUCX_GMU2HOST_INTR_INFO, &status);
	gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR, HFI_IRQ_MASK);

	if (status & HFI_IRQ_DBGQ_MASK)
		gen8_hfi_process_queue(gmu, HFI_DBG_ID, NULL);
	if (status & HFI_IRQ_CM3_FAULT_MASK) {
		dev_err_ratelimited(&gmu->pdev->dev,
			"GMU CM3 fault interrupt received\n");
		atomic_set(&gmu->cm3_fault, 1);

		/* make sure other CPUs see the update */
		smp_wmb();
	}
	/*
	 * NOTE(review): status is u32 but "%lx" expects unsigned long —
	 * this format specifier should be "%x" (printk-formats).
	 */
	if (status & ~HFI_IRQ_MASK)
		dev_err_ratelimited(&gmu->pdev->dev,
			"Unhandled HFI interrupts 0x%lx\n",
			status & ~HFI_IRQ_MASK);

	return IRQ_HANDLED;
}
diff --git a/adreno_gen8_hfi.h b/adreno_gen8_hfi.h
new file mode 100644
index 0000000000..b4387c3739
--- /dev/null
+++ b/adreno_gen8_hfi.h
@@ -0,0 +1,221 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2021, The Linux Foundation. All rights reserved.
 * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
 */

#ifndef __ADRENO_GEN8_HFI_H
#define __ADRENO_GEN8_HFI_H

#include "adreno_hfi.h"

/**
 * struct gen8_hfi - HFI control structure
 */
struct gen8_hfi {
	/** @irq: HFI interrupt line */
	int irq;
	/**
	 * @seqnum: atomic counter that is incremented for each message sent.
	 * The value of the counter is used as sequence number for HFI message.
	 */
	atomic_t seqnum;
	/** @hfi_mem: Memory descriptor for the hfi memory */
	struct kgsl_memdesc *hfi_mem;
	/** @bw_table: HFI BW table buffer */
	struct hfi_bwtable_cmd bw_table;
	/** @acd_table: HFI table for ACD data */
	struct hfi_acd_table_cmd acd_table;
	/** @dcvs_table: HFI table for gpu dcvs levels */
	struct hfi_dcvstable_cmd dcvs_table;
	/** @cmdq_lock: Spinlock for accessing the cmdq */
	spinlock_t cmdq_lock;
	/**
	 * @wb_set_record_bitmask: Bitmask to enable or disable the recording
	 * of messages in the GMU scratch.
	 */
	unsigned long wb_set_record_bitmask[BITS_TO_LONGS(HFI_MAX_ID)];
};

struct gen8_gmu_device;

/* gen8_hfi_irq_handler - IRQ handler for HFI interrupts */
irqreturn_t gen8_hfi_irq_handler(int irq, void *data);

/**
 * gen8_hfi_start - Send the various HFIs during device boot up
 * @adreno_dev: Pointer to the adreno device
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_hfi_start(struct adreno_device *adreno_dev);

/**
 * gen8_hfi_stop - Tear down hfi on device power down
 * @adreno_dev: Pointer to the adreno device
 */
void gen8_hfi_stop(struct adreno_device *adreno_dev);

/**
 * gen8_hfi_init - Initialize hfi resources
 * @adreno_dev: Pointer to the adreno device
 *
 * This function allocates and sets up hfi queues
 * when a process creates the very first kgsl instance
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_hfi_init(struct adreno_device *adreno_dev);

/* Helper function to get to gen8 hfi struct from adreno device */
struct gen8_hfi *to_gen8_hfi(struct adreno_device *adreno_dev);

/**
 * gen8_hfi_queue_write - Write a command to hfi queue
 * @adreno_dev: Pointer to the adreno device
 * @queue_idx: destination queue id
 * @msg: Data to be written to the queue
 * @size_bytes: Size of the command in bytes
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_hfi_queue_write(struct adreno_device *adreno_dev, u32 queue_idx,
	u32 *msg, u32 size_bytes);

/**
 * gen8_hfi_queue_read - Read data from hfi queue
 * @gmu: Pointer to the gen8 gmu device
 * @queue_idx: queue id to read from
 * @output: Pointer to read the data into
 * @max_size: Number of bytes to read from the queue
 *
 * Return: Number of dwords read on success or negative error on failure
 */
int gen8_hfi_queue_read(struct gen8_gmu_device *gmu, u32 queue_idx,
	u32 *output, u32 max_size);

/**
 * gen8_receive_ack_cmd - Process ack type packets
 * @gmu: Pointer to the gen8 gmu device
 * @rcvd: Pointer to the data read from hfi queue
 * @ret_cmd: Container for the hfi packet for which this ack is received
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_receive_ack_cmd(struct gen8_gmu_device *gmu, void *rcvd,
	struct pending_cmd *ret_cmd);

/**
 * gen8_hfi_send_feature_ctrl - Enable gmu feature via hfi
 * @adreno_dev: Pointer to the adreno device
 * @feature: feature to be enabled or disabled
 * @enable: Set 1 to enable or 0 to disable a feature
 * @data: payload for the send feature hfi packet
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_hfi_send_feature_ctrl(struct adreno_device *adreno_dev,
	u32 feature, u32 enable, u32 data);

/**
 * gen8_hfi_send_get_value - Send gmu get_values via hfi
 * @adreno_dev: Pointer to the adreno device
 * @type: GMU get_value type
 * @subtype: GMU get_value subtype
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_hfi_send_get_value(struct adreno_device *adreno_dev, u32 type, u32 subtype);

/**
 * gen8_hfi_send_set_value - Send gmu set_values via hfi
 * @adreno_dev: Pointer to the adreno device
 * @type: GMU set_value type
 * @subtype: GMU set_value subtype
 * @data: Value to set
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_hfi_send_set_value(struct adreno_device *adreno_dev,
	u32 type, u32 subtype, u32 data);

/**
 * gen8_hfi_send_core_fw_start - Send the core fw start hfi
 * @adreno_dev: Pointer to the adreno device
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_hfi_send_core_fw_start(struct adreno_device *adreno_dev);

/**
 * gen8_hfi_send_acd_feature_ctrl - Send the acd table and acd feature
 * @adreno_dev: Pointer to the adreno device
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_hfi_send_acd_feature_ctrl(struct adreno_device *adreno_dev);

/**
 * gen8_hfi_send_generic_req - Send a generic hfi packet
 * @adreno_dev: Pointer to the adreno device
 * @cmd: Pointer to the hfi packet header and data
 * @size_bytes: Size of the packet in bytes
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd, u32 size_bytes);

/**
 * gen8_hfi_send_generic_req_v5 - Send a generic hfi packet with additional error handling
 * @adreno_dev: Pointer to the adreno device
 * @cmd: Pointer to the hfi packet header and data
 * @ret_cmd: Ack for the command we just sent
 * @size_bytes: Size of the packet in bytes
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_hfi_send_generic_req_v5(struct adreno_device *adreno_dev, void *cmd,
	struct pending_cmd *ret_cmd, u32 size_bytes);

/**
 * gen8_hfi_send_bcl_feature_ctrl - Send the bcl feature hfi packet
 * @adreno_dev: Pointer to the adreno device
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_hfi_send_bcl_feature_ctrl(struct adreno_device *adreno_dev);

/**
 * gen8_hfi_send_ifpc_feature_ctrl - Send the ipfc feature hfi packet
 * @adreno_dev: Pointer to the adreno device
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_hfi_send_ifpc_feature_ctrl(struct adreno_device *adreno_dev);

/**
 * gen8_hfi_process_queue - Check hfi queue for messages from gmu
 * @gmu: Pointer to the gen8 gmu device
 * @queue_idx: queue id to be processed
 * @ret_cmd: Container for data needed for waiting for the ack
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_hfi_process_queue(struct gen8_gmu_device *gmu,
	u32 queue_idx, struct pending_cmd *ret_cmd);

/**
 * gen8_hfi_cmdq_write - Write a command to command queue
 * @adreno_dev: Pointer to the adreno device
 * @msg: Data to be written to the queue
 * @size_bytes: Size of the command in bytes
 *
 * This function takes the cmdq lock before writing data to the queue
 *
 * Return: 0 on success or negative error on failure
 */
int gen8_hfi_cmdq_write(struct adreno_device *adreno_dev, u32 *msg, u32 size_bytes);
void adreno_gen8_receive_err_req(struct gen8_gmu_device *gmu, void *rcvd);
void adreno_gen8_receive_debug_req(struct gen8_gmu_device *gmu, void *rcvd);
#endif
diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c
new file mode 100644
index 0000000000..5401cc3e53
--- /dev/null
+++ b/adreno_gen8_hwsched.c
@@ -0,0 +1,1468 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2021, The Linux Foundation. All rights reserved.
 * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
 */

/*
 * NOTE(review): the angle-bracket targets of the four includes below were
 * lost when this patch was extracted — restore from the upstream commit.
 */
#include
#include
#include
#include

#include "adreno.h"
#include "adreno_gen8.h"
#include "adreno_gen8_hwsched.h"
#include "kgsl_bus.h"
#include "kgsl_device.h"
#include "kgsl_trace.h"

/*
 * On a fault, mark hw-fence creation aborted and wake any waiters on the
 * unack waitqueue. @fault is unused here; the lock is skipped when called
 * from interrupt context (see comment below).
 */
static void _wakeup_hw_fence_waiters(struct adreno_device *adreno_dev, u32 fault)
{
	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
	bool lock = !in_interrupt();

	if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags))
		return;

	/*
	 * We could be in interrupt context here, which means we need to use spin_lock_irqsave
	 * (which disables interrupts) everywhere we take this lock. Instead of that, simply
	 * avoid taking this lock if we are recording a fault from an interrupt handler.
	 */
	if (lock)
		spin_lock(&hfi->hw_fence.lock);

	clear_bit(GEN8_HWSCHED_HW_FENCE_SLEEP_BIT, &hfi->hw_fence.flags);

	/* Avoid creating new hardware fences until recovery is complete */
	set_bit(GEN8_HWSCHED_HW_FENCE_ABORT_BIT, &hfi->hw_fence.flags);

	if (!lock) {
		/*
		 * This barrier ensures that the above bitops complete before we wake up the waiters
		 */
		smp_wmb();
	} else {
		spin_unlock(&hfi->hw_fence.lock);
	}

	wake_up_all(&hfi->hw_fence.unack_wq);

	del_timer_sync(&hfi->hw_fence_timer);
}

/* Record a hwsched fault, first releasing any hw-fence waiters */
void gen8_hwsched_fault(struct adreno_device *adreno_dev, u32 fault)
{
	/*
	 * Wake up any threads that may be sleeping waiting for the hardware fence unack count to
	 * drop to a desired threshold.
	 */
	_wakeup_hw_fence_waiters(adreno_dev, fault);

	adreno_hwsched_fault(adreno_dev, fault);
}

/* Drop the GMU clock to its minimum frequency (freqs[0]) */
static int gmu_clock_set_rate(struct adreno_device *adreno_dev)
{
	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
	int ret = 0;

	/* Switch to min GMU clock */
	gen8_rdpm_cx_freq_update(gmu, gmu->freqs[0] / 1000);

	ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk",
			gmu->freqs[0]);
	if (ret)
		dev_err(&gmu->pdev->dev, "GMU clock:%d set failed:%d\n",
			gmu->freqs[0], ret);

	trace_kgsl_gmu_pwrlevel(gmu->freqs[0], gmu->freqs[GMU_MAX_PWRLEVELS - 1]);

	return ret;
}

/*
 * Cold-boot path: power the GMU (gdsc + clocks), load and start its
 * firmware, bring up HFI and settle clocks/bus votes. Unwinds via the
 * labelled error paths on failure.
 */
static int gen8_hwsched_gmu_first_boot(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
	int level, ret = 0;

	kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE);

	gen8_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled);

	ret = gen8_gmu_enable_gdsc(adreno_dev);
	if (ret)
		return ret;

	ret = gen8_gmu_enable_clks(adreno_dev, GMU_MAX_PWRLEVELS - 1);
	if (ret)
		goto gdsc_off;

	ret = gen8_gmu_load_fw(adreno_dev);
	if (ret)
		goto clks_gdsc_off;

	ret = gen8_gmu_itcm_shadow(adreno_dev);
	if (ret)
		goto clks_gdsc_off;

	if (!test_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags)) {
		ret = gen8_load_pdc_ucode(adreno_dev);
		if (ret)
			goto clks_gdsc_off;

		gen8_load_rsc_ucode(adreno_dev);
		set_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags);
	}

	ret = gen8_scm_gpu_init_cx_regs(adreno_dev);
	if (ret)
		goto clks_gdsc_off;

	gen8_gmu_register_config(adreno_dev);

	ret = gen8_gmu_version_info(adreno_dev);
	if (ret)
		goto clks_gdsc_off;

	if (GMU_VER_MINOR(gmu->ver.hfi) < 2)
		set_bit(ADRENO_HWSCHED_CTX_BAD_LEGACY, &adreno_dev->hwsched.flags);

	gen8_gmu_irq_enable(adreno_dev);

	/* Vote for minimal DDR BW for GMU to init */
	level = pwr->pwrlevels[pwr->default_pwrlevel].bus_min;

	icc_set_bw(pwr->icc_path, 0, kBps_to_icc(pwr->ddr_table[level]));

	ret = gen8_gmu_device_start(adreno_dev);
	if (ret)
		goto err;

	gen8_get_gpu_feature_info(adreno_dev);

	ret = gen8_hwsched_hfi_start(adreno_dev);
	if (ret)
		goto err;

	ret = gmu_clock_set_rate(adreno_dev);
	if (ret) {
		gen8_hwsched_hfi_stop(adreno_dev);
		goto err;
	}

	if (gen8_hwsched_hfi_get_value(adreno_dev, HFI_VALUE_GMU_AB_VOTE) == 1) {
		adreno_dev->gmu_ab = true;
		set_bit(ADRENO_DEVICE_GMU_AB, &adreno_dev->priv);
	}

	icc_set_bw(pwr->icc_path, 0, 0);

	device->gmu_fault = false;

	kgsl_pwrctrl_set_state(device, KGSL_STATE_AWARE);

	return 0;

err:
	gen8_gmu_irq_disable(adreno_dev);

	if (device->gmu_fault) {
		gen8_gmu_suspend(adreno_dev);

		return ret;
	}

clks_gdsc_off:
	clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks);

gdsc_off:
	gen8_gmu_disable_gdsc(adreno_dev);

	gen8_rdpm_cx_freq_update(gmu, 0);

	return ret;
}

/* Warm-boot path (definition continues beyond this patch chunk) */
static int gen8_hwsched_gmu_boot(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
	int ret = 0;

	kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE);

	ret = gen8_gmu_enable_gdsc(adreno_dev);
	if (ret)
		return ret;
+ + ret = gen8_gmu_enable_clks(adreno_dev, GMU_MAX_PWRLEVELS - 1); + if (ret) + goto gdsc_off; + + ret = gen8_rscc_wakeup_sequence(adreno_dev); + if (ret) + goto clks_gdsc_off; + + ret = gen8_gmu_load_fw(adreno_dev); + if (ret) + goto clks_gdsc_off; + + gen8_gmu_register_config(adreno_dev); + + gen8_gmu_irq_enable(adreno_dev); + + ret = gen8_gmu_device_start(adreno_dev); + if (ret) + goto err; + + ret = gen8_hwsched_hfi_start(adreno_dev); + if (ret) + goto err; + + ret = gmu_clock_set_rate(adreno_dev); + if (ret) { + gen8_hwsched_hfi_stop(adreno_dev); + goto err; + } + + device->gmu_fault = false; + + kgsl_pwrctrl_set_state(device, KGSL_STATE_AWARE); + + return 0; +err: + gen8_gmu_irq_disable(adreno_dev); + + if (device->gmu_fault) { + gen8_gmu_suspend(adreno_dev); + + return ret; + } + +clks_gdsc_off: + clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); + +gdsc_off: + gen8_gmu_disable_gdsc(adreno_dev); + + gen8_rdpm_cx_freq_update(gmu, 0); + + return ret; +} + +void gen8_hwsched_active_count_put(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return; + + if (WARN(atomic_read(&device->active_cnt) == 0, + "Unbalanced get/put calls to KGSL active count\n")) + return; + + if (atomic_dec_and_test(&device->active_cnt)) { + kgsl_pwrscale_update_stats(device); + kgsl_pwrscale_update(device); + kgsl_start_idle_timer(device); + } + + trace_kgsl_active_count(device, + (unsigned long) __builtin_return_address(0)); + + wake_up(&device->active_cnt_wq); +} + +static int gen8_hwsched_notify_slumber(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct hfi_prep_slumber_cmd req; + int ret; + + ret = CMD_MSG_HDR(req, H2F_MSG_PREPARE_SLUMBER); + if (ret) + return ret; + + req.freq = gmu->hfi.dcvs_table.gpu_level_num - + 
pwr->default_pwrlevel - 1; + req.bw = pwr->pwrlevels[pwr->default_pwrlevel].bus_freq; + + req.bw |= gen8_bus_ab_quantize(adreno_dev, 0); + /* Disable the power counter so that the GMU is not busy */ + gmu_core_regwrite(device, GEN8_GMUCX_POWER_COUNTER_ENABLE, 0); + + return gen8_hfi_send_cmd_async(adreno_dev, &req, sizeof(req)); + +} +static int gen8_hwsched_gmu_power_off(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret = 0; + + if (device->gmu_fault) + goto error; + + /* Wait for the lowest idle level we requested */ + ret = gen8_gmu_wait_for_lowest_idle(adreno_dev); + if (ret) + goto error; + + ret = gen8_hwsched_notify_slumber(adreno_dev); + if (ret) + goto error; + + ret = gen8_gmu_wait_for_idle(adreno_dev); + if (ret) + goto error; + + ret = gen8_rscc_sleep_sequence(adreno_dev); + + gen8_rdpm_mx_freq_update(gmu, 0); + + /* Now that we are done with GMU and GPU, Clear the GBIF */ + ret = gen8_halt_gbif(adreno_dev); + + gen8_gmu_irq_disable(adreno_dev); + + gen8_hwsched_hfi_stop(adreno_dev); + + clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); + + gen8_gmu_disable_gdsc(adreno_dev); + + gen8_rdpm_cx_freq_update(gmu, 0); + + kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE); + + return ret; + +error: + gen8_gmu_irq_disable(adreno_dev); + gen8_hwsched_hfi_stop(adreno_dev); + gen8_gmu_suspend(adreno_dev); + + return ret; +} + +static void gen8_hwsched_init_ucode_regs(struct adreno_device *adreno_dev) +{ + struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* Program the ucode base for CP */ + kgsl_regwrite(device, GEN8_CP_SQE_INSTR_BASE_LO, + lower_32_bits(fw->memdesc->gpuaddr)); + kgsl_regwrite(device, GEN8_CP_SQE_INSTR_BASE_HI, + upper_32_bits(fw->memdesc->gpuaddr)); + + if (ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) { + fw = ADRENO_FW(adreno_dev, ADRENO_FW_AQE); + + /* Program 
the ucode base for AQE0 (BV coprocessor) */ + kgsl_regwrite(device, GEN8_CP_AQE_INSTR_BASE_LO_0, + lower_32_bits(fw->memdesc->gpuaddr)); + kgsl_regwrite(device, GEN8_CP_AQE_INSTR_BASE_HI_0, + upper_32_bits(fw->memdesc->gpuaddr)); + + /* Program the ucode base for AQE1 (LPAC coprocessor) */ + if (adreno_dev->lpac_enabled) { + kgsl_regwrite(device, GEN8_CP_AQE_INSTR_BASE_LO_1, + lower_32_bits(fw->memdesc->gpuaddr)); + kgsl_regwrite(device, GEN8_CP_AQE_INSTR_BASE_HI_1, + upper_32_bits(fw->memdesc->gpuaddr)); + } + } +} + +static int gen8_hwsched_gpu_boot(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + /* Clear any GPU faults that might have been left over */ + adreno_clear_gpu_fault(adreno_dev); + + ret = kgsl_mmu_start(device); + if (ret) + goto err; + + ret = gen8_gmu_oob_set(device, oob_gpu); + if (ret) + goto err; + + /* Clear the busy_data stats - we're starting over from scratch */ + memset(&adreno_dev->busy_data, 0, sizeof(adreno_dev->busy_data)); + + gen8_start(adreno_dev); + + /* Re-initialize the coresight registers if applicable */ + adreno_coresight_start(adreno_dev); + + adreno_perfcounter_start(adreno_dev); + + /* Clear FSR here in case it is set from a previous pagefault */ + kgsl_mmu_clear_fsr(&device->mmu); + + gen8_enable_gpu_irq(adreno_dev); + + gen8_hwsched_init_ucode_regs(adreno_dev); + + ret = gen8_hwsched_boot_gpu(adreno_dev); + if (ret) + goto err; + + /* + * At this point it is safe to assume that we recovered. Setting + * this field allows us to take a new snapshot for the next failure + * if we are prioritizing the first unrecoverable snapshot. + */ + if (device->snapshot) + device->snapshot->recovered = true; + + device->reset_counter++; + + /* + * If warmboot is enabled and we switched a sysfs node, we will do a coldboot + * in the subseqent slumber exit. 
Once that is done we need to mark this bool + * as false so that in the next run we can do warmboot + */ + clear_bit(ADRENO_DEVICE_FORCE_COLDBOOT, &adreno_dev->priv); +err: + gen8_gmu_oob_clear(device, oob_gpu); + + if (ret) + gen8_hwsched_gmu_power_off(adreno_dev); + + return ret; +} + +static void hwsched_idle_timer(struct timer_list *t) +{ + struct kgsl_device *device = container_of(t, struct kgsl_device, + idle_timer); + + kgsl_schedule_work(&device->idle_check_ws); +} + +static int gen8_gmu_warmboot_init(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret = 0; + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_GMU_WARMBOOT)) + return ret; + + if (IS_ERR_OR_NULL(gmu->gmu_init_scratch)) { + gmu->gmu_init_scratch = gen8_reserve_gmu_kernel_block(gmu, 0, + SZ_4K, GMU_CACHE, 0); + ret = PTR_ERR_OR_ZERO(gmu->gmu_init_scratch); + if (ret) + return ret; + } + + if (IS_ERR_OR_NULL(gmu->gpu_boot_scratch)) { + gmu->gpu_boot_scratch = gen8_reserve_gmu_kernel_block(gmu, 0, + SZ_4K, GMU_CACHE, 0); + ret = PTR_ERR_OR_ZERO(gmu->gpu_boot_scratch); + } + + return ret; +} + +static int gen8_hwsched_gmu_init(struct adreno_device *adreno_dev) +{ + int ret; + + ret = gen8_gmu_parse_fw(adreno_dev); + if (ret) + return ret; + + ret = gen8_gmu_memory_init(adreno_dev); + if (ret) + return ret; + + ret = gen8_gmu_warmboot_init(adreno_dev); + if (ret) + return ret; + + return gen8_hwsched_hfi_init(adreno_dev); +} + +static void gen8_hwsched_touch_wakeup(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret; + + /* + * Do not wake up a suspended device or until the first boot sequence + * has been completed. 
+ */ + if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags) || + !test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) + return; + + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + goto done; + + kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE); + + ret = gen8_hwsched_gmu_boot(adreno_dev); + if (ret) + return; + + ret = gen8_hwsched_gpu_boot(adreno_dev); + if (ret) + return; + + kgsl_pwrscale_wake(device); + + set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + device->pwrctrl.last_stat_updated = ktime_get(); + + kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE); + +done: + /* + * When waking up from a touch event we want to stay active long enough + * for the user to send a draw command. The default idle timer timeout + * is shorter than we want so go ahead and push the idle timer out + * further for this special case + */ + mod_timer(&device->idle_timer, jiffies + + msecs_to_jiffies(adreno_wake_timeout)); +} + +static int gen8_hwsched_boot(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + return 0; + + kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE); + + adreno_hwsched_start(adreno_dev); + + ret = gen8_hwsched_gmu_boot(adreno_dev); + if (ret) + return ret; + + ret = gen8_hwsched_gpu_boot(adreno_dev); + if (ret) + return ret; + + kgsl_start_idle_timer(device); + kgsl_pwrscale_wake(device); + + set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + device->pwrctrl.last_stat_updated = ktime_get(); + + kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE); + + return ret; +} + +static int gen8_aqe_microcode_read(struct adreno_device *adreno_dev) +{ + struct adreno_firmware *aqe_fw = ADRENO_FW(adreno_dev, ADRENO_FW_AQE); + const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) + return 0; + + return adreno_get_firmware(adreno_dev, gen8_core->aqefw_name, 
aqe_fw); +} + +static int gen8_hwsched_first_boot(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret; + + if (test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) + return gen8_hwsched_boot(adreno_dev); + + adreno_hwsched_start(adreno_dev); + + ret = gen8_microcode_read(adreno_dev); + if (ret) + return ret; + + ret = gen8_aqe_microcode_read(adreno_dev); + if (ret) + return ret; + + ret = gen8_init(adreno_dev); + if (ret) + return ret; + + ret = gen8_hwsched_gmu_init(adreno_dev); + if (ret) + return ret; + + kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE); + + ret = gen8_hwsched_gmu_first_boot(adreno_dev); + if (ret) + return ret; + + ret = gen8_hwsched_gpu_boot(adreno_dev); + if (ret) + return ret; + + adreno_get_bus_counters(adreno_dev); + + adreno_dev->cooperative_reset = ADRENO_FEATURE(adreno_dev, + ADRENO_COOP_RESET); + + set_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags); + set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + /* + * BCL needs respective Central Broadcast register to + * be programed from TZ. This programing happens only + * when zap shader firmware load is successful. Zap firmware + * load can fail in boot up path hence enable BCL only after we + * successfully complete first boot to ensure that Central + * Broadcast register was programed before enabling BCL. + */ + if (ADRENO_FEATURE(adreno_dev, ADRENO_BCL)) + adreno_dev->bcl_enabled = true; + + /* + * There is a possible deadlock scenario during kgsl firmware reading + * (request_firmware) and devfreq update calls. During first boot, kgsl + * device mutex is held and then request_firmware is called for reading + * firmware. request_firmware internally takes dev_pm_qos_mtx lock. 
+ * Whereas in case of devfreq update calls triggered by thermal/bcl or + * devfreq sysfs, it first takes the same dev_pm_qos_mtx lock and then + * tries to take kgsl device mutex as part of get_dev_status/target + * calls. This results in deadlock when both thread are unable to acquire + * the mutex held by other thread. Enable devfreq updates now as we are + * done reading all firmware files. + */ + device->pwrscale.devfreq_enabled = true; + + device->pwrctrl.last_stat_updated = ktime_get(); + + kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE); + + return 0; +} + +/** + * drain_ctx_hw_fences_cpu - Force trigger the hardware fences that + * were not sent to TxQueue by the GMU + */ +static void drain_ctx_hw_fences_cpu(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + struct adreno_hw_fence_entry *entry, *tmp; + + spin_lock(&drawctxt->lock); + list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_inflight_list, node) { + gen8_trigger_hw_fence_cpu(adreno_dev, entry); + gen8_remove_hw_fence_entry(adreno_dev, entry); + } + spin_unlock(&drawctxt->lock); +} + +static void drain_hw_fences_cpu(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_context *context; + int id; + + read_lock(&device->context_lock); + idr_for_each_entry(&device->context_idr, context, id) { + if (context->gmu_registered) + drain_ctx_hw_fences_cpu(adreno_dev, ADRENO_CONTEXT(context)); + } + read_unlock(&device->context_lock); +} + +/** + * check_inflight_hw_fences - During SLUMBER entry, we must make sure all hardware fences across + * all registered contexts have been sent to TxQueue. 
If not, take a snapshot + */ +static int check_inflight_hw_fences(struct adreno_device *adreno_dev) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_context *context; + int id, ret = 0; + + if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &hwsched->flags)) + return 0; + + read_lock(&device->context_lock); + idr_for_each_entry(&device->context_idr, context, id) { + + if (context->gmu_registered) { + ret = gen8_hwsched_check_context_inflight_hw_fences(adreno_dev, + ADRENO_CONTEXT(context)); + if (ret) + break; + } + } + read_unlock(&device->context_lock); + + if (ret) + gmu_core_fault_snapshot(device); + + return ret; +} + +static int gen8_hwsched_power_off(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret = 0; + bool drain_cpu = false; + + if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + return 0; + + kgsl_pwrctrl_request_state(device, KGSL_STATE_SLUMBER); + + ret = gen8_gmu_oob_set(device, oob_gpu); + if (ret) { + gen8_gmu_oob_clear(device, oob_gpu); + goto no_gx_power; + } + + kgsl_pwrscale_update_stats(device); + + /* Save active coresight registers if applicable */ + adreno_coresight_stop(adreno_dev); + + adreno_irqctrl(adreno_dev, 0); + + gen8_gmu_oob_clear(device, oob_gpu); + +no_gx_power: + kgsl_pwrctrl_irq(device, false); + + /* Make sure GMU has sent all hardware fences to TxQueue */ + if (check_inflight_hw_fences(adreno_dev)) + drain_cpu = true; + + gen8_hwsched_gmu_power_off(adreno_dev); + + /* Now that we are sure that GMU is powered off, drain pending fences */ + if (drain_cpu) + drain_hw_fences_cpu(adreno_dev); + + adreno_hwsched_unregister_contexts(adreno_dev); + + if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice)) + llcc_slice_deactivate(adreno_dev->gpu_llc_slice); + + if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) + llcc_slice_deactivate(adreno_dev->gpuhtw_llc_slice); 
+ + clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + del_timer_sync(&device->idle_timer); + + kgsl_pwrscale_sleep(device); + + kgsl_pwrctrl_clear_l3_vote(device); + + kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER); + + return ret; +} + +static void check_hw_fence_unack_count(struct adreno_device *adreno_dev) +{ + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + u32 unack_count; + + if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags)) + return; + + gen8_hwsched_process_msgq(adreno_dev); + + spin_lock(&hfi->hw_fence.lock); + unack_count = hfi->hw_fence.unack_count; + spin_unlock(&hfi->hw_fence.lock); + + if (!unack_count) + return; + + dev_err(&gmu->pdev->dev, "hardware fence unack_count(%d) isn't zero before SLUMBER\n", + unack_count); + gmu_core_fault_snapshot(device); +} + +static void hwsched_idle_check(struct work_struct *work) +{ + struct kgsl_device *device = container_of(work, + struct kgsl_device, idle_check_ws); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + mutex_lock(&device->mutex); + + if (test_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags)) + goto done; + + if (atomic_read(&device->active_cnt) || time_is_after_jiffies(device->idle_jiffies)) { + kgsl_pwrscale_update(device); + kgsl_start_idle_timer(device); + goto done; + } + + spin_lock(&device->submit_lock); + if (device->submit_now) { + spin_unlock(&device->submit_lock); + kgsl_pwrscale_update(device); + kgsl_start_idle_timer(device); + goto done; + } + + device->skip_inline_submit = true; + spin_unlock(&device->submit_lock); + + if (!gen8_hw_isidle(adreno_dev)) { + dev_err(device->dev, "GPU isn't idle before SLUMBER\n"); + gmu_core_fault_snapshot(device); + } + + check_hw_fence_unack_count(adreno_dev); + + gen8_hwsched_power_off(adreno_dev); + +done: + mutex_unlock(&device->mutex); +} + +static int gen8_hwsched_first_open(struct 
adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + /* + * Do the one time settings that need to happen when we + * attempt to boot the gpu the very first time + */ + ret = gen8_hwsched_first_boot(adreno_dev); + if (ret) + return ret; + + /* + * A client that does a first_open but never closes the device + * may prevent us from going back to SLUMBER. So trigger the idle + * check by incrementing the active count and immediately releasing it. + */ + atomic_inc(&device->active_cnt); + gen8_hwsched_active_count_put(adreno_dev); + + return 0; +} + +int gen8_hwsched_active_count_get(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret = 0; + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return -EINVAL; + + if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags)) + return -EINVAL; + + if ((atomic_read(&device->active_cnt) == 0)) + ret = gen8_hwsched_boot(adreno_dev); + + if (ret == 0) + atomic_inc(&device->active_cnt); + + trace_kgsl_active_count(device, + (unsigned long) __builtin_return_address(0)); + + return ret; +} + +static int gen8_hwsched_dcvs_set(struct adreno_device *adreno_dev, + int gpu_pwrlevel, int bus_level, u32 ab) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table; + struct hfi_gx_bw_perf_vote_cmd req = { + .ack_type = DCVS_ACK_BLOCK, + .freq = INVALID_DCVS_IDX, + .bw = INVALID_DCVS_IDX, + }; + int ret; + + if (!test_bit(GMU_PRIV_HFI_STARTED, &gmu->flags)) + return 0; + + /* Do not set to XO and lower GPU clock vote from GMU */ + if ((gpu_pwrlevel != INVALID_DCVS_IDX) && + (gpu_pwrlevel >= table->gpu_level_num - 1)) { + dev_err(&gmu->pdev->dev, "Invalid gpu dcvs request: %d\n", + gpu_pwrlevel); + return -EINVAL; + } + + if (gpu_pwrlevel < 
table->gpu_level_num - 1) + req.freq = table->gpu_level_num - gpu_pwrlevel - 1; + + if (bus_level < pwr->ddr_table_count && bus_level > 0) + req.bw = bus_level; + + req.bw |= gen8_bus_ab_quantize(adreno_dev, ab); + + /* GMU will vote for slumber levels through the sleep sequence */ + if ((req.freq == INVALID_DCVS_IDX) && (req.bw == INVALID_BW_VOTE)) + return 0; + + ret = CMD_MSG_HDR(req, H2F_MSG_GX_BW_PERF_VOTE); + if (ret) + return ret; + + ret = gen8_hfi_send_cmd_async(adreno_dev, &req, sizeof(req)); + + if (ret) { + dev_err_ratelimited(&gmu->pdev->dev, + "Failed to set GPU perf idx %d, bw idx %d\n", + req.freq, req.bw); + + /* + * If this was a dcvs request along side an active gpu, request + * dispatcher based reset and recovery. + */ + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + gen8_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + } + + if (req.freq != INVALID_DCVS_IDX) + gen8_rdpm_mx_freq_update(gmu, + gmu->hfi.dcvs_table.gx_votes[req.freq].freq); + + return ret; +} + +static int gen8_hwsched_clock_set(struct adreno_device *adreno_dev, + u32 pwrlevel) +{ + return gen8_hwsched_dcvs_set(adreno_dev, pwrlevel, INVALID_DCVS_IDX, INVALID_AB_VALUE); +} + +static void scale_gmu_frequency(struct adreno_device *adreno_dev, int buslevel) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + static unsigned long prev_freq; + unsigned long freq = gmu->freqs[0]; + + if (!gmu->perf_ddr_bw) + return; + + /* + * Scale the GMU if DDR is at a CX corner at which GMU can run at + * a higher frequency + */ + if (pwr->ddr_table[buslevel] >= gmu->perf_ddr_bw) + freq = gmu->freqs[GMU_MAX_PWRLEVELS - 1]; + + if (prev_freq == freq) + return; + + if (kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", freq)) { + dev_err(&gmu->pdev->dev, "Unable to set the GMU clock to %ld\n", + freq); + return; + } + + gen8_rdpm_cx_freq_update(gmu, freq / 1000); + + 
trace_kgsl_gmu_pwrlevel(freq, prev_freq); + + prev_freq = freq; +} + +static int gen8_hwsched_bus_set(struct adreno_device *adreno_dev, int buslevel, + u32 ab) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int ret = 0; + + if (buslevel == pwr->cur_buslevel) + buslevel = INVALID_DCVS_IDX; + + if ((ab == pwr->cur_ab) || (ab == 0)) + ab = INVALID_AB_VALUE; + + if ((ab == INVALID_AB_VALUE) && (buslevel == INVALID_DCVS_IDX)) + return 0; + + ret = gen8_hwsched_dcvs_set(adreno_dev, INVALID_DCVS_IDX, + buslevel, ab); + if (ret) + return ret; + + if (buslevel != INVALID_DCVS_IDX) { + scale_gmu_frequency(adreno_dev, buslevel); + + pwr->cur_buslevel = buslevel; + } + + if (ab != INVALID_AB_VALUE) { + if (!adreno_dev->gmu_ab) + icc_set_bw(pwr->icc_path, MBps_to_icc(ab), 0); + pwr->cur_ab = ab; + } + + trace_kgsl_buslevel(device, pwr->active_pwrlevel, buslevel, ab); + return ret; +} + +static int gen8_hwsched_pm_suspend(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret; + + if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags)) + return 0; + + kgsl_pwrctrl_request_state(device, KGSL_STATE_SUSPEND); + + /* Halt any new submissions */ + reinit_completion(&device->halt_gate); + + /** + * Wait for the dispatcher to retire everything by waiting + * for the active count to go to zero. 
+ */ + ret = kgsl_active_count_wait(device, 0, msecs_to_jiffies(100)); + if (ret) { + dev_err(device->dev, "Timed out waiting for the active count\n"); + goto err; + } + + ret = adreno_hwsched_idle(adreno_dev); + if (ret) + goto err; + + gen8_hwsched_power_off(adreno_dev); + + adreno_get_gpu_halt(adreno_dev); + + set_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags); + + kgsl_pwrctrl_set_state(device, KGSL_STATE_SUSPEND); + + return 0; + +err: + adreno_hwsched_start(adreno_dev); + + return ret; +} + +static void gen8_hwsched_pm_resume(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + + if (WARN(!test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags), + "resume invoked without a suspend\n")) + return; + + adreno_put_gpu_halt(adreno_dev); + + adreno_hwsched_start(adreno_dev); + + clear_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags); +} + +void gen8_hwsched_handle_watchdog(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 mask; + + /* Temporarily mask the watchdog interrupt to prevent a storm */ + gmu_core_regread(device, GEN8_GMUAO_AO_HOST_INTERRUPT_MASK, + &mask); + gmu_core_regwrite(device, GEN8_GMUAO_AO_HOST_INTERRUPT_MASK, + (mask | GMU_INT_WDOG_BITE)); + + gen8_gmu_send_nmi(device, false); + + dev_err_ratelimited(&gmu->pdev->dev, + "GMU watchdog expired interrupt received\n"); + + gen8_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); +} + +static void gen8_hwsched_drain_ctxt_unregister(struct adreno_device *adreno_dev) +{ + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + struct pending_cmd *cmd = NULL; + + read_lock(&hfi->msglock); + + list_for_each_entry(cmd, &hfi->msglist, node) { + if (MSG_HDR_GET_ID(cmd->sent_hdr) == H2F_MSG_UNREGISTER_CONTEXT) + complete(&cmd->complete); + } + + read_unlock(&hfi->msglock); +} + +/** + * process_context_hw_fences_after_reset - This function processes all hardware fences that were + * sent to 
GMU prior to recovery. If a fence is not retired by the GPU, and the context is still + * good, then move them to the reset list. + */ +static void process_context_hw_fences_after_reset(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, struct list_head *reset_list) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_hw_fence_entry *entry, *tmp; + + spin_lock(&drawctxt->lock); + list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_inflight_list, node) { + struct adreno_context *drawctxt = entry->drawctxt; + struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; + bool retired = kgsl_check_timestamp(device, &drawctxt->base, (u32)entry->cmd.ts); + + /* Delete the fences that GMU has sent to the TxQueue */ + if (timestamp_cmp(hdr->out_fence_ts, (u32)entry->cmd.ts) >= 0) { + gen8_remove_hw_fence_entry(adreno_dev, entry); + continue; + } + + /* + * Force retire the fences if the corresponding submission is retired by GPU + * or if the context has gone bad + */ + if (retired || kgsl_context_is_bad(&drawctxt->base)) + entry->cmd.flags |= HW_FENCE_FLAG_SKIP_MEMSTORE; + + list_add_tail(&entry->reset_node, reset_list); + } + spin_unlock(&drawctxt->lock); +} + +/** + * process_inflight_hw_fences_after_reset - Send hardware fences from all contexts back to the GMU + * after fault recovery. We must wait for ack when sending each of these fences to GMU so as to + * avoid sending a large number of hardware fences in a short span of time. + */ +static int process_inflight_hw_fences_after_reset(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_context *context = NULL; + int id, ret = 0; + struct list_head hw_fence_list; + struct adreno_hw_fence_entry *entry, *tmp; + + /** + * Since we need to wait for ack from GMU when sending each inflight fence back to GMU, we + * cannot send them from within atomic context. 
Hence, walk the list of such hardware fences
+ */ + ret = gen8_send_hw_fence_hfi_wait_ack(adreno_dev, entry, + HW_FENCE_FLAG_SKIP_MEMSTORE); + if (ret) + return ret; + + gen8_remove_hw_fence_entry(adreno_dev, entry); + } + + return ret; +} + +static int drain_guilty_context_hw_fences(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_context *context = NULL; + struct adreno_context *guilty = NULL; + int id; + + read_lock(&device->context_lock); + idr_for_each_entry(&device->context_idr, context, id) { + if (test_bit(KGSL_CONTEXT_PRIV_INVALID, &context->priv)) { + guilty = ADRENO_CONTEXT(context); + break; + } + } + read_unlock(&device->context_lock); + + if (!guilty) + return 0; + + return gen8_hwsched_drain_context_hw_fences(adreno_dev, guilty); +} + +static int handle_hw_fences_after_reset(struct adreno_device *adreno_dev) +{ + int ret; + + ret = drain_guilty_context_hw_fences(adreno_dev); + if (ret) + return ret; + + /* + * We must do this after adreno_hwsched_replay() so that context registration + * is done before we re-send the un-retired hardware fences to the GMU + */ + ret = process_inflight_hw_fences_after_reset(adreno_dev); + if (ret) + return ret; + + ret = process_detached_hw_fences_after_reset(adreno_dev); + if (ret) + return ret; + + return gen8_hwsched_disable_hw_fence_throttle(adreno_dev); +} + +int gen8_hwsched_reset_replay(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + int ret; + + /* + * Any pending context unregister packets will be lost + * since we hard reset the GMU. This means any threads waiting + * for context unregister hfi ack will timeout. Wake them + * to avoid false positive ack timeout messages later. 
+ */ + gen8_hwsched_drain_ctxt_unregister(adreno_dev); + + if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + return 0; + + gen8_disable_gpu_irq(adreno_dev); + + gen8_gmu_irq_disable(adreno_dev); + + gen8_hwsched_hfi_stop(adreno_dev); + + gen8_gmu_suspend(adreno_dev); + + adreno_hwsched_unregister_contexts(adreno_dev); + + clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + spin_lock(&hfi->hw_fence.lock); + + /* Reset the unack count back to zero as we start afresh */ + hfi->hw_fence.unack_count = 0; + + spin_unlock(&hfi->hw_fence.lock); + + /* + * When we reset, we want to coldboot incase any scratch corruption + * has occurred before we faulted. + */ + adreno_mark_for_coldboot(adreno_dev); + + ret = gen8_hwsched_boot(adreno_dev); + if (ret) + goto done; + + adreno_hwsched_replay(adreno_dev); + + ret = handle_hw_fences_after_reset(adreno_dev); +done: + BUG_ON(ret); + + return ret; +} + +const struct adreno_power_ops gen8_hwsched_power_ops = { + .first_open = gen8_hwsched_first_open, + .last_close = gen8_hwsched_power_off, + .active_count_get = gen8_hwsched_active_count_get, + .active_count_put = gen8_hwsched_active_count_put, + .touch_wakeup = gen8_hwsched_touch_wakeup, + .pm_suspend = gen8_hwsched_pm_suspend, + .pm_resume = gen8_hwsched_pm_resume, + .gpu_clock_set = gen8_hwsched_clock_set, + .gpu_bus_set = gen8_hwsched_bus_set, +}; + +const struct adreno_hwsched_ops gen8_hwsched_ops = { + .submit_drawobj = gen8_hwsched_submit_drawobj, + .preempt_count = gen8_hwsched_preempt_count_get, + .create_hw_fence = gen8_hwsched_create_hw_fence, +}; + +int gen8_hwsched_probe(struct platform_device *pdev, + u32 chipid, const struct adreno_gpu_core *gpucore) +{ + struct adreno_device *adreno_dev; + struct kgsl_device *device; + struct gen8_hwsched_device *gen8_hwsched_dev; + int ret; + + gen8_hwsched_dev = devm_kzalloc(&pdev->dev, sizeof(*gen8_hwsched_dev), + GFP_KERNEL); + if (!gen8_hwsched_dev) + return -ENOMEM; + + adreno_dev = &gen8_hwsched_dev->gen8_dev.adreno_dev; + + 
adreno_dev->hwsched_enabled = true; + + ret = gen8_probe_common(pdev, adreno_dev, chipid, gpucore); + if (ret) + return ret; + + device = KGSL_DEVICE(adreno_dev); + + INIT_WORK(&device->idle_check_ws, hwsched_idle_check); + + timer_setup(&device->idle_timer, hwsched_idle_timer, 0); + + adreno_dev->irq_mask = GEN8_HWSCHED_INT_MASK; + + if (ADRENO_FEATURE(adreno_dev, ADRENO_LPAC)) + adreno_dev->lpac_enabled = true; + + if (ADRENO_FEATURE(adreno_dev, ADRENO_DMS)) { + set_bit(ADRENO_DEVICE_DMS, &adreno_dev->priv); + adreno_dev->dms_enabled = true; + } + + kgsl_mmu_set_feature(device, KGSL_MMU_PAGEFAULT_TERMINATE); + + return adreno_hwsched_init(adreno_dev, &gen8_hwsched_ops); +} + +int gen8_hwsched_add_to_minidump(struct adreno_device *adreno_dev) +{ + struct gen8_device *gen8_dev = container_of(adreno_dev, + struct gen8_device, adreno_dev); + struct gen8_hwsched_device *gen8_hwsched = container_of(gen8_dev, + struct gen8_hwsched_device, gen8_dev); + struct gen8_hwsched_hfi *hw_hfi = &gen8_hwsched->hwsched_hfi; + int ret, i; + + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_HWSCHED_DEVICE, + (void *)(gen8_hwsched), sizeof(struct gen8_hwsched_device)); + if (ret) + return ret; + + if (!IS_ERR_OR_NULL(gen8_dev->gmu.gmu_log)) { + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, + KGSL_GMU_LOG_ENTRY, + gen8_dev->gmu.gmu_log->hostptr, + gen8_dev->gmu.gmu_log->size); + if (ret) + return ret; + } + + if (!IS_ERR_OR_NULL(gen8_dev->gmu.hfi.hfi_mem)) { + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, + KGSL_HFIMEM_ENTRY, + gen8_dev->gmu.hfi.hfi_mem->hostptr, + gen8_dev->gmu.hfi.hfi_mem->size); + if (ret) + return ret; + } + + /* Dump HFI hwsched global mem alloc entries */ + for (i = 0; i < hw_hfi->mem_alloc_entries; i++) { + struct hfi_mem_alloc_entry *entry = &hw_hfi->mem_alloc_table[i]; + char hfi_minidump_str[MAX_VA_MINIDUMP_STR_LEN] = {0}; + u32 rb_id = 0; + + if (!hfi_get_minidump_string(entry->desc.mem_kind, + &hfi_minidump_str[0], + 
sizeof(hfi_minidump_str), &rb_id)) { + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, + hfi_minidump_str, + entry->md->hostptr, + entry->md->size); + if (ret) + return ret; + } + } + + if (!IS_ERR_OR_NULL(hw_hfi->big_ib)) { + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, + KGSL_HFI_BIG_IB_ENTRY, + hw_hfi->big_ib->hostptr, + hw_hfi->big_ib->size); + if (ret) + return ret; + } + + if (!IS_ERR_OR_NULL(hw_hfi->big_ib_recurring)) + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, + KGSL_HFI_BIG_IB_REC_ENTRY, + hw_hfi->big_ib_recurring->hostptr, + hw_hfi->big_ib_recurring->size); + + return ret; +} diff --git a/adreno_gen8_hwsched.h b/adreno_gen8_hwsched.h new file mode 100644 index 0000000000..6c790c6698 --- /dev/null +++ b/adreno_gen8_hwsched.h @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _ADRENO_GEN8_HWSCHED_H_ +#define _ADRENO_GEN8_HWSCHED_H_ + +#include "adreno_gen8_hwsched_hfi.h" + +/** + * struct gen8_hwsched_device - Container for the gen8 hwscheduling device + */ +struct gen8_hwsched_device { + /** @gen8_dev: Container for the gen8 device */ + struct gen8_device gen8_dev; + /** @hwsched_hfi: Container for hwscheduling specific hfi resources */ + struct gen8_hwsched_hfi hwsched_hfi; +}; + +/** + * gen8_hwsched_probe - Target specific probe for hwsched + * @pdev: Pointer to the platform device + * @chipid: Chipid of the target + * @gpucore: Pointer to the gpucore + * + * The target specific probe function for hwsched enabled gmu targets. 
+ * + * Return: 0 on success or negative error on failure + */ +int gen8_hwsched_probe(struct platform_device *pdev, + u32 chipid, const struct adreno_gpu_core *gpucore); + +/** + * gen8_hwsched_reset_replay - Restart the gmu and gpu and replay inflight cmdbatches + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen8_hwsched_reset_replay(struct adreno_device *adreno_dev); + +/** + * gen8_hwsched_handle_watchdog - Handle watchdog interrupt + * @adreno_dev: Pointer to the adreno device + */ +void gen8_hwsched_handle_watchdog(struct adreno_device *adreno_dev); + +/** + * gen8_hwsched_active_count_get - Increment the active count + * @adreno_dev: Pointer to the adreno device + * + * This function increments the active count. If active count + * is 0, this function also powers up the device. + * + * Return: 0 on success or negative error on failure + */ +int gen8_hwsched_active_count_get(struct adreno_device *adreno_dev); + +/** + * gen8_hwsched_active_count_put - Put back the active count + * @adreno_dev: Pointer to the adreno device + * + * This function decrements the active count sets the idle + * timer if active count is zero. + */ +void gen8_hwsched_active_count_put(struct adreno_device *adreno_dev); + +/** + * gen8_hwsched_add_to_minidump - Register hwsched_device with va minidump + * @adreno_dev: Pointer to the adreno device + */ +int gen8_hwsched_add_to_minidump(struct adreno_device *adreno_dev); + +/** + * gen8_hwsched_send_recurring_cmdobj - Dispatch IBs to GMU + * @adreno_dev: Pointer to adreno device structure + * @cmdobj: The command object which needs to be submitted + * + * This function is used to register the context if needed and submit + * recurring IBs to the GMU. Upon receiving ipc interrupt GMU will submit + * recurring IBs to GPU. 
+ + * Return: 0 on success and negative error on failure + */ +int gen8_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj); + +/** + * gen8_hwsched_fault - Set hwsched fault to request recovery + * @adreno_dev: A handle to adreno device + * @fault: The type of fault + */ +void gen8_hwsched_fault(struct adreno_device *adreno_dev, u32 fault); + +#endif diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c new file mode 100644 index 0000000000..ae6d9f59b2 --- /dev/null +++ b/adreno_gen8_hwsched_hfi.c @@ -0,0 +1,4185 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include + +#include "adreno.h" +#include "adreno_gen8.h" +#include "adreno_gen8_hwsched.h" +#include "adreno_hfi.h" +#include "adreno_pm4types.h" +#include "adreno_trace.h" +#include "kgsl_device.h" +#include "kgsl_eventlog.h" +#include "kgsl_pwrctrl.h" +#include "kgsl_trace.h" +#include "kgsl_util.h" + +#if (KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE) +#include +#else +#include +#endif + + +#define HFI_QUEUE_MAX (HFI_QUEUE_DEFAULT_CNT + HFI_QUEUE_DISPATCH_MAX_CNT) + +#define DEFINE_QHDR(gmuaddr, id, prio) \ + {\ + .status = 1, \ + .start_addr = GMU_QUEUE_START_ADDR(gmuaddr, id), \ + .type = QUEUE_HDR_TYPE(id, prio, 0, 0), \ + .queue_size = SZ_4K >> 2, \ + .msg_size = 0, \ + .unused0 = 0, \ + .unused1 = 0, \ + .unused2 = 0, \ + .unused3 = 0, \ + .unused4 = 0, \ + .read_index = 0, \ + .write_index = 0, \ +} + +static struct dq_info { + /** @max_dq: Maximum number of dispatch queues per RB level */ + u32 max_dq; + /** @base_dq_id: Base dqid for level */ + u32 base_dq_id; + /** @offset: Next dqid to use for roundrobin context assignment */ + u32 offset; +} gen8_hfi_dqs[KGSL_PRIORITY_MAX_RB_LEVELS] = { + { 4, 0, }, /* RB0 */ + { 4, 4, }, /* RB1 */ + { 3, 8, 
}, /* RB2 */ + { 3, 11, }, /* RB3 */ +}, gen8_hfi_dqs_lpac[KGSL_PRIORITY_MAX_RB_LEVELS + 1] = { + { 4, 0, }, /* RB0 */ + { 4, 4, }, /* RB1 */ + { 3, 8, }, /* RB2 */ + { 2, 11, }, /* RB3 */ + { 1, 13, }, /* RB LPAC */ +}; + +struct pending_cmd gen8_hw_fence_ack; + +struct gen8_hwsched_hfi *to_gen8_hwsched_hfi( + struct adreno_device *adreno_dev) +{ + struct gen8_device *gen8_dev = container_of(adreno_dev, + struct gen8_device, adreno_dev); + struct gen8_hwsched_device *gen8_hwsched = container_of(gen8_dev, + struct gen8_hwsched_device, gen8_dev); + + return &gen8_hwsched->hwsched_hfi; +} + +int gen8_hfi_send_lpac_feature_ctrl(struct adreno_device *adreno_dev) +{ + if (!adreno_dev->lpac_enabled) + return 0; + + return gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_LPAC, 1, 0); +} + +static void add_waiter(struct gen8_hwsched_hfi *hfi, u32 hdr, + struct pending_cmd *ack) +{ + memset(ack, 0x0, sizeof(*ack)); + + init_completion(&ack->complete); + write_lock_irq(&hfi->msglock); + list_add_tail(&ack->node, &hfi->msglist); + write_unlock_irq(&hfi->msglock); + + ack->sent_hdr = hdr; +} + +static void del_waiter(struct gen8_hwsched_hfi *hfi, struct pending_cmd *ack) +{ + write_lock_irq(&hfi->msglock); + list_del(&ack->node); + write_unlock_irq(&hfi->msglock); +} + +static void gen8_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + struct pending_cmd *cmd = NULL; + u32 waiters[64], num_waiters = 0, i; + u32 *ack = rcvd; + u32 hdr = ack[0]; + u32 req_hdr = ack[1]; + u32 size_bytes = MSG_HDR_GET_SIZE(hdr) << 2; + + if (size_bytes > sizeof(cmd->results)) + dev_err_ratelimited(&gmu->pdev->dev, + "Ack result too big: %d Truncating to: %ld\n", + size_bytes, sizeof(cmd->results)); + + read_lock(&hfi->msglock); + + list_for_each_entry(cmd, &hfi->msglist, node) { + if (HDR_CMP_SEQNUM(cmd->sent_hdr, req_hdr)) { + memcpy(cmd->results, ack, + 
min_t(u32, size_bytes, + sizeof(cmd->results))); + complete(&cmd->complete); + read_unlock(&hfi->msglock); + return; + } + + if (num_waiters < ARRAY_SIZE(waiters)) + waiters[num_waiters++] = cmd->sent_hdr; + } + + read_unlock(&hfi->msglock); + + /* Didn't find the sender, list the waiter */ + dev_err_ratelimited(&gmu->pdev->dev, + "Unexpectedly got id %d seqnum %d. Total waiters: %d Top %d Waiters:\n", + MSG_HDR_GET_ID(req_hdr), MSG_HDR_GET_SEQNUM(req_hdr), + num_waiters, min_t(u32, num_waiters, 5)); + + for (i = 0; i < num_waiters && i < 5; i++) + dev_err_ratelimited(&gmu->pdev->dev, + " id %d seqnum %d\n", + MSG_HDR_GET_ID(waiters[i]), + MSG_HDR_GET_SEQNUM(waiters[i])); +} + +/* This function is called while holding the drawctxt spinlock */ +void gen8_remove_hw_fence_entry(struct adreno_device *adreno_dev, + struct adreno_hw_fence_entry *entry) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct adreno_context *drawctxt = entry->drawctxt; + + atomic_dec(&hwsched->hw_fence_count); + drawctxt->hw_fence_count--; + + dma_fence_put(&entry->kfence->fence); + list_del_init(&entry->node); + kmem_cache_free(hwsched->hw_fence_cache, entry); +} + +static void _retire_inflight_hw_fences(struct adreno_device *adreno_dev, + struct kgsl_context *context) +{ + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + struct adreno_hw_fence_entry *entry, *tmp; + + if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags)) + return; + + spin_lock(&drawctxt->lock); + + list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_inflight_list, node) { + struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; + + /* + * Since this list is sorted by timestamp, abort on the first fence that hasn't + * yet been sent to TxQueue + */ + if (timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0) + break; + + gen8_remove_hw_fence_entry(adreno_dev, entry); + } + spin_unlock(&drawctxt->lock); +} + +static void log_profiling_info(struct 
adreno_device *adreno_dev, u32 *rcvd) +{ + struct hfi_ts_retire_cmd *cmd = (struct hfi_ts_retire_cmd *)rcvd; + struct kgsl_context *context; + struct retire_info info = {0}; + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + context = kgsl_context_get(device, cmd->ctxt_id); + if (context == NULL) + return; + + info.timestamp = cmd->ts; + info.rb_id = adreno_get_level(context); + info.gmu_dispatch_queue = context->gmu_dispatch_queue; + info.submitted_to_rb = cmd->submitted_to_rb; + info.sop = cmd->sop; + info.eop = cmd->eop; + if (GMU_VER_MINOR(gmu->ver.hfi) < 4) + info.active = cmd->eop - cmd->sop; + else + info.active = cmd->active; + info.retired_on_gmu = cmd->retired_on_gmu; + + /* protected GPU work must not be reported */ + if (!(context->flags & KGSL_CONTEXT_SECURE)) + kgsl_work_period_update(device, context->proc_priv->period, + info.active); + + trace_adreno_cmdbatch_retired(context, &info, 0, 0, 0); + + log_kgsl_cmdbatch_retired_event(context->id, cmd->ts, + context->priority, 0, cmd->sop, cmd->eop); + + _retire_inflight_hw_fences(adreno_dev, context); + + kgsl_context_put(context); +} + +u32 gen8_hwsched_parse_payload(struct payload_section *payload, u32 key) +{ + u32 i; + + /* Each key-value pair is 2 dwords */ + for (i = 0; i < payload->dwords; i += 2) { + if (payload->data[i] == key) + return payload->data[i + 1]; + } + + return 0; +} + +/* Look up a particular key's value for a given type of payload */ +static u32 gen8_hwsched_lookup_key_value_legacy(struct adreno_device *adreno_dev, + u32 type, u32 key) +{ + struct hfi_context_bad_cmd_legacy *cmd = adreno_dev->hwsched.ctxt_bad; + u32 i = 0, payload_bytes; + void *start; + + if (!cmd->hdr) + return 0; + + payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - + offsetof(struct hfi_context_bad_cmd_legacy, payload); + + start = &cmd->payload[0]; + + while (i < payload_bytes) { + struct payload_section *payload = start + i; + + if 
(payload->type == type) + return gen8_hwsched_parse_payload(payload, key); + + i += struct_size(payload, data, payload->dwords); + } + + return 0; +} + +static u32 get_payload_rb_key_legacy(struct adreno_device *adreno_dev, + u32 rb_id, u32 key) +{ + struct hfi_context_bad_cmd_legacy *cmd = adreno_dev->hwsched.ctxt_bad; + u32 i = 0, payload_bytes; + void *start; + + if (!cmd->hdr) + return 0; + + payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - + offsetof(struct hfi_context_bad_cmd_legacy, payload); + + start = &cmd->payload[0]; + + while (i < payload_bytes) { + struct payload_section *payload = start + i; + + if (payload->type == PAYLOAD_RB) { + u32 id = gen8_hwsched_parse_payload(payload, KEY_RB_ID); + + if (id == rb_id) + return gen8_hwsched_parse_payload(payload, key); + } + + i += struct_size(payload, data, payload->dwords); + } + + return 0; +} + +struct syncobj_flags { + unsigned long mask; + const char *name; +}; + +static void _get_syncobj_string(char *str, u32 max_size, struct hfi_syncobj *syncobj, u32 index) +{ + u32 count = scnprintf(str, max_size, "syncobj[%d] ctxt_id:%llu seqno:%llu flags:", index, + syncobj->ctxt_id, syncobj->seq_no); + u32 i; + bool first = true; + static const struct syncobj_flags _flags[] = { + GMU_SYNCOBJ_FLAGS, { -1, NULL }}; + + for (i = 0; _flags[i].name; i++) { + if (!(syncobj->flags & _flags[i].mask)) + continue; + + if (first) { + count += scnprintf(str + count, max_size - count, "%s", _flags[i].name); + first = false; + } else { + count += scnprintf(str + count, max_size - count, "|%s", _flags[i].name); + } + } +} + +static void log_syncobj(struct gen8_gmu_device *gmu, struct hfi_submit_syncobj *cmd) +{ + struct hfi_syncobj *syncobj = (struct hfi_syncobj *)&cmd[1]; + char str[128]; + u32 i = 0; + + for (i = 0; i < cmd->num_syncobj; i++) { + _get_syncobj_string(str, sizeof(str), syncobj, i); + dev_err(&gmu->pdev->dev, "%s\n", str); + syncobj++; + } +} + +static void find_timeout_syncobj(struct adreno_device *adreno_dev, 
/*
 * find_timeout_syncobj - Locate and log the sync object that timed out
 * @adreno_dev: Pointer to the adreno device
 * @ctxt_id: Id of the context that owns the sync object
 * @ts: Timestamp of the timed-out sync object
 *
 * Walk the context's GMU queue between read_index and write_index looking
 * for the ISSUE_SYNCOBJ message with timestamp @ts and dump its fences.
 * The context is looked up with _kgsl_context_get so a detached context
 * can still be inspected.
 */
static void find_timeout_syncobj(struct adreno_device *adreno_dev,
	u32 ctxt_id, u32 ts)
{
	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
	struct kgsl_context *context = NULL;
	struct adreno_context *drawctxt;
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct gmu_context_queue_header *hdr;
	struct hfi_submit_syncobj *cmd;
	u32 *queue, i;
	int ret;

	/* We want to get the context even if it is detached */
	read_lock(&device->context_lock);
	context = idr_find(&device->context_idr, ctxt_id);
	ret = _kgsl_context_get(context);
	read_unlock(&device->context_lock);

	if (!ret)
		return;

	drawctxt = ADRENO_CONTEXT(context);

	hdr = drawctxt->gmu_context_queue.hostptr;
	queue = (u32 *)(drawctxt->gmu_context_queue.hostptr + sizeof(*hdr));

	for (i = hdr->read_index; i != hdr->write_index;) {
		u32 size = MSG_HDR_GET_SIZE(queue[i]);

		/*
		 * The queue header lives in shared memory. A zero-size
		 * message means the queue is corrupted; bail out instead
		 * of spinning on the same index forever.
		 */
		if (!size) {
			dev_err(&gmu->pdev->dev,
				"Corrupted context queue ctx:%d index:%d\n",
				ctxt_id, i);
			break;
		}

		if (MSG_HDR_GET_ID(queue[i]) != H2F_MSG_ISSUE_SYNCOBJ) {
			i = (i + size) % hdr->queue_size;
			continue;
		}

		cmd = (struct hfi_submit_syncobj *)&queue[i];

		if (cmd->timestamp == ts) {
			log_syncobj(gmu, cmd);
			break;
		}
		i = (i + size) % hdr->queue_size;
	}

	if (i == hdr->write_index)
		dev_err(&gmu->pdev->dev, "Couldn't find unsignaled syncobj ctx:%d ts:%d\n",
			ctxt_id, ts);

	kgsl_context_put(context);
}
gen8_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_PROTECTED_ERROR); + + dev_crit_ratelimited(dev, + "CP | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", + status & (1 << 20) ? "READ" : "WRITE", + status & 0x3FFFF, status); + } + break; + case GMU_CP_ILLEGAL_INST_ERROR: + dev_crit_ratelimited(dev, "CP Illegal instruction error\n"); + break; + case GMU_CP_UCODE_ERROR: + dev_crit_ratelimited(dev, "CP ucode error interrupt\n"); + break; + case GMU_CP_HW_FAULT_ERROR: + dev_crit_ratelimited(dev, + "CP | Ringbuffer HW fault | status=0x%8.8x\n", + gen8_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_HW_FAULT)); + break; + case GMU_GPU_PREEMPT_TIMEOUT: { + u32 cur, next, cur_rptr, cur_wptr, next_rptr, next_wptr; + + cur = gen8_hwsched_lookup_key_value_legacy(adreno_dev, + PAYLOAD_PREEMPT_TIMEOUT, KEY_PREEMPT_TIMEOUT_CUR_RB_ID); + next = gen8_hwsched_lookup_key_value_legacy(adreno_dev, + PAYLOAD_PREEMPT_TIMEOUT, + KEY_PREEMPT_TIMEOUT_NEXT_RB_ID); + cur_rptr = get_payload_rb_key_legacy(adreno_dev, cur, KEY_RB_RPTR); + cur_wptr = get_payload_rb_key_legacy(adreno_dev, cur, KEY_RB_WPTR); + next_rptr = get_payload_rb_key_legacy(adreno_dev, next, KEY_RB_RPTR); + next_wptr = get_payload_rb_key_legacy(adreno_dev, next, KEY_RB_WPTR); + + dev_crit_ratelimited(dev, + "Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n", + cur, cur_rptr, cur_wptr, next, next_rptr, next_wptr); + } + break; + case GMU_CP_GPC_ERROR: + dev_crit_ratelimited(dev, "RBBM: GPC error\n"); + break; + case GMU_CP_BV_OPCODE_ERROR: + dev_crit_ratelimited(dev, + "CP BV opcode error | opcode=0x%8.8x\n", + gen8_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_BV_OPCODE_ERROR)); + break; + case GMU_CP_BV_PROTECTED_ERROR: { + u32 status = gen8_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_BV_PROTECTED_ERROR); + + dev_crit_ratelimited(dev, + "CP BV | Protected mode error | %s | addr=0x%5.5x | 
status=0x%8.8x\n", + status & (1 << 20) ? "READ" : "WRITE", + status & 0x3FFFF, status); + } + break; + case GMU_CP_BV_HW_FAULT_ERROR: + dev_crit_ratelimited(dev, + "CP BV | Ringbuffer HW fault | status=0x%8.8x\n", + gen8_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_HW_FAULT)); + break; + case GMU_CP_BV_ILLEGAL_INST_ERROR: + dev_crit_ratelimited(dev, "CP BV Illegal instruction error\n"); + break; + case GMU_CP_BV_UCODE_ERROR: + dev_crit_ratelimited(dev, "CP BV ucode error interrupt\n"); + break; + case GMU_GPU_SW_FUSE_VIOLATION: + dev_crit_ratelimited(dev, "RBBM: SW Feature Fuse violation status=0x%8.8x\n", + gen8_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_SWFUSE_VIOLATION_FAULT)); + break; + case GMU_GPU_AQE0_OPCODE_ERRROR: + dev_crit_ratelimited(dev, "AQE0 opcode error | opcode=0x%8.8x\n", + gen8_hwsched_lookup_key_value_legacy(adreno_dev, + PAYLOAD_FAULT_REGS, KEY_AQE0_OPCODE_ERROR)); + break; + case GMU_GPU_AQE0_UCODE_ERROR: + dev_crit_ratelimited(dev, "AQE0 ucode error interrupt\n"); + break; + case GMU_GPU_AQE0_HW_FAULT_ERROR: + dev_crit_ratelimited(dev, "AQE0 HW fault | status=0x%8.8x\n", + gen8_hwsched_lookup_key_value_legacy(adreno_dev, + PAYLOAD_FAULT_REGS, KEY_AQE0_HW_FAULT)); + break; + case GMU_GPU_AQE0_ILLEGAL_INST_ERROR: + dev_crit_ratelimited(dev, "AQE0 Illegal instruction error\n"); + break; + case GMU_GPU_AQE1_OPCODE_ERRROR: + dev_crit_ratelimited(dev, "AQE1 opcode error | opcode=0x%8.8x\n", + gen8_hwsched_lookup_key_value_legacy(adreno_dev, + PAYLOAD_FAULT_REGS, KEY_AQE1_OPCODE_ERROR)); + break; + case GMU_GPU_AQE1_UCODE_ERROR: + dev_crit_ratelimited(dev, "AQE1 ucode error interrupt\n"); + break; + case GMU_GPU_AQE1_HW_FAULT_ERROR: + dev_crit_ratelimited(dev, "AQE1 HW fault | status=0x%8.8x\n", + gen8_hwsched_lookup_key_value_legacy(adreno_dev, + PAYLOAD_FAULT_REGS, KEY_AQE1_HW_FAULT)); + break; + case GMU_GPU_AQE1_ILLEGAL_INST_ERROR: + dev_crit_ratelimited(dev, "AQE1 Illegal instruction 
error\n"); + break; + case GMU_SYNCOBJ_TIMEOUT_ERROR: + dev_crit_ratelimited(dev, "syncobj timeout ctx %d ts %u\n", + cmd->ctxt_id, cmd->ts); + find_timeout_syncobj(adreno_dev, cmd->ctxt_id, cmd->ts); + break; + case GMU_CP_UNKNOWN_ERROR: + fallthrough; + default: + dev_crit_ratelimited(dev, "Unknown GPU fault: %u\n", + cmd->error); + break; + } +} + +/* Look up a particular key's value for a given type of payload */ +static u32 gen8_hwsched_lookup_key_value(struct adreno_device *adreno_dev, + u32 type, u32 key) +{ + struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; + u32 i = 0, payload_bytes; + void *start; + + if (!cmd->hdr) + return 0; + + payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - + offsetof(struct hfi_context_bad_cmd, payload); + + start = &cmd->payload[0]; + + while (i < payload_bytes) { + struct payload_section *payload = start + i; + + if (payload->type == type) + return gen8_hwsched_parse_payload(payload, key); + + i += struct_size(payload, data, payload->dwords); + } + + return 0; +} + +static u32 get_payload_rb_key(struct adreno_device *adreno_dev, + u32 rb_id, u32 key) +{ + struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; + u32 i = 0, payload_bytes; + void *start; + + if (!cmd->hdr) + return 0; + + payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - + offsetof(struct hfi_context_bad_cmd, payload); + + start = &cmd->payload[0]; + + while (i < payload_bytes) { + struct payload_section *payload = start + i; + + if (payload->type == PAYLOAD_RB) { + u32 id = gen8_hwsched_parse_payload(payload, KEY_RB_ID); + + if (id == rb_id) + return gen8_hwsched_parse_payload(payload, key); + } + + i += struct_size(payload, data, payload->dwords); + } + + return 0; +} + +static void log_gpu_fault(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct device *dev = &gmu->pdev->dev; + struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; + + switch (cmd->error) { + case 
GMU_GPU_HW_HANG: + dev_crit_ratelimited(dev, "MISC: GPU hang detected\n"); + break; + case GMU_GPU_SW_HANG: + dev_crit_ratelimited(dev, "gpu timeout ctx %d ts %d\n", + cmd->gc.ctxt_id, cmd->gc.ts); + break; + case GMU_CP_OPCODE_ERROR: + dev_crit_ratelimited(dev, + "CP opcode error interrupt | opcode=0x%8.8x\n", + gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_OPCODE_ERROR)); + break; + case GMU_CP_PROTECTED_ERROR: { + u32 status = gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_PROTECTED_ERROR); + + dev_crit_ratelimited(dev, + "CP | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", + status & (1 << 20) ? "READ" : "WRITE", + status & 0x3FFFF, status); + } + break; + case GMU_CP_ILLEGAL_INST_ERROR: + dev_crit_ratelimited(dev, "CP Illegal instruction error\n"); + break; + case GMU_CP_UCODE_ERROR: + dev_crit_ratelimited(dev, "CP ucode error interrupt\n"); + break; + case GMU_CP_HW_FAULT_ERROR: + dev_crit_ratelimited(dev, + "CP | Ringbuffer HW fault | status=0x%8.8x\n", + gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_HW_FAULT)); + break; + case GMU_GPU_PREEMPT_TIMEOUT: { + u32 cur, next, cur_rptr, cur_wptr, next_rptr, next_wptr; + + cur = gen8_hwsched_lookup_key_value(adreno_dev, + PAYLOAD_PREEMPT_TIMEOUT, KEY_PREEMPT_TIMEOUT_CUR_RB_ID); + next = gen8_hwsched_lookup_key_value(adreno_dev, + PAYLOAD_PREEMPT_TIMEOUT, + KEY_PREEMPT_TIMEOUT_NEXT_RB_ID); + cur_rptr = get_payload_rb_key(adreno_dev, cur, KEY_RB_RPTR); + cur_wptr = get_payload_rb_key(adreno_dev, cur, KEY_RB_WPTR); + next_rptr = get_payload_rb_key(adreno_dev, next, KEY_RB_RPTR); + next_wptr = get_payload_rb_key(adreno_dev, next, KEY_RB_WPTR); + + dev_crit_ratelimited(dev, + "Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n", + cur, cur_rptr, cur_wptr, next, next_rptr, next_wptr); + } + break; + case GMU_CP_GPC_ERROR: + dev_crit_ratelimited(dev, "RBBM: GPC error\n"); + break; + case GMU_CP_BV_OPCODE_ERROR: + 
dev_crit_ratelimited(dev, + "CP BV opcode error | opcode=0x%8.8x\n", + gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_BV_OPCODE_ERROR)); + break; + case GMU_CP_BV_PROTECTED_ERROR: { + u32 status = gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_BV_PROTECTED_ERROR); + + dev_crit_ratelimited(dev, + "CP BV | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", + status & (1 << 20) ? "READ" : "WRITE", + status & 0x3FFFF, status); + } + break; + case GMU_CP_BV_HW_FAULT_ERROR: + dev_crit_ratelimited(dev, + "CP BV | Ringbuffer HW fault | status=0x%8.8x\n", + gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_HW_FAULT)); + break; + case GMU_CP_BV_ILLEGAL_INST_ERROR: + dev_crit_ratelimited(dev, "CP BV Illegal instruction error\n"); + break; + case GMU_CP_BV_UCODE_ERROR: + dev_crit_ratelimited(dev, "CP BV ucode error interrupt\n"); + break; + case GMU_CP_LPAC_OPCODE_ERROR: + dev_crit_ratelimited(dev, + "CP LPAC opcode error | opcode=0x%8.8x\n", + gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_LPAC_OPCODE_ERROR)); + break; + case GMU_CP_LPAC_PROTECTED_ERROR: { + u32 status = gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_LPAC_PROTECTED_ERROR); + + dev_crit_ratelimited(dev, + "CP LPAC | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", + status & (1 << 20) ? 
"READ" : "WRITE", + status & 0x3FFFF, status); + } + break; + case GMU_CP_LPAC_HW_FAULT_ERROR: + dev_crit_ratelimited(dev, + "CP LPAC | Ringbuffer HW fault | status=0x%8.8x\n", + gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_LPAC_HW_FAULT)); + break; + case GMU_CP_LPAC_ILLEGAL_INST_ERROR: + dev_crit_ratelimited(dev, "CP LPAC Illegal instruction error\n"); + break; + case GMU_CP_LPAC_UCODE_ERROR: + dev_crit_ratelimited(dev, "CP LPAC ucode error interrupt\n"); + break; + case GMU_GPU_LPAC_SW_HANG: + dev_crit_ratelimited(dev, "LPAC: gpu timeout ctx %d ts %d\n", + cmd->lpac.ctxt_id, cmd->lpac.ts); + break; + case GMU_GPU_SW_FUSE_VIOLATION: + dev_crit_ratelimited(dev, "RBBM: SW Feature Fuse violation status=0x%8.8x\n", + gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_SWFUSE_VIOLATION_FAULT)); + break; + case GMU_GPU_AQE0_OPCODE_ERRROR: + dev_crit_ratelimited(dev, "AQE0 opcode error | opcode=0x%8.8x\n", + gen8_hwsched_lookup_key_value(adreno_dev, + PAYLOAD_FAULT_REGS, KEY_AQE0_OPCODE_ERROR)); + break; + case GMU_GPU_AQE0_UCODE_ERROR: + dev_crit_ratelimited(dev, "AQE0 ucode error interrupt\n"); + break; + case GMU_GPU_AQE0_HW_FAULT_ERROR: + dev_crit_ratelimited(dev, "AQE0 HW fault | status=0x%8.8x\n", + gen8_hwsched_lookup_key_value(adreno_dev, + PAYLOAD_FAULT_REGS, KEY_AQE0_HW_FAULT)); + break; + case GMU_GPU_AQE0_ILLEGAL_INST_ERROR: + dev_crit_ratelimited(dev, "AQE0 Illegal instruction error\n"); + break; + case GMU_GPU_AQE1_OPCODE_ERRROR: + dev_crit_ratelimited(dev, "AQE1 opcode error | opcode=0x%8.8x\n", + gen8_hwsched_lookup_key_value(adreno_dev, + PAYLOAD_FAULT_REGS, KEY_AQE1_OPCODE_ERROR)); + break; + case GMU_GPU_AQE1_UCODE_ERROR: + dev_crit_ratelimited(dev, "AQE1 ucode error interrupt\n"); + break; + case GMU_GPU_AQE1_HW_FAULT_ERROR: + dev_crit_ratelimited(dev, "AQE1 HW fault | status=0x%8.8x\n", + gen8_hwsched_lookup_key_value(adreno_dev, + PAYLOAD_FAULT_REGS, KEY_AQE1_HW_FAULT)); + break; + case 
GMU_GPU_AQE1_ILLEGAL_INST_ERROR: + dev_crit_ratelimited(dev, "AQE1 Illegal instruction error\n"); + break; + case GMU_SYNCOBJ_TIMEOUT_ERROR: + dev_crit_ratelimited(dev, "syncobj timeout ctx %d ts %u\n", + cmd->gc.ctxt_id, cmd->gc.ts); + find_timeout_syncobj(adreno_dev, cmd->gc.ctxt_id, cmd->gc.ts); + break; + case GMU_CP_UNKNOWN_ERROR: + fallthrough; + default: + dev_crit_ratelimited(dev, "Unknown GPU fault: %u\n", + cmd->error); + break; + } +} + +static u32 peek_next_header(struct gen8_gmu_device *gmu, uint32_t queue_idx) +{ + struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem; + struct hfi_queue_table *tbl = mem_addr->hostptr; + struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx]; + u32 *queue; + + if (hdr->status == HFI_QUEUE_STATUS_DISABLED) + return 0; + + if (hdr->read_index == hdr->write_index) + return 0; + + queue = HOST_QUEUE_START_ADDR(mem_addr, queue_idx); + + return queue[hdr->read_index]; +} + +static void process_ctx_bad(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + + if (GMU_VER_MINOR(gmu->ver.hfi) < 2) + log_gpu_fault_legacy(adreno_dev); + else + log_gpu_fault(adreno_dev); + + gen8_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); +} + +#define GET_QUERIED_FENCE_INDEX(x) (x / BITS_PER_SYNCOBJ_QUERY) +#define GET_QUERIED_FENCE_BIT(x) (x % BITS_PER_SYNCOBJ_QUERY) + +static bool fence_is_queried(struct hfi_syncobj_query_cmd *cmd, u32 fence_index) +{ + u32 index = GET_QUERIED_FENCE_INDEX(fence_index); + u32 bit = GET_QUERIED_FENCE_BIT(fence_index); + + return (cmd->queries[index].query_bitmask & BIT(bit)); +} + +static void set_fence_signal_bit(struct adreno_device *adreno_dev, + struct hfi_syncobj_query_cmd *reply, struct dma_fence *fence, u32 fence_index, + char *name) +{ + u32 index = GET_QUERIED_FENCE_INDEX(fence_index); + u32 bit = GET_QUERIED_FENCE_BIT(fence_index); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + u64 flags = ADRENO_HW_FENCE_SW_STATUS_PENDING; + char value[32] = "unknown"; + 
+ if (fence->ops->timeline_value_str) + fence->ops->timeline_value_str(fence, value, sizeof(value)); + + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { + dev_err(&gmu->pdev->dev, + "GMU is waiting for signaled fence(ctx:%llu seqno:%llu value:%s)\n", + fence->context, fence->seqno, value); + reply->queries[index].query_bitmask |= BIT(bit); + flags = ADRENO_HW_FENCE_SW_STATUS_SIGNALED; + } + trace_adreno_hw_fence_query(fence->context, fence->seqno, flags, name, value); +} + +static void gen8_syncobj_query_reply(struct adreno_device *adreno_dev, + struct kgsl_drawobj *drawobj, struct hfi_syncobj_query_cmd *cmd) +{ + struct hfi_syncobj_query_cmd reply = {0}; + struct gen8_hfi *hfi = to_gen8_hfi(adreno_dev); + int i, j, fence_index = 0; + struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + for (i = 0; i < syncobj->numsyncs; i++) { + struct kgsl_drawobj_sync_event *event = &syncobj->synclist[i]; + struct kgsl_sync_fence_cb *kcb = event->handle; + struct dma_fence **fences; + struct dma_fence_array *array; + struct event_fence_info *info = event->priv; + u32 num_fences; + + array = to_dma_fence_array(kcb->fence); + if (array != NULL) { + num_fences = array->num_fences; + fences = array->fences; + } else { + num_fences = 1; + fences = &kcb->fence; + } + + for (j = 0; j < num_fences; j++, fence_index++) { + if (!fence_is_queried(cmd, fence_index)) + continue; + + set_fence_signal_bit(adreno_dev, &reply, fences[j], fence_index, + info ? 
info->fences[j].name : "unknown"); + } + } + + reply.hdr = CREATE_MSG_HDR(F2H_MSG_SYNCOBJ_QUERY, HFI_MSG_CMD); + reply.hdr = MSG_HDR_SET_SEQNUM(reply.hdr, + atomic_inc_return(&hfi->seqnum)); + reply.gmu_ctxt_id = cmd->gmu_ctxt_id; + reply.sync_obj_ts = cmd->sync_obj_ts; + + trace_adreno_syncobj_query_reply(reply.gmu_ctxt_id, reply.sync_obj_ts, + gpudev->read_alwayson(adreno_dev)); + + gen8_hfi_send_cmd_async(adreno_dev, &reply, sizeof(reply)); +} + +struct syncobj_query_work { + /** @cmd: The query command to be processed */ + struct hfi_syncobj_query_cmd cmd; + /** @context: kgsl context that is waiting for this sync object */ + struct kgsl_context *context; + /** @work: The work structure to execute syncobj query reply */ + struct kthread_work work; +}; + +static void gen8_process_syncobj_query_work(struct kthread_work *work) +{ + struct syncobj_query_work *query_work = container_of(work, + struct syncobj_query_work, work); + struct hfi_syncobj_query_cmd *cmd = (struct hfi_syncobj_query_cmd *)&query_work->cmd; + struct kgsl_context *context = query_work->context; + struct kgsl_device *device = context->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct cmd_list_obj *obj; + bool missing = true; + + mutex_lock(&hwsched->mutex); + mutex_lock(&device->mutex); + + list_for_each_entry(obj, &hwsched->cmd_list, node) { + struct kgsl_drawobj *drawobj = obj->drawobj; + + if ((drawobj->type & SYNCOBJ_TYPE) == 0) + continue; + + if ((drawobj->context->id == cmd->gmu_ctxt_id) && + (drawobj->timestamp == cmd->sync_obj_ts)) { + gen8_syncobj_query_reply(adreno_dev, drawobj, cmd); + missing = false; + break; + } + } + + if (missing) { + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; + + /* + * If the sync object is not found, it can only mean that the 
sync object was + * retired by the GMU in the meanwhile. However, if that is not the case, then + * we have a problem. + */ + if (timestamp_cmp(cmd->sync_obj_ts, hdr->sync_obj_ts) > 0) { + dev_err(&gmu->pdev->dev, "Missing sync object ctx:%d ts:%d retired:%d\n", + context->id, cmd->sync_obj_ts, hdr->sync_obj_ts); + gmu_core_fault_snapshot(device); + gen8_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + } + } + + mutex_unlock(&device->mutex); + mutex_unlock(&hwsched->mutex); + + kgsl_context_put(context); + kfree(query_work); +} + +static void gen8_trigger_syncobj_query(struct adreno_device *adreno_dev, + u32 *rcvd) +{ + struct syncobj_query_work *query_work; + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct hfi_syncobj_query_cmd *cmd = (struct hfi_syncobj_query_cmd *)rcvd; + struct kgsl_context *context = NULL; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int ret; + + trace_adreno_syncobj_query(cmd->gmu_ctxt_id, cmd->sync_obj_ts, + gpudev->read_alwayson(adreno_dev)); + + /* + * We need the context even if it is detached. Hence, we can't use kgsl_context_get here. + * We must make sure that this context id doesn't get destroyed (to avoid re-use) until GMU + * has ack'd the query reply. + */ + read_lock(&device->context_lock); + context = idr_find(&device->context_idr, cmd->gmu_ctxt_id); + ret = _kgsl_context_get(context); + read_unlock(&device->context_lock); + + if (!ret) + return; + + query_work = kzalloc(sizeof(*query_work), GFP_KERNEL); + if (!query_work) { + kgsl_context_put(context); + return; + } + + kthread_init_work(&query_work->work, gen8_process_syncobj_query_work); + memcpy(&query_work->cmd, cmd, sizeof(*cmd)); + query_work->context = context; + + kthread_queue_work(hwsched->worker, &query_work->work); +} + +/* + * This defines the maximum unack'd hardware fences that we allow. 
When this limit is reached, we + * will put all threads (that want to create a hardware fence) to sleep until the maximum unack'd + * hardware fence count drops to MIN_HW_FENCE_UNACK_COUNT + */ +#define MAX_HW_FENCE_UNACK_COUNT 20 + +/* + * Once the maximum unack'd hardware fences drops to this value, wake up all the threads (that want + * to create hardware fences) + */ +#define MIN_HW_FENCE_UNACK_COUNT 10 + +/* + * This is the maximum duration (in milliseconds) a thread (that wants to create a hardware fence) + * is put to sleep while we wait for the maximum number of unack'd hardware fences to drop from + * MAX_HW_FENCE_UNACK_COUNT to MIN_HW_FENCE_UNACK_COUNT. If the count doesn't drop to the desired + * value, then log an error and trigger snapshot and recovery. + */ +#define HW_FENCE_SLEEP_MS 200 + +static void _enable_hw_fence_throttle(struct adreno_device *adreno_dev) +{ + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + + set_bit(GEN8_HWSCHED_HW_FENCE_SLEEP_BIT, &hfi->hw_fence.flags); + set_bit(GEN8_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags); + + /* Avoid submitting new work to gpu until the unack count drops to a desired threshold */ + adreno_get_gpu_halt(adreno_dev); + + mod_timer(&hfi->hw_fence_timer, jiffies + msecs_to_jiffies(HW_FENCE_SLEEP_MS)); +} + +static void _increment_hw_fence_unack_count(struct adreno_device *adreno_dev) +{ + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + + if ((++hfi->hw_fence.unack_count) == MAX_HW_FENCE_UNACK_COUNT) + _enable_hw_fence_throttle(adreno_dev); +} + +/** + * _send_hw_fence_no_ack - Send a hardware fence hfi packet to GMU without waiting for its ack. 
+ * Increment the unack count on success + * + * Return: 0 on success or negative error on failure + */ +static int _send_hw_fence_no_ack(struct adreno_device *adreno_dev, + struct adreno_hw_fence_entry *entry) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret; + + entry->cmd.hdr = MSG_HDR_SET_SEQNUM(entry->cmd.hdr, + atomic_inc_return(&gmu->hfi.seqnum)); + + ret = gen8_hfi_cmdq_write(adreno_dev, (u32 *)&entry->cmd, sizeof(entry->cmd)); + if (!ret) + _increment_hw_fence_unack_count(adreno_dev); + + return ret; +} + +static struct adreno_hw_fence_entry *_get_deferred_hw_fence(struct adreno_context *drawctxt, u32 ts) +{ + struct adreno_hw_fence_entry *entry = NULL, *next, *deferred_hw_fence_entry = NULL; + + spin_lock(&drawctxt->lock); + list_for_each_entry_safe(entry, next, &drawctxt->hw_fence_list, node) { + + if (timestamp_cmp((u32)entry->cmd.ts, ts) > 0) + break; + + /* We found a deferred hardware fence */ + deferred_hw_fence_entry = entry; + break; + } + spin_unlock(&drawctxt->lock); + + /* + * This path executes in isolation from any paths that may release this entry. 
So, it is + * safe to handle this entry outside of the drawctxt spinlock + */ + return deferred_hw_fence_entry; +} + +static int _send_deferred_hw_fence(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, struct adreno_hw_fence_entry *entry, u32 ts) +{ + bool retired = kgsl_check_timestamp(KGSL_DEVICE(adreno_dev), &drawctxt->base, ts) || + kgsl_context_is_bad(&drawctxt->base); + int ret = 0; + u32 flags = 0; + + if (retired) + flags |= HW_FENCE_FLAG_SKIP_MEMSTORE; + + ret = gen8_send_hw_fence_hfi_wait_ack(adreno_dev, entry, flags); + if (ret) + return ret; + + spin_lock(&drawctxt->lock); + if (!retired) + list_move_tail(&entry->node, &drawctxt->hw_fence_inflight_list); + else + gen8_remove_hw_fence_entry(adreno_dev, entry); + spin_unlock(&drawctxt->lock); + + return 0; +} + +/** + * process_hw_fence_deferred_ctxt - This function sends hardware fences to GMU (from the + * deferred drawctxt) which couldn't be sent earlier + */ +static int process_hw_fence_deferred_ctxt(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, u32 ts) +{ + struct adreno_hw_fence_entry *deferred_hw_fence_entry = NULL; + int ret = 0; + + do { + deferred_hw_fence_entry = _get_deferred_hw_fence(drawctxt, ts); + + if (!deferred_hw_fence_entry) + break; + + ret = _send_deferred_hw_fence(adreno_dev, drawctxt, deferred_hw_fence_entry, ts); + if (ret) + break; + + } while (deferred_hw_fence_entry != NULL); + + return ret; +} + +static void _disable_hw_fence_throttle(struct adreno_device *adreno_dev, bool clear_abort_bit) +{ + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + bool max; + + spin_lock(&hfi->hw_fence.lock); + + hfi->hw_fence.defer_drawctxt = NULL; + hfi->hw_fence.defer_ts = 0; + max = test_bit(GEN8_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags); + if (max) { + clear_bit(GEN8_HWSCHED_HW_FENCE_SLEEP_BIT, &hfi->hw_fence.flags); + clear_bit(GEN8_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags); + } + + if (clear_abort_bit) + 
clear_bit(GEN8_HWSCHED_HW_FENCE_ABORT_BIT, &hfi->hw_fence.flags); + spin_unlock(&hfi->hw_fence.lock); + + /* Wake up dispatcher and any sleeping threads that want to create hardware fences */ + if (max) { + adreno_put_gpu_halt(adreno_dev); + adreno_hwsched_trigger(adreno_dev); + wake_up_all(&hfi->hw_fence.unack_wq); + } +} + +static void gen8_defer_hw_fence_work(struct kthread_work *work) +{ + struct gen8_hwsched_hfi *hfi = container_of(work, + struct gen8_hwsched_hfi, defer_hw_fence_work); + struct adreno_context *drawctxt = NULL; + struct kgsl_device *device; + struct adreno_device *adreno_dev; + u32 ts; + int ret; + + spin_lock(&hfi->hw_fence.lock); + drawctxt = hfi->hw_fence.defer_drawctxt; + ts = hfi->hw_fence.defer_ts; + spin_unlock(&hfi->hw_fence.lock); + + device = drawctxt->base.device; + adreno_dev = ADRENO_DEVICE(device); + + /* + * Grab the dispatcher and device mutex as we don't want to race with concurrent fault + * recovery + */ + mutex_lock(&adreno_dev->hwsched.mutex); + mutex_lock(&device->mutex); + + ret = process_hw_fence_deferred_ctxt(adreno_dev, drawctxt, ts); + if (ret) { + /* the deferred drawctxt will be handled post fault recovery */ + gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); + goto unlock; + } + + /* + * Put back the context reference which was incremented when hw_fence.defer_drawctxt was set + */ + kgsl_context_put(&drawctxt->base); + + gen8_hwsched_active_count_put(adreno_dev); + + _disable_hw_fence_throttle(adreno_dev, false); + +unlock: + mutex_unlock(&device->mutex); + mutex_unlock(&adreno_dev->hwsched.mutex); +} + +static void process_hw_fence_ack(struct adreno_device *adreno_dev, u32 received_hdr) +{ + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + struct adreno_context *drawctxt = NULL; + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + + spin_lock(&hfi->hw_fence.lock); + + /* If this ack is being waited on, we don't need to touch the unack count */ + if (gen8_hw_fence_ack.sent_hdr && + 
HDR_CMP_SEQNUM(gen8_hw_fence_ack.sent_hdr, received_hdr)) { + spin_unlock(&hfi->hw_fence.lock); + complete(&gen8_hw_fence_ack.complete); + return; + } + + hfi->hw_fence.unack_count--; + + /* The unack count should never be greater than MAX_HW_FENCE_UNACK_COUNT */ + if (hfi->hw_fence.unack_count > MAX_HW_FENCE_UNACK_COUNT) + dev_err(&gmu->pdev->dev, "unexpected hardware fence unack count:%d\n", + hfi->hw_fence.unack_count); + + if (!test_bit(GEN8_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags) || + (hfi->hw_fence.unack_count != MIN_HW_FENCE_UNACK_COUNT)) { + spin_unlock(&hfi->hw_fence.lock); + return; + } + + drawctxt = hfi->hw_fence.defer_drawctxt; + + spin_unlock(&hfi->hw_fence.lock); + + del_timer_sync(&hfi->hw_fence_timer); + + /* + * We need to handle the deferred context in another thread so that we can unblock the f2h + * daemon here as it will need to process the acks for the hardware fences belonging to the + * deferred context + */ + if (drawctxt) { + kthread_init_work(&hfi->defer_hw_fence_work, gen8_defer_hw_fence_work); + kthread_queue_work(adreno_dev->hwsched.worker, &hfi->defer_hw_fence_work); + return; + } + + _disable_hw_fence_throttle(adreno_dev, false); +} + +void gen8_hwsched_process_msgq(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev); + u32 rcvd[MAX_RCVD_SIZE], next_hdr, type; + + mutex_lock(&hw_hfi->msgq_mutex); + + for (;;) { + next_hdr = peek_next_header(gmu, HFI_MSG_ID); + + if (!next_hdr) + break; + + if (MSG_HDR_GET_TYPE(next_hdr) == HFI_MSG_ACK) + type = HFI_MSG_ACK; + else + type = MSG_HDR_GET_ID(next_hdr); + + if (type != F2H_MSG_CONTEXT_BAD) + gen8_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)); + + switch (type) { + case HFI_MSG_ACK: + /* + * We are assuming that there is only one outstanding ack because hfi + * sending thread waits for completion while holding the device mutex + * (except when we send 
H2F_MSG_HW_FENCE_INFO packets) + */ + if (MSG_HDR_GET_ID(rcvd[1]) == H2F_MSG_HW_FENCE_INFO) + process_hw_fence_ack(adreno_dev, rcvd[1]); + else + gen8_receive_ack_async(adreno_dev, rcvd); + break; + case F2H_MSG_CONTEXT_BAD: + gen8_hfi_queue_read(gmu, HFI_MSG_ID, (u32 *)adreno_dev->hwsched.ctxt_bad, + HFI_MAX_MSG_SIZE); + process_ctx_bad(adreno_dev); + break; + case F2H_MSG_TS_RETIRE: + log_profiling_info(adreno_dev, rcvd); + adreno_hwsched_trigger(adreno_dev); + break; + case F2H_MSG_SYNCOBJ_QUERY: + gen8_trigger_syncobj_query(adreno_dev, rcvd); + break; + case F2H_MSG_GMU_CNTR_RELEASE: { + struct hfi_gmu_cntr_release_cmd *cmd = + (struct hfi_gmu_cntr_release_cmd *) rcvd; + + adreno_perfcounter_put(adreno_dev, + cmd->group_id, cmd->countable, PERFCOUNTER_FLAG_KERNEL); + } + break; + } + } + mutex_unlock(&hw_hfi->msgq_mutex); +} + +static void process_log_block(struct adreno_device *adreno_dev, void *data) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct hfi_log_block *cmd = data; + u32 *log_event = gmu->gmu_log->hostptr; + u32 start, end; + + start = cmd->start_index; + end = cmd->stop_index; + + log_event += start * 4; + while (start != end) { + trace_gmu_event(log_event); + log_event += 4; + start++; + } +} + +static void gen8_hwsched_process_dbgq(struct adreno_device *adreno_dev, bool limited) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + u32 rcvd[MAX_RCVD_SIZE]; + bool recovery = false; + + while (gen8_hfi_queue_read(gmu, HFI_DBG_ID, rcvd, sizeof(rcvd)) > 0) { + + if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_ERR) { + adreno_gen8_receive_err_req(gmu, rcvd); + recovery = true; + break; + } + + if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_DEBUG) + adreno_gen8_receive_debug_req(gmu, rcvd); + + if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_LOG_BLOCK) + process_log_block(adreno_dev, rcvd); + + /* Process one debug queue message and return to not delay msgq processing */ + if (limited) + break; + } + + if (!recovery) + return; + + 
gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); +} + +/* HFI interrupt handler */ +static irqreturn_t gen8_hwsched_hfi_handler(int irq, void *data) +{ + struct adreno_device *adreno_dev = data; + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 status = 0; + + gmu_core_regread(device, GEN8_GMUCX_GMU2HOST_INTR_INFO, &status); + gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR, hfi->irq_mask); + + /* + * If interrupts are not enabled on the HFI message queue, + * the inline message processing loop will process it, + * else, process it here. + */ + if (!(hfi->irq_mask & HFI_IRQ_MSGQ_MASK)) + status &= ~HFI_IRQ_MSGQ_MASK; + + if (status & (HFI_IRQ_MSGQ_MASK | HFI_IRQ_DBGQ_MASK)) { + wake_up_interruptible(&hfi->f2h_wq); + adreno_hwsched_trigger(adreno_dev); + } + if (status & HFI_IRQ_CM3_FAULT_MASK) { + atomic_set(&gmu->cm3_fault, 1); + + /* make sure other CPUs see the update */ + smp_wmb(); + + dev_err_ratelimited(&gmu->pdev->dev, + "GMU CM3 fault interrupt received\n"); + + gen8_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + } + + /* Ignore OOB bits */ + status &= GENMASK(31 - (oob_max - 1), 0); + + if (status & ~hfi->irq_mask) + dev_err_ratelimited(&gmu->pdev->dev, + "Unhandled HFI interrupts 0x%x\n", + status & ~hfi->irq_mask); + + return IRQ_HANDLED; +} + +#define HFI_IRQ_MSGQ_MASK BIT(0) + +static int check_ack_failure(struct adreno_device *adreno_dev, + struct pending_cmd *ack) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + + if (ack->results[2] != 0xffffffff) + return 0; + + dev_err(&gmu->pdev->dev, + "ACK error: sender id %d seqnum %d\n", + MSG_HDR_GET_ID(ack->sent_hdr), + MSG_HDR_GET_SEQNUM(ack->sent_hdr)); + + return -EINVAL; +} + +int gen8_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 size_bytes) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct 
gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + u32 *cmd = data; + u32 seqnum = atomic_inc_return(&gmu->hfi.seqnum); + int rc; + struct pending_cmd pending_ack; + + *cmd = MSG_HDR_SET_SEQNUM(*cmd, seqnum); + + add_waiter(hfi, *cmd, &pending_ack); + + rc = gen8_hfi_cmdq_write(adreno_dev, cmd, size_bytes); + if (rc) + goto done; + + rc = adreno_hwsched_wait_ack_completion(adreno_dev, &gmu->pdev->dev, &pending_ack, + gen8_hwsched_process_msgq); + if (rc) + goto done; + + rc = check_ack_failure(adreno_dev, &pending_ack); + +done: + del_waiter(hfi, &pending_ack); + + return rc; +} + +static void init_queues(struct gen8_hfi *hfi) +{ + u32 gmuaddr = hfi->hfi_mem->gmuaddr; + struct hfi_queue_table hfi_table = { + .qtbl_hdr = { + .version = 0, + .size = sizeof(struct hfi_queue_table) >> 2, + .qhdr0_offset = + sizeof(struct hfi_queue_table_header) >> 2, + .qhdr_size = sizeof(struct hfi_queue_header) >> 2, + .num_q = HFI_QUEUE_MAX, + .num_active_q = HFI_QUEUE_MAX, + }, + .qhdr = { + DEFINE_QHDR(gmuaddr, HFI_CMD_ID, 0), + DEFINE_QHDR(gmuaddr, HFI_MSG_ID, 0), + DEFINE_QHDR(gmuaddr, HFI_DBG_ID, 0), + /* 4 DQs for RB priority 0 */ + DEFINE_QHDR(gmuaddr, 3, 0), + DEFINE_QHDR(gmuaddr, 4, 0), + DEFINE_QHDR(gmuaddr, 5, 0), + DEFINE_QHDR(gmuaddr, 6, 0), + /* 4 DQs for RB priority 1 */ + DEFINE_QHDR(gmuaddr, 7, 1), + DEFINE_QHDR(gmuaddr, 8, 1), + DEFINE_QHDR(gmuaddr, 9, 1), + DEFINE_QHDR(gmuaddr, 10, 1), + /* 3 DQs for RB priority 2 */ + DEFINE_QHDR(gmuaddr, 11, 2), + DEFINE_QHDR(gmuaddr, 12, 2), + DEFINE_QHDR(gmuaddr, 13, 2), + /* 2 DQs for RB priority 3 */ + DEFINE_QHDR(gmuaddr, 14, 3), + DEFINE_QHDR(gmuaddr, 15, 3), + /* 1 DQ for LPAC RB priority 4 */ + DEFINE_QHDR(gmuaddr, 16, 4), + }, + }; + + memcpy(hfi->hfi_mem->hostptr, &hfi_table, sizeof(hfi_table)); +} + +/* Total header sizes + queue sizes + 16 for alignment */ +#define HFIMEM_SIZE (sizeof(struct hfi_queue_table) + 16 + \ + (SZ_4K * HFI_QUEUE_MAX)) + +static int hfi_f2h_main(void *arg); + +int 
gen8_hwsched_hfi_init(struct adreno_device *adreno_dev) +{ + struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev); + struct gen8_hfi *hfi = to_gen8_hfi(adreno_dev); + + if (IS_ERR_OR_NULL(hw_hfi->big_ib)) { + hw_hfi->big_ib = gen8_reserve_gmu_kernel_block( + to_gen8_gmu(adreno_dev), 0, + HWSCHED_MAX_IBS * sizeof(struct hfi_issue_ib), + GMU_NONCACHED_KERNEL, 0); + if (IS_ERR(hw_hfi->big_ib)) + return PTR_ERR(hw_hfi->big_ib); + } + + if (ADRENO_FEATURE(adreno_dev, ADRENO_LSR) && + IS_ERR_OR_NULL(hw_hfi->big_ib_recurring)) { + hw_hfi->big_ib_recurring = gen8_reserve_gmu_kernel_block( + to_gen8_gmu(adreno_dev), 0, + HWSCHED_MAX_IBS * sizeof(struct hfi_issue_ib), + GMU_NONCACHED_KERNEL, 0); + if (IS_ERR(hw_hfi->big_ib_recurring)) + return PTR_ERR(hw_hfi->big_ib_recurring); + } + + if (IS_ERR_OR_NULL(hfi->hfi_mem)) { + hfi->hfi_mem = gen8_reserve_gmu_kernel_block( + to_gen8_gmu(adreno_dev), + 0, HFIMEM_SIZE, GMU_NONCACHED_KERNEL, 0); + if (IS_ERR(hfi->hfi_mem)) + return PTR_ERR(hfi->hfi_mem); + init_queues(hfi); + } + + if (IS_ERR_OR_NULL(hw_hfi->f2h_task)) { + hw_hfi->f2h_task = kthread_run(hfi_f2h_main, adreno_dev, "gmu_f2h"); + if (!IS_ERR(hw_hfi->f2h_task)) + sched_set_fifo(hw_hfi->f2h_task); + } + + return PTR_ERR_OR_ZERO(hw_hfi->f2h_task); +} + +static int get_attrs(u32 flags) +{ + int attrs = IOMMU_READ; + + if (flags & HFI_MEMFLAG_GMU_PRIV) + attrs |= IOMMU_PRIV; + + if (flags & HFI_MEMFLAG_GMU_WRITEABLE) + attrs |= IOMMU_WRITE; + + return attrs; +} + +static int gmu_import_buffer(struct adreno_device *adreno_dev, + struct hfi_mem_alloc_entry *entry) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct hfi_mem_alloc_desc *desc = &entry->desc; + u32 vma_id = (desc->flags & HFI_MEMFLAG_GMU_CACHEABLE) ? 
GMU_CACHE : GMU_NONCACHED_KERNEL; + + return gen8_gmu_import_buffer(gmu, vma_id, entry->md, get_attrs(desc->flags), desc->align); +} + +static struct hfi_mem_alloc_entry *lookup_mem_alloc_table( + struct adreno_device *adreno_dev, struct hfi_mem_alloc_desc *desc) +{ + struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev); + int i; + + for (i = 0; i < hw_hfi->mem_alloc_entries; i++) { + struct hfi_mem_alloc_entry *entry = &hw_hfi->mem_alloc_table[i]; + + if ((entry->desc.mem_kind == desc->mem_kind) && + (entry->desc.gmu_mem_handle == desc->gmu_mem_handle)) + return entry; + } + + return NULL; +} + +static struct hfi_mem_alloc_entry *get_mem_alloc_entry( + struct adreno_device *adreno_dev, struct hfi_mem_alloc_desc *desc) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + struct hfi_mem_alloc_entry *entry = + lookup_mem_alloc_table(adreno_dev, desc); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + u64 flags = 0; + u32 priv = 0; + int ret; + const char *memkind_string = desc->mem_kind < HFI_MEMKIND_MAX ? 
+ hfi_memkind_strings[desc->mem_kind] : "UNKNOWN"; + + if (entry) + return entry; + + if (desc->mem_kind >= HFI_MEMKIND_MAX) { + dev_err(&gmu->pdev->dev, "Invalid mem kind: %d\n", + desc->mem_kind); + return ERR_PTR(-EINVAL); + } + + if (hfi->mem_alloc_entries == ARRAY_SIZE(hfi->mem_alloc_table)) { + dev_err(&gmu->pdev->dev, + "Reached max mem alloc entries\n"); + return ERR_PTR(-ENOMEM); + } + + entry = &hfi->mem_alloc_table[hfi->mem_alloc_entries]; + + memcpy(&entry->desc, desc, sizeof(*desc)); + + entry->desc.host_mem_handle = desc->gmu_mem_handle; + + if (desc->flags & HFI_MEMFLAG_GFX_PRIV) + priv |= KGSL_MEMDESC_PRIVILEGED; + + if (!(desc->flags & HFI_MEMFLAG_GFX_WRITEABLE)) + flags |= KGSL_MEMFLAGS_GPUREADONLY; + + if (desc->flags & HFI_MEMFLAG_GFX_SECURE) + flags |= KGSL_MEMFLAGS_SECURE; + + if (!(desc->flags & HFI_MEMFLAG_GFX_ACC) && + (desc->mem_kind != HFI_MEMKIND_HW_FENCE)) { + if (desc->mem_kind == HFI_MEMKIND_MMIO_IPC_CORE) + entry->md = gen8_reserve_gmu_kernel_block_fixed(gmu, 0, + desc->size, + (desc->flags & HFI_MEMFLAG_GMU_CACHEABLE) ? + GMU_CACHE : GMU_NONCACHED_KERNEL, + "qcom,ipc-core", get_attrs(desc->flags), + desc->align); + else + entry->md = gen8_reserve_gmu_kernel_block(gmu, 0, + desc->size, + (desc->flags & HFI_MEMFLAG_GMU_CACHEABLE) ? 
+ GMU_CACHE : GMU_NONCACHED_KERNEL, + desc->align); + + if (IS_ERR(entry->md)) { + int ret = PTR_ERR(entry->md); + + memset(entry, 0, sizeof(*entry)); + return ERR_PTR(ret); + } + entry->desc.size = entry->md->size; + entry->desc.gmu_addr = entry->md->gmuaddr; + + goto done; + } + + /* + * Use pre-allocated memory descriptors to map the HFI_MEMKIND_HW_FENCE and + * HFI_MEMKIND_MEMSTORE + */ + switch (desc->mem_kind) { + case HFI_MEMKIND_HW_FENCE: + entry->md = &adreno_dev->hwsched.hw_fence.memdesc; + break; + case HFI_MEMKIND_MEMSTORE: + entry->md = device->memstore; + break; + default: + entry->md = kgsl_allocate_global(device, desc->size, 0, flags, + priv, memkind_string); + break; + } + if (IS_ERR(entry->md)) { + int ret = PTR_ERR(entry->md); + + memset(entry, 0, sizeof(*entry)); + return ERR_PTR(ret); + } + + entry->desc.size = entry->md->size; + entry->desc.gpu_addr = entry->md->gpuaddr; + + if (!(desc->flags & HFI_MEMFLAG_GMU_ACC)) + goto done; + + /* + * If gmu mapping fails, then we have to live with + * leaking the gpu global buffer allocated above. 
+ */ + ret = gmu_import_buffer(adreno_dev, entry); + if (ret) { + dev_err(&gmu->pdev->dev, + "gpuaddr: 0x%llx size: %lld bytes lost\n", + entry->md->gpuaddr, entry->md->size); + memset(entry, 0, sizeof(*entry)); + return ERR_PTR(ret); + } + + entry->desc.gmu_addr = entry->md->gmuaddr; +done: + hfi->mem_alloc_entries++; + + return entry; +} + +static int process_mem_alloc(struct adreno_device *adreno_dev, + struct hfi_mem_alloc_desc *mad) +{ + struct hfi_mem_alloc_entry *entry; + + entry = get_mem_alloc_entry(adreno_dev, mad); + if (IS_ERR(entry)) + return PTR_ERR(entry); + + if (entry->md) { + mad->gpu_addr = entry->md->gpuaddr; + mad->gmu_addr = entry->md->gmuaddr; + } + + /* + * GMU uses the host_mem_handle to check if this memalloc was + * successful + */ + mad->host_mem_handle = mad->gmu_mem_handle; + + return 0; +} + +static int mem_alloc_reply(struct adreno_device *adreno_dev, void *rcvd) +{ + struct hfi_mem_alloc_desc desc = {0}; + struct hfi_mem_alloc_reply_cmd out = {0}; + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret; + + hfi_get_mem_alloc_desc(rcvd, &desc); + + ret = process_mem_alloc(adreno_dev, &desc); + if (ret) + return ret; + + memcpy(&out.desc, &desc, sizeof(out.desc)); + + out.hdr = ACK_MSG_HDR(F2H_MSG_MEM_ALLOC); + + out.hdr = MSG_HDR_SET_SEQNUM(out.hdr, + atomic_inc_return(&gmu->hfi.seqnum)); + + out.req_hdr = *(u32 *)rcvd; + + return gen8_hfi_cmdq_write(adreno_dev, (u32 *)&out, sizeof(out)); +} + +static int gmu_cntr_register_reply(struct adreno_device *adreno_dev, void *rcvd) +{ + struct hfi_gmu_cntr_register_cmd *in = (struct hfi_gmu_cntr_register_cmd *)rcvd; + struct hfi_gmu_cntr_register_reply_cmd out = {0}; + u32 lo = 0, hi = 0; + + /* + * Failure to allocate counter is not fatal. Sending lo = 0, hi = 0 + * indicates to GMU that counter allocation failed. 
+ */ + adreno_perfcounter_get(adreno_dev, + in->group_id, in->countable, &lo, &hi, PERFCOUNTER_FLAG_KERNEL); + + out.hdr = ACK_MSG_HDR(F2H_MSG_GMU_CNTR_REGISTER); + out.req_hdr = in->hdr; + out.group_id = in->group_id; + out.countable = in->countable; + /* Fill in byte offset of counter */ + out.cntr_lo = lo << 2; + out.cntr_hi = hi << 2; + + return gen8_hfi_cmdq_write(adreno_dev, (u32 *)&out, sizeof(out)); +} + +static int send_warmboot_start_msg(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + u32 seqnum = atomic_inc_return(&gmu->hfi.seqnum); + int ret = 0; + struct hfi_start_cmd cmd; + + if (!adreno_dev->warmboot_enabled) + return ret; + + ret = CMD_MSG_HDR(cmd, H2F_MSG_START); + if (ret) + return ret; + + cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); + + cmd.hdr = RECORD_NOP_MSG_HDR(cmd.hdr); + + return gen8_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd)); +} + +static int send_start_msg(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 seqnum = atomic_inc_return(&gmu->hfi.seqnum); + int ret, rc = 0; + struct hfi_start_cmd cmd; + u32 rcvd[MAX_RCVD_SIZE]; + struct pending_cmd pending_ack = {0}; + + ret = CMD_MSG_HDR(cmd, H2F_MSG_START); + if (ret) + return ret; + + cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); + + pending_ack.sent_hdr = cmd.hdr; + + rc = gen8_hfi_cmdq_write(adreno_dev, (u32 *)&cmd, sizeof(cmd)); + if (rc) + return rc; + +poll: + rc = gmu_core_timed_poll_check(device, GEN8_GMUCX_GMU2HOST_INTR_INFO, + HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT, HFI_IRQ_MSGQ_MASK); + + if (rc) { + dev_err(&gmu->pdev->dev, + "Timed out processing MSG_START seqnum: %d\n", + seqnum); + gmu_core_fault_snapshot(device); + return rc; + } + + /* Clear the interrupt */ + gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR, + HFI_IRQ_MSGQ_MASK); + + if (gen8_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)) <= 0) { + 
dev_err(&gmu->pdev->dev, "MSG_START: no payload\n"); + gmu_core_fault_snapshot(device); + return -EINVAL; + } + + if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) { + rc = gen8_receive_ack_cmd(gmu, rcvd, &pending_ack); + if (rc) + return rc; + + return check_ack_failure(adreno_dev, &pending_ack); + } + + if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_MEM_ALLOC) { + rc = mem_alloc_reply(adreno_dev, rcvd); + if (rc) + return rc; + + goto poll; + } + + if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_GMU_CNTR_REGISTER) { + rc = gmu_cntr_register_reply(adreno_dev, rcvd); + if (rc) + return rc; + goto poll; + } + + dev_err(&gmu->pdev->dev, + "MSG_START: unexpected response id:%d, type:%d\n", + MSG_HDR_GET_ID(rcvd[0]), + MSG_HDR_GET_TYPE(rcvd[0])); + + gmu_core_fault_snapshot(device); + + return rc; +} + +static void reset_hfi_mem_records(struct adreno_device *adreno_dev) +{ + struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev); + struct kgsl_memdesc *md = NULL; + u32 i; + + for (i = 0; i < hw_hfi->mem_alloc_entries; i++) { + struct hfi_mem_alloc_desc *desc = &hw_hfi->mem_alloc_table[i].desc; + + if (desc->flags & HFI_MEMFLAG_HOST_INIT) { + md = hw_hfi->mem_alloc_table[i].md; + memset(md->hostptr, 0x0, md->size); + } + } +} + +static void reset_hfi_queues(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct hfi_queue_table *tbl = gmu->hfi.hfi_mem->hostptr; + u32 i; + + /* Flush HFI queues */ + for (i = 0; i < HFI_QUEUE_MAX; i++) { + struct hfi_queue_header *hdr = &tbl->qhdr[i]; + + if (hdr->status == HFI_QUEUE_STATUS_DISABLED) + continue; + + hdr->read_index = hdr->write_index; + } +} + +void gen8_hwsched_hfi_stop(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + + hfi->irq_mask &= ~HFI_IRQ_MSGQ_MASK; + + /* + * In some corner cases, it is possible that GMU put TS_RETIRE + * on the msgq after we have turned off gmu 
interrupts. Hence, + * drain the queue one last time before we reset HFI queues. + */ + gen8_hwsched_process_msgq(adreno_dev); + + /* Drain the debug queue before we reset HFI queues */ + gen8_hwsched_process_dbgq(adreno_dev, false); + + kgsl_pwrctrl_axi(KGSL_DEVICE(adreno_dev), false); + + clear_bit(GMU_PRIV_HFI_STARTED, &gmu->flags); + + /* + * Reset the hfi host access memory records, As GMU expects hfi memory + * records to be clear in bootup. + */ + reset_hfi_mem_records(adreno_dev); +} + +static void gen8_hwsched_enable_async_hfi(struct adreno_device *adreno_dev) +{ + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + + hfi->irq_mask |= HFI_IRQ_MSGQ_MASK; + + gmu_core_regwrite(KGSL_DEVICE(adreno_dev), GEN8_GMUCX_GMU2HOST_INTR_MASK, + (u32)~hfi->irq_mask); +} + +static int enable_preemption(struct adreno_device *adreno_dev) +{ + const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); + u32 data; + int ret; + + if (!adreno_is_preemption_enabled(adreno_dev)) + return 0; + + /* + * Bits [0:1] contains the preemption level + * Bit 2 is to enable/disable gmem save/restore + * Bit 3 is to enable/disable skipsaverestore + */ + data = FIELD_PREP(GENMASK(1, 0), adreno_dev->preempt.preempt_level) | + FIELD_PREP(BIT(2), adreno_dev->preempt.usesgmem) | + FIELD_PREP(BIT(3), adreno_dev->preempt.skipsaverestore); + + ret = gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_PREEMPTION, 1, + data); + if (ret) + return ret; + + if (gen8_core->qos_value) { + int i; + + for (i = 0; i < KGSL_PRIORITY_MAX_RB_LEVELS; i++) { + if (!gen8_core->qos_value[i]) + continue; + + gen8_hfi_send_set_value(adreno_dev, + HFI_VALUE_RB_GPU_QOS, i, + gen8_core->qos_value[i]); + } + } + + /* + * Bits[3:0] contain the preemption timeout enable bit per ringbuffer + * Bits[31:4] contain the timeout in ms + */ + return gen8_hfi_send_set_value(adreno_dev, HFI_VALUE_BIN_TIME, 1, + FIELD_PREP(GENMASK(31, 4), ADRENO_PREEMPT_TIMEOUT) | + FIELD_PREP(GENMASK(3, 0), 0xf)); + +} + 
+static int enable_gmu_stats(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + u32 data; + + if (!gmu->stats_enable) + return 0; + + /* + * Bits [23:0] contains the countables mask + * Bits [31:24] is the sampling interval + */ + data = FIELD_PREP(GENMASK(23, 0), gmu->stats_mask) | + FIELD_PREP(GENMASK(31, 24), gmu->stats_interval); + + return gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_GMU_STATS, 1, data); +} + +static int gen8_hfi_send_perfcounter_feature_ctrl(struct adreno_device *adreno_dev) +{ + /* + * Perfcounter retention is disabled by default in GMU firmware. + * In case perfcounter retention behaviour is overwritten by sysfs + * setting dynmaically, send this HFI feature with 'enable = 0' to + * disable this feature in GMU firmware. + */ + if (adreno_dev->perfcounter) + return gen8_hfi_send_feature_ctrl(adreno_dev, + HFI_FEATURE_PERF_NORETAIN, 0, 0); + + return 0; +} + +u32 gen8_hwsched_hfi_get_value(struct adreno_device *adreno_dev, u32 prop) +{ + struct hfi_get_value_cmd cmd; + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + struct pending_cmd pending_ack; + int rc; + + rc = CMD_MSG_HDR(cmd, H2F_MSG_GET_VALUE); + if (rc) + return 0; + + cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, + atomic_inc_return(&gmu->hfi.seqnum)); + cmd.type = prop; + cmd.subtype = 0; + + add_waiter(hfi, cmd.hdr, &pending_ack); + + rc = gen8_hfi_cmdq_write(adreno_dev, (u32 *)&cmd, sizeof(cmd)); + if (rc) + goto done; + + rc = adreno_hwsched_wait_ack_completion(adreno_dev, &gmu->pdev->dev, &pending_ack, + gen8_hwsched_process_msgq); + +done: + del_waiter(hfi, &pending_ack); + + if (rc || (pending_ack.results[2] == UINT_MAX)) + return 0; + + return pending_ack.results[2]; +} + +static void _context_queue_enable(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + + if (GMU_VER_MINOR(gmu->ver.hfi) >= 3) { + if 
(gen8_hfi_send_get_value(adreno_dev, HFI_VALUE_CONTEXT_QUEUE, 0) == 1) + set_bit(ADRENO_HWSCHED_CONTEXT_QUEUE, &adreno_dev->hwsched.flags); + } +} + +static int gen8_hfi_send_hw_fence_feature_ctrl(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + int ret; + + if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &hwsched->flags)) + return 0; + + ret = gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_HW_FENCE, 1, 0); + if (ret && (ret == -ENOENT)) { + dev_err(&gmu->pdev->dev, "GMU doesn't support HW_FENCE feature\n"); + adreno_hwsched_deregister_hw_fence(hwsched->hw_fence.handle); + return 0; + } + + return ret; +} + +static int gen8_hfi_send_dms_feature_ctrl(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret; + + if (!test_bit(ADRENO_DEVICE_DMS, &adreno_dev->priv)) + return 0; + + ret = gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_DMS, 1, 0); + if (ret == -ENOENT) { + dev_err(&gmu->pdev->dev, "GMU doesn't support DMS feature\n"); + clear_bit(ADRENO_DEVICE_DMS, &adreno_dev->priv); + adreno_dev->dms_enabled = false; + return 0; + } + + return ret; +} + +static void gen8_spin_idle_debug_lpac(struct adreno_device *adreno_dev, + const char *str) +{ + struct kgsl_device *device = &adreno_dev->dev; + u32 rptr, wptr, status, intstatus, global_status; + bool val = adreno_is_preemption_enabled(adreno_dev); + + dev_err(device->dev, str); + + kgsl_regread(device, GEN8_CP_RB_RPTR_LPAC, &rptr); + kgsl_regread(device, GEN8_CP_RB_WPTR_LPAC, &wptr); + + kgsl_regread(device, GEN8_RBBM_STATUS, &status); + kgsl_regread(device, GEN8_RBBM_INT_0_STATUS, &intstatus); + kgsl_regread(device, GEN8_CP_INTERRUPT_STATUS_GLOBAL, &global_status); + + dev_err(device->dev, + "LPAC rb=%d pos=%X/%X rbbm_status=%8.8X int_0_status=%8.8X global_status=%8.8X\n", + val ? 
KGSL_LPAC_RB_ID : 1, rptr, wptr, + status, intstatus, global_status); + + kgsl_device_snapshot(device, NULL, NULL, false); +} + +static bool gen8_hwsched_warmboot_possible(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + + if (adreno_dev->warmboot_enabled && test_bit(GMU_PRIV_WARMBOOT_GMU_INIT_DONE, &gmu->flags) + && test_bit(GMU_PRIV_WARMBOOT_GPU_BOOT_DONE, &gmu->flags) && + !test_bit(ADRENO_DEVICE_FORCE_COLDBOOT, &adreno_dev->priv)) + return true; + + return false; +} + +static int gen8_hwsched_hfi_send_warmboot_cmd(struct adreno_device *adreno_dev, + struct kgsl_memdesc *desc, u32 flag, bool async, struct pending_cmd *ack) +{ + struct hfi_warmboot_scratch_cmd cmd; + int ret; + + if (!adreno_dev->warmboot_enabled) + return 0; + + cmd.scratch_addr = desc->gmuaddr; + cmd.scratch_size = desc->size; + cmd.flags = flag; + + ret = CMD_MSG_HDR(cmd, H2F_MSG_WARMBOOT_CMD); + if (ret) + return ret; + + if (async) + return gen8_hfi_send_cmd_async(adreno_dev, &cmd, sizeof(cmd)); + + return gen8_hfi_send_generic_req_v5(adreno_dev, &cmd, ack, sizeof(cmd)); +} + +static int gen8_hwsched_hfi_warmboot_gpu_cmd(struct adreno_device *adreno_dev, + struct pending_cmd *ret_cmd) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + struct hfi_warmboot_scratch_cmd cmd = { + .scratch_addr = gmu->gpu_boot_scratch->gmuaddr, + .scratch_size = gmu->gpu_boot_scratch->size, + .flags = HFI_WARMBOOT_EXEC_SCRATCH, + }; + int ret = 0; + + if (!adreno_dev->warmboot_enabled) + return 0; + + ret = CMD_MSG_HDR(cmd, H2F_MSG_WARMBOOT_CMD); + if (ret) + return ret; + + cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, + atomic_inc_return(&gmu->hfi.seqnum)); + + add_waiter(hfi, cmd.hdr, ret_cmd); + + ret = gen8_hfi_cmdq_write(adreno_dev, (u32 *)&cmd, sizeof(cmd)); + if (ret) + goto err; + + ret = adreno_hwsched_wait_ack_completion(adreno_dev, &gmu->pdev->dev, ret_cmd, + gen8_hwsched_process_msgq); 
+err: + del_waiter(hfi, ret_cmd); + + return ret; +} + +static int gen8_hwsched_warmboot_gpu(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct pending_cmd ret_cmd = {0}; + int ret = 0; + + ret = gen8_hwsched_hfi_warmboot_gpu_cmd(adreno_dev, &ret_cmd); + if (!ret) + return ret; + + if (MSG_HDR_GET_TYPE(ret_cmd.results[1]) != H2F_MSG_WARMBOOT_CMD) + goto err; + + switch (MSG_HDR_GET_TYPE(ret_cmd.results[2])) { + case H2F_MSG_ISSUE_CMD_RAW: { + if (ret_cmd.results[2] == gmu->cp_init_hdr) + gen8_spin_idle_debug(adreno_dev, + "CP initialization failed to idle\n"); + else if (ret_cmd.results[2] == gmu->switch_to_unsec_hdr) + gen8_spin_idle_debug(adreno_dev, + "Switch to unsecure failed to idle\n"); + } + break; + case H2F_MSG_ISSUE_LPAC_CMD_RAW: + gen8_spin_idle_debug_lpac(adreno_dev, + "LPAC CP initialization failed to idle\n"); + break; + } +err: + /* Clear the bit on error so that in the next slumber exit we coldboot */ + clear_bit(GMU_PRIV_WARMBOOT_GPU_BOOT_DONE, &gmu->flags); + gen8_disable_gpu_irq(adreno_dev); + return ret; +} + +static int gen8_hwsched_coldboot_gpu(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct gen8_hfi *hfi = to_gen8_hfi(adreno_dev); + struct pending_cmd ack = {0}; + int ret = 0; + + /* Clear the bit so we can set it when GPU bootup message recording is successful */ + clear_bit(GMU_PRIV_WARMBOOT_GPU_BOOT_DONE, &gmu->flags); + + ret = gen8_hwsched_hfi_send_warmboot_cmd(adreno_dev, gmu->gpu_boot_scratch, + HFI_WARMBOOT_SET_SCRATCH, true, &ack); + if (ret) + goto done; + + ret = gen8_hwsched_cp_init(adreno_dev); + if (ret) + goto done; + + ret = gen8_hwsched_lpac_cp_init(adreno_dev); + if (ret) + goto done; + + ret = gen8_hwsched_hfi_send_warmboot_cmd(adreno_dev, gmu->gpu_boot_scratch, + HFI_WARMBOOT_QUERY_SCRATCH, true, &ack); + if (ret) + goto done; + + if (adreno_dev->warmboot_enabled) + set_bit(GMU_PRIV_WARMBOOT_GPU_BOOT_DONE, 
&gmu->flags); + +done: + /* Clear the bitmask so that we don't send record bit with future HFI messages */ + memset(hfi->wb_set_record_bitmask, 0x0, sizeof(hfi->wb_set_record_bitmask)); + + if (ret) + gen8_disable_gpu_irq(adreno_dev); + + return ret; +} + +int gen8_hwsched_boot_gpu(struct adreno_device *adreno_dev) +{ + /* If warmboot is possible just send the warmboot command else coldboot */ + if (gen8_hwsched_warmboot_possible(adreno_dev)) + return gen8_hwsched_warmboot_gpu(adreno_dev); + else + return gen8_hwsched_coldboot_gpu(adreno_dev); +} + +static int gen8_hwsched_setup_default_votes(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret = 0; + + /* Request default DCVS level */ + ret = kgsl_pwrctrl_set_default_gpu_pwrlevel(device); + if (ret) + return ret; + + /* Request default BW vote */ + return kgsl_pwrctrl_axi(device, true); +} + +int gen8_hwsched_warmboot_init_gmu(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct pending_cmd ack = {0}; + int ret = 0; + + ret = gen8_hwsched_hfi_send_warmboot_cmd(adreno_dev, gmu->gmu_init_scratch, + HFI_WARMBOOT_EXEC_SCRATCH, false, &ack); + if (ret) + goto err; + + gen8_hwsched_enable_async_hfi(adreno_dev); + + set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags); + + ret = gen8_hwsched_setup_default_votes(adreno_dev); + +err: + if (ret) { + /* Clear the bit in case of an error so next boot will be coldboot */ + clear_bit(GMU_PRIV_WARMBOOT_GMU_INIT_DONE, &gmu->flags); + gen8_hwsched_hfi_stop(adreno_dev); + } + + return ret; +} + +static void warmboot_init_message_record_bitmask(struct adreno_device *adreno_dev) +{ + struct gen8_hfi *hfi = to_gen8_hfi(adreno_dev); + + if (!adreno_dev->warmboot_enabled) + return; + + /* Set the record bit for all the messages */ + memset(hfi->wb_set_record_bitmask, 0xFF, sizeof(hfi->wb_set_record_bitmask)); + + /* These messages should not be recorded */ + clear_bit(H2F_MSG_WARMBOOT_CMD, 
hfi->wb_set_record_bitmask); + clear_bit(H2F_MSG_START, hfi->wb_set_record_bitmask); + clear_bit(H2F_MSG_GET_VALUE, hfi->wb_set_record_bitmask); + clear_bit(H2F_MSG_GX_BW_PERF_VOTE, hfi->wb_set_record_bitmask); +} + +int gen8_hwsched_hfi_start(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct pending_cmd ack = {0}; + int ret; + + reset_hfi_queues(adreno_dev); + + ret = gen8_gmu_hfi_start(adreno_dev); + if (ret) + goto err; + + if (gen8_hwsched_warmboot_possible(adreno_dev)) + return gen8_hwsched_warmboot_init_gmu(adreno_dev); + + if (ADRENO_FEATURE(adreno_dev, ADRENO_GMU_WARMBOOT) && + (!test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags))) { + if (gen8_hfi_send_get_value(adreno_dev, HFI_VALUE_GMU_WARMBOOT, 0) == 1) + adreno_dev->warmboot_enabled = true; + } + + warmboot_init_message_record_bitmask(adreno_dev); + + /* Reset the variable here and set it when we successfully record the scratch */ + clear_bit(GMU_PRIV_WARMBOOT_GMU_INIT_DONE, &gmu->flags); + + ret = gen8_hwsched_hfi_send_warmboot_cmd(adreno_dev, gmu->gmu_init_scratch, + HFI_WARMBOOT_SET_SCRATCH, false, &ack); + if (ret) + goto err; + + ret = gen8_hfi_send_generic_req(adreno_dev, &gmu->hfi.dcvs_table, + sizeof(gmu->hfi.dcvs_table)); + if (ret) + goto err; + + ret = gen8_hfi_send_generic_req(adreno_dev, &gmu->hfi.bw_table, sizeof(gmu->hfi.bw_table)); + if (ret) + goto err; + + ret = gen8_hfi_send_acd_feature_ctrl(adreno_dev); + if (ret) + goto err; + + ret = gen8_hfi_send_bcl_feature_ctrl(adreno_dev); + if (ret) + goto err; + + ret = gen8_hfi_send_ifpc_feature_ctrl(adreno_dev); + if (ret) + goto err; + + ret = gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_HWSCHED, 1, 0); + if (ret) + goto err; + + ret = gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_KPROF, 1, 0); + if (ret) + goto err; + + if (ADRENO_FEATURE(adreno_dev, ADRENO_LSR)) { + ret = gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_LSR, + 1, 0); + if (ret) + goto err; + } + + ret = 
gen8_hfi_send_perfcounter_feature_ctrl(adreno_dev); + if (ret) + goto err; + + ret = gen8_hfi_send_dms_feature_ctrl(adreno_dev); + if (ret) + goto err; + + /* Enable the long ib timeout detection */ + if (adreno_long_ib_detect(adreno_dev)) { + ret = gen8_hfi_send_feature_ctrl(adreno_dev, + HFI_FEATURE_BAIL_OUT_TIMER, 1, 0); + if (ret) + goto err; + } + + enable_gmu_stats(adreno_dev); + + if (gmu->log_stream_enable) + gen8_hfi_send_set_value(adreno_dev, + HFI_VALUE_LOG_STREAM_ENABLE, 0, 1); + + if (gmu->log_group_mask) + gen8_hfi_send_set_value(adreno_dev, + HFI_VALUE_LOG_GROUP, 0, gmu->log_group_mask); + + ret = gen8_hfi_send_core_fw_start(adreno_dev); + if (ret) + goto err; + + /* + * HFI_VALUE_CONTEXT_QUEUE can only be queried after GMU has initialized some of the + * required resources as part of handling gen8_hfi_send_core_fw_start() + */ + if (!test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) { + _context_queue_enable(adreno_dev); + adreno_hwsched_register_hw_fence(adreno_dev); + } + + ret = gen8_hfi_send_hw_fence_feature_ctrl(adreno_dev); + if (ret) + goto err; + + ret = enable_preemption(adreno_dev); + if (ret) + goto err; + + ret = gen8_hfi_send_lpac_feature_ctrl(adreno_dev); + if (ret) + goto err; + + if (ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) { + ret = gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_AQE, 1, 0); + if (ret) + goto err; + } + + ret = send_start_msg(adreno_dev); + if (ret) + goto err; + + /* + * Send this additional start message on cold boot if warmboot is enabled. 
+ * This message will be recorded and on a warmboot this will trigger the + * sequence to replay memory allocation requests and ECP task setup + */ + ret = send_warmboot_start_msg(adreno_dev); + if (ret) + goto err; + + gen8_hwsched_enable_async_hfi(adreno_dev); + + set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags); + + /* Send this message only on cold boot */ + ret = gen8_hwsched_hfi_send_warmboot_cmd(adreno_dev, gmu->gmu_init_scratch, + HFI_WARMBOOT_QUERY_SCRATCH, true, &ack); + if (ret) + goto err; + + if (adreno_dev->warmboot_enabled) + set_bit(GMU_PRIV_WARMBOOT_GMU_INIT_DONE, &gmu->flags); + + ret = gen8_hwsched_setup_default_votes(adreno_dev); + +err: + if (ret) + gen8_hwsched_hfi_stop(adreno_dev); + + return ret; +} + +static int submit_raw_cmds(struct adreno_device *adreno_dev, void *cmds, u32 size_bytes, + const char *str) +{ + int ret; + + ret = gen8_hfi_send_cmd_async(adreno_dev, cmds, size_bytes); + if (ret) + return ret; + + ret = gmu_core_timed_poll_check(KGSL_DEVICE(adreno_dev), + GEN8_GMUAO_GPU_CX_BUSY_STATUS, 0, 200, BIT(23)); + if (ret) + gen8_spin_idle_debug(adreno_dev, str); + + return ret; +} + +static int submit_lpac_raw_cmds(struct adreno_device *adreno_dev, void *cmds, u32 size_bytes, + const char *str) +{ + int ret; + + ret = gen8_hfi_send_cmd_async(adreno_dev, cmds, size_bytes); + if (ret) + return ret; + + ret = gmu_core_timed_poll_check(KGSL_DEVICE(adreno_dev), + GEN8_GMUAO_LPAC_BUSY_STATUS, 0, 200, BIT(23)); + if (ret) + gen8_spin_idle_debug_lpac(adreno_dev, str); + + return ret; +} + +static int cp_init(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + u32 cmds[GEN8_CP_INIT_DWORDS + 1]; + int ret = 0; + + cmds[0] = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD_RAW, HFI_MSG_CMD); + + gen8_cp_init_cmds(adreno_dev, &cmds[1]); + + ret = submit_raw_cmds(adreno_dev, cmds, sizeof(cmds), + "CP initialization failed to idle\n"); + + /* Save the header incase we need a warmboot debug */ + gmu->cp_init_hdr = cmds[0]; + 
+ return ret; +} + +static int send_switch_to_unsecure(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + u32 cmds[3]; + int ret = 0; + + cmds[0] = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD_RAW, HFI_MSG_CMD); + + cmds[1] = cp_type7_packet(CP_SET_SECURE_MODE, 1); + cmds[2] = 0; + + ret = submit_raw_cmds(adreno_dev, cmds, sizeof(cmds), + "Switch to unsecure failed to idle\n"); + + /* Save the header incase we need a warmboot debug */ + gmu->switch_to_unsec_hdr = cmds[0]; + + return ret; +} + +int gen8_hwsched_cp_init(struct adreno_device *adreno_dev) +{ + const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); + int ret; + + ret = cp_init(adreno_dev); + if (ret) + return ret; + + ret = adreno_zap_shader_load(adreno_dev, gen8_core->zap_name); + if (ret) + return ret; + + if (!adreno_dev->zap_loaded) + kgsl_regwrite(KGSL_DEVICE(adreno_dev), + GEN8_RBBM_SECVID_TRUST_CNTL, 0x0); + else + ret = send_switch_to_unsecure(adreno_dev); + + return ret; +} + +int gen8_hwsched_lpac_cp_init(struct adreno_device *adreno_dev) +{ + u32 cmds[GEN8_CP_INIT_DWORDS + 1]; + + if (!adreno_dev->lpac_enabled) + return 0; + + cmds[0] = CREATE_MSG_HDR(H2F_MSG_ISSUE_LPAC_CMD_RAW, HFI_MSG_CMD); + + gen8_cp_init_cmds(adreno_dev, &cmds[1]); + + return submit_lpac_raw_cmds(adreno_dev, cmds, sizeof(cmds), + "LPAC CP initialization failed to idle\n"); +} + +static bool is_queue_empty(struct adreno_device *adreno_dev, u32 queue_idx) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem; + struct hfi_queue_table *tbl = mem_addr->hostptr; + struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx]; + + if (hdr->status == HFI_QUEUE_STATUS_DISABLED) + return true; + + if (hdr->read_index == hdr->write_index) + return true; + + return false; +} + +static int hfi_f2h_main(void *arg) +{ + struct adreno_device *adreno_dev = arg; + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + + while 
(!kthread_should_stop()) { + wait_event_interruptible(hfi->f2h_wq, kthread_should_stop() || + (!(is_queue_empty(adreno_dev, HFI_MSG_ID) && + is_queue_empty(adreno_dev, HFI_DBG_ID)) && + (hfi->irq_mask & HFI_IRQ_MSGQ_MASK))); + + if (kthread_should_stop()) + break; + + gen8_hwsched_process_msgq(adreno_dev); + gen8_hwsched_process_dbgq(adreno_dev, true); + } + + return 0; +} + +static void gen8_hwsched_hw_fence_timeout(struct work_struct *work) +{ + struct gen8_hwsched_hfi *hfi = container_of(work, struct gen8_hwsched_hfi, hw_fence_ws); + struct gen8_hwsched_device *gen8_hw_dev = container_of(hfi, struct gen8_hwsched_device, + hwsched_hfi); + struct adreno_device *adreno_dev = &gen8_hw_dev->gen8_dev.adreno_dev; + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + u32 unack_count, ts; + struct adreno_context *drawctxt = NULL; + bool fault; + + /* Check msgq one last time before recording a fault */ + gen8_hwsched_process_msgq(adreno_dev); + + spin_lock(&hfi->hw_fence.lock); + + unack_count = hfi->hw_fence.unack_count; + + fault = test_bit(GEN8_HWSCHED_HW_FENCE_SLEEP_BIT, &hfi->hw_fence.flags) && + test_bit(GEN8_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags) && + (unack_count > MIN_HW_FENCE_UNACK_COUNT); + + drawctxt = hfi->hw_fence.defer_drawctxt; + ts = hfi->hw_fence.defer_ts; + + spin_unlock(&hfi->hw_fence.lock); + + if (!fault) + return; + + dev_err(&gmu->pdev->dev, "Hardware fence unack(%d) timeout\n", unack_count); + + if (drawctxt) { + struct kgsl_process_private *proc_priv = drawctxt->base.proc_priv; + + dev_err(&gmu->pdev->dev, + "Hardware fence got deferred for ctx:%d ts:%d pid:%d proc:%s\n", + drawctxt->base.id, ts, pid_nr(proc_priv->pid), proc_priv->comm); + } + gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); +} + +static void gen8_hwsched_hw_fence_timer(struct timer_list *t) +{ + struct gen8_hwsched_hfi *hfi = from_timer(hfi, t, hw_fence_timer); + + kgsl_schedule_work(&hfi->hw_fence_ws); +} + +int gen8_hwsched_hfi_probe(struct adreno_device 
*adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev); + + gmu->hfi.irq = kgsl_request_irq(gmu->pdev, "hfi", + gen8_hwsched_hfi_handler, adreno_dev); + + if (gmu->hfi.irq < 0) + return gmu->hfi.irq; + + hw_hfi->irq_mask = HFI_IRQ_MASK; + + rwlock_init(&hw_hfi->msglock); + + INIT_LIST_HEAD(&hw_hfi->msglist); + INIT_LIST_HEAD(&hw_hfi->detached_hw_fence_list); + + init_waitqueue_head(&hw_hfi->f2h_wq); + init_waitqueue_head(&hw_hfi->hw_fence.unack_wq); + + spin_lock_init(&hw_hfi->hw_fence.lock); + + mutex_init(&hw_hfi->msgq_mutex); + + INIT_WORK(&hw_hfi->hw_fence_ws, gen8_hwsched_hw_fence_timeout); + + timer_setup(&hw_hfi->hw_fence_timer, gen8_hwsched_hw_fence_timer, 0); + + return 0; +} + +void gen8_hwsched_hfi_remove(struct adreno_device *adreno_dev) +{ + struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev); + + if (hw_hfi->f2h_task) + kthread_stop(hw_hfi->f2h_task); +} + +static void gen8_add_profile_events(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj, struct adreno_submit_time *time) +{ + unsigned long flags; + u64 time_in_s; + unsigned long time_in_ns; + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + struct kgsl_context *context = drawobj->context; + struct submission_info info = {0}; + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (!time) + return; + + /* + * Here we are attempting to create a mapping between the + * GPU time domain (alwayson counter) and the CPU time domain + * (local_clock) by sampling both values as close together as + * possible. This is useful for many types of debugging and + * profiling. In order to make this mapping as accurate as + * possible, we must turn off interrupts to avoid running + * interrupt handlers between the two samples. 
+ */ + + local_irq_save(flags); + + /* Read always on registers */ + time->ticks = gpudev->read_alwayson(adreno_dev); + + /* Trace the GPU time to create a mapping to ftrace time */ + trace_adreno_cmdbatch_sync(context->id, context->priority, + drawobj->timestamp, time->ticks); + + /* Get the kernel clock for time since boot */ + time->ktime = local_clock(); + + /* Get the timeofday for the wall time (for the user) */ + ktime_get_real_ts64(&time->utime); + + local_irq_restore(flags); + + /* Return kernel clock time to the client if requested */ + time_in_s = time->ktime; + time_in_ns = do_div(time_in_s, 1000000000); + + info.inflight = hwsched->inflight; + info.rb_id = adreno_get_level(context); + info.gmu_dispatch_queue = context->gmu_dispatch_queue; + + cmdobj->submit_ticks = time->ticks; + + msm_perf_events_update(MSM_PERF_GFX, MSM_PERF_SUBMIT, + pid_nr(context->proc_priv->pid), + context->id, drawobj->timestamp, + !!(drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME)); + trace_adreno_cmdbatch_submitted(drawobj, &info, time->ticks, + (unsigned long) time_in_s, time_in_ns / 1000, 0); + + log_kgsl_cmdbatch_submitted_event(context->id, drawobj->timestamp, + context->priority, drawobj->flags); +} + +static void init_gmu_context_queue(struct adreno_context *drawctxt) +{ + struct kgsl_memdesc *md = &drawctxt->gmu_context_queue; + struct gmu_context_queue_header *hdr = md->hostptr; + + hdr->start_addr = md->gmuaddr + sizeof(*hdr); + hdr->queue_size = (md->size - sizeof(*hdr)) >> 2; + hdr->hw_fence_buffer_va = drawctxt->gmu_hw_fence_queue.gmuaddr; + hdr->hw_fence_buffer_size = drawctxt->gmu_hw_fence_queue.size; +} + +static u32 get_dq_id(struct adreno_device *adreno_dev, struct kgsl_context *context) +{ + struct dq_info *info; + u32 next; + u32 priority = adreno_get_level(context); + + if (adreno_dev->lpac_enabled) + info = &gen8_hfi_dqs_lpac[priority]; + else + info = &gen8_hfi_dqs[priority]; + + next = info->base_dq_id + info->offset; + + info->offset = (info->offset + 1) % 
info->max_dq; + + return next; +} + +static int allocate_context_queues(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + int ret = 0; + + if (!adreno_hwsched_context_queue_enabled(adreno_dev)) + return 0; + + if (test_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags) && + !drawctxt->gmu_hw_fence_queue.gmuaddr) { + ret = gen8_alloc_gmu_kernel_block( + to_gen8_gmu(adreno_dev), &drawctxt->gmu_hw_fence_queue, + HW_FENCE_QUEUE_SIZE, GMU_NONCACHED_KERNEL, + IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV); + if (ret) { + memset(&drawctxt->gmu_hw_fence_queue, 0x0, + sizeof(drawctxt->gmu_hw_fence_queue)); + return ret; + } + } + + if (!drawctxt->gmu_context_queue.gmuaddr) { + ret = gen8_alloc_gmu_kernel_block( + to_gen8_gmu(adreno_dev), &drawctxt->gmu_context_queue, + SZ_4K, GMU_NONCACHED_KERNEL, + IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV); + if (ret) { + memset(&drawctxt->gmu_context_queue, 0x0, + sizeof(drawctxt->gmu_context_queue)); + return ret; + } + init_gmu_context_queue(drawctxt); + } + + return 0; +} + +static int send_context_register(struct adreno_device *adreno_dev, + struct kgsl_context *context) +{ + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + struct hfi_register_ctxt_cmd cmd; + struct kgsl_pagetable *pt = context->proc_priv->pagetable; + int ret, asid = kgsl_mmu_pagetable_get_asid(pt, context); + + if (asid < 0) + return asid; + + ret = CMD_MSG_HDR(cmd, H2F_MSG_REGISTER_CONTEXT); + if (ret) + return ret; + + ret = allocate_context_queues(adreno_dev, drawctxt); + if (ret) + return ret; + + cmd.ctxt_id = context->id; + cmd.flags = HFI_CTXT_FLAG_NOTIFY | context->flags; + /* + * HLOS SMMU driver programs context bank to look up ASID from TTBR0 during a page + * table walk. So the TLB entries are tagged with the ASID from TTBR0. TLBIASID + * invalidates TLB entries whose ASID matches the value that was written to the + * CBn_TLBIASID register. Set ASID along with PT address. 
+ */ + cmd.pt_addr = kgsl_mmu_pagetable_get_ttbr0(pt) | + FIELD_PREP(GENMASK_ULL(63, KGSL_IOMMU_ASID_START_BIT), asid); + cmd.ctxt_idr = context->id; + cmd.ctxt_bank = kgsl_mmu_pagetable_get_context_bank(pt, context); + + return gen8_hfi_send_cmd_async(adreno_dev, &cmd, sizeof(cmd)); +} + +static int send_context_pointers(struct adreno_device *adreno_dev, + struct kgsl_context *context) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct hfi_context_pointers_cmd cmd = {0}; + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + int ret; + + ret = CMD_MSG_HDR(cmd, H2F_MSG_CONTEXT_POINTERS); + if (ret) + return ret; + + cmd.ctxt_id = context->id; + cmd.sop_addr = MEMSTORE_ID_GPU_ADDR(device, context->id, soptimestamp); + cmd.eop_addr = MEMSTORE_ID_GPU_ADDR(device, context->id, eoptimestamp); + if (context->user_ctxt_record) + cmd.user_ctxt_record_addr = + context->user_ctxt_record->memdesc.gpuaddr; + + if (adreno_hwsched_context_queue_enabled(adreno_dev)) + cmd.gmu_context_queue_addr = drawctxt->gmu_context_queue.gmuaddr; + + return gen8_hfi_send_cmd_async(adreno_dev, &cmd, sizeof(cmd)); +} + +static int hfi_context_register(struct adreno_device *adreno_dev, + struct kgsl_context *context) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + if (context->gmu_registered) + return 0; + + ret = send_context_register(adreno_dev, context); + if (ret) { + dev_err(&gmu->pdev->dev, + "Unable to register context %u: %d\n", + context->id, ret); + + if (device->gmu_fault) + gen8_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + + return ret; + } + + ret = send_context_pointers(adreno_dev, context); + if (ret) { + dev_err(&gmu->pdev->dev, + "Unable to register context %u pointers: %d\n", + context->id, ret); + + if (device->gmu_fault) + gen8_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + + return ret; + } + + context->gmu_registered = true; + if 
(adreno_hwsched_context_queue_enabled(adreno_dev)) + context->gmu_dispatch_queue = UINT_MAX; + else + context->gmu_dispatch_queue = get_dq_id(adreno_dev, context); + + return 0; +} + +static void populate_ibs(struct adreno_device *adreno_dev, + struct hfi_submit_cmd *cmd, struct kgsl_drawobj_cmd *cmdobj) +{ + struct hfi_issue_ib *issue_ib; + struct kgsl_memobj_node *ib; + + if (cmdobj->numibs > HWSCHED_MAX_DISPATCH_NUMIBS) { + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + struct kgsl_memdesc *big_ib; + + if (test_bit(CMDOBJ_RECURRING_START, &cmdobj->priv)) + big_ib = hfi->big_ib_recurring; + else + big_ib = hfi->big_ib; + /* + * The dispatcher ensures that there is only one big IB inflight + */ + cmd->big_ib_gmu_va = big_ib->gmuaddr; + cmd->flags |= CMDBATCH_INDIRECT; + issue_ib = big_ib->hostptr; + } else { + issue_ib = (struct hfi_issue_ib *)&cmd[1]; + } + + list_for_each_entry(ib, &cmdobj->cmdlist, node) { + issue_ib->addr = ib->gpuaddr; + issue_ib->size = ib->size; + issue_ib++; + } + + cmd->numibs = cmdobj->numibs; +} + +#define HFI_DSP_IRQ_BASE 2 + +#define DISPQ_IRQ_BIT(_idx) BIT((_idx) + HFI_DSP_IRQ_BASE) + +int gen8_gmu_context_queue_write(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, u32 *msg, u32 size_bytes, + struct kgsl_drawobj *drawobj, struct adreno_submit_time *time) +{ + struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + u32 *queue = drawctxt->gmu_context_queue.hostptr + sizeof(*hdr); + u32 i, empty_space, write_idx = hdr->write_index, read_idx = hdr->read_index; + u32 size_dwords = size_bytes >> 2; + u32 align_size = ALIGN(size_dwords, SZ_4); + u32 id = MSG_HDR_GET_ID(*msg); + struct kgsl_drawobj_cmd *cmdobj = NULL; + + empty_space = (write_idx >= read_idx) ? 
+ (hdr->queue_size - (write_idx - read_idx)) + : (read_idx - write_idx); + + if (empty_space <= align_size) + return -ENOSPC; + + if (!IS_ALIGNED(size_bytes, sizeof(u32))) + return -EINVAL; + + *msg = MSG_HDR_SET_SIZE(*msg, size_dwords); + + for (i = 0; i < size_dwords; i++) { + queue[write_idx] = msg[i]; + write_idx = (write_idx + 1) % hdr->queue_size; + } + + /* Cookify any non used data at the end of the write buffer */ + for (; i < align_size; i++) { + queue[write_idx] = 0xfafafafa; + write_idx = (write_idx + 1) % hdr->queue_size; + } + + /* Ensure packet is written out before proceeding */ + wmb(); + + if (drawobj->type & SYNCOBJ_TYPE) { + struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); + + trace_adreno_syncobj_submitted(drawobj->context->id, drawobj->timestamp, + syncobj->numsyncs, gpudev->read_alwayson(adreno_dev)); + goto done; + } + + cmdobj = CMDOBJ(drawobj); + + gen8_add_profile_events(adreno_dev, cmdobj, time); + + /* + * Put the profiling information in the user profiling buffer. + * The hfi_update_write_idx below has a wmb() before the actual + * write index update to ensure that the GMU does not see the + * packet before the profile data is written out. 
+ */ + adreno_profile_submit_time(time); + +done: + trace_kgsl_hfi_send(id, size_dwords, MSG_HDR_GET_SEQNUM(*msg)); + + hfi_update_write_idx(&hdr->write_index, write_idx); + + return 0; +} + +static u32 get_irq_bit(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj) +{ + if (!adreno_hwsched_context_queue_enabled(adreno_dev)) + return drawobj->context->gmu_dispatch_queue; + + if (adreno_is_preemption_enabled(adreno_dev)) + return adreno_get_level(drawobj->context); + + if (kgsl_context_is_lpac(drawobj->context)) + return 1; + + return 0; +} + +static int add_gmu_waiter(struct adreno_device *adreno_dev, + struct dma_fence *fence) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret = msm_hw_fence_wait_update(adreno_dev->hwsched.hw_fence.handle, + &fence, 1, true); + + if (ret) + dev_err_ratelimited(device->dev, + "Failed to add GMU as waiter ret:%d fence ctx:%llu ts:%llu\n", + ret, fence->context, fence->seqno); + + return ret; +} + +static void populate_kgsl_fence(struct hfi_syncobj *obj, + struct dma_fence *fence) +{ + struct kgsl_sync_fence *kfence = (struct kgsl_sync_fence *)fence; + struct kgsl_sync_timeline *ktimeline = kfence->parent; + unsigned long flags; + + obj->flags |= BIT(GMU_SYNCOBJ_FLAG_KGSL_FENCE_BIT); + + spin_lock_irqsave(&ktimeline->lock, flags); + /* If the context is going away or the dma fence is signaled, mark the fence as triggered */ + if (!ktimeline->context || dma_fence_is_signaled_locked(fence)) { + obj->flags |= BIT(GMU_SYNCOBJ_FLAG_SIGNALED_BIT); + spin_unlock_irqrestore(&ktimeline->lock, flags); + return; + } + obj->ctxt_id = ktimeline->context->id; + spin_unlock_irqrestore(&ktimeline->lock, flags); + + obj->seq_no = kfence->timestamp; +} + +static int _submit_hw_fence(struct adreno_device *adreno_dev, + struct kgsl_drawobj *drawobj, void *cmdbuf) +{ + struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context); + struct gen8_hfi *hfi = to_gen8_hfi(adreno_dev); + int i, j; + u32 cmd_sizebytes; + 
struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); + struct hfi_submit_syncobj *cmd; + struct hfi_syncobj *obj = NULL; + + /* Add hfi_syncobj struct for sync object */ + cmd_sizebytes = sizeof(*cmd) + + (sizeof(struct hfi_syncobj) * + syncobj->num_hw_fence); + + if (WARN_ON(cmd_sizebytes > HFI_MAX_MSG_SIZE)) + return -EMSGSIZE; + + memset(cmdbuf, 0x0, cmd_sizebytes); + cmd = cmdbuf; + cmd->num_syncobj = syncobj->num_hw_fence; + obj = (struct hfi_syncobj *)&cmd[1]; + + for (i = 0; i < syncobj->numsyncs; i++) { + struct kgsl_drawobj_sync_event *event = &syncobj->synclist[i]; + struct kgsl_sync_fence_cb *kcb = event->handle; + struct dma_fence **fences; + struct dma_fence_array *array; + u32 num_fences; + + if (!kcb) + return -EINVAL; + + array = to_dma_fence_array(kcb->fence); + if (array != NULL) { + num_fences = array->num_fences; + fences = array->fences; + } else { + num_fences = 1; + fences = &kcb->fence; + } + + for (j = 0; j < num_fences; j++) { + + /* + * If this sync object has a software only fence, make sure that it is + * already signaled so that we can skip sending this fence to the GMU. + */ + if (!test_bit(MSM_HW_FENCE_FLAG_ENABLED_BIT, &fences[j]->flags)) { + if (WARN(!dma_fence_is_signaled(fences[j]), + "sync object has unsignaled software fence")) + return -EINVAL; + continue; + } + + if (is_kgsl_fence(fences[j])) { + populate_kgsl_fence(obj, fences[j]); + } else { + int ret = add_gmu_waiter(adreno_dev, fences[j]); + + if (ret) { + syncobj->flags &= ~KGSL_SYNCOBJ_HW; + return ret; + } + + if (test_bit(MSM_HW_FENCE_FLAG_SIGNALED_BIT, &fences[j]->flags) || + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fences[j]->flags)) + obj->flags |= BIT(GMU_SYNCOBJ_FLAG_SIGNALED_BIT); + + obj->ctxt_id = fences[j]->context; + obj->seq_no = fences[j]->seqno; + } + trace_adreno_input_hw_fence(drawobj->context->id, obj->ctxt_id, + obj->seq_no, obj->flags, fences[j]->ops->get_timeline_name ? 
+ fences[j]->ops->get_timeline_name(fences[j]) : "unknown"); + + obj++; + } + } + + /* + * Attach a timestamp to this SYNCOBJ to keep track whether GMU has deemed it signaled + * or not. + */ + drawobj->timestamp = ++drawctxt->syncobj_timestamp; + cmd->timestamp = drawobj->timestamp; + + cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_SYNCOBJ, HFI_MSG_CMD); + cmd->hdr = MSG_HDR_SET_SEQNUM(cmd->hdr, + atomic_inc_return(&hfi->seqnum)); + + return gen8_gmu_context_queue_write(adreno_dev, drawctxt, (u32 *)cmd, cmd_sizebytes, + drawobj, NULL); +} + +int gen8_hwsched_check_context_inflight_hw_fences(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + struct adreno_hw_fence_entry *entry, *tmp; + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret = 0; + + spin_lock(&drawctxt->lock); + list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_inflight_list, node) { + struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; + + if (timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0) { + dev_err(&gmu->pdev->dev, + "detached ctx:%d has unsignaled fence ts:%d retired:%d\n", + drawctxt->base.id, (u32)entry->cmd.ts, hdr->out_fence_ts); + ret = -EINVAL; + break; + } + + gen8_remove_hw_fence_entry(adreno_dev, entry); + } + spin_unlock(&drawctxt->lock); + + return ret; +} + +/** + * move_detached_context_hardware_fences - Move all pending hardware fences belonging to this + * context to the detached hardware fence list so as to send them to TxQueue after fault recovery. + * This is needed because this context may get destroyed before fault recovery gets executed. 
+ */ +static void move_detached_context_hardware_fences(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + struct adreno_hw_fence_entry *entry, *tmp; + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + + /* We don't need the drawctxt lock here because this context has already been detached */ + list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_inflight_list, node) { + struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; + + if ((timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0)) { + list_move_tail(&entry->node, &hfi->detached_hw_fence_list); + continue; + } + + gen8_remove_hw_fence_entry(adreno_dev, entry); + } + + /* Also grab all the hardware fences which were never sent to GMU */ + list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) { + list_move_tail(&entry->node, &hfi->detached_hw_fence_list); + } +} + +/** + * check_detached_context_hardware_fences - When this context has been un-registered with the GMU, + * make sure all the hardware fences(that were sent to GMU) for this context have been sent to + * TxQueue. Also, send any hardware fences (to GMU) that were not yet dispatched to the GMU. In case + * of an error, move the pending hardware fences to detached hardware fence list, log the error, + * take a snapshot and trigger recovery. 
+ */ +static int check_detached_context_hardware_fences(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_hw_fence_entry *entry, *tmp; + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret = 0; + + /* We don't need the drawctxt lock because this context has been detached */ + list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_inflight_list, node) { + struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; + + if ((timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0)) { + dev_err(&gmu->pdev->dev, + "detached ctx:%d has unsignaled fence ts:%d retired:%d\n", + drawctxt->base.id, (u32)entry->cmd.ts, hdr->out_fence_ts); + ret = -EINVAL; + goto fault; + } + gen8_remove_hw_fence_entry(adreno_dev, entry); + } + + /* Send hardware fences (to TxQueue) that were not dispatched to GMU */ + list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) { + + ret = gen8_send_hw_fence_hfi_wait_ack(adreno_dev, entry, + HW_FENCE_FLAG_SKIP_MEMSTORE); + if (ret) + goto fault; + + gen8_remove_hw_fence_entry(adreno_dev, entry); + } + + return 0; + +fault: + move_detached_context_hardware_fences(adreno_dev, drawctxt); + gmu_core_fault_snapshot(device); + gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); + + return ret; +} + +static inline int setup_hw_fence_info_cmd(struct adreno_device *adreno_dev, + struct adreno_hw_fence_entry *entry) +{ + struct kgsl_sync_fence *kfence = entry->kfence; + int ret; + + ret = CMD_MSG_HDR(entry->cmd, H2F_MSG_HW_FENCE_INFO); + if (ret) + return ret; + + entry->cmd.gmu_ctxt_id = entry->drawctxt->base.id; + entry->cmd.ctxt_id = kfence->fence.context; + entry->cmd.ts = kfence->fence.seqno; + + entry->cmd.hash_index = kfence->hw_fence_index; + + return 0; +} + +/* + * gen8_send_hw_fence_hfi_wait_ack - This function is used in cases where multiple hardware fences + * are to be sent to GMU. 
Hence, we must send them one by one to avoid overwhelming the GMU with + multiple fences in a short span of time. + */ +int gen8_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev, + struct adreno_hw_fence_entry *entry, u64 flags) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret = 0; + + /* Device mutex is necessary to ensure only one hardware fence ack is being waited for */ + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return -EINVAL; + + spin_lock(&hfi->hw_fence.lock); + + init_completion(&gen8_hw_fence_ack.complete); + + entry->cmd.flags |= flags; + entry->cmd.hdr = MSG_HDR_SET_SEQNUM(entry->cmd.hdr, + atomic_inc_return(&gmu->hfi.seqnum)); + + gen8_hw_fence_ack.sent_hdr = entry->cmd.hdr; + + /* + * We don't need to increment the unack count here as we are waiting for the ack for + * this fence before sending another hardware fence. 
+ */ + ret = gen8_hfi_cmdq_write(adreno_dev, (u32 *)&entry->cmd, sizeof(entry->cmd)); + + spin_unlock(&hfi->hw_fence.lock); + + if (!ret) + ret = adreno_hwsched_wait_ack_completion(adreno_dev, + &gmu->pdev->dev, &gen8_hw_fence_ack, + gen8_hwsched_process_msgq); + + memset(&gen8_hw_fence_ack, 0x0, sizeof(gen8_hw_fence_ack)); + return ret; +} + +/** + * drawctxt_queue_hw_fence - Add a hardware fence to draw context's hardware fence list and make + * sure the list remains sorted (with the fence with the largest timestamp at the end) + */ +static void drawctxt_queue_hw_fence(struct adreno_context *drawctxt, + struct adreno_hw_fence_entry *new) +{ + struct adreno_hw_fence_entry *entry = NULL; + u32 ts = (u32)new->cmd.ts; + + /* Walk the list backwards to find the right spot for this fence */ + list_for_each_entry_reverse(entry, &drawctxt->hw_fence_list, node) { + if (timestamp_cmp(ts, (u32)entry->cmd.ts) > 0) + break; + } + + list_add(&new->node, &entry->node); +} + +#define DRAWCTXT_SLOT_AVAILABLE(count) \ + ((count + 1) < (HW_FENCE_QUEUE_SIZE / sizeof(struct hfi_hw_fence_info))) + +/** + * allocate_hw_fence_entry - Allocate an entry to keep track of a hardware fence. 
This is free'd + * when we know GMU has sent this fence to the TxQueue + */ +static struct adreno_hw_fence_entry *allocate_hw_fence_entry(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, struct kgsl_sync_fence *kfence) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct adreno_hw_fence_entry *entry; + + if (!DRAWCTXT_SLOT_AVAILABLE(drawctxt->hw_fence_count)) + return NULL; + + entry = kmem_cache_zalloc(hwsched->hw_fence_cache, GFP_ATOMIC); + if (!entry) + return NULL; + + entry->kfence = kfence; + entry->drawctxt = drawctxt; + + if (setup_hw_fence_info_cmd(adreno_dev, entry)) { + kmem_cache_free(hwsched->hw_fence_cache, entry); + return NULL; + } + + dma_fence_get(&kfence->fence); + + drawctxt->hw_fence_count++; + atomic_inc(&hwsched->hw_fence_count); + + INIT_LIST_HEAD(&entry->node); + INIT_LIST_HEAD(&entry->reset_node); + return entry; +} + +static bool _hw_fence_end_sleep(struct adreno_device *adreno_dev) +{ + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + bool ret; + + spin_lock(&hfi->hw_fence.lock); + ret = !test_bit(GEN8_HWSCHED_HW_FENCE_SLEEP_BIT, &hfi->hw_fence.flags); + spin_unlock(&hfi->hw_fence.lock); + + return ret; +} + +/** + * _hw_fence_sleep() - Check if the thread needs to sleep until the hardware fence unack count + * drops to a desired threshold. + * + * Return: negative error code if the thread was woken up by a signal, or the context became bad in + * the meanwhile, or the hardware fence unack count hasn't yet dropped to a desired threshold, or + * if fault recovery is imminent. + * Otherwise, return 0. 
+ */ +static int _hw_fence_sleep(struct adreno_device *adreno_dev, struct adreno_context *drawctxt) +{ + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + int ret = 0; + + if (!test_bit(GEN8_HWSCHED_HW_FENCE_SLEEP_BIT, &hfi->hw_fence.flags)) + return 0; + + spin_unlock(&hfi->hw_fence.lock); + spin_unlock(&drawctxt->lock); + + ret = wait_event_interruptible(hfi->hw_fence.unack_wq, + _hw_fence_end_sleep(adreno_dev)); + + spin_lock(&drawctxt->lock); + spin_lock(&hfi->hw_fence.lock); + + /* + * If the thread received a signal, or the context became bad in the meanwhile or the limit + * is still not settled, then return error to avoid creating this hardware fence + */ + if ((ret == -ERESTARTSYS) || kgsl_context_is_bad(&drawctxt->base) || + test_bit(GEN8_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags)) + return -EINVAL; + + /* + * If fault recovery is imminent then return error code to avoid creating new hardware + * fences until recovery is complete + */ + if (test_bit(GEN8_HWSCHED_HW_FENCE_ABORT_BIT, &hfi->hw_fence.flags)) + return -EBUSY; + + return ret; +} + +void gen8_hwsched_create_hw_fence(struct adreno_device *adreno_dev, + struct kgsl_sync_fence *kfence) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_sync_timeline *ktimeline = kfence->parent; + struct kgsl_context *context = ktimeline->context; + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + struct adreno_hw_fence_entry *entry = NULL; + struct msm_hw_fence_create_params params = {0}; + /* Only allow a single log in a second */ + static DEFINE_RATELIMIT_STATE(_rs, HZ, 1); + struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + u32 retired = 0; + int ret = 0; + bool destroy_hw_fence = true; + + params.fence = &kfence->fence; + params.handle = &kfence->hw_fence_index; + kfence->hw_fence_handle = adreno_dev->hwsched.hw_fence.handle; + + ret = msm_hw_fence_create(kfence->hw_fence_handle, 
&params); + if ((ret || IS_ERR_OR_NULL(params.handle))) { + if (__ratelimit(&_rs)) + dev_err(device->dev, "Failed to create ctx:%d ts:%d hardware fence:%d\n", + kfence->context_id, kfence->timestamp, ret); + return; + } + + spin_lock(&drawctxt->lock); + spin_lock(&hw_hfi->hw_fence.lock); + + /* + * If we create a hardware fence and this context is going away, we may never dispatch + * this fence to the GMU. Hence, avoid creating a hardware fence if context is going away. + */ + if (kgsl_context_is_bad(context)) + goto done; + + entry = allocate_hw_fence_entry(adreno_dev, drawctxt, kfence); + if (!entry) + goto done; + + /* If recovery is imminent, then do not create a hardware fence */ + if (test_bit(GEN8_HWSCHED_HW_FENCE_ABORT_BIT, &hw_hfi->hw_fence.flags)) { + destroy_hw_fence = true; + goto done; + } + + ret = _hw_fence_sleep(adreno_dev, drawctxt); + if (ret) + goto done; + + /* + * If this ts hasn't been submitted yet, then store it in the drawctxt hardware fence + * list and return. This fence will be sent to GMU when this ts is dispatched to GMU. + */ + if (timestamp_cmp(kfence->timestamp, drawctxt->internal_timestamp) > 0) { + drawctxt_queue_hw_fence(drawctxt, entry); + destroy_hw_fence = false; + goto done; + } + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, &retired); + + /* + * Check if timestamp is retired. If we are in SLUMBER at this point, the timestamp is + * guaranteed to be retired. This way, we don't need the device mutex to check the device + * state explicitly. + */ + if (timestamp_cmp(retired, kfence->timestamp) >= 0) { + kgsl_sync_timeline_signal(ktimeline, kfence->timestamp); + goto done; + } + + /* + * If timestamp is not retired then GMU must already be powered up. This is because SLUMBER + * thread has to wait for hardware fence spinlock to make sure the hardware fence unack + * count is zero. 
+ */ + ret = _send_hw_fence_no_ack(adreno_dev, entry); + if (ret) { + if (__ratelimit(&_rs)) + dev_err(&gmu->pdev->dev, "Aborting hw fence for ctx:%d ts:%d ret:%d\n", + kfence->context_id, kfence->timestamp, ret); + goto done; + } + + list_add_tail(&entry->node, &drawctxt->hw_fence_inflight_list); + + destroy_hw_fence = false; + +done: + if (destroy_hw_fence) { + msm_hw_fence_destroy(kfence->hw_fence_handle, &kfence->fence); + if (entry) + gen8_remove_hw_fence_entry(adreno_dev, entry); + } + + spin_unlock(&hw_hfi->hw_fence.lock); + spin_unlock(&drawctxt->lock); +} + +/** + * setup_hw_fence_deferred_ctxt - The hardware fence(s) from this context couldn't be sent to the + * GMU because the hardware fence unack count reached a threshold. Hence, setup this context such + * that these hardware fences are sent to the GMU when the unack count drops to a desired threshold. + */ +static void setup_hw_fence_deferred_ctxt(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, u32 ts) +{ + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + + if (!_kgsl_context_get(&drawctxt->base)) + return; + + hfi->hw_fence.defer_drawctxt = drawctxt; + hfi->hw_fence.defer_ts = ts; + /* + * Increment the active count so that device doesn't get powered off until this fence has + * been sent to GMU + */ + gen8_hwsched_active_count_get(adreno_dev); +} + +/** + * process_hw_fence_queue - This function walks the draw context's list of hardware fences + * and sends the ones which have a timestamp less than or equal to the timestamp that just + * got submitted to the GMU. 
+ */ +static void process_hw_fence_queue(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, u32 ts) +{ + struct adreno_hw_fence_entry *entry = NULL, *next; + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + int ret = 0; + + /* This list is sorted with smallest timestamp at head and highest timestamp at tail */ + list_for_each_entry_safe(entry, next, &drawctxt->hw_fence_list, node) { + + if (timestamp_cmp((u32)entry->cmd.ts, ts) > 0) + return; + + spin_lock(&hfi->hw_fence.lock); + + if (test_bit(GEN8_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags)) { + setup_hw_fence_deferred_ctxt(adreno_dev, drawctxt, ts); + spin_unlock(&hfi->hw_fence.lock); + return; + } + + ret = _send_hw_fence_no_ack(adreno_dev, entry); + + spin_unlock(&hfi->hw_fence.lock); + + if (ret) + return; + + /* + * A fence that is sent to GMU must be added to the drawctxt->hw_fence_inflight_list + * so that we can keep track of when GMU sends it to the TxQueue + */ + list_del_init(&entry->node); + list_add_tail(&entry->node, &drawctxt->hw_fence_inflight_list); + } +} + +/* Size in below functions are in unit of dwords */ +static int gen8_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, u32 queue_idx, + u32 *msg, u32 size_bytes, struct kgsl_drawobj_cmd *cmdobj, struct adreno_submit_time *time) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct hfi_queue_table *tbl = gmu->hfi.hfi_mem->hostptr; + struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx]; + u32 *queue; + u32 i, write, empty_space; + u32 size_dwords = size_bytes >> 2; + u32 align_size = ALIGN(size_dwords, SZ_4); + u32 id = MSG_HDR_GET_ID(*msg); + + if (hdr->status == HFI_QUEUE_STATUS_DISABLED || !IS_ALIGNED(size_bytes, sizeof(u32))) + return -EINVAL; + + queue = HOST_QUEUE_START_ADDR(gmu->hfi.hfi_mem, queue_idx); + + empty_space = (hdr->write_index >= hdr->read_index) ? 
+ (hdr->queue_size - (hdr->write_index - hdr->read_index)) + : (hdr->read_index - hdr->write_index); + + if (empty_space <= align_size) + return -ENOSPC; + + *msg = MSG_HDR_SET_SIZE(*msg, size_dwords); + + write = hdr->write_index; + + for (i = 0; i < size_dwords; i++) { + queue[write] = msg[i]; + write = (write + 1) % hdr->queue_size; + } + + /* Cookify any non used data at the end of the write buffer */ + for (; i < align_size; i++) { + queue[write] = 0xfafafafa; + write = (write + 1) % hdr->queue_size; + } + + /* Ensure packet is written out before proceeding */ + wmb(); + + gen8_add_profile_events(adreno_dev, cmdobj, time); + + /* + * Put the profiling information in the user profiling buffer. + * The hfi_update_write_idx below has a wmb() before the actual + * write index update to ensure that the GMU does not see the + * packet before the profile data is written out. + */ + adreno_profile_submit_time(time); + + trace_kgsl_hfi_send(id, size_dwords, MSG_HDR_GET_SEQNUM(*msg)); + + hfi_update_write_idx(&hdr->write_index, write); + + return 0; +} + +int gen8_hwsched_submit_drawobj(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj) +{ + struct gen8_hfi *hfi = to_gen8_hfi(adreno_dev); + int ret = 0; + u32 cmd_sizebytes; + struct kgsl_drawobj_cmd *cmdobj = NULL; + struct hfi_submit_cmd *cmd; + struct adreno_submit_time time = {0}; + struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context); + static void *cmdbuf; + + if (cmdbuf == NULL) { + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + cmdbuf = devm_kzalloc(&device->pdev->dev, HFI_MAX_MSG_SIZE, + GFP_KERNEL); + if (!cmdbuf) + return -ENOMEM; + } + + ret = hfi_context_register(adreno_dev, drawobj->context); + if (ret) + return ret; + + if ((drawobj->type & SYNCOBJ_TYPE) != 0) + return _submit_hw_fence(adreno_dev, drawobj, cmdbuf); + + cmdobj = CMDOBJ(drawobj); + + /* + * If the MARKER object is retired, it doesn't need to be dispatched to GMU. 
Simply trigger + * any pending fences that are less than/equal to this object's timestamp. + */ + if (test_bit(CMDOBJ_MARKER_EXPIRED, &cmdobj->priv)) { + spin_lock(&drawctxt->lock); + process_hw_fence_queue(adreno_dev, drawctxt, drawobj->timestamp); + spin_unlock(&drawctxt->lock); + return 0; + } + + /* Add a *issue_ib struct for each IB */ + if (cmdobj->numibs > HWSCHED_MAX_DISPATCH_NUMIBS || + test_bit(CMDOBJ_SKIP, &cmdobj->priv)) + cmd_sizebytes = sizeof(*cmd); + else + cmd_sizebytes = sizeof(*cmd) + + (sizeof(struct hfi_issue_ib) * cmdobj->numibs); + + if (WARN_ON(cmd_sizebytes > HFI_MAX_MSG_SIZE)) + return -EMSGSIZE; + + memset(cmdbuf, 0x0, cmd_sizebytes); + + cmd = cmdbuf; + + cmd->ctxt_id = drawobj->context->id; + cmd->flags = HFI_CTXT_FLAG_NOTIFY; + if (drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME) + cmd->flags |= CMDBATCH_EOF; + + cmd->ts = drawobj->timestamp; + + if (test_bit(CMDOBJ_SKIP, &cmdobj->priv)) + goto skipib; + + populate_ibs(adreno_dev, cmd, cmdobj); + + if ((drawobj->flags & KGSL_DRAWOBJ_PROFILING) && + cmdobj->profiling_buf_entry) { + + time.drawobj = drawobj; + + cmd->profile_gpuaddr_lo = + lower_32_bits(cmdobj->profiling_buffer_gpuaddr); + cmd->profile_gpuaddr_hi = + upper_32_bits(cmdobj->profiling_buffer_gpuaddr); + + /* Indicate to GMU to do user profiling for this submission */ + cmd->flags |= CMDBATCH_PROFILING; + } + +skipib: + adreno_drawobj_set_constraint(KGSL_DEVICE(adreno_dev), drawobj); + + cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD, HFI_MSG_CMD); + cmd->hdr = MSG_HDR_SET_SEQNUM(cmd->hdr, + atomic_inc_return(&hfi->seqnum)); + + if (adreno_hwsched_context_queue_enabled(adreno_dev)) + ret = gen8_gmu_context_queue_write(adreno_dev, + drawctxt, (u32 *)cmd, cmd_sizebytes, drawobj, &time); + else + ret = gen8_hfi_dispatch_queue_write(adreno_dev, + HFI_DSP_ID_0 + drawobj->context->gmu_dispatch_queue, + (u32 *)cmd, cmd_sizebytes, cmdobj, &time); + if (ret) + return ret; + + /* Send interrupt to GMU to receive the message */ + 
gmu_core_regwrite(KGSL_DEVICE(adreno_dev), GEN8_GMUCX_HOST2GMU_INTR_SET, + DISPQ_IRQ_BIT(get_irq_bit(adreno_dev, drawobj))); + + spin_lock(&drawctxt->lock); + process_hw_fence_queue(adreno_dev, drawctxt, drawobj->timestamp); + /* + * We need to update the internal timestamp while holding the drawctxt lock since we have to + * check it in the hardware fence creation path, where we are not taking the device mutex. + */ + drawctxt->internal_timestamp = drawobj->timestamp; + spin_unlock(&drawctxt->lock); + + return 0; +} + +int gen8_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + struct gen8_hfi *hfi = to_gen8_hfi(adreno_dev); + struct hfi_submit_cmd *cmd; + struct kgsl_memobj_node *ib; + u32 cmd_sizebytes; + int ret; + static bool active; + + if (adreno_gpu_halt(adreno_dev) || hwsched_in_fault(hwsched)) + return -EBUSY; + + if (test_bit(CMDOBJ_RECURRING_STOP, &cmdobj->priv)) { + cmdobj->numibs = 0; + } else { + list_for_each_entry(ib, &cmdobj->cmdlist, node) + cmdobj->numibs++; + } + + if (cmdobj->numibs > HWSCHED_MAX_IBS) + return -EINVAL; + + if (cmdobj->numibs > HWSCHED_MAX_DISPATCH_NUMIBS) + cmd_sizebytes = sizeof(*cmd); + else + cmd_sizebytes = sizeof(*cmd) + + (sizeof(struct hfi_issue_ib) * cmdobj->numibs); + + if (WARN_ON(cmd_sizebytes > HFI_MAX_MSG_SIZE)) + return -EMSGSIZE; + + cmd = kzalloc(cmd_sizebytes, GFP_KERNEL); + if (cmd == NULL) + return -ENOMEM; + + if (test_bit(CMDOBJ_RECURRING_START, &cmdobj->priv)) { + if (!active) { + ret = adreno_active_count_get(adreno_dev); + if (ret) { + kfree(cmd); + return ret; + } + active = true; + } + cmd->flags |= CMDBATCH_RECURRING_START; + populate_ibs(adreno_dev, cmd, cmdobj); + } else + cmd->flags |= CMDBATCH_RECURRING_STOP; + + cmd->ctxt_id = drawobj->context->id; + + ret = hfi_context_register(adreno_dev, drawobj->context); + if (ret) { + 
adreno_active_count_put(adreno_dev); + active = false; + kfree(cmd); + return ret; + } + + cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_RECURRING_CMD, HFI_MSG_CMD); + cmd->hdr = MSG_HDR_SET_SEQNUM(cmd->hdr, + atomic_inc_return(&hfi->seqnum)); + + ret = gen8_hfi_send_cmd_async(adreno_dev, cmd, sizeof(*cmd)); + + kfree(cmd); + + if (ret) { + adreno_active_count_put(adreno_dev); + active = false; + return ret; + } + + if (test_bit(CMDOBJ_RECURRING_STOP, &cmdobj->priv)) { + adreno_hwsched_retire_cmdobj(hwsched, hwsched->recurring_cmdobj); + del_timer_sync(&hwsched->lsr_timer); + hwsched->recurring_cmdobj = NULL; + if (active) + adreno_active_count_put(adreno_dev); + active = false; + return ret; + } + + hwsched->recurring_cmdobj = cmdobj; + /* Start LSR timer for power stats collection */ + mod_timer(&hwsched->lsr_timer, jiffies + msecs_to_jiffies(10)); + return ret; +} + +void gen8_trigger_hw_fence_cpu(struct adreno_device *adreno_dev, + struct adreno_hw_fence_entry *entry) +{ + int ret = msm_hw_fence_update_txq(adreno_dev->hwsched.hw_fence.handle, + entry->cmd.hash_index, 0, 0); + + if (ret) { + dev_err_ratelimited(adreno_dev->dev.dev, + "Failed to trigger hw fence via cpu: ctx:%d ts:%d ret:%d\n", + entry->drawctxt->base.id, (u32)entry->cmd.ts, ret); + return; + } + + msm_hw_fence_trigger_signal(adreno_dev->hwsched.hw_fence.handle, IPCC_CLIENT_GPU, + IPCC_CLIENT_APSS, 0); +} + +/* We don't want to unnecessarily wake the GMU to trigger hardware fences */ +static void drain_context_hw_fence_cpu(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + struct adreno_hw_fence_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) { + + gen8_trigger_hw_fence_cpu(adreno_dev, entry); + + gen8_remove_hw_fence_entry(adreno_dev, entry); + } +} + +int gen8_hwsched_drain_context_hw_fences(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + struct adreno_hw_fence_entry *entry, *tmp; + int ret = 0; + + /* We 
don't need the drawctxt lock here as this context has already been invalidated */ + list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) { + + /* Any error here is fatal */ + ret = gen8_send_hw_fence_hfi_wait_ack(adreno_dev, entry, + HW_FENCE_FLAG_SKIP_MEMSTORE); + if (ret) + break; + + gen8_remove_hw_fence_entry(adreno_dev, entry); + } + + return ret; +} + +static int send_context_unregister_hfi(struct adreno_device *adreno_dev, + struct kgsl_context *context, u32 ts) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + struct pending_cmd pending_ack; + struct hfi_unregister_ctxt_cmd cmd; + u32 seqnum; + int rc, ret; + + /* Only send HFI if device is not in SLUMBER */ + if (!context->gmu_registered || + !test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) { + drain_context_hw_fence_cpu(adreno_dev, drawctxt); + return 0; + } + + ret = CMD_MSG_HDR(cmd, H2F_MSG_UNREGISTER_CONTEXT); + if (ret) + return ret; + + cmd.ctxt_id = context->id, + cmd.ts = ts, + + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); + + add_waiter(hfi, cmd.hdr, &pending_ack); + + /* + * Although we know device is powered on, we can still enter SLUMBER + * because the wait for ack below is done without holding the mutex. So + * take an active count before releasing the mutex so as to avoid a + * concurrent SLUMBER sequence while GMU is un-registering this context. 
+ */ + gen8_hwsched_active_count_get(adreno_dev); + + rc = gen8_hfi_cmdq_write(adreno_dev, (u32 *)&cmd, sizeof(cmd)); + if (rc) + goto done; + + mutex_unlock(&device->mutex); + + rc = wait_for_completion_timeout(&pending_ack.complete, + msecs_to_jiffies(30 * 1000)); + if (!rc) { + dev_err(&gmu->pdev->dev, + "Ack timeout for context unregister seq: %d ctx: %u ts: %u\n", + MSG_HDR_GET_SEQNUM(pending_ack.sent_hdr), + context->id, ts); + rc = -ETIMEDOUT; + + mutex_lock(&device->mutex); + + gmu_core_fault_snapshot(device); + + /* + * Make sure we send all fences from this context to the TxQueue after recovery + */ + move_detached_context_hardware_fences(adreno_dev, drawctxt); + gen8_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + + goto done; + } + + mutex_lock(&device->mutex); + + rc = check_detached_context_hardware_fences(adreno_dev, drawctxt); + if (rc) + goto done; + + rc = check_ack_failure(adreno_dev, &pending_ack); +done: + gen8_hwsched_active_count_put(adreno_dev); + + del_waiter(hfi, &pending_ack); + + return rc; +} + +void gen8_hwsched_context_detach(struct adreno_context *drawctxt) +{ + struct kgsl_context *context = &drawctxt->base; + struct kgsl_device *device = context->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int ret = 0; + + mutex_lock(&device->mutex); + + ret = send_context_unregister_hfi(adreno_dev, context, + drawctxt->internal_timestamp); + + if (!ret) { + kgsl_sharedmem_writel(device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, soptimestamp), + drawctxt->timestamp); + + kgsl_sharedmem_writel(device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp), + drawctxt->timestamp); + + adreno_profile_process_results(adreno_dev); + } + + context->gmu_registered = false; + + mutex_unlock(&device->mutex); +} + +u32 gen8_hwsched_preempt_count_get(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (device->state != KGSL_STATE_ACTIVE) + return 0; + + return 
gen8_hwsched_hfi_get_value(adreno_dev, HFI_VALUE_PREEMPT_COUNT); +} + +void gen8_hwsched_context_destroy(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + if (!adreno_hwsched_context_queue_enabled(adreno_dev)) + return; + + if (drawctxt->gmu_context_queue.gmuaddr) + gen8_free_gmu_block(to_gen8_gmu(adreno_dev), &drawctxt->gmu_context_queue); + + if (drawctxt->gmu_hw_fence_queue.gmuaddr) + gen8_free_gmu_block(to_gen8_gmu(adreno_dev), &drawctxt->gmu_hw_fence_queue); +} + +int gen8_hwsched_counter_inline_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + u32 counter, u32 countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + u32 val, cmds[GEN8_PERF_COUNTER_ENABLE_DWORDS + 1]; + int ret; + + if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) + gen8_perfcounter_update(adreno_dev, reg, false, + FIELD_PREP(GENMASK(13, 12), PIPE_NONE)); + + cmds[0] = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD_RAW, HFI_MSG_CMD); + + cmds[1] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); + cmds[2] = cp_type4_packet(reg->select, 1); + cmds[3] = countable; + + ret = gen8_hfi_send_cmd_async(adreno_dev, cmds, sizeof(cmds)); + if (ret) + goto err; + + /* Wait till the register is programmed with the countable */ + ret = kgsl_regmap_read_poll_timeout(&device->regmap, reg->select, val, + val == countable, 100, ADRENO_IDLE_TIMEOUT); + if (!ret) { + reg->value = 0; + return ret; + } + +err: + dev_err(device->dev, "Perfcounter %s/%u/%u start via commands failed\n", + group->name, counter, countable); + + return ret; +} + +int gen8_hwsched_disable_hw_fence_throttle(struct adreno_device *adreno_dev) +{ + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + struct adreno_context *drawctxt = NULL; + u32 ts = 0; + int ret = 0; + + if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags)) + return 0; + + spin_lock(&hfi->hw_fence.lock); + + drawctxt = 
hfi->hw_fence.defer_drawctxt; + ts = hfi->hw_fence.defer_ts; + + spin_unlock(&hfi->hw_fence.lock); + + if (!drawctxt) + goto done; + + ret = process_hw_fence_deferred_ctxt(adreno_dev, drawctxt, ts); + + kgsl_context_put(&drawctxt->base); + gen8_hwsched_active_count_put(adreno_dev); + +done: + _disable_hw_fence_throttle(adreno_dev, true); + + return ret; +} diff --git a/adreno_gen8_hwsched_hfi.h b/adreno_gen8_hwsched_hfi.h new file mode 100644 index 0000000000..73ab8f397f --- /dev/null +++ b/adreno_gen8_hwsched_hfi.h @@ -0,0 +1,371 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _ADRENO_GEN8_HWSCHED_HFI_H_ +#define _ADRENO_GEN8_HWSCHED_HFI_H_ + +/* Maximum number of IBs in a submission */ +#define HWSCHED_MAX_NUMIBS \ + ((HFI_MAX_MSG_SIZE - offsetof(struct hfi_issue_cmd_cmd, ibs)) \ + / sizeof(struct hfi_issue_ib)) + +/* + * This is used to put userspace threads to sleep when hardware fence unack count reaches a + * threshold. This bit is cleared in two scenarios: + * 1. If the hardware fence unack count drops to a desired threshold. + * 2. If there is a GMU/GPU fault. Because we don't want the threads to keep sleeping through fault + * recovery, which can easily take 100s of milliseconds to complete. + */ +#define GEN8_HWSCHED_HW_FENCE_SLEEP_BIT 0x0 + +/* + * This is used to avoid creating any more hardware fences until the hardware fence unack count + * drops to a desired threshold. This bit is required in cases where GEN8_HWSCHED_HW_FENCE_SLEEP_BIT + * will be cleared, but we still want to avoid creating any more hardware fences. For example, if + * hardware fence unack count reaches a maximum threshold, both GEN8_HWSCHED_HW_FENCE_SLEEP_BIT and + * GEN8_HWSCHED_HW_FENCE_MAX_BIT will be set. 
Say, a GMU/GPU fault happens and
+ * GEN8_HWSCHED_HW_FENCE_SLEEP_BIT will be cleared to wake up any sleeping threads. But,
+ * GEN8_HWSCHED_HW_FENCE_MAX_BIT will remain set to avoid creating any new hardware fences until
+ * recovery is complete and deferred drawctxt (if any) is handled.
+ */
+#define GEN8_HWSCHED_HW_FENCE_MAX_BIT 0x1
+
+/*
+ * This is used to avoid creating any more hardware fences until concurrent reset/recovery completes
+ */
+#define GEN8_HWSCHED_HW_FENCE_ABORT_BIT 0x2
+
+struct gen8_hwsched_hfi {
+	struct hfi_mem_alloc_entry mem_alloc_table[32];
+	u32 mem_alloc_entries;
+	/** @irq_mask: Store the hfi interrupt mask */
+	u32 irq_mask;
+	/** @msglock: To protect the list of un-ACKed hfi packets */
+	rwlock_t msglock;
+	/** @msglist: List of un-ACKed hfi packets */
+	struct list_head msglist;
+	/** @f2h_task: Task for processing gmu fw to host packets */
+	struct task_struct *f2h_task;
+	/** @f2h_wq: Waitqueue for the f2h_task */
+	wait_queue_head_t f2h_wq;
+	/** @big_ib: GMU buffer to hold big IBs */
+	struct kgsl_memdesc *big_ib;
+	/** @big_ib_recurring: GMU buffer to hold big recurring IBs */
+	struct kgsl_memdesc *big_ib_recurring;
+	/** @msgq_mutex: Mutex for accessing the msgq */
+	struct mutex msgq_mutex;
+	struct {
+		/** @lock: Spinlock for managing hardware fences */
+		spinlock_t lock;
+		/**
+		 * @unack_count: Number of hardware fences sent to GMU but haven't yet been ack'd
+		 * by GMU
+		 */
+		u32 unack_count;
+		/**
+		 * @unack_wq: Waitqueue to wait on till number of unacked hardware fences drops to
+		 * a desired threshold
+		 */
+		wait_queue_head_t unack_wq;
+		/**
+		 * @defer_drawctxt: Drawctxt to send hardware fences from as soon as unacked
+		 * hardware fences drops to a desired threshold
+		 */
+		struct adreno_context *defer_drawctxt;
+		/**
+		 * @defer_ts: The timestamp of the hardware fence which got deferred
+		 */
+		u32 defer_ts;
+		/**
+		 * @flags: Flags to control the creation of new hardware fences
+		 */
+		unsigned long flags;
+	} hw_fence;
+	
/**
+	 * @hw_fence_timer: Timer to trigger fault if unack'd hardware fence count doesn't drop
+	 * to a desired threshold in given amount of time
+	 */
+	struct timer_list hw_fence_timer;
+	/**
+	 * @hw_fence_ws: Work struct that gets scheduled when hw_fence_timer expires
+	 */
+	struct work_struct hw_fence_ws;
+	/** @detached_hw_fence_list: List of hardware fences belonging to detached contexts */
+	struct list_head detached_hw_fence_list;
+	/** @defer_hw_fence_work: The work structure to send deferred hardware fences to GMU */
+	struct kthread_work defer_hw_fence_work;
+};
+
+struct kgsl_drawobj_cmd;
+
+/**
+ * gen8_hwsched_hfi_probe - Probe hwsched hfi resources
+ * @adreno_dev: Pointer to adreno device structure
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int gen8_hwsched_hfi_probe(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hwsched_hfi_remove - Release hwsched hfi resources
+ * @adreno_dev: Pointer to adreno device structure
+ */
+void gen8_hwsched_hfi_remove(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hwsched_hfi_init - Initialize hfi resources
+ * @adreno_dev: Pointer to adreno device structure
+ *
+ * This function is used to initialize hfi resources
+ * once before the very first gmu boot
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int gen8_hwsched_hfi_init(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hwsched_hfi_start - Start hfi resources
+ * @adreno_dev: Pointer to adreno device structure
+ *
+ * Send the various hfi packets before booting the gpu
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int gen8_hwsched_hfi_start(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hwsched_hfi_stop - Stop the hfi resources
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * This function does the hfi cleanup when powering down the gmu
+ */
+void gen8_hwsched_hfi_stop(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hwsched_cp_init - Send CP_INIT via HFI
+ * @adreno_dev: Pointer to adreno device structure
+ *
+ * This function is used to send CP INIT packet and bring
+ * GPU out of secure mode using hfi raw packets.
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int gen8_hwsched_cp_init(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hwsched_counter_inline_enable - Configure a performance counter for a countable
+ * @adreno_dev - Adreno device to configure
+ * @group - Desired performance counter group
+ * @counter - Desired performance counter in the group
+ * @countable - Desired countable
+ *
+ * Physically set up a counter within a group with the desired countable
+ * Return 0 on success or negative error on failure.
+ */
+int gen8_hwsched_counter_inline_enable(struct adreno_device *adreno_dev,
+	const struct adreno_perfcount_group *group,
+	u32 counter, u32 countable);
+
+/**
+ * gen8_hfi_send_cmd_async - Send an hfi packet
+ * @adreno_dev: Pointer to adreno device structure
+ * @data: Data to be sent in the hfi packet
+ * @size_bytes: Size of the packet in bytes
+ *
+ * Send data in the form of an HFI packet to gmu and wait for
+ * its ack asynchronously
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int gen8_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 size_bytes);
+
+/**
+ * gen8_hwsched_submit_drawobj - Dispatch IBs to dispatch queues
+ * @adreno_dev: Pointer to adreno device structure
+ * @drawobj: The command draw object which needs to be submitted
+ *
+ * This function is used to register the context if needed and submit
+ * IBs to the hfi dispatch queues.
+ + * Return: 0 on success and negative error on failure + */ +int gen8_hwsched_submit_drawobj(struct adreno_device *adreno_dev, + struct kgsl_drawobj *drawobj); + +/** + * gen8_hwsched_context_detach - Unregister a context with GMU + * @drawctxt: Pointer to the adreno context + * + * This function sends context unregister HFI and waits for the ack + * to ensure all submissions from this context have retired + */ +void gen8_hwsched_context_detach(struct adreno_context *drawctxt); + +/* Helper function to get to gen8 hwsched hfi device from adreno device */ +struct gen8_hwsched_hfi *to_gen8_hwsched_hfi(struct adreno_device *adreno_dev); + +/** + * gen8_hwsched_preempt_count_get - Get preemption count from GMU + * @adreno_dev: Pointer to adreno device + * + * This function sends a GET_VALUE HFI packet to get the number of + * preemptions completed since last SLUMBER exit. + * + * Return: Preemption count + */ +u32 gen8_hwsched_preempt_count_get(struct adreno_device *adreno_dev); + +/** + * gen8_hwsched_parse_payload - Parse payload to look up a key + * @payload: Pointer to a payload section + * @key: The key who's value is to be looked up + * + * This function parses the payload data which is a sequence + * of key-value pairs. + * + * Return: The value of the key or 0 if key is not found + */ +u32 gen8_hwsched_parse_payload(struct payload_section *payload, u32 key); + +/** + * gen8_hwsched_lpac_cp_init - Send CP_INIT to LPAC via HFI + * @adreno_dev: Pointer to adreno device structure + * + * This function is used to send CP INIT packet to LPAC and + * enable submission to LPAC queue. + * + * Return: 0 on success and negative error on failure. 
+ */
+int gen8_hwsched_lpac_cp_init(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hfi_send_lpac_feature_ctrl - Send the lpac feature hfi packet
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_hfi_send_lpac_feature_ctrl(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hwsched_context_destroy - Destroy any hwsched related resources during context destruction
+ * @adreno_dev: Pointer to adreno device
+ * @drawctxt: Pointer to the adreno context
+ *
+ * This function destroys any hwsched related resources when this context is destroyed
+ */
+void gen8_hwsched_context_destroy(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt);
+
+/**
+ * gen8_hwsched_hfi_get_value - Send GET_VALUE packet to GMU to get the value of a property
+ * @adreno_dev: Pointer to adreno device
+ * @prop: property to get from GMU
+ *
+ * This function sends GET_VALUE HFI packet to query value of a property
+ *
+ * Return: On success, return the value in the GMU response. 
On failure, return 0 + */ +u32 gen8_hwsched_hfi_get_value(struct adreno_device *adreno_dev, u32 prop); + +/** + * gen8_send_hw_fence_hfi_wait_ack - Send hardware fence info to GMU + * @adreno_dev: Pointer to adreno device + * @entry: Pointer to the adreno hardware fence entry + * @flags: Flags for this hardware fence + * + * Send the hardware fence info to the GMU and wait for the ack + * + * Return: 0 on success or negative error on failure + */ +int gen8_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev, + struct adreno_hw_fence_entry *entry, u64 flags); + +/** + * gen8_hwsched_create_hw_fence - Create a hardware fence + * @adreno_dev: Pointer to adreno device + * @kfence: Pointer to the kgsl fence + * + * Create a hardware fence, set up hardware fence info and send it to GMU if required + */ +void gen8_hwsched_create_hw_fence(struct adreno_device *adreno_dev, + struct kgsl_sync_fence *kfence); + +/** + * gen8_hwsched_drain_context_hw_fences - Drain context's hardware fences via GMU + * @adreno_dev: Pointer to adreno device + * @drawctxt: Pointer to the adreno context which is to be flushed + * + * Trigger hardware fences that were never dispatched to GMU + * + * Return: Zero on success or negative error on failure + */ +int gen8_hwsched_drain_context_hw_fences(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt); + +/** + * gen8_hwsched_check_context_inflight_hw_fences - Check whether all hardware fences + * from a context have been sent to the TxQueue or not + * @adreno_dev: Pointer to adreno device + * @drawctxt: Pointer to the adreno context which is to be flushed + * + * Check if all hardware fences from this context have been sent to the + * TxQueue. If not, log an error and return error code. 
+ *
+ * Return: Zero on success or negative error on failure
+ */
+int gen8_hwsched_check_context_inflight_hw_fences(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt);
+
+/**
+ * gen8_remove_hw_fence_entry - Remove hardware fence entry
+ * @adreno_dev: pointer to the adreno device
+ * @entry: Pointer to the hardware fence entry
+ */
+void gen8_remove_hw_fence_entry(struct adreno_device *adreno_dev,
+	struct adreno_hw_fence_entry *entry);
+
+/**
+ * gen8_trigger_hw_fence_cpu - Trigger hardware fence from cpu
+ * @adreno_dev: pointer to the adreno device
+ * @fence: hardware fence entry to be triggered
+ *
+ * Trigger the hardware fence by sending it to GMU's TxQueue and raise the
+ * interrupt from GMU to APPS
+ */
+void gen8_trigger_hw_fence_cpu(struct adreno_device *adreno_dev,
+	struct adreno_hw_fence_entry *fence);
+
+/**
+ * gen8_hwsched_disable_hw_fence_throttle - Disable hardware fence throttling after reset
+ * @adreno_dev: pointer to the adreno device
+ *
+ * After device reset, clear hardware fence related data structures and send any hardware fences
+ * that got deferred (prior to reset) and re-open the gates for hardware fence creation
+ *
+ * Return: Zero on success or negative error on failure
+ */
+int gen8_hwsched_disable_hw_fence_throttle(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hwsched_process_msgq - Process msgq
+ * @adreno_dev: pointer to the adreno device
+ *
+ * This function grabs the msgq mutex and processes msgq for any outstanding hfi packets
+ */
+void gen8_hwsched_process_msgq(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hwsched_boot_gpu - Send the command to boot GPU
+ * @adreno_dev: Pointer to adreno device
+ *
+ * Send the hfi to boot GPU, and check the ack, in case of a failure
+ * get a snapshot and capture registers of interest.
+ * + * Return: Zero on success or negative error on failure + */ +int gen8_hwsched_boot_gpu(struct adreno_device *adreno_dev); + +#endif diff --git a/adreno_gen8_preempt.c b/adreno_gen8_preempt.c new file mode 100644 index 0000000000..cf8ecd8141 --- /dev/null +++ b/adreno_gen8_preempt.c @@ -0,0 +1,807 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#include "adreno.h" +#include "adreno_gen8.h" +#include "adreno_pm4types.h" +#include "adreno_trace.h" + +#define PREEMPT_RECORD(_field) \ + offsetof(struct gen8_cp_preemption_record, _field) + +#define PREEMPT_SMMU_RECORD(_field) \ + offsetof(struct gen8_cp_smmu_info, _field) + +static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer, + bool atomic) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_ringbuffer *rb = adreno_dev->cur_rb; + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&rb->preempt_lock, flags); + + if (!atomic) { + /* + * We might have skipped updating the wptr in case we are in + * dispatcher context. Do it now. 
+ */ + if (rb->skip_inline_wptr) { + + ret = gen8_fenced_write(adreno_dev, + GEN8_CP_RB_WPTR_GC, rb->wptr, + FENCE_STATUS_WRITEDROPPED0_MASK); + + reset_timer = true; + rb->skip_inline_wptr = false; + } + } else { + u32 wptr; + + kgsl_regread(device, GEN8_CP_RB_WPTR_GC, &wptr); + if (wptr != rb->wptr) { + kgsl_regwrite(device, GEN8_CP_RB_WPTR_GC, rb->wptr); + reset_timer = true; + } + } + + if (reset_timer) + rb->dispatch_q.expires = jiffies + + msecs_to_jiffies(adreno_drawobj_timeout); + + spin_unlock_irqrestore(&rb->preempt_lock, flags); + + if (!atomic) { + /* If WPTR update fails, set the fault and trigger recovery */ + if (ret) { + gmu_core_fault_snapshot(device); + adreno_dispatcher_fault(adreno_dev, + ADRENO_GMU_FAULT_SKIP_SNAPSHOT); + } + } +} + +static void _power_collapse_set(struct adreno_device *adreno_dev, bool val) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + gmu_core_regwrite(device, + GEN8_GMUCX_PWR_COL_PREEMPTION_KEEPALIVE, (val ? 1 : 0)); +} + +static void _gen8_preemption_done(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 status; + + /* + * In the very unlikely case that the power is off, do nothing - the + * state will be reset on power up and everybody will be happy + */ + + if (!kgsl_state_is_awake(device)) + return; + + kgsl_regread(device, GEN8_CP_CONTEXT_SWITCH_CNTL, &status); + + if (status & 0x1) { + dev_err(device->dev, + "Preemption not complete: status=%X cur=%d R/W=%X/%X next=%d R/W=%X/%X\n", + status, adreno_dev->cur_rb->id, + adreno_get_rptr(adreno_dev->cur_rb), + adreno_dev->cur_rb->wptr, + adreno_dev->next_rb->id, + adreno_get_rptr(adreno_dev->next_rb), + adreno_dev->next_rb->wptr); + + /* Set a fault and restart */ + adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + + return; + } + + adreno_dev->preempt.count++; + + del_timer_sync(&adreno_dev->preempt.timer); + + kgsl_regread(device, GEN8_CP_CONTEXT_SWITCH_LEVEL_STATUS, &status); + + 
trace_adreno_preempt_done(adreno_dev->cur_rb->id, adreno_dev->next_rb->id, + status, 0); + + /* Clean up all the bits */ + adreno_dev->prev_rb = adreno_dev->cur_rb; + adreno_dev->cur_rb = adreno_dev->next_rb; + adreno_dev->next_rb = NULL; + + /* Update the wptr for the new command queue */ + _update_wptr(adreno_dev, true, false); + + /* Update the dispatcher timer for the new command queue */ + mod_timer(&adreno_dev->dispatcher.timer, + adreno_dev->cur_rb->dispatch_q.expires); + + /* Clear the preempt state */ + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE); +} + +static void _gen8_preemption_fault(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 status; + + /* + * If the power is on check the preemption status one more time - if it + * was successful then just transition to the complete state + */ + if (kgsl_state_is_awake(device)) { + kgsl_regread(device, GEN8_CP_CONTEXT_SWITCH_CNTL, &status); + + if (!(status & 0x1)) { + adreno_set_preempt_state(adreno_dev, + ADRENO_PREEMPT_COMPLETE); + + adreno_dispatcher_schedule(device); + return; + } + } + + dev_err(device->dev, + "Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n", + adreno_dev->cur_rb->id, + adreno_get_rptr(adreno_dev->cur_rb), + adreno_dev->cur_rb->wptr, + adreno_dev->next_rb->id, + adreno_get_rptr(adreno_dev->next_rb), + adreno_dev->next_rb->wptr); + + adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT); +} + +static void _gen8_preemption_worker(struct work_struct *work) +{ + struct adreno_preemption *preempt = container_of(work, + struct adreno_preemption, work); + struct adreno_device *adreno_dev = container_of(preempt, + struct adreno_device, preempt); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* Need to take the mutex to make sure that the power stays on */ + mutex_lock(&device->mutex); + + if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_FAULTED)) + _gen8_preemption_fault(adreno_dev); + + 
mutex_unlock(&device->mutex); +} + +/* Find the highest priority active ringbuffer */ +static struct adreno_ringbuffer *gen8_next_ringbuffer( + struct adreno_device *adreno_dev) +{ + struct adreno_ringbuffer *rb; + unsigned long flags; + u32 i; + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + bool empty; + + spin_lock_irqsave(&rb->preempt_lock, flags); + empty = adreno_rb_empty(rb); + spin_unlock_irqrestore(&rb->preempt_lock, flags); + + if (!empty) + return rb; + } + + return NULL; +} + +void gen8_preemption_trigger(struct adreno_device *adreno_dev, bool atomic) +{ + const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + struct adreno_ringbuffer *next; + u64 ttbr0, gpuaddr; + u32 contextidr, cntl; + unsigned long flags; + struct adreno_preemption *preempt = &adreno_dev->preempt; + + /* Put ourselves into a possible trigger state */ + if (!adreno_move_preempt_state(adreno_dev, + ADRENO_PREEMPT_NONE, ADRENO_PREEMPT_START)) + return; + + /* Get the next ringbuffer to preempt in */ + next = gen8_next_ringbuffer(adreno_dev); + + /* + * Nothing to do if every ringbuffer is empty or if the current + * ringbuffer is the only active one + */ + if (next == NULL || next == adreno_dev->cur_rb) { + /* + * Update any critical things that might have been skipped while + * we were looking for a new ringbuffer + */ + + if (next != NULL) { + _update_wptr(adreno_dev, false, atomic); + + mod_timer(&adreno_dev->dispatcher.timer, + adreno_dev->cur_rb->dispatch_q.expires); + } + + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE); + return; + } + + /* Turn off the dispatcher timer */ + del_timer(&adreno_dev->dispatcher.timer); + + /* + * This is the most critical section - we need to take care not to race + * until we have programmed the CP for the switch + */ + + spin_lock_irqsave(&next->preempt_lock, flags); + + /* Get the pagetable from the pagetable info. 
*/ + kgsl_sharedmem_readq(device->scratch, &ttbr0, + SCRATCH_RB_OFFSET(next->id, ttbr0)); + kgsl_sharedmem_readl(device->scratch, &contextidr, + SCRATCH_RB_OFFSET(next->id, contextidr)); + + kgsl_sharedmem_writel(next->preemption_desc, + PREEMPT_RECORD(wptr), next->wptr); + + spin_unlock_irqrestore(&next->preempt_lock, flags); + + /* And write it to the smmu info */ + if (kgsl_mmu_is_perprocess(&device->mmu)) { + kgsl_sharedmem_writeq(iommu->smmu_info, + PREEMPT_SMMU_RECORD(ttbr0), ttbr0); + kgsl_sharedmem_writel(iommu->smmu_info, + PREEMPT_SMMU_RECORD(context_idr), contextidr); + } + + kgsl_sharedmem_readq(preempt->scratch, &gpuaddr, + next->id * sizeof(u64)); + + /* + * Set a keepalive bit before the first preemption register write. + * This is required since while each individual write to the context + * switch registers will wake the GPU from collapse, it will not in + * itself cause GPU activity. Thus, the GPU could technically be + * re-collapsed between subsequent register writes leading to a + * prolonged preemption sequence. The keepalive bit prevents any + * further power collapse while it is set. + * It is more efficient to use a keepalive+wake-on-fence approach here + * rather than an OOB. Both keepalive and the fence are effectively + * free when the GPU is already powered on, whereas an OOB requires an + * unconditional handshake with the GMU. + */ + _power_collapse_set(adreno_dev, true); + + /* + * Fenced writes on this path will make sure the GPU is woken up + * in case it was power collapsed by the GMU. + */ + if (gen8_fenced_write(adreno_dev, + GEN8_CP_CONTEXT_SWITCH_PNSR_ADDR_LO, + lower_32_bits(next->preemption_desc->gpuaddr), + FENCE_STATUS_WRITEDROPPED1_MASK)) + goto err; + + /* + * Above fence writes will make sure GMU comes out of + * IFPC state if its was in IFPC state but it doesn't + * guarantee that GMU FW actually moved to ACTIVE state + * i.e. wake-up from IFPC is complete. 
+ * Wait for GMU to move to ACTIVE state before triggering + * preemption. This is require to make sure CP doesn't + * interrupt GMU during wake-up from IFPC. + */ + if (!atomic && gmu_core_dev_wait_for_active_transition(device)) + goto err; + + if (gen8_fenced_write(adreno_dev, + GEN8_CP_CONTEXT_SWITCH_PNSR_ADDR_HI, + upper_32_bits(next->preemption_desc->gpuaddr), + FENCE_STATUS_WRITEDROPPED1_MASK)) + goto err; + + if (gen8_fenced_write(adreno_dev, + GEN8_CP_CONTEXT_SWITCH_PSR_ADDR_LO, + lower_32_bits(next->secure_preemption_desc->gpuaddr), + FENCE_STATUS_WRITEDROPPED1_MASK)) + goto err; + + if (gen8_fenced_write(adreno_dev, + GEN8_CP_CONTEXT_SWITCH_PSR_ADDR_HI, + upper_32_bits(next->secure_preemption_desc->gpuaddr), + FENCE_STATUS_WRITEDROPPED1_MASK)) + goto err; + + if (gen8_fenced_write(adreno_dev, + GEN8_CP_CONTEXT_SWITCH_NPR_ADDR_LO, + lower_32_bits(gpuaddr), + FENCE_STATUS_WRITEDROPPED1_MASK)) + goto err; + + if (gen8_fenced_write(adreno_dev, + GEN8_CP_CONTEXT_SWITCH_NPR_ADDR_HI, + upper_32_bits(gpuaddr), + FENCE_STATUS_WRITEDROPPED1_MASK)) + goto err; + + adreno_dev->next_rb = next; + + /* Start the timer to detect a stuck preemption */ + mod_timer(&adreno_dev->preempt.timer, + jiffies + msecs_to_jiffies(ADRENO_PREEMPT_TIMEOUT)); + + cntl = (preempt->preempt_level << 6) | 0x01; + + /* Skip save/restore during L1 preemption */ + if (preempt->skipsaverestore) + cntl |= (1 << 9); + + /* Enable GMEM save/restore across preemption */ + if (preempt->usesgmem) + cntl |= (1 << 8); + + trace_adreno_preempt_trigger(adreno_dev->cur_rb->id, adreno_dev->next_rb->id, + cntl, 0); + + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_TRIGGERED); + + if (gen8_core->qos_value) + kgsl_sharedmem_writel(preempt->scratch, + PREEMPT_SCRATCH_OFFSET(QOS_VALUE_IDX), + gen8_core->qos_value[next->id]); + + /* Trigger the preemption */ + if (gen8_fenced_write(adreno_dev, GEN8_CP_CONTEXT_SWITCH_CNTL, cntl, + FENCE_STATUS_WRITEDROPPED1_MASK)) { + adreno_dev->next_rb = NULL; + 
del_timer(&adreno_dev->preempt.timer); + goto err; + } + + return; +err: + /* If fenced write fails, take inline snapshot and trigger recovery */ + if (!in_interrupt()) { + gmu_core_fault_snapshot(device); + adreno_dispatcher_fault(adreno_dev, + ADRENO_GMU_FAULT_SKIP_SNAPSHOT); + } else { + adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT); + } + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE); + /* Clear the keep alive */ + _power_collapse_set(adreno_dev, false); + +} + +void gen8_preemption_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 status; + + if (!adreno_move_preempt_state(adreno_dev, + ADRENO_PREEMPT_TRIGGERED, ADRENO_PREEMPT_PENDING)) + return; + + kgsl_regread(device, GEN8_CP_CONTEXT_SWITCH_CNTL, &status); + + if (status & 0x1) { + dev_err(KGSL_DEVICE(adreno_dev)->dev, + "preempt interrupt with non-zero status: %X\n", + status); + + /* + * Under the assumption that this is a race between the + * interrupt and the register, schedule the worker to clean up. + * If the status still hasn't resolved itself by the time we get + * there then we have to assume something bad happened + */ + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE); + adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev)); + return; + } + + adreno_dev->preempt.count++; + + /* + * We can now safely clear the preemption keepalive bit, allowing + * power collapse to resume its regular activity. 
+ */ + _power_collapse_set(adreno_dev, false); + + del_timer(&adreno_dev->preempt.timer); + + kgsl_regread(device, GEN8_CP_CONTEXT_SWITCH_LEVEL_STATUS, &status); + + trace_adreno_preempt_done(adreno_dev->cur_rb->id, adreno_dev->next_rb->id, + status, 0); + + adreno_dev->prev_rb = adreno_dev->cur_rb; + adreno_dev->cur_rb = adreno_dev->next_rb; + adreno_dev->next_rb = NULL; + + /* Update the wptr if it changed while preemption was ongoing */ + _update_wptr(adreno_dev, true, true); + + /* Update the dispatcher timer for the new command queue */ + mod_timer(&adreno_dev->dispatcher.timer, + adreno_dev->cur_rb->dispatch_q.expires); + + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE); + + gen8_preemption_trigger(adreno_dev, true); +} + +void gen8_preemption_prepare_postamble(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); + struct adreno_preemption *preempt = &adreno_dev->preempt; + u32 *postamble, count = 0; + + /* + * First 28 dwords of the device scratch buffer are used to store shadow rb data. + * Reserve 15 dwords in the device scratch buffer from SCRATCH_POSTAMBLE_OFFSET for + * KMD postamble pm4 packets. This should be in *device->scratch* so that userspace + * cannot access it. + */ + postamble = device->scratch->hostptr + SCRATCH_POSTAMBLE_OFFSET; + + /* + * Reserve 4 dwords in the scratch buffer for dynamic QOS control feature. 
To ensure QOS + * value is updated for first preemption, send it during bootup + */ + if (gen8_core->qos_value) { + postamble[count++] = cp_type7_packet(CP_MEM_TO_REG, 3); + postamble[count++] = GEN8_RBBM_GBIF_CLIENT_QOS_CNTL; + postamble[count++] = lower_32_bits(PREEMPT_SCRATCH_ADDR(adreno_dev, QOS_VALUE_IDX)); + postamble[count++] = upper_32_bits(PREEMPT_SCRATCH_ADDR(adreno_dev, QOS_VALUE_IDX)); + } + + /* + * Since postambles are not preserved across slumber, necessary packets + * must be sent to GPU before first submission. + * + * If a packet needs to be sent before first submission, add it above this. + */ + preempt->postamble_bootup_len = count; + + /* Reserve 15 dwords in the device scratch buffer to clear perfcounters */ + if (!adreno_dev->perfcounter) { + postamble[count++] = cp_type7_packet(CP_REG_RMW, 3); + postamble[count++] = GEN8_RBBM_PERFCTR_SRAM_INIT_CMD; + postamble[count++] = 0x0; + postamble[count++] = 0x1; + postamble[count++] = cp_type7_packet(CP_REG_RMW, 3); + postamble[count++] = GEN8_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD; + postamble[count++] = 0x0; + postamble[count++] = 0x1; + + + postamble[count++] = cp_type7_packet(CP_WAIT_REG_MEM, 6); + postamble[count++] = 0x3; + postamble[count++] = GEN8_RBBM_PERFCTR_SRAM_INIT_STATUS; + postamble[count++] = 0x0; + postamble[count++] = 0x1; + postamble[count++] = 0x1; + postamble[count++] = 0x0; + } + + preempt->postamble_len = count; +} + +void gen8_preemption_schedule(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (!adreno_is_preemption_enabled(adreno_dev)) + return; + + mutex_lock(&device->mutex); + + if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE)) + _gen8_preemption_done(adreno_dev); + + gen8_preemption_trigger(adreno_dev, false); + + mutex_unlock(&device->mutex); +} + +u32 gen8_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, + u32 *cmds) +{ + u32 *cmds_orig 
= cmds; + + if (!adreno_is_preemption_enabled(adreno_dev)) + return 0; + + if (test_and_set_bit(ADRENO_RB_SET_PSEUDO_DONE, &rb->flags)) + goto done; + + *cmds++ = cp_type7_packet(CP_THREAD_CONTROL, 1); + *cmds++ = CP_SET_THREAD_BR; + + *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12); + + /* NULL SMMU_INFO buffer - we track in KMD */ + *cmds++ = SET_PSEUDO_SMMU_INFO; + cmds += cp_gpuaddr(adreno_dev, cmds, 0x0); + + *cmds++ = SET_PSEUDO_PRIV_NON_SECURE_SAVE_ADDR; + cmds += cp_gpuaddr(adreno_dev, cmds, rb->preemption_desc->gpuaddr); + + *cmds++ = SET_PSEUDO_PRIV_SECURE_SAVE_ADDR; + cmds += cp_gpuaddr(adreno_dev, cmds, + rb->secure_preemption_desc->gpuaddr); + + /* + * There is no need to specify this address when we are about to + * trigger preemption. This is because CP internally stores this + * address specified here in the CP_SET_PSEUDO_REGISTER payload to + * the context record and thus knows from where to restore + * the saved perfcounters for the new ringbuffer. + */ + *cmds++ = SET_PSEUDO_COUNTER; + cmds += cp_gpuaddr(adreno_dev, cmds, + rb->perfcounter_save_restore_desc->gpuaddr); + +done: + if (drawctxt) { + struct adreno_ringbuffer *rb = drawctxt->rb; + u64 dest = PREEMPT_SCRATCH_ADDR(adreno_dev, rb->id); + u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; + + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 2); + cmds += cp_gpuaddr(adreno_dev, cmds, dest); + *cmds++ = lower_32_bits(gpuaddr); + *cmds++ = upper_32_bits(gpuaddr); + + if (adreno_dev->preempt.postamble_len) { + u64 kmd_postamble_addr = SCRATCH_POSTAMBLE_ADDR(KGSL_DEVICE(adreno_dev)); + + *cmds++ = cp_type7_packet(CP_SET_AMBLE, 3); + *cmds++ = lower_32_bits(kmd_postamble_addr); + *cmds++ = upper_32_bits(kmd_postamble_addr); + *cmds++ = FIELD_PREP(GENMASK(22, 20), CP_KMD_AMBLE_TYPE) + | (FIELD_PREP(GENMASK(19, 0), adreno_dev->preempt.postamble_len)); + } + } + + return (u32) (cmds - cmds_orig); +} + +u32 gen8_preemption_post_ibsubmit(struct adreno_device *adreno_dev, + u32 
*cmds) +{ + u32 index = 0; + + if (!adreno_is_preemption_enabled(adreno_dev)) + return 0; + + if (adreno_dev->cur_rb) { + u64 dest = PREEMPT_SCRATCH_ADDR(adreno_dev, adreno_dev->cur_rb->id); + + cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 4); + cmds[index++] = lower_32_bits(dest); + cmds[index++] = upper_32_bits(dest); + cmds[index++] = 0; + cmds[index++] = 0; + } + + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SET_THREAD_BOTH; + cmds[index++] = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4); + cmds[index++] = 0; + cmds[index++] = 0; + cmds[index++] = 1; + cmds[index++] = 0; + + return index; +} + +void gen8_preemption_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + struct adreno_ringbuffer *rb; + u32 i; + + if (!adreno_is_preemption_enabled(adreno_dev)) + return; + + /* Force the state to be clear */ + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE); + + if (kgsl_mmu_is_perprocess(&device->mmu)) { + /* smmu_info is allocated and mapped in gen8_preemption_iommu_init */ + kgsl_sharedmem_writel(iommu->smmu_info, + PREEMPT_SMMU_RECORD(magic), GEN8_CP_SMMU_INFO_MAGIC_REF); + kgsl_sharedmem_writeq(iommu->smmu_info, + PREEMPT_SMMU_RECORD(ttbr0), MMU_DEFAULT_TTBR0(device)); + + /* The CP doesn't use the asid record, so poison it */ + kgsl_sharedmem_writel(iommu->smmu_info, + PREEMPT_SMMU_RECORD(asid), 0xdecafbad); + kgsl_sharedmem_writel(iommu->smmu_info, + PREEMPT_SMMU_RECORD(context_idr), 0); + + kgsl_regwrite(device, GEN8_CP_CONTEXT_SWITCH_SMMU_INFO_LO, + lower_32_bits(iommu->smmu_info->gpuaddr)); + + kgsl_regwrite(device, GEN8_CP_CONTEXT_SWITCH_SMMU_INFO_HI, + upper_32_bits(iommu->smmu_info->gpuaddr)); + } + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(rptr), 0); + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(wptr), 0); + + 
adreno_ringbuffer_set_pagetable(device, rb, + device->mmu.defaultpagetable); + + clear_bit(ADRENO_RB_SET_PSEUDO_DONE, &rb->flags); + } +} + +static void reset_rb_preempt_record(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + memset(rb->preemption_desc->hostptr, 0x0, rb->preemption_desc->size); + + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(magic), GEN8_CP_CTXRECORD_MAGIC_REF); + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(cntl), GEN8_CP_RB_CNTL_DEFAULT); + kgsl_sharedmem_writeq(rb->preemption_desc, + PREEMPT_RECORD(rptr_addr), SCRATCH_RB_GPU_ADDR( + KGSL_DEVICE(adreno_dev), rb->id, rptr)); + kgsl_sharedmem_writeq(rb->preemption_desc, + PREEMPT_RECORD(rbase), rb->buffer_desc->gpuaddr); + kgsl_sharedmem_writeq(rb->preemption_desc, + PREEMPT_RECORD(bv_rptr_addr), SCRATCH_RB_GPU_ADDR( + KGSL_DEVICE(adreno_dev), rb->id, bv_rptr)); +} + +void gen8_reset_preempt_records(struct adreno_device *adreno_dev) +{ + int i; + struct adreno_ringbuffer *rb; + + if (!adreno_is_preemption_enabled(adreno_dev)) + return; + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + reset_rb_preempt_record(adreno_dev, rb); + } +} + +static int gen8_preemption_ringbuffer_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); + u64 ctxt_record_size = GEN8_CP_CTXRECORD_SIZE_IN_BYTES; + int ret; + + if (gen8_core->ctxt_record_size) + ctxt_record_size = gen8_core->ctxt_record_size; + + ret = adreno_allocate_global(device, &rb->preemption_desc, + ctxt_record_size, SZ_16K, 0, + KGSL_MEMDESC_PRIVILEGED, "preemption_desc"); + if (ret) + return ret; + + ret = adreno_allocate_global(device, &rb->secure_preemption_desc, + ctxt_record_size, 0, + KGSL_MEMFLAGS_SECURE, KGSL_MEMDESC_PRIVILEGED, + "secure_preemption_desc"); + if (ret) + return ret; + + ret = adreno_allocate_global(device, 
&rb->perfcounter_save_restore_desc, + GEN8_CP_PERFCOUNTER_SAVE_RESTORE_SIZE, 0, 0, + KGSL_MEMDESC_PRIVILEGED, + "perfcounter_save_restore_desc"); + if (ret) + return ret; + + reset_rb_preempt_record(adreno_dev, rb); + + return 0; +} + +int gen8_preemption_init(struct adreno_device *adreno_dev) +{ + u32 flags = ADRENO_FEATURE(adreno_dev, ADRENO_APRIV) ? KGSL_MEMDESC_PRIVILEGED : 0; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + struct adreno_preemption *preempt = &adreno_dev->preempt; + struct adreno_ringbuffer *rb; + int ret; + u32 i; + + /* We are dependent on IOMMU to make preemption go on the CP side */ + if (kgsl_mmu_get_mmutype(device) != KGSL_MMU_TYPE_IOMMU) { + ret = -ENODEV; + goto done; + } + + INIT_WORK(&preempt->work, _gen8_preemption_worker); + + /* Allocate mem for storing preemption switch record */ + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + ret = gen8_preemption_ringbuffer_init(adreno_dev, rb); + if (ret) + goto done; + } + + ret = adreno_allocate_global(device, &preempt->scratch, PAGE_SIZE, + 0, 0, flags, "preempt_scratch"); + if (ret) + goto done; + + /* Allocate mem for storing preemption smmu record */ + if (kgsl_mmu_is_perprocess(&device->mmu)) { + ret = adreno_allocate_global(device, &iommu->smmu_info, PAGE_SIZE, 0, + KGSL_MEMFLAGS_GPUREADONLY, KGSL_MEMDESC_PRIVILEGED, + "smmu_info"); + if (ret) + goto done; + } + + return 0; +done: + clear_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv); + return ret; +} + +int gen8_preemption_context_init(struct kgsl_context *context) +{ + struct kgsl_device *device = context->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + u64 flags = 0; + + if (!adreno_preemption_feature_set(adreno_dev)) + return 0; + + if (context->flags & KGSL_CONTEXT_SECURE) + flags |= KGSL_MEMFLAGS_SECURE; + + if (is_compat_task()) + flags |= KGSL_MEMFLAGS_FORCE_32BIT; + + /* + * gpumem_alloc_entry takes an extra refcount. 
Put it only when + * destroying the context to keep the context record valid + */ + context->user_ctxt_record = gpumem_alloc_entry(context->dev_priv, + GEN8_CP_CTXRECORD_USER_RESTORE_SIZE, flags); + if (IS_ERR(context->user_ctxt_record)) { + int ret = PTR_ERR(context->user_ctxt_record); + + context->user_ctxt_record = NULL; + return ret; + } + + return 0; +} diff --git a/adreno_gen8_ringbuffer.c b/adreno_gen8_ringbuffer.c new file mode 100644 index 0000000000..31adaec1df --- /dev/null +++ b/adreno_gen8_ringbuffer.c @@ -0,0 +1,649 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#include "adreno.h" +#include "adreno_gen8.h" +#include "adreno_pm4types.h" +#include "adreno_ringbuffer.h" +#include "adreno_trace.h" +#include "kgsl_trace.h" + +static bool is_concurrent_binning(struct adreno_context *drawctxt) +{ + if (!drawctxt) + return false; + + return !(drawctxt->base.flags & KGSL_CONTEXT_SECURE); +} + +static int gen8_rb_pagetable_switch(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, + struct kgsl_pagetable *pagetable, u32 *cmds) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u64 ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pagetable); + int count = 0; + u32 id = drawctxt ? 
drawctxt->base.id : 0; + + if (pagetable == device->mmu.defaultpagetable) + return 0; + + /* CP switches the pagetable and flushes the Caches */ + cmds[count++] = cp_type7_packet(CP_SMMU_TABLE_UPDATE, 3); + cmds[count++] = lower_32_bits(ttbr0); + cmds[count++] = upper_32_bits(ttbr0); + cmds[count++] = id; + + cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 5); + cmds[count++] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device, + rb->id, ttbr0)); + cmds[count++] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device, + rb->id, ttbr0)); + cmds[count++] = lower_32_bits(ttbr0); + cmds[count++] = upper_32_bits(ttbr0); + cmds[count++] = id; + + /* + * Sync both threads after switching pagetables and enable BR only + * to make sure BV doesn't race ahead while BR is still switching + * pagetables. + */ + cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BR; + + return count; +} + +static int gen8_rb_context_switch(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, + struct adreno_context *drawctxt) +{ + struct kgsl_pagetable *pagetable = + adreno_drawctxt_get_pagetable(drawctxt); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int count = 0; + u32 cmds[57]; + + /* Sync both threads */ + cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BOTH; + /* Reset context state */ + cmds[count++] = cp_type7_packet(CP_RESET_CONTEXT_STATE, 1); + cmds[count++] = CP_RESET_GLOBAL_LOCAL_TS | CP_CLEAR_BV_BR_COUNTER | + CP_CLEAR_RESOURCE_TABLE | CP_CLEAR_ON_CHIP_TS; + /* + * Enable/disable concurrent binning for pagetable switch and + * set the thread to BR since only BR can execute the pagetable + * switch packets. 
+ */ + /* Sync both threads and enable BR only */ + cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BR; + + if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable) { + + /* Clear performance counters during context switches */ + if (!adreno_dev->perfcounter) { + cmds[count++] = cp_type4_packet(GEN8_RBBM_PERFCTR_SRAM_INIT_CMD, 1); + cmds[count++] = 0x1; + cmds[count++] = cp_type4_packet(GEN8_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD, 1); + cmds[count++] = 0x1; + } + + count += gen8_rb_pagetable_switch(adreno_dev, rb, + drawctxt, pagetable, &cmds[count]); + + /* Wait for performance counter clear to finish */ + if (!adreno_dev->perfcounter) { + cmds[count++] = cp_type7_packet(CP_WAIT_REG_MEM, 6); + cmds[count++] = 0x3; + cmds[count++] = GEN8_RBBM_PERFCTR_SRAM_INIT_STATUS; + cmds[count++] = 0x0; + cmds[count++] = 0x1; + cmds[count++] = 0x1; + cmds[count++] = 0x0; + } + } else { + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + + u32 offset = GEN8_SMMU_BASE + (iommu->cb0_offset >> 2) + 0x0d; + + /* + * Set the CONTEXTIDR register to the current context id so we + * can use it in pagefault debugging. 
Unlike TTBR0 we don't + * need any special sequence or locking to change it + */ + cmds[count++] = cp_type4_packet(offset, 1); + cmds[count++] = drawctxt->base.id; + } + + cmds[count++] = cp_type7_packet(CP_NOP, 1); + cmds[count++] = CONTEXT_TO_MEM_IDENTIFIER; + + cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3); + cmds[count++] = lower_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb, + current_context)); + cmds[count++] = upper_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb, + current_context)); + cmds[count++] = drawctxt->base.id; + + cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3); + cmds[count++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device, + KGSL_MEMSTORE_GLOBAL, current_context)); + cmds[count++] = upper_32_bits(MEMSTORE_ID_GPU_ADDR(device, + KGSL_MEMSTORE_GLOBAL, current_context)); + cmds[count++] = drawctxt->base.id; + + cmds[count++] = cp_type7_packet(CP_EVENT_WRITE, 1); + cmds[count++] = 0x31; + + if (adreno_is_preemption_enabled(adreno_dev)) { + u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; + + cmds[count++] = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 3); + cmds[count++] = SET_PSEUDO_NON_PRIV_SAVE_ADDR; + cmds[count++] = lower_32_bits(gpuaddr); + cmds[count++] = upper_32_bits(gpuaddr); + } + + return gen8_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED, + cmds, count, 0, NULL); +} + +#define RB_SOPTIMESTAMP(device, rb) \ + MEMSTORE_RB_GPU_ADDR(device, rb, soptimestamp) +#define CTXT_SOPTIMESTAMP(device, drawctxt) \ + MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, soptimestamp) + +#define RB_EOPTIMESTAMP(device, rb) \ + MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp) +#define CTXT_EOPTIMESTAMP(device, drawctxt) \ + MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, eoptimestamp) + +int gen8_ringbuffer_submit(struct adreno_ringbuffer *rb, + struct adreno_submit_time *time) +{ + struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret = 0; + unsigned long flags; + + 
adreno_get_submit_time(adreno_dev, rb, time); + adreno_profile_submit_time(time); + + spin_lock_irqsave(&rb->preempt_lock, flags); + if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) { + if (adreno_dev->cur_rb == rb) { + kgsl_pwrscale_busy(device); + ret = gen8_fenced_write(adreno_dev, + GEN8_CP_RB_WPTR_GC, rb->_wptr, + FENCE_STATUS_WRITEDROPPED0_MASK); + rb->skip_inline_wptr = false; + } + } else { + if (adreno_dev->cur_rb == rb) + rb->skip_inline_wptr = true; + } + + rb->wptr = rb->_wptr; + spin_unlock_irqrestore(&rb->preempt_lock, flags); + + if (ret) { + /* + * If WPTR update fails, take inline snapshot and trigger + * recovery. + */ + gmu_core_fault_snapshot(device); + adreno_dispatcher_fault(adreno_dev, + ADRENO_GMU_FAULT_SKIP_SNAPSHOT); + } + + return ret; +} + +int gen8_ringbuffer_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int i, ret; + + ret = adreno_allocate_global(device, &device->scratch, PAGE_SIZE, + 0, 0, KGSL_MEMDESC_RANDOM | KGSL_MEMDESC_PRIVILEGED, + "scratch"); + if (ret) + return ret; + + adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]); + + if (!adreno_preemption_feature_set(adreno_dev)) { + adreno_dev->num_ringbuffers = 1; + return adreno_ringbuffer_setup(adreno_dev, + &adreno_dev->ringbuffers[0], 0); + } + + adreno_dev->num_ringbuffers = ARRAY_SIZE(adreno_dev->ringbuffers); + + for (i = 0; i < adreno_dev->num_ringbuffers; i++) { + int ret; + + ret = adreno_ringbuffer_setup(adreno_dev, + &adreno_dev->ringbuffers[i], i); + if (ret) + return ret; + } + + timer_setup(&adreno_dev->preempt.timer, adreno_preemption_timer, 0); + gen8_preemption_init(adreno_dev); + return 0; +} + +#define GEN8_SUBMIT_MAX 104 + +int gen8_ringbuffer_addcmds(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, + u32 flags, u32 *in, u32 dwords, u32 timestamp, + struct adreno_submit_time *time) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 
size = GEN8_SUBMIT_MAX + dwords; + u32 *cmds, index = 0; + u64 profile_gpuaddr; + u32 profile_dwords; + + if (adreno_drawctxt_detached(drawctxt)) + return -ENOENT; + + if (adreno_gpu_fault(adreno_dev) != 0) + return -EPROTO; + + rb->timestamp++; + + if (drawctxt) + drawctxt->internal_timestamp = rb->timestamp; + + /* All submissions are run with protected mode off due to APRIV */ + flags &= ~F_NOTPROTECTED; + + cmds = adreno_ringbuffer_allocspace(rb, size); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + + /* Identify the start of a command */ + cmds[index++] = cp_type7_packet(CP_NOP, 1); + cmds[index++] = drawctxt ? CMD_IDENTIFIER : CMD_INTERNAL_IDENTIFIER; + + /* This is 25 dwords when drawctxt is not NULL and perfcounter needs to be zapped*/ + index += gen8_preemption_pre_ibsubmit(adreno_dev, rb, drawctxt, + &cmds[index]); + + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SET_THREAD_BOTH; + + cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1); + cmds[index++] = 0x101; /* IFPC disable */ + + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SET_THREAD_BR; + + profile_gpuaddr = adreno_profile_preib_processing(adreno_dev, + drawctxt, &profile_dwords); + + if (profile_gpuaddr) { + cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3); + cmds[index++] = lower_32_bits(profile_gpuaddr); + cmds[index++] = upper_32_bits(profile_gpuaddr); + cmds[index++] = profile_dwords; + } + + if (drawctxt) { + cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3); + cmds[index++] = lower_32_bits(CTXT_SOPTIMESTAMP(device, + drawctxt)); + cmds[index++] = upper_32_bits(CTXT_SOPTIMESTAMP(device, + drawctxt)); + cmds[index++] = timestamp; + } + + cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3); + cmds[index++] = lower_32_bits(RB_SOPTIMESTAMP(device, rb)); + cmds[index++] = upper_32_bits(RB_SOPTIMESTAMP(device, rb)); + cmds[index++] = rb->timestamp; + + if (IS_SECURE(flags)) { + /* Sync BV and BR if entering secure mode */ + 
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SYNC_THREADS | CP_CONCURRENT_BIN_DISABLE; + cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1); + cmds[index++] = 1; + } + + memcpy(&cmds[index], in, dwords << 2); + index += dwords; + + profile_gpuaddr = adreno_profile_postib_processing(adreno_dev, + drawctxt, &dwords); + + if (profile_gpuaddr) { + cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3); + cmds[index++] = lower_32_bits(profile_gpuaddr); + cmds[index++] = upper_32_bits(profile_gpuaddr); + cmds[index++] = profile_dwords; + } + + if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &device->mmu.pfpolicy)) + cmds[index++] = cp_type7_packet(CP_WAIT_MEM_WRITES, 0); + + if (is_concurrent_binning(drawctxt)) { + u64 addr = SCRATCH_RB_GPU_ADDR(device, rb->id, bv_ts); + + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SET_THREAD_BV; + + /* + * Make sure the timestamp is committed once BV pipe is + * completely done with this submission. + */ + cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4); + cmds[index++] = CACHE_CLEAN | BIT(27); + cmds[index++] = lower_32_bits(addr); + cmds[index++] = upper_32_bits(addr); + cmds[index++] = rb->timestamp; + + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SET_THREAD_BR; + + /* + * This makes sure that BR doesn't race ahead and commit + * timestamp to memstore while BV is still processing + * this submission. 
+ */ + cmds[index++] = cp_type7_packet(CP_WAIT_TIMESTAMP, 4); + cmds[index++] = 0; + cmds[index++] = lower_32_bits(addr); + cmds[index++] = upper_32_bits(addr); + cmds[index++] = rb->timestamp; + } + + /* + * If this is an internal command, just write the ringbuffer timestamp, + * otherwise, write both + */ + if (!drawctxt) { + cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4); + cmds[index++] = CACHE_CLEAN | BIT(31) | BIT(27); + cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb)); + cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb)); + cmds[index++] = rb->timestamp; + } else { + cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4); + cmds[index++] = CACHE_CLEAN | BIT(31) | BIT(27); + cmds[index++] = lower_32_bits(CTXT_EOPTIMESTAMP(device, + drawctxt)); + cmds[index++] = upper_32_bits(CTXT_EOPTIMESTAMP(device, + drawctxt)); + cmds[index++] = timestamp; + + cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4); + cmds[index++] = CACHE_CLEAN | BIT(27); + cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb)); + cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb)); + cmds[index++] = rb->timestamp; + } + + if (IS_WFI(flags)) + cmds[index++] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); + + if (IS_SECURE(flags)) { + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_CONCURRENT_BIN_DISABLE; + cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1); + cmds[index++] = 0; + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SYNC_THREADS; + } + + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SET_THREAD_BOTH; + + cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1); + cmds[index++] = 0x100; /* IFPC enable */ + + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SET_THREAD_BR; + + /* 10 dwords */ + index += gen8_preemption_post_ibsubmit(adreno_dev, &cmds[index]); + + /* Adjust the thing for the number of bytes we actually wrote */ + rb->_wptr -= (size - index); 
+ + return gen8_ringbuffer_submit(rb, time); +} + +static u32 gen8_get_alwayson_counter(u32 *cmds, u64 gpuaddr) +{ + cmds[0] = cp_type7_packet(CP_REG_TO_MEM, 3); + cmds[1] = GEN8_CP_ALWAYS_ON_COUNTER_LO | (1 << 30) | (2 << 18); + cmds[2] = lower_32_bits(gpuaddr); + cmds[3] = upper_32_bits(gpuaddr); + + return 4; +} + +static u32 gen8_get_alwayson_context(u32 *cmds, u64 gpuaddr) +{ + cmds[0] = cp_type7_packet(CP_REG_TO_MEM, 3); + cmds[1] = GEN8_CP_ALWAYS_ON_CONTEXT_LO | (1 << 30) | (2 << 18); + cmds[2] = lower_32_bits(gpuaddr); + cmds[3] = upper_32_bits(gpuaddr); + + return 4; +} + +#define PROFILE_IB_DWORDS 4 +#define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2)) + +static u64 gen8_get_user_profiling_ib(struct adreno_ringbuffer *rb, + struct kgsl_drawobj_cmd *cmdobj, u32 target_offset, u32 *cmds) +{ + u32 offset, *ib, dwords; + + if (IS_ERR(rb->profile_desc)) + return 0; + + offset = rb->profile_index * (PROFILE_IB_DWORDS << 2); + ib = rb->profile_desc->hostptr + offset; + dwords = gen8_get_alwayson_counter(ib, + cmdobj->profiling_buffer_gpuaddr + target_offset); + + cmds[0] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3); + cmds[1] = lower_32_bits(rb->profile_desc->gpuaddr + offset); + cmds[2] = upper_32_bits(rb->profile_desc->gpuaddr + offset); + cmds[3] = dwords; + + rb->profile_index = (rb->profile_index + 1) % PROFILE_IB_SLOTS; + + return 4; +} + +static int gen8_drawctxt_switch(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, + struct adreno_context *drawctxt) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + if (rb->drawctxt_active == drawctxt) + return 0; + + if (kgsl_context_detached(&drawctxt->base)) + return -ENOENT; + + if (!_kgsl_context_get(&drawctxt->base)) + return -ENOENT; + + ret = gen8_rb_context_switch(adreno_dev, rb, drawctxt); + if (ret) { + kgsl_context_put(&drawctxt->base); + return ret; + } + + trace_adreno_drawctxt_switch(rb, drawctxt); + + /* Release the current drawctxt as soon as the new 
one is switched */ + adreno_put_drawctxt_on_timestamp(device, rb->drawctxt_active, + rb, rb->timestamp); + + rb->drawctxt_active = drawctxt; + return 0; +} + + +#define GEN8_USER_PROFILE_IB(rb, cmdobj, cmds, field) \ + gen8_get_user_profiling_ib((rb), (cmdobj), \ + offsetof(struct kgsl_drawobj_profiling_buffer, field), \ + (cmds)) + +#define GEN8_KERNEL_PROFILE(dev, cmdobj, cmds, field) \ + gen8_get_alwayson_counter((cmds), \ + (dev)->profile_buffer->gpuaddr + \ + ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \ + field)) + +#define GEN8_KERNEL_PROFILE_CONTEXT(dev, cmdobj, cmds, field) \ + gen8_get_alwayson_context((cmds), \ + (dev)->profile_buffer->gpuaddr + \ + ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \ + field)) + +#define GEN8_COMMAND_DWORDS 60 + +int gen8_ringbuffer_submitcmd(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj, u32 flags, + struct adreno_submit_time *time) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context); + struct adreno_ringbuffer *rb = drawctxt->rb; + int ret = 0, numibs = 0, index = 0; + u32 *cmds; + + /* Count the number of IBs (if we are not skipping) */ + if (!IS_SKIP(flags)) { + struct list_head *tmp; + + list_for_each(tmp, &cmdobj->cmdlist) + numibs++; + } + + cmds = kvmalloc((GEN8_COMMAND_DWORDS + (numibs * 5)) << 2, GFP_KERNEL); + if (!cmds) { + ret = -ENOMEM; + goto done; + } + + cmds[index++] = cp_type7_packet(CP_NOP, 1); + cmds[index++] = START_IB_IDENTIFIER; + + /* Kernel profiling: 8 dwords */ + if (IS_KERNEL_PROFILE(flags)) { + index += GEN8_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index], + started); + index += GEN8_KERNEL_PROFILE_CONTEXT(adreno_dev, cmdobj, &cmds[index], + ctx_start); + } + + /* User profiling: 4 dwords */ + if (IS_USER_PROFILE(flags)) + index += GEN8_USER_PROFILE_IB(rb, cmdobj, &cmds[index], + gpu_ticks_submitted); + + if 
(is_concurrent_binning(drawctxt)) { + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SET_THREAD_BOTH; + } + if (numibs) { + struct kgsl_memobj_node *ib; + + cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1); + cmds[index++] = 0x00d; /* IB1LIST start */ + + list_for_each_entry(ib, &cmdobj->cmdlist, node) { + if (ib->priv & MEMOBJ_SKIP || + (ib->flags & KGSL_CMDLIST_CTXTSWITCH_PREAMBLE && + !IS_PREAMBLE(flags))) + cmds[index++] = cp_type7_packet(CP_NOP, 4); + + cmds[index++] = + cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3); + cmds[index++] = lower_32_bits(ib->gpuaddr); + cmds[index++] = upper_32_bits(ib->gpuaddr); + + /* Double check that IB_PRIV is never set */ + cmds[index++] = (ib->size >> 2) & 0xfffff; + } + + cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1); + cmds[index++] = 0x00e; /* IB1LIST end */ + } + + if (is_concurrent_binning(drawctxt)) { + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SET_THREAD_BR; + } + /* CCU invalidate depth */ + cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 1); + cmds[index++] = 24; + + /* CCU invalidate color */ + cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 1); + cmds[index++] = 25; + + /* 8 dwords */ + if (IS_KERNEL_PROFILE(flags)) { + index += GEN8_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index], + retired); + index += GEN8_KERNEL_PROFILE_CONTEXT(adreno_dev, cmdobj, &cmds[index], + ctx_end); + } + + /* 4 dwords */ + if (IS_USER_PROFILE(flags)) + index += GEN8_USER_PROFILE_IB(rb, cmdobj, &cmds[index], + gpu_ticks_retired); + + cmds[index++] = cp_type7_packet(CP_NOP, 1); + cmds[index++] = END_IB_IDENTIFIER; + + ret = gen8_drawctxt_switch(adreno_dev, rb, drawctxt); + + /* + * In the unlikely event of an error in the drawctxt switch, + * treat it like a hang + */ + if (ret) { + /* + * It is "normal" to get a -ENOSPC or a -ENOENT. 
Don't log it, + * the upper layers know how to handle it + */ + if (ret != -ENOSPC && ret != -ENOENT) + dev_err(device->dev, + "Unable to switch draw context: %d\n", ret); + goto done; + } + + adreno_drawobj_set_constraint(device, drawobj); + + ret = gen8_ringbuffer_addcmds(adreno_dev, drawctxt->rb, drawctxt, + flags, cmds, index, drawobj->timestamp, time); + +done: + trace_kgsl_issueibcmds(device, drawctxt->base.id, numibs, + drawobj->timestamp, drawobj->flags, ret, drawctxt->type); + + kvfree(cmds); + return ret; +} diff --git a/adreno_gen8_rpmh.c b/adreno_gen8_rpmh.c new file mode 100644 index 0000000000..82927a76b4 --- /dev/null +++ b/adreno_gen8_rpmh.c @@ -0,0 +1,527 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#include +#include +#include + +#include "adreno.h" +#include "adreno_gen8.h" +#include "kgsl_bus.h" +#include "kgsl_device.h" + +struct rpmh_arc_vals { + u32 num; + const u16 *val; +}; + +struct bcm { + const char *name; + u32 buswidth; + u32 channels; + u32 unit; + u16 width; + u8 vcd; + bool fixed; +}; + +struct bcm_data { + __le32 unit; + __le16 width; + u8 vcd; + u8 reserved; +}; + +struct rpmh_bw_votes { + u32 wait_bitmask; + u32 num_cmds; + u32 *addrs; + u32 num_levels; + u32 **cmds; +}; + +#define ARC_VOTE_SET(pri, sec, vlvl) \ + (FIELD_PREP(GENMASK(31, 16), vlvl) | \ + FIELD_PREP(GENMASK(15, 8), sec) | \ + FIELD_PREP(GENMASK(7, 0), pri)) + +static int rpmh_arc_cmds(struct rpmh_arc_vals *arc, const char *res_id) +{ + size_t len = 0; + + arc->val = cmd_db_read_aux_data(res_id, &len); + + /* + * cmd_db_read_aux_data() gives us a zero-padded table of + * size len that contains the arc values. To determine the + * number of arc values, we loop through the table and count + * them until we get to the end of the buffer or hit the + * zero padding. 
+ */ + for (arc->num = 1; arc->num < (len >> 1); arc->num++) { + if (arc->val[arc->num - 1] != 0 && arc->val[arc->num] == 0) + break; + } + + return 0; +} + +static int setup_volt_dependency_tbl(u32 *votes, + struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail, + u16 *vlvl, u32 num_entries) +{ + int i, j, k; + uint16_t cur_vlvl; + bool found_match; + + /* i tracks current KGSL GPU frequency table entry + * j tracks secondary rail voltage table entry + * k tracks primary rail voltage table entry + */ + for (i = 0; i < num_entries; i++) { + found_match = false; + + /* Look for a primary rail voltage that matches a VLVL level */ + for (k = 0; k < pri_rail->num; k++) { + if (pri_rail->val[k] >= vlvl[i]) { + cur_vlvl = pri_rail->val[k]; + found_match = true; + break; + } + } + + /* If we did not find a matching VLVL level then abort */ + if (!found_match) + return -EINVAL; + + /* + * Look for a secondary rail index whose VLVL value + * is greater than or equal to the VLVL value of the + * corresponding index of the primary rail + */ + for (j = 0; j < sec_rail->num; j++) { + if (sec_rail->val[j] >= cur_vlvl || + j + 1 == sec_rail->num) + break; + } + + if (j == sec_rail->num) + j = 0; + + votes[i] = ARC_VOTE_SET(k, j, cur_vlvl); + } + + return 0; +} + +/* Generate a set of bandwidth votes for the list of BCMs */ +static void tcs_cmd_data(struct bcm *bcms, int count, + u32 ab, u32 ib, u32 *data, u32 perfmode_vote, bool set_perfmode) +{ + int i; + + for (i = 0; i < count; i++) { + bool valid = true; + bool commit = false; + u64 avg, peak, x, y; + + if (i == count - 1 || bcms[i].vcd != bcms[i + 1].vcd) + commit = true; + + if (bcms[i].fixed) { + if (!ab && !ib) + data[i] = BCM_TCS_CMD(commit, false, 0x0, 0x0); + else + data[i] = BCM_TCS_CMD(commit, true, 0x0, + set_perfmode ? 
perfmode_vote : 0x0); + continue; + } + + /* Multiply the bandwidth by the width of the connection */ + avg = ((u64) ab) * bcms[i].width; + + /* And then divide by the total width */ + do_div(avg, bcms[i].buswidth); + + peak = ((u64) ib) * bcms[i].width; + do_div(peak, bcms[i].buswidth); + + /* Input bandwidth value is in KBps */ + x = avg * 1000ULL; + do_div(x, bcms[i].unit); + + /* Input bandwidth value is in KBps */ + y = peak * 1000ULL; + do_div(y, bcms[i].unit); + + /* + * If a bandwidth value was specified but the calculation ends + * rounding down to zero, set a minimum level + */ + if (ab && x == 0) + x = 1; + + if (ib && y == 0) + y = 1; + + x = min_t(u64, x, BCM_TCS_CMD_VOTE_MASK); + y = min_t(u64, y, BCM_TCS_CMD_VOTE_MASK); + + if (!x && !y) + valid = false; + + data[i] = BCM_TCS_CMD(commit, valid, x, y); + } +} + +static void free_rpmh_bw_votes(struct rpmh_bw_votes *votes) +{ + int i; + + if (!votes) + return; + + for (i = 0; votes->cmds && i < votes->num_levels; i++) + kfree(votes->cmds[i]); + + kfree(votes->cmds); + kfree(votes->addrs); + kfree(votes); +} + +/* Build the votes table from the specified bandwidth levels */ +static struct rpmh_bw_votes *build_rpmh_bw_votes(struct bcm *bcms, + int bcm_count, u32 *levels, int levels_count, u32 perfmode_vote, u32 perfmode_lvl) +{ + struct rpmh_bw_votes *votes; + bool set_perfmode; + int i; + + votes = kzalloc(sizeof(*votes), GFP_KERNEL); + if (!votes) + return ERR_PTR(-ENOMEM); + + votes->addrs = kcalloc(bcm_count, sizeof(*votes->cmds), GFP_KERNEL); + if (!votes->addrs) { + free_rpmh_bw_votes(votes); + return ERR_PTR(-ENOMEM); + } + + votes->cmds = kcalloc(levels_count, sizeof(*votes->cmds), GFP_KERNEL); + if (!votes->cmds) { + free_rpmh_bw_votes(votes); + return ERR_PTR(-ENOMEM); + } + + votes->num_cmds = bcm_count; + votes->num_levels = levels_count; + + /* Get the cmd-db information for each BCM */ + for (i = 0; i < bcm_count; i++) { + size_t l; + const struct bcm_data *data; + + data = 
cmd_db_read_aux_data(bcms[i].name, &l); + + votes->addrs[i] = cmd_db_read_addr(bcms[i].name); + + bcms[i].unit = le32_to_cpu(data->unit); + bcms[i].width = le16_to_cpu(data->width); + bcms[i].vcd = data->vcd; + } + + for (i = 0; i < bcm_count; i++) { + if (i == (bcm_count - 1) || bcms[i].vcd != bcms[i + 1].vcd) + votes->wait_bitmask |= (1 << i); + } + + for (i = 0; i < levels_count; i++) { + votes->cmds[i] = kcalloc(bcm_count, sizeof(u32), GFP_KERNEL); + if (!votes->cmds[i]) { + free_rpmh_bw_votes(votes); + return ERR_PTR(-ENOMEM); + } + + set_perfmode = (i >= perfmode_lvl) ? true : false; + tcs_cmd_data(bcms, bcm_count, levels[i], levels[i], votes->cmds[i], + perfmode_vote, set_perfmode); + } + + return votes; +} + +/* + * setup_cx_arc_votes - Build the gmu voting table + * @hfi: Pointer to hfi device + * @pri_rail: Pointer to primary power rail vlvl table + * @sec_rail: Pointer to second/dependent power rail vlvl table + * @freqs: List of GMU frequencies + * @vlvls: List of GMU voltage levels + * + * This function initializes the cx votes for all gmu frequencies + * for gmu dcvs + */ +static int setup_cx_arc_votes(struct gen8_hfi *hfi, + struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail, + u32 *freqs, u32 *vlvls) +{ + /* Hardcoded values of GMU CX voltage levels */ + u16 gmu_cx_vlvl[MAX_CX_LEVELS]; + u32 cx_votes[MAX_CX_LEVELS]; + struct hfi_dcvstable_cmd *table = &hfi->dcvs_table; + int ret, i; + + gmu_cx_vlvl[0] = 0; + gmu_cx_vlvl[1] = vlvls[0]; + gmu_cx_vlvl[2] = vlvls[1]; + + table->gmu_level_num = 3; + + table->cx_votes[0].freq = 0; + table->cx_votes[1].freq = freqs[0] / 1000; + table->cx_votes[2].freq = freqs[1] / 1000; + + ret = setup_volt_dependency_tbl(cx_votes, pri_rail, + sec_rail, gmu_cx_vlvl, table->gmu_level_num); + if (!ret) { + for (i = 0; i < table->gmu_level_num; i++) + table->cx_votes[i].vote = cx_votes[i]; + } + + return ret; +} + +static int to_cx_hlvl(struct rpmh_arc_vals *cx_rail, u32 vlvl, u32 *hlvl) +{ + u32 i; + + /* + * 
This means that the Gx level doesn't have a dependency on Cx level. + * Return the same value to disable cx voting at GMU. + */ + if (vlvl == 0xffffffff) { + *hlvl = vlvl; + return 0; + } + + for (i = 0; i < cx_rail->num; i++) { + if (cx_rail->val[i] >= vlvl) { + *hlvl = i; + return 0; + } + } + + return -EINVAL; +} + +/* + * setup_gx_arc_votes - Build the gpu dcvs voting table + * @adreno_dev: Pointer to the adreno device + * @pri_rail: Pointer to primary power rail vlvl table + * @sec_rail: Pointer to second/dependent power rail vlvl table + * + * This function initializes the gx votes for all gpu frequencies + * for gpu dcvs + */ +static int setup_gx_arc_votes(struct adreno_device *adreno_dev, + struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail, + struct rpmh_arc_vals *cx_rail) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table; + u32 index; + u16 vlvl_tbl[MAX_GX_LEVELS]; + u32 gx_votes[MAX_GX_LEVELS]; + int ret, i; + + /* Add the zero powerlevel for the perf table */ + table->gpu_level_num = device->pwrctrl.num_pwrlevels + 1; + + if (table->gpu_level_num > ARRAY_SIZE(vlvl_tbl)) { + dev_err(&gmu->pdev->dev, + "Defined more GPU DCVS levels than RPMh can support\n"); + return -ERANGE; + } + + memset(vlvl_tbl, 0, sizeof(vlvl_tbl)); + + table->gx_votes[0].freq = 0; + table->gx_votes[0].cx_vote = 0; + /* Disable cx vote in gmu dcvs table if it is not supported in DT */ + if (pwr->pwrlevels[0].cx_level == 0xffffffff) + table->gx_votes[0].cx_vote = 0xffffffff; + + /* GMU power levels are in ascending order */ + for (index = 1, i = pwr->num_pwrlevels - 1; i >= 0; i--, index++) { + u32 cx_vlvl = pwr->pwrlevels[i].cx_level; + + vlvl_tbl[index] = pwr->pwrlevels[i].voltage_level; + table->gx_votes[index].freq = pwr->pwrlevels[i].gpu_freq / 1000; + + ret = to_cx_hlvl(cx_rail, cx_vlvl, + 
&table->gx_votes[index].cx_vote); + if (ret) { + dev_err(&gmu->pdev->dev, "Unsupported cx corner: %u\n", + cx_vlvl); + return ret; + } + } + + ret = setup_volt_dependency_tbl(gx_votes, pri_rail, + sec_rail, vlvl_tbl, table->gpu_level_num); + if (!ret) { + for (i = 0; i < table->gpu_level_num; i++) + table->gx_votes[i].vote = gx_votes[i]; + } + + return ret; + +} + +static int build_dcvs_table(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct gen8_hfi *hfi = &gmu->hfi; + struct rpmh_arc_vals gx_arc, cx_arc, mx_arc; + int ret; + + ret = CMD_MSG_HDR(hfi->dcvs_table, H2F_MSG_PERF_TBL); + if (ret) + return ret; + + ret = rpmh_arc_cmds(&gx_arc, "gfx.lvl"); + if (ret) + return ret; + + ret = rpmh_arc_cmds(&cx_arc, "cx.lvl"); + if (ret) + return ret; + + ret = rpmh_arc_cmds(&mx_arc, "mx.lvl"); + if (ret) + return ret; + + ret = setup_cx_arc_votes(hfi, &cx_arc, &mx_arc, + gmu->freqs, gmu->vlvls); + if (ret) + return ret; + + return setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, &cx_arc); +} + +/* + * List of Bus Control Modules (BCMs) that need to be configured for the GPU + * to access DDR. 
For each bus level we will generate a vote each BC + */ +static struct bcm gen8_ddr_bcms[] = { + { .name = "SH0", .buswidth = 16 }, + { .name = "MC0", .buswidth = 4 }, + { .name = "ACV", .fixed = true }, +}; + +/* Same as above, but for the CNOC BCMs */ +static struct bcm gen8_cnoc_bcms[] = { + { .name = "CN0", .buswidth = 4 }, +}; + +static void build_bw_table_cmd(struct hfi_bwtable_cmd *cmd, + struct rpmh_bw_votes *ddr, struct rpmh_bw_votes *cnoc) +{ + u32 i, j; + + cmd->bw_level_num = ddr->num_levels; + cmd->ddr_cmds_num = ddr->num_cmds; + cmd->ddr_wait_bitmask = ddr->wait_bitmask; + + for (i = 0; i < ddr->num_cmds; i++) + cmd->ddr_cmd_addrs[i] = ddr->addrs[i]; + + for (i = 0; i < ddr->num_levels; i++) + for (j = 0; j < ddr->num_cmds; j++) + cmd->ddr_cmd_data[i][j] = (u32) ddr->cmds[i][j]; + + if (!cnoc) + return; + + cmd->cnoc_cmds_num = cnoc->num_cmds; + cmd->cnoc_wait_bitmask = cnoc->wait_bitmask; + + for (i = 0; i < cnoc->num_cmds; i++) + cmd->cnoc_cmd_addrs[i] = cnoc->addrs[i]; + + for (i = 0; i < cnoc->num_levels; i++) + for (j = 0; j < cnoc->num_cmds; j++) + cmd->cnoc_cmd_data[i][j] = (u32) cnoc->cmds[i][j]; +} + +#define GEN8_DDR_NOM_IDX 6 + +static int build_bw_table(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct rpmh_bw_votes *ddr, *cnoc = NULL; + u32 perfmode_vote = gen8_core->acv_perfmode_vote; + u32 perfmode_lvl = GEN8_DDR_NOM_IDX; + u32 *cnoc_table; + u32 count; + int ret; + + /* If perfmode vote is not defined, use default value as 0x8 */ + if (!perfmode_vote) + perfmode_vote = BIT(3); + + ddr = build_rpmh_bw_votes(gen8_ddr_bcms, ARRAY_SIZE(gen8_ddr_bcms), + pwr->ddr_table, pwr->ddr_table_count, perfmode_vote, perfmode_lvl); + if (IS_ERR(ddr)) + return PTR_ERR(ddr); + + cnoc_table = kgsl_bus_get_table(device->pdev, 
"qcom,bus-table-cnoc", + &count); + + if (count > 0) + cnoc = build_rpmh_bw_votes(gen8_cnoc_bcms, + ARRAY_SIZE(gen8_cnoc_bcms), cnoc_table, count, 0, 0); + + kfree(cnoc_table); + + if (IS_ERR(cnoc)) { + free_rpmh_bw_votes(ddr); + return PTR_ERR(cnoc); + } + + ret = CMD_MSG_HDR(gmu->hfi.bw_table, H2F_MSG_BW_VOTE_TBL); + if (ret) + return ret; + + build_bw_table_cmd(&gmu->hfi.bw_table, ddr, cnoc); + + free_rpmh_bw_votes(ddr); + free_rpmh_bw_votes(cnoc); + + return 0; +} + +int gen8_build_rpmh_tables(struct adreno_device *adreno_dev) +{ + int ret; + + ret = build_dcvs_table(adreno_dev); + if (ret) { + dev_err(adreno_dev->dev.dev, "Failed to build dcvs table\n"); + return ret; + } + + ret = build_bw_table(adreno_dev); + if (ret) + dev_err(adreno_dev->dev.dev, "Failed to build bw table\n"); + + return ret; +} diff --git a/adreno_hfi.h b/adreno_hfi.h index 293979ba29..e12eea2444 100644 --- a/adreno_hfi.h +++ b/adreno_hfi.h @@ -95,6 +95,9 @@ enum hfi_table_type { /* A6xx uses a different value for KPROF */ #define HFI_FEATURE_A6XX_KPROF 14 +/* For Gen7 & Gen8 ACD */ +#define F_PWR_ACD_CALIBRATE 78 + #define HFI_VALUE_FT_POLICY 100 #define HFI_VALUE_RB_MAX_CMDS 101 #define HFI_VALUE_CTX_MAX_CMDS 102 diff --git a/adreno_trace.h b/adreno_trace.h index 5f8dc88bf7..62ee64f626 100644 --- a/adreno_trace.h +++ b/adreno_trace.h @@ -17,6 +17,7 @@ #include #include "adreno_a5xx.h" #include "adreno_gen7.h" +#include "adreno_gen8.h" #include "adreno_hfi.h" #define ADRENO_FT_TYPES \ @@ -722,6 +723,70 @@ TRACE_EVENT(kgsl_gen7_irq_status, ) ); +/* + * Tracepoint for gen8 irq. 
Includes status info + */ +TRACE_EVENT(kgsl_gen8_irq_status, + + TP_PROTO(struct adreno_device *adreno_dev, u32 status), + + TP_ARGS(adreno_dev, status), + + TP_STRUCT__entry( + __string(device_name, adreno_dev->dev.name) + __field(u32, status) + ), + + TP_fast_assign( + __assign_str(device_name, adreno_dev->dev.name); + __entry->status = status; + ), + + TP_printk( + "d_name=%s status=%s", + __get_str(device_name), + __entry->status ? __print_flags(__entry->status, "|", + { BIT(GEN8_INT_GPUIDLE), "GPUIDLE" }, + { BIT(GEN8_INT_AHBERROR), "AHBERROR" }, + { BIT(GEN8_INT_CPIPCINT0), "CPIPCINT0" }, + { BIT(GEN8_INT_CPIPCINT1), "CPIPCINT1" }, + { BIT(GEN8_INT_ATBASYNCFIFOOVERFLOW), + "ATBASYNCFIFOOVERFLOW" }, + { BIT(GEN8_INT_GPCERROR), "GPCERROR" }, + { BIT(GEN8_INT_SWINTERRUPT), "SWINTERRUPT" }, + { BIT(GEN8_INT_HWERROR), "HWERROR" }, + { BIT(GEN8_INT_CCU_CLEAN_DEPTH_TS), + "CCU_CLEAN_DEPTH_TS" }, + { BIT(GEN8_INT_CCU_CLEAN_COLOR_TS), + "CCU_CLEAN_COLOR_TS" }, + { BIT(GEN8_INT_CCU_RESOLVE_CLEAN_TS), + "CCU_RESOLVE_CLEAN_TS" }, + { BIT(GEN8_INT_PM4CPINTERRUPT), "PM4CPINTERRUPT" }, + { BIT(GEN8_INT_PM4CPINTERRUPTLPAC), + "PM4CPINTERRUPTLPAC" }, + { BIT(GEN8_INT_RB_DONE_TS), "RB_DONE_TS" }, + { BIT(GEN8_INT_CACHE_CLEAN_TS), "CACHE_CLEAN_TS" }, + { BIT(GEN8_INT_CACHE_CLEAN_TS_LPAC), + "CACHE_CLEAN_TS_LPAC" }, + { BIT(GEN8_INT_ATBBUSOVERFLOW), "ATBBUSOVERFLOW" }, + { BIT(GEN8_INT_HANGDETECTINTERRUPT), + "HANGDETECTINTERRUPT" }, + { BIT(GEN8_INT_OUTOFBOUNDACCESS), + "OUTOFBOUNDACCESS" }, + { BIT(GEN8_INT_UCHETRAPINTERRUPT), + "UCHETRAPINTERRUPT" }, + { BIT(GEN8_INT_DEBUGBUSINTERRUPT0), + "DEBUGBUSINTERRUPT0" }, + { BIT(GEN8_INT_DEBUGBUSINTERRUPT1), + "DEBUGBUSINTERRUPT1" }, + { BIT(GEN8_INT_TSBWRITEERROR), "TSBWRITEERROR" }, + { BIT(GEN8_INT_SWFUSEVIOLATION), "SWFUSEVIOLATION" }, + { BIT(GEN8_INT_ISDBCPUIRQ), "ISDBCPUIRQ" }, + { BIT(GEN8_INT_ISDBUNDERDEBUG), "ISDBUNDERDEBUG" }) + : "None" + ) +); + DECLARE_EVENT_CLASS(adreno_hw_preempt_template, TP_PROTO(struct 
adreno_ringbuffer *cur_rb, struct adreno_ringbuffer *new_rb, diff --git a/build/kgsl_defs.bzl b/build/kgsl_defs.bzl index b68d10f814..709a25f6b2 100644 --- a/build/kgsl_defs.bzl +++ b/build/kgsl_defs.bzl @@ -41,6 +41,14 @@ def kgsl_get_srcs(): "adreno_gen7_ringbuffer.c", "adreno_gen7_rpmh.c", "adreno_gen7_snapshot.c", + "adreno_gen8.c", + "adreno_gen8_gmu.c", + "adreno_gen8_hfi.c", + "adreno_gen8_hwsched.c", + "adreno_gen8_hwsched_hfi.c", + "adreno_gen8_preempt.c", + "adreno_gen8_ringbuffer.c", + "adreno_gen8_rpmh.c", "adreno_hwsched.c", "adreno_ioctl.c", "adreno_perfcounter.c", diff --git a/gen8_reg.h b/gen8_reg.h new file mode 100644 index 0000000000..9ed1f9a436 --- /dev/null +++ b/gen8_reg.h @@ -0,0 +1,1196 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _GEN8_REG_H +#define _GEN8_REG_H + +/* GEN8 interrupt bits */ +#define GEN8_INT_GPUIDLE 0 +#define GEN8_INT_AHBERROR 1 +#define GEN8_INT_CPIPCINT0 4 +#define GEN8_INT_CPIPCINT1 5 +#define GEN8_INT_ATBASYNCFIFOOVERFLOW 6 +#define GEN8_INT_GPCERROR 7 +#define GEN8_INT_SWINTERRUPT 8 +#define GEN8_INT_HWERROR 9 +#define GEN8_INT_CCU_CLEAN_DEPTH_TS 10 +#define GEN8_INT_CCU_CLEAN_COLOR_TS 11 +#define GEN8_INT_CCU_RESOLVE_CLEAN_TS 12 +#define GEN8_INT_PM4CPINTERRUPT 15 +#define GEN8_INT_PM4CPINTERRUPTLPAC 16 +#define GEN8_INT_RB_DONE_TS 17 +#define GEN8_INT_CACHE_CLEAN_TS 20 +#define GEN8_INT_CACHE_CLEAN_TS_LPAC 21 +#define GEN8_INT_ATBBUSOVERFLOW 22 +#define GEN8_INT_HANGDETECTINTERRUPT 23 +#define GEN8_INT_OUTOFBOUNDACCESS 24 +#define GEN8_INT_UCHETRAPINTERRUPT 25 +#define GEN8_INT_DEBUGBUSINTERRUPT0 26 +#define GEN8_INT_DEBUGBUSINTERRUPT1 27 +#define GEN8_INT_TSBWRITEERROR 28 +#define GEN8_INT_SWFUSEVIOLATION 29 +#define GEN8_INT_ISDBCPUIRQ 30 +#define GEN8_INT_ISDBUNDERDEBUG 31 + +/* RBBM registers */ +#define GEN8_RBBM_GBIF_CLIENT_QOS_CNTL 0x008 
+#define GEN8_RBBM_GBIF_HALT 0x00a +#define GEN8_RBBM_GBIF_HALT_ACK 0x00b +#define GEN8_RBBM_STATUS 0x012 +#define GEN8_RBBM_STATUS1 0x013 +#define GEN8_RBBM_GFX_STATUS 0x015 +#define GEN8_RBBM_GFX_STATUS1 0x016 +#define GEN8_RBBM_LPAC_STATUS 0x018 +#define GEN8_RBBM_GFX_BR_STATUS 0x01a +#define GEN8_RBBM_GFX_BV_STATUS 0x01c +#define GEN8_RBBM_ISDB_CNT 0x02d +#define GEN8_RBBM_SNAPSHOT_STATUS 0x02e +#define GEN8_RBBM_INTERFACE_HANG_INT_CNTL 0x02f +#define GEN8_RBBM_INT_CLEAR_CMD 0x061 +#define GEN8_RBBM_INT_0_MASK 0x062 +#define GEN8_RBBM_INT_2_MASK 0x064 +#define GEN8_RBBM_INT_0_STATUS 0x06a +#define GEN8_RBBM_SW_FUSE_INT_STATUS 0x071 +#define GEN8_RBBM_SW_FUSE_INT_MASK 0x072 +#define GEN8_RBBM_SW_RESET_CMD 0x073 +#define GEN8_RBBM_CLOCK_CNTL_GLOBAL 0x09a +#define GEN8_RBBM_CGC_GLOBAL_LOAD_CMD 0x09b +#define GEN8_RBBM_CGC_P2S_TRIG_CMD 0x09c +#define GEN8_RBBM_CGC_P2S_STATUS 0x09f + +#define GEN8_RBBM_CGC_0_PC 0x10b +#define GEN8_RBBM_PERFCTR_GPU_BUSY_MASKED 0x19e +#define GEN8_RBBM_PERFCTR_SRAM_INIT_STATUS 0x19f +#define GEN8_RBBM_PERFCTR_CP_0_LO 0x1b0 +#define GEN8_RBBM_PERFCTR_CP_0_HI 0x1b1 +#define GEN8_RBBM_PERFCTR_CP_1_LO 0x1b2 +#define GEN8_RBBM_PERFCTR_CP_1_HI 0x1b3 +#define GEN8_RBBM_PERFCTR_CP_2_LO 0x1b4 +#define GEN8_RBBM_PERFCTR_CP_2_HI 0x1b5 +#define GEN8_RBBM_PERFCTR_CP_3_LO 0x1b6 +#define GEN8_RBBM_PERFCTR_CP_3_HI 0x1b7 +#define GEN8_RBBM_PERFCTR_CP_4_LO 0x1b8 +#define GEN8_RBBM_PERFCTR_CP_4_HI 0x1b9 +#define GEN8_RBBM_PERFCTR_CP_5_LO 0x1ba +#define GEN8_RBBM_PERFCTR_CP_5_HI 0x1bb +#define GEN8_RBBM_PERFCTR_CP_6_LO 0x1bc +#define GEN8_RBBM_PERFCTR_CP_6_HI 0x1bd +#define GEN8_RBBM_PERFCTR_CP_7_LO 0x1be +#define GEN8_RBBM_PERFCTR_CP_7_HI 0x1bf +#define GEN8_RBBM_PERFCTR_CP_8_LO 0x1c0 +#define GEN8_RBBM_PERFCTR_CP_8_HI 0x1c1 +#define GEN8_RBBM_PERFCTR_CP_9_LO 0x1c2 +#define GEN8_RBBM_PERFCTR_CP_9_HI 0x1c3 +#define GEN8_RBBM_PERFCTR_CP_10_LO 0x1c4 +#define GEN8_RBBM_PERFCTR_CP_10_HI 0x1c5 +#define GEN8_RBBM_PERFCTR_CP_11_LO 0x1c6 +#define 
GEN8_RBBM_PERFCTR_CP_11_HI 0x1c7 +#define GEN8_RBBM_PERFCTR_CP_12_LO 0x1c8 +#define GEN8_RBBM_PERFCTR_CP_12_HI 0x1c9 +#define GEN8_RBBM_PERFCTR_CP_13_LO 0x1ca +#define GEN8_RBBM_PERFCTR_CP_13_HI 0x1cb +#define GEN8_RBBM_PERFCTR_RBBM_0_LO 0x1cc +#define GEN8_RBBM_PERFCTR_RBBM_0_HI 0x1cd +#define GEN8_RBBM_PERFCTR_RBBM_1_LO 0x1ce +#define GEN8_RBBM_PERFCTR_RBBM_1_HI 0x1cf +#define GEN8_RBBM_PERFCTR_RBBM_2_LO 0x1d0 +#define GEN8_RBBM_PERFCTR_RBBM_2_HI 0x1d1 +#define GEN8_RBBM_PERFCTR_RBBM_3_LO 0x1d2 +#define GEN8_RBBM_PERFCTR_RBBM_3_HI 0x1d3 +#define GEN8_RBBM_PERFCTR_PC_0_LO 0x1d4 +#define GEN8_RBBM_PERFCTR_PC_0_HI 0x1d5 +#define GEN8_RBBM_PERFCTR_PC_1_LO 0x1d6 +#define GEN8_RBBM_PERFCTR_PC_1_HI 0x1d7 +#define GEN8_RBBM_PERFCTR_PC_2_LO 0x1d8 +#define GEN8_RBBM_PERFCTR_PC_2_HI 0x1d9 +#define GEN8_RBBM_PERFCTR_PC_3_LO 0x1da +#define GEN8_RBBM_PERFCTR_PC_3_HI 0x1db +#define GEN8_RBBM_PERFCTR_PC_4_LO 0x1dc +#define GEN8_RBBM_PERFCTR_PC_4_HI 0x1dd +#define GEN8_RBBM_PERFCTR_PC_5_LO 0x1de +#define GEN8_RBBM_PERFCTR_PC_5_HI 0x1df +#define GEN8_RBBM_PERFCTR_PC_6_LO 0x1e0 +#define GEN8_RBBM_PERFCTR_PC_6_HI 0x1e1 +#define GEN8_RBBM_PERFCTR_PC_7_LO 0x1e2 +#define GEN8_RBBM_PERFCTR_PC_7_HI 0x1e3 +#define GEN8_RBBM_PERFCTR_VFD_0_LO 0x1e4 +#define GEN8_RBBM_PERFCTR_VFD_0_HI 0x1e5 +#define GEN8_RBBM_PERFCTR_VFD_1_LO 0x1e6 +#define GEN8_RBBM_PERFCTR_VFD_1_HI 0x1e7 +#define GEN8_RBBM_PERFCTR_VFD_2_LO 0x1e8 +#define GEN8_RBBM_PERFCTR_VFD_2_HI 0x1e9 +#define GEN8_RBBM_PERFCTR_VFD_3_LO 0x1ea +#define GEN8_RBBM_PERFCTR_VFD_3_HI 0x1eb +#define GEN8_RBBM_PERFCTR_VFD_4_LO 0x1ec +#define GEN8_RBBM_PERFCTR_VFD_4_HI 0x1ed +#define GEN8_RBBM_PERFCTR_VFD_5_LO 0x1ee +#define GEN8_RBBM_PERFCTR_VFD_5_HI 0x1ef +#define GEN8_RBBM_PERFCTR_VFD_6_LO 0x1f0 +#define GEN8_RBBM_PERFCTR_VFD_6_HI 0x1f1 +#define GEN8_RBBM_PERFCTR_VFD_7_LO 0x1f2 +#define GEN8_RBBM_PERFCTR_VFD_7_HI 0x1f3 +#define GEN8_RBBM_PERFCTR_HLSQ_0_LO 0x1f4 +#define GEN8_RBBM_PERFCTR_HLSQ_0_HI 0x1f5 +#define GEN8_RBBM_PERFCTR_HLSQ_1_LO 
0x1f6 +#define GEN8_RBBM_PERFCTR_HLSQ_1_HI 0x1f7 +#define GEN8_RBBM_PERFCTR_HLSQ_2_LO 0x1f8 +#define GEN8_RBBM_PERFCTR_HLSQ_2_HI 0x1f9 +#define GEN8_RBBM_PERFCTR_HLSQ_3_LO 0x1fa +#define GEN8_RBBM_PERFCTR_HLSQ_3_HI 0x1fb +#define GEN8_RBBM_PERFCTR_HLSQ_4_LO 0x1fc +#define GEN8_RBBM_PERFCTR_HLSQ_4_HI 0x1fd +#define GEN8_RBBM_PERFCTR_HLSQ_5_LO 0x1fe +#define GEN8_RBBM_PERFCTR_HLSQ_5_HI 0x1ff +#define GEN8_RBBM_PERFCTR_VPC_0_LO 0x200 +#define GEN8_RBBM_PERFCTR_VPC_0_HI 0x201 +#define GEN8_RBBM_PERFCTR_VPC_1_LO 0x202 +#define GEN8_RBBM_PERFCTR_VPC_1_HI 0x203 +#define GEN8_RBBM_PERFCTR_VPC_2_LO 0x204 +#define GEN8_RBBM_PERFCTR_VPC_2_HI 0x205 +#define GEN8_RBBM_PERFCTR_VPC_3_LO 0x206 +#define GEN8_RBBM_PERFCTR_VPC_3_HI 0x207 +#define GEN8_RBBM_PERFCTR_VPC_4_LO 0x208 +#define GEN8_RBBM_PERFCTR_VPC_4_HI 0x209 +#define GEN8_RBBM_PERFCTR_VPC_5_LO 0x20a +#define GEN8_RBBM_PERFCTR_VPC_5_HI 0x20b +#define GEN8_RBBM_PERFCTR_CCU_0_LO 0x20c +#define GEN8_RBBM_PERFCTR_CCU_0_HI 0x20d +#define GEN8_RBBM_PERFCTR_CCU_1_LO 0x20e +#define GEN8_RBBM_PERFCTR_CCU_1_HI 0x20f +#define GEN8_RBBM_PERFCTR_CCU_2_LO 0x210 +#define GEN8_RBBM_PERFCTR_CCU_2_HI 0x211 +#define GEN8_RBBM_PERFCTR_CCU_3_LO 0x212 +#define GEN8_RBBM_PERFCTR_CCU_3_HI 0x213 +#define GEN8_RBBM_PERFCTR_CCU_4_LO 0x214 +#define GEN8_RBBM_PERFCTR_CCU_4_HI 0x215 +#define GEN8_RBBM_PERFCTR_TSE_0_LO 0x216 +#define GEN8_RBBM_PERFCTR_TSE_0_HI 0x217 +#define GEN8_RBBM_PERFCTR_TSE_1_LO 0x218 +#define GEN8_RBBM_PERFCTR_TSE_1_HI 0x219 +#define GEN8_RBBM_PERFCTR_TSE_2_LO 0x21a +#define GEN8_RBBM_PERFCTR_TSE_2_HI 0x21b +#define GEN8_RBBM_PERFCTR_TSE_3_LO 0x21c +#define GEN8_RBBM_PERFCTR_TSE_3_HI 0x21d +#define GEN8_RBBM_PERFCTR_RAS_0_LO 0x21e +#define GEN8_RBBM_PERFCTR_RAS_0_HI 0x21f +#define GEN8_RBBM_NC_MODE_CNTL 0x440 +#define GEN8_RBBM_PERFCTR_SRAM_INIT_CMD 0x449 +#define GEN8_RBBM_PERFCTR_CNTL 0x460 + +/* GPU Slice registers */ +#define GEN8_RBBM_SLICE_PERFCTR_CNTL 0x500 +#define GEN8_RBBM_SLICE_INTERFACE_HANG_INT_CNTL 0x58f +#define 
GEN8_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD 0x5e8 +#define GEN8_RBBM_SLICE_NC_MODE_CNTL 0x5ec +#define GEN8_VSC_BIN_SIZE 0xc02 + +/* DBGC_CFG registers */ +#define GEN8_DBGC_CFG_DBGBUS_SEL_A 0x600 +#define GEN8_DBGC_CFG_DBGBUS_SEL_B 0x601 +#define GEN8_DBGC_CFG_DBGBUS_SEL_C 0x602 +#define GEN8_DBGC_CFG_DBGBUS_SEL_D 0x603 +#define GEN8_DBGC_CFG_DBGBUS_CNTLT 0x604 +#define GEN8_DBGC_CFG_DBGBUS_CNTLM 0x605 +#define GEN8_DBGC_CFG_DBGBUS_OPL 0x606 +#define GEN8_DBGC_CFG_DBGBUS_OPE 0x607 +#define GEN8_DBGC_CFG_DBGBUS_IVTL_0 0x608 +#define GEN8_DBGC_CFG_DBGBUS_IVTL_1 0x609 +#define GEN8_DBGC_CFG_DBGBUS_IVTL_2 0x60a +#define GEN8_DBGC_CFG_DBGBUS_IVTL_3 0x60b +#define GEN8_DBGC_CFG_DBGBUS_MASKL_0 0x60c +#define GEN8_DBGC_CFG_DBGBUS_MASKL_1 0x60d +#define GEN8_DBGC_CFG_DBGBUS_MASKL_2 0x60e +#define GEN8_DBGC_CFG_DBGBUS_MASKL_3 0x60f +#define GEN8_DBGC_CFG_DBGBUS_BYTEL_0 0x610 +#define GEN8_DBGC_CFG_DBGBUS_BYTEL_1 0x611 +#define GEN8_DBGC_CFG_DBGBUS_IVTE_0 0x612 +#define GEN8_DBGC_CFG_DBGBUS_IVTE_1 0x613 +#define GEN8_DBGC_CFG_DBGBUS_IVTE_2 0x614 +#define GEN8_DBGC_CFG_DBGBUS_IVTE_3 0x615 +#define GEN8_DBGC_CFG_DBGBUS_MASKE_0 0x616 +#define GEN8_DBGC_CFG_DBGBUS_MASKE_1 0x617 +#define GEN8_DBGC_CFG_DBGBUS_MASKE_2 0x618 +#define GEN8_DBGC_CFG_DBGBUS_MASKE_3 0x619 +#define GEN8_DBGC_CFG_DBGBUS_NIBBLEE 0x61a +#define GEN8_DBGC_CFG_DBGBUS_PTRC0 0x61b +#define GEN8_DBGC_CFG_DBGBUS_PTRC1 0x61c +#define GEN8_DBGC_CFG_DBGBUS_LOADREG 0x61d +#define GEN8_DBGC_CFG_DBGBUS_IDX 0x61e +#define GEN8_DBGC_CFG_DBGBUS_CLRC 0x61f +#define GEN8_DBGC_CFG_DBGBUS_LOADIVT 0x620 +#define GEN8_DBGC_VBIF_DBG_CNTL 0x621 +#define GEN8_DBGC_DBG_LO_HI_GPIO 0x622 +#define GEN8_DBGC_EXT_TRACE_BUS_CNTL 0x623 +#define GEN8_DBGC_READ_AHB_THROUGH_DBG 0x624 +#define GEN8_DBGC_CFG_DBGBUS_EVENT_LOGIC 0x625 +#define GEN8_DBGC_CFG_DBGBUS_OVER 0x626 +#define GEN8_DBGC_CFG_DBGBUS_COUNT0 0x627 +#define GEN8_DBGC_CFG_DBGBUS_COUNT1 0x628 +#define GEN8_DBGC_CFG_DBGBUS_COUNT2 0x629 +#define GEN8_DBGC_CFG_DBGBUS_COUNT3 0x62a 
+#define GEN8_DBGC_CFG_DBGBUS_COUNT4 0x62b +#define GEN8_DBGC_CFG_DBGBUS_COUNT5 0x62c +#define GEN8_DBGC_CFG_DBGBUS_TRACE_ADDR 0x62d +#define GEN8_DBGC_CFG_DBGBUS_TRACE_BUF0 0x62e +#define GEN8_DBGC_CFG_DBGBUS_TRACE_BUF1 0x62f +#define GEN8_DBGC_CFG_DBGBUS_TRACE_BUF2 0x630 +#define GEN8_DBGC_CFG_DBGBUS_TRACE_BUF3 0x631 +#define GEN8_DBGC_CFG_DBGBUS_TRACE_BUF4 0x632 +#define GEN8_DBGC_CFG_DBGBUS_MISR0 0x633 +#define GEN8_DBGC_CFG_DBGBUS_MISR1 0x634 +#define GEN8_DBGC_EVT_CFG 0x635 +#define GEN8_DBGC_EVT_INTF_SEL_0 0x636 /* Indexed Register */ +#define GEN8_DBGC_EVT_INTF_SEL_1 0x637 /* Indexed Register */ +#define GEN8_DBGC_EVT_SLICE_CFG 0x638 +#define GEN8_DBGC_QDSS_TIMESTAMP_0 0x639 /* Indexed Register */ +#define GEN8_DBGC_QDSS_TIMESTAMP_1 0x63a /* Indexed Register */ +#define GEN8_DBGC_ECO_CNTL 0x63b +#define GEN8_DBGC_AHB_DBG_CNTL 0x63c +#define GEN8_DBGC_EVT_INTF_SEL_2 0x63d +#define GEN8_DBGC_CFG_DBGBUS_PONG_SEL_A 0x640 +#define GEN8_DBGC_CFG_DBGBUS_PONG_SEL_B 0x641 +#define GEN8_DBGC_CFG_DBGBUS_PONG_SEL_C 0x642 +#define GEN8_DBGC_CFG_DBGBUS_PONG_SEL_D 0x643 +#define GEN8_DBGC_CFG_DBGBUS_MISC_MODE 0x644 +#define GEN8_DBGC_EVT_INTF_SEL_3_0 0x650 /* Indexed Register */ +#define GEN8_DBGC_EVT_INTF_SEL_3_1 0x651 /* Indexed Register */ +#define GEN8_DBGC_EVT_INTF_SEL_3_2 0x652 /* Indexed Register */ +#define GEN8_DBGC_EVT_INTF_SEL_3_3 0x653 /* Indexed Register */ +#define GEN8_DBGC_EVT_INTF_SEL_3_4 0x654 /* Indexed Register */ +#define GEN8_DBGC_EVT_INTF_SEL_3_5 0x655 /* Indexed Register */ +#define GEN8_DBGC_TRACE_BUFFER_STATUS 0x660 +#define GEN8_DBGC_TRACE_BUFFER_CMD 0x661 +#define GEN8_DBGC_DBG_TRACE_BUFFER_RD_ADDR 0x662 +#define GEN8_DBGC_DBG_TRACE_BUFFER_RD_DATA 0x663 +#define GEN8_DBGC_TRACE_BUFFER_ATB_RD_STATUS 0x664 +#define GEN8_DBGC_SMMU_FAULT_BLOCK_HALT_CFG 0x665 +#define GEN8_DBGC_DBG_LOPC_SB_RD_ADDR 0x666 +#define GEN8_DBGC_DBG_LOPC_SB_RD_DATA 0x667 +#define GEN8_DBGC_DBG_LOPC_SB_WR_ADDR 0x668 +#define GEN8_DBGC_DBG_LOPC_SB_WR_DATA 0x669 +#define 
GEN8_DBGC_INTERRUPT_STATUS 0x66a +#define GEN8_DBGC_GBIF_DBG_BASE_LO 0x680 +#define GEN8_DBGC_GBIF_DBG_BASE_HI 0x681 +#define GEN8_DBGC_GBIF_DBG_BUFF_SIZE 0x682 +#define GEN8_DBGC_GBIF_DBG_CNTL 0x683 +#define GEN8_DBGC_GBIF_DBG_CMD 0x684 +#define GEN8_DBGC_GBIF_DBG_STATUS 0x685 +#define GEN8_DBGC_SCOPE_PERF_COUNTER_CFG_US 0x700 +#define GEN8_DBGC_CFG_PERF_TRIG_CLUSTER_FE_US 0x701 +#define GEN8_DBGC_CFG_PERF_TRIG_CLUSTER_VPC_US 0x702 +#define GEN8_DBGC_CFG_PERF_TRIG_CLUSTER_SP_VS_US 0x703 +#define GEN8_DBGC_CFG_PERF_TRIG_CLUSTER_SP_PS_US 0x704 +#define GEN8_DBGC_CFG_PERF_TRIG_CLUSTER_NONE_US 0x707 +#define GEN8_DBGC_CFG_BV_PERF_TRIG_CLUSTER_FE_US 0x708 +#define GEN8_DBGC_CFG_BV_PERF_TRIG_CLUSTER_VPC_US 0x709 +#define GEN8_DBGC_CFG_BV_PERF_TRIG_CLUSTER_SP_VS_US 0x70a +#define GEN8_DBGC_CFG_BV_PERF_TRIG_CLUSTER_NONE_US 0x70f +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_FE_US 0x710 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_FE_US_1 0x711 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_FE_US_2 0x712 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_VPC_US 0x713 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_VPC_US_1 0x714 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_SP_VS_US 0x715 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_SP_PS_US 0x716 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_NONE_US 0x720 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_NONE_US_1 0x721 +#define GEN8_DBGC_CFG_BV_PERF_COUNTER_SEL_FE_US 0x722 +#define GEN8_DBGC_CFG_BV_PERF_COUNTER_SEL_FE_US_1 0x723 +#define GEN8_DBGC_CFG_BV_PERF_COUNTER_SEL_FE_US_2 0x724 +#define GEN8_DBGC_CFG_BV_PERF_COUNTER_SEL_VPC_US 0x730 +#define GEN8_DBGC_CFG_BV_PERF_COUNTER_SEL_VPC_US_1 0x731 +#define GEN8_DBGC_CFG_BV_PERF_COUNTER_SEL_SP_VS_US 0x732 +#define GEN8_DBGC_CFG_BV_PERF_COUNTER_SEL_NONE_US 0x740 +#define GEN8_DBGC_CFG_PERF_TIMESTAMP_TRIG_SEL_US 0x742 +#define GEN8_DBGC_CFG_BV_PERF_TIMESTAMP_TRIG_SEL_US 0x743 +#define GEN8_DBGC_CFG_GBIF_BR_PERF_CNTR_BASE_LO 0x744 +#define GEN8_DBGC_CFG_GBIF_BR_PERF_CNTR_BASE_HI 0x745 +#define GEN8_DBGC_CFG_GBIF_BR_BUFFER_SIZE 0x746 
+#define GEN8_DBGC_CFG_GBIF_BV_PERF_CNTR_BASE_LO 0x747 +#define GEN8_DBGC_CFG_GBIF_BV_PERF_CNTR_BASE_HI 0x748 +#define GEN8_DBGC_CFG_GBIF_BV_BUFFER_SIZE 0x749 +#define GEN8_DBGC_CFG_GBIF_QOS_CTRL 0x74a +#define GEN8_DBGC_GBIF_BR_PERF_CNTR_WRITE_POINTER 0x750 +#define GEN8_DBGC_GBIF_BV_PERF_CNTR_WRITE_POINTER 0x751 +#define GEN8_DBGC_PERF_COUNTER_FE_LOCAL_BATCH_ID 0x752 +#define GEN8_DBGC_CFG_PERF_WAIT_IDLE_CLOCKS_CNTL 0x753 +#define GEN8_DBGC_PERF_COUNTER_SCOPING_CMD_US 0x754 +#define GEN8_DBGC_PERF_SKEW_BUFFER_INIT_CMD 0x755 +#define GEN8_DBGC_LOPC_INTERRUPT_STATUS 0x759 +#define GEN8_DBGC_LOPC_BUFFER_PTR_STATUS 0x75a +#define GEN8_DBGC_PERF_SCOPING_STATUS 0x75b +#define GEN8_DBGC_PERF_COUNTER_PKT_STATUS 0x75c +#define GEN8_DBGC_GC_LIVE_MBX_PKT_STATUS 0x760 +#define GEN8_DBGC_GC_ALW_MBX_PKT_STATUS 0x761 +#define GEN8_DBGC_AO_CNTR_LO_STATUS 0x762 +#define GEN8_DBGC_AO_CNTR_HI_STATUS 0x763 +#define GEN8_DBGC_LOPC_GC_SB_DEPTH_STATUS 0x770 +#define GEN8_DBGC_LPAC_SCOPE_PERF_COUNTER_CFG_US 0x780 +#define GEN8_DBGC_CFG_PERF_TRIG_LPAC_US 0x781 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_LPAC_US 0x782 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_LPAC_US_1 0x783 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_LPAC_US_2 0x784 +#define GEN8_DBGC_CFG_PERF_TIMESTAMP_TRIG_SEL_LPAC_US 0x785 +#define GEN8_DBGC_CFG_GBIF_LPAC_PERF_CNTR_BASE_LO 0x786 +#define GEN8_DBGC_CFG_GBIF_LPAC_PERF_CNTR_BASE_HI 0x787 +#define GEN8_DBGC_CFG_GBIF_LPAC_BUFFER_SIZE 0x788 +#define GEN8_DBGC_GBIF_LPAC_PERF_CNTR_WRITE_POINTER 0x789 +#define GEN8_DBGC_CFG_LPAC_PERF_WAIT_IDLE_CLOCKS_CNTL 0x78a +#define GEN8_DBGC_LPAC_PERF_COUNTER_SCOPING_CMD_US 0x78b +#define GEN8_DBGC_LPAC_MBX_PKT_STATUS 0x78c +#define GEN8_DBGC_LPAC_PERF_SCOPING_STATUS 0x78d +#define GEN8_DBGC_LOPC_LPAC_SB_DEPTH_STATUS 0x790 +#define GEN8_DBGC_SCOPE_PERF_COUNTER_CFG_S 0x7a0 +#define GEN8_DBGC_CFG_PERF_TRIG_CLUSTER_FE_S 0x7a1 +#define GEN8_DBGC_CFG_PERF_TRIG_CLUSTER_SP_VS 0x7a2 +#define GEN8_DBGC_CFG_PERF_TRIG_CLUSTER_VPC_VS 0x7a3 +#define 
GEN8_DBGC_CFG_PERF_TRIG_CLUSTER_GRAS 0x7a4 +#define GEN8_DBGC_CFG_PERF_TRIG_CLUSTER_SP_PS 0x7a5 +#define GEN8_DBGC_CFG_PERF_TRIG_CLUSTER_VPC_PS 0x7a6 +#define GEN8_DBGC_CFG_PERF_TRIG_CLUSTER_PS 0x7a7 +#define GEN8_DBGC_CFG_BV_PERF_TRIG_CLUSTER_FE_S 0x7a8 +#define GEN8_DBGC_CFG_BV_PERF_TRIG_CLUSTER_SP_VS 0x7a9 +#define GEN8_DBGC_CFG_BV_PERF_TRIG_CLUSTER_VPC_VS 0x7aa +#define GEN8_DBGC_CFG_BV_PERF_TRIG_CLUSTER_GRAS 0x7ab +#define GEN8_DBGC_CFG_BV_PERF_TRIG_CLUSTER_VPC_PS 0x7ac +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_FE_S 0x7ad +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_FE_S_1 0x7ae +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_FE_S_2 0x7af +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_FE_S_3 0x7b0 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_SP_VS 0x7b1 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_SP_VS_1 0x7b2 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_SP_VS_2 0x7b3 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_SP_VS_3 0x7b4 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_VPC_VS 0x7b5 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_VPC_VS_1 0x7b6 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_GRAS 0x7b7 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_GRAS_1 0x7b8 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_GRAS_2 0x7b9 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_SP_PS 0x7ba +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_SP_PS_1 0x7bb +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_SP_PS_2 0x7bc +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_SP_PS_3 0x7bd +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_VPC_PS 0x7be +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_VPC_PS_1 0x7bf +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_PS 0x7c0 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_PS_1 0x7c1 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_PS_2 0x7c2 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_PS_3 0x7c3 +#define GEN8_DBGC_CFG_PERF_TIMESTAMP_TRIG_SEL_S 0x7c4 +#define GEN8_DBGC_CFG_BV_PERF_COUNTER_SEL_FE_S 0x7c5 +#define GEN8_DBGC_CFG_BV_PERF_COUNTER_SEL_FE_S_1 0x7c6 +#define GEN8_DBGC_CFG_BV_PERF_COUNTER_SEL_FE_S_2 0x7c7 +#define GEN8_DBGC_CFG_BV_PERF_COUNTER_SEL_FE_S_3 0x7c8 +#define 
GEN8_DBGC_CFG_BV_PERF_COUNTER_SEL_SP_VS 0x7c9 +#define GEN8_DBGC_CFG_BV_PERF_COUNTER_SEL_SP_VS_1 0x7ca +#define GEN8_DBGC_CFG_BV_PERF_COUNTER_SEL_SP_VS_2 0x7cb +#define GEN8_DBGC_CFG_BV_PERF_COUNTER_SEL_SP_VS_3 0x7cc +#define GEN8_DBGC_CFG_BV_PERF_COUNTER_SEL_VPC_VS 0x7cd +#define GEN8_DBGC_CFG_BV_PERF_COUNTER_SEL_VPC_VS_1 0x7ce +#define GEN8_DBGC_CFG_BV_PERF_COUNTER_SEL_GRAS 0x7cf +#define GEN8_DBGC_CFG_BV_PERF_COUNTER_SEL_GRAS_1 0x7d0 +#define GEN8_DBGC_CFG_BV_PERF_COUNTER_SEL_GRAS_2 0x7d1 +#define GEN8_DBGC_CFG_BV_PERF_COUNTER_SEL_VPC_PS 0x7d2 +#define GEN8_DBGC_CFG_BV_PERF_COUNTER_SEL_VPC_PS_1 0x7d3 +#define GEN8_DBGC_CFG_BV_PERF_TIMESTAMP_TRIG_SEL_S 0x7d4 +#define GEN8_DBGC_PERF_COUNTER_SCOPING_CMD_S 0x7d5 +#define GEN8_DBGC_LPAC_SCOPE_PERF_COUNTER_CFG_S 0x7e0 +#define GEN8_DBGC_CFG_PERF_TRIG_LPAC_S 0x7e1 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_LPAC_S 0x7e2 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_LPAC_S_1 0x7e3 +#define GEN8_DBGC_CFG_PERF_COUNTER_SEL_LPAC_S_2 0x7e4 +#define GEN8_DBGC_CFG_PERF_TIMESTAMP_TRIG_SEL_LPAC_S 0x7e5 +#define GEN8_DBGC_LPAC_PERF_COUNTER_SCOPING_CMD_S 0x7e6 + +/* VSC registers */ +#define GEN8_VSC_PERFCTR_VSC_SEL_0 0xcd8 +#define GEN8_VSC_PERFCTR_VSC_SEL_1 0xcd9 + +/* CP registers */ +#define GEN8_CP_RB_BASE_LO_GC 0x800 +#define GEN8_CP_RB_BASE_HI_GC 0x801 +#define GEN8_CP_RB_CNTL_GC 0x802 +#define GEN8_CP_RB_RPTR_WR_GC 0x803 +#define GEN8_CP_RB_RPTR_ADDR_LO_BR 0x804 +#define GEN8_CP_RB_RPTR_ADDR_HI_BR 0x805 +#define GEN8_CP_RB_RPTR_BR 0x806 +#define GEN8_CP_RB_WPTR_GC 0x807 +#define GEN8_CP_RB_RPTR_ADDR_LO_BV 0x808 +#define GEN8_CP_RB_RPTR_ADDR_HI_BV 0x809 +#define GEN8_CP_RB_RPTR_BV 0x80a +#define GEN8_CP_RB_BASE_LO_LPAC 0x80b +#define GEN8_CP_RB_BASE_HI_LPAC 0x80c +#define GEN8_CP_RB_CNTL_LPAC 0x80d +#define GEN8_CP_RB_RPTR_WR_LPAC 0x80e +#define GEN8_CP_RB_RPTR_ADDR_LO_LPAC 0x80f +#define GEN8_CP_RB_RPTR_ADDR_HI_LPAC 0x810 +#define GEN8_CP_RB_RPTR_LPAC 0x811 +#define GEN8_CP_RB_WPTR_LPAC 0x812 +#define GEN8_CP_SQE_CNTL 0x815 +#define 
GEN8_CP_SQE_INSTR_BASE_LO 0x816 +#define GEN8_CP_SQE_INSTR_BASE_HI 0x817 +#define GEN8_CP_AQE_INSTR_BASE_LO_0 0x818 +#define GEN8_CP_AQE_INSTR_BASE_HI_0 0x819 +#define GEN8_CP_AQE_INSTR_BASE_LO_1 0x81a +#define GEN8_CP_AQE_INSTR_BASE_HI_1 0x81b +#define GEN8_CP_APERTURE_CNTL_HOST 0x81c +#define GEN8_CP_APERTURE_CNTL_GMU 0x81d +#define GEN8_CP_APERTURE_CNTL_CD 0x81e +#define GEN8_CP_CP2GMU_STATUS 0x822 +#define GEN8_CP_AHB_CNTL 0x838 +#define GEN8_CP_RL_ERROR_DETAILS_0 0x840 +#define GEN8_CP_RL_ERROR_DETAILS_1 0x841 +#define GEN8_CP_CRASH_DUMP_SCRIPT_BASE_LO 0x842 +#define GEN8_CP_CRASH_DUMP_SCRIPT_BASE_HI 0x843 +#define GEN8_CP_CRASH_DUMP_CNTL 0x844 +#define GEN8_CP_CRASH_DUMP_STATUS 0x845 +#define GEN8_CP_DBG_ECO_CNTL 0x84b +#define GEN8_CP_MISC_CNTL 0x84c +#define GEN8_CP_APRIV_CNTL_PIPE 0x84d +#define GEN8_CP_PROTECT_CNTL_PIPE 0x84e +#define GEN8_CP_PROTECT_STATUS_PIPE 0x84f +#define GEN8_CP_PROTECT_REG_GLOBAL 0x850 +#define GEN8_CP_PROTECT_REG_PIPE 0x8a0 + +#define GEN8_CP_SQE_ICACHE_CNTL_PIPE 0x8b0 +#define GEN8_CP_SQE_DCACHE_CNTL_PIPE 0x8b1 +#define GEN8_CP_CHICKEN_DBG_PIPE 0x8b2 +#define GEN8_CP_HW_FAULT_STATUS_PIPE 0x8b3 +#define GEN8_CP_HW_FAULT_STATUS_MASK_PIPE 0x8b4 +#define GEN8_CP_INTERRUPT_STATUS_GLOBAL 0x8b5 +#define GEN8_CP_INTERRUPT_STATUS_MASK_GLOBAL 0x8b6 +#define GEN8_CP_INTERRUPT_STATUS_PIPE 0x8b7 +#define GEN8_CP_INTERRUPT_STATUS_MASK_PIPE 0x8b8 +#define GEN8_CP_PIPE_STATUS_PIPE 0x8b9 +#define GEN8_CP_GPU_BATCH_ID_PIPE 0x8ba +#define GEN8_CP_SQE_STATUS_PIPE 0x8bb +#define GEN8_CP_CONTEXT_SWITCH_CNTL 0x8c0 +#define GEN8_CP_CONTEXT_SWITCH_SMMU_INFO_LO 0x8c1 +#define GEN8_CP_CONTEXT_SWITCH_SMMU_INFO_HI 0x8c2 +#define GEN8_CP_CONTEXT_SWITCH_PNSR_ADDR_LO 0x8c3 +#define GEN8_CP_CONTEXT_SWITCH_PNSR_ADDR_HI 0x8c4 +#define GEN8_CP_CONTEXT_SWITCH_PSR_ADDR_LO 0x8c5 +#define GEN8_CP_CONTEXT_SWITCH_PSR_ADDR_HI 0x8c6 +#define GEN8_CP_CONTEXT_SWITCH_NPR_ADDR_LO 0x8c7 +#define GEN8_CP_CONTEXT_SWITCH_NPR_ADDR_HI 0x8c8 +#define 
GEN8_CP_CONTEXT_SWITCH_LEVEL_STATUS 0x8cb +#define GEN8_CP_PERFCTR_CP_SEL_0 0x8d0 +#define GEN8_CP_PERFCTR_CP_SEL_1 0x8d1 +#define GEN8_CP_PERFCTR_CP_SEL_2 0x8d2 +#define GEN8_CP_PERFCTR_CP_SEL_3 0x8d3 +#define GEN8_CP_PERFCTR_CP_SEL_4 0x8d4 +#define GEN8_CP_PERFCTR_CP_SEL_5 0x8d5 +#define GEN8_CP_PERFCTR_CP_SEL_6 0x8d6 +#define GEN8_CP_PERFCTR_CP_SEL_7 0x8d7 +#define GEN8_CP_PERFCTR_CP_SEL_8 0x8d8 +#define GEN8_CP_PERFCTR_CP_SEL_9 0x8d9 +#define GEN8_CP_PERFCTR_CP_SEL_10 0x8da +#define GEN8_CP_PERFCTR_CP_SEL_11 0x8db +#define GEN8_CP_PERFCTR_CP_SEL_12 0x8dc +#define GEN8_CP_PERFCTR_CP_SEL_13 0x8dd +#define GEN8_CP_PERFCTR_CP_SEL_14 0x8de +#define GEN8_CP_PERFCTR_CP_SEL_15 0x8df +#define GEN8_CP_PERFCTR_CP_SEL_16 0x8e0 +#define GEN8_CP_PERFCTR_CP_SEL_17 0x8e1 +#define GEN8_CP_PERFCTR_CP_SEL_18 0x8e2 +#define GEN8_CP_PERFCTR_CP_SEL_19 0x8e3 +#define GEN8_CP_PERFCTR_CP_SEL_20 0x8e4 +#define GEN8_CP_ALWAYS_ON_COUNTER_LO 0x8e7 +#define GEN8_CP_ALWAYS_ON_COUNTER_HI 0x8e8 +#define GEN8_CP_ALWAYS_ON_CONTEXT_LO 0x8e9 +#define GEN8_CP_ALWAYS_ON_CONTEXT_HI 0x8ea +#define GEN8_CP_SQE_STAT_ADDR_PIPE 0x8f0 +#define GEN8_CP_SQE_STAT_DATA_PIPE 0x8f1 +#define GEN8_CP_DRAW_STATE_ADDR_PIPE 0x8f2 +#define GEN8_CP_DRAW_STATE_DATA_PIPE 0x8f3 +#define GEN8_CP_ROQ_DBG_ADDR_PIPE 0x8f4 +#define GEN8_CP_ROQ_DBG_DATA_PIPE 0x8f5 +#define GEN8_CP_MEM_POOL_DBG_ADDR_PIPE 0x8f6 +#define GEN8_CP_MEM_POOL_DBG_DATA_PIPE 0x8f7 +#define GEN8_CP_SQE_UCODE_DBG_ADDR_PIPE 0x8f8 +#define GEN8_CP_SQE_UCODE_DBG_DATA_PIPE 0x8f9 +#define GEN8_CP_RESOURCE_TABLE_DBG_ADDR_BV 0x8fa +#define GEN8_CP_RESOURCE_TABLE_DBG_DATA_BV 0x8fb +#define GEN8_CP_FIFO_DBG_ADDR_LPAC 0x8fc +#define GEN8_CP_FIFO_DBG_DATA_LPAC 0x8fd +#define GEN8_CP_FIFO_DBG_ADDR_DDE_PIPE 0x8fe +#define GEN8_CP_FIFO_DBG_DATA_DDE_PIPE 0x8ff +#define GEN8_CP_IB1_BASE_LO_PIPE 0x900 +#define GEN8_CP_IB1_BASE_HI_PIPE 0x901 +#define GEN8_CP_IB1_REM_SIZE_PIPE 0x902 +#define GEN8_CP_IB1_INIT_SIZE_PIPE 0x903 +#define GEN8_CP_IB2_BASE_LO_PIPE 0x904 +#define 
GEN8_CP_IB2_BASE_HI_PIPE 0x905 +#define GEN8_CP_IB2_REM_SIZE_PIPE 0x906 +#define GEN8_CP_IB2_INIT_SIZE_PIPE 0x907 +#define GEN8_CP_IB3_BASE_LO_PIPE 0x908 +#define GEN8_CP_IB3_BASE_HI_PIPE 0x909 +#define GEN8_CP_IB3_REM_SIZE_PIPE 0x90a +#define GEN8_CP_IB3_INIT_SIZE_PIPE 0x90b +#define GEN8_CP_SDS_BASE_LO_PIPE 0x90c +#define GEN8_CP_SDS_BASE_HI_PIPE 0x90d +#define GEN8_CP_SDS_REM_SIZE_PIPE 0x90e +#define GEN8_CP_SDS_INIT_SIZE_PIPE 0x90f +#define GEN8_CP_MRB_BASE_LO_PIPE 0x910 +#define GEN8_CP_MRB_BASE_HI_PIPE 0x911 +#define GEN8_CP_MRB_REM_SIZE_PIPE 0x912 +#define GEN8_CP_MRB_INIT_SIZE_PIPE 0x913 +#define GEN8_CP_VSD_BASE_LO_PIPE 0x914 +#define GEN8_CP_VSD_BASE_HI_PIPE 0x915 +#define GEN8_CP_VSD_REM_SIZE_PIPE 0x916 +#define GEN8_CP_VSD_INIT_SIZE_PIPE 0x917 +#define GEN8_CP_ROQ_AVAIL_RB_PIPE 0x918 +#define GEN8_CP_ROQ_AVAIL_IB1_PIPE 0x919 +#define GEN8_CP_ROQ_AVAIL_IB2_PIPE 0x91a +#define GEN8_CP_ROQ_AVAIL_IB3_PIPE 0x91b +#define GEN8_CP_ROQ_AVAIL_SDS_PIPE 0x91c +#define GEN8_CP_ROQ_AVAIL_MRB_PIPE 0x91d +#define GEN8_CP_ROQ_AVAIL_VSD_PIPE 0x91e +#define GEN8_CP_ROQ_RB_STATUS_PIPE 0x920 +#define GEN8_CP_ROQ_IB1_STATUS_PIPE 0x921 +#define GEN8_CP_ROQ_IB2_STATUS_PIPE 0x922 +#define GEN8_CP_ROQ_IB3_STATUS_PIPE 0x923 +#define GEN8_CP_ROQ_SDS_STATUS_PIPE 0x924 +#define GEN8_CP_ROQ_MRB_STATUS_PIPE 0x925 +#define GEN8_CP_ROQ_VSD_STATUS_PIPE 0x926 + +/* UCHE registers */ +#define GEN8_UCHE_MODE_CNTL 0xe01 +#define GEN8_UCHE_CACHE_WAYS 0xe04 +#define GEN8_UCHE_WRITE_THRU_BASE_LO 0xe06 +#define GEN8_UCHE_WRITE_THRU_BASE_HI 0xe07 +#define GEN8_UCHE_TRAP_BASE_LO 0xe08 +#define GEN8_UCHE_TRAP_BASE_HI 0xe09 +#define GEN8_UCHE_VARB_IDLE_TIMEOUT 0xe10 +#define GEN8_UCHE_CLIENT_PF 0xe11 +#define GEN8_UCHE_GBIF_GX_CONFIG 0xe12 +#define GEN8_UCHE_DBG_ECO_CNTL_0 0xe15 +#define GEN8_UCHE_HW_DBG_CNTL 0xe16 +#define GEN8_UCHE_PERFCTR_UCHE_SEL_0 0xe20 +#define GEN8_UCHE_PERFCTR_UCHE_SEL_1 0xe21 +#define GEN8_UCHE_PERFCTR_UCHE_SEL_2 0xe22 +#define GEN8_UCHE_PERFCTR_UCHE_SEL_3 0xe23 +#define 
GEN8_UCHE_PERFCTR_UCHE_SEL_4 0xe24 +#define GEN8_UCHE_PERFCTR_UCHE_SEL_5 0xe25 +#define GEN8_UCHE_PERFCTR_UCHE_SEL_6 0xe26 +#define GEN8_UCHE_PERFCTR_UCHE_SEL_7 0xe27 +#define GEN8_UCHE_PERFCTR_UCHE_SEL_8 0xe28 +#define GEN8_UCHE_PERFCTR_UCHE_SEL_9 0xe29 +#define GEN8_UCHE_PERFCTR_UCHE_SEL_10 0xe2a +#define GEN8_UCHE_PERFCTR_UCHE_SEL_11 0xe2b +#define GEN8_UCHE_PERFCTR_UCHE_SEL_12 0xe2c +#define GEN8_UCHE_PERFCTR_UCHE_SEL_13 0xe2d +#define GEN8_UCHE_PERFCTR_UCHE_SEL_14 0xe2e +#define GEN8_UCHE_PERFCTR_UCHE_SEL_15 0xe2f +#define GEN8_UCHE_PERFCTR_UCHE_SEL_16 0xe30 +#define GEN8_UCHE_PERFCTR_UCHE_SEL_17 0xe31 +#define GEN8_UCHE_PERFCTR_UCHE_SEL_18 0xe32 +#define GEN8_UCHE_PERFCTR_UCHE_SEL_19 0xe33 +#define GEN8_UCHE_PERFCTR_UCHE_SEL_20 0xe34 +#define GEN8_UCHE_PERFCTR_UCHE_SEL_21 0xe35 +#define GEN8_UCHE_PERFCTR_UCHE_SEL_22 0xe36 +#define GEN8_UCHE_PERFCTR_UCHE_SEL_23 0xe37 + +/* UCHE CCHE registers */ +#define GEN8_UCHE_CCHE_MODE_CNTL 0xf01 +#define GEN8_UCHE_CCHE_CACHE_WAYS 0xf02 +#define GEN8_UCHE_CCHE_WRITE_THRU_BASE_LO 0xf04 +#define GEN8_UCHE_CCHE_WRITE_THRU_BASE_HI 0xf05 +#define GEN8_UCHE_CCHE_TRAP_BASE_LO 0xf06 +#define GEN8_UCHE_CCHE_TRAP_BASE_HI 0xf07 +#define GEN8_UCHE_CCHE_GC_GMEM_RANGE_MIN_LO 0xf08 +#define GEN8_UCHE_CCHE_GC_GMEM_RANGE_MIN_HI 0xf09 +#define GEN8_UCHE_CCHE_LPAC_GMEM_RANGE_MIN_LO 0xf0a +#define GEN8_UCHE_CCHE_LPAC_GMEM_RANGE_MIN_HI 0xf0b +#define GEN8_UCHE_CCHE_HW_DBG_CNTL 0xf0c + +#define GEN8_GBIF_CX_CONFIG 0x3c00 + +/* GRAS registers */ +#define GEN8_GRAS_TSEFE_DBG_ECO_CNTL 0x8600 +#define GEN8_GRAS_NC_MODE_CNTL 0x8700 +#define GEN8_GRAS_DBG_ECO_CNTL 0x8702 +#define GEN8_GRAS_PERFCTR_TSE_SEL_0 0x8710 +#define GEN8_GRAS_PERFCTR_TSE_SEL_1 0x8711 +#define GEN8_GRAS_PERFCTR_TSE_SEL_2 0x8712 +#define GEN8_GRAS_PERFCTR_TSE_SEL_3 0x8713 +#define GEN8_GRAS_PERFCTR_RAS_SEL_0 0x8720 +#define GEN8_GRAS_PERFCTR_RAS_SEL_1 0x8721 +#define GEN8_GRAS_PERFCTR_RAS_SEL_2 0x8722 +#define GEN8_GRAS_PERFCTR_RAS_SEL_3 0x8723 +#define 
GEN8_GRAS_PERFCTR_LRZ_SEL_0 0x8730 +#define GEN8_GRAS_PERFCTR_LRZ_SEL_1 0x8731 +#define GEN8_GRAS_PERFCTR_LRZ_SEL_2 0x8732 +#define GEN8_GRAS_PERFCTR_LRZ_SEL_3 0x8733 + +/* RB registers */ +#define GEN8_RB_CCU_CNTL 0x8e07 +#define GEN8_RB_CCU_NC_MODE_CNTL 0x8e08 +#define GEN8_RB_GC_GMEM_PROTECT 0x8e09 +#define GEN8_RB_LPAC_GMEM_PROTECT 0x8e0a +#define GEN8_RB_PERFCTR_RB_SEL_0 0x8e10 +#define GEN8_RB_PERFCTR_RB_SEL_1 0x8e11 +#define GEN8_RB_PERFCTR_RB_SEL_2 0x8e12 +#define GEN8_RB_PERFCTR_RB_SEL_3 0x8e13 +#define GEN8_RB_PERFCTR_RB_SEL_4 0x8e14 +#define GEN8_RB_PERFCTR_RB_SEL_5 0x8e15 +#define GEN8_RB_PERFCTR_RB_SEL_6 0x8e16 +#define GEN8_RB_PERFCTR_RB_SEL_7 0x8e17 +#define GEN8_RB_PERFCTR_CCU_SEL_0 0x8e18 +#define GEN8_RB_PERFCTR_CCU_SEL_1 0x8e19 +#define GEN8_RB_PERFCTR_CCU_SEL_2 0x8e1a +#define GEN8_RB_PERFCTR_CCU_SEL_3 0x8e1b +#define GEN8_RB_PERFCTR_CCU_SEL_4 0x8e1c +#define GEN8_RB_PERFCTR_CMP_SEL_0 0x8f04 +#define GEN8_RB_PERFCTR_CMP_SEL_1 0x8f05 +#define GEN8_RB_PERFCTR_CMP_SEL_2 0x8f06 +#define GEN8_RB_PERFCTR_CMP_SEL_3 0x8f07 +#define GEN8_RB_PERFCTR_UFC_SEL_0 0x8f10 +#define GEN8_RB_PERFCTR_UFC_SEL_1 0x8f11 +#define GEN8_RB_PERFCTR_UFC_SEL_2 0x8f12 +#define GEN8_RB_PERFCTR_UFC_SEL_3 0x8f13 +#define GEN8_RB_PERFCTR_UFC_SEL_4 0x8f14 +#define GEN8_RB_PERFCTR_UFC_SEL_5 0x8f15 +#define GEN8_RB_SUB_BLOCK_SEL_CNTL_HOST 0x8e3b +#define GEN8_RB_SUB_BLOCK_SEL_CNTL_CD 0x8e3d +#define GEN8_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE 0x8e50 +#define GEN8_RB_CMP_NC_MODE_CNTL 0x8f00 +#define GEN8_RB_RESOLVE_PREFETCH_CNTL 0x8f01 +#define GEN8_RB_CMP_DBG_ECO_CNTL 0x8f02 +#define GEN8_RB_UNSLICE_STATUS 0x8f03 + +/* VPC registers */ +#define GEN8_VPC_PERFCTR_VPC_SEL_0 0x9690 /* Indexed Register */ +#define GEN8_VPC_PERFCTR_VPC_SEL_1 0x9691 /* Indexed Register */ +#define GEN8_VPC_PERFCTR_VPC_SEL_2 0x9692 /* Indexed Register */ +#define GEN8_VPC_PERFCTR_VPC_SEL_3 0x9693 /* Indexed Register */ +#define GEN8_VPC_PERFCTR_VPC_SEL_4 0x9694 /* Indexed Register */ +#define 
GEN8_VPC_PERFCTR_VPC_SEL_5 0x9695 /* Indexed Register */ +#define GEN8_VPC_PERFCTR_VPC_SEL_6 0x9696 /* Indexed Register */ +#define GEN8_VPC_PERFCTR_VPC_SEL_7 0x9697 /* Indexed Register */ +#define GEN8_VPC_PERFCTR_VPC_SEL_8 0x9698 /* Indexed Register */ +#define GEN8_VPC_PERFCTR_VPC_SEL_9 0x9699 /* Indexed Register */ +#define GEN8_VPC_PERFCTR_VPC_SEL_10 0x969a /* Indexed Register */ +#define GEN8_VPC_PERFCTR_VPC_SEL_11 0x969b /* Indexed Register */ +#define GEN8_VPC_LB_MODE_CNTL 0x9740 +#define GEN8_VPC_FLATSHADE_MODE_CNTL 0x9741 +#define GEN8_VPC_DBG_ECO_CNTL_1 0x9742 + +/* PC registers:*/ +#define GEN8_PC_AUTO_VERTEX_STRIDE 0x9e0a +#define GEN8_PC_VIS_STREAM_CNTL 0x9e0d +#define GEN8_PC_CHICKEN_BITS_2 0x9f20 +#define GEN8_PC_CHICKEN_BITS_3 0x9e22 +#define GEN8_PC_CHICKEN_BITS_4 0x9e23 +#define GEN8_PC_CHICKEN_BITS_1 0x9e50 +#define GEN8_PC_CONTEXT_SWITCH_STABILIZE_CNTL_1 0x9e64 + + +/* VFD registers */ +#define GEN8_VFD_PERFCTR_VFD_SEL_0 0xa610 +#define GEN8_VFD_PERFCTR_VFD_SEL_1 0xa611 +#define GEN8_VFD_PERFCTR_VFD_SEL_2 0xa612 +#define GEN8_VFD_PERFCTR_VFD_SEL_3 0xa613 +#define GEN8_VFD_PERFCTR_VFD_SEL_4 0xa614 +#define GEN8_VFD_PERFCTR_VFD_SEL_5 0xa615 +#define GEN8_VFD_PERFCTR_VFD_SEL_6 0xa616 +#define GEN8_VFD_PERFCTR_VFD_SEL_7 0xa617 +#define GEN8_VFD_PERFCTR_VFD_SEL_8 0xa618 +#define GEN8_VFD_PERFCTR_VFD_SEL_9 0xa619 +#define GEN8_VFD_PERFCTR_VFD_SEL_10 0xa61a +#define GEN8_VFD_PERFCTR_VFD_SEL_11 0xa61b +#define GEN8_VFD_PERFCTR_VFD_SEL_12 0xa61c +#define GEN8_VFD_PERFCTR_VFD_SEL_13 0xa61d +#define GEN8_VFD_PERFCTR_VFD_SEL_14 0xa61e +#define GEN8_VFD_PERFCTR_VFD_SEL_15 0xa61f +#define GEN8_VFD_CB_BV_THRESHOLD 0xa639 +#define GEN8_VFD_CB_BR_THRESHOLD 0xa63a +#define GEN8_VFD_CB_BUSY_REQ_CNT 0xa63b +#define GEN8_VFD_CB_LP_REQ_CNT 0xa63c + +/* SP registers */ +#define GEN8_SP_DBG_ECO_CNTL 0xae00 +#define GEN8_SP_SHADER_PROFILING 0xae01 +#define GEN8_SP_NC_MODE_CNTL 0xae02 +#define GEN8_SP_CHICKEN_BITS 0xae03 +#define GEN8_SP_NC_MODE_CNTL_2 0xae04 +#define 
GEN8_SP_SS_CHICKEN_BITS_0 0xae05 +#define GEN8_SP_ISDB_CNTL 0xae06 +#define GEN8_SP_PERFCTR_CNTL 0xae07 +#define GEN8_SP_CHICKEN_BITS_1 0xae08 +#define GEN8_SP_CHICKEN_BITS_2 0xae09 +#define GEN8_SP_CHICKEN_BITS_3 0xae0a +#define GEN8_SP_CHICKEN_BITS_4 0xae0b +#define GEN8_SP_STATUS 0xae0c +#define GEN8_SP_PERFCTR_SHADER_MASK 0xae0f +#define GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_LO 0xae10 +#define GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_HI 0xae11 +#define GEN8_SP_HLSQ_LPAC_GMEM_RANGE_MIN_LO 0xae12 +#define GEN8_SP_HLSQ_LPAC_GMEM_RANGE_MIN_HI 0xae13 +#define GEN8_SP_LPAC_CPI_STATUS 0xae15 +#define GEN8_SP_LPAC_DBG_STATUS 0xae16 +#define GEN8_SP_LPAC_ISDB_BATCH_COUNT 0xae17 +#define GEN8_SP_LPAC_ISDB_BATCH_COUNT_INCR_EN 0xae18 +#define GEN8_SP_LPAC_ISDB_BATCH_COUNT_SHADERS 0xae19 +#define GEN8_SP_ISDB_BATCH_COUNT 0xae30 +#define GEN8_SP_ISDB_BATCH_COUNT_INCR_EN 0xae31 +#define GEN8_SP_ISDB_BATCH_COUNT_SHADERS 0xae32 +#define GEN8_SP_ISDB_DEBUG_CONFIG 0xae35 +#define GEN8_SP_SELF_THROTTLE_CONTROL 0xae3a +#define GEN8_SP_DISPATCH_CNTL 0xae3b +#define GEN8_SP_SW_DEBUG_ADDR_LO 0xae3c +#define GEN8_SP_SW_DEBUG_ADDR_HI 0xae3d +#define GEN8_SP_ISDB_DEBUG_ADDR_LO 0xae3e +#define GEN8_SP_ISDB_DEBUG_ADDR_HI 0xae3f +#define GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP 0xae6b +#define GEN8_SP_READ_SEL 0xae6d +#define GEN8_SP_DBG_CNTL 0xae71 + +#define GEN8_SP_PERFCTR_HLSQ_SEL_0 0xae60 +#define GEN8_SP_PERFCTR_HLSQ_SEL_1 0xae61 +#define GEN8_SP_PERFCTR_HLSQ_SEL_2 0xae62 +#define GEN8_SP_PERFCTR_HLSQ_SEL_3 0xae63 +#define GEN8_SP_PERFCTR_HLSQ_SEL_4 0xae64 +#define GEN8_SP_PERFCTR_HLSQ_SEL_5 0xae65 +#define GEN8_SP_PERFCTR_SP_SEL_0 0xae80 +#define GEN8_SP_PERFCTR_SP_SEL_1 0xae81 +#define GEN8_SP_PERFCTR_SP_SEL_2 0xae82 +#define GEN8_SP_PERFCTR_SP_SEL_3 0xae83 +#define GEN8_SP_PERFCTR_SP_SEL_4 0xae84 +#define GEN8_SP_PERFCTR_SP_SEL_5 0xae85 +#define GEN8_SP_PERFCTR_SP_SEL_6 0xae86 +#define GEN8_SP_PERFCTR_SP_SEL_7 0xae87 +#define GEN8_SP_PERFCTR_SP_SEL_8 0xae88 +#define GEN8_SP_PERFCTR_SP_SEL_9 0xae89 
+#define GEN8_SP_PERFCTR_SP_SEL_10 0xae8a +#define GEN8_SP_PERFCTR_SP_SEL_11 0xae8b +#define GEN8_SP_PERFCTR_SP_SEL_12 0xae8c +#define GEN8_SP_PERFCTR_SP_SEL_13 0xae8d +#define GEN8_SP_PERFCTR_SP_SEL_14 0xae8e +#define GEN8_SP_PERFCTR_SP_SEL_15 0xae8f +#define GEN8_SP_PERFCTR_SP_SEL_16 0xae90 +#define GEN8_SP_PERFCTR_SP_SEL_17 0xae91 +#define GEN8_SP_PERFCTR_SP_SEL_18 0xae92 +#define GEN8_SP_PERFCTR_SP_SEL_19 0xae93 +#define GEN8_SP_PERFCTR_SP_SEL_20 0xae94 +#define GEN8_SP_PERFCTR_SP_SEL_21 0xae95 +#define GEN8_SP_PERFCTR_SP_SEL_22 0xae96 +#define GEN8_SP_PERFCTR_SP_SEL_23 0xae97 +#define GEN8_SP_PERFCTR_SP_SEL_24 0xae98 +#define GEN8_SP_PERFCTR_SP_SEL_25 0xae99 +#define GEN8_SP_PERFCTR_SP_SEL_26 0xae9a +#define GEN8_SP_PERFCTR_SP_SEL_27 0xae9b +#define GEN8_SP_PERFCTR_SP_SEL_28 0xae9c +#define GEN8_SP_PERFCTR_SP_SEL_29 0xae9d +#define GEN8_SP_PERFCTR_SP_SEL_30 0xae9e +#define GEN8_SP_PERFCTR_SP_SEL_31 0xae9f +#define GEN8_SP_PERFCTR_SP_SEL_32 0xaea0 +#define GEN8_SP_PERFCTR_SP_SEL_33 0xaea1 +#define GEN8_SP_PERFCTR_SP_SEL_34 0xaea2 +#define GEN8_SP_PERFCTR_SP_SEL_35 0xaea3 + +/* TP registers */ +#define GEN8_TPL1_DBG_ECO_CNTL 0xb600 +#define GEN8_TPL1_DBG_ECO_CNTL1 0xb602 +#define GEN8_TPL1_NC_MODE_CNTL 0xb604 +#define GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_0 0xb606 +#define GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_1 0xb607 +#define GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_2 0xb608 +#define GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_3 0xb609 +#define GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_4 0xb60a +#define GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_5 0xb60b +#define GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_6 0xb60c +#define GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_7 0xb60d +#define GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_8 0xb60e +#define GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_9 0xb60f +#define GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_10 0xb610 +#define GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_11 0xb611 +#define GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_12 0xb612 +#define GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_13 0xb613 +#define GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_14 0xb614 +#define 
GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_15 0xb615 +#define GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_16 0xb616 +#define GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_17 0xb617 +#define GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_18 0xb618 +#define GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_19 0xb619 + +#define GEN8_TPL1_PERFCTR_TP_SEL_0 0xb620 +#define GEN8_TPL1_PERFCTR_TP_SEL_1 0xb621 +#define GEN8_TPL1_PERFCTR_TP_SEL_2 0xb622 +#define GEN8_TPL1_PERFCTR_TP_SEL_3 0xb623 +#define GEN8_TPL1_PERFCTR_TP_SEL_4 0xb624 +#define GEN8_TPL1_PERFCTR_TP_SEL_5 0xb625 +#define GEN8_TPL1_PERFCTR_TP_SEL_6 0xb626 +#define GEN8_TPL1_PERFCTR_TP_SEL_7 0xb627 +#define GEN8_TPL1_PERFCTR_TP_SEL_8 0xb628 +#define GEN8_TPL1_PERFCTR_TP_SEL_9 0xb629 +#define GEN8_TPL1_PERFCTR_TP_SEL_10 0xb62a +#define GEN8_TPL1_PERFCTR_TP_SEL_11 0xb62b +#define GEN8_TPL1_PERFCTR_TP_SEL_12 0xb62c +#define GEN8_TPL1_PERFCTR_TP_SEL_13 0xb62d +#define GEN8_TPL1_PERFCTR_TP_SEL_14 0xb62e +#define GEN8_TPL1_PERFCTR_TP_SEL_15 0xb62f +#define GEN8_TPL1_PERFCTR_TP_SEL_16 0xb630 +#define GEN8_TPL1_PERFCTR_TP_SEL_17 0xb631 +#define GEN8_TPL1_PERFCTR_TP_SEL_18 0xb632 +#define GEN8_TPL1_PERFCTR_TP_SEL_19 0xb633 + + +#define GEN8_SP_AHB_READ_APERTURE 0xc000 + +#define GEN8_RBBM_SECVID_TRUST_CNTL 0xf400 +#define GEN8_RBBM_SECVID_TSB_TRUSTED_BASE_LO 0xf800 +#define GEN8_RBBM_SECVID_TSB_TRUSTED_BASE_HI 0xf801 +#define GEN8_RBBM_SECVID_TSB_TRUSTED_SIZE 0xf802 +#define GEN8_RBBM_SECVID_TSB_CNTL 0xf803 +#define GEN8_RBBM_SECVID_TSB_STATUS_LO 0xfc00 +#define GEN8_RBBM_SECVID_TSB_STATUS_HI 0xfc01 + +/* GBIF countables */ +#define GBIF_AXI0_READ_DATA_TOTAL_BEATS 34 +#define GBIF_AXI1_READ_DATA_TOTAL_BEATS 35 +#define GBIF_AXI0_WRITE_DATA_TOTAL_BEATS 46 +#define GBIF_AXI1_WRITE_DATA_TOTAL_BEATS 47 + +/* GBIF registers */ +#define GEN8_GBIF_SCACHE_CNTL0 0x3c01 +#define GEN8_GBIF_SCACHE_CNTL1 0x3c02 +#define GEN8_GBIF_QSB_SIDE0 0x3c03 +#define GEN8_GBIF_QSB_SIDE1 0x3c04 +#define GEN8_GBIF_QSB_SIDE2 0x3c05 +#define GEN8_GBIF_QSB_SIDE3 0x3c06 +#define GEN8_GBIF_HALT 0x3c45 +#define 
GEN8_GBIF_HALT_ACK 0x3c46 + +#define GEN8_GBIF_CLIENT_HALT_MASK BIT(0) +#define GEN8_GBIF_ARB_HALT_MASK BIT(1) +#define GEN8_GBIF_GX_HALT_MASK BIT(0) + +#define GEN8_GBIF_PERF_PWR_CNT_EN 0x3cc0 +#define GEN8_GBIF_PERF_PWR_CNT_CLR 0x3cc1 +#define GEN8_GBIF_PERF_CNT_SEL_0 0x3cc2 +#define GEN8_GBIF_PERF_CNT_SEL_1 0x3cc3 +#define GEN8_GBIF_PWR_CNT_SEL 0x3cc4 +#define GEN8_GBIF_PERF_CNT_LO_0 0x3cc6 +#define GEN8_GBIF_PERF_CNT_HI_0 0x3cc7 +#define GEN8_GBIF_PERF_CNT_LO_1 0x3cc8 +#define GEN8_GBIF_PERF_CNT_HI_1 0x3cc9 +#define GEN8_GBIF_PERF_CNT_LO_2 0x3cca +#define GEN8_GBIF_PERF_CNT_HI_2 0x3ccb +#define GEN8_GBIF_PERF_CNT_LO_3 0x3ccc +#define GEN8_GBIF_PERF_CNT_HI_3 0x3ccd +#define GEN8_GBIF_PERF_CNT_LO_4 0x3cce +#define GEN8_GBIF_PERF_CNT_HI_4 0x3ccf +#define GEN8_GBIF_PERF_CNT_LO_5 0x3cd0 +#define GEN8_GBIF_PERF_CNT_HI_5 0x3cd1 +#define GEN8_GBIF_PERF_CNT_LO_6 0x3cd2 +#define GEN8_GBIF_PERF_CNT_HI_6 0x3cd3 +#define GEN8_GBIF_PERF_CNT_LO_7 0x3cd4 +#define GEN8_GBIF_PERF_CNT_HI_7 0x3cd5 +#define GEN8_GBIF_PWR_CNT_LO_0 0x3ce0 /* Indexed Register */ +#define GEN8_GBIF_PWR_CNT_LO_1 0x3ce1 /* Indexed Register */ +#define GEN8_GBIF_PWR_CNT_LO_2 0x3ce2 /* Indexed Register */ +#define GEN8_GBIF_PWR_CNT_HI_0 0x3ce3 /* Indexed Register */ +#define GEN8_GBIF_PWR_CNT_HI_1 0x3ce4 /* Indexed Register */ +#define GEN8_GBIF_PWR_CNT_HI_2 0x3ce5 /* Indexed Register */ + +/* CX_DBGC_CFG registers: Fixme for Snapshot */ +#define GEN8_CX_DBGC_CFG_DBGBUS_SEL_A 0x18400 +#define GEN8_CX_DBGC_CFG_DBGBUS_SEL_B 0x18401 +#define GEN8_CX_DBGC_CFG_DBGBUS_SEL_C 0x18402 +#define GEN8_CX_DBGC_CFG_DBGBUS_SEL_D 0x18403 +#define GEN8_CX_DBGC_CFG_DBGBUS_CNTLT 0x18404 +#define GEN8_CX_DBGC_CFG_DBGBUS_CNTLM 0x18405 +#define GEN8_CX_DBGC_CFG_DBGBUS_OPL 0x18406 +#define GEN8_CX_DBGC_CFG_DBGBUS_OPE 0x18407 +#define GEN8_CX_DBGC_CFG_DBGBUS_IVTL_0 0x18408 +#define GEN8_CX_DBGC_CFG_DBGBUS_IVTL_1 0x18409 +#define GEN8_CX_DBGC_CFG_DBGBUS_IVTL_2 0x1840a +#define GEN8_CX_DBGC_CFG_DBGBUS_IVTL_3 0x1840b +#define 
GEN8_CX_DBGC_CFG_DBGBUS_MASKL_0 0x1840c +#define GEN8_CX_DBGC_CFG_DBGBUS_MASKL_1 0x1840d +#define GEN8_CX_DBGC_CFG_DBGBUS_MASKL_2 0x1840e +#define GEN8_CX_DBGC_CFG_DBGBUS_MASKL_3 0x1840f +#define GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_0 0x18410 +#define GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_1 0x18411 +#define GEN8_CX_DBGC_CFG_DBGBUS_IVTE_0 0x18412 +#define GEN8_CX_DBGC_CFG_DBGBUS_IVTE_1 0x18413 +#define GEN8_CX_DBGC_CFG_DBGBUS_IVTE_2 0x18414 +#define GEN8_CX_DBGC_CFG_DBGBUS_IVTE_3 0x18415 +#define GEN8_CX_DBGC_CFG_DBGBUS_MASKE_0 0x18416 +#define GEN8_CX_DBGC_CFG_DBGBUS_MASKE_1 0x18417 +#define GEN8_CX_DBGC_CFG_DBGBUS_MASKE_2 0x18418 +#define GEN8_CX_DBGC_CFG_DBGBUS_MASKE_3 0x18419 +#define GEN8_CX_DBGC_CFG_DBGBUS_NIBBLEE 0x1841a +#define GEN8_CX_DBGC_CFG_DBGBUS_PTRC0 0x1841b +#define GEN8_CX_DBGC_CFG_DBGBUS_PTRC1 0x1841c +#define GEN8_CX_DBGC_CFG_DBGBUS_LOADREG 0x1841d +#define GEN8_CX_DBGC_CFG_DBGBUS_IDX 0x1841e +#define GEN8_CX_DBGC_CFG_DBGBUS_CLRC 0x1841f +#define GEN8_CX_DBGC_CFG_DBGBUS_LOADIVT 0x18420 +#define GEN8_CX_DBGC_VBIF_DBG_CNTL 0x18421 +#define GEN8_CX_DBGC_DBG_LO_HI_GPIO 0x18422 +#define GEN8_CX_DBGC_EXT_TRACE_BUS_CNTL 0x18423 +#define GEN8_CX_DBGC_READ_AHB_THROUGH_DBG 0x18424 +#define GEN8_CX_DBGC_CFG_DBGBUS_OVER 0x18426 +#define GEN8_CX_DBGC_CFG_DBGBUS_TRACE_BUF1 0x1842f +#define GEN8_CX_DBGC_CFG_DBGBUS_TRACE_BUF2 0x18430 +#define GEN8_CX_DBGC_EVT_CFG 0x18435 +#define GEN8_CX_DBGC_EVT_INTF_SEL_0 0x18436 +#define GEN8_CX_DBGC_EVT_INTF_SEL_1 0x18437 +#define GEN8_CX_DBGC_PERF_ATB_CFG 0x18438 +#define GEN8_CX_DBGC_PERF_ATB_DRAIN_CMD 0x1844a +#define GEN8_CX_DBGC_ECO_CNTL 0x1843b +#define GEN8_CX_DBGC_AHB_DBG_CNTL 0x1843c +#define GEN8_CX_DBGC_TCM_DBG_ADDR 0x18580 +#define GEN8_CX_DBGC_TCM_DBG_DATA 0x18581 + +/* GMU control registers */ +#define GEN8_GMU_CM3_ITCM_START 0x1b400 +#define GEN8_GMU_CM3_DTCM_START 0x1c400 +#define GEN8_GMU_NMI_CONTROL_STATUS 0x1cbf0 +#define GEN8_GMU_BOOT_SLUMBER_OPTION 0x1cbf8 +#define GEN8_GMU_GX_VOTE_IDX 0x1cbf9 +#define 
GEN8_GMU_MX_VOTE_IDX 0x1cbfa +#define GEN8_GMU_DCVS_ACK_OPTION 0x1cbfc +#define GEN8_GMU_DCVS_PERF_SETTING 0x1cbfd +#define GEN8_GMU_DCVS_BW_SETTING 0x1cbfe +#define GEN8_GMU_DCVS_RETURN 0x1cbff +#define GEN8_GMU_ICACHE_CONFIG 0x1f400 +#define GEN8_GMU_DCACHE_CONFIG 0x1f401 +#define GEN8_GMU_SYS_BUS_CONFIG 0x1f40f +#define GEN8_GMUCX_MRC_GBIF_QOS_CTRL 0x1f50b +#define GEN8_GMUCX_PWR_COL_KEEPALIVE 0x1f7e4 +#define GEN8_GMUCX_PWR_COL_PREEMPTION_KEEPALIVE 0x1f7e5 +#define GEN8_GMUCX_GFX_PWR_CLK_STATUS 0x1f7e8 +#define GEN8_GMUCX_RPMH_POWER_STATE 0x1f7e9 +/* FAL10 veto register */ +#define GEN8_GMUCX_CX_FAL_INTF 0x1f7ec +#define GEN8_GMUCX_CX_FALNEXT_INTF 0x1f7ed +#define GEN8_GMUCX_CM3_SYSRESET 0x1f800 +#define GEN8_GMUCX_CM3_BOOT_CONFIG 0x1f801 +#define GEN8_GMUCX_WFI_CONFIG 0x1f802 +#define GEN8_GMUCX_WDOG_CTRL 0x1f813 +#define GEN8_GMUCX_CM3_FW_INIT_RESULT 0x1f81c +#define GEN8_GMUCX_CM3_CFG 0x1f82d +#define GEN8_GMUCX_AO_COUNTER_LO 0x1f840 +#define GEN8_GMUCX_AO_COUNTER_HI 0x1f841 +#define GEN8_GMUCX_PERF_COUNTER_ENABLE 0x1f848 +#define GEN8_GMUCX_POWER_COUNTER_ENABLE 0x1fc10 + +#define GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_0 0x1fc30 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_1 0x1fc31 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_2 0x1fc32 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_3 0x1fc33 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_0 0x1fc40 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_1 0x1fc41 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_2 0x1fc42 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_3 0x1fc43 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_4 0x1fc44 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_5 0x1fc45 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_6 0x1fc46 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_7 0x1fc47 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_8 0x1fc48 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_9 0x1fc49 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_0 0x1fc50 +#define 
GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_1 0x1fc51 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_2 0x1fc52 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_3 0x1fc53 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_4 0x1fc54 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_5 0x1fc55 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_6 0x1fc56 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_7 0x1fc57 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_8 0x1fc58 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_9 0x1fc59 + +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_0 0x1fca0 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_0 0x1fca1 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_1 0x1fca2 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_1 0x1fca3 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_2 0x1fca4 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_2 0x1fca5 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_3 0x1fca6 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_3 0x1fca7 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_4 0x1fca8 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_4 0x1fca9 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_5 0x1fcaa +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_5 0x1fcab +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_6 0x1fcac +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_6 0x1fcad +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_7 0x1fcae +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_7 0x1fcaf +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_8 0x1fcb0 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_8 0x1fcb1 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_9 0x1fcb2 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_9 0x1fcb3 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_10 0x1fcb4 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_10 0x1fcb5 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_11 0x1fcb6 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_11 0x1fcb7 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_12 0x1fcb8 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_12 0x1fcb9 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_13 0x1fcba +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_13 0x1fcbb +#define 
GEN8_GMUCX_POWER_COUNTER_XOCLK_L_14 0x1fcbc +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_14 0x1fcbd +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_15 0x1fcbe +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_15 0x1fcbf +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_16 0x1fcc0 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_16 0x1fcc1 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_17 0x1fcc2 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_17 0x1fcc3 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_18 0x1fcc4 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_18 0x1fcc5 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_19 0x1fcc6 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_19 0x1fcc7 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_20 0x1fcc8 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_20 0x1fcc9 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_21 0x1fcca +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_21 0x1fccb +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_22 0x1fccc +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_22 0x1fccd +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_23 0x1fcce +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_23 0x1fccf +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_24 0x1fcd0 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_24 0x1fcd1 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_25 0x1fcd2 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_25 0x1fcd3 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_26 0x1fcd4 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_26 0x1fcd5 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_27 0x1fcd6 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_27 0x1fcd7 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_28 0x1fcd8 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_28 0x1fcd9 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_29 0x1fcda +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_29 0x1fcdb +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_30 0x1fcdc +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_30 0x1fcdd +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_31 0x1fcde +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_31 0x1fcdf +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_32 0x1fce0 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_32 
0x1fce1 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_33 0x1fce2 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_33 0x1fce3 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_34 0x1fce4 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_34 0x1fce5 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_35 0x1fce6 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_35 0x1fce7 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_36 0x1fce8 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_36 0x1fce9 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_37 0x1fcea +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_37 0x1fceb +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_38 0x1fcec +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_38 0x1fced +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_39 0x1fcee +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_39 0x1fcef + +/* HFI registers*/ +#define GEN8_GMUCX_HFI_CTRL_STATUS 0x1f980 +#define GEN8_GMUCX_HFI_QTBL_INFO 0x1f984 +#define GEN8_GMUCX_HFI_QTBL_ADDR 0x1f985 +#define GEN8_GMUCX_HFI_CTRL_INIT 0x1f986 +#define GEN8_GMUCX_GMU2HOST_INTR_SET 0x1f990 +#define GEN8_GMUCX_GMU2HOST_INTR_CLR 0x1f991 +#define GEN8_GMUCX_GMU2HOST_INTR_INFO 0x1f992 +#define GEN8_GMUCX_GMU2HOST_INTR_MASK 0x1f993 +#define GEN8_GMUCX_HOST2GMU_INTR_SET 0x1f994 +#define GEN8_GMUCX_HOST2GMU_INTR_CLR 0x1f995 +#define GEN8_GMUCX_HOST2GMU_INTR_RAW_INFO 0x1f996 +#define GEN8_GMUCX_GENERAL_8 0x1f9c8 +#define GEN8_GMUCX_GENERAL_9 0x1f9c9 +#define GEN8_GMUCX_GENERAL_10 0x1f9ca + +/* Always on registers */ +#define GEN8_GMUAO_AO_INTERRUPT_EN 0x23b03 +#define GEN8_GMUAO_AO_HOST_INTERRUPT_CLR 0x23b04 +#define GEN8_GMUAO_AO_HOST_INTERRUPT_STATUS 0x23b05 +#define GEN8_GMUAO_AO_HOST_INTERRUPT_MASK 0x23b06 + +/* GMU RSC control registers */ +#define GEN8_GMUAO_RSCC_CONTROL_REQ 0x23b07 +#define GEN8_GMUAO_RSCC_CONTROL_ACK 0x23b08 + +#define GEN8_GMUAO_CGC_MODE_CNTL 0x23b09 +#define GEN8_GMUAO_CGC_DELAY_CNTL 0x23b0a +#define GEN8_GMUAO_CGC_HYST_CNTL 0x23b0b +#define GEN8_GMUAO_GPU_CX_BUSY_STATUS 0x23b0c +#define GEN8_GMUAO_GPU_CX_BUSY_STATUS2 0x23b0d +#define GEN8_GMUAO_GPU_CX_BUSY_MASK 
0x23b0e + +/* FENCE control registers */ +#define GEN8_GMUAO_AHB_FENCE_CTRL 0x23b10 +#define GEN8_GMUAO_AHB_FENCE_RANGE_0 0x23b11 +#define GEN8_GMUAO_AHB_FENCE_STATUS 0x23b13 +#define GEN8_GMUAO_AHB_FENCE_STATUS_CLR 0x23b14 +#define GEN8_GMUAO_RBBM_INT_UNMASKED_STATUS_SHADOW 0x23b15 +#define GEN8_GMUAO_AO_SPARE_CNTL 0x23b16 +#define GEN8_GMUAO_LPAC_BUSY_STATUS 0x23b30 + +/* GMU countables */ +#define GEN8_GMU_CM3_BUSY_CYCLES 0 + +/* GPUCC registers */ +#define GEN8_GPU_CC_GX_DOMAIN_MISC3 0x26541 +#define GEN8_GPU_CC_CX_GDSCR 0x26420 + +/* GPU RSC sequencer registers */ +#define GEN8_GPU_RSCC_RSC_STATUS0_DRV0 0x00004 +#define GEN8_RSCC_PDC_SEQ_START_ADDR 0x00008 +#define GEN8_RSCC_PDC_MATCH_VALUE_LO 0x00009 +#define GEN8_RSCC_PDC_MATCH_VALUE_HI 0x0000a +#define GEN8_RSCC_PDC_SLAVE_ID_DRV0 0x0000b +#define GEN8_RSCC_HIDDEN_TCS_CMD0_ADDR 0x0000d +#define GEN8_RSCC_HIDDEN_TCS_CMD0_DATA 0x0000e +#define GEN8_RSCC_TIMESTAMP_UNIT0_TIMESTAMP_L_DRV0 0x00082 +#define GEN8_RSCC_TIMESTAMP_UNIT0_TIMESTAMP_H_DRV0 0x00083 +#define GEN8_RSCC_TIMESTAMP_UNIT1_EN_DRV0 0x00089 +#define GEN8_RSCC_TIMESTAMP_UNIT1_OUTPUT_DRV0 0x0008c +#define GEN8_RSCC_OVERRIDE_START_ADDR 0x00100 +#define GEN8_RSCC_SEQ_BUSY_DRV0 0x00101 +#define GEN8_RSCC_SEQ_MEM_0_DRV0 0x00154 +#define GEN8_RSCC_TCS0_DRV0_STATUS 0x0034a +#define GEN8_RSCC_TCS1_DRV0_STATUS 0x003f2 +#define GEN8_RSCC_TCS2_DRV0_STATUS 0x0049a +#define GEN8_RSCC_TCS3_DRV0_STATUS 0x00542 +#define GEN8_RSCC_TCS4_DRV0_STATUS 0x005ea +#define GEN8_RSCC_TCS5_DRV0_STATUS 0x00692 +#define GEN8_RSCC_TCS6_DRV0_STATUS 0x0073a +#define GEN8_RSCC_TCS7_DRV0_STATUS 0x007e2 +#define GEN8_RSCC_TCS8_DRV0_STATUS 0x0088a +#define GEN8_RSCC_TCS9_DRV0_STATUS 0x00932 + +/* GPU PDC sequencer registers in AOSS.RPMh domain */ +#define GEN8_PDC_GPU_ENABLE_PDC 0x002c + +#define GEN8_SMMU_BASE 0x28000 + +/* GPU CX_MISC registers */ +#define GEN8_GPU_CX_MISC_TCM_RET_CNTL 0x39 +#define GEN8_GPU_CX_MISC_SW_FUSE_VALUE 0x400 + +/* GPU SW Fuse Feature bit fields */ +#define 
GEN8_FASTBLEND_SW_FUSE 0 +#define GEN8_LPAC_SW_FUSE 1 +#define GEN8_RAYTRACING_SW_FUSE 2 + +#define GEN8_SW_FUSE_INT_MASK \ + ((1 << GEN8_FASTBLEND_SW_FUSE) | \ + (1 << GEN8_LPAC_SW_FUSE) | \ + (1 << GEN8_RAYTRACING_SW_FUSE)) + +/* QDSS register offsets */ +#define QDSS_AOSS_APB_TMC_RSZ 0x04 +#define QDSS_AOSS_APB_TMC_RRD 0x10 +#define QDSS_AOSS_APB_TMC_RRP 0x14 +#define QDSS_AOSS_APB_TMC_RWP 0x18 +#define QDSS_AOSS_APB_TMC_CTRL 0x20 +#define QDSS_AOSS_APB_TMC_MODE 0x28 +#define QDSS_AOSS_APB_TMC_FFCR 0x304 +#define QDSS_AOSS_APB_TMC_LAR 0xfb0 +#define QDSS_AOSS_APB_ETR_CTRL 0x20 +#define QDSS_AOSS_APB_ETR1_CTRL 0x7020 + +#endif /* _GEN8_REG_H */ diff --git a/include/uapi/linux/msm_kgsl.h b/include/uapi/linux/msm_kgsl.h index ad30ad128b..66251cfb0b 100644 --- a/include/uapi/linux/msm_kgsl.h +++ b/include/uapi/linux/msm_kgsl.h @@ -247,6 +247,7 @@ enum kgsl_user_mem_type { #define KGSL_UBWC_2_0 2 #define KGSL_UBWC_3_0 3 #define KGSL_UBWC_4_0 4 +#define KGSL_UBWC_5_0 5 /* * Reset status values for context diff --git a/kgsl_gmu_core.c b/kgsl_gmu_core.c index 482ad65299..dfa635f50e 100644 --- a/kgsl_gmu_core.c +++ b/kgsl_gmu_core.c @@ -19,6 +19,7 @@ static const struct of_device_id gmu_match_table[] = { { .compatible = "qcom,gpu-gmu", .data = &a6xx_gmu_driver }, { .compatible = "qcom,gpu-rgmu", .data = &a6xx_rgmu_driver }, { .compatible = "qcom,gen7-gmu", .data = &gen7_gmu_driver }, + { .compatible = "qcom,gen8-gmu", .data = &gen8_gmu_driver }, {}, }; diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h index 57e1c80f88..cba1b31130 100644 --- a/kgsl_gmu_core.h +++ b/kgsl_gmu_core.h @@ -398,9 +398,8 @@ struct gmu_core_device { extern struct platform_driver a6xx_gmu_driver; extern struct platform_driver a6xx_rgmu_driver; -extern struct platform_driver a6xx_hwsched_driver; extern struct platform_driver gen7_gmu_driver; -extern struct platform_driver gen7_hwsched_driver; +extern struct platform_driver gen8_gmu_driver; /* GMU core functions */ diff --git a/kgsl_util.h 
b/kgsl_util.h index 24e041f720..67e28e4b5d 100644 --- a/kgsl_util.h +++ b/kgsl_util.h @@ -14,6 +14,7 @@ #define KGSL_A6XX_DEVICE "kgsl_a6xx_device" #define KGSL_GEN7_DEVICE "kgsl_gen7_device" #define KGSL_HWSCHED_DEVICE "kgsl_hwsched_device" +#define KGSL_GEN8_DEVICE "kgsl_gen8_device" #define KGSL_SCRATCH_ENTRY "kgsl_scratch" #define KGSL_MEMSTORE_ENTRY "kgsl_memstore" From 35391f7545fc553bdbe71c50d30ce3dcfec35bb4 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Tue, 24 Oct 2023 11:59:15 -0700 Subject: [PATCH 0548/1016] kgsl: gen8: Add support to program noncontext registers For gen8 all noncontext registers should be programmed from KGSL. Hence add required support to program the noncontext registers during gen8 bootup. Change-Id: I8ab81fc39fdc14369da3b3f290d6121f3778579b Signed-off-by: Hareesh Gundu --- adreno-gpulist.h | 51 ++++++++++++++++++++++++++++++++++++++++++++++++ adreno_gen8.c | 32 ++++++++++++++++++++++++++++++ adreno_gen8.h | 14 +++++++++++++ gen8_reg.h | 2 ++ 4 files changed, 99 insertions(+) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index a2e730d6a8..13e94204b6 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2511,6 +2511,56 @@ static const struct kgsl_regmap_list gen8_0_0_gbif_cx_regs[] = { { GEN8_GMUCX_MRC_GBIF_QOS_CTRL, 0x33 }, }; +/* GEN8_0_0 noncontext register list */ +static const struct gen8_nonctxt_regs gen8_0_0_nonctxt_regs[] = { + { GEN8_CP_SMMU_STREAM_ID_LPAC, 0x00000101, BIT(PIPE_NONE) }, + { GEN8_DBGC_CFG_DBGBUS_CNTLT, 0xf0004000, BIT(PIPE_NONE) }, + { GEN8_DBGC_CFG_DBGBUS_MASKL_0, 0x00000003, BIT(PIPE_NONE) }, + { GEN8_DBGC_CFG_DBGBUS_MASKL_2, 0xffffffff, BIT(PIPE_NONE) }, + { GEN8_DBGC_CFG_DBGBUS_MASKL_3, 0xffffffff, BIT(PIPE_NONE) }, + { GEN8_DBGC_CFG_DBGBUS_BYTEL_0, 0x00000008, BIT(PIPE_NONE) }, + { GEN8_DBGC_CFG_DBGBUS_BYTEL_1, 0x76543210, BIT(PIPE_NONE) }, + { GEN8_GRAS_DBG_ECO_CNTL, 0x00000800, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_PC_AUTO_VERTEX_STRIDE, 0x00000001, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { 
GEN8_PC_VIS_STREAM_CNTL, 0x10010000, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_PC_CONTEXT_SWITCH_STABILIZE_CNTL_1, 0x00000002, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_PC_CHICKEN_BITS_1, 0x00000003, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_PC_CHICKEN_BITS_2, 0x00000200, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_PC_CHICKEN_BITS_3, 0x00500000, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_PC_CHICKEN_BITS_4, 0x00500050, BIT(PIPE_BR) | BIT(PIPE_BV) }, + /* Enable full concurrent resolve and unresolves */ + { GEN8_RB_CCU_CNTL, 0x00000068, BIT(PIPE_BR) }, + { GEN8_RB_GC_GMEM_PROTECT, 0x0c000000, BIT(PIPE_BR) }, + /* Configure number of outstanding transactions to 32 */ + { GEN8_RB_RESOLVE_PREFETCH_CNTL, 0x00000007, BIT(PIPE_BR) }, + /* Disable ubwc merged UFC request feature */ + { GEN8_RB_CMP_DBG_ECO_CNTL, 0x00004000, BIT(PIPE_BR) }, + { GEN8_RBBM_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, + { GEN8_RBBM_SLICE_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, + { GEN8_SP_CHICKEN_BITS_2, 0x00800000, BIT(PIPE_NONE) }, + { GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, 0x00000080, BIT(PIPE_NONE) }, + { GEN8_SP_READ_SEL, 0x0001ff00, BIT(PIPE_NONE) }, + { GEN8_TPL1_DBG_ECO_CNTL, 0x10000000, BIT(PIPE_NONE) }, + /* Enable cubemap small miplevel optimization settings */ + { GEN8_TPL1_DBG_ECO_CNTL1, 0x00000724, BIT(PIPE_NONE) }, + /* Disable tag bank id hashing */ + { GEN8_UCHE_MODE_CNTL, 0x000a0000, BIT(PIPE_NONE) }, + { GEN8_UCHE_CCHE_MODE_CNTL, 0x00001000, BIT(PIPE_NONE) }, + /* Limit gmem number of ways for GMEM requests in each set */ + { GEN8_UCHE_CCHE_CACHE_WAYS, 0x00000800, BIT(PIPE_NONE)}, + { GEN8_UCHE_CACHE_WAYS, 0x00080000, BIT(PIPE_NONE) }, + /* Configure UCHE to CCU switchthreshold timeout cycles */ + { GEN8_UCHE_VARB_IDLE_TIMEOUT, 0x00000020, BIT(PIPE_NONE) }, + { GEN8_VFD_DBG_ECO_CNTL, 0x00008000, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_VFD_CB_BV_THRESHOLD, 0x00500050, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_VFD_CB_BR_THRESHOLD, 0x00600060, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { 
GEN8_VFD_CB_BUSY_REQ_CNT, 0x00200020, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_VFD_CB_LP_REQ_CNT, 0x00000020, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_VPC_FLATSHADE_MODE_CNTL, 0x00000001, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_VSC_BIN_SIZE, 0x00010001, BIT(PIPE_NONE) }, + { 0 }, +}; + /* GEN8_0_0 protected register list */ static const struct gen8_protected_regs gen8_0_0_protected_regs[] = { { GEN8_CP_PROTECT_REG_GLOBAL + 0, 0x00000, 0x003a3, 0 }, @@ -2592,6 +2642,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .gbif_count = ARRAY_SIZE(gen8_0_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = gen8_0_0_protected_regs, + .nonctxt_regs = gen8_0_0_nonctxt_regs, .highest_bank_bit = 16, .gmu_hub_clk_freq = 200000000, }; diff --git a/adreno_gen8.c b/adreno_gen8.c index cb6d54712b..3095efd2e5 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -399,6 +399,35 @@ static void gen8_protect_init(struct adreno_device *adreno_dev) PIPE_LPAC, 0, 0); } +static void gen8_nonctxt_regconfig(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); + const struct gen8_nonctxt_regs *regs = gen8_core->nonctxt_regs; + u32 i, pipe_id; + unsigned long pipe; + + /* Program non context registers for all pipes */ + for (pipe_id = PIPE_NONE; pipe_id <= PIPE_AQE1; pipe_id++) { + + if ((pipe_id == PIPE_LPAC) && !ADRENO_FEATURE(adreno_dev, ADRENO_LPAC)) + continue; + else if (((pipe_id == PIPE_AQE0) || (pipe_id == PIPE_AQE1)) && + !ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) + continue; + + for (i = 0; regs[i].offset; i++) { + pipe = (unsigned long)regs[i].pipelines; + if (test_bit(pipe_id, &pipe)) + gen8_regwrite_aperture(device, regs[i].offset, + regs[i].val, pipe_id, 0, 0); + } + } + + /* Clear aperture register */ + gen8_host_aperture_set(adreno_dev, 0, 0, 0); +} + #define RBBM_CLOCK_CNTL_ON 0x8aa8aa82 static void gen8_hwcg_set(struct adreno_device 
*adreno_dev, bool on) @@ -740,6 +769,9 @@ int gen8_start(struct adreno_device *adreno_dev) kgsl_regmap_multi_write(&device->regmap, gen8_0_0_bicubic_regs, ARRAY_SIZE(gen8_0_0_bicubic_regs)); + /* Program noncontext registers */ + gen8_nonctxt_regconfig(adreno_dev); + /* Enable hardware hang detection */ kgsl_regwrite(device, GEN8_RBBM_INTERFACE_HANG_INT_CNTL, BIT(30) | FIELD_PREP(GENMASK(27, 0), gen8_core->hang_detect_cycles)); diff --git a/adreno_gen8.h b/adreno_gen8.h index 9cd8daa2b5..ae39acba61 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -58,6 +58,18 @@ struct gen8_protected_regs { u32 noaccess; }; +/** + * struct gen8_nonctxt_regs - Container for non context registers span + */ +struct gen8_nonctxt_regs { + /** @offset: Dword offset of the register to write */ + u32 offset; + /** @val: Value to write */ + u32 val; + /** @pipelines: pipelines to write */ + u32 pipelines; +}; + /** * struct adreno_gen8_core - gen8 specific GPU core definitions */ @@ -86,6 +98,8 @@ struct adreno_gen8_core { u32 hang_detect_cycles; /** @protected_regs: Array of protected registers for the target */ const struct gen8_protected_regs *protected_regs; + /** @nonctxt_regs: Array of non context register list */ + const struct gen8_nonctxt_regs *nonctxt_regs; /** @ctxt_record_size: Size of the preemption record in bytes */ u64 ctxt_record_size; /** @highest_bank_bit: Highest bank bit value */ diff --git a/gen8_reg.h b/gen8_reg.h index 9ed1f9a436..4cc82bab23 100644 --- a/gen8_reg.h +++ b/gen8_reg.h @@ -429,6 +429,7 @@ #define GEN8_CP_RB_RPTR_ADDR_HI_LPAC 0x810 #define GEN8_CP_RB_RPTR_LPAC 0x811 #define GEN8_CP_RB_WPTR_LPAC 0x812 +#define GEN8_CP_SMMU_STREAM_ID_LPAC 0x814 #define GEN8_CP_SQE_CNTL 0x815 #define GEN8_CP_SQE_INSTR_BASE_LO 0x816 #define GEN8_CP_SQE_INSTR_BASE_HI 0x817 @@ -690,6 +691,7 @@ /* VFD registers */ +#define GEN8_VFD_DBG_ECO_CNTL 0xa600 #define GEN8_VFD_PERFCTR_VFD_SEL_0 0xa610 #define GEN8_VFD_PERFCTR_VFD_SEL_1 0xa611 #define GEN8_VFD_PERFCTR_VFD_SEL_2 0xa612 
From eb8b845bddcceabb0935b7d07c3bf041c0ec3d85 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Mon, 30 Oct 2023 23:52:43 -0700 Subject: [PATCH 0549/1016] kgsl: gen8: Add cx gdsc notifier for rgmu and non-gmu targets This change ported from gen7 to gen8 from below commit 2864d0656c05 ("msm: kgsl: Add cx gdsc notifier for rgmu and non-gmu targets"). Change-Id: I1f9351f006aa109c4f159e175431bf6a0b61ffcf Signed-off-by: Hareesh Gundu --- adreno_gen8.c | 1 + adreno_gen8_gmu.c | 112 +++++------------------------------------- adreno_gen8_gmu.h | 31 ------------ adreno_gen8_hwsched.c | 10 ++-- 4 files changed, 17 insertions(+), 137 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index 3095efd2e5..02c6459dd3 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -1677,6 +1677,7 @@ int gen8_probe_common(struct platform_device *pdev, adreno_dev->uche_client_pf = 1; kgsl_pwrscale_fast_bus_hint(gen8_core->fast_bus_hint); + device->pwrctrl.cx_gdsc_offset = GEN8_GPU_CC_CX_GDSCR; ret = adreno_device_probe(pdev, adreno_dev); if (ret) diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index fcea42776e..cd4603f448 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -6,14 +6,12 @@ #include #include -#include #include #include #include #include #include #include -#include #include #include #include @@ -337,40 +335,6 @@ static void gmu_ao_sync_event(struct adreno_device *adreno_dev) local_irq_restore(flags); } -int gen8_gmu_enable_gdsc(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - int ret; - - ret = wait_for_completion_timeout(&gmu->gdsc_gate, msecs_to_jiffies(5000)); - if (!ret) { - dev_err(device->dev, "GPU CX wait timeout. 
Dumping CX votes:\n"); - /* Dump the cx regulator consumer list */ - qcom_clk_dump(NULL, gmu->cx_gdsc, false); - } - - ret = regulator_enable(gmu->cx_gdsc); - if (ret) - dev_err(&gmu->pdev->dev, - "Failed to enable GMU CX gdsc, error %d\n", ret); - - kgsl_mmu_send_tlb_hint(&device->mmu, false); - clear_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); - return ret; -} - -void gen8_gmu_disable_gdsc(struct adreno_device *adreno_dev) -{ - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - kgsl_mmu_send_tlb_hint(&device->mmu, true); - reinit_completion(&gmu->gdsc_gate); - set_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); - regulator_disable(gmu->cx_gdsc); -} - int gen8_gmu_device_start(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -1455,6 +1419,7 @@ static void gen8_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) int ret = 0; struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; /* Disconnect GPU from BUS is not needed if CX GDSC goes off later */ @@ -1492,19 +1457,19 @@ static void gen8_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) * the GX HS. This code path is the only client voting for GX through * the regulator interface. 
*/ - if (gmu->gx_gdsc) { + if (pwr->gx_gdsc) { if (gen8_gmu_gx_is_on(adreno_dev)) { /* Switch gx gdsc control from GMU to CPU * force non-zero reference count in clk driver * so next disable call will turn * off the GDSC */ - ret = regulator_enable(gmu->gx_gdsc); + ret = regulator_enable(pwr->gx_gdsc); if (ret) dev_err(&gmu->pdev->dev, "suspend fail: gx enable %d\n", ret); - ret = regulator_disable(gmu->gx_gdsc); + ret = regulator_disable(pwr->gx_gdsc); if (ret) dev_err(&gmu->pdev->dev, "suspend fail: gx disable %d\n", ret); @@ -1559,7 +1524,7 @@ void gen8_gmu_suspend(struct adreno_device *adreno_dev) clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); - gen8_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen8_rdpm_cx_freq_update(gmu, 0); @@ -1896,7 +1861,7 @@ static int gen8_gmu_first_boot(struct adreno_device *adreno_dev) gen8_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled); - ret = gen8_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -1976,7 +1941,7 @@ clks_gdsc_off: clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); gdsc_off: - gen8_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen8_rdpm_cx_freq_update(gmu, 0); @@ -1991,7 +1956,7 @@ static int gen8_gmu_boot(struct adreno_device *adreno_dev) kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE); - ret = gen8_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -2041,7 +2006,7 @@ clks_gdsc_off: clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); gdsc_off: - gen8_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen8_rdpm_cx_freq_update(gmu, 0); @@ -2400,61 +2365,6 @@ static void gen8_gmu_rdpm_probe(struct gen8_gmu_device *gmu, res->start, resource_size(res)); } -static int gmu_cx_gdsc_event(struct notifier_block *nb, - unsigned long event, void *data) -{ - struct gen8_gmu_device *gmu = container_of(nb, struct gen8_gmu_device, gdsc_nb); - 
struct adreno_device *adreno_dev = gen8_gmu_to_adreno(gmu); - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - u32 val; - - if (!(event & REGULATOR_EVENT_DISABLE) || - !test_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags)) - return 0; - - if (kgsl_regmap_read_poll_timeout(&device->regmap, GEN8_GPU_CC_CX_GDSCR, - val, !(val & BIT(31)), 100, 100 * 1000)) - dev_err(device->dev, "GPU CX wait timeout.\n"); - - clear_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); - complete_all(&gmu->gdsc_gate); - - return 0; -} - -static int gen8_gmu_regulators_probe(struct gen8_gmu_device *gmu, - struct platform_device *pdev) -{ - int ret; - - gmu->cx_gdsc = devm_regulator_get(&pdev->dev, "vddcx"); - if (IS_ERR(gmu->cx_gdsc)) { - if (PTR_ERR(gmu->cx_gdsc) != -EPROBE_DEFER) - dev_err(&pdev->dev, "Couldn't get the vddcx gdsc\n"); - return PTR_ERR(gmu->cx_gdsc); - } - - gmu->gx_gdsc = devm_regulator_get(&pdev->dev, "vdd"); - if (IS_ERR(gmu->gx_gdsc)) { - if (PTR_ERR(gmu->gx_gdsc) != -EPROBE_DEFER) - dev_err(&pdev->dev, "Couldn't get the vdd gdsc\n"); - return PTR_ERR(gmu->gx_gdsc); - } - - init_completion(&gmu->gdsc_gate); - complete_all(&gmu->gdsc_gate); - - gmu->gdsc_nb.notifier_call = gmu_cx_gdsc_event; - ret = devm_regulator_register_notifier(gmu->cx_gdsc, &gmu->gdsc_nb); - - if (ret) { - dev_err(&pdev->dev, "Failed to register gmu cx gdsc notifier: %d\n", ret); - return ret; - } - - return 0; -} - void gen8_gmu_remove(struct kgsl_device *device) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); @@ -2565,7 +2475,7 @@ int gen8_gmu_probe(struct kgsl_device *device, gen8_gmu_rdpm_probe(gmu, device); /* Set up GMU regulators */ - ret = gen8_gmu_regulators_probe(gmu, pdev); + ret = kgsl_pwrctrl_probe_regulators(device, pdev); if (ret) return ret; @@ -2731,7 +2641,7 @@ static int gen8_gmu_power_off(struct adreno_device *adreno_dev) clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); - gen8_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); 
gen8_rdpm_cx_freq_update(gmu, 0); diff --git a/adreno_gen8_gmu.h b/adreno_gen8_gmu.h index 1ff5aa23d2..01829d7fef 100644 --- a/adreno_gen8_gmu.h +++ b/adreno_gen8_gmu.h @@ -25,9 +25,6 @@ * @num_bwlevel: number of GPU BW levels * @num_cnocbwlevel: number CNOC BW levels * @rpmh_votes: RPMh TCS command set for GPU, GMU voltage and bw scaling - * @cx_gdsc: CX headswitch that controls power of GMU and - * subsystem peripherals - * @gx_gdsc: GX headswitch that controls power of GPU subsystem * @clks: GPU subsystem clocks required for GMU functionality * @wakeup_pwrlevel: GPU wake up power/DCVS level in case different * than default power level @@ -55,8 +52,6 @@ struct gen8_gmu_device { struct kgsl_memdesc *gpu_boot_scratch; struct gen8_hfi hfi; /** @pwrlevels: Array of GMU power levels */ - struct regulator *cx_gdsc; - struct regulator *gx_gdsc; struct clk_bulk_data *clks; /** @num_clks: Number of entries in the @clks array */ int num_clks; @@ -103,10 +98,6 @@ struct gen8_gmu_device { u32 num_oob_perfcntr; /** @acd_debug_val: DVM value to calibrate ACD for a level */ u32 acd_debug_val; - /** @gdsc_nb: Notifier block for cx gdsc regulator */ - struct notifier_block gdsc_nb; - /** @gdsc_gate: Completion to signal cx gdsc collapse status */ - struct completion gdsc_gate; /** @stats_enable: GMU stats feature enable */ bool stats_enable; /** @stats_mask: GMU performance countables to enable */ @@ -299,14 +290,6 @@ int gen8_gmu_memory_init(struct adreno_device *adreno_dev); */ void gen8_gmu_aop_send_acd_state(struct gen8_gmu_device *gmu, bool flag); -/** - * gen8_gmu_enable_clocks - Enable gmu clocks - * @adreno_dev: Pointer to the adreno device - * - * Return: 0 on success or negative error on failure - */ -int gen8_gmu_enable_gdsc(struct adreno_device *adreno_dev); - /** * gen8_gmu_load_fw - Load gmu firmware * @adreno_dev: Pointer to the adreno device @@ -473,20 +456,6 @@ void gen8_gmu_remove(struct kgsl_device *device); */ int gen8_gmu_enable_clks(struct adreno_device 
*adreno_dev, u32 level); -/** - * gen8_gmu_enable_gdsc - Enable gmu gdsc - * @adreno_dev: Pointer to the adreno device - * - * Return: 0 on success or negative error on failure - */ -int gen8_gmu_enable_gdsc(struct adreno_device *adreno_dev); - -/** - * gen8_gmu_disable_gdsc - Disable gmu gdsc - * @adreno_dev: Pointer to the adreno device - */ -void gen8_gmu_disable_gdsc(struct adreno_device *adreno_dev); - /** * gen8_gmu_handle_watchdog - Handle watchdog interrupt * @adreno_dev: Pointer to the adreno device diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 5401cc3e53..feee8bafff 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -92,7 +92,7 @@ static int gen8_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) gen8_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled); - ret = gen8_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -179,7 +179,7 @@ clks_gdsc_off: clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); gdsc_off: - gen8_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen8_rdpm_cx_freq_update(gmu, 0); @@ -194,7 +194,7 @@ static int gen8_hwsched_gmu_boot(struct adreno_device *adreno_dev) kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE); - ret = gen8_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -246,7 +246,7 @@ clks_gdsc_off: clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); gdsc_off: - gen8_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen8_rdpm_cx_freq_update(gmu, 0); @@ -334,7 +334,7 @@ static int gen8_hwsched_gmu_power_off(struct adreno_device *adreno_dev) clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); - gen8_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen8_rdpm_cx_freq_update(gmu, 0); From c1378410cdb06d67f00ebfe05d5e9c4f1cc90a35 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Thu, 7 Sep 2023 22:21:33 -0700 Subject: [PATCH 
0550/1016] kgsl: gen8: Fix HFI sequence number wrap issue This change ports commit f572cc31f20e ("kgsl: hwsched: Fix HFI sequence number wrap issue") from gen7 to gen8. Change-Id: I80a0ec1b91bd266cfe6940802de868c27bec841e Signed-off-by: Hareesh Gundu --- adreno_gen8_hfi.c | 6 +-- adreno_gen8_hwsched_hfi.c | 77 +++++++++++++++++---------------------- adreno_gen8_hwsched_hfi.h | 2 + 3 files changed, 38 insertions(+), 47 deletions(-) diff --git a/adreno_gen8_hfi.c b/adreno_gen8_hfi.c index b289ad076a..3d78353c4d 100644 --- a/adreno_gen8_hfi.c +++ b/adreno_gen8_hfi.c @@ -126,8 +126,6 @@ int gen8_hfi_queue_write(struct adreno_device *adreno_dev, u32 queue_idx, if (empty_space <= align_size) return -ENOSPC; - *msg = MSG_HDR_SET_SIZE(*msg, size_dwords); - for (i = 0; i < size_dwords; i++) { queue[write_idx] = msg[i]; write_idx = (write_idx + 1) % hdr->queue_size; @@ -246,7 +244,7 @@ int gen8_receive_ack_cmd(struct gen8_gmu_device *gmu, void *rcvd, if (ret_cmd == NULL) return -EINVAL; - if (HDR_CMP_SEQNUM(ret_cmd->sent_hdr, req_hdr)) { + if (CMP_HFI_ACK_HDR(ret_cmd->sent_hdr, req_hdr)) { memcpy(&ret_cmd->results, ack, MSG_HDR_GET_SIZE(hdr) << 2); return 0; } @@ -316,7 +314,7 @@ static int gen8_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev, struct gen8_hfi *hfi = &gmu->hfi; u32 seqnum = atomic_inc_return(&hfi->seqnum); - *cmd = MSG_HDR_SET_SEQNUM(*cmd, seqnum); + *cmd = MSG_HDR_SET_SEQNUM_SIZE(*cmd, seqnum, size_bytes >> 2); if (ret_cmd == NULL) return gen8_hfi_cmdq_write(adreno_dev, cmd, size_bytes); diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index ae6d9f59b2..531122964f 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -127,7 +127,7 @@ static void gen8_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd) read_lock(&hfi->msglock); list_for_each_entry(cmd, &hfi->msglist, node) { - if (HDR_CMP_SEQNUM(cmd->sent_hdr, req_hdr)) { + if (CMP_HFI_ACK_HDR(cmd->sent_hdr, req_hdr)) { memcpy(cmd->results, ack, 
min_t(u32, size_bytes, sizeof(cmd->results))); @@ -837,7 +837,6 @@ static void gen8_syncobj_query_reply(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj, struct hfi_syncobj_query_cmd *cmd) { struct hfi_syncobj_query_cmd reply = {0}; - struct gen8_hfi *hfi = to_gen8_hfi(adreno_dev); int i, j, fence_index = 0; struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); @@ -869,8 +868,6 @@ static void gen8_syncobj_query_reply(struct adreno_device *adreno_dev, } reply.hdr = CREATE_MSG_HDR(F2H_MSG_SYNCOBJ_QUERY, HFI_MSG_CMD); - reply.hdr = MSG_HDR_SET_SEQNUM(reply.hdr, - atomic_inc_return(&hfi->seqnum)); reply.gmu_ctxt_id = cmd->gmu_ctxt_id; reply.sync_obj_ts = cmd->sync_obj_ts; @@ -1034,11 +1031,12 @@ static void _increment_hw_fence_unack_count(struct adreno_device *adreno_dev) static int _send_hw_fence_no_ack(struct adreno_device *adreno_dev, struct adreno_hw_fence_entry *entry) { - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + u32 seqnum; int ret; - entry->cmd.hdr = MSG_HDR_SET_SEQNUM(entry->cmd.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + seqnum = atomic_inc_return(&hfi->hw_fence.seqnum); + entry->cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(entry->cmd.hdr, seqnum, sizeof(entry->cmd) >> 2); ret = gen8_hfi_cmdq_write(adreno_dev, (u32 *)&entry->cmd, sizeof(entry->cmd)); if (!ret) @@ -1203,7 +1201,7 @@ static void process_hw_fence_ack(struct adreno_device *adreno_dev, u32 received_ /* If this ack is being waited on, we don't need to touch the unack count */ if (gen8_hw_fence_ack.sent_hdr && - HDR_CMP_SEQNUM(gen8_hw_fence_ack.sent_hdr, received_hdr)) { + CMP_HFI_ACK_HDR(gen8_hw_fence_ack.sent_hdr, received_hdr)) { spin_unlock(&hfi->hw_fence.lock); complete(&gen8_hw_fence_ack.complete); return; @@ -1420,11 +1418,12 @@ int gen8_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 si struct gen8_gmu_device *gmu = 
to_gen8_gmu(adreno_dev); struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); u32 *cmd = data; - u32 seqnum = atomic_inc_return(&gmu->hfi.seqnum); + u32 seqnum; int rc; struct pending_cmd pending_ack; - *cmd = MSG_HDR_SET_SEQNUM(*cmd, seqnum); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + *cmd = MSG_HDR_SET_SEQNUM_SIZE(*cmd, seqnum, size_bytes >> 2); add_waiter(hfi, *cmd, &pending_ack); @@ -1724,6 +1723,7 @@ static int mem_alloc_reply(struct adreno_device *adreno_dev, void *rcvd) struct hfi_mem_alloc_desc desc = {0}; struct hfi_mem_alloc_reply_cmd out = {0}; struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + u32 seqnum; int ret; hfi_get_mem_alloc_desc(rcvd, &desc); @@ -1736,8 +1736,8 @@ static int mem_alloc_reply(struct adreno_device *adreno_dev, void *rcvd) out.hdr = ACK_MSG_HDR(F2H_MSG_MEM_ALLOC); - out.hdr = MSG_HDR_SET_SEQNUM(out.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + out.hdr = MSG_HDR_SET_SEQNUM_SIZE(out.hdr, seqnum, sizeof(out) >> 2); out.req_hdr = *(u32 *)rcvd; @@ -1748,7 +1748,8 @@ static int gmu_cntr_register_reply(struct adreno_device *adreno_dev, void *rcvd) { struct hfi_gmu_cntr_register_cmd *in = (struct hfi_gmu_cntr_register_cmd *)rcvd; struct hfi_gmu_cntr_register_reply_cmd out = {0}; - u32 lo = 0, hi = 0; + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + u32 lo = 0, hi = 0, seqnum; /* * Failure to allocate counter is not fatal. 
Sending lo = 0, hi = 0 @@ -1758,6 +1759,8 @@ static int gmu_cntr_register_reply(struct adreno_device *adreno_dev, void *rcvd) in->group_id, in->countable, &lo, &hi, PERFCOUNTER_FLAG_KERNEL); out.hdr = ACK_MSG_HDR(F2H_MSG_GMU_CNTR_REGISTER); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + out.hdr = MSG_HDR_SET_SEQNUM_SIZE(out.hdr, seqnum, sizeof(out) >> 2); out.req_hdr = in->hdr; out.group_id = in->group_id; out.countable = in->countable; @@ -1770,8 +1773,6 @@ static int gmu_cntr_register_reply(struct adreno_device *adreno_dev, void *rcvd) static int send_warmboot_start_msg(struct adreno_device *adreno_dev) { - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - u32 seqnum = atomic_inc_return(&gmu->hfi.seqnum); int ret = 0; struct hfi_start_cmd cmd; @@ -1782,8 +1783,6 @@ static int send_warmboot_start_msg(struct adreno_device *adreno_dev) if (ret) return ret; - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); - cmd.hdr = RECORD_NOP_MSG_HDR(cmd.hdr); return gen8_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd)); @@ -1793,17 +1792,17 @@ static int send_start_msg(struct adreno_device *adreno_dev) { struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - u32 seqnum = atomic_inc_return(&gmu->hfi.seqnum); int ret, rc = 0; struct hfi_start_cmd cmd; - u32 rcvd[MAX_RCVD_SIZE]; + u32 seqnum, rcvd[MAX_RCVD_SIZE]; struct pending_cmd pending_ack = {0}; ret = CMD_MSG_HDR(cmd, H2F_MSG_START); if (ret) return ret; - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); pending_ack.sent_hdr = cmd.hdr; @@ -2022,14 +2021,15 @@ u32 gen8_hwsched_hfi_get_value(struct adreno_device *adreno_dev, u32 prop) struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); struct pending_cmd pending_ack; + u32 seqnum; int rc; rc = CMD_MSG_HDR(cmd, H2F_MSG_GET_VALUE); 
if (rc) return 0; - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); cmd.type = prop; cmd.subtype = 0; @@ -2169,6 +2169,7 @@ static int gen8_hwsched_hfi_warmboot_gpu_cmd(struct adreno_device *adreno_dev, .flags = HFI_WARMBOOT_EXEC_SCRATCH, }; int ret = 0; + u32 seqnum; if (!adreno_dev->warmboot_enabled) return 0; @@ -2177,9 +2178,8 @@ static int gen8_hwsched_hfi_warmboot_gpu_cmd(struct adreno_device *adreno_dev, if (ret) return ret; - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); - + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); add_waiter(hfi, cmd.hdr, ret_cmd); ret = gen8_hfi_cmdq_write(adreno_dev, (u32 *)&cmd, sizeof(cmd)); @@ -3024,8 +3024,6 @@ int gen8_gmu_context_queue_write(struct adreno_device *adreno_dev, if (!IS_ALIGNED(size_bytes, sizeof(u32))) return -EINVAL; - *msg = MSG_HDR_SET_SIZE(*msg, size_dwords); - for (i = 0; i < size_dwords; i++) { queue[write_idx] = msg[i]; write_idx = (write_idx + 1) % hdr->queue_size; @@ -3123,9 +3121,8 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj, void *cmdbuf) { struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context); - struct gen8_hfi *hfi = to_gen8_hfi(adreno_dev); int i, j; - u32 cmd_sizebytes; + u32 cmd_sizebytes, seqnum; struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); struct hfi_submit_syncobj *cmd; struct hfi_syncobj *obj = NULL; @@ -3208,8 +3205,8 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, cmd->timestamp = drawobj->timestamp; cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_SYNCOBJ, HFI_MSG_CMD); - cmd->hdr = MSG_HDR_SET_SEQNUM(cmd->hdr, - atomic_inc_return(&hfi->seqnum)); + seqnum = atomic_inc_return(&adreno_dev->hwsched.submission_seqnum); + cmd->hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd->hdr, 
seqnum, cmd_sizebytes >> 2); return gen8_gmu_context_queue_write(adreno_dev, drawctxt, (u32 *)cmd, cmd_sizebytes, drawobj, NULL); @@ -3351,6 +3348,7 @@ int gen8_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev, struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret = 0; + u32 seqnum; /* Device mutex is necessary to ensure only one hardware fence ack is being waited for */ if (WARN_ON(!mutex_is_locked(&device->mutex))) @@ -3361,8 +3359,7 @@ int gen8_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev, init_completion(&gen8_hw_fence_ack.complete); entry->cmd.flags |= flags; - entry->cmd.hdr = MSG_HDR_SET_SEQNUM(entry->cmd.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + seqnum = atomic_inc_return(&hfi->hw_fence.seqnum); gen8_hw_fence_ack.sent_hdr = entry->cmd.hdr; @@ -3688,8 +3685,6 @@ static int gen8_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, u32 q if (empty_space <= align_size) return -ENOSPC; - *msg = MSG_HDR_SET_SIZE(*msg, size_dwords); - write = hdr->write_index; for (i = 0; i < size_dwords; i++) { @@ -3725,9 +3720,8 @@ static int gen8_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, u32 q int gen8_hwsched_submit_drawobj(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj) { - struct gen8_hfi *hfi = to_gen8_hfi(adreno_dev); int ret = 0; - u32 cmd_sizebytes; + u32 cmd_sizebytes, seqnum; struct kgsl_drawobj_cmd *cmdobj = NULL; struct hfi_submit_cmd *cmd; struct adreno_submit_time time = {0}; @@ -3808,8 +3802,8 @@ skipib: adreno_drawobj_set_constraint(KGSL_DEVICE(adreno_dev), drawobj); cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD, HFI_MSG_CMD); - cmd->hdr = MSG_HDR_SET_SEQNUM(cmd->hdr, - atomic_inc_return(&hfi->seqnum)); + seqnum = atomic_inc_return(&adreno_dev->hwsched.submission_seqnum); + cmd->hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd->hdr, seqnum, cmd_sizebytes >> 2); if (adreno_hwsched_context_queue_enabled(adreno_dev)) ret = 
gen8_gmu_context_queue_write(adreno_dev, @@ -3842,7 +3836,6 @@ int gen8_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, { struct adreno_hwsched *hwsched = &adreno_dev->hwsched; struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); - struct gen8_hfi *hfi = to_gen8_hfi(adreno_dev); struct hfi_submit_cmd *cmd; struct kgsl_memobj_node *ib; u32 cmd_sizebytes; @@ -3900,8 +3893,6 @@ int gen8_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, } cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_RECURRING_CMD, HFI_MSG_CMD); - cmd->hdr = MSG_HDR_SET_SEQNUM(cmd->hdr, - atomic_inc_return(&hfi->seqnum)); ret = gen8_hfi_send_cmd_async(adreno_dev, cmd, sizeof(*cmd)); @@ -4008,7 +3999,7 @@ static int send_context_unregister_hfi(struct adreno_device *adreno_dev, cmd.ts = ts, seqnum = atomic_inc_return(&gmu->hfi.seqnum); - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); add_waiter(hfi, cmd.hdr, &pending_ack); diff --git a/adreno_gen8_hwsched_hfi.h b/adreno_gen8_hwsched_hfi.h index 73ab8f397f..17bf380048 100644 --- a/adreno_gen8_hwsched_hfi.h +++ b/adreno_gen8_hwsched_hfi.h @@ -83,6 +83,8 @@ struct gen8_hwsched_hfi { * @flags: Flags to control the creation of new hardware fences */ unsigned long flags; + /** @seqnum: Sequence number for hardware fence packet header */ + atomic_t seqnum; } hw_fence; /** * @hw_fence_timer: Timer to trigger fault if unack'd hardware fence count does'nt drop From 21105257553938c8d874f6eeba5db20b8da3d4fe Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Wed, 11 Oct 2023 23:12:39 -0700 Subject: [PATCH 0551/1016] kgsl: Use macro while defining gpurev enum values For gen7 and later version gpurev values embeds major, minor and patchid. Use the macros to set the gpurev values to align major, minor version numbers with hardware specification. 
Change-Id: Ia574d5d0578f391e187363045ae39b764b49d158 Signed-off-by: Hareesh Gundu --- adreno.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/adreno.h b/adreno.h index 6e4d17ee41..884f5bdf4f 100644 --- a/adreno.h +++ b/adreno.h @@ -184,6 +184,9 @@ #define ADRENO_FW_PM4 1 #define ADRENO_FW_AQE 1 +#define ADRENO_GPUREV_VALUE(_major, _minor, _patchid) (((_major & 0xFF) << 16) | \ + ((_minor & 0xFF) << 8) | \ + (_patchid & 0xFF)) enum adreno_gpurev { ADRENO_REV_UNKNOWN = 0, ADRENO_REV_A304 = 304, @@ -229,14 +232,14 @@ enum adreno_gpurev { * Bits 8-15: Minor * Bits 0-7: Patch id */ - ADRENO_REV_GEN7_0_0 = 0x070000, - ADRENO_REV_GEN7_0_1 = 0x070001, - ADRENO_REV_GEN7_2_0 = 0x070200, - ADRENO_REV_GEN7_2_1 = 0x070201, - ADRENO_REV_GEN7_4_0 = 0x070400, - ADRENO_REV_GEN7_9_0 = 0x070900, - ADRENO_REV_GEN7_9_1 = 0x070901, - ADRENO_REV_GEN8_0_0 = 0x080000, + ADRENO_REV_GEN7_0_0 = ADRENO_GPUREV_VALUE(7, 0, 0), + ADRENO_REV_GEN7_0_1 = ADRENO_GPUREV_VALUE(7, 0, 1), + ADRENO_REV_GEN7_2_0 = ADRENO_GPUREV_VALUE(7, 2, 0), + ADRENO_REV_GEN7_2_1 = ADRENO_GPUREV_VALUE(7, 2, 1), + ADRENO_REV_GEN7_4_0 = ADRENO_GPUREV_VALUE(7, 4, 0), + ADRENO_REV_GEN7_9_0 = ADRENO_GPUREV_VALUE(7, 9, 0), + ADRENO_REV_GEN7_9_1 = ADRENO_GPUREV_VALUE(7, 9, 1), + ADRENO_REV_GEN8_0_0 = ADRENO_GPUREV_VALUE(8, 0, 0), }; #define ADRENO_SOFT_FAULT BIT(0) From c70d78b740a3ee7676e195c5eecd48879eeac214 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Fri, 13 Oct 2023 20:45:39 -0700 Subject: [PATCH 0552/1016] kgsl: gen8: Implement adreno_smmu_is_stalled() For Gen8 target SMMU stalled on fault status is not available through the HW register. Hence implement new function to get the smmu stall on fault status. 
Change-Id: I78aa030b8eb7586f1a6c272613491263fa2e95ba Signed-off-by: Hareesh Gundu --- adreno.c | 25 +++++++++++++++++++++++++ adreno.h | 10 ++++++++++ adreno_a5xx_snapshot.c | 4 +--- adreno_a6xx.h | 15 --------------- adreno_a6xx_gmu.c | 2 +- adreno_a6xx_gmu_snapshot.c | 2 +- adreno_a6xx_hwsched_hfi.c | 2 +- adreno_a6xx_snapshot.c | 4 ++-- adreno_dispatch.c | 14 +++++--------- adreno_gen7.h | 15 --------------- adreno_gen7_gmu.c | 2 +- adreno_gen7_gmu_snapshot.c | 2 +- adreno_gen7_hwsched_hfi.c | 2 +- adreno_gen7_snapshot.c | 2 +- adreno_gen8.h | 13 ------------- adreno_gen8_gmu.c | 2 +- adreno_gen8_hwsched_hfi.c | 2 +- adreno_hwsched.c | 15 +++++++++++---- adreno_hwsched.h | 15 ++++++++------- 19 files changed, 71 insertions(+), 77 deletions(-) diff --git a/adreno.c b/adreno.c index b3e6325e84..c7e8cd434d 100644 --- a/adreno.c +++ b/adreno.c @@ -3232,6 +3232,31 @@ void adreno_mark_for_coldboot(struct adreno_device *adreno_dev) set_bit(ADRENO_DEVICE_FORCE_COLDBOOT, &adreno_dev->priv); } +bool adreno_smmu_is_stalled(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_mmu *mmu = &device->mmu; + u32 fault, val; + + /* + * RBBM_STATUS3:SMMU_STALLED_ON_FAULT (BIT 24) tells if the GPU + * encountered a pagefault. Gen8 page fault status is checked from + * the software condition as RBBM_STATUS3 is not available. + */ + if (ADRENO_GPUREV(adreno_dev) < ADRENO_REV_GEN8_0_0) { + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS3, &val); + return (val & BIT(24)); + } + + if (WARN_ON(!adreno_dev->dispatch_ops || !adreno_dev->dispatch_ops->get_fault)) + return false; + + fault = adreno_dev->dispatch_ops->get_fault(adreno_dev); + + return ((fault & ADRENO_IOMMU_PAGE_FAULT) && + test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &mmu->pfpolicy)) ?
true : false; +} + int adreno_power_cycle(struct adreno_device *adreno_dev, void (*callback)(struct adreno_device *adreno_dev, void *priv), void *priv) diff --git a/adreno.h b/adreno.h index 884f5bdf4f..3971a40d52 100644 --- a/adreno.h +++ b/adreno.h @@ -494,6 +494,8 @@ struct adreno_dispatch_ops { void (*fault)(struct adreno_device *adreno_dev, u32 fault); /* @create_hw_fence: Create a hardware fence */ void (*create_hw_fence)(struct adreno_device *adreno_dev, struct kgsl_sync_fence *kfence); + /* @get_fault: Get the GPU fault status */ + u32 (*get_fault)(struct adreno_device *adreno_dev); }; /** @@ -1962,4 +1964,12 @@ int adreno_verify_cmdobj(struct kgsl_device_private *dev_priv, * */ void adreno_mark_for_coldboot(struct adreno_device *adreno_dev); + +/** + * adreno_smmu_is_stalled() - Check whether smmu is stalled or not + * @adreno_dev: Pointer to the adreno device + * + * Return - True if smmu is stalled or false otherwise + */ +bool adreno_smmu_is_stalled(struct adreno_device *adreno_dev); #endif /*__ADRENO_H */ diff --git a/adreno_a5xx_snapshot.c b/adreno_a5xx_snapshot.c index 0187097e84..232319a61e 100644 --- a/adreno_a5xx_snapshot.c +++ b/adreno_a5xx_snapshot.c @@ -753,7 +753,6 @@ static void _a5xx_do_crashdump(struct kgsl_device *device) { unsigned long wait_time; unsigned int reg = 0; - unsigned int val; crash_dump_valid = false; @@ -764,8 +763,7 @@ static void _a5xx_do_crashdump(struct kgsl_device *device) return; /* IF the SMMU is stalled we cannot do a crash dump */ - kgsl_regread(device, A5XX_RBBM_STATUS3, &val); - if (val & BIT(24)) + if (adreno_smmu_is_stalled(ADRENO_DEVICE(device))) return; /* Turn on APRIV so we can access the buffers */ diff --git a/adreno_a6xx.h b/adreno_a6xx.h index 48f6345846..35fec861a5 100644 --- a/adreno_a6xx.h +++ b/adreno_a6xx.h @@ -206,21 +206,6 @@ to_a6xx_core(struct adreno_device *adreno_dev) return container_of(core, struct adreno_a6xx_core, base); } -/** - * a6xx_is_smmu_stalled() - Check whether smmu is stalled or not - *
@device: Pointer to KGSL device - * - * Return - True if smmu is stalled or false otherwise - */ -static inline bool a6xx_is_smmu_stalled(struct kgsl_device *device) -{ - u32 val; - - kgsl_regread(device, A6XX_RBBM_STATUS3, &val); - - return val & BIT(24); -} - /* Preemption functions */ void a6xx_preemption_trigger(struct adreno_device *adreno_dev, bool atomic); void a6xx_preemption_schedule(struct adreno_device *adreno_dev); diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index 5ac165f8b1..b13ca9c9a4 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -2111,7 +2111,7 @@ void a6xx_gmu_send_nmi(struct kgsl_device *device, bool force) * Do not send NMI if the SMMU is stalled because GMU will not be able * to save cm3 state to DDR. */ - if (a6xx_gmu_gx_is_on(adreno_dev) && a6xx_is_smmu_stalled(device)) { + if (a6xx_gmu_gx_is_on(adreno_dev) && adreno_smmu_is_stalled(adreno_dev)) { dev_err(&gmu->pdev->dev, "Skipping NMI because SMMU is stalled\n"); return; diff --git a/adreno_a6xx_gmu_snapshot.c b/adreno_a6xx_gmu_snapshot.c index 9e0c73aabe..40fde7369d 100644 --- a/adreno_a6xx_gmu_snapshot.c +++ b/adreno_a6xx_gmu_snapshot.c @@ -457,7 +457,7 @@ void a6xx_gmu_device_snapshot(struct kgsl_device *device, ARRAY_SIZE(a6xx_gmu_gx_registers) / 2); /* A stalled SMMU can lead to NoC timeouts when host accesses DTCM */ - if (a6xx_is_smmu_stalled(device)) { + if (adreno_smmu_is_stalled(adreno_dev)) { dev_err(&gmu->pdev->dev, "Not dumping dtcm because SMMU is stalled\n"); return; diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index 3992cea8e9..5e3d21cfef 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -1937,7 +1937,7 @@ int a6xx_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, int ret; static bool active; - if (adreno_gpu_halt(adreno_dev) || hwsched_in_fault(hwsched)) + if (adreno_gpu_halt(adreno_dev) || adreno_hwsched_gpu_fault(adreno_dev)) return -EBUSY; if (test_bit(CMDOBJ_RECURRING_STOP, &cmdobj->priv)) { 
diff --git a/adreno_a6xx_snapshot.c b/adreno_a6xx_snapshot.c index d6fe0ecab5..b5ca76686a 100644 --- a/adreno_a6xx_snapshot.c +++ b/adreno_a6xx_snapshot.c @@ -1593,7 +1593,7 @@ static void _a6xx_do_crashdump(struct kgsl_device *device) return; /* IF the SMMU is stalled we cannot do a crash dump */ - if (a6xx_is_smmu_stalled(device)) + if (adreno_smmu_is_stalled(ADRENO_DEVICE(device))) return; /* Turn on APRIV for legacy targets so we can access the buffers */ @@ -1915,7 +1915,7 @@ void a6xx_snapshot(struct adreno_device *adreno_dev, /* Shader memory */ a6xx_snapshot_shader(device, snapshot); - if (!a6xx_is_smmu_stalled(device)) + if (!adreno_smmu_is_stalled(adreno_dev)) memset(a6xx_crashdump_registers->hostptr, 0xaa, a6xx_crashdump_registers->size); } diff --git a/adreno_dispatch.c b/adreno_dispatch.c index 834e2291eb..b5e5ed4b02 100644 --- a/adreno_dispatch.c +++ b/adreno_dispatch.c @@ -1915,17 +1915,12 @@ static int dispatcher_do_fault(struct adreno_device *adreno_dev) gx_on = adreno_gx_is_on(adreno_dev); /* - * Read RBBM_STATUS3:SMMU_STALLED_ON_FAULT (BIT 24) to tell if this - * function was entered after a pagefault. If so, only proceed if the - * fault handler has already run in the IRQ thread, else return early - * to give the fault handler a chance to run. + * Check if this function was entered after a pagefault. If so, only + * proceed if the fault handler has already run in the IRQ thread, + * else return early to give the fault handler a chance to run. 
*/ if (!(fault & ADRENO_IOMMU_PAGE_FAULT) && gx_on) { - unsigned int val; - - /* FIXME: Use adreno_is_smmu_stalled() for Gen8 */ - adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS3, &val); - if (val & BIT(24)) { + if (adreno_smmu_is_stalled(adreno_dev)) { mutex_unlock(&device->mutex); dev_err(device->dev, "SMMU is stalled without a pagefault\n"); @@ -2689,6 +2684,7 @@ static const struct adreno_dispatch_ops swsched_ops = { .setup_context = adreno_dispatcher_setup_context, .queue_context = adreno_dispatcher_queue_context, .fault = adreno_dispatcher_fault, + .get_fault = adreno_gpu_fault, }; /** diff --git a/adreno_gen7.h b/adreno_gen7.h index 31111f0116..5834a836f0 100644 --- a/adreno_gen7.h +++ b/adreno_gen7.h @@ -231,21 +231,6 @@ to_gen7_core(struct adreno_device *adreno_dev) return container_of(core, struct adreno_gen7_core, base); } -/** - * gen7_is_smmu_stalled() - Check whether smmu is stalled or not - * @device: Pointer to KGSL device - * - * Return - True if smmu is stalled or false otherwise - */ -static inline bool gen7_is_smmu_stalled(struct kgsl_device *device) -{ - u32 val; - - kgsl_regread(device, GEN7_RBBM_STATUS3, &val); - - return val & BIT(24); -} - /* Preemption functions */ void gen7_preemption_trigger(struct adreno_device *adreno_dev, bool atomic); void gen7_preemption_schedule(struct adreno_device *adreno_dev); diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 6b386e8d98..49f0257702 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -1682,7 +1682,7 @@ void gen7_gmu_send_nmi(struct kgsl_device *device, bool force) * Do not send NMI if the SMMU is stalled because GMU will not be able * to save cm3 state to DDR. 
*/ - if (gen7_gmu_gx_is_on(adreno_dev) && gen7_is_smmu_stalled(device)) { + if (gen7_gmu_gx_is_on(adreno_dev) && adreno_smmu_is_stalled(adreno_dev)) { dev_err(&gmu->pdev->dev, "Skipping NMI because SMMU is stalled\n"); return; diff --git a/adreno_gen7_gmu_snapshot.c b/adreno_gen7_gmu_snapshot.c index bd4df95184..dede529959 100644 --- a/adreno_gen7_gmu_snapshot.c +++ b/adreno_gen7_gmu_snapshot.c @@ -285,7 +285,7 @@ static void gen7_gmu_device_snapshot(struct kgsl_device *device, * A stalled SMMU can lead to NoC timeouts when host accesses DTCM. * DTCM can be read through side-band DBGC interface on gen7_2_x family. */ - if (gen7_is_smmu_stalled(device) && !adreno_is_gen7_2_x_family(adreno_dev)) { + if (adreno_smmu_is_stalled(adreno_dev) && !adreno_is_gen7_2_x_family(adreno_dev)) { dev_err(&gmu->pdev->dev, "Not dumping dtcm because SMMU is stalled\n"); return; diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 6daf926268..10ede8e356 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -3866,7 +3866,7 @@ int gen7_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, int ret; static bool active; - if (adreno_gpu_halt(adreno_dev) || hwsched_in_fault(hwsched)) + if (adreno_gpu_halt(adreno_dev) || adreno_hwsched_gpu_fault(adreno_dev)) return -EBUSY; if (test_bit(CMDOBJ_RECURRING_STOP, &cmdobj->priv)) { diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c index 51aa8fd31a..2fd728852c 100644 --- a/adreno_gen7_snapshot.c +++ b/adreno_gen7_snapshot.c @@ -142,7 +142,7 @@ static void CD_FINISH(u64 *ptr, u32 offset) static bool CD_SCRIPT_CHECK(struct kgsl_device *device) { - return (gen7_is_smmu_stalled(device) || (!device->snapshot_crashdumper) || + return (adreno_smmu_is_stalled(ADRENO_DEVICE(device)) || (!device->snapshot_crashdumper) || IS_ERR_OR_NULL(gen7_capturescript) || IS_ERR_OR_NULL(gen7_crashdump_registers) || gen7_crashdump_timedout); diff --git a/adreno_gen8.h b/adreno_gen8.h index ae39acba61..444bb2bb41 
100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -236,19 +236,6 @@ to_gen8_core(struct adreno_device *adreno_dev) return container_of(core, struct adreno_gen8_core, base); } -/** - * gen8_is_smmu_stalled() - Check whether smmu is stalled or not - * @device: Pointer to KGSL device - * - * Return - True if smmu is stalled or false otherwise - */ -static inline bool gen8_is_smmu_stalled(struct kgsl_device *device) -{ - - /* FIXME: Implment SW smmu stall check */ - return false; -} - /* Preemption functions */ void gen8_preemption_trigger(struct adreno_device *adreno_dev, bool atomic); void gen8_preemption_schedule(struct adreno_device *adreno_dev); diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index cd4603f448..dbcc054acb 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -1639,7 +1639,7 @@ void gen8_gmu_send_nmi(struct kgsl_device *device, bool force) * Do not send NMI if the SMMU is stalled because GMU will not be able * to save cm3 state to DDR. */ - if (gen8_gmu_gx_is_on(adreno_dev) && gen8_is_smmu_stalled(device)) { + if (gen8_gmu_gx_is_on(adreno_dev) && adreno_smmu_is_stalled(adreno_dev)) { dev_err(&gmu->pdev->dev, "Skipping NMI because SMMU is stalled\n"); return; diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 531122964f..bfc65900a3 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -3842,7 +3842,7 @@ int gen8_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, int ret; static bool active; - if (adreno_gpu_halt(adreno_dev) || hwsched_in_fault(hwsched)) + if (adreno_gpu_halt(adreno_dev) || adreno_hwsched_gpu_fault(adreno_dev)) return -EBUSY; if (test_bit(CMDOBJ_RECURRING_STOP, &cmdobj->priv)) { diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 48660a2476..b057237306 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -707,6 +707,13 @@ static inline void _decrement_submit_now(struct kgsl_device *device) spin_unlock(&device->submit_lock); } +u32 adreno_hwsched_gpu_fault(struct 
adreno_device *adreno_dev) +{ + /* make sure we're reading the latest value */ + smp_rmb(); + return atomic_read(&adreno_dev->hwsched.fault); +} + /** * adreno_hwsched_issuecmds() - Issue commmands from pending contexts * @adreno_dev: Pointer to the adreno device struct @@ -733,7 +740,7 @@ static void adreno_hwsched_issuecmds(struct adreno_device *adreno_dev) goto done; } - if (!hwsched_in_fault(hwsched)) + if (!adreno_hwsched_gpu_fault(adreno_dev)) hwsched_issuecmds(adreno_dev); if (hwsched->inflight > 0) { @@ -2017,6 +2024,7 @@ static const struct adreno_dispatch_ops hwsched_ops = { .queue_context = adreno_hwsched_queue_context, .fault = adreno_hwsched_fault, .create_hw_fence = adreno_hwsched_create_hw_fence, + .get_fault = adreno_hwsched_gpu_fault, }; static void hwsched_lsr_check(struct work_struct *work) @@ -2238,7 +2246,6 @@ static int hwsched_idle(struct adreno_device *adreno_dev) int adreno_hwsched_idle(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct adreno_hwsched *hwsched = &adreno_dev->hwsched; unsigned long wait = jiffies + msecs_to_jiffies(ADRENO_IDLE_TIMEOUT); const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); int ret; @@ -2254,7 +2261,7 @@ int adreno_hwsched_idle(struct adreno_device *adreno_dev) return ret; do { - if (hwsched_in_fault(hwsched)) + if (adreno_hwsched_gpu_fault(adreno_dev)) return -EDEADLK; if (gpudev->hw_isidle(adreno_dev)) @@ -2266,7 +2273,7 @@ int adreno_hwsched_idle(struct adreno_device *adreno_dev) * without checking if the gpu is idle. check one last time before we * return failure. 
*/ - if (hwsched_in_fault(hwsched)) + if (adreno_hwsched_gpu_fault(adreno_dev)) return -EDEADLK; if (gpudev->hw_isidle(adreno_dev)) diff --git a/adreno_hwsched.h b/adreno_hwsched.h index 14610f5f52..d0b315e49e 100644 --- a/adreno_hwsched.h +++ b/adreno_hwsched.h @@ -202,13 +202,6 @@ void adreno_hwsched_unregister_contexts(struct adreno_device *adreno_dev); */ int adreno_hwsched_idle(struct adreno_device *adreno_dev); -static inline bool hwsched_in_fault(struct adreno_hwsched *hwsched) -{ - /* make sure we're reading the latest value */ - smp_rmb(); - return atomic_read(&hwsched->fault) != 0; -} - void adreno_hwsched_retire_cmdobj(struct adreno_hwsched *hwsched, struct kgsl_drawobj_cmd *cmdobj); @@ -252,4 +245,12 @@ void adreno_hwsched_replay(struct adreno_device *adreno_dev); * Return: The value of the key or 0 if key is not found */ u32 adreno_hwsched_parse_payload(struct payload_section *payload, u32 key); + +/** + * adreno_hwsched_gpu_fault - Gets hwsched gpu fault info + * @adreno_dev: pointer to the adreno device + * + * Returns zero for hwsched fault else non zero value + */ +u32 adreno_hwsched_gpu_fault(struct adreno_device *adreno_dev); #endif From 6e125d9f87f1ba2ed68305e4694653c726ce9d74 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Thu, 2 Nov 2023 09:30:07 -0700 Subject: [PATCH 0553/1016] kgsl: build: Add hw_fence_driver compilation to sun This is needed for kgsl to compile on sun until we add synx support. 
Change-Id: I64ea45de62fee0bff8906b2f01262a6325c18309 Signed-off-by: Harshdeep Dhatt Signed-off-by: Hareesh Gundu --- build/kgsl_defs.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build/kgsl_defs.bzl b/build/kgsl_defs.bzl index 709a25f6b2..c8646e39c6 100644 --- a/build/kgsl_defs.bzl +++ b/build/kgsl_defs.bzl @@ -85,10 +85,10 @@ def external_deps(target, variant): defconfigs = [] # Add msm_hw_fence in the dependency and defconfig lists for targets that use it - if target in [ "pineapple" ]: + if target in [ "pineapple", "sun" ]: deplist = deplist + [ "//vendor/qcom/opensource/mm-drivers/hw_fence:{}_msm_hw_fence".format(tv), - "//vendor/qcom/opensource/mm-drivers/hw_fence:hw_fence_headers".format(tv) + "//vendor/qcom/opensource/mm-drivers/hw_fence:hw_fence_headers" ] defconfigs = defconfigs + [ "//vendor/qcom/opensource/mm-drivers/hw_fence:defconfig" From 83cac6fa2d43f74134217d67dae0a05b06b4b6e2 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 26 Sep 2023 18:01:19 -0700 Subject: [PATCH 0554/1016] kgsl: gen8: Add perfcounter support Add perfcounter support for gen8 targets. 
Change-Id: I9e6d08ef57b1ee88753287bdd341bef2e01f0e7b Signed-off-by: Kamal Agrawal Signed-off-by: Hareesh Gundu --- Kbuild | 1 + adreno-gpulist.h | 1 + adreno_gen8.c | 32 +- adreno_gen8.h | 1 + adreno_gen8_perfcounter.c | 1189 +++++++++++++++++++++++++++++++++++++ build/kgsl_defs.bzl | 1 + gen8_reg.h | 637 ++++++++++++++++---- 7 files changed, 1746 insertions(+), 116 deletions(-) create mode 100644 adreno_gen8_perfcounter.c diff --git a/Kbuild b/Kbuild index befd8d9e0d..47363980b3 100644 --- a/Kbuild +++ b/Kbuild @@ -127,6 +127,7 @@ msm_kgsl-y += \ adreno_gen8_hfi.o \ adreno_gen8_hwsched.o \ adreno_gen8_hwsched_hfi.o \ + adreno_gen8_perfcounter.o \ adreno_gen8_preempt.o \ adreno_gen8_ringbuffer.o \ adreno_gen8_rpmh.o \ diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 13e94204b6..b9c3eb609b 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2627,6 +2627,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION, .gpudev = &adreno_gen8_hwsched_gpudev.base, + .perfcounters = &adreno_gen8_hwsched_perfcounters, .uche_gmem_alignment = SZ_16M, .gmem_size = 12 * SZ_1M, .bus_width = 32, diff --git a/adreno_gen8.c b/adreno_gen8.c index 02c6459dd3..026e16469e 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -1724,9 +1724,13 @@ static u32 gen8_register_offsets[ADRENO_REG_REGISTER_MAX] = { static u32 _get_pipeid(u32 groupid) { - u32 pipe; - switch (groupid) { + case KGSL_PERFCOUNTER_GROUP_BV_PC: + fallthrough; + case KGSL_PERFCOUNTER_GROUP_BV_VFD: + fallthrough; + case KGSL_PERFCOUNTER_GROUP_BV_VPC: + fallthrough; case KGSL_PERFCOUNTER_GROUP_BV_TSE: fallthrough; case KGSL_PERFCOUNTER_GROUP_BV_RAS: @@ -1734,22 +1738,30 @@ static u32 _get_pipeid(u32 groupid) case KGSL_PERFCOUNTER_GROUP_BV_LRZ: fallthrough; case KGSL_PERFCOUNTER_GROUP_BV_HLSQ: - pipe = PIPE_BV; - break; + return PIPE_BV; + case KGSL_PERFCOUNTER_GROUP_PC: + fallthrough; + case KGSL_PERFCOUNTER_GROUP_VFD: + fallthrough; 
+ case KGSL_PERFCOUNTER_GROUP_HLSQ: + fallthrough; + case KGSL_PERFCOUNTER_GROUP_VPC: + fallthrough; + case KGSL_PERFCOUNTER_GROUP_CCU: + fallthrough; + case KGSL_PERFCOUNTER_GROUP_CMP: + fallthrough; case KGSL_PERFCOUNTER_GROUP_TSE: fallthrough; case KGSL_PERFCOUNTER_GROUP_RAS: fallthrough; case KGSL_PERFCOUNTER_GROUP_LRZ: fallthrough; - case KGSL_PERFCOUNTER_GROUP_HLSQ: - pipe = PIPE_BR; - break; + case KGSL_PERFCOUNTER_GROUP_RB: + return PIPE_BR; default: - pipe = PIPE_NONE; + return PIPE_NONE; } - - return pipe; } int gen8_perfcounter_remove(struct adreno_device *adreno_dev, diff --git a/adreno_gen8.h b/adreno_gen8.h index 444bb2bb41..d5aee01180 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -18,6 +18,7 @@ struct gen8_snapshot_block_list; extern const struct adreno_power_ops gen8_gmu_power_ops; extern const struct adreno_power_ops gen8_hwsched_power_ops; extern const struct adreno_perfcounters adreno_gen8_perfcounters; +extern const struct adreno_perfcounters adreno_gen8_hwsched_perfcounters; struct gen8_gpudev { struct adreno_gpudev base; diff --git a/adreno_gen8_perfcounter.c b/adreno_gen8_perfcounter.c new file mode 100644 index 0000000000..7df4e95538 --- /dev/null +++ b/adreno_gen8_perfcounter.c @@ -0,0 +1,1189 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ + +#include "adreno.h" +#include "adreno_gen8.h" +#include "adreno_gen8_hwsched_hfi.h" +#include "adreno_perfcounter.h" +#include "adreno_pm4types.h" +#include "kgsl_device.h" + +/* + * For registers that do not get restored on power cycle, read the value and add + * the stored shadow value + */ +static u64 gen8_counter_read_norestore(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, u32 counter) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + u32 hi, lo; + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + return ((((u64) hi) << 32) | lo) + reg->value; +} + +static int gen8_counter_br_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + u32 counter, u32 countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + int ret = 0; + u32 val = 0; + + kgsl_regread(device, GEN8_CP_APERTURE_CNTL_HOST, &val); + kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, FIELD_PREP(GENMASK(13, 12), PIPE_BR)); + + if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) + ret = gen8_perfcounter_update(adreno_dev, reg, true, + FIELD_PREP(GENMASK(13, 12), PIPE_BR)); + else + kgsl_regwrite(device, reg->select, countable); + + kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, val); + + if (!ret) + reg->value = 0; + + return ret; +} + +static int gen8_counter_bv_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + u32 counter, u32 countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + int ret = 0; + u32 val = 0; + + kgsl_regread(device, GEN8_CP_APERTURE_CNTL_HOST, &val); + kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, FIELD_PREP(GENMASK(13, 12), PIPE_BV)); + + if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) + 
ret = gen8_perfcounter_update(adreno_dev, reg, true, + FIELD_PREP(GENMASK(13, 12), PIPE_BV)); + else + kgsl_regwrite(device, reg->select, countable); + + kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, val); + + if (!ret) + reg->value = 0; + + return ret; +} + +static int gen8_counter_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + u32 counter, u32 countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + int ret = 0; + + if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) + ret = gen8_perfcounter_update(adreno_dev, reg, true, + FIELD_PREP(GENMASK(13, 12), PIPE_NONE)); + else + kgsl_regwrite(device, reg->select, countable); + + if (!ret) + reg->value = 0; + + return ret; +} + +static int gen8_hwsched_counter_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + u32 counter, u32 countable) +{ + if (!(KGSL_DEVICE(adreno_dev)->state == KGSL_STATE_ACTIVE)) + return gen8_counter_enable(adreno_dev, group, counter, countable); + + return gen8_hwsched_counter_inline_enable(adreno_dev, group, counter, countable); +} + +/* This function is specific to sw-scheduler and not applicable for hw-scheduler */ +static int gen8_counter_inline_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + u32 counter, u32 countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + struct adreno_ringbuffer *rb = &adreno_dev->ringbuffers[0]; + u32 cmds[3]; + int ret; + + /* Fallback when we reach here from GPU initialization sequence */ + if (!(device->state == KGSL_STATE_ACTIVE)) + return gen8_counter_enable(adreno_dev, group, counter, + countable); + + if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) + gen8_perfcounter_update(adreno_dev, reg, false, + FIELD_PREP(GENMASK(13, 12), PIPE_NONE)); + + cmds[0] = 
cp_type7_packet(CP_WAIT_FOR_IDLE, 0); + cmds[1] = cp_type4_packet(reg->select, 1); + cmds[2] = countable; + + /* submit to highest priority RB always */ + ret = gen8_ringbuffer_addcmds(adreno_dev, rb, NULL, + F_NOTPROTECTED, cmds, 3, 0, NULL); + if (ret) + return ret; + + /* + * schedule dispatcher to make sure rb[0] is run, because + * if the current RB is not rb[0] and gpu is idle then + * rb[0] will not get scheduled to run + */ + if (adreno_dev->cur_rb != rb) + adreno_dispatcher_schedule(device); + + /* wait for the above commands submitted to complete */ + ret = adreno_ringbuffer_waittimestamp(rb, rb->timestamp, + ADRENO_IDLE_TIMEOUT); + + if (ret) { + /* + * If we were woken up because of cancelling rb events + * either due to soft reset or adreno_stop, ignore the + * error and return 0 here. The perfcounter is already + * set up in software and it will be programmed in + * hardware when we wake up or come up after soft reset, + * by adreno_perfcounter_restore. + */ + if (ret == -EAGAIN) + ret = 0; + else + dev_err_ratelimited(device->dev, + "Perfcounter %s/%u/%u start via commands failed %d\n", + group->name, counter, countable, ret); + } + + if (!ret) + reg->value = 0; + + return ret; +} + +static u64 gen8_counter_read(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, u32 counter) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + u32 hi, lo; + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + /* These registers are restored on power resume */ + return (((u64) hi) << 32) | lo; +} + +static int gen8_counter_gbif_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + u32 counter, u32 countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + u32 shift = counter << 3; + u32 select = BIT(counter); + + if 
(countable > 0xff) + return -EINVAL; + + /* + * Write 1, followed by 0 to CLR register for + * clearing the counter + */ + kgsl_regrmw(device, GEN8_GBIF_PERF_PWR_CNT_CLR, select, select); + kgsl_regrmw(device, GEN8_GBIF_PERF_PWR_CNT_CLR, select, 0); + + /* select the desired countable */ + kgsl_regrmw(device, reg->select, 0xff << shift, countable << shift); + + /* enable counter */ + kgsl_regrmw(device, GEN8_GBIF_PERF_PWR_CNT_EN, select, select); + + reg->value = 0; + return 0; +} + +static int gen8_counter_gbif_pwr_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + u32 counter, u32 countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + u32 shift = counter << 3; + u32 select = BIT(16 + counter); + + if (countable > 0xff) + return -EINVAL; + + /* + * Write 1, followed by 0 to CLR register for + * clearing the counter + */ + kgsl_regrmw(device, GEN8_GBIF_PERF_PWR_CNT_CLR, select, select); + kgsl_regrmw(device, GEN8_GBIF_PERF_PWR_CNT_CLR, select, 0); + + /* select the desired countable */ + kgsl_regrmw(device, reg->select, 0xff << shift, countable << shift); + + /* Enable the counter */ + kgsl_regrmw(device, GEN8_GBIF_PERF_PWR_CNT_EN, select, select); + + reg->value = 0; + return 0; +} + +static int gen8_counter_alwayson_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + u32 counter, u32 countable) +{ + return 0; +} + +static u64 gen8_counter_alwayson_read(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, u32 counter) +{ + struct adreno_perfcount_register *reg = &group->regs[counter]; + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + return gpudev->read_alwayson(adreno_dev) + reg->value; +} + +static void gen8_write_gmu_counter_enable(struct kgsl_device *device, + struct adreno_perfcount_register *reg, u32 bit, u32 countable) +{ + kgsl_regrmw(device, 
reg->select, 0xff << bit, countable << bit); +} + +static int gen8_counter_gmu_pwr_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + u32 counter, u32 countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + /* Four counters can be programmed per select register */ + int offset = counter % 4; + + if (countable > 0xff) + return -EINVAL; + + gen8_write_gmu_counter_enable(device, reg, offset << 3, countable); + + kgsl_regwrite(device, GEN8_GMUCX_POWER_COUNTER_ENABLE, 1); + + reg->value = 0; + return 0; +} + +static int gen8_counter_gmu_perf_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + u32 counter, u32 countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + /* Four counters can be programmed per select register */ + int offset = counter % 4; + + if (countable > 0xff) + return -EINVAL; + + gen8_write_gmu_counter_enable(device, reg, offset << 3, countable); + + kgsl_regwrite(device, GEN8_GMUCX_PERF_COUNTER_ENABLE, 1); + + reg->value = 0; + return 0; +} + +static struct adreno_perfcount_register gen8_perfcounters_cp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_0_LO, + GEN8_RBBM_PERFCTR_CP_0_HI, -1, GEN8_CP_PERFCTR_CP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_1_LO, + GEN8_RBBM_PERFCTR_CP_1_HI, -1, GEN8_CP_PERFCTR_CP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_2_LO, + GEN8_RBBM_PERFCTR_CP_2_HI, -1, GEN8_CP_PERFCTR_CP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_3_LO, + GEN8_RBBM_PERFCTR_CP_3_HI, -1, GEN8_CP_PERFCTR_CP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_4_LO, + GEN8_RBBM_PERFCTR_CP_4_HI, -1, GEN8_CP_PERFCTR_CP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_5_LO, + GEN8_RBBM_PERFCTR_CP_5_HI, -1, 
GEN8_CP_PERFCTR_CP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_6_LO, + GEN8_RBBM_PERFCTR_CP_6_HI, -1, GEN8_CP_PERFCTR_CP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_7_LO, + GEN8_RBBM_PERFCTR_CP_7_HI, -1, GEN8_CP_PERFCTR_CP_SEL_7 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_8_LO, + GEN8_RBBM_PERFCTR_CP_8_HI, -1, GEN8_CP_PERFCTR_CP_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_9_LO, + GEN8_RBBM_PERFCTR_CP_9_HI, -1, GEN8_CP_PERFCTR_CP_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_10_LO, + GEN8_RBBM_PERFCTR_CP_10_HI, -1, GEN8_CP_PERFCTR_CP_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_11_LO, + GEN8_RBBM_PERFCTR_CP_11_HI, -1, GEN8_CP_PERFCTR_CP_SEL_11 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_12_LO, + GEN8_RBBM_PERFCTR_CP_12_HI, -1, GEN8_CP_PERFCTR_CP_SEL_12 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_13_LO, + GEN8_RBBM_PERFCTR_CP_13_HI, -1, GEN8_CP_PERFCTR_CP_SEL_13 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_bv_cp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_CP_0_LO, + GEN8_RBBM_PERFCTR2_CP_0_HI, -1, GEN8_CP_PERFCTR_CP_SEL_14 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_CP_1_LO, + GEN8_RBBM_PERFCTR2_CP_1_HI, -1, GEN8_CP_PERFCTR_CP_SEL_15 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_CP_2_LO, + GEN8_RBBM_PERFCTR2_CP_2_HI, -1, GEN8_CP_PERFCTR_CP_SEL_16 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_CP_3_LO, + GEN8_RBBM_PERFCTR2_CP_3_HI, -1, GEN8_CP_PERFCTR_CP_SEL_17 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_CP_4_LO, + GEN8_RBBM_PERFCTR2_CP_4_HI, -1, GEN8_CP_PERFCTR_CP_SEL_18 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_CP_5_LO, + GEN8_RBBM_PERFCTR2_CP_5_HI, -1, GEN8_CP_PERFCTR_CP_SEL_19 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_CP_6_LO, + GEN8_RBBM_PERFCTR2_CP_6_HI, -1, 
GEN8_CP_PERFCTR_CP_SEL_20 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_rbbm[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RBBM_0_LO, + GEN8_RBBM_PERFCTR_RBBM_0_HI, -1, GEN8_RBBM_PERFCTR_RBBM_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RBBM_1_LO, + GEN8_RBBM_PERFCTR_RBBM_1_HI, -1, GEN8_RBBM_PERFCTR_RBBM_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RBBM_2_LO, + GEN8_RBBM_PERFCTR_RBBM_2_HI, -1, GEN8_RBBM_PERFCTR_RBBM_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RBBM_3_LO, + GEN8_RBBM_PERFCTR_RBBM_3_HI, -1, GEN8_RBBM_PERFCTR_RBBM_SEL_3 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_pc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_PC_0_LO, + GEN8_RBBM_PERFCTR_PC_0_HI, -1, GEN8_PC_PERFCTR_PC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_PC_1_LO, + GEN8_RBBM_PERFCTR_PC_1_HI, -1, GEN8_PC_PERFCTR_PC_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_PC_2_LO, + GEN8_RBBM_PERFCTR_PC_2_HI, -1, GEN8_PC_PERFCTR_PC_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_PC_3_LO, + GEN8_RBBM_PERFCTR_PC_3_HI, -1, GEN8_PC_PERFCTR_PC_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_PC_4_LO, + GEN8_RBBM_PERFCTR_PC_4_HI, -1, GEN8_PC_PERFCTR_PC_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_PC_5_LO, + GEN8_RBBM_PERFCTR_PC_5_HI, -1, GEN8_PC_PERFCTR_PC_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_PC_6_LO, + GEN8_RBBM_PERFCTR_PC_6_HI, -1, GEN8_PC_PERFCTR_PC_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_PC_7_LO, + GEN8_RBBM_PERFCTR_PC_7_HI, -1, GEN8_PC_PERFCTR_PC_SEL_7 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_bv_pc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_0_LO, + GEN8_RBBM_PERFCTR_BV_PC_0_HI, -1, GEN8_PC_PERFCTR_PC_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_1_LO, + 
GEN8_RBBM_PERFCTR_BV_PC_1_HI, -1, GEN8_PC_PERFCTR_PC_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_2_LO, + GEN8_RBBM_PERFCTR_BV_PC_2_HI, -1, GEN8_PC_PERFCTR_PC_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_3_LO, + GEN8_RBBM_PERFCTR_BV_PC_3_HI, -1, GEN8_PC_PERFCTR_PC_SEL_11 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_4_LO, + GEN8_RBBM_PERFCTR_BV_PC_4_HI, -1, GEN8_PC_PERFCTR_PC_SEL_12 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_5_LO, + GEN8_RBBM_PERFCTR_BV_PC_5_HI, -1, GEN8_PC_PERFCTR_PC_SEL_13 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_6_LO, + GEN8_RBBM_PERFCTR_BV_PC_6_HI, -1, GEN8_PC_PERFCTR_PC_SEL_14 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_7_LO, + GEN8_RBBM_PERFCTR_BV_PC_7_HI, -1, GEN8_PC_PERFCTR_PC_SEL_15 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_vfd[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VFD_0_LO, + GEN8_RBBM_PERFCTR_VFD_0_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VFD_1_LO, + GEN8_RBBM_PERFCTR_VFD_1_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VFD_2_LO, + GEN8_RBBM_PERFCTR_VFD_2_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VFD_3_LO, + GEN8_RBBM_PERFCTR_VFD_3_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VFD_4_LO, + GEN8_RBBM_PERFCTR_VFD_4_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VFD_5_LO, + GEN8_RBBM_PERFCTR_VFD_5_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VFD_6_LO, + GEN8_RBBM_PERFCTR_VFD_6_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VFD_7_LO, + GEN8_RBBM_PERFCTR_VFD_7_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_7 }, +}; + +static struct 
adreno_perfcount_register gen8_perfcounters_bv_vfd[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VFD_0_LO, + GEN8_RBBM_PERFCTR_BV_VFD_0_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VFD_1_LO, + GEN8_RBBM_PERFCTR_BV_VFD_1_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VFD_2_LO, + GEN8_RBBM_PERFCTR_BV_VFD_2_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VFD_3_LO, + GEN8_RBBM_PERFCTR_BV_VFD_3_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_11 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VFD_4_LO, + GEN8_RBBM_PERFCTR_BV_VFD_4_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_12 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VFD_5_LO, + GEN8_RBBM_PERFCTR_BV_VFD_5_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_13 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VFD_6_LO, + GEN8_RBBM_PERFCTR_BV_VFD_6_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_14 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VFD_7_LO, + GEN8_RBBM_PERFCTR_BV_VFD_7_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_15 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_hlsq[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_HLSQ_0_LO, + GEN8_RBBM_PERFCTR_HLSQ_0_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_HLSQ_1_LO, + GEN8_RBBM_PERFCTR_HLSQ_1_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_HLSQ_2_LO, + GEN8_RBBM_PERFCTR_HLSQ_2_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_HLSQ_3_LO, + GEN8_RBBM_PERFCTR_HLSQ_3_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_HLSQ_4_LO, + GEN8_RBBM_PERFCTR_HLSQ_4_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_HLSQ_5_LO, + GEN8_RBBM_PERFCTR_HLSQ_5_HI, -1, 
GEN8_SP_PERFCTR_HLSQ_SEL_5 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_bv_hlsq[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_HLSQ_0_LO, + GEN8_RBBM_PERFCTR2_HLSQ_0_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_HLSQ_1_LO, + GEN8_RBBM_PERFCTR2_HLSQ_1_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_HLSQ_2_LO, + GEN8_RBBM_PERFCTR2_HLSQ_2_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_HLSQ_3_LO, + GEN8_RBBM_PERFCTR2_HLSQ_3_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_HLSQ_4_LO, + GEN8_RBBM_PERFCTR2_HLSQ_4_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_HLSQ_5_LO, + GEN8_RBBM_PERFCTR2_HLSQ_5_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_5 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_vpc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VPC_0_LO, + GEN8_RBBM_PERFCTR_VPC_0_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VPC_1_LO, + GEN8_RBBM_PERFCTR_VPC_1_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VPC_2_LO, + GEN8_RBBM_PERFCTR_VPC_2_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VPC_3_LO, + GEN8_RBBM_PERFCTR_VPC_3_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VPC_4_LO, + GEN8_RBBM_PERFCTR_VPC_4_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VPC_5_LO, + GEN8_RBBM_PERFCTR_VPC_5_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_5 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_bv_vpc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VPC_0_LO, + GEN8_RBBM_PERFCTR_BV_VPC_0_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 
0, 0, GEN8_RBBM_PERFCTR_BV_VPC_1_LO, + GEN8_RBBM_PERFCTR_BV_VPC_1_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_7 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VPC_2_LO, + GEN8_RBBM_PERFCTR_BV_VPC_2_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VPC_3_LO, + GEN8_RBBM_PERFCTR_BV_VPC_3_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VPC_4_LO, + GEN8_RBBM_PERFCTR_BV_VPC_4_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VPC_5_LO, + GEN8_RBBM_PERFCTR_BV_VPC_5_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_11 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_ccu[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CCU_0_LO, + GEN8_RBBM_PERFCTR_CCU_0_HI, -1, GEN8_RB_PERFCTR_CCU_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CCU_1_LO, + GEN8_RBBM_PERFCTR_CCU_1_HI, -1, GEN8_RB_PERFCTR_CCU_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CCU_2_LO, + GEN8_RBBM_PERFCTR_CCU_2_HI, -1, GEN8_RB_PERFCTR_CCU_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CCU_3_LO, + GEN8_RBBM_PERFCTR_CCU_3_HI, -1, GEN8_RB_PERFCTR_CCU_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CCU_4_LO, + GEN8_RBBM_PERFCTR_CCU_4_HI, -1, GEN8_RB_PERFCTR_CCU_SEL_4 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_tse[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TSE_0_LO, + GEN8_RBBM_PERFCTR_TSE_0_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TSE_1_LO, + GEN8_RBBM_PERFCTR_TSE_1_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TSE_2_LO, + GEN8_RBBM_PERFCTR_TSE_2_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TSE_3_LO, + GEN8_RBBM_PERFCTR_TSE_3_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_3 }, +}; + +static struct 
adreno_perfcount_register gen8_perfcounters_bv_tse[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_TSE_0_LO, + GEN8_RBBM_PERFCTR_BV_TSE_0_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_TSE_1_LO, + GEN8_RBBM_PERFCTR_BV_TSE_1_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_TSE_2_LO, + GEN8_RBBM_PERFCTR_BV_TSE_2_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_TSE_3_LO, + GEN8_RBBM_PERFCTR_BV_TSE_3_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_3 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_ras[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RAS_0_LO, + GEN8_RBBM_PERFCTR_RAS_0_HI, -1, GEN8_GRAS_PERFCTR_RAS_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RAS_1_LO, + GEN8_RBBM_PERFCTR_RAS_1_HI, -1, GEN8_GRAS_PERFCTR_RAS_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RAS_2_LO, + GEN8_RBBM_PERFCTR_RAS_2_HI, -1, GEN8_GRAS_PERFCTR_RAS_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RAS_3_LO, + GEN8_RBBM_PERFCTR_RAS_3_HI, -1, GEN8_GRAS_PERFCTR_RAS_SEL_3 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_bv_ras[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_RAS_0_LO, + GEN8_RBBM_PERFCTR_BV_RAS_0_HI, -1, GEN8_GRAS_PERFCTR_RAS_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_RAS_1_LO, + GEN8_RBBM_PERFCTR_BV_RAS_1_HI, -1, GEN8_GRAS_PERFCTR_RAS_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_RAS_2_LO, + GEN8_RBBM_PERFCTR_BV_RAS_2_HI, -1, GEN8_GRAS_PERFCTR_RAS_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_RAS_3_LO, + GEN8_RBBM_PERFCTR_BV_RAS_3_HI, -1, GEN8_GRAS_PERFCTR_RAS_SEL_3 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_uche[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_0_LO, + GEN8_RBBM_PERFCTR_UCHE_0_HI, -1, 
GEN8_UCHE_PERFCTR_UCHE_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_1_LO, + GEN8_RBBM_PERFCTR_UCHE_1_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_2_LO, + GEN8_RBBM_PERFCTR_UCHE_2_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_3_LO, + GEN8_RBBM_PERFCTR_UCHE_3_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_4_LO, + GEN8_RBBM_PERFCTR_UCHE_4_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_5_LO, + GEN8_RBBM_PERFCTR_UCHE_5_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_6_LO, + GEN8_RBBM_PERFCTR_UCHE_6_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_7_LO, + GEN8_RBBM_PERFCTR_UCHE_7_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_7 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_8_LO, + GEN8_RBBM_PERFCTR_UCHE_8_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_9_LO, + GEN8_RBBM_PERFCTR_UCHE_9_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_10_LO, + GEN8_RBBM_PERFCTR_UCHE_10_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_11_LO, + GEN8_RBBM_PERFCTR_UCHE_11_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_11 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_12_LO, + GEN8_RBBM_PERFCTR_UCHE_12_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_12 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_13_LO, + GEN8_RBBM_PERFCTR_UCHE_13_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_13 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_14_LO, + GEN8_RBBM_PERFCTR_UCHE_14_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_14 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_15_LO, + 
GEN8_RBBM_PERFCTR_UCHE_15_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_15 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_16_LO, + GEN8_RBBM_PERFCTR_UCHE_16_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_16 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_17_LO, + GEN8_RBBM_PERFCTR_UCHE_17_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_17 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_18_LO, + GEN8_RBBM_PERFCTR_UCHE_18_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_18 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_19_LO, + GEN8_RBBM_PERFCTR_UCHE_19_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_19 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_20_LO, + GEN8_RBBM_PERFCTR_UCHE_20_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_20 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_21_LO, + GEN8_RBBM_PERFCTR_UCHE_21_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_21 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_22_LO, + GEN8_RBBM_PERFCTR_UCHE_22_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_22 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_23_LO, + GEN8_RBBM_PERFCTR_UCHE_23_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_23 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_tp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_0_LO, + GEN8_RBBM_PERFCTR_TP_0_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_1_LO, + GEN8_RBBM_PERFCTR_TP_1_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_2_LO, + GEN8_RBBM_PERFCTR_TP_2_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_3_LO, + GEN8_RBBM_PERFCTR_TP_3_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_4_LO, + GEN8_RBBM_PERFCTR_TP_4_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_5_LO, + GEN8_RBBM_PERFCTR_TP_5_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_5 }, + { 
KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_6_LO, + GEN8_RBBM_PERFCTR_TP_6_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_7_LO, + GEN8_RBBM_PERFCTR_TP_7_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_7 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_8_LO, + GEN8_RBBM_PERFCTR_TP_8_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_9_LO, + GEN8_RBBM_PERFCTR_TP_9_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_10_LO, + GEN8_RBBM_PERFCTR_TP_10_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_11_LO, + GEN8_RBBM_PERFCTR_TP_11_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_11 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_bv_tp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_TP_0_LO, + GEN8_RBBM_PERFCTR2_TP_0_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_12 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_TP_1_LO, + GEN8_RBBM_PERFCTR2_TP_1_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_13 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_TP_2_LO, + GEN8_RBBM_PERFCTR2_TP_2_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_14 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_TP_3_LO, + GEN8_RBBM_PERFCTR2_TP_3_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_15 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_TP_4_LO, + GEN8_RBBM_PERFCTR2_TP_4_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_16 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_TP_5_LO, + GEN8_RBBM_PERFCTR2_TP_5_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_17 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_TP_6_LO, + GEN8_RBBM_PERFCTR2_TP_6_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_18 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_TP_7_LO, + GEN8_RBBM_PERFCTR2_TP_7_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_19 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_sp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, 
GEN8_RBBM_PERFCTR_SP_0_LO, + GEN8_RBBM_PERFCTR_SP_0_HI, -1, GEN8_SP_PERFCTR_SP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_1_LO, + GEN8_RBBM_PERFCTR_SP_1_HI, -1, GEN8_SP_PERFCTR_SP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_2_LO, + GEN8_RBBM_PERFCTR_SP_2_HI, -1, GEN8_SP_PERFCTR_SP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_3_LO, + GEN8_RBBM_PERFCTR_SP_3_HI, -1, GEN8_SP_PERFCTR_SP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_4_LO, + GEN8_RBBM_PERFCTR_SP_4_HI, -1, GEN8_SP_PERFCTR_SP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_5_LO, + GEN8_RBBM_PERFCTR_SP_5_HI, -1, GEN8_SP_PERFCTR_SP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_6_LO, + GEN8_RBBM_PERFCTR_SP_6_HI, -1, GEN8_SP_PERFCTR_SP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_7_LO, + GEN8_RBBM_PERFCTR_SP_7_HI, -1, GEN8_SP_PERFCTR_SP_SEL_7 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_8_LO, + GEN8_RBBM_PERFCTR_SP_8_HI, -1, GEN8_SP_PERFCTR_SP_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_9_LO, + GEN8_RBBM_PERFCTR_SP_9_HI, -1, GEN8_SP_PERFCTR_SP_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_10_LO, + GEN8_RBBM_PERFCTR_SP_10_HI, -1, GEN8_SP_PERFCTR_SP_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_11_LO, + GEN8_RBBM_PERFCTR_SP_11_HI, -1, GEN8_SP_PERFCTR_SP_SEL_11 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_12_LO, + GEN8_RBBM_PERFCTR_SP_12_HI, -1, GEN8_SP_PERFCTR_SP_SEL_12 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_13_LO, + GEN8_RBBM_PERFCTR_SP_13_HI, -1, GEN8_SP_PERFCTR_SP_SEL_13 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_14_LO, + GEN8_RBBM_PERFCTR_SP_14_HI, -1, GEN8_SP_PERFCTR_SP_SEL_14 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_15_LO, + GEN8_RBBM_PERFCTR_SP_15_HI, -1, GEN8_SP_PERFCTR_SP_SEL_15 }, + { 
KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_16_LO, + GEN8_RBBM_PERFCTR_SP_16_HI, -1, GEN8_SP_PERFCTR_SP_SEL_16 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_17_LO, + GEN8_RBBM_PERFCTR_SP_17_HI, -1, GEN8_SP_PERFCTR_SP_SEL_17 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_18_LO, + GEN8_RBBM_PERFCTR_SP_18_HI, -1, GEN8_SP_PERFCTR_SP_SEL_18 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_19_LO, + GEN8_RBBM_PERFCTR_SP_19_HI, -1, GEN8_SP_PERFCTR_SP_SEL_19 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_20_LO, + GEN8_RBBM_PERFCTR_SP_20_HI, -1, GEN8_SP_PERFCTR_SP_SEL_20 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_21_LO, + GEN8_RBBM_PERFCTR_SP_21_HI, -1, GEN8_SP_PERFCTR_SP_SEL_21 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_22_LO, + GEN8_RBBM_PERFCTR_SP_22_HI, -1, GEN8_SP_PERFCTR_SP_SEL_22 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_23_LO, + GEN8_RBBM_PERFCTR_SP_23_HI, -1, GEN8_SP_PERFCTR_SP_SEL_23 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_bv_sp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_0_LO, + GEN8_RBBM_PERFCTR2_SP_0_HI, -1, GEN8_SP_PERFCTR_SP_SEL_24 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_1_LO, + GEN8_RBBM_PERFCTR2_SP_1_HI, -1, GEN8_SP_PERFCTR_SP_SEL_25 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_2_LO, + GEN8_RBBM_PERFCTR2_SP_2_HI, -1, GEN8_SP_PERFCTR_SP_SEL_26 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_3_LO, + GEN8_RBBM_PERFCTR2_SP_3_HI, -1, GEN8_SP_PERFCTR_SP_SEL_27 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_4_LO, + GEN8_RBBM_PERFCTR2_SP_4_HI, -1, GEN8_SP_PERFCTR_SP_SEL_28 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_5_LO, + GEN8_RBBM_PERFCTR2_SP_5_HI, -1, GEN8_SP_PERFCTR_SP_SEL_29 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_6_LO, + GEN8_RBBM_PERFCTR2_SP_6_HI, -1, GEN8_SP_PERFCTR_SP_SEL_30 }, + { 
KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_7_LO, + GEN8_RBBM_PERFCTR2_SP_7_HI, -1, GEN8_SP_PERFCTR_SP_SEL_31 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_8_LO, + GEN8_RBBM_PERFCTR2_SP_8_HI, -1, GEN8_SP_PERFCTR_SP_SEL_32 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_9_LO, + GEN8_RBBM_PERFCTR2_SP_9_HI, -1, GEN8_SP_PERFCTR_SP_SEL_33 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_10_LO, + GEN8_RBBM_PERFCTR2_SP_10_HI, -1, GEN8_SP_PERFCTR_SP_SEL_34 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_11_LO, + GEN8_RBBM_PERFCTR2_SP_11_HI, -1, GEN8_SP_PERFCTR_SP_SEL_35 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_rb[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RB_0_LO, + GEN8_RBBM_PERFCTR_RB_0_HI, -1, GEN8_RB_PERFCTR_RB_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RB_1_LO, + GEN8_RBBM_PERFCTR_RB_1_HI, -1, GEN8_RB_PERFCTR_RB_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RB_2_LO, + GEN8_RBBM_PERFCTR_RB_2_HI, -1, GEN8_RB_PERFCTR_RB_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RB_3_LO, + GEN8_RBBM_PERFCTR_RB_3_HI, -1, GEN8_RB_PERFCTR_RB_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RB_4_LO, + GEN8_RBBM_PERFCTR_RB_4_HI, -1, GEN8_RB_PERFCTR_RB_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RB_5_LO, + GEN8_RBBM_PERFCTR_RB_5_HI, -1, GEN8_RB_PERFCTR_RB_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RB_6_LO, + GEN8_RBBM_PERFCTR_RB_6_HI, -1, GEN8_RB_PERFCTR_RB_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RB_7_LO, + GEN8_RBBM_PERFCTR_RB_7_HI, -1, GEN8_RB_PERFCTR_RB_SEL_7 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_vsc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VSC_0_LO, + GEN8_RBBM_PERFCTR_VSC_0_HI, -1, GEN8_VSC_PERFCTR_VSC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VSC_1_LO, + 
GEN8_RBBM_PERFCTR_VSC_1_HI, -1, GEN8_VSC_PERFCTR_VSC_SEL_1 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_lrz[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_LRZ_0_LO, + GEN8_RBBM_PERFCTR_LRZ_0_HI, -1, GEN8_GRAS_PERFCTR_LRZ_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_LRZ_1_LO, + GEN8_RBBM_PERFCTR_LRZ_1_HI, -1, GEN8_GRAS_PERFCTR_LRZ_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_LRZ_2_LO, + GEN8_RBBM_PERFCTR_LRZ_2_HI, -1, GEN8_GRAS_PERFCTR_LRZ_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_LRZ_3_LO, + GEN8_RBBM_PERFCTR_LRZ_3_HI, -1, GEN8_GRAS_PERFCTR_LRZ_SEL_3 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_bv_lrz[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_LRZ_0_LO, + GEN8_RBBM_PERFCTR_BV_LRZ_0_HI, -1, GEN8_GRAS_PERFCTR_LRZ_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_LRZ_1_LO, + GEN8_RBBM_PERFCTR_BV_LRZ_1_HI, -1, GEN8_GRAS_PERFCTR_LRZ_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_LRZ_2_LO, + GEN8_RBBM_PERFCTR_BV_LRZ_2_HI, -1, GEN8_GRAS_PERFCTR_LRZ_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_LRZ_3_LO, + GEN8_RBBM_PERFCTR_BV_LRZ_3_HI, -1, GEN8_GRAS_PERFCTR_LRZ_SEL_3 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_cmp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CMP_0_LO, + GEN8_RBBM_PERFCTR_CMP_0_HI, -1, GEN8_RB_PERFCTR_CMP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CMP_1_LO, + GEN8_RBBM_PERFCTR_CMP_1_HI, -1, GEN8_RB_PERFCTR_CMP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CMP_2_LO, + GEN8_RBBM_PERFCTR_CMP_2_HI, -1, GEN8_RB_PERFCTR_CMP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CMP_3_LO, + GEN8_RBBM_PERFCTR_CMP_3_HI, -1, GEN8_RB_PERFCTR_CMP_SEL_3 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_ufc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, 
GEN8_RBBM_PERFCTR_UFC_0_LO, + GEN8_RBBM_PERFCTR_UFC_0_HI, -1, GEN8_RB_PERFCTR_UFC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UFC_1_LO, + GEN8_RBBM_PERFCTR_UFC_1_HI, -1, GEN8_RB_PERFCTR_UFC_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UFC_2_LO, + GEN8_RBBM_PERFCTR_UFC_2_HI, -1, GEN8_RB_PERFCTR_UFC_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UFC_3_LO, + GEN8_RBBM_PERFCTR_UFC_3_HI, -1, GEN8_RB_PERFCTR_UFC_SEL_3 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_bv_ufc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_UFC_0_LO, + GEN8_RBBM_PERFCTR2_UFC_0_HI, -1, GEN8_RB_PERFCTR_UFC_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_UFC_1_LO, + GEN8_RBBM_PERFCTR2_UFC_1_HI, -1, GEN8_RB_PERFCTR_UFC_SEL_5 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_gbif[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_GBIF_PERF_CNT_LO_0, + GEN8_GBIF_PERF_CNT_HI_0, -1, GEN8_GBIF_PERF_CNT_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_GBIF_PERF_CNT_LO_1, + GEN8_GBIF_PERF_CNT_HI_1, -1, GEN8_GBIF_PERF_CNT_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_GBIF_PERF_CNT_LO_2, + GEN8_GBIF_PERF_CNT_HI_2, -1, GEN8_GBIF_PERF_CNT_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_GBIF_PERF_CNT_LO_3, + GEN8_GBIF_PERF_CNT_HI_3, -1, GEN8_GBIF_PERF_CNT_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_GBIF_PERF_CNT_LO_4, + GEN8_GBIF_PERF_CNT_HI_4, -1, GEN8_GBIF_PERF_CNT_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_GBIF_PERF_CNT_LO_5, + GEN8_GBIF_PERF_CNT_HI_5, -1, GEN8_GBIF_PERF_CNT_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_GBIF_PERF_CNT_LO_6, + GEN8_GBIF_PERF_CNT_HI_6, -1, GEN8_GBIF_PERF_CNT_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_GBIF_PERF_CNT_LO_7, + GEN8_GBIF_PERF_CNT_HI_7, -1, GEN8_GBIF_PERF_CNT_SEL_1 }, +}; + +static struct adreno_perfcount_register gen8_perfcounters_gbif_pwr[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_GBIF_PWR_CNT_LO_0, + 
GEN8_GBIF_PWR_CNT_HI_0, -1, GEN8_GBIF_PWR_CNT_SEL }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_GBIF_PWR_CNT_LO_1, + GEN8_GBIF_PWR_CNT_HI_1, -1, GEN8_GBIF_PWR_CNT_SEL }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_GBIF_PWR_CNT_LO_2, + GEN8_GBIF_PWR_CNT_HI_2, -1, GEN8_GBIF_PWR_CNT_SEL }, +}; + +#define GMU_COUNTER(lo, hi, sel) \ + { .countable = KGSL_PERFCOUNTER_NOT_USED, \ + .offset = lo, .offset_hi = hi, .select = sel } + +#define GMU_COUNTER_RESERVED(lo, hi, sel) \ + { .countable = KGSL_PERFCOUNTER_BROKEN, \ + .offset = lo, .offset_hi = hi, .select = sel } + +static struct adreno_perfcount_register gen8_perfcounters_gmu_xoclk[] = { + /* + * COUNTER_XOCLK_0 and COUNTER_XOCLK_4 are used for the GPU + * busy and ifpc count. Mark them as reserved to ensure they + * are not re-used. + */ + GMU_COUNTER_RESERVED(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_0, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_0, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_0), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_1, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_1, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_0), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_2, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_2, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_0), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_3, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_3, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_0), + GMU_COUNTER_RESERVED(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_4, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_4, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_1), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_5, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_5, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_1), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_6, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_6, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_1), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_7, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_7, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_1), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_8, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_8, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_2), 
+ GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_9, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_9, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_2), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_10, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_10, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_2), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_11, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_11, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_2), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_12, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_12, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_3), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_13, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_13, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_3), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_14, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_14, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_3), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_15, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_15, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_3), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_16, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_16, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_4), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_17, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_17, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_4), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_18, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_18, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_4), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_19, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_19, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_4), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_20, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_20, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_5), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_21, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_21, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_5), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_22, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_22, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_5), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_23, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_23, + 
GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_5), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_24, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_24, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_6), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_25, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_25, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_6), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_26, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_26, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_6), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_27, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_27, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_6), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_28, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_28, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_7), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_29, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_29, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_7), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_30, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_30, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_7), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_31, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_31, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_7), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_32, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_32, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_8), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_33, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_33, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_8), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_34, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_34, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_8), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_35, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_35, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_8), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_36, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_36, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_9), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_37, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_37, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_9), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_38, + 
GEN8_GMUCX_POWER_COUNTER_XOCLK_H_38, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_9), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_39, + GEN8_GMUCX_POWER_COUNTER_XOCLK_H_39, + GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_9), +}; + +static struct adreno_perfcount_register gen8_perfcounters_gmu_gmuclk[] = { + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_0, + GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_0, + GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_0), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_1, + GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_1, + GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_0), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_2, + GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_2, + GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_0), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_3, + GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_3, + GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_0), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_4, + GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_4, + GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_1), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_5, + GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_5, + GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_1), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_6, + GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_6, + GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_1), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_7, + GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_7, + GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_1), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_8, + GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_8, + GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_2), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_9, + GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_9, + GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_2), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_10, + GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_10, + GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_2), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_11, + GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_11, + GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_2), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_12, + 
GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_12, + GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_3), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_13, + GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_13, + GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_3), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_14, + GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_14, + GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_3), + GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_15, + GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_15, + GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_3), +}; + +static struct adreno_perfcount_register gen8_perfcounters_gmu_perf[] = { + GMU_COUNTER(GEN8_GMUCX_PERF_COUNTER_L_0, + GEN8_GMUCX_PERF_COUNTER_H_0, + GEN8_GMUCX_PERF_COUNTER_SELECT_0), + GMU_COUNTER(GEN8_GMUCX_PERF_COUNTER_L_1, + GEN8_GMUCX_PERF_COUNTER_H_1, + GEN8_GMUCX_PERF_COUNTER_SELECT_0), + GMU_COUNTER(GEN8_GMUCX_PERF_COUNTER_L_2, + GEN8_GMUCX_PERF_COUNTER_H_2, + GEN8_GMUCX_PERF_COUNTER_SELECT_0), + GMU_COUNTER(GEN8_GMUCX_PERF_COUNTER_L_3, + GEN8_GMUCX_PERF_COUNTER_H_3, + GEN8_GMUCX_PERF_COUNTER_SELECT_0), + GMU_COUNTER(GEN8_GMUCX_PERF_COUNTER_L_4, + GEN8_GMUCX_PERF_COUNTER_H_4, + GEN8_GMUCX_PERF_COUNTER_SELECT_1), + GMU_COUNTER(GEN8_GMUCX_PERF_COUNTER_L_5, + GEN8_GMUCX_PERF_COUNTER_H_5, + GEN8_GMUCX_PERF_COUNTER_SELECT_1), +}; + +static struct adreno_perfcount_register gen8_perfcounters_alwayson[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_CP_ALWAYS_ON_COUNTER_LO, + GEN8_CP_ALWAYS_ON_COUNTER_HI, -1 }, +}; + +/* + * ADRENO_PERFCOUNTER_GROUP_RESTORE flag is enabled by default + * because most of the perfcounter groups need to be restored + * as part of preemption and IFPC. 
Perfcounter groups that are + * not restored as part of preemption and IFPC should be defined + * using GEN8_PERFCOUNTER_GROUP_FLAGS macro + */ + +#define GEN8_PERFCOUNTER_GROUP_FLAGS(core, offset, name, flags, \ + enable, read) \ + [KGSL_PERFCOUNTER_GROUP_##offset] = { core##_perfcounters_##name, \ + ARRAY_SIZE(core##_perfcounters_##name), __stringify(name), flags, \ + enable, read } + +#define GEN8_PERFCOUNTER_GROUP(offset, name, enable, read) \ + GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, offset, name, \ + ADRENO_PERFCOUNTER_GROUP_RESTORE, enable, read) + +#define GEN8_REGULAR_PERFCOUNTER_GROUP(offset, name) \ + GEN8_PERFCOUNTER_GROUP(offset, name, \ + gen8_counter_enable, gen8_counter_read) + +#define GEN8_BV_PERFCOUNTER_GROUP(offset, name, enable, read) \ + GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, BV_##offset, bv_##name, \ + ADRENO_PERFCOUNTER_GROUP_RESTORE, enable, read) + +#define GEN8_BV_REGULAR_PERFCOUNTER_GROUP(offset, name) \ + GEN8_BV_PERFCOUNTER_GROUP(offset, name, \ + gen8_counter_enable, gen8_counter_read) + +static const struct adreno_perfcount_group gen8_hwsched_perfcounter_groups + [KGSL_PERFCOUNTER_GROUP_MAX] = { + GEN8_REGULAR_PERFCOUNTER_GROUP(CP, cp), + GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, RBBM, rbbm, 0, + gen8_counter_enable, gen8_counter_read), + GEN8_PERFCOUNTER_GROUP(PC, pc, gen8_counter_br_enable, gen8_counter_read), + GEN8_PERFCOUNTER_GROUP(VFD, vfd, gen8_counter_br_enable, gen8_counter_read), + GEN8_PERFCOUNTER_GROUP(HLSQ, hlsq, gen8_counter_br_enable, gen8_counter_read), + GEN8_PERFCOUNTER_GROUP(VPC, vpc, gen8_counter_br_enable, gen8_counter_read), + GEN8_PERFCOUNTER_GROUP(CCU, ccu, gen8_counter_br_enable, gen8_counter_read), + GEN8_PERFCOUNTER_GROUP(CMP, cmp, gen8_counter_br_enable, gen8_counter_read), + GEN8_PERFCOUNTER_GROUP(TSE, tse, gen8_counter_br_enable, gen8_counter_read), + GEN8_PERFCOUNTER_GROUP(RAS, ras, gen8_counter_br_enable, gen8_counter_read), + GEN8_PERFCOUNTER_GROUP(LRZ, lrz, gen8_counter_br_enable, gen8_counter_read), + 
GEN8_REGULAR_PERFCOUNTER_GROUP(UCHE, uche), + GEN8_PERFCOUNTER_GROUP(TP, tp, gen8_hwsched_counter_enable, gen8_counter_read), + GEN8_PERFCOUNTER_GROUP(SP, sp, gen8_hwsched_counter_enable, gen8_counter_read), + GEN8_PERFCOUNTER_GROUP(RB, rb, gen8_counter_br_enable, gen8_counter_read), + GEN8_REGULAR_PERFCOUNTER_GROUP(VSC, vsc), + GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, VBIF, gbif, 0, + gen8_counter_gbif_enable, gen8_counter_read_norestore), + GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, VBIF_PWR, gbif_pwr, + ADRENO_PERFCOUNTER_GROUP_FIXED, + gen8_counter_gbif_pwr_enable, gen8_counter_read_norestore), + GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, ALWAYSON, alwayson, + ADRENO_PERFCOUNTER_GROUP_FIXED, + gen8_counter_alwayson_enable, gen8_counter_alwayson_read), + GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, GMU_XOCLK, gmu_xoclk, 0, + gen8_counter_gmu_pwr_enable, gen8_counter_read_norestore), + GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, GMU_GMUCLK, gmu_gmuclk, 0, + gen8_counter_gmu_pwr_enable, gen8_counter_read_norestore), + GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, GMU_PERF, gmu_perf, 0, + gen8_counter_gmu_perf_enable, gen8_counter_read_norestore), + GEN8_REGULAR_PERFCOUNTER_GROUP(UFC, ufc), + GEN8_BV_REGULAR_PERFCOUNTER_GROUP(CP, cp), + GEN8_BV_PERFCOUNTER_GROUP(PC, pc, gen8_counter_bv_enable, gen8_counter_read), + GEN8_BV_PERFCOUNTER_GROUP(VFD, vfd, gen8_counter_bv_enable, gen8_counter_read), + GEN8_BV_PERFCOUNTER_GROUP(VPC, vpc, gen8_counter_bv_enable, gen8_counter_read), + GEN8_BV_REGULAR_PERFCOUNTER_GROUP(TP, tp), + GEN8_BV_REGULAR_PERFCOUNTER_GROUP(SP, sp), + GEN8_BV_REGULAR_PERFCOUNTER_GROUP(UFC, ufc), + GEN8_BV_PERFCOUNTER_GROUP(TSE, tse, gen8_counter_bv_enable, gen8_counter_read), + GEN8_BV_PERFCOUNTER_GROUP(RAS, ras, gen8_counter_bv_enable, gen8_counter_read), + GEN8_BV_PERFCOUNTER_GROUP(LRZ, lrz, gen8_counter_bv_enable, gen8_counter_read), + GEN8_BV_PERFCOUNTER_GROUP(HLSQ, hlsq, gen8_counter_bv_enable, gen8_counter_read), +}; + +static const struct adreno_perfcount_group gen8_perfcounter_groups + 
[KGSL_PERFCOUNTER_GROUP_MAX] = { + GEN8_REGULAR_PERFCOUNTER_GROUP(CP, cp), + GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, RBBM, rbbm, 0, + gen8_counter_enable, gen8_counter_read), + GEN8_PERFCOUNTER_GROUP(PC, pc, gen8_counter_br_enable, gen8_counter_read), + GEN8_PERFCOUNTER_GROUP(VFD, vfd, gen8_counter_br_enable, gen8_counter_read), + GEN8_PERFCOUNTER_GROUP(HLSQ, hlsq, gen8_counter_br_enable, gen8_counter_read), + GEN8_PERFCOUNTER_GROUP(VPC, vpc, gen8_counter_br_enable, gen8_counter_read), + GEN8_PERFCOUNTER_GROUP(CCU, ccu, gen8_counter_br_enable, gen8_counter_read), + GEN8_PERFCOUNTER_GROUP(CMP, cmp, gen8_counter_br_enable, gen8_counter_read), + GEN8_PERFCOUNTER_GROUP(TSE, tse, gen8_counter_br_enable, gen8_counter_read), + GEN8_PERFCOUNTER_GROUP(RAS, ras, gen8_counter_br_enable, gen8_counter_read), + GEN8_PERFCOUNTER_GROUP(LRZ, lrz, gen8_counter_br_enable, gen8_counter_read), + GEN8_REGULAR_PERFCOUNTER_GROUP(UCHE, uche), + GEN8_PERFCOUNTER_GROUP(TP, tp, gen8_counter_inline_enable, gen8_counter_read), + GEN8_PERFCOUNTER_GROUP(SP, sp, gen8_counter_inline_enable, gen8_counter_read), + GEN8_PERFCOUNTER_GROUP(RB, rb, gen8_counter_br_enable, gen8_counter_read), + GEN8_REGULAR_PERFCOUNTER_GROUP(VSC, vsc), + GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, VBIF, gbif, 0, + gen8_counter_gbif_enable, gen8_counter_read_norestore), + GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, VBIF_PWR, gbif_pwr, + ADRENO_PERFCOUNTER_GROUP_FIXED, + gen8_counter_gbif_pwr_enable, gen8_counter_read_norestore), + GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, ALWAYSON, alwayson, + ADRENO_PERFCOUNTER_GROUP_FIXED, + gen8_counter_alwayson_enable, gen8_counter_alwayson_read), + GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, GMU_XOCLK, gmu_xoclk, 0, + gen8_counter_gmu_pwr_enable, gen8_counter_read_norestore), + GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, GMU_GMUCLK, gmu_gmuclk, 0, + gen8_counter_gmu_pwr_enable, gen8_counter_read_norestore), + GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, GMU_PERF, gmu_perf, 0, + gen8_counter_gmu_perf_enable, gen8_counter_read_norestore), + 
GEN8_REGULAR_PERFCOUNTER_GROUP(UFC, ufc), + GEN8_BV_REGULAR_PERFCOUNTER_GROUP(CP, cp), + GEN8_BV_PERFCOUNTER_GROUP(PC, pc, gen8_counter_bv_enable, gen8_counter_read), + GEN8_BV_PERFCOUNTER_GROUP(VFD, vfd, gen8_counter_bv_enable, gen8_counter_read), + GEN8_BV_PERFCOUNTER_GROUP(VPC, vpc, gen8_counter_bv_enable, gen8_counter_read), + GEN8_BV_REGULAR_PERFCOUNTER_GROUP(TP, tp), + GEN8_BV_REGULAR_PERFCOUNTER_GROUP(SP, sp), + GEN8_BV_REGULAR_PERFCOUNTER_GROUP(UFC, ufc), + GEN8_BV_PERFCOUNTER_GROUP(TSE, tse, gen8_counter_bv_enable, gen8_counter_read), + GEN8_BV_PERFCOUNTER_GROUP(RAS, ras, gen8_counter_bv_enable, gen8_counter_read), + GEN8_BV_PERFCOUNTER_GROUP(LRZ, lrz, gen8_counter_bv_enable, gen8_counter_read), + GEN8_BV_PERFCOUNTER_GROUP(HLSQ, hlsq, gen8_counter_bv_enable, gen8_counter_read), +}; + +const struct adreno_perfcounters adreno_gen8_perfcounters = { + gen8_perfcounter_groups, + ARRAY_SIZE(gen8_perfcounter_groups), +}; + +const struct adreno_perfcounters adreno_gen8_hwsched_perfcounters = { + gen8_hwsched_perfcounter_groups, + ARRAY_SIZE(gen8_hwsched_perfcounter_groups), +}; diff --git a/build/kgsl_defs.bzl b/build/kgsl_defs.bzl index c8646e39c6..5a4514d2d8 100644 --- a/build/kgsl_defs.bzl +++ b/build/kgsl_defs.bzl @@ -46,6 +46,7 @@ def kgsl_get_srcs(): "adreno_gen8_hfi.c", "adreno_gen8_hwsched.c", "adreno_gen8_hwsched_hfi.c", + "adreno_gen8_perfcounter.c", "adreno_gen8_preempt.c", "adreno_gen8_ringbuffer.c", "adreno_gen8_rpmh.c", diff --git a/gen8_reg.h b/gen8_reg.h index 4cc82bab23..3dc6ce6989 100644 --- a/gen8_reg.h +++ b/gen8_reg.h @@ -176,7 +176,322 @@ #define GEN8_RBBM_PERFCTR_TSE_3_HI 0x21d #define GEN8_RBBM_PERFCTR_RAS_0_LO 0x21e #define GEN8_RBBM_PERFCTR_RAS_0_HI 0x21f +#define GEN8_RBBM_PERFCTR_RAS_1_LO 0x220 +#define GEN8_RBBM_PERFCTR_RAS_1_HI 0x221 +#define GEN8_RBBM_PERFCTR_RAS_2_LO 0x222 +#define GEN8_RBBM_PERFCTR_RAS_2_HI 0x223 +#define GEN8_RBBM_PERFCTR_RAS_3_LO 0x224 +#define GEN8_RBBM_PERFCTR_RAS_3_HI 0x225 +#define GEN8_RBBM_PERFCTR_UCHE_0_LO 
0x226 +#define GEN8_RBBM_PERFCTR_UCHE_0_HI 0x227 +#define GEN8_RBBM_PERFCTR_UCHE_1_LO 0x228 +#define GEN8_RBBM_PERFCTR_UCHE_1_HI 0x229 +#define GEN8_RBBM_PERFCTR_UCHE_2_LO 0x22a +#define GEN8_RBBM_PERFCTR_UCHE_2_HI 0x22b +#define GEN8_RBBM_PERFCTR_UCHE_3_LO 0x22c +#define GEN8_RBBM_PERFCTR_UCHE_3_HI 0x22d +#define GEN8_RBBM_PERFCTR_UCHE_4_LO 0x22e +#define GEN8_RBBM_PERFCTR_UCHE_4_HI 0x22f +#define GEN8_RBBM_PERFCTR_UCHE_5_LO 0x230 +#define GEN8_RBBM_PERFCTR_UCHE_5_HI 0x231 +#define GEN8_RBBM_PERFCTR_UCHE_6_LO 0x232 +#define GEN8_RBBM_PERFCTR_UCHE_6_HI 0x233 +#define GEN8_RBBM_PERFCTR_UCHE_7_LO 0x234 +#define GEN8_RBBM_PERFCTR_UCHE_7_HI 0x235 +#define GEN8_RBBM_PERFCTR_UCHE_8_LO 0x236 +#define GEN8_RBBM_PERFCTR_UCHE_8_HI 0x237 +#define GEN8_RBBM_PERFCTR_UCHE_9_LO 0x238 +#define GEN8_RBBM_PERFCTR_UCHE_9_HI 0x239 +#define GEN8_RBBM_PERFCTR_UCHE_10_LO 0x23a +#define GEN8_RBBM_PERFCTR_UCHE_10_HI 0x23b +#define GEN8_RBBM_PERFCTR_UCHE_11_LO 0x23c +#define GEN8_RBBM_PERFCTR_UCHE_11_HI 0x23d +#define GEN8_RBBM_PERFCTR_UCHE_12_LO 0x23e +#define GEN8_RBBM_PERFCTR_UCHE_12_HI 0x23f +#define GEN8_RBBM_PERFCTR_UCHE_13_LO 0x240 +#define GEN8_RBBM_PERFCTR_UCHE_13_HI 0x241 +#define GEN8_RBBM_PERFCTR_UCHE_14_LO 0x242 +#define GEN8_RBBM_PERFCTR_UCHE_14_HI 0x243 +#define GEN8_RBBM_PERFCTR_UCHE_15_LO 0x244 +#define GEN8_RBBM_PERFCTR_UCHE_15_HI 0x245 +#define GEN8_RBBM_PERFCTR_UCHE_16_LO 0x246 +#define GEN8_RBBM_PERFCTR_UCHE_16_HI 0x247 +#define GEN8_RBBM_PERFCTR_UCHE_17_LO 0x248 +#define GEN8_RBBM_PERFCTR_UCHE_17_HI 0x249 +#define GEN8_RBBM_PERFCTR_UCHE_18_LO 0x24a +#define GEN8_RBBM_PERFCTR_UCHE_18_HI 0x24b +#define GEN8_RBBM_PERFCTR_UCHE_19_LO 0x24c +#define GEN8_RBBM_PERFCTR_UCHE_19_HI 0x24d +#define GEN8_RBBM_PERFCTR_UCHE_20_LO 0x24e +#define GEN8_RBBM_PERFCTR_UCHE_20_HI 0x24f +#define GEN8_RBBM_PERFCTR_UCHE_21_LO 0x250 +#define GEN8_RBBM_PERFCTR_UCHE_21_HI 0x251 +#define GEN8_RBBM_PERFCTR_UCHE_22_LO 0x252 +#define GEN8_RBBM_PERFCTR_UCHE_22_HI 0x253 +#define 
GEN8_RBBM_PERFCTR_UCHE_23_LO 0x254 +#define GEN8_RBBM_PERFCTR_UCHE_23_HI 0x255 +#define GEN8_RBBM_PERFCTR_TP_0_LO 0x256 +#define GEN8_RBBM_PERFCTR_TP_0_HI 0x257 +#define GEN8_RBBM_PERFCTR_TP_1_LO 0x258 +#define GEN8_RBBM_PERFCTR_TP_1_HI 0x259 +#define GEN8_RBBM_PERFCTR_TP_2_LO 0x25a +#define GEN8_RBBM_PERFCTR_TP_2_HI 0x25b +#define GEN8_RBBM_PERFCTR_TP_3_LO 0x25c +#define GEN8_RBBM_PERFCTR_TP_3_HI 0x25d +#define GEN8_RBBM_PERFCTR_TP_4_LO 0x25e +#define GEN8_RBBM_PERFCTR_TP_4_HI 0x25f +#define GEN8_RBBM_PERFCTR_TP_5_LO 0x260 +#define GEN8_RBBM_PERFCTR_TP_5_HI 0x261 +#define GEN8_RBBM_PERFCTR_TP_6_LO 0x262 +#define GEN8_RBBM_PERFCTR_TP_6_HI 0x263 +#define GEN8_RBBM_PERFCTR_TP_7_LO 0x264 +#define GEN8_RBBM_PERFCTR_TP_7_HI 0x265 +#define GEN8_RBBM_PERFCTR_TP_8_LO 0x266 +#define GEN8_RBBM_PERFCTR_TP_8_HI 0x267 +#define GEN8_RBBM_PERFCTR_TP_9_LO 0x268 +#define GEN8_RBBM_PERFCTR_TP_9_HI 0x269 +#define GEN8_RBBM_PERFCTR_TP_10_LO 0x26a +#define GEN8_RBBM_PERFCTR_TP_10_HI 0x26b +#define GEN8_RBBM_PERFCTR_TP_11_LO 0x26c +#define GEN8_RBBM_PERFCTR_TP_11_HI 0x26d +#define GEN8_RBBM_PERFCTR_SP_0_LO 0x26e +#define GEN8_RBBM_PERFCTR_SP_0_HI 0x26f +#define GEN8_RBBM_PERFCTR_SP_1_LO 0x270 +#define GEN8_RBBM_PERFCTR_SP_1_HI 0x271 +#define GEN8_RBBM_PERFCTR_SP_2_LO 0x272 +#define GEN8_RBBM_PERFCTR_SP_2_HI 0x273 +#define GEN8_RBBM_PERFCTR_SP_3_LO 0x274 +#define GEN8_RBBM_PERFCTR_SP_3_HI 0x275 +#define GEN8_RBBM_PERFCTR_SP_4_LO 0x276 +#define GEN8_RBBM_PERFCTR_SP_4_HI 0x277 +#define GEN8_RBBM_PERFCTR_SP_5_LO 0x278 +#define GEN8_RBBM_PERFCTR_SP_5_HI 0x279 +#define GEN8_RBBM_PERFCTR_SP_6_LO 0x27a +#define GEN8_RBBM_PERFCTR_SP_6_HI 0x27b +#define GEN8_RBBM_PERFCTR_SP_7_LO 0x27c +#define GEN8_RBBM_PERFCTR_SP_7_HI 0x27d +#define GEN8_RBBM_PERFCTR_SP_8_LO 0x27e +#define GEN8_RBBM_PERFCTR_SP_8_HI 0x27f +#define GEN8_RBBM_PERFCTR_SP_9_LO 0x280 +#define GEN8_RBBM_PERFCTR_SP_9_HI 0x281 +#define GEN8_RBBM_PERFCTR_SP_10_LO 0x282 +#define GEN8_RBBM_PERFCTR_SP_10_HI 0x283 +#define 
GEN8_RBBM_PERFCTR_SP_11_LO 0x284 +#define GEN8_RBBM_PERFCTR_SP_11_HI 0x285 +#define GEN8_RBBM_PERFCTR_SP_12_LO 0x286 +#define GEN8_RBBM_PERFCTR_SP_12_HI 0x287 +#define GEN8_RBBM_PERFCTR_SP_13_LO 0x288 +#define GEN8_RBBM_PERFCTR_SP_13_HI 0x289 +#define GEN8_RBBM_PERFCTR_SP_14_LO 0x28a +#define GEN8_RBBM_PERFCTR_SP_14_HI 0x28b +#define GEN8_RBBM_PERFCTR_SP_15_LO 0x28c +#define GEN8_RBBM_PERFCTR_SP_15_HI 0x28d +#define GEN8_RBBM_PERFCTR_SP_16_LO 0x28e +#define GEN8_RBBM_PERFCTR_SP_16_HI 0x28f +#define GEN8_RBBM_PERFCTR_SP_17_LO 0x290 +#define GEN8_RBBM_PERFCTR_SP_17_HI 0x291 +#define GEN8_RBBM_PERFCTR_SP_18_LO 0x292 +#define GEN8_RBBM_PERFCTR_SP_18_HI 0x293 +#define GEN8_RBBM_PERFCTR_SP_19_LO 0x294 +#define GEN8_RBBM_PERFCTR_SP_19_HI 0x295 +#define GEN8_RBBM_PERFCTR_SP_20_LO 0x296 +#define GEN8_RBBM_PERFCTR_SP_20_HI 0x297 +#define GEN8_RBBM_PERFCTR_SP_21_LO 0x298 +#define GEN8_RBBM_PERFCTR_SP_21_HI 0x299 +#define GEN8_RBBM_PERFCTR_SP_22_LO 0x29a +#define GEN8_RBBM_PERFCTR_SP_22_HI 0x29b +#define GEN8_RBBM_PERFCTR_SP_23_LO 0x29c +#define GEN8_RBBM_PERFCTR_SP_23_HI 0x29d +#define GEN8_RBBM_PERFCTR_RB_0_LO 0x29e +#define GEN8_RBBM_PERFCTR_RB_0_HI 0x29f +#define GEN8_RBBM_PERFCTR_RB_1_LO 0x2a0 +#define GEN8_RBBM_PERFCTR_RB_1_HI 0x2a1 +#define GEN8_RBBM_PERFCTR_RB_2_LO 0x2a2 +#define GEN8_RBBM_PERFCTR_RB_2_HI 0x2a3 +#define GEN8_RBBM_PERFCTR_RB_3_LO 0x2a4 +#define GEN8_RBBM_PERFCTR_RB_3_HI 0x2a5 +#define GEN8_RBBM_PERFCTR_RB_4_LO 0x2a6 +#define GEN8_RBBM_PERFCTR_RB_4_HI 0x2a7 +#define GEN8_RBBM_PERFCTR_RB_5_LO 0x2a8 +#define GEN8_RBBM_PERFCTR_RB_5_HI 0x2a9 +#define GEN8_RBBM_PERFCTR_RB_6_LO 0x2aa +#define GEN8_RBBM_PERFCTR_RB_6_HI 0x2ab +#define GEN8_RBBM_PERFCTR_RB_7_LO 0x2ac +#define GEN8_RBBM_PERFCTR_RB_7_HI 0x2ad +#define GEN8_RBBM_PERFCTR_VSC_0_LO 0x2ae +#define GEN8_RBBM_PERFCTR_VSC_0_HI 0x2af +#define GEN8_RBBM_PERFCTR_VSC_1_LO 0x2b0 +#define GEN8_RBBM_PERFCTR_VSC_1_HI 0x2b1 +#define GEN8_RBBM_PERFCTR_LRZ_0_LO 0x2b2 +#define GEN8_RBBM_PERFCTR_LRZ_0_HI 0x2b3 +#define 
GEN8_RBBM_PERFCTR_LRZ_1_LO 0x2b4 +#define GEN8_RBBM_PERFCTR_LRZ_1_HI 0x2b5 +#define GEN8_RBBM_PERFCTR_LRZ_2_LO 0x2b6 +#define GEN8_RBBM_PERFCTR_LRZ_2_HI 0x2b7 +#define GEN8_RBBM_PERFCTR_LRZ_3_LO 0x2b8 +#define GEN8_RBBM_PERFCTR_LRZ_3_HI 0x2b9 +#define GEN8_RBBM_PERFCTR_CMP_0_LO 0x2ba +#define GEN8_RBBM_PERFCTR_CMP_0_HI 0x2bb +#define GEN8_RBBM_PERFCTR_CMP_1_LO 0x2bc +#define GEN8_RBBM_PERFCTR_CMP_1_HI 0x2bd +#define GEN8_RBBM_PERFCTR_CMP_2_LO 0x2be +#define GEN8_RBBM_PERFCTR_CMP_2_HI 0x2bf +#define GEN8_RBBM_PERFCTR_CMP_3_LO 0x2c0 +#define GEN8_RBBM_PERFCTR_CMP_3_HI 0x2c1 +#define GEN8_RBBM_PERFCTR_UFC_0_LO 0x2c2 +#define GEN8_RBBM_PERFCTR_UFC_0_HI 0x2c3 +#define GEN8_RBBM_PERFCTR_UFC_1_LO 0x2c4 +#define GEN8_RBBM_PERFCTR_UFC_1_HI 0x2c5 +#define GEN8_RBBM_PERFCTR_UFC_2_LO 0x2c6 +#define GEN8_RBBM_PERFCTR_UFC_2_HI 0x2c7 +#define GEN8_RBBM_PERFCTR_UFC_3_LO 0x2c8 +#define GEN8_RBBM_PERFCTR_UFC_3_HI 0x2c9 + +#define GEN8_RBBM_PERFCTR2_HLSQ_0_LO 0x2e2 +#define GEN8_RBBM_PERFCTR2_HLSQ_0_HI 0x2e3 +#define GEN8_RBBM_PERFCTR2_HLSQ_1_LO 0x2e4 +#define GEN8_RBBM_PERFCTR2_HLSQ_1_HI 0x2e5 +#define GEN8_RBBM_PERFCTR2_HLSQ_2_LO 0x2e6 +#define GEN8_RBBM_PERFCTR2_HLSQ_2_HI 0x2e7 +#define GEN8_RBBM_PERFCTR2_HLSQ_3_LO 0x2e8 +#define GEN8_RBBM_PERFCTR2_HLSQ_3_HI 0x2e9 +#define GEN8_RBBM_PERFCTR2_HLSQ_4_LO 0x2ea +#define GEN8_RBBM_PERFCTR2_HLSQ_4_HI 0x2eb +#define GEN8_RBBM_PERFCTR2_HLSQ_5_LO 0x2ec +#define GEN8_RBBM_PERFCTR2_HLSQ_5_HI 0x2ed +#define GEN8_RBBM_PERFCTR2_CP_0_LO 0x2ee +#define GEN8_RBBM_PERFCTR2_CP_0_HI 0x2ef +#define GEN8_RBBM_PERFCTR2_CP_1_LO 0x2f0 +#define GEN8_RBBM_PERFCTR2_CP_1_HI 0x2f1 +#define GEN8_RBBM_PERFCTR2_CP_2_LO 0x2f2 +#define GEN8_RBBM_PERFCTR2_CP_2_HI 0x2f3 +#define GEN8_RBBM_PERFCTR2_CP_3_LO 0x2f4 +#define GEN8_RBBM_PERFCTR2_CP_3_HI 0x2f5 +#define GEN8_RBBM_PERFCTR2_CP_4_LO 0x2f6 +#define GEN8_RBBM_PERFCTR2_CP_4_HI 0x2f7 +#define GEN8_RBBM_PERFCTR2_CP_5_LO 0x2f8 +#define GEN8_RBBM_PERFCTR2_CP_5_HI 0x2f9 +#define GEN8_RBBM_PERFCTR2_CP_6_LO 0x2fa +#define 
GEN8_RBBM_PERFCTR2_CP_6_HI 0x2fb +#define GEN8_RBBM_PERFCTR2_SP_0_LO 0x2fc +#define GEN8_RBBM_PERFCTR2_SP_0_HI 0x2fd +#define GEN8_RBBM_PERFCTR2_SP_1_LO 0x2fe +#define GEN8_RBBM_PERFCTR2_SP_1_HI 0x2ff +#define GEN8_RBBM_PERFCTR2_SP_2_LO 0x300 +#define GEN8_RBBM_PERFCTR2_SP_2_HI 0x301 +#define GEN8_RBBM_PERFCTR2_SP_3_LO 0x302 +#define GEN8_RBBM_PERFCTR2_SP_3_HI 0x303 +#define GEN8_RBBM_PERFCTR2_SP_4_LO 0x304 +#define GEN8_RBBM_PERFCTR2_SP_4_HI 0x305 +#define GEN8_RBBM_PERFCTR2_SP_5_LO 0x306 +#define GEN8_RBBM_PERFCTR2_SP_5_HI 0x307 +#define GEN8_RBBM_PERFCTR2_SP_6_LO 0x308 +#define GEN8_RBBM_PERFCTR2_SP_6_HI 0x309 +#define GEN8_RBBM_PERFCTR2_SP_7_LO 0x30a +#define GEN8_RBBM_PERFCTR2_SP_7_HI 0x30b +#define GEN8_RBBM_PERFCTR2_SP_8_LO 0x30c +#define GEN8_RBBM_PERFCTR2_SP_8_HI 0x30d +#define GEN8_RBBM_PERFCTR2_SP_9_LO 0x30e +#define GEN8_RBBM_PERFCTR2_SP_9_HI 0x30f +#define GEN8_RBBM_PERFCTR2_SP_10_LO 0x310 +#define GEN8_RBBM_PERFCTR2_SP_10_HI 0x311 +#define GEN8_RBBM_PERFCTR2_SP_11_LO 0x312 +#define GEN8_RBBM_PERFCTR2_SP_11_HI 0x313 +#define GEN8_RBBM_PERFCTR2_TP_0_LO 0x314 +#define GEN8_RBBM_PERFCTR2_TP_0_HI 0x315 +#define GEN8_RBBM_PERFCTR2_TP_1_LO 0x316 +#define GEN8_RBBM_PERFCTR2_TP_1_HI 0x317 +#define GEN8_RBBM_PERFCTR2_TP_2_LO 0x318 +#define GEN8_RBBM_PERFCTR2_TP_2_HI 0x319 +#define GEN8_RBBM_PERFCTR2_TP_3_LO 0x31a +#define GEN8_RBBM_PERFCTR2_TP_3_HI 0x31b +#define GEN8_RBBM_PERFCTR2_TP_4_LO 0x31c +#define GEN8_RBBM_PERFCTR2_TP_4_HI 0x31d +#define GEN8_RBBM_PERFCTR2_TP_5_LO 0x31e +#define GEN8_RBBM_PERFCTR2_TP_5_HI 0x31f +#define GEN8_RBBM_PERFCTR2_TP_6_LO 0x320 +#define GEN8_RBBM_PERFCTR2_TP_6_HI 0x321 +#define GEN8_RBBM_PERFCTR2_TP_7_LO 0x322 +#define GEN8_RBBM_PERFCTR2_TP_7_HI 0x323 +#define GEN8_RBBM_PERFCTR2_UFC_0_LO 0x324 +#define GEN8_RBBM_PERFCTR2_UFC_0_HI 0x325 +#define GEN8_RBBM_PERFCTR2_UFC_1_LO 0x326 +#define GEN8_RBBM_PERFCTR2_UFC_1_HI 0x327 +#define GEN8_RBBM_PERFCTR_BV_PC_0_LO 0x328 +#define GEN8_RBBM_PERFCTR_BV_PC_0_HI 0x329 +#define 
GEN8_RBBM_PERFCTR_BV_PC_1_LO 0x32a +#define GEN8_RBBM_PERFCTR_BV_PC_1_HI 0x32b +#define GEN8_RBBM_PERFCTR_BV_PC_2_LO 0x32c +#define GEN8_RBBM_PERFCTR_BV_PC_2_HI 0x32d +#define GEN8_RBBM_PERFCTR_BV_PC_3_LO 0x32e +#define GEN8_RBBM_PERFCTR_BV_PC_3_HI 0x32f +#define GEN8_RBBM_PERFCTR_BV_PC_4_LO 0x330 +#define GEN8_RBBM_PERFCTR_BV_PC_4_HI 0x331 +#define GEN8_RBBM_PERFCTR_BV_PC_5_LO 0x332 +#define GEN8_RBBM_PERFCTR_BV_PC_5_HI 0x333 +#define GEN8_RBBM_PERFCTR_BV_PC_6_LO 0x334 +#define GEN8_RBBM_PERFCTR_BV_PC_6_HI 0x335 +#define GEN8_RBBM_PERFCTR_BV_PC_7_LO 0x336 +#define GEN8_RBBM_PERFCTR_BV_PC_7_HI 0x337 +#define GEN8_RBBM_PERFCTR_BV_VFD_0_LO 0x338 +#define GEN8_RBBM_PERFCTR_BV_VFD_0_HI 0x339 +#define GEN8_RBBM_PERFCTR_BV_VFD_1_LO 0x33a +#define GEN8_RBBM_PERFCTR_BV_VFD_1_HI 0x33b +#define GEN8_RBBM_PERFCTR_BV_VFD_2_LO 0x33c +#define GEN8_RBBM_PERFCTR_BV_VFD_2_HI 0x33d +#define GEN8_RBBM_PERFCTR_BV_VFD_3_LO 0x33e +#define GEN8_RBBM_PERFCTR_BV_VFD_3_HI 0x33f +#define GEN8_RBBM_PERFCTR_BV_VFD_4_LO 0x340 +#define GEN8_RBBM_PERFCTR_BV_VFD_4_HI 0x341 +#define GEN8_RBBM_PERFCTR_BV_VFD_5_LO 0x342 +#define GEN8_RBBM_PERFCTR_BV_VFD_5_HI 0x343 +#define GEN8_RBBM_PERFCTR_BV_VFD_6_LO 0x344 +#define GEN8_RBBM_PERFCTR_BV_VFD_6_HI 0x345 +#define GEN8_RBBM_PERFCTR_BV_VFD_7_LO 0x346 +#define GEN8_RBBM_PERFCTR_BV_VFD_7_HI 0x347 +#define GEN8_RBBM_PERFCTR_BV_VPC_0_LO 0x348 +#define GEN8_RBBM_PERFCTR_BV_VPC_0_HI 0x349 +#define GEN8_RBBM_PERFCTR_BV_VPC_1_LO 0x34a +#define GEN8_RBBM_PERFCTR_BV_VPC_1_HI 0x34b +#define GEN8_RBBM_PERFCTR_BV_VPC_2_LO 0x34c +#define GEN8_RBBM_PERFCTR_BV_VPC_2_HI 0x34d +#define GEN8_RBBM_PERFCTR_BV_VPC_3_LO 0x34e +#define GEN8_RBBM_PERFCTR_BV_VPC_3_HI 0x34f +#define GEN8_RBBM_PERFCTR_BV_VPC_4_LO 0x350 +#define GEN8_RBBM_PERFCTR_BV_VPC_4_HI 0x351 +#define GEN8_RBBM_PERFCTR_BV_VPC_5_LO 0x352 +#define GEN8_RBBM_PERFCTR_BV_VPC_5_HI 0x353 +#define GEN8_RBBM_PERFCTR_BV_TSE_0_LO 0x354 +#define GEN8_RBBM_PERFCTR_BV_TSE_0_HI 0x355 +#define GEN8_RBBM_PERFCTR_BV_TSE_1_LO 
0x356 +#define GEN8_RBBM_PERFCTR_BV_TSE_1_HI 0x357 +#define GEN8_RBBM_PERFCTR_BV_TSE_2_LO 0x358 +#define GEN8_RBBM_PERFCTR_BV_TSE_2_HI 0x359 +#define GEN8_RBBM_PERFCTR_BV_TSE_3_LO 0x35a +#define GEN8_RBBM_PERFCTR_BV_TSE_3_HI 0x35b +#define GEN8_RBBM_PERFCTR_BV_RAS_0_LO 0x35c +#define GEN8_RBBM_PERFCTR_BV_RAS_0_HI 0x35d +#define GEN8_RBBM_PERFCTR_BV_RAS_1_LO 0x35e +#define GEN8_RBBM_PERFCTR_BV_RAS_1_HI 0x35f +#define GEN8_RBBM_PERFCTR_BV_RAS_2_LO 0x360 +#define GEN8_RBBM_PERFCTR_BV_RAS_2_HI 0x361 +#define GEN8_RBBM_PERFCTR_BV_RAS_3_LO 0x362 +#define GEN8_RBBM_PERFCTR_BV_RAS_3_HI 0x363 +#define GEN8_RBBM_PERFCTR_BV_LRZ_0_LO 0x364 +#define GEN8_RBBM_PERFCTR_BV_LRZ_0_HI 0x365 +#define GEN8_RBBM_PERFCTR_BV_LRZ_1_LO 0x366 +#define GEN8_RBBM_PERFCTR_BV_LRZ_1_HI 0x367 +#define GEN8_RBBM_PERFCTR_BV_LRZ_2_LO 0x368 +#define GEN8_RBBM_PERFCTR_BV_LRZ_2_HI 0x369 +#define GEN8_RBBM_PERFCTR_BV_LRZ_3_LO 0x36a +#define GEN8_RBBM_PERFCTR_BV_LRZ_3_HI 0x36b + #define GEN8_RBBM_NC_MODE_CNTL 0x440 +#define GEN8_RBBM_PERFCTR_RBBM_SEL_0 0x441 +#define GEN8_RBBM_PERFCTR_RBBM_SEL_1 0x442 +#define GEN8_RBBM_PERFCTR_RBBM_SEL_2 0x443 +#define GEN8_RBBM_PERFCTR_RBBM_SEL_3 0x444 + #define GEN8_RBBM_PERFCTR_SRAM_INIT_CMD 0x449 #define GEN8_RBBM_PERFCTR_CNTL 0x460 @@ -662,8 +977,30 @@ #define GEN8_RB_RESOLVE_PREFETCH_CNTL 0x8f01 #define GEN8_RB_CMP_DBG_ECO_CNTL 0x8f02 #define GEN8_RB_UNSLICE_STATUS 0x8f03 +#define GEN8_RB_PERFCTR_CMP_SEL_0 0x8f04 +#define GEN8_RB_PERFCTR_CMP_SEL_1 0x8f05 +#define GEN8_RB_PERFCTR_CMP_SEL_2 0x8f06 +#define GEN8_RB_PERFCTR_CMP_SEL_3 0x8f07 +#define GEN8_RB_PERFCTR_UFC_SEL_0 0x8f10 +#define GEN8_RB_PERFCTR_UFC_SEL_1 0x8f11 +#define GEN8_RB_PERFCTR_UFC_SEL_2 0x8f12 +#define GEN8_RB_PERFCTR_UFC_SEL_3 0x8f13 +#define GEN8_RB_PERFCTR_UFC_SEL_4 0x8f14 +#define GEN8_RB_PERFCTR_UFC_SEL_5 0x8f15 /* VPC registers */ +#define GEN8_VPC_PERFCTR_VPC_SEL_2_0 0x9670 +#define GEN8_VPC_PERFCTR_VPC_SEL_2_1 0x9671 +#define GEN8_VPC_PERFCTR_VPC_SEL_2_2 0x9672 +#define 
GEN8_VPC_PERFCTR_VPC_SEL_2_3 0x9673 +#define GEN8_VPC_PERFCTR_VPC_SEL_2_4 0x9674 +#define GEN8_VPC_PERFCTR_VPC_SEL_2_5 0x9675 +#define GEN8_VPC_PERFCTR_VPC_SEL_2_6 0x9676 +#define GEN8_VPC_PERFCTR_VPC_SEL_2_7 0x9677 +#define GEN8_VPC_PERFCTR_VPC_SEL_2_8 0x9678 +#define GEN8_VPC_PERFCTR_VPC_SEL_2_9 0x9679 +#define GEN8_VPC_PERFCTR_VPC_SEL_2_10 0x967a +#define GEN8_VPC_PERFCTR_VPC_SEL_2_11 0x967b #define GEN8_VPC_PERFCTR_VPC_SEL_0 0x9690 /* Indexed Register */ #define GEN8_VPC_PERFCTR_VPC_SEL_1 0x9691 /* Indexed Register */ #define GEN8_VPC_PERFCTR_VPC_SEL_2 0x9692 /* Indexed Register */ @@ -679,6 +1016,18 @@ #define GEN8_VPC_LB_MODE_CNTL 0x9740 #define GEN8_VPC_FLATSHADE_MODE_CNTL 0x9741 #define GEN8_VPC_DBG_ECO_CNTL_1 0x9742 +#define GEN8_VPC_PERFCTR_VPC_SEL_1_0 0x9750 +#define GEN8_VPC_PERFCTR_VPC_SEL_1_1 0x9751 +#define GEN8_VPC_PERFCTR_VPC_SEL_1_2 0x9752 +#define GEN8_VPC_PERFCTR_VPC_SEL_1_3 0x9753 +#define GEN8_VPC_PERFCTR_VPC_SEL_1_4 0x9754 +#define GEN8_VPC_PERFCTR_VPC_SEL_1_5 0x9755 +#define GEN8_VPC_PERFCTR_VPC_SEL_1_6 0x9756 +#define GEN8_VPC_PERFCTR_VPC_SEL_1_7 0x9757 +#define GEN8_VPC_PERFCTR_VPC_SEL_1_8 0x9758 +#define GEN8_VPC_PERFCTR_VPC_SEL_1_9 0x9759 +#define GEN8_VPC_PERFCTR_VPC_SEL_1_10 0x975a +#define GEN8_VPC_PERFCTR_VPC_SEL_1_11 0x975b /* PC registers:*/ #define GEN8_PC_AUTO_VERTEX_STRIDE 0x9e0a @@ -686,6 +1035,22 @@ #define GEN8_PC_CHICKEN_BITS_2 0x9f20 #define GEN8_PC_CHICKEN_BITS_3 0x9e22 #define GEN8_PC_CHICKEN_BITS_4 0x9e23 +#define GEN8_PC_PERFCTR_PC_SEL_0 0x9e30 +#define GEN8_PC_PERFCTR_PC_SEL_1 0x9e31 +#define GEN8_PC_PERFCTR_PC_SEL_2 0x9e32 +#define GEN8_PC_PERFCTR_PC_SEL_3 0x9e33 +#define GEN8_PC_PERFCTR_PC_SEL_4 0x9e34 +#define GEN8_PC_PERFCTR_PC_SEL_5 0x9e35 +#define GEN8_PC_PERFCTR_PC_SEL_6 0x9e36 +#define GEN8_PC_PERFCTR_PC_SEL_7 0x9e37 +#define GEN8_PC_PERFCTR_PC_SEL_8 0x9e38 +#define GEN8_PC_PERFCTR_PC_SEL_9 0x9e39 +#define GEN8_PC_PERFCTR_PC_SEL_10 0x9e3a +#define GEN8_PC_PERFCTR_PC_SEL_11 0x9e3b +#define 
GEN8_PC_PERFCTR_PC_SEL_12 0x9e3c +#define GEN8_PC_PERFCTR_PC_SEL_13 0x9e3d +#define GEN8_PC_PERFCTR_PC_SEL_14 0x9e3e +#define GEN8_PC_PERFCTR_PC_SEL_15 0x9e3f #define GEN8_PC_CHICKEN_BITS_1 0x9e50 #define GEN8_PC_CONTEXT_SWITCH_STABILIZE_CNTL_1 0x9e64 @@ -793,6 +1158,12 @@ #define GEN8_SP_PERFCTR_SP_SEL_33 0xaea1 #define GEN8_SP_PERFCTR_SP_SEL_34 0xaea2 #define GEN8_SP_PERFCTR_SP_SEL_35 0xaea3 +#define GEN8_SP_PERFCTR_HLSQ_SEL_2_0 0xaec0 +#define GEN8_SP_PERFCTR_HLSQ_SEL_2_1 0xaec1 +#define GEN8_SP_PERFCTR_HLSQ_SEL_2_2 0xaec2 +#define GEN8_SP_PERFCTR_HLSQ_SEL_2_3 0xaec3 +#define GEN8_SP_PERFCTR_HLSQ_SEL_2_4 0xaec4 +#define GEN8_SP_PERFCTR_HLSQ_SEL_2_5 0xaec5 /* TP registers */ #define GEN8_TPL1_DBG_ECO_CNTL 0xb600 @@ -980,114 +1351,168 @@ #define GEN8_GMUCX_CM3_CFG 0x1f82d #define GEN8_GMUCX_AO_COUNTER_LO 0x1f840 #define GEN8_GMUCX_AO_COUNTER_HI 0x1f841 -#define GEN8_GMUCX_PERF_COUNTER_ENABLE 0x1f848 -#define GEN8_GMUCX_POWER_COUNTER_ENABLE 0x1fc10 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_0 0x1fc30 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_1 0x1fc31 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_2 0x1fc32 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_3 0x1fc33 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_0 0x1fc40 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_1 0x1fc41 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_2 0x1fc42 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_3 0x1fc43 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_4 0x1fc44 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_5 0x1fc45 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_6 0x1fc46 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_7 0x1fc47 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_8 0x1fc48 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_9 0x1fc49 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_0 0x1fc50 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_1 0x1fc51 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_2 0x1fc52 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_3 
0x1fc53 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_4 0x1fc54 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_5 0x1fc55 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_6 0x1fc56 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_7 0x1fc57 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_8 0x1fc58 -#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_9 0x1fc59 +#define GEN8_GMUCX_PERF_COUNTER_ENABLE 0x1f848 +#define GEN8_GMUCX_PERF_COUNTER_SELECT_0 0x1f858 +#define GEN8_GMUCX_PERF_COUNTER_SELECT_1 0x1f859 +#define GEN8_GMUCX_PERF_COUNTER_SELECT_H_0 0x1f868 +#define GEN8_GMUCX_PERF_COUNTER_SELECT_H_1 0x1f869 +#define GEN8_GMUCX_PERF_COUNTER_L_0 0x1f878 +#define GEN8_GMUCX_PERF_COUNTER_H_0 0x1f879 +#define GEN8_GMUCX_PERF_COUNTER_L_1 0x1f87a +#define GEN8_GMUCX_PERF_COUNTER_H_1 0x1f87b +#define GEN8_GMUCX_PERF_COUNTER_L_2 0x1f87c +#define GEN8_GMUCX_PERF_COUNTER_H_2 0x1f87d +#define GEN8_GMUCX_PERF_COUNTER_L_3 0x1f87e +#define GEN8_GMUCX_PERF_COUNTER_H_3 0x1f87f +#define GEN8_GMUCX_PERF_COUNTER_L_4 0x1f880 +#define GEN8_GMUCX_PERF_COUNTER_H_4 0x1f881 +#define GEN8_GMUCX_PERF_COUNTER_L_5 0x1f882 +#define GEN8_GMUCX_PERF_COUNTER_H_5 0x1f883 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_0 0x1fca0 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_0 0x1fca1 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_1 0x1fca2 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_1 0x1fca3 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_2 0x1fca4 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_2 0x1fca5 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_3 0x1fca6 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_3 0x1fca7 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_4 0x1fca8 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_4 0x1fca9 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_5 0x1fcaa -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_5 0x1fcab -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_6 0x1fcac -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_6 0x1fcad -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_7 0x1fcae -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_7 0x1fcaf -#define 
GEN8_GMUCX_POWER_COUNTER_XOCLK_L_8 0x1fcb0 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_8 0x1fcb1 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_9 0x1fcb2 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_9 0x1fcb3 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_10 0x1fcb4 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_10 0x1fcb5 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_11 0x1fcb6 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_11 0x1fcb7 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_12 0x1fcb8 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_12 0x1fcb9 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_13 0x1fcba -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_13 0x1fcbb -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_14 0x1fcbc -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_14 0x1fcbd -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_15 0x1fcbe -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_15 0x1fcbf -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_16 0x1fcc0 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_16 0x1fcc1 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_17 0x1fcc2 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_17 0x1fcc3 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_18 0x1fcc4 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_18 0x1fcc5 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_19 0x1fcc6 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_19 0x1fcc7 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_20 0x1fcc8 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_20 0x1fcc9 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_21 0x1fcca -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_21 0x1fccb -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_22 0x1fccc -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_22 0x1fccd -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_23 0x1fcce -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_23 0x1fccf -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_24 0x1fcd0 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_24 0x1fcd1 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_25 0x1fcd2 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_25 0x1fcd3 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_26 0x1fcd4 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_26 
0x1fcd5 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_27 0x1fcd6 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_27 0x1fcd7 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_28 0x1fcd8 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_28 0x1fcd9 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_29 0x1fcda -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_29 0x1fcdb -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_30 0x1fcdc -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_30 0x1fcdd -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_31 0x1fcde -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_31 0x1fcdf -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_32 0x1fce0 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_32 0x1fce1 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_33 0x1fce2 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_33 0x1fce3 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_34 0x1fce4 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_34 0x1fce5 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_35 0x1fce6 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_35 0x1fce7 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_36 0x1fce8 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_36 0x1fce9 -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_37 0x1fcea -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_37 0x1fceb -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_38 0x1fcec -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_38 0x1fced -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_39 0x1fcee -#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_39 0x1fcef +#define GEN8_GMUCX_POWER_COUNTER_ENABLE 0x1fc10 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_0 0x1fc30 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_1 0x1fc31 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_2 0x1fc32 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_3 0x1fc33 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_H_0 0x1fc38 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_H_1 0x1fc39 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_H_2 0x1fc3a +#define GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_H_3 0x1fc3b +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_0 0x1fc40 +#define 
GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_1 0x1fc41 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_2 0x1fc42 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_3 0x1fc43 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_4 0x1fc44 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_5 0x1fc45 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_6 0x1fc46 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_7 0x1fc47 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_8 0x1fc48 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_9 0x1fc49 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_0 0x1fc50 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_1 0x1fc51 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_2 0x1fc52 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_3 0x1fc53 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_4 0x1fc54 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_5 0x1fc55 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_6 0x1fc56 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_7 0x1fc57 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_8 0x1fc58 +#define GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_H_9 0x1fc59 + +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_0 0x1fc60 +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_0 0x1fc61 +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_1 0x1fc62 +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_1 0x1fc63 +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_2 0x1fc64 +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_2 0x1fc65 +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_3 0x1fc66 +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_3 0x1fc67 +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_4 0x1fc68 +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_4 0x1fc69 +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_5 0x1fc6a +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_5 0x1fc6b +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_6 0x1fc6c +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_6 0x1fc6d +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_7 0x1fc6e +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_7 0x1fc6f +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_8 0x1fc70 
+#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_8 0x1fc71 +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_9 0x1fc72 +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_9 0x1fc73 +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_10 0x1fc74 +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_10 0x1fc75 +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_11 0x1fc76 +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_11 0x1fc77 +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_12 0x1fc78 +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_12 0x1fc79 +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_13 0x1fc7a +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_13 0x1fc7b +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_14 0x1fc7c +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_14 0x1fc7d +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_15 0x1fc7e +#define GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_15 0x1fc7f + +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_0 0x1fca0 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_0 0x1fca1 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_1 0x1fca2 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_1 0x1fca3 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_2 0x1fca4 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_2 0x1fca5 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_3 0x1fca6 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_3 0x1fca7 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_4 0x1fca8 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_4 0x1fca9 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_5 0x1fcaa +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_5 0x1fcab +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_6 0x1fcac +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_6 0x1fcad +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_7 0x1fcae +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_7 0x1fcaf +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_8 0x1fcb0 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_8 0x1fcb1 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_9 0x1fcb2 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_9 0x1fcb3 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_10 0x1fcb4 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_10 0x1fcb5 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_11 
0x1fcb6 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_11 0x1fcb7 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_12 0x1fcb8 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_12 0x1fcb9 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_13 0x1fcba +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_13 0x1fcbb +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_14 0x1fcbc +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_14 0x1fcbd +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_15 0x1fcbe +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_15 0x1fcbf +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_16 0x1fcc0 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_16 0x1fcc1 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_17 0x1fcc2 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_17 0x1fcc3 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_18 0x1fcc4 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_18 0x1fcc5 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_19 0x1fcc6 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_19 0x1fcc7 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_20 0x1fcc8 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_20 0x1fcc9 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_21 0x1fcca +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_21 0x1fccb +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_22 0x1fccc +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_22 0x1fccd +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_23 0x1fcce +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_23 0x1fccf +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_24 0x1fcd0 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_24 0x1fcd1 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_25 0x1fcd2 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_25 0x1fcd3 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_26 0x1fcd4 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_26 0x1fcd5 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_27 0x1fcd6 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_27 0x1fcd7 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_28 0x1fcd8 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_28 0x1fcd9 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_29 0x1fcda +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_29 0x1fcdb +#define 
GEN8_GMUCX_POWER_COUNTER_XOCLK_L_30 0x1fcdc +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_30 0x1fcdd +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_31 0x1fcde +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_31 0x1fcdf +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_32 0x1fce0 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_32 0x1fce1 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_33 0x1fce2 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_33 0x1fce3 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_34 0x1fce4 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_34 0x1fce5 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_35 0x1fce6 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_35 0x1fce7 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_36 0x1fce8 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_36 0x1fce9 +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_37 0x1fcea +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_37 0x1fceb +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_38 0x1fcec +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_38 0x1fced +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_L_39 0x1fcee +#define GEN8_GMUCX_POWER_COUNTER_XOCLK_H_39 0x1fcef /* HFI registers*/ #define GEN8_GMUCX_HFI_CTRL_STATUS 0x1f980 From dfdb7ef904130669db4b7a2f31e6d5e7c2a9fc04 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sun, 22 Oct 2023 11:53:13 +0530 Subject: [PATCH 0555/1016] kgsl: gen8: Fix fence error during perfcounter put This change ports commit 373a2c6e1968 ("msm: kgsl: Fix fence error during perfcounter put in gen7") from gen7 to gen8. Change-Id: Ia6be1bdbf530bbc9555718c94d6e733f72fdf184 Signed-off-by: Kamal Agrawal --- adreno_gen8.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index 026e16469e..f961b024be 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -1822,13 +1822,11 @@ int gen8_perfcounter_remove(struct adreno_device *adreno_dev, /* * If dynamic list length is 2, the only entry in the list is the GEN8_RBBM_PERFCTR_CNTL. - * Remove the same as we can disable perfcounters now. + * Remove the same. 
*/ if (lock->dynamic_list_len == 2) { memset(&data[offset], 0, 6 * sizeof(u32)); lock->dynamic_list_len = 0; - kgsl_regwrite(KGSL_DEVICE(adreno_dev), GEN8_RBBM_PERFCTR_CNTL, 0x0); - kgsl_regwrite(KGSL_DEVICE(adreno_dev), GEN8_RBBM_SLICE_PERFCTR_CNTL, 0x0); } kgsl_hwunlock(lock); From f54edb36aebb2d7931a32d7e17a060342a54160c Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sun, 22 Oct 2023 10:52:48 +0530 Subject: [PATCH 0556/1016] kgsl: gen8: Reduce contention in cpu gpu shared lock This change ports commit a404f6fde547 ("msm: kgsl: Reduce contention in cpu gpu shared lock") to gen8. Change-Id: I2d67c2911f7ee01053a8290f2925ef60c30acc50 Signed-off-by: Kamal Agrawal --- adreno_gen8.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index f961b024be..dc704aa32d 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -1775,15 +1775,8 @@ int gen8_perfcounter_remove(struct adreno_device *adreno_dev, bool remove_counter = false; u32 pipe = FIELD_PREP(GENMASK(13, 12), _get_pipeid(groupid)); - if (kgsl_hwlock(lock)) { - kgsl_hwunlock(lock); - return -EBUSY; - } - - if (lock->dynamic_list_len < 3) { - kgsl_hwunlock(lock); + if (lock->dynamic_list_len < 3) return -EINVAL; - } second_last_offset = offset + (lock->dynamic_list_len - 3) * 3; last_offset = second_last_offset + 3; @@ -1797,9 +1790,12 @@ int gen8_perfcounter_remove(struct adreno_device *adreno_dev, offset += 3; } - if (!remove_counter) { - kgsl_hwunlock(lock); + if (!remove_counter) return -ENOENT; + + if (kgsl_hwlock(lock)) { + kgsl_hwunlock(lock); + return -EBUSY; } /* @@ -1840,6 +1836,19 @@ int gen8_perfcounter_update(struct adreno_device *adreno_dev, struct cpu_gpu_lock *lock = ptr; u32 *data = ptr + sizeof(*lock); int i, offset = (lock->ifpc_list_len + lock->preemption_list_len) * 2; + bool select_reg_present = false; + + for (i = 0; i < lock->dynamic_list_len; i++) { + if ((data[offset + 1] == reg->select) && (data[offset] 
== pipe)) { + select_reg_present = true; + break; + } + + if (data[offset + 1] == GEN8_RBBM_PERFCTR_CNTL) + break; + + offset += 3; + } if (kgsl_hwlock(lock)) { kgsl_hwunlock(lock); @@ -1851,16 +1860,9 @@ int gen8_perfcounter_update(struct adreno_device *adreno_dev, * update it, otherwise append the * triplet to the end of the list. */ - for (i = 0; i < lock->dynamic_list_len; i++) { - if ((data[offset + 1] == reg->select) && (data[offset] == pipe)) { - data[offset + 2] = reg->countable; - goto update; - } - - if (data[offset + 1] == GEN8_RBBM_PERFCTR_CNTL) - break; - - offset += 3; + if (select_reg_present) { + data[offset + 2] = reg->countable; + goto update; } /* From 756efe65399d8366b98630100f2b9202138ef887 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sun, 22 Oct 2023 11:28:34 +0530 Subject: [PATCH 0557/1016] kgsl: gen8: Update dynamic reg list for all perfcounter blocks This change ports commit 25f3b4e2ac90 ("msm: kgsl: Update dynamic reg list for all perfcounter blocks") from gen7 to gen8. 
Change-Id: I32d1de1ea611b381352c6c82bc09fa169c45a7ae Signed-off-by: Kamal Agrawal --- adreno_gen8.c | 75 +++++++++++++++++++++++++-------------- adreno_gen8.h | 4 ++- adreno_gen8_hwsched_hfi.c | 2 +- adreno_gen8_perfcounter.c | 28 +++++---------- 4 files changed, 60 insertions(+), 49 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index dc704aa32d..e60ee87b53 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -1767,6 +1767,8 @@ static u32 _get_pipeid(u32 groupid) int gen8_perfcounter_remove(struct adreno_device *adreno_dev, struct adreno_perfcount_register *reg, u32 groupid) { + const struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; void *ptr = adreno_dev->pwrup_reglist->hostptr; struct cpu_gpu_lock *lock = ptr; u32 *data = ptr + sizeof(*lock); @@ -1775,9 +1777,22 @@ int gen8_perfcounter_remove(struct adreno_device *adreno_dev, bool remove_counter = false; u32 pipe = FIELD_PREP(GENMASK(13, 12), _get_pipeid(groupid)); - if (lock->dynamic_list_len < 3) + if (!lock->dynamic_list_len) return -EINVAL; + group = &(counters->groups[groupid]); + + if (!(group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE)) { + if (lock->dynamic_list_len != 2) + return 0; + + if (kgsl_hwlock(lock)) { + kgsl_hwunlock(lock); + return -EBUSY; + } + goto disable_perfcounter; + } + second_last_offset = offset + (lock->dynamic_list_len - 3) * 3; last_offset = second_last_offset + 3; @@ -1816,11 +1831,12 @@ int gen8_perfcounter_remove(struct adreno_device *adreno_dev, lock->dynamic_list_len--; +disable_perfcounter: /* - * If dynamic list length is 2, the only entry in the list is the GEN8_RBBM_PERFCTR_CNTL. - * Remove the same. + * If dynamic list length is 2 and no_restore_count is 0, then we can remove + * the perfcounter controls from the list. 
*/ - if (lock->dynamic_list_len == 2) { + if (lock->dynamic_list_len == 2 && !adreno_dev->no_restore_count) { memset(&data[offset], 0, 6 * sizeof(u32)); lock->dynamic_list_len = 0; } @@ -1830,7 +1846,7 @@ int gen8_perfcounter_remove(struct adreno_device *adreno_dev, } int gen8_perfcounter_update(struct adreno_device *adreno_dev, - struct adreno_perfcount_register *reg, bool update_reg, u32 pipe) + struct adreno_perfcount_register *reg, bool update_reg, u32 pipe, unsigned long flags) { void *ptr = adreno_dev->pwrup_reglist->hostptr; struct cpu_gpu_lock *lock = ptr; @@ -1838,16 +1854,20 @@ int gen8_perfcounter_update(struct adreno_device *adreno_dev, int i, offset = (lock->ifpc_list_len + lock->preemption_list_len) * 2; bool select_reg_present = false; - for (i = 0; i < lock->dynamic_list_len; i++) { - if ((data[offset + 1] == reg->select) && (data[offset] == pipe)) { - select_reg_present = true; - break; + if (flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) { + for (i = 0; i < lock->dynamic_list_len; i++) { + if ((data[offset + 1] == reg->select) && (data[offset] == pipe)) { + select_reg_present = true; + break; + } + + if (data[offset + 1] == GEN8_RBBM_PERFCTR_CNTL) + break; + + offset += 3; } - - if (data[offset + 1] == GEN8_RBBM_PERFCTR_CNTL) - break; - - offset += 3; + } else if (lock->dynamic_list_len) { + goto update; } if (kgsl_hwlock(lock)) { @@ -1862,40 +1882,41 @@ int gen8_perfcounter_update(struct adreno_device *adreno_dev, */ if (select_reg_present) { data[offset + 2] = reg->countable; + kgsl_hwunlock(lock); goto update; } + /* Initialize the lock->dynamic_list_len to account for perfcounter controls */ + if (!lock->dynamic_list_len) + lock->dynamic_list_len = 2; + /* - * For all targets GEN8_RBBM_PERFCTR_CNTL needs to be the last entry, - * so overwrite the existing GEN8_RBBM_PERFCNTL_CTRL and add it back to + * For all targets GEN8_SLICE_RBBM_PERFCTR_CNTL needs to be the last entry, + * so overwrite the existing GEN8_SLICE_RBBM_PERFCNTL_CNTL and add it back 
to * the end. */ - data[offset++] = pipe; - data[offset++] = reg->select; - data[offset++] = reg->countable; + if (flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) { + data[offset++] = pipe; + data[offset++] = reg->select; + data[offset++] = reg->countable; + lock->dynamic_list_len++; + } data[offset++] = FIELD_PREP(GENMASK(13, 12), PIPE_NONE); data[offset++] = GEN8_RBBM_PERFCTR_CNTL; data[offset++] = 1; - lock->dynamic_list_len++; data[offset++] = FIELD_PREP(GENMASK(13, 12), PIPE_NONE); data[offset++] = GEN8_RBBM_SLICE_PERFCTR_CNTL; data[offset++] = 1; - lock->dynamic_list_len++; - /* If this is the first entry, enable perfcounters */ - if (lock->dynamic_list_len == 2) { - kgsl_regwrite(KGSL_DEVICE(adreno_dev), GEN8_RBBM_PERFCTR_CNTL, 0x1); - kgsl_regwrite(KGSL_DEVICE(adreno_dev), GEN8_RBBM_SLICE_PERFCTR_CNTL, 0x1); - } + kgsl_hwunlock(lock); update: if (update_reg) kgsl_regwrite(KGSL_DEVICE(adreno_dev), reg->select, reg->countable); - kgsl_hwunlock(lock); return 0; } diff --git a/adreno_gen8.h b/adreno_gen8.h index d5aee01180..6550a16d91 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -363,11 +363,13 @@ void gen8_spin_idle_debug(struct adreno_device *adreno_dev, * @reg: Perfcounter reg struct to add/remove to the list * @update_reg: true if the perfcounter needs to be programmed by the CPU * @pipe: pipe id for CP aperture control + * @flags: Flags set for requested perfcounter group * * Return: 0 on success or -EBUSY if the lock couldn't be taken */ int gen8_perfcounter_update(struct adreno_device *adreno_dev, - struct adreno_perfcount_register *reg, bool update_reg, u32 pipe); + struct adreno_perfcount_register *reg, bool update_reg, u32 pipe, + unsigned long flags); /* * gen8_ringbuffer_init - Initialize the ringbuffers diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index bfc65900a3..33a977d824 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -4117,7 +4117,7 @@ int gen8_hwsched_counter_inline_enable(struct adreno_device 
*adreno_dev, if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) gen8_perfcounter_update(adreno_dev, reg, false, - FIELD_PREP(GENMASK(13, 12), PIPE_NONE)); + FIELD_PREP(GENMASK(13, 12), PIPE_NONE), group->flags); cmds[0] = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD_RAW, HFI_MSG_CMD); diff --git a/adreno_gen8_perfcounter.c b/adreno_gen8_perfcounter.c index 7df4e95538..12fdebbc81 100644 --- a/adreno_gen8_perfcounter.c +++ b/adreno_gen8_perfcounter.c @@ -40,11 +40,8 @@ static int gen8_counter_br_enable(struct adreno_device *adreno_dev, kgsl_regread(device, GEN8_CP_APERTURE_CNTL_HOST, &val); kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, FIELD_PREP(GENMASK(13, 12), PIPE_BR)); - if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) - ret = gen8_perfcounter_update(adreno_dev, reg, true, - FIELD_PREP(GENMASK(13, 12), PIPE_BR)); - else - kgsl_regwrite(device, reg->select, countable); + ret = gen8_perfcounter_update(adreno_dev, reg, true, + FIELD_PREP(GENMASK(13, 12), PIPE_BR), group->flags); kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, val); @@ -66,11 +63,8 @@ static int gen8_counter_bv_enable(struct adreno_device *adreno_dev, kgsl_regread(device, GEN8_CP_APERTURE_CNTL_HOST, &val); kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, FIELD_PREP(GENMASK(13, 12), PIPE_BV)); - if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) - ret = gen8_perfcounter_update(adreno_dev, reg, true, - FIELD_PREP(GENMASK(13, 12), PIPE_BV)); - else - kgsl_regwrite(device, reg->select, countable); + ret = gen8_perfcounter_update(adreno_dev, reg, true, + FIELD_PREP(GENMASK(13, 12), PIPE_BV), group->flags); kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, val); @@ -84,16 +78,11 @@ static int gen8_counter_enable(struct adreno_device *adreno_dev, const struct adreno_perfcount_group *group, u32 counter, u32 countable) { - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct adreno_perfcount_register *reg = &group->regs[counter]; int ret = 0; - if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) 
- ret = gen8_perfcounter_update(adreno_dev, reg, true, - FIELD_PREP(GENMASK(13, 12), PIPE_NONE)); - else - kgsl_regwrite(device, reg->select, countable); - + ret = gen8_perfcounter_update(adreno_dev, reg, true, + FIELD_PREP(GENMASK(13, 12), PIPE_NONE), group->flags); if (!ret) reg->value = 0; @@ -126,9 +115,8 @@ static int gen8_counter_inline_enable(struct adreno_device *adreno_dev, return gen8_counter_enable(adreno_dev, group, counter, countable); - if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) - gen8_perfcounter_update(adreno_dev, reg, false, - FIELD_PREP(GENMASK(13, 12), PIPE_NONE)); + gen8_perfcounter_update(adreno_dev, reg, false, + FIELD_PREP(GENMASK(13, 12), PIPE_NONE), group->flags); cmds[0] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); cmds[1] = cp_type4_packet(reg->select, 1); From b43d74c8bf63e31cdcc705ef22d6582a3340280e Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 31 Aug 2023 16:10:28 +0530 Subject: [PATCH 0558/1016] kgsl: gen8: Remove inline performance counter enable Hardware improvements in Gen8 GPUs allows to read performance counters directly from the driver. So, Gen8 GPUs don't need inline perfcounter counter enable path. Hence, remove the support for inline performance counter enable. 
Change-Id: I0f0930c10f4bcc47d76dd638fea14962f9112f66 Signed-off-by: Kamal Agrawal --- adreno-gpulist.h | 2 +- adreno_gen8.h | 4 -- adreno_gen8_hwsched_hfi.c | 38 ----------- adreno_gen8_hwsched_hfi.h | 14 ---- adreno_gen8_perfcounter.c | 130 +------------------------------------- 5 files changed, 3 insertions(+), 185 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index b9c3eb609b..5152102bde 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2627,7 +2627,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION, .gpudev = &adreno_gen8_hwsched_gpudev.base, - .perfcounters = &adreno_gen8_hwsched_perfcounters, + .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_16M, .gmem_size = 12 * SZ_1M, .bus_width = 32, diff --git a/adreno_gen8.h b/adreno_gen8.h index 6550a16d91..93695c85a1 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -18,7 +18,6 @@ struct gen8_snapshot_block_list; extern const struct adreno_power_ops gen8_gmu_power_ops; extern const struct adreno_power_ops gen8_hwsched_power_ops; extern const struct adreno_perfcounters adreno_gen8_perfcounters; -extern const struct adreno_perfcounters adreno_gen8_hwsched_perfcounters; struct gen8_gpudev { struct adreno_gpudev base; @@ -196,9 +195,6 @@ struct gen8_cp_smmu_info { /* Size of the CP_INIT pm4 stream in dwords */ #define GEN8_CP_INIT_DWORDS 10 -/* Size of the perf counter enable pm4 stream in dwords */ -#define GEN8_PERF_COUNTER_ENABLE_DWORDS 3 - #define GEN8_INT_MASK \ ((1 << GEN8_INT_AHBERROR) | \ (1 << GEN8_INT_ATBASYNCFIFOOVERFLOW) | \ diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 33a977d824..a4d6f24e37 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -4106,44 +4106,6 @@ void gen8_hwsched_context_destroy(struct adreno_device *adreno_dev, gen8_free_gmu_block(to_gen8_gmu(adreno_dev), &drawctxt->gmu_hw_fence_queue); } -int 
gen8_hwsched_counter_inline_enable(struct adreno_device *adreno_dev, - const struct adreno_perfcount_group *group, - u32 counter, u32 countable) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct adreno_perfcount_register *reg = &group->regs[counter]; - u32 val, cmds[GEN8_PERF_COUNTER_ENABLE_DWORDS + 1]; - int ret; - - if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) - gen8_perfcounter_update(adreno_dev, reg, false, - FIELD_PREP(GENMASK(13, 12), PIPE_NONE), group->flags); - - cmds[0] = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD_RAW, HFI_MSG_CMD); - - cmds[1] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); - cmds[2] = cp_type4_packet(reg->select, 1); - cmds[3] = countable; - - ret = gen8_hfi_send_cmd_async(adreno_dev, cmds, sizeof(cmds)); - if (ret) - goto err; - - /* Wait till the register is programmed with the countable */ - ret = kgsl_regmap_read_poll_timeout(&device->regmap, reg->select, val, - val == countable, 100, ADRENO_IDLE_TIMEOUT); - if (!ret) { - reg->value = 0; - return ret; - } - -err: - dev_err(device->dev, "Perfcounter %s/%u/%u start via commands failed\n", - group->name, counter, countable); - - return ret; -} - int gen8_hwsched_disable_hw_fence_throttle(struct adreno_device *adreno_dev) { struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); diff --git a/adreno_gen8_hwsched_hfi.h b/adreno_gen8_hwsched_hfi.h index 17bf380048..b1dc120e0d 100644 --- a/adreno_gen8_hwsched_hfi.h +++ b/adreno_gen8_hwsched_hfi.h @@ -157,20 +157,6 @@ void gen8_hwsched_hfi_stop(struct adreno_device *adreno_dev); */ int gen8_hwsched_cp_init(struct adreno_device *adreno_dev); -/** - * gen8_hwsched_counter_inline_enable - Configure a performance counter for a countable - * @adreno_dev - Adreno device to configure - * @group - Desired performance counter group - * @counter - Desired performance counter in the group - * @countable - Desired countable - * - * Physically set up a counter within a group with the desired countable - * Return 0 on success or negative 
error on failure. - */ -int gen8_hwsched_counter_inline_enable(struct adreno_device *adreno_dev, - const struct adreno_perfcount_group *group, - u32 counter, u32 countable); - /** * gen8_hfi_send_cmd_async - Send an hfi packet * @adreno_dev: Pointer to adreno device structure diff --git a/adreno_gen8_perfcounter.c b/adreno_gen8_perfcounter.c index 12fdebbc81..c452d7e6b3 100644 --- a/adreno_gen8_perfcounter.c +++ b/adreno_gen8_perfcounter.c @@ -6,7 +6,6 @@ #include "adreno.h" #include "adreno_gen8.h" -#include "adreno_gen8_hwsched_hfi.h" #include "adreno_perfcounter.h" #include "adreno_pm4types.h" #include "kgsl_device.h" @@ -89,80 +88,6 @@ static int gen8_counter_enable(struct adreno_device *adreno_dev, return ret; } -static int gen8_hwsched_counter_enable(struct adreno_device *adreno_dev, - const struct adreno_perfcount_group *group, - u32 counter, u32 countable) -{ - if (!(KGSL_DEVICE(adreno_dev)->state == KGSL_STATE_ACTIVE)) - return gen8_counter_enable(adreno_dev, group, counter, countable); - - return gen8_hwsched_counter_inline_enable(adreno_dev, group, counter, countable); -} - -/* This function is specific to sw-scheduler and not applicable for hw-scheduler */ -static int gen8_counter_inline_enable(struct adreno_device *adreno_dev, - const struct adreno_perfcount_group *group, - u32 counter, u32 countable) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct adreno_perfcount_register *reg = &group->regs[counter]; - struct adreno_ringbuffer *rb = &adreno_dev->ringbuffers[0]; - u32 cmds[3]; - int ret; - - /* Fallback when we reach here from GPU initialization sequence */ - if (!(device->state == KGSL_STATE_ACTIVE)) - return gen8_counter_enable(adreno_dev, group, counter, - countable); - - gen8_perfcounter_update(adreno_dev, reg, false, - FIELD_PREP(GENMASK(13, 12), PIPE_NONE), group->flags); - - cmds[0] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); - cmds[1] = cp_type4_packet(reg->select, 1); - cmds[2] = countable; - - /* submit to highest priority 
RB always */ - ret = gen8_ringbuffer_addcmds(adreno_dev, rb, NULL, - F_NOTPROTECTED, cmds, 3, 0, NULL); - if (ret) - return ret; - - /* - * schedule dispatcher to make sure rb[0] is run, because - * if the current RB is not rb[0] and gpu is idle then - * rb[0] will not get scheduled to run - */ - if (adreno_dev->cur_rb != rb) - adreno_dispatcher_schedule(device); - - /* wait for the above commands submitted to complete */ - ret = adreno_ringbuffer_waittimestamp(rb, rb->timestamp, - ADRENO_IDLE_TIMEOUT); - - if (ret) { - /* - * If we were woken up because of cancelling rb events - * either due to soft reset or adreno_stop, ignore the - * error and return 0 here. The perfcounter is already - * set up in software and it will be programmed in - * hardware when we wake up or come up after soft reset, - * by adreno_perfcounter_restore. - */ - if (ret == -EAGAIN) - ret = 0; - else - dev_err_ratelimited(device->dev, - "Perfcounter %s/%u/%u start via commands failed %d\n", - group->name, counter, countable, ret); - } - - if (!ret) - reg->value = 0; - - return ret; -} - static u64 gen8_counter_read(struct adreno_device *adreno_dev, const struct adreno_perfcount_group *group, u32 counter) { @@ -1072,53 +997,6 @@ static struct adreno_perfcount_register gen8_perfcounters_alwayson[] = { GEN8_BV_PERFCOUNTER_GROUP(offset, name, \ gen8_counter_enable, gen8_counter_read) -static const struct adreno_perfcount_group gen8_hwsched_perfcounter_groups - [KGSL_PERFCOUNTER_GROUP_MAX] = { - GEN8_REGULAR_PERFCOUNTER_GROUP(CP, cp), - GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, RBBM, rbbm, 0, - gen8_counter_enable, gen8_counter_read), - GEN8_PERFCOUNTER_GROUP(PC, pc, gen8_counter_br_enable, gen8_counter_read), - GEN8_PERFCOUNTER_GROUP(VFD, vfd, gen8_counter_br_enable, gen8_counter_read), - GEN8_PERFCOUNTER_GROUP(HLSQ, hlsq, gen8_counter_br_enable, gen8_counter_read), - GEN8_PERFCOUNTER_GROUP(VPC, vpc, gen8_counter_br_enable, gen8_counter_read), - GEN8_PERFCOUNTER_GROUP(CCU, ccu, gen8_counter_br_enable, 
gen8_counter_read), - GEN8_PERFCOUNTER_GROUP(CMP, cmp, gen8_counter_br_enable, gen8_counter_read), - GEN8_PERFCOUNTER_GROUP(TSE, tse, gen8_counter_br_enable, gen8_counter_read), - GEN8_PERFCOUNTER_GROUP(RAS, ras, gen8_counter_br_enable, gen8_counter_read), - GEN8_PERFCOUNTER_GROUP(LRZ, lrz, gen8_counter_br_enable, gen8_counter_read), - GEN8_REGULAR_PERFCOUNTER_GROUP(UCHE, uche), - GEN8_PERFCOUNTER_GROUP(TP, tp, gen8_hwsched_counter_enable, gen8_counter_read), - GEN8_PERFCOUNTER_GROUP(SP, sp, gen8_hwsched_counter_enable, gen8_counter_read), - GEN8_PERFCOUNTER_GROUP(RB, rb, gen8_counter_br_enable, gen8_counter_read), - GEN8_REGULAR_PERFCOUNTER_GROUP(VSC, vsc), - GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, VBIF, gbif, 0, - gen8_counter_gbif_enable, gen8_counter_read_norestore), - GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, VBIF_PWR, gbif_pwr, - ADRENO_PERFCOUNTER_GROUP_FIXED, - gen8_counter_gbif_pwr_enable, gen8_counter_read_norestore), - GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, ALWAYSON, alwayson, - ADRENO_PERFCOUNTER_GROUP_FIXED, - gen8_counter_alwayson_enable, gen8_counter_alwayson_read), - GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, GMU_XOCLK, gmu_xoclk, 0, - gen8_counter_gmu_pwr_enable, gen8_counter_read_norestore), - GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, GMU_GMUCLK, gmu_gmuclk, 0, - gen8_counter_gmu_pwr_enable, gen8_counter_read_norestore), - GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, GMU_PERF, gmu_perf, 0, - gen8_counter_gmu_perf_enable, gen8_counter_read_norestore), - GEN8_REGULAR_PERFCOUNTER_GROUP(UFC, ufc), - GEN8_BV_REGULAR_PERFCOUNTER_GROUP(CP, cp), - GEN8_BV_PERFCOUNTER_GROUP(PC, pc, gen8_counter_bv_enable, gen8_counter_read), - GEN8_BV_PERFCOUNTER_GROUP(VFD, vfd, gen8_counter_bv_enable, gen8_counter_read), - GEN8_BV_PERFCOUNTER_GROUP(VPC, vpc, gen8_counter_bv_enable, gen8_counter_read), - GEN8_BV_REGULAR_PERFCOUNTER_GROUP(TP, tp), - GEN8_BV_REGULAR_PERFCOUNTER_GROUP(SP, sp), - GEN8_BV_REGULAR_PERFCOUNTER_GROUP(UFC, ufc), - GEN8_BV_PERFCOUNTER_GROUP(TSE, tse, gen8_counter_bv_enable, 
gen8_counter_read), - GEN8_BV_PERFCOUNTER_GROUP(RAS, ras, gen8_counter_bv_enable, gen8_counter_read), - GEN8_BV_PERFCOUNTER_GROUP(LRZ, lrz, gen8_counter_bv_enable, gen8_counter_read), - GEN8_BV_PERFCOUNTER_GROUP(HLSQ, hlsq, gen8_counter_bv_enable, gen8_counter_read), -}; - static const struct adreno_perfcount_group gen8_perfcounter_groups [KGSL_PERFCOUNTER_GROUP_MAX] = { GEN8_REGULAR_PERFCOUNTER_GROUP(CP, cp), @@ -1134,8 +1012,8 @@ static const struct adreno_perfcount_group gen8_perfcounter_groups GEN8_PERFCOUNTER_GROUP(RAS, ras, gen8_counter_br_enable, gen8_counter_read), GEN8_PERFCOUNTER_GROUP(LRZ, lrz, gen8_counter_br_enable, gen8_counter_read), GEN8_REGULAR_PERFCOUNTER_GROUP(UCHE, uche), - GEN8_PERFCOUNTER_GROUP(TP, tp, gen8_counter_inline_enable, gen8_counter_read), - GEN8_PERFCOUNTER_GROUP(SP, sp, gen8_counter_inline_enable, gen8_counter_read), + GEN8_REGULAR_PERFCOUNTER_GROUP(TP, tp), + GEN8_REGULAR_PERFCOUNTER_GROUP(SP, sp), GEN8_PERFCOUNTER_GROUP(RB, rb, gen8_counter_br_enable, gen8_counter_read), GEN8_REGULAR_PERFCOUNTER_GROUP(VSC, vsc), GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, VBIF, gbif, 0, @@ -1171,7 +1049,3 @@ const struct adreno_perfcounters adreno_gen8_perfcounters = { ARRAY_SIZE(gen8_perfcounter_groups), }; -const struct adreno_perfcounters adreno_gen8_hwsched_perfcounters = { - gen8_hwsched_perfcounter_groups, - ARRAY_SIZE(gen8_hwsched_perfcounter_groups), -}; From d2dbead0af041dccdddaa1582cb3950ec74e019f Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Wed, 9 Aug 2023 11:57:40 +0530 Subject: [PATCH 0559/1016] kgsl: gen8: Flush perfcounters and pipe stats to perf RAM logic In A8X, perfcounters are spread across slice and unslice. In order to get latest data, driver needs to explicitly flush the delta counters (both perf counters and pipe stats) present in both RBBM_S and RBBM_US to perf RAM logic. Poll for the perfcounter flush done status before reading perfcounters. 
Change-Id: If73e1622921f1d04eed95ea5fb194eefc4c18ece Signed-off-by: Kamal Agrawal --- adreno_gen8_perfcounter.c | 26 ++++++++++++++++++++++++++ gen8_reg.h | 3 +++ 2 files changed, 29 insertions(+) diff --git a/adreno_gen8_perfcounter.c b/adreno_gen8_perfcounter.c index c452d7e6b3..ae25a65258 100644 --- a/adreno_gen8_perfcounter.c +++ b/adreno_gen8_perfcounter.c @@ -10,6 +10,28 @@ #include "adreno_pm4types.h" #include "kgsl_device.h" +#define PERFCOUNTER_FLUSH_DONE_MASK BIT(0) + +static void gen8_rbbm_perfctr_flush(struct kgsl_device *device) +{ + u32 val; + int ret; + + /* + * Flush delta counters (both perf counters and pipe stats) present in + * RBBM_S and RBBM_US to perf RAM logic to get the latest data. + */ + kgsl_regwrite(device, GEN8_RBBM_PERFCTR_FLUSH_HOST_CMD, BIT(0)); + kgsl_regwrite(device, GEN8_RBBM_SLICE_PERFCTR_FLUSH_HOST_CMD, BIT(0)); + + ret = kgsl_regmap_read_poll_timeout(&device->regmap, GEN8_RBBM_PERFCTR_FLUSH_HOST_STATUS, + val, (val & PERFCOUNTER_FLUSH_DONE_MASK) == PERFCOUNTER_FLUSH_DONE_MASK, + 100, 100 * 1000); + + if (ret) + dev_err(device->dev, "Perfcounter flush timed out: status=0x%08x\n", val); +} + /* * For registers that do not get restored on power cycle, read the value and add * the stored shadow value @@ -21,6 +43,8 @@ static u64 gen8_counter_read_norestore(struct adreno_device *adreno_dev, struct adreno_perfcount_register *reg = &group->regs[counter]; u32 hi, lo; + gen8_rbbm_perfctr_flush(device); + kgsl_regread(device, reg->offset, &lo); kgsl_regread(device, reg->offset_hi, &hi); @@ -95,6 +119,8 @@ static u64 gen8_counter_read(struct adreno_device *adreno_dev, struct adreno_perfcount_register *reg = &group->regs[counter]; u32 hi, lo; + gen8_rbbm_perfctr_flush(device); + kgsl_regread(device, reg->offset, &lo); kgsl_regread(device, reg->offset_hi, &hi); diff --git a/gen8_reg.h b/gen8_reg.h index 3dc6ce6989..be8166a24b 100644 --- a/gen8_reg.h +++ b/gen8_reg.h @@ -64,6 +64,7 @@ #define GEN8_RBBM_CGC_0_PC 0x10b #define 
GEN8_RBBM_PERFCTR_GPU_BUSY_MASKED 0x19e #define GEN8_RBBM_PERFCTR_SRAM_INIT_STATUS 0x19f +#define GEN8_RBBM_PERFCTR_FLUSH_HOST_STATUS 0x1a1 #define GEN8_RBBM_PERFCTR_CP_0_LO 0x1b0 #define GEN8_RBBM_PERFCTR_CP_0_HI 0x1b1 #define GEN8_RBBM_PERFCTR_CP_1_LO 0x1b2 @@ -493,12 +494,14 @@ #define GEN8_RBBM_PERFCTR_RBBM_SEL_3 0x444 #define GEN8_RBBM_PERFCTR_SRAM_INIT_CMD 0x449 +#define GEN8_RBBM_PERFCTR_FLUSH_HOST_CMD 0x44c #define GEN8_RBBM_PERFCTR_CNTL 0x460 /* GPU Slice registers */ #define GEN8_RBBM_SLICE_PERFCTR_CNTL 0x500 #define GEN8_RBBM_SLICE_INTERFACE_HANG_INT_CNTL 0x58f #define GEN8_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD 0x5e8 +#define GEN8_RBBM_SLICE_PERFCTR_FLUSH_HOST_CMD 0x5eb #define GEN8_RBBM_SLICE_NC_MODE_CNTL 0x5ec #define GEN8_VSC_BIN_SIZE 0xc02 From bd813c9abd2f154a3be04eb53df58173a44c04c4 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Mon, 16 Oct 2023 14:17:30 +0530 Subject: [PATCH 0560/1016] kgsl: gen8: Add support to program dependent perfcounter registers In A8X, few hardware blocks are split into multiple sub-blocks (front end and back end). Some of the blocks are present in both slice and unslice. Program the dependent perfcounter registers for such blocks. 
Change-Id: I07347de7142eb145e80c43eda6fc35cc7a6770a3 Signed-off-by: Kamal Agrawal --- adreno_gen8.c | 69 +++++++++-------- adreno_gen8_perfcounter.c | 156 +++++++++++++++++++++++++------------- adreno_perfcounter.h | 8 ++ gen8_reg.h | 24 ++++++ 4 files changed, 174 insertions(+), 83 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index e60ee87b53..4027db2591 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -1773,8 +1773,7 @@ int gen8_perfcounter_remove(struct adreno_device *adreno_dev, struct cpu_gpu_lock *lock = ptr; u32 *data = ptr + sizeof(*lock); int offset = (lock->ifpc_list_len + lock->preemption_list_len) * 2; - int i, second_last_offset, last_offset; - bool remove_counter = false; + int i, last_offset, num_removed, start_offset = -1; u32 pipe = FIELD_PREP(GENMASK(13, 12), _get_pipeid(groupid)); if (!lock->dynamic_list_len) @@ -1793,43 +1792,39 @@ int gen8_perfcounter_remove(struct adreno_device *adreno_dev, goto disable_perfcounter; } - second_last_offset = offset + (lock->dynamic_list_len - 3) * 3; - last_offset = second_last_offset + 3; + last_offset = offset + lock->dynamic_list_len * 3; /* Look for the perfcounter to remove in the list */ for (i = 0; i < lock->dynamic_list_len - 2; i++) { if ((data[offset + 1] == reg->select) && (data[offset] == pipe)) { - remove_counter = true; + start_offset = offset; break; } offset += 3; } - if (!remove_counter) + if (start_offset == -1) return -ENOENT; + for (i = 0; i < PERFCOUNTER_REG_DEPENDENCY_LEN && reg->reg_dependency[i]; i++) + offset += 3; + if (kgsl_hwlock(lock)) { kgsl_hwunlock(lock); return -EBUSY; } - /* - * If the entry is found, remove it from the list by overwriting with second last - * entry. 
Skip this if data at offset is already second last entry - */ - if (offset != second_last_offset) - memcpy(&data[offset], &data[second_last_offset], 6 * sizeof(u32)); + /* Let offset point to the first entry that is going to be retained */ + offset += 3; - /* - * Overwrite the second last entry with last entry as last entry always has to be - * GEN8_RBBM_SLICE_PERFCTR_CNTL. - */ - memcpy(&data[second_last_offset], &data[last_offset], 6 * sizeof(u32)); + memcpy(&data[start_offset], &data[offset], (last_offset - offset) * sizeof(u32)); - /* Clear the last entry */ - memset(&data[last_offset], 0, 6 * sizeof(u32)); + memset(&data[start_offset + (last_offset - offset)], 0, + (offset - start_offset) * sizeof(u32)); - lock->dynamic_list_len--; + num_removed = offset - start_offset; + do_div(num_removed, 3); + lock->dynamic_list_len -= num_removed; disable_perfcounter: /* @@ -1851,19 +1846,15 @@ int gen8_perfcounter_update(struct adreno_device *adreno_dev, void *ptr = adreno_dev->pwrup_reglist->hostptr; struct cpu_gpu_lock *lock = ptr; u32 *data = ptr + sizeof(*lock); - int i, offset = (lock->ifpc_list_len + lock->preemption_list_len) * 2; - bool select_reg_present = false; + int i, start_offset = -1, offset = (lock->ifpc_list_len + lock->preemption_list_len) * 2; if (flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) { - for (i = 0; i < lock->dynamic_list_len; i++) { + for (i = 0; i < lock->dynamic_list_len - 2; i++) { if ((data[offset + 1] == reg->select) && (data[offset] == pipe)) { - select_reg_present = true; + start_offset = offset; break; } - if (data[offset + 1] == GEN8_RBBM_PERFCTR_CNTL) - break; - offset += 3; } } else if (lock->dynamic_list_len) { @@ -1880,8 +1871,12 @@ int gen8_perfcounter_update(struct adreno_device *adreno_dev, * update it, otherwise append the * triplet to the end of the list. 
*/ - if (select_reg_present) { + if (start_offset != -1) { data[offset + 2] = reg->countable; + for (i = 0; i < PERFCOUNTER_REG_DEPENDENCY_LEN && reg->reg_dependency[i]; i++) { + offset += 3; + data[offset + 2] = reg->countable; + } kgsl_hwunlock(lock); goto update; } @@ -1900,6 +1895,13 @@ int gen8_perfcounter_update(struct adreno_device *adreno_dev, data[offset++] = reg->select; data[offset++] = reg->countable; lock->dynamic_list_len++; + + for (i = 0; i < PERFCOUNTER_REG_DEPENDENCY_LEN && reg->reg_dependency[i]; i++) { + data[offset++] = pipe; + data[offset++] = reg->reg_dependency[i]; + data[offset++] = reg->countable; + lock->dynamic_list_len++; + } } data[offset++] = FIELD_PREP(GENMASK(13, 12), PIPE_NONE); @@ -1913,9 +1915,14 @@ int gen8_perfcounter_update(struct adreno_device *adreno_dev, kgsl_hwunlock(lock); update: - if (update_reg) - kgsl_regwrite(KGSL_DEVICE(adreno_dev), reg->select, - reg->countable); + if (update_reg) { + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + kgsl_regwrite(device, reg->select, reg->countable); + + for (i = 0; i < PERFCOUNTER_REG_DEPENDENCY_LEN && reg->reg_dependency[i]; i++) + kgsl_regwrite(device, reg->reg_dependency[i], reg->countable); + } return 0; } diff --git a/adreno_gen8_perfcounter.c b/adreno_gen8_perfcounter.c index ae25a65258..d96cac5c91 100644 --- a/adreno_gen8_perfcounter.c +++ b/adreno_gen8_perfcounter.c @@ -298,51 +298,71 @@ static struct adreno_perfcount_register gen8_perfcounters_bv_cp[] = { static struct adreno_perfcount_register gen8_perfcounters_rbbm[] = { { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RBBM_0_LO, - GEN8_RBBM_PERFCTR_RBBM_0_HI, -1, GEN8_RBBM_PERFCTR_RBBM_SEL_0 }, + GEN8_RBBM_PERFCTR_RBBM_0_HI, -1, GEN8_RBBM_PERFCTR_RBBM_SEL_0, 0, + { GEN8_RBBM_SLICE_PERFCTR_RBBM_SEL_0 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RBBM_1_LO, - GEN8_RBBM_PERFCTR_RBBM_1_HI, -1, GEN8_RBBM_PERFCTR_RBBM_SEL_1 }, + GEN8_RBBM_PERFCTR_RBBM_1_HI, -1, GEN8_RBBM_PERFCTR_RBBM_SEL_1, 0, + { 
GEN8_RBBM_SLICE_PERFCTR_RBBM_SEL_0 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RBBM_2_LO, - GEN8_RBBM_PERFCTR_RBBM_2_HI, -1, GEN8_RBBM_PERFCTR_RBBM_SEL_2 }, + GEN8_RBBM_PERFCTR_RBBM_2_HI, -1, GEN8_RBBM_PERFCTR_RBBM_SEL_2, 0, + { GEN8_RBBM_SLICE_PERFCTR_RBBM_SEL_0 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RBBM_3_LO, - GEN8_RBBM_PERFCTR_RBBM_3_HI, -1, GEN8_RBBM_PERFCTR_RBBM_SEL_3 }, + GEN8_RBBM_PERFCTR_RBBM_3_HI, -1, GEN8_RBBM_PERFCTR_RBBM_SEL_3, 0, + { GEN8_RBBM_SLICE_PERFCTR_RBBM_SEL_0 } }, }; static struct adreno_perfcount_register gen8_perfcounters_pc[] = { { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_PC_0_LO, - GEN8_RBBM_PERFCTR_PC_0_HI, -1, GEN8_PC_PERFCTR_PC_SEL_0 }, + GEN8_RBBM_PERFCTR_PC_0_HI, -1, GEN8_PC_PERFCTR_PC_SEL_0, 0, + { GEN8_PC_SLICE_PERFCTR_PC_SEL_0 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_PC_1_LO, - GEN8_RBBM_PERFCTR_PC_1_HI, -1, GEN8_PC_PERFCTR_PC_SEL_1 }, + GEN8_RBBM_PERFCTR_PC_1_HI, -1, GEN8_PC_PERFCTR_PC_SEL_1, 0, + { GEN8_PC_SLICE_PERFCTR_PC_SEL_1 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_PC_2_LO, - GEN8_RBBM_PERFCTR_PC_2_HI, -1, GEN8_PC_PERFCTR_PC_SEL_2 }, + GEN8_RBBM_PERFCTR_PC_2_HI, -1, GEN8_PC_PERFCTR_PC_SEL_2, 0, + { GEN8_PC_SLICE_PERFCTR_PC_SEL_2 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_PC_3_LO, - GEN8_RBBM_PERFCTR_PC_3_HI, -1, GEN8_PC_PERFCTR_PC_SEL_3 }, + GEN8_RBBM_PERFCTR_PC_3_HI, -1, GEN8_PC_PERFCTR_PC_SEL_3, 0, + { GEN8_PC_SLICE_PERFCTR_PC_SEL_3 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_PC_4_LO, - GEN8_RBBM_PERFCTR_PC_4_HI, -1, GEN8_PC_PERFCTR_PC_SEL_4 }, + GEN8_RBBM_PERFCTR_PC_4_HI, -1, GEN8_PC_PERFCTR_PC_SEL_4, 0, + { GEN8_PC_SLICE_PERFCTR_PC_SEL_4 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_PC_5_LO, - GEN8_RBBM_PERFCTR_PC_5_HI, -1, GEN8_PC_PERFCTR_PC_SEL_5 }, + GEN8_RBBM_PERFCTR_PC_5_HI, -1, GEN8_PC_PERFCTR_PC_SEL_5, 0, + { GEN8_PC_SLICE_PERFCTR_PC_SEL_5 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, 
GEN8_RBBM_PERFCTR_PC_6_LO, - GEN8_RBBM_PERFCTR_PC_6_HI, -1, GEN8_PC_PERFCTR_PC_SEL_6 }, + GEN8_RBBM_PERFCTR_PC_6_HI, -1, GEN8_PC_PERFCTR_PC_SEL_6, 0, + { GEN8_PC_SLICE_PERFCTR_PC_SEL_6 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_PC_7_LO, - GEN8_RBBM_PERFCTR_PC_7_HI, -1, GEN8_PC_PERFCTR_PC_SEL_7 }, + GEN8_RBBM_PERFCTR_PC_7_HI, -1, GEN8_PC_PERFCTR_PC_SEL_7, 0, + { GEN8_PC_SLICE_PERFCTR_PC_SEL_7 } }, }; static struct adreno_perfcount_register gen8_perfcounters_bv_pc[] = { { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_0_LO, - GEN8_RBBM_PERFCTR_BV_PC_0_HI, -1, GEN8_PC_PERFCTR_PC_SEL_8 }, + GEN8_RBBM_PERFCTR_BV_PC_0_HI, -1, GEN8_PC_PERFCTR_PC_SEL_8, 0, + { GEN8_PC_SLICE_PERFCTR_PC_SEL_8 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_1_LO, - GEN8_RBBM_PERFCTR_BV_PC_1_HI, -1, GEN8_PC_PERFCTR_PC_SEL_9 }, + GEN8_RBBM_PERFCTR_BV_PC_1_HI, -1, GEN8_PC_PERFCTR_PC_SEL_9, 0, + { GEN8_PC_SLICE_PERFCTR_PC_SEL_9 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_2_LO, - GEN8_RBBM_PERFCTR_BV_PC_2_HI, -1, GEN8_PC_PERFCTR_PC_SEL_10 }, + GEN8_RBBM_PERFCTR_BV_PC_2_HI, -1, GEN8_PC_PERFCTR_PC_SEL_10, 0, + { GEN8_PC_SLICE_PERFCTR_PC_SEL_10 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_3_LO, - GEN8_RBBM_PERFCTR_BV_PC_3_HI, -1, GEN8_PC_PERFCTR_PC_SEL_11 }, + GEN8_RBBM_PERFCTR_BV_PC_3_HI, -1, GEN8_PC_PERFCTR_PC_SEL_11, 0, + { GEN8_PC_SLICE_PERFCTR_PC_SEL_11 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_4_LO, - GEN8_RBBM_PERFCTR_BV_PC_4_HI, -1, GEN8_PC_PERFCTR_PC_SEL_12 }, + GEN8_RBBM_PERFCTR_BV_PC_4_HI, -1, GEN8_PC_PERFCTR_PC_SEL_12, 0, + { GEN8_PC_SLICE_PERFCTR_PC_SEL_12 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_5_LO, - GEN8_RBBM_PERFCTR_BV_PC_5_HI, -1, GEN8_PC_PERFCTR_PC_SEL_13 }, + GEN8_RBBM_PERFCTR_BV_PC_5_HI, -1, GEN8_PC_PERFCTR_PC_SEL_13, 0, + { GEN8_PC_SLICE_PERFCTR_PC_SEL_13 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_6_LO, - 
GEN8_RBBM_PERFCTR_BV_PC_6_HI, -1, GEN8_PC_PERFCTR_PC_SEL_14 }, + GEN8_RBBM_PERFCTR_BV_PC_6_HI, -1, GEN8_PC_PERFCTR_PC_SEL_14, 0, + { GEN8_PC_SLICE_PERFCTR_PC_SEL_14 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_7_LO, - GEN8_RBBM_PERFCTR_BV_PC_7_HI, -1, GEN8_PC_PERFCTR_PC_SEL_15 }, + GEN8_RBBM_PERFCTR_BV_PC_7_HI, -1, GEN8_PC_PERFCTR_PC_SEL_15, 0, + { GEN8_PC_SLICE_PERFCTR_PC_SEL_15 } }, }; static struct adreno_perfcount_register gen8_perfcounters_vfd[] = { @@ -385,62 +405,86 @@ static struct adreno_perfcount_register gen8_perfcounters_bv_vfd[] = { static struct adreno_perfcount_register gen8_perfcounters_hlsq[] = { { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_HLSQ_0_LO, - GEN8_RBBM_PERFCTR_HLSQ_0_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_0 }, + GEN8_RBBM_PERFCTR_HLSQ_0_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_0, 0, + { GEN8_SP_PERFCTR_HLSQ_SEL_2_0 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_HLSQ_1_LO, - GEN8_RBBM_PERFCTR_HLSQ_1_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_1 }, + GEN8_RBBM_PERFCTR_HLSQ_1_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_1, 0, + { GEN8_SP_PERFCTR_HLSQ_SEL_2_1 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_HLSQ_2_LO, - GEN8_RBBM_PERFCTR_HLSQ_2_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_2 }, + GEN8_RBBM_PERFCTR_HLSQ_2_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_2, 0, + { GEN8_SP_PERFCTR_HLSQ_SEL_2_2 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_HLSQ_3_LO, - GEN8_RBBM_PERFCTR_HLSQ_3_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_3 }, + GEN8_RBBM_PERFCTR_HLSQ_3_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_3, 0, + { GEN8_SP_PERFCTR_HLSQ_SEL_2_3 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_HLSQ_4_LO, - GEN8_RBBM_PERFCTR_HLSQ_4_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_4 }, + GEN8_RBBM_PERFCTR_HLSQ_4_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_4, 0, + { GEN8_SP_PERFCTR_HLSQ_SEL_2_4 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_HLSQ_5_LO, - GEN8_RBBM_PERFCTR_HLSQ_5_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_5 }, + GEN8_RBBM_PERFCTR_HLSQ_5_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_5, 0, 
+ { GEN8_SP_PERFCTR_HLSQ_SEL_2_5 } }, }; static struct adreno_perfcount_register gen8_perfcounters_bv_hlsq[] = { { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_HLSQ_0_LO, - GEN8_RBBM_PERFCTR2_HLSQ_0_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_0 }, + GEN8_RBBM_PERFCTR2_HLSQ_0_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_0, 0, + { GEN8_SP_PERFCTR_HLSQ_SEL_2_0 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_HLSQ_1_LO, - GEN8_RBBM_PERFCTR2_HLSQ_1_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_1 }, + GEN8_RBBM_PERFCTR2_HLSQ_1_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_1, 0, + { GEN8_SP_PERFCTR_HLSQ_SEL_2_1 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_HLSQ_2_LO, - GEN8_RBBM_PERFCTR2_HLSQ_2_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_2 }, + GEN8_RBBM_PERFCTR2_HLSQ_2_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_2, 0, + { GEN8_SP_PERFCTR_HLSQ_SEL_2_2 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_HLSQ_3_LO, - GEN8_RBBM_PERFCTR2_HLSQ_3_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_3 }, + GEN8_RBBM_PERFCTR2_HLSQ_3_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_3, 0, + { GEN8_SP_PERFCTR_HLSQ_SEL_2_3 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_HLSQ_4_LO, - GEN8_RBBM_PERFCTR2_HLSQ_4_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_4 }, + GEN8_RBBM_PERFCTR2_HLSQ_4_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_4, 0, + { GEN8_SP_PERFCTR_HLSQ_SEL_2_4 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_HLSQ_5_LO, - GEN8_RBBM_PERFCTR2_HLSQ_5_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_5 }, + GEN8_RBBM_PERFCTR2_HLSQ_5_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_5, 0, + { GEN8_SP_PERFCTR_HLSQ_SEL_2_5 } }, }; static struct adreno_perfcount_register gen8_perfcounters_vpc[] = { { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VPC_0_LO, - GEN8_RBBM_PERFCTR_VPC_0_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_0 }, + GEN8_RBBM_PERFCTR_VPC_0_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_0, 0, + { GEN8_VPC_PERFCTR_VPC_SEL_1_0, GEN8_VPC_PERFCTR_VPC_SEL_2_0 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VPC_1_LO, - GEN8_RBBM_PERFCTR_VPC_1_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_1 }, + 
GEN8_RBBM_PERFCTR_VPC_1_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_1, 0, + { GEN8_VPC_PERFCTR_VPC_SEL_1_1, GEN8_VPC_PERFCTR_VPC_SEL_2_1 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VPC_2_LO, - GEN8_RBBM_PERFCTR_VPC_2_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_2 }, + GEN8_RBBM_PERFCTR_VPC_2_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_2, 0, + { GEN8_VPC_PERFCTR_VPC_SEL_1_2, GEN8_VPC_PERFCTR_VPC_SEL_2_2 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VPC_3_LO, - GEN8_RBBM_PERFCTR_VPC_3_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_3 }, + GEN8_RBBM_PERFCTR_VPC_3_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_3, 0, + { GEN8_VPC_PERFCTR_VPC_SEL_1_3, GEN8_VPC_PERFCTR_VPC_SEL_2_3 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VPC_4_LO, - GEN8_RBBM_PERFCTR_VPC_4_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_4 }, + GEN8_RBBM_PERFCTR_VPC_4_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_4, 0, + { GEN8_VPC_PERFCTR_VPC_SEL_1_4, GEN8_VPC_PERFCTR_VPC_SEL_2_4 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VPC_5_LO, - GEN8_RBBM_PERFCTR_VPC_5_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_5 }, + GEN8_RBBM_PERFCTR_VPC_5_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_5, 0, + { GEN8_VPC_PERFCTR_VPC_SEL_1_5, GEN8_VPC_PERFCTR_VPC_SEL_2_5 } }, }; static struct adreno_perfcount_register gen8_perfcounters_bv_vpc[] = { { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VPC_0_LO, - GEN8_RBBM_PERFCTR_BV_VPC_0_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_6 }, + GEN8_RBBM_PERFCTR_BV_VPC_0_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_2_6, 0, + { GEN8_VPC_PERFCTR_VPC_SEL_1_6, GEN8_VPC_PERFCTR_VPC_SEL_2_6 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VPC_1_LO, - GEN8_RBBM_PERFCTR_BV_VPC_1_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_7 }, + GEN8_RBBM_PERFCTR_BV_VPC_1_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_2_7, 0, + { GEN8_VPC_PERFCTR_VPC_SEL_1_7, GEN8_VPC_PERFCTR_VPC_SEL_2_7 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VPC_2_LO, - GEN8_RBBM_PERFCTR_BV_VPC_2_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_8 }, + GEN8_RBBM_PERFCTR_BV_VPC_2_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_2_8, 0, + { 
GEN8_VPC_PERFCTR_VPC_SEL_1_8, GEN8_VPC_PERFCTR_VPC_SEL_2_8 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VPC_3_LO, - GEN8_RBBM_PERFCTR_BV_VPC_3_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_9 }, + GEN8_RBBM_PERFCTR_BV_VPC_3_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_2_9, 0, + { GEN8_VPC_PERFCTR_VPC_SEL_1_9, GEN8_VPC_PERFCTR_VPC_SEL_2_9 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VPC_4_LO, - GEN8_RBBM_PERFCTR_BV_VPC_4_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_10 }, + GEN8_RBBM_PERFCTR_BV_VPC_4_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_2_10, 0, + { GEN8_VPC_PERFCTR_VPC_SEL_1_10, GEN8_VPC_PERFCTR_VPC_SEL_2_10 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VPC_5_LO, - GEN8_RBBM_PERFCTR_BV_VPC_5_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_11 }, + GEN8_RBBM_PERFCTR_BV_VPC_5_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_2_11, 0, + { GEN8_VPC_PERFCTR_VPC_SEL_1_11, GEN8_VPC_PERFCTR_VPC_SEL_2_11 } }, }; static struct adreno_perfcount_register gen8_perfcounters_ccu[] = { @@ -458,24 +502,32 @@ static struct adreno_perfcount_register gen8_perfcounters_ccu[] = { static struct adreno_perfcount_register gen8_perfcounters_tse[] = { { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TSE_0_LO, - GEN8_RBBM_PERFCTR_TSE_0_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_0 }, + GEN8_RBBM_PERFCTR_TSE_0_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_0, 0, + { GEN8_GRAS_PERFCTR_TSEFE_SEL_0 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TSE_1_LO, - GEN8_RBBM_PERFCTR_TSE_1_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_1 }, + GEN8_RBBM_PERFCTR_TSE_1_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_1, 0, + { GEN8_GRAS_PERFCTR_TSEFE_SEL_1 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TSE_2_LO, - GEN8_RBBM_PERFCTR_TSE_2_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_2 }, + GEN8_RBBM_PERFCTR_TSE_2_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_2, 0, + { GEN8_GRAS_PERFCTR_TSEFE_SEL_2 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TSE_3_LO, - GEN8_RBBM_PERFCTR_TSE_3_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_3 }, + GEN8_RBBM_PERFCTR_TSE_3_HI, -1, 
GEN8_GRAS_PERFCTR_TSE_SEL_3, 0, + { GEN8_GRAS_PERFCTR_TSEFE_SEL_3 } }, }; static struct adreno_perfcount_register gen8_perfcounters_bv_tse[] = { { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_TSE_0_LO, - GEN8_RBBM_PERFCTR_BV_TSE_0_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_0 }, + GEN8_RBBM_PERFCTR_BV_TSE_0_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_0, 0, + { GEN8_GRAS_PERFCTR_TSEFE_SEL_0 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_TSE_1_LO, - GEN8_RBBM_PERFCTR_BV_TSE_1_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_1 }, + GEN8_RBBM_PERFCTR_BV_TSE_1_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_1, 0, + { GEN8_GRAS_PERFCTR_TSEFE_SEL_1 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_TSE_2_LO, - GEN8_RBBM_PERFCTR_BV_TSE_2_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_2 }, + GEN8_RBBM_PERFCTR_BV_TSE_2_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_2, 0, + { GEN8_GRAS_PERFCTR_TSEFE_SEL_2 } }, { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_TSE_3_LO, - GEN8_RBBM_PERFCTR_BV_TSE_3_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_3 }, + GEN8_RBBM_PERFCTR_BV_TSE_3_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_3, 0, + { GEN8_GRAS_PERFCTR_TSEFE_SEL_3 } }, }; static struct adreno_perfcount_register gen8_perfcounters_ras[] = { diff --git a/adreno_perfcounter.h b/adreno_perfcounter.h index 85006b0174..7613ab2ac8 100644 --- a/adreno_perfcounter.h +++ b/adreno_perfcounter.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2008-2015,2017,2019-2021 The Linux Foundation. All rights reserved. + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __ADRENO_PERFCOUNTER_H #define __ADRENO_PERFCOUNTER_H @@ -13,6 +14,8 @@ struct adreno_device; #define PERFCOUNTER_FLAG_NONE 0x0 #define PERFCOUNTER_FLAG_KERNEL 0x1 +#define PERFCOUNTER_REG_DEPENDENCY_LEN 2 + /* Structs to maintain the list of active performance counters */ /** @@ -34,6 +37,11 @@ struct adreno_perfcount_register { int load_bit; unsigned int select; uint64_t value; + /* + * @reg_dependency: Dependent registers that should be programmed along + * with this register. + */ + u32 reg_dependency[PERFCOUNTER_REG_DEPENDENCY_LEN]; }; /** diff --git a/gen8_reg.h b/gen8_reg.h index be8166a24b..65a5a2bdf0 100644 --- a/gen8_reg.h +++ b/gen8_reg.h @@ -500,6 +500,10 @@ /* GPU Slice registers */ #define GEN8_RBBM_SLICE_PERFCTR_CNTL 0x500 #define GEN8_RBBM_SLICE_INTERFACE_HANG_INT_CNTL 0x58f +#define GEN8_RBBM_SLICE_PERFCTR_RBBM_SEL_0 0x5e0 +#define GEN8_RBBM_SLICE_PERFCTR_RBBM_SEL_1 0x5e1 +#define GEN8_RBBM_SLICE_PERFCTR_RBBM_SEL_2 0x5e2 +#define GEN8_RBBM_SLICE_PERFCTR_RBBM_SEL_3 0x5e3 #define GEN8_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD 0x5e8 #define GEN8_RBBM_SLICE_PERFCTR_FLUSH_HOST_CMD 0x5eb #define GEN8_RBBM_SLICE_NC_MODE_CNTL 0x5ec @@ -930,6 +934,10 @@ /* GRAS registers */ #define GEN8_GRAS_TSEFE_DBG_ECO_CNTL 0x8600 +#define GEN8_GRAS_PERFCTR_TSEFE_SEL_0 0x8610 +#define GEN8_GRAS_PERFCTR_TSEFE_SEL_1 0x8611 +#define GEN8_GRAS_PERFCTR_TSEFE_SEL_2 0x8612 +#define GEN8_GRAS_PERFCTR_TSEFE_SEL_3 0x8613 #define GEN8_GRAS_NC_MODE_CNTL 0x8700 #define GEN8_GRAS_DBG_ECO_CNTL 0x8702 #define GEN8_GRAS_PERFCTR_TSE_SEL_0 0x8710 @@ -1056,6 +1064,22 @@ #define GEN8_PC_PERFCTR_PC_SEL_15 0x9e3f #define GEN8_PC_CHICKEN_BITS_1 0x9e50 #define GEN8_PC_CONTEXT_SWITCH_STABILIZE_CNTL_1 0x9e64 +#define GEN8_PC_SLICE_PERFCTR_PC_SEL_0 0x9f00 +#define GEN8_PC_SLICE_PERFCTR_PC_SEL_1 0x9f01 +#define GEN8_PC_SLICE_PERFCTR_PC_SEL_2 0x9f02 +#define GEN8_PC_SLICE_PERFCTR_PC_SEL_3 0x9f03 +#define GEN8_PC_SLICE_PERFCTR_PC_SEL_4 0x9f04 +#define GEN8_PC_SLICE_PERFCTR_PC_SEL_5 0x9f05 +#define 
GEN8_PC_SLICE_PERFCTR_PC_SEL_6 0x9f06 +#define GEN8_PC_SLICE_PERFCTR_PC_SEL_7 0x9f07 +#define GEN8_PC_SLICE_PERFCTR_PC_SEL_8 0x9f08 +#define GEN8_PC_SLICE_PERFCTR_PC_SEL_9 0x9f09 +#define GEN8_PC_SLICE_PERFCTR_PC_SEL_10 0x9f0a +#define GEN8_PC_SLICE_PERFCTR_PC_SEL_11 0x9f0b +#define GEN8_PC_SLICE_PERFCTR_PC_SEL_12 0x9f0c +#define GEN8_PC_SLICE_PERFCTR_PC_SEL_13 0x9f0d +#define GEN8_PC_SLICE_PERFCTR_PC_SEL_14 0x9f0e +#define GEN8_PC_SLICE_PERFCTR_PC_SEL_15 0x9f0f /* VFD registers */ From f1689e0b8568c0470c7078a131aedaf429bdd0e5 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 2 Nov 2023 18:38:56 +0530 Subject: [PATCH 0561/1016] kgsl: gen8: Cleanup register definitions Remove duplicate, unused registers and update few register names. Change-Id: Ib27760cdf9d527f555894e9fd3b689346b889e3c Signed-off-by: Kamal Agrawal --- adreno_gen8_gmu.c | 6 +++--- gen8_reg.h | 27 +++------------------------ 2 files changed, 6 insertions(+), 27 deletions(-) diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index dbcc054acb..0800664d9e 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -308,9 +308,9 @@ static void gen8_gmu_power_config(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); /* Disable GMU WB/RB buffer and caches at boot */ - gmu_core_regwrite(device, GEN8_GMU_SYS_BUS_CONFIG, 0x1); - gmu_core_regwrite(device, GEN8_GMU_ICACHE_CONFIG, 0x1); - gmu_core_regwrite(device, GEN8_GMU_DCACHE_CONFIG, 0x1); + gmu_core_regwrite(device, GEN8_GMUCX_SYS_BUS_CONFIG, 0x1); + gmu_core_regwrite(device, GEN8_GMUCX_ICACHE_CONFIG, 0x1); + gmu_core_regwrite(device, GEN8_GMUCX_DCACHE_CONFIG, 0x1); } static void gmu_ao_sync_event(struct adreno_device *adreno_dev) diff --git a/gen8_reg.h b/gen8_reg.h index 65a5a2bdf0..05099e2d38 100644 --- a/gen8_reg.h +++ b/gen8_reg.h @@ -763,7 +763,6 @@ #define GEN8_CP_APERTURE_CNTL_GMU 0x81d #define GEN8_CP_APERTURE_CNTL_CD 0x81e #define GEN8_CP_CP2GMU_STATUS 0x822 -#define GEN8_CP_AHB_CNTL 0x838 #define 
GEN8_CP_RL_ERROR_DETAILS_0 0x840 #define GEN8_CP_RL_ERROR_DETAILS_1 0x841 #define GEN8_CP_CRASH_DUMP_SCRIPT_BASE_LO 0x842 @@ -971,16 +970,6 @@ #define GEN8_RB_PERFCTR_CCU_SEL_2 0x8e1a #define GEN8_RB_PERFCTR_CCU_SEL_3 0x8e1b #define GEN8_RB_PERFCTR_CCU_SEL_4 0x8e1c -#define GEN8_RB_PERFCTR_CMP_SEL_0 0x8f04 -#define GEN8_RB_PERFCTR_CMP_SEL_1 0x8f05 -#define GEN8_RB_PERFCTR_CMP_SEL_2 0x8f06 -#define GEN8_RB_PERFCTR_CMP_SEL_3 0x8f07 -#define GEN8_RB_PERFCTR_UFC_SEL_0 0x8f10 -#define GEN8_RB_PERFCTR_UFC_SEL_1 0x8f11 -#define GEN8_RB_PERFCTR_UFC_SEL_2 0x8f12 -#define GEN8_RB_PERFCTR_UFC_SEL_3 0x8f13 -#define GEN8_RB_PERFCTR_UFC_SEL_4 0x8f14 -#define GEN8_RB_PERFCTR_UFC_SEL_5 0x8f15 #define GEN8_RB_SUB_BLOCK_SEL_CNTL_HOST 0x8e3b #define GEN8_RB_SUB_BLOCK_SEL_CNTL_CD 0x8e3d #define GEN8_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE 0x8e50 @@ -1342,7 +1331,6 @@ #define GEN8_CX_DBGC_EVT_INTF_SEL_0 0x18436 #define GEN8_CX_DBGC_EVT_INTF_SEL_1 0x18437 #define GEN8_CX_DBGC_PERF_ATB_CFG 0x18438 -#define GEN8_CX_DBGC_PERF_ATB_DRAIN_CMD 0x1844a #define GEN8_CX_DBGC_ECO_CNTL 0x1843b #define GEN8_CX_DBGC_AHB_DBG_CNTL 0x1843c #define GEN8_CX_DBGC_TCM_DBG_ADDR 0x18580 @@ -1351,17 +1339,9 @@ /* GMU control registers */ #define GEN8_GMU_CM3_ITCM_START 0x1b400 #define GEN8_GMU_CM3_DTCM_START 0x1c400 -#define GEN8_GMU_NMI_CONTROL_STATUS 0x1cbf0 -#define GEN8_GMU_BOOT_SLUMBER_OPTION 0x1cbf8 -#define GEN8_GMU_GX_VOTE_IDX 0x1cbf9 -#define GEN8_GMU_MX_VOTE_IDX 0x1cbfa -#define GEN8_GMU_DCVS_ACK_OPTION 0x1cbfc -#define GEN8_GMU_DCVS_PERF_SETTING 0x1cbfd -#define GEN8_GMU_DCVS_BW_SETTING 0x1cbfe -#define GEN8_GMU_DCVS_RETURN 0x1cbff -#define GEN8_GMU_ICACHE_CONFIG 0x1f400 -#define GEN8_GMU_DCACHE_CONFIG 0x1f401 -#define GEN8_GMU_SYS_BUS_CONFIG 0x1f40f +#define GEN8_GMUCX_ICACHE_CONFIG 0x1f400 +#define GEN8_GMUCX_DCACHE_CONFIG 0x1f401 +#define GEN8_GMUCX_SYS_BUS_CONFIG 0x1f40f #define GEN8_GMUCX_MRC_GBIF_QOS_CTRL 0x1f50b #define GEN8_GMUCX_PWR_COL_KEEPALIVE 0x1f7e4 #define 
GEN8_GMUCX_PWR_COL_PREEMPTION_KEEPALIVE 0x1f7e5 @@ -1622,7 +1602,6 @@ #define GEN8_SMMU_BASE 0x28000 /* GPU CX_MISC registers */ -#define GEN8_GPU_CX_MISC_TCM_RET_CNTL 0x39 #define GEN8_GPU_CX_MISC_SW_FUSE_VALUE 0x400 /* GPU SW Fuse Feature bit fields */ From a76ed3a056495cf0ae569b52cd80f18f43cc0066 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Thu, 2 Nov 2023 15:14:12 -0700 Subject: [PATCH 0562/1016] kgsl: gen8: Remove the RSC and PDC setup For Gen8 gmu power firmware does RSC setup and AOP does PDC initialization. Hence remove this support for gen8. Signed-off-by: Hareesh Gundu Change-Id: I2fcfcb84a48e9c52fb45617fc3e15dce0e07f53e --- adreno_gen8_gmu.c | 70 ------------------------------------------- adreno_gen8_gmu.h | 14 --------- adreno_gen8_hwsched.c | 9 ------ gen8_reg.h | 15 ---------- 4 files changed, 108 deletions(-) diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index 0800664d9e..e91571545c 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -241,67 +241,6 @@ struct adreno_device *gen8_gmu_to_adreno(struct gen8_gmu_device *gmu) return &gen8_dev->adreno_dev; } -#define RSC_CMD_OFFSET 2 - -static void _regwrite(void __iomem *regbase, - u32 offsetwords, u32 value) -{ - void __iomem *reg; - - reg = regbase + (offsetwords << 2); - __raw_writel(value, reg); -} - -void gen8_load_rsc_ucode(struct adreno_device *adreno_dev) -{ - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - void __iomem *rscc = gmu->rscc_virt; - u32 seq_offset = GEN8_RSCC_SEQ_MEM_0_DRV0; - - /* Disable SDE clock gating */ - _regwrite(rscc, GEN8_GPU_RSCC_RSC_STATUS0_DRV0, BIT(24)); - - /* Setup RSC PDC handshake for sleep and wakeup */ - _regwrite(rscc, GEN8_RSCC_PDC_SLAVE_ID_DRV0, 1); - _regwrite(rscc, GEN8_RSCC_HIDDEN_TCS_CMD0_DATA, 0); - _regwrite(rscc, GEN8_RSCC_HIDDEN_TCS_CMD0_ADDR, 0); - _regwrite(rscc, GEN8_RSCC_HIDDEN_TCS_CMD0_DATA + RSC_CMD_OFFSET, 0); - _regwrite(rscc, GEN8_RSCC_HIDDEN_TCS_CMD0_ADDR + RSC_CMD_OFFSET, 0); - _regwrite(rscc, 
GEN8_RSCC_HIDDEN_TCS_CMD0_DATA + RSC_CMD_OFFSET * 2, 0x80000021); - _regwrite(rscc, GEN8_RSCC_HIDDEN_TCS_CMD0_ADDR + RSC_CMD_OFFSET * 2, 0); - - /* Load RSC sequencer uCode for sleep and wakeup */ - _regwrite(rscc, seq_offset, 0xeaaae5a0); - _regwrite(rscc, seq_offset + 1, 0xe1a1ebab); - _regwrite(rscc, seq_offset + 2, 0xa2e0a581); - _regwrite(rscc, seq_offset + 3, 0xecac82e2); - _regwrite(rscc, seq_offset + 4, 0x0020edad); -} - -int gen8_load_pdc_ucode(struct adreno_device *adreno_dev) -{ - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - struct resource *res_cfg; - void __iomem *cfg = NULL; - - res_cfg = platform_get_resource_byname(gmu->pdev, IORESOURCE_MEM, - "gmu_pdc"); - if (res_cfg) - cfg = ioremap(res_cfg->start, resource_size(res_cfg)); - - if (!cfg) { - dev_err(&gmu->pdev->dev, "Failed to map PDC CFG\n"); - return -ENODEV; - } - - /* Setup GPU PDC */ - _regwrite(cfg, GEN8_PDC_GPU_ENABLE_PDC, 0x80000001); - - iounmap(cfg); - - return 0; -} - /* Configure and enable GMU low power mode */ static void gen8_gmu_power_config(struct adreno_device *adreno_dev) { @@ -1897,15 +1836,6 @@ static int gen8_gmu_first_boot(struct adreno_device *adreno_dev) if (ret) goto err; - if (!test_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags)) { - ret = gen8_load_pdc_ucode(adreno_dev); - if (ret) - goto err; - - gen8_load_rsc_ucode(adreno_dev); - set_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags); - } - ret = gen8_gmu_hfi_start(adreno_dev); if (ret) goto err; diff --git a/adreno_gen8_gmu.h b/adreno_gen8_gmu.h index 01829d7fef..4ee97fad2f 100644 --- a/adreno_gen8_gmu.h +++ b/adreno_gen8_gmu.h @@ -427,20 +427,6 @@ int gen8_rscc_wakeup_sequence(struct adreno_device *adreno_dev); */ int gen8_halt_gbif(struct adreno_device *adreno_dev); -/** - * gen8_load_pdc_ucode - Load and enable pdc sequence - * @adreno_dev: Pointer to the adreno device - * - * Return: 0 on success or negative error on failure - */ -int gen8_load_pdc_ucode(struct adreno_device *adreno_dev); - -/** - * 
gen8_load_rsc_ucode - Load rscc sequence - * @adreno_dev: Pointer to the adreno device - */ -void gen8_load_rsc_ucode(struct adreno_device *adreno_dev); - /** * gen8_gmu_remove - Clean up gmu probed resources * @device: Pointer to the kgsl device diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index feee8bafff..d61411c465 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -108,15 +108,6 @@ static int gen8_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) if (ret) goto clks_gdsc_off; - if (!test_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags)) { - ret = gen8_load_pdc_ucode(adreno_dev); - if (ret) - goto clks_gdsc_off; - - gen8_load_rsc_ucode(adreno_dev); - set_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags); - } - ret = gen8_scm_gpu_init_cx_regs(adreno_dev); if (ret) goto clks_gdsc_off; diff --git a/gen8_reg.h b/gen8_reg.h index 05099e2d38..18bf1cfb81 100644 --- a/gen8_reg.h +++ b/gen8_reg.h @@ -1572,19 +1572,7 @@ /* GPU RSC sequencer registers */ #define GEN8_GPU_RSCC_RSC_STATUS0_DRV0 0x00004 -#define GEN8_RSCC_PDC_SEQ_START_ADDR 0x00008 -#define GEN8_RSCC_PDC_MATCH_VALUE_LO 0x00009 -#define GEN8_RSCC_PDC_MATCH_VALUE_HI 0x0000a -#define GEN8_RSCC_PDC_SLAVE_ID_DRV0 0x0000b -#define GEN8_RSCC_HIDDEN_TCS_CMD0_ADDR 0x0000d -#define GEN8_RSCC_HIDDEN_TCS_CMD0_DATA 0x0000e -#define GEN8_RSCC_TIMESTAMP_UNIT0_TIMESTAMP_L_DRV0 0x00082 -#define GEN8_RSCC_TIMESTAMP_UNIT0_TIMESTAMP_H_DRV0 0x00083 -#define GEN8_RSCC_TIMESTAMP_UNIT1_EN_DRV0 0x00089 -#define GEN8_RSCC_TIMESTAMP_UNIT1_OUTPUT_DRV0 0x0008c -#define GEN8_RSCC_OVERRIDE_START_ADDR 0x00100 #define GEN8_RSCC_SEQ_BUSY_DRV0 0x00101 -#define GEN8_RSCC_SEQ_MEM_0_DRV0 0x00154 #define GEN8_RSCC_TCS0_DRV0_STATUS 0x0034a #define GEN8_RSCC_TCS1_DRV0_STATUS 0x003f2 #define GEN8_RSCC_TCS2_DRV0_STATUS 0x0049a @@ -1596,9 +1584,6 @@ #define GEN8_RSCC_TCS8_DRV0_STATUS 0x0088a #define GEN8_RSCC_TCS9_DRV0_STATUS 0x00932 -/* GPU PDC sequencer registers in AOSS.RPMh domain */ -#define GEN8_PDC_GPU_ENABLE_PDC 0x002c - 
#define GEN8_SMMU_BASE 0x28000 /* GPU CX_MISC registers */ From ab80b83d59ae1f266963bffbc2f1e58a3e77290a Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Thu, 9 Nov 2023 17:33:32 +0530 Subject: [PATCH 0563/1016] kgsl: Fix upper bound check for iommu address kgsl_iommu_addr_in_range() incorrectly checks the virtual address range, fix this by updating upper bound condition. Change-Id: I7e443c51be723b70b6367c8d9c75480b2762fe1c Signed-off-by: Pankaj Gupta --- kgsl_iommu.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 8750b946a7..2bb10fbbcc 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -2308,19 +2308,20 @@ static int kgsl_iommu_svm_range(struct kgsl_pagetable *pagetable, static bool kgsl_iommu_addr_in_range(struct kgsl_pagetable *pagetable, uint64_t gpuaddr, uint64_t size) { - if (gpuaddr == 0) + u64 end = gpuaddr + size; + + /* Make sure we don't wrap around */ + if (gpuaddr == 0 || end < gpuaddr) return false; - if (gpuaddr >= pagetable->va_start && (gpuaddr + size) < - pagetable->va_end) + if (gpuaddr >= pagetable->va_start && end <= pagetable->va_end) return true; - if (gpuaddr >= pagetable->compat_va_start && (gpuaddr + size) < - pagetable->compat_va_end) + if (gpuaddr >= pagetable->compat_va_start && + end <= pagetable->compat_va_end) return true; - if (gpuaddr >= pagetable->svm_start && (gpuaddr + size) < - pagetable->svm_end) + if (gpuaddr >= pagetable->svm_start && end <= pagetable->svm_end) return true; return false; From a73edec93422c4b2cd1c410cde99731892a30d51 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Fri, 10 Nov 2023 07:32:06 -0800 Subject: [PATCH 0564/1016] kgsl: Update kgsl_cancel_event() to cancel the first matching event When cancelling events, we go through the entire list and remove all matching events. This is not what we want since callers expect to cancel a single event. So, exit the search when we find and cancel the first matching event in the list. 
Change-Id: I81af95e853f6408c6a0c5769fcece5f4493938bc Signed-off-by: Lynus Vaz --- kgsl_events.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kgsl_events.c b/kgsl_events.c index 1d5711c58f..24c56e1b9c 100644 --- a/kgsl_events.c +++ b/kgsl_events.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2011-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -190,8 +190,10 @@ void kgsl_cancel_event(struct kgsl_device *device, list_for_each_entry_safe(event, tmp, &group->events, node) { if (timestamp == event->timestamp && func == event->func && - event->priv == priv) + event->priv == priv) { signal_event(device, event, KGSL_EVENT_CANCELLED); + break; + } } spin_unlock(&group->lock); From 4e984ee3b55429d943e689730a608b24a81ca260 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Fri, 10 Nov 2023 07:46:49 -0800 Subject: [PATCH 0565/1016] kgsl: Cancel waittimestamp events on timeout When a waittimestamp times out, the corresponding event structure still remains on the event list. If repeated timeouts occur, this can cause them to pile up and delay processing of other events. Instead of leaving these events active, cancel them. 
Change-Id: I4932636a4dfbec8a01533fafe772a99b1c48167a Signed-off-by: Lynus Vaz --- adreno_drawctxt.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/adreno_drawctxt.c b/adreno_drawctxt.c index fb1dd828a9..3402620305 100644 --- a/adreno_drawctxt.c +++ b/adreno_drawctxt.c @@ -168,11 +168,11 @@ int adreno_drawctxt_wait(struct adreno_device *adreno_dev, _check_context_timestamp(device, context, timestamp), msecs_to_jiffies(timeout)); - if (ret_temp == 0) { - ret = -ETIMEDOUT; - goto done; - } else if (ret_temp < 0) { - ret = (int) ret_temp; + if (ret_temp <= 0) { + kgsl_cancel_event(device, &context->events, timestamp, + wait_callback, (void *)drawctxt); + + ret = ret_temp ? (int)ret_temp : -ETIMEDOUT; goto done; } ret = 0; From 43455e5dbfad2ffc77fef5a3f4445a5b7184a8d0 Mon Sep 17 00:00:00 2001 From: Harshitha Sai Neelati Date: Fri, 13 Oct 2023 14:51:40 +0530 Subject: [PATCH 0566/1016] kgsl: build: Add changes to compile graphics-kernel for pitti Add changes to compile graphics kernel code for pitti. 
Change-Id: Ifd0da255e0f9a1250484f727646afc6fd3e189a6 Signed-off-by: Harshitha Sai Neelati --- Kbuild | 3 +++ config/gki_pitti.conf | 20 ++++++++++++++++++++ config/pitti_consolidate_gpuconf | 8 ++++++++ config/pitti_gki_gpuconf | 1 + 4 files changed, 32 insertions(+) create mode 100644 config/gki_pitti.conf create mode 100644 config/pitti_consolidate_gpuconf create mode 120000 config/pitti_gki_gpuconf diff --git a/Kbuild b/Kbuild index 47363980b3..17ef7e6fee 100644 --- a/Kbuild +++ b/Kbuild @@ -25,6 +25,9 @@ endif ifeq ($(CONFIG_ARCH_BLAIR), y) include $(KGSL_PATH)/config/gki_blair.conf endif +ifeq ($(CONFIG_ARCH_PITTI), y) + include $(KGSL_PATH)/config/gki_pitti.conf +endif ifeq ($(CONFIG_ARCH_SA8155), y) include $(KGSL_PATH)/config/gki_sa8155.conf endif diff --git a/config/gki_pitti.conf b/config/gki_pitti.conf new file mode 100644 index 0000000000..a414481466 --- /dev/null +++ b/config/gki_pitti.conf @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + +CONFIG_QCOM_KGSL_SORT_POOL = y +CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 +CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y +CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR = \"msm-adreno-tz\" + +ifneq ($(CONFIG_CORESIGHT),) + CONFIG_QCOM_KGSL_CORESIGHT = y +endif + +ccflags-y += -DCONFIG_QCOM_KGSL_SORT_POOL=1 \ + -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ + -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ + -DCONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR=\"msm-adreno-tz\" + +ifneq ($(CONFIG_CORESIGHT),) + ccflags-y += -DCONFIG_QCOM_KGSL_CORESIGHT=1 +endif diff --git a/config/pitti_consolidate_gpuconf b/config/pitti_consolidate_gpuconf new file mode 100644 index 0000000000..953bb31391 --- /dev/null +++ b/config/pitti_consolidate_gpuconf @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. 
+ +CONFIG_QCOM_KGSL=m +CONFIG_QCOM_KGSL_SORT_POOL=y +CONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 +CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y +CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR="msm-adreno-tz" diff --git a/config/pitti_gki_gpuconf b/config/pitti_gki_gpuconf new file mode 120000 index 0000000000..e1237774c6 --- /dev/null +++ b/config/pitti_gki_gpuconf @@ -0,0 +1 @@ +pitti_consolidate_gpuconf \ No newline at end of file From d5bfb426668948214ebf77ffb2e512b6eb1b8c7a Mon Sep 17 00:00:00 2001 From: Amit Kushwaha Date: Wed, 4 Oct 2023 14:19:59 +0530 Subject: [PATCH 0567/1016] kgsl: build: Enable KGSL Bazel compilation for monaco Add the required build config support for monaco to enable KGSL bazel compilation. Change-Id: I113d18225e8e14269c929758ee779f50f5dd4750 Signed-off-by: Abhishek Barman Signed-off-by: Amit Kushwaha --- Kconfig | 2 +- config/monaco_consolidate_gpuconf | 10 ++++++++++ config/monaco_gki_gpuconf | 10 ++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 config/monaco_consolidate_gpuconf create mode 100644 config/monaco_gki_gpuconf diff --git a/Kconfig b/Kconfig index 5dca817d92..506bf6eac7 100644 --- a/Kconfig +++ b/Kconfig @@ -2,7 +2,7 @@ config QCOM_KGSL tristate "Qualcomm Technologies, Inc. 3D Graphics driver" depends on ARCH_QCOM - depends on NVMEM_QCOM_QFPROM + depends on NVMEM_QCOM_QFPROM || QCOM_QFPROM select QCOM_MDT_LOADER select INTERVAL_TREE select TRACE_GPU_MEM diff --git a/config/monaco_consolidate_gpuconf b/config/monaco_consolidate_gpuconf new file mode 100644 index 0000000000..8319734c76 --- /dev/null +++ b/config/monaco_consolidate_gpuconf @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. 
+ +CONFIG_QCOM_KGSL=m +CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=y +CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=y +CONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 +CONFIG_QCOM_KGSL_SORT_POOL=y +CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y +CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR="msm-adreno-tz" diff --git a/config/monaco_gki_gpuconf b/config/monaco_gki_gpuconf new file mode 100644 index 0000000000..8319734c76 --- /dev/null +++ b/config/monaco_gki_gpuconf @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + +CONFIG_QCOM_KGSL=m +CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=y +CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=y +CONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 +CONFIG_QCOM_KGSL_SORT_POOL=y +CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y +CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR="msm-adreno-tz" From 92c2943046a62f895f5aca4bca71d678f736b1f3 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Fri, 27 Oct 2023 12:58:18 +0530 Subject: [PATCH 0568/1016] kgsl: Fix regwrite trace in kgsl_regmap_multi_write Regwrite trace expects offset as first argument and value as second argument. Fix the regwrite trace arguments in kgsl_regmap_multi_write() to log the trace correctly. 
Change-Id: I8e4d0b651a6a5d7bd97f85e3171d63496016c7f0 Signed-off-by: Kamal Agrawal --- kgsl_regmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kgsl_regmap.c b/kgsl_regmap.c index 50e9103052..f16ec93947 100644 --- a/kgsl_regmap.c +++ b/kgsl_regmap.c @@ -180,7 +180,7 @@ void kgsl_regmap_multi_write(struct kgsl_regmap *regmap, prev = region; writel_relaxed(list[i].val, region_addr(region, list[i].offset)); - trace_kgsl_regwrite(list[i].val, list[i].offset); + trace_kgsl_regwrite(list[i].offset, list[i].val); } } From 28e9ef97b4588094ff15720b334965b37031b58d Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Thu, 26 Oct 2023 16:05:17 -0700 Subject: [PATCH 0569/1016] kgsl: Gen7: Enable access protection for LPAC pipeline Enable access protection for LPAC pipeline so that command processor will restrict access to a block of registers from within the command stream. Any attempt to access protected register space will result in fault. Change-Id: Ia8d391f7b9ad1b5d38e7d4d8ac727d2d3f54f53a Signed-off-by: Hareesh Gundu --- adreno_gen7.c | 9 +++++++-- gen7_reg.h | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/adreno_gen7.c b/adreno_gen7.c index 58b7d8224b..5464b41694 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -69,6 +69,7 @@ static const u32 gen7_ifpc_pwrup_reglist[] = { GEN7_SP_NC_MODE_CNTL, GEN7_CP_DBG_ECO_CNTL, GEN7_CP_PROTECT_CNTL, + GEN7_CP_LPAC_PROTECT_CNTL, GEN7_CP_PROTECT_REG, GEN7_CP_PROTECT_REG+1, GEN7_CP_PROTECT_REG+2, @@ -124,6 +125,7 @@ static const u32 gen7_0_0_ifpc_pwrup_reglist[] = { GEN7_TPL1_NC_MODE_CNTL, GEN7_CP_DBG_ECO_CNTL, GEN7_CP_PROTECT_CNTL, + GEN7_CP_LPAC_PROTECT_CNTL, GEN7_CP_PROTECT_REG, GEN7_CP_PROTECT_REG+1, GEN7_CP_PROTECT_REG+2, @@ -425,6 +427,7 @@ void gen7_get_gpu_feature_info(struct adreno_device *adreno_dev) adreno_dev->feature_fuse = feature_fuse; } +#define GEN7_PROTECT_DEFAULT (BIT(0) | BIT(1) | BIT(3)) static void gen7_protect_init(struct adreno_device *adreno_dev) { struct kgsl_device 
*device = KGSL_DEVICE(adreno_dev); @@ -437,8 +440,10 @@ static void gen7_protect_init(struct adreno_device *adreno_dev) * protect violation and select the last span to protect from the start * address all the way to the end of the register address space */ - kgsl_regwrite(device, GEN7_CP_PROTECT_CNTL, - BIT(0) | BIT(1) | BIT(3)); + kgsl_regwrite(device, GEN7_CP_PROTECT_CNTL, GEN7_PROTECT_DEFAULT); + + if (adreno_dev->lpac_enabled) + kgsl_regwrite(device, GEN7_CP_LPAC_PROTECT_CNTL, GEN7_PROTECT_DEFAULT); /* Program each register defined by the core definition */ for (i = 0; regs[i].reg; i++) { diff --git a/gen7_reg.h b/gen7_reg.h index bc78f6bc5c..40e34ecf36 100644 --- a/gen7_reg.h +++ b/gen7_reg.h @@ -141,6 +141,7 @@ #define GEN7_CP_LPAC_RB_BASE_HI 0xb01 #define GEN7_CP_LPAC_RB_RPTR 0xb06 #define GEN7_CP_LPAC_RB_WPTR 0xb07 +#define GEN7_CP_LPAC_PROTECT_CNTL 0xb09 #define GEN7_CP_LPAC_DRAW_STATE_ADDR 0xb0a #define GEN7_CP_LPAC_DRAW_STATE_DATA 0xb0b #define GEN7_CP_LPAC_ROQ_DBG_ADDR 0xb0c From f66e8b0bba6f3af6bd9e34b2f652cd91ef301f80 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sun, 29 Oct 2023 11:00:26 +0530 Subject: [PATCH 0570/1016] kgsl: gen7: Fix perfcounter remove for PC and VPC _get_pipeid() returns incorrect pipe id for PC and VPC. This results in perfcounter removal failure. Fix this by returning correct pipe id for PC and VPC. Also, use switch statement instead of 'if' statements for readability. 
Change-Id: I4b481394533a818084f7b778e75ac1029764a685 Signed-off-by: Kamal Agrawal --- adreno_gen7.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/adreno_gen7.c b/adreno_gen7.c index 5464b41694..9b44b57da3 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -1671,16 +1671,34 @@ static unsigned int gen7_register_offsets[ADRENO_REG_REGISTER_MAX] = { static u32 _get_pipeid(u32 groupid) { - if (groupid == KGSL_PERFCOUNTER_GROUP_BV_TSE || groupid == KGSL_PERFCOUNTER_GROUP_BV_RAS - || groupid == KGSL_PERFCOUNTER_GROUP_BV_LRZ - || groupid == KGSL_PERFCOUNTER_GROUP_BV_HLSQ) + switch (groupid) { + case KGSL_PERFCOUNTER_GROUP_BV_PC: + fallthrough; + case KGSL_PERFCOUNTER_GROUP_BV_VPC: + fallthrough; + case KGSL_PERFCOUNTER_GROUP_BV_TSE: + fallthrough; + case KGSL_PERFCOUNTER_GROUP_BV_RAS: + fallthrough; + case KGSL_PERFCOUNTER_GROUP_BV_LRZ: + fallthrough; + case KGSL_PERFCOUNTER_GROUP_BV_HLSQ: return PIPE_BV; - else if (groupid == KGSL_PERFCOUNTER_GROUP_HLSQ || groupid == KGSL_PERFCOUNTER_GROUP_TSE - || groupid == KGSL_PERFCOUNTER_GROUP_RAS - || groupid == KGSL_PERFCOUNTER_GROUP_LRZ) + case KGSL_PERFCOUNTER_GROUP_PC: + fallthrough; + case KGSL_PERFCOUNTER_GROUP_VPC: + fallthrough; + case KGSL_PERFCOUNTER_GROUP_TSE: + fallthrough; + case KGSL_PERFCOUNTER_GROUP_RAS: + fallthrough; + case KGSL_PERFCOUNTER_GROUP_LRZ: + fallthrough; + case KGSL_PERFCOUNTER_GROUP_HLSQ: return PIPE_BR; - else + default: return PIPE_NONE; + } } int gen7_perfcounter_remove(struct adreno_device *adreno_dev, From 71480e4cfc176d658ab37e4f0d88c1d33cf6b5ac Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Wed, 8 Nov 2023 23:28:50 -0800 Subject: [PATCH 0571/1016] kgsl: Fix adreno coresight related compilation errors kernel version 6.1 above having changes in coresight_ops_link function pointer arguments. Hence disable these function pointers usage in latest kernel to fix incompatible function pointer types initialization errors. 
Change-Id: I69c6341ef9fb87083b422d27255208ec18b01343 Signed-off-by: Hareesh Gundu --- adreno_coresight.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/adreno_coresight.c b/adreno_coresight.c index b2b2399ad8..4d8e3cca97 100644 --- a/adreno_coresight.c +++ b/adreno_coresight.c @@ -217,6 +217,7 @@ void adreno_coresight_remove(struct adreno_device *adreno_dev) coresight_unregister(adreno_dev->cx_coresight.dev); } +#if (KERNEL_VERSION(6, 1, 0) >= LINUX_VERSION_CODE) static int funnel_gfx_enable(struct coresight_device *csdev, int inport, int outport) { @@ -266,10 +267,13 @@ err: mutex_unlock(&device->mutex); return; } +#endif struct coresight_ops_link funnel_link_gfx_ops = { +#if (KERNEL_VERSION(6, 1, 0) >= LINUX_VERSION_CODE) .enable = funnel_gfx_enable, .disable = funnel_gfx_disable, +#endif }; struct coresight_ops funnel_gfx_ops = { From 324edce0d648ad50e5904e4ce05cad19354dc6d5 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Fri, 3 Nov 2023 14:35:50 -0700 Subject: [PATCH 0572/1016] kgsl: gen8: Update noncontext register list Add few registers to noncontext list. 
Change-Id: I6686fa144d423a45ccdf1e340bfab44e8efe2d2b Signed-off-by: Hareesh Gundu --- adreno-gpulist.h | 8 ++++++++ adreno_gen8.c | 4 ---- gen8_reg.h | 1 + 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 5152102bde..bbc1c9965b 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2528,6 +2528,9 @@ static const struct gen8_nonctxt_regs gen8_0_0_nonctxt_regs[] = { { GEN8_PC_CHICKEN_BITS_2, 0x00000200, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_PC_CHICKEN_BITS_3, 0x00500000, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_PC_CHICKEN_BITS_4, 0x00500050, BIT(PIPE_BR) | BIT(PIPE_BV) }, + /* Configure GBIF GX registers */ + { GEN8_UCHE_GBIF_GX_CONFIG, 0x010240e0, BIT(PIPE_NONE) }, + { GEN8_RBBM_GBIF_CLIENT_QOS_CNTL, 0x22122212, BIT(PIPE_NONE) }, /* Enable full concurrent resolve and unresolves */ { GEN8_RB_CCU_CNTL, 0x00000068, BIT(PIPE_BR) }, { GEN8_RB_GC_GMEM_PROTECT, 0x0c000000, BIT(PIPE_BR) }, @@ -2537,7 +2540,10 @@ static const struct gen8_nonctxt_regs gen8_0_0_nonctxt_regs[] = { { GEN8_RB_CMP_DBG_ECO_CNTL, 0x00004000, BIT(PIPE_BR) }, { GEN8_RBBM_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, { GEN8_RBBM_SLICE_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, + /* Limit the number of wave-slots for Eviction buffer to 1 per ALU GRP */ + { GEN8_SP_CHICKEN_BITS_1, BIT(26), BIT(PIPE_NONE) }, { GEN8_SP_CHICKEN_BITS_2, 0x00800000, BIT(PIPE_NONE) }, + { GEN8_SP_CHICKEN_BITS_3, 0x00300000, BIT(PIPE_NONE) }, { GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, 0x00000080, BIT(PIPE_NONE) }, { GEN8_SP_READ_SEL, 0x0001ff00, BIT(PIPE_NONE) }, { GEN8_TPL1_DBG_ECO_CNTL, 0x10000000, BIT(PIPE_NONE) }, @@ -2558,6 +2564,8 @@ static const struct gen8_nonctxt_regs gen8_0_0_nonctxt_regs[] = { { GEN8_VFD_CB_LP_REQ_CNT, 0x00000020, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_VPC_FLATSHADE_MODE_CNTL, 0x00000001, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_VSC_BIN_SIZE, 0x00010001, BIT(PIPE_NONE) }, + /* Disable redundant tile data optimization */ + { GEN8_VSC_KMD_DBG_ECO_CNTL, 
BIT(11), BIT(PIPE_NONE)}, { 0 }, }; diff --git a/adreno_gen8.c b/adreno_gen8.c index 4027db2591..abaa1e6f58 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -642,10 +642,6 @@ int gen8_start(struct adreno_device *adreno_dev) /* Reset aperture fields to go through first aperture write check */ gen8_dev->aperture = UINT_MAX; - /* Configure GBIF GX registers */ - kgsl_regwrite(device, GEN8_UCHE_GBIF_GX_CONFIG, 0x010240e0); - kgsl_regwrite(device, GEN8_RBBM_GBIF_CLIENT_QOS_CNTL, 0x22122212); - /* Make all blocks contribute to the GPU BUSY perf counter */ kgsl_regwrite(device, GEN8_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff); diff --git a/gen8_reg.h b/gen8_reg.h index 18bf1cfb81..9d48bf8e6e 100644 --- a/gen8_reg.h +++ b/gen8_reg.h @@ -508,6 +508,7 @@ #define GEN8_RBBM_SLICE_PERFCTR_FLUSH_HOST_CMD 0x5eb #define GEN8_RBBM_SLICE_NC_MODE_CNTL 0x5ec #define GEN8_VSC_BIN_SIZE 0xc02 +#define GEN8_VSC_KMD_DBG_ECO_CNTL 0xdf0 /* DBGC_CFG registers */ #define GEN8_DBGC_CFG_DBGBUS_SEL_A 0x600 From dc9b23ee69b05725d8b1f69250fe6b87746b5764 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sat, 11 Nov 2023 10:42:21 +0530 Subject: [PATCH 0573/1016] kgsl: gen8: Update select register for GPU busy counter GPU busy is read from counter 0 in gen8_power_stats(). Update the select register such that the busy cycles are reported in counter 0. 
Change-Id: I7d5162f95d859443800819d95b692ca4c547d37a Signed-off-by: Kamal Agrawal --- adreno_gen8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index abaa1e6f58..846b177293 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -787,7 +787,7 @@ int gen8_start(struct adreno_device *adreno_dev) /* Enable GMU power counter 0 to count GPU busy */ kgsl_regwrite(device, GEN8_GMUAO_GPU_CX_BUSY_MASK, 0xff000000); - kgsl_regrmw(device, GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_1, 0xFF, 0x20); + kgsl_regrmw(device, GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_0, 0xFF, 0x20); kgsl_regwrite(device, GEN8_GMUCX_POWER_COUNTER_ENABLE, 0x1); gen8_protect_init(adreno_dev); From 4eadf5ae6764b7f6357dacef4cf0db42f62f8243 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sat, 11 Nov 2023 11:51:22 +0530 Subject: [PATCH 0574/1016] kgsl: gen8: Update pipe id programming for perfcounters In gen8, bits[12:15] represent pipe id. Thus, update it from bits[12:13] to bits[12:15]. Change-Id: Ia07ee79624e88f1de270d83cc4c3d5417b9e5998 Signed-off-by: Kamal Agrawal --- adreno_gen8.c | 4 ++-- adreno_gen8_perfcounter.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index 846b177293..cca388115c 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -1900,11 +1900,11 @@ int gen8_perfcounter_update(struct adreno_device *adreno_dev, } } - data[offset++] = FIELD_PREP(GENMASK(13, 12), PIPE_NONE); + data[offset++] = FIELD_PREP(GENMASK(15, 12), PIPE_NONE); data[offset++] = GEN8_RBBM_PERFCTR_CNTL; data[offset++] = 1; - data[offset++] = FIELD_PREP(GENMASK(13, 12), PIPE_NONE); + data[offset++] = FIELD_PREP(GENMASK(15, 12), PIPE_NONE); data[offset++] = GEN8_RBBM_SLICE_PERFCTR_CNTL; data[offset++] = 1; diff --git a/adreno_gen8_perfcounter.c b/adreno_gen8_perfcounter.c index d96cac5c91..41e9ba34d9 100644 --- a/adreno_gen8_perfcounter.c +++ b/adreno_gen8_perfcounter.c @@ -61,10 +61,10 @@ static int 
gen8_counter_br_enable(struct adreno_device *adreno_dev, u32 val = 0; kgsl_regread(device, GEN8_CP_APERTURE_CNTL_HOST, &val); - kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, FIELD_PREP(GENMASK(13, 12), PIPE_BR)); + kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, FIELD_PREP(GENMASK(15, 12), PIPE_BR)); ret = gen8_perfcounter_update(adreno_dev, reg, true, - FIELD_PREP(GENMASK(13, 12), PIPE_BR), group->flags); + FIELD_PREP(GENMASK(15, 12), PIPE_BR), group->flags); kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, val); @@ -84,10 +84,10 @@ static int gen8_counter_bv_enable(struct adreno_device *adreno_dev, u32 val = 0; kgsl_regread(device, GEN8_CP_APERTURE_CNTL_HOST, &val); - kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, FIELD_PREP(GENMASK(13, 12), PIPE_BV)); + kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, FIELD_PREP(GENMASK(15, 12), PIPE_BV)); ret = gen8_perfcounter_update(adreno_dev, reg, true, - FIELD_PREP(GENMASK(13, 12), PIPE_BV), group->flags); + FIELD_PREP(GENMASK(15, 12), PIPE_BV), group->flags); kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, val); @@ -105,7 +105,7 @@ static int gen8_counter_enable(struct adreno_device *adreno_dev, int ret = 0; ret = gen8_perfcounter_update(adreno_dev, reg, true, - FIELD_PREP(GENMASK(13, 12), PIPE_NONE), group->flags); + FIELD_PREP(GENMASK(15, 12), PIPE_NONE), group->flags); if (!ret) reg->value = 0; From 3cd6c4a1e3432c97e1cf664e6368ec6195f28dee Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 14 Nov 2023 14:33:48 +0530 Subject: [PATCH 0575/1016] kgsl: gen8: Log hfi send trace upon successful write to hfi queue This change ports commit cd97affef5d9 ("msm: kgsl: Log hfi send trace upon successful write to hfi queue") to gen8. 
Change-Id: I51833319b60145455b15e686c44458080eef0193 Signed-off-by: Kamal Agrawal --- adreno_gen8_hfi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adreno_gen8_hfi.c b/adreno_gen8_hfi.c index 3d78353c4d..bd8b932ac7 100644 --- a/adreno_gen8_hfi.c +++ b/adreno_gen8_hfi.c @@ -114,8 +114,6 @@ int gen8_hfi_queue_write(struct adreno_device *adreno_dev, u32 queue_idx, queue = HOST_QUEUE_START_ADDR(gmu->hfi.hfi_mem, queue_idx); - trace_kgsl_hfi_send(id, size_dwords, MSG_HDR_GET_SEQNUM(*msg)); - write_idx = hdr->write_index; read_idx = hdr->read_index; @@ -137,6 +135,8 @@ int gen8_hfi_queue_write(struct adreno_device *adreno_dev, u32 queue_idx, write_idx = (write_idx + 1) % hdr->queue_size; } + trace_kgsl_hfi_send(id, size_dwords, MSG_HDR_GET_SEQNUM(*msg)); + hfi_update_write_idx(&hdr->write_index, write_idx); return 0; From 71980888ee9c63886aa827874f9c07c747897f59 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 14 Nov 2023 15:02:07 +0530 Subject: [PATCH 0576/1016] kgsl: gen8: Add support for RT bus hint This change ports commit 619ba9c2482e ("msm: kgsl: Add support for RT bus hint") from gen7 to gen8. 
Change-Id: Ieafb85a319e719b35f3e35829a6c5572d23a175e Signed-off-by: Kamal Agrawal --- adreno_gen8.c | 2 ++ adreno_gen8.h | 2 ++ adreno_gen8_hwsched_hfi.c | 8 ++++++++ 3 files changed, 12 insertions(+) diff --git a/adreno_gen8.c b/adreno_gen8.c index cca388115c..7c75b58b52 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -1675,6 +1675,8 @@ int gen8_probe_common(struct platform_device *pdev, kgsl_pwrscale_fast_bus_hint(gen8_core->fast_bus_hint); device->pwrctrl.cx_gdsc_offset = GEN8_GPU_CC_CX_GDSCR; + device->pwrctrl.rt_bus_hint = gen8_core->rt_bus_hint; + ret = adreno_device_probe(pdev, adreno_dev); if (ret) return ret; diff --git a/adreno_gen8.h b/adreno_gen8.h index 93695c85a1..455c755981 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -120,6 +120,8 @@ struct adreno_gen8_core { const u32 *qos_value; /** @acv_perfmode_vote: ACV vote for GPU perfmode */ u32 acv_perfmode_vote; + /** @rt_bus_hint: IB level hint for real time clients i.e. RB-0 */ + const u32 rt_bus_hint; /** @fast_bus_hint: Whether or not to increase IB vote on high ddr stall */ bool fast_bus_hint; }; diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index a4d6f24e37..08eebb06ad 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -1939,6 +1939,7 @@ static void gen8_hwsched_enable_async_hfi(struct adreno_device *adreno_dev) static int enable_preemption(struct adreno_device *adreno_dev) { const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 data; int ret; @@ -1972,6 +1973,13 @@ static int enable_preemption(struct adreno_device *adreno_dev) } } + if (device->pwrctrl.rt_bus_hint) { + ret = gen8_hfi_send_set_value(adreno_dev, HFI_VALUE_RB_IB_RULE, 0, + device->pwrctrl.rt_bus_hint); + if (ret) + device->pwrctrl.rt_bus_hint = 0; + } + /* * Bits[3:0] contain the preemption timeout enable bit per ringbuffer * Bits[31:4] contain the timeout in ms From caafb2416148f10519435f4cc6a34985335e366d Mon 
Sep 17 00:00:00 2001 From: Carter Cooper Date: Mon, 13 Nov 2023 11:27:07 -0700 Subject: [PATCH 0577/1016] kgsl: gen8: Allow using up to 32 DCVS points This change ports commit 84659689435e ("kgsl: gen7: Allow using up to 32 DCVS points") from gen7 to gen8. Change-Id: I563b9d5b257ec6d6d94444dbc3b852ee284a98c8 Signed-off-by: Carter Cooper --- adreno_gen8_gmu.c | 6 +-- adreno_gen8_gmu.h | 9 +++++ adreno_gen8_hfi.c | 85 ++++++++++++++++++++++++++++++++++++++- adreno_gen8_hfi.h | 10 ++++- adreno_gen8_hwsched.c | 8 ++-- adreno_gen8_hwsched_hfi.c | 3 +- adreno_gen8_rpmh.c | 35 +++++++--------- 7 files changed, 121 insertions(+), 35 deletions(-) diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index e91571545c..291698e57c 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -1430,7 +1430,7 @@ static int gen8_gmu_notify_slumber(struct adreno_device *adreno_dev) struct kgsl_pwrctrl *pwr = &device->pwrctrl; struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); int bus_level = pwr->pwrlevels[pwr->default_pwrlevel].bus_freq; - int perf_idx = gmu->hfi.dcvs_table.gpu_level_num - + int perf_idx = gmu->dcvs_table.gpu_level_num - pwr->default_pwrlevel - 1; struct hfi_prep_slumber_cmd req = { .freq = perf_idx, @@ -1478,7 +1478,7 @@ static int gen8_gmu_dcvs_set(struct adreno_device *adreno_dev, struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct kgsl_pwrctrl *pwr = &device->pwrctrl; struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table; + struct gen8_dcvs_table *table = &gmu->dcvs_table; struct hfi_gx_bw_perf_vote_cmd req = { .ack_type = DCVS_ACK_BLOCK, .freq = INVALID_DCVS_IDX, @@ -1527,7 +1527,7 @@ static int gen8_gmu_dcvs_set(struct adreno_device *adreno_dev, if (req.freq != INVALID_DCVS_IDX) gen8_rdpm_mx_freq_update(gmu, - gmu->hfi.dcvs_table.gx_votes[req.freq].freq); + gmu->dcvs_table.gx_votes[req.freq].freq); return ret; } diff --git a/adreno_gen8_gmu.h b/adreno_gen8_gmu.h index 4ee97fad2f..5b2c260a4c 
100644 --- a/adreno_gen8_gmu.h +++ b/adreno_gen8_gmu.h @@ -12,6 +12,13 @@ #include "adreno_gen8_hfi.h" #include "kgsl_gmu_core.h" +struct gen8_dcvs_table { + u32 gpu_level_num; + u32 gmu_level_num; + struct opp_gx_desc gx_votes[MAX_GX_LEVELS]; + struct opp_desc cx_votes[MAX_CX_LEVELS]; +}; + /** * struct gen8_gmu_device - GMU device structure * @ver: GMU Version information @@ -110,6 +117,8 @@ struct gen8_gmu_device { u32 cp_init_hdr; /** @switch_to_unsec_hdr: raw command header for switch to unsecure packet */ u32 switch_to_unsec_hdr; + /** @dcvs_table: Table for gpu dcvs levels */ + struct gen8_dcvs_table dcvs_table; }; /* Helper function to get to gen8 gmu device from adreno device */ diff --git a/adreno_gen8_hfi.c b/adreno_gen8_hfi.c index bd8b932ac7..6e8944526b 100644 --- a/adreno_gen8_hfi.c +++ b/adreno_gen8_hfi.c @@ -9,6 +9,7 @@ #include "adreno.h" #include "adreno_gen8.h" +#include "adreno_gen8_gmu.h" #include "adreno_gen8_hfi.h" #include "kgsl_device.h" #include "kgsl_trace.h" @@ -624,6 +625,87 @@ static void reset_hfi_queues(struct adreno_device *adreno_dev) } } +/* Fill the entry and return the dword count written */ +static u32 _fill_table_entry(struct hfi_table_entry *entry, u32 count, + u32 stride_bytes, u32 *data) +{ + entry->count = count; + entry->stride = stride_bytes >> 2; /* entry->stride is in dwords */ + memcpy(entry->data, data, stride_bytes * count); + + /* Return total dword count of entry + data */ + return (sizeof(*entry) >> 2) + (entry->count * entry->stride); +} + +int gen8_hfi_send_gpu_perf_table(struct adreno_device *adreno_dev) +{ + /* + * Buffer to store either hfi_table_cmd or hfi_dcvstable_cmd. + * Current max size for either is 165 dwords. 
+ */ + static u32 cmd_buf[200]; + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct gen8_dcvs_table *tbl = &gmu->dcvs_table; + int ret = 0; + + /* Starting with GMU HFI Version 2.6.1, use H2F_MSG_TABLE */ + if (gmu->ver.hfi >= HFI_VERSION(2, 6, 1)) { + struct hfi_table_cmd *cmd = (struct hfi_table_cmd *)&cmd_buf[0]; + u32 dword_off; + + /* Already setup, so just send cmd */ + if (cmd->hdr) + return gen8_hfi_send_generic_req(adreno_dev, cmd, + MSG_HDR_GET_SIZE(cmd->hdr) << 2); + + if (tbl->gpu_level_num > MAX_GX_LEVELS || tbl->gmu_level_num > MAX_CX_LEVELS) + return -EINVAL; + + /* CMD starts with struct hfi_table_cmd data */ + cmd->type = HFI_TABLE_GPU_PERF; + dword_off = sizeof(*cmd) >> 2; + + /* Fill in the table entry and data starting at dword_off */ + dword_off += _fill_table_entry((struct hfi_table_entry *)&cmd_buf[dword_off], + tbl->gpu_level_num, sizeof(struct opp_gx_desc), + (u32 *)tbl->gx_votes); + + /* Fill in the table entry and data starting at dword_off */ + dword_off += _fill_table_entry((struct hfi_table_entry *)&cmd_buf[dword_off], + tbl->gmu_level_num, sizeof(struct opp_desc), + (u32 *)tbl->cx_votes); + + cmd->hdr = CREATE_MSG_HDR(H2F_MSG_TABLE, HFI_MSG_CMD); + cmd->hdr = MSG_HDR_SET_SIZE(cmd->hdr, dword_off); + + ret = gen8_hfi_send_generic_req(adreno_dev, cmd, dword_off << 2); + } else { + struct hfi_dcvstable_cmd *cmd = (struct hfi_dcvstable_cmd *)&cmd_buf[0]; + + /* Already setup, so just send cmd */ + if (cmd->hdr) + return gen8_hfi_send_generic_req(adreno_dev, cmd, sizeof(*cmd)); + + if (tbl->gpu_level_num > MAX_GX_LEVELS_LEGACY || tbl->gmu_level_num > MAX_CX_LEVELS) + return -EINVAL; + + ret = CMD_MSG_HDR(*cmd, H2F_MSG_PERF_TBL); + if (ret) + return ret; + + cmd->gpu_level_num = tbl->gpu_level_num; + cmd->gmu_level_num = tbl->gmu_level_num; + memcpy(&cmd->gx_votes, tbl->gx_votes, + sizeof(struct opp_gx_desc) * cmd->gpu_level_num); + memcpy(&cmd->cx_votes, tbl->cx_votes, + sizeof(struct opp_desc) * cmd->gmu_level_num); + + ret 
= gen8_hfi_send_generic_req(adreno_dev, cmd, sizeof(*cmd)); + } + + return ret; +} + int gen8_hfi_start(struct adreno_device *adreno_dev) { struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); @@ -632,8 +714,7 @@ int gen8_hfi_start(struct adreno_device *adreno_dev) reset_hfi_queues(adreno_dev); - result = gen8_hfi_send_generic_req(adreno_dev, &gmu->hfi.dcvs_table, - sizeof(gmu->hfi.dcvs_table)); + result = gen8_hfi_send_gpu_perf_table(adreno_dev); if (result) goto err; diff --git a/adreno_gen8_hfi.h b/adreno_gen8_hfi.h index b4387c3739..193889ad03 100644 --- a/adreno_gen8_hfi.h +++ b/adreno_gen8_hfi.h @@ -25,8 +25,6 @@ struct gen8_hfi { struct hfi_bwtable_cmd bw_table; /** @acd_table: HFI table for ACD data */ struct hfi_acd_table_cmd acd_table; - /** @dcvs_table: HFI table for gpu dcvs levels */ - struct hfi_dcvstable_cmd dcvs_table; /** @cmdq_lock: Spinlock for accessing the cmdq */ spinlock_t cmdq_lock; /** @@ -194,6 +192,14 @@ int gen8_hfi_send_bcl_feature_ctrl(struct adreno_device *adreno_dev); */ int gen8_hfi_send_ifpc_feature_ctrl(struct adreno_device *adreno_dev); +/** + * gen8_hfi_send_gpu_perf_table - Send the gpu perf table hfi packet + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen8_hfi_send_gpu_perf_table(struct adreno_device *adreno_dev); + /* * gen8_hfi_process_queue - Check hfi queue for messages from gmu * @gmu: Pointer to the gen8 gmu device diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index d61411c465..91d0a5495d 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -279,8 +279,7 @@ static int gen8_hwsched_notify_slumber(struct adreno_device *adreno_dev) if (ret) return ret; - req.freq = gmu->hfi.dcvs_table.gpu_level_num - - pwr->default_pwrlevel - 1; + req.freq = gmu->dcvs_table.gpu_level_num - pwr->default_pwrlevel - 1; req.bw = pwr->pwrlevels[pwr->default_pwrlevel].bus_freq; req.bw |= gen8_bus_ab_quantize(adreno_dev, 0); @@ -895,7 +894,7 @@ static 
int gen8_hwsched_dcvs_set(struct adreno_device *adreno_dev, struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct kgsl_pwrctrl *pwr = &device->pwrctrl; struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table; + struct gen8_dcvs_table *table = &gmu->dcvs_table; struct hfi_gx_bw_perf_vote_cmd req = { .ack_type = DCVS_ACK_BLOCK, .freq = INVALID_DCVS_IDX, @@ -946,8 +945,7 @@ static int gen8_hwsched_dcvs_set(struct adreno_device *adreno_dev, } if (req.freq != INVALID_DCVS_IDX) - gen8_rdpm_mx_freq_update(gmu, - gmu->hfi.dcvs_table.gx_votes[req.freq].freq); + gen8_rdpm_mx_freq_update(gmu, gmu->dcvs_table.gx_votes[req.freq].freq); return ret; } diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 08eebb06ad..4c05867e64 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -2376,8 +2376,7 @@ int gen8_hwsched_hfi_start(struct adreno_device *adreno_dev) if (ret) goto err; - ret = gen8_hfi_send_generic_req(adreno_dev, &gmu->hfi.dcvs_table, - sizeof(gmu->hfi.dcvs_table)); + ret = gen8_hfi_send_gpu_perf_table(adreno_dev); if (ret) goto err; diff --git a/adreno_gen8_rpmh.c b/adreno_gen8_rpmh.c index 82927a76b4..a8ee118517 100644 --- a/adreno_gen8_rpmh.c +++ b/adreno_gen8_rpmh.c @@ -255,23 +255,22 @@ static struct rpmh_bw_votes *build_rpmh_bw_votes(struct bcm *bcms, /* * setup_gmu_arc_votes - Build the gmu voting table - * @hfi: Pointer to hfi device + * @gmu: Pointer to gmu device * @pri_rail: Pointer to primary power rail vlvl table * @sec_rail: Pointer to second/dependent power rail vlvl table - * @freqs: List of GMU frequencies - * @vlvls: List of GMU voltage levels * * This function initializes the cx votes for all gmu frequencies * for gmu dcvs */ -static int setup_cx_arc_votes(struct gen8_hfi *hfi, - struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail, - u32 *freqs, u32 *vlvls) +static int setup_cx_arc_votes(struct gen8_gmu_device *gmu, + struct rpmh_arc_vals 
*pri_rail, struct rpmh_arc_vals *sec_rail) { /* Hardcoded values of GMU CX voltage levels */ u16 gmu_cx_vlvl[MAX_CX_LEVELS]; u32 cx_votes[MAX_CX_LEVELS]; - struct hfi_dcvstable_cmd *table = &hfi->dcvs_table; + struct gen8_dcvs_table *table = &gmu->dcvs_table; + u32 *freqs = gmu->freqs; + u32 *vlvls = gmu->vlvls; int ret, i; gmu_cx_vlvl[0] = 0; @@ -333,21 +332,21 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct kgsl_pwrctrl *pwr = &device->pwrctrl; - struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table; + struct gen8_dcvs_table *table = &gmu->dcvs_table; u32 index; u16 vlvl_tbl[MAX_GX_LEVELS]; u32 gx_votes[MAX_GX_LEVELS]; int ret, i; - /* Add the zero powerlevel for the perf table */ - table->gpu_level_num = device->pwrctrl.num_pwrlevels + 1; - - if (table->gpu_level_num > ARRAY_SIZE(vlvl_tbl)) { - dev_err(&gmu->pdev->dev, + if (pwr->num_pwrlevels + 1 > ARRAY_SIZE(vlvl_tbl)) { + dev_err(device->dev, "Defined more GPU DCVS levels than RPMh can support\n"); return -ERANGE; } + /* Add the zero powerlevel for the perf table */ + table->gpu_level_num = pwr->num_pwrlevels + 1; + memset(vlvl_tbl, 0, sizeof(vlvl_tbl)); table->gx_votes[0].freq = 0; @@ -366,7 +365,7 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, ret = to_cx_hlvl(cx_rail, cx_vlvl, &table->gx_votes[index].cx_vote); if (ret) { - dev_err(&gmu->pdev->dev, "Unsupported cx corner: %u\n", + dev_err(device->dev, "Unsupported cx corner: %u\n", cx_vlvl); return ret; } @@ -386,14 +385,9 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, static int build_dcvs_table(struct adreno_device *adreno_dev) { struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - struct gen8_hfi *hfi = &gmu->hfi; struct rpmh_arc_vals gx_arc, cx_arc, mx_arc; int ret; - ret = CMD_MSG_HDR(hfi->dcvs_table, H2F_MSG_PERF_TBL); - if (ret) - return ret; - ret = rpmh_arc_cmds(&gx_arc, 
"gfx.lvl"); if (ret) return ret; @@ -406,8 +400,7 @@ static int build_dcvs_table(struct adreno_device *adreno_dev) if (ret) return ret; - ret = setup_cx_arc_votes(hfi, &cx_arc, &mx_arc, - gmu->freqs, gmu->vlvls); + ret = setup_cx_arc_votes(gmu, &cx_arc, &mx_arc); if (ret) return ret; From 67652a1345cafab13e7f19dc3d2579b8713ffa6d Mon Sep 17 00:00:00 2001 From: Carter Cooper Date: Mon, 13 Nov 2023 11:31:10 -0700 Subject: [PATCH 0578/1016] kgsl: gen8: Clear only GMU2HOST irq bits that are being processed This change ports commit e661ac3bd941 ("kgsl: gen7: Clear only GMU2HOST irq bits that are being processed") from gen7 to gen8. Change-Id: Iec50954651e7550c724c62c7c313aba07ffa073d Signed-off-by: Carter Cooper --- adreno_gen8_hwsched_hfi.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 4c05867e64..2e78749933 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -1357,8 +1357,12 @@ static irqreturn_t gen8_hwsched_hfi_handler(int irq, void *data) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 status = 0; + /* + * GEN8_GMUCX_GMU2HOST_INTR_INFO may have bits set not specified in hfi->irq_mask. + * Read and clear only those irq bits that we are processing here. + */ gmu_core_regread(device, GEN8_GMUCX_GMU2HOST_INTR_INFO, &status); - gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR, hfi->irq_mask); + gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR, status & hfi->irq_mask); /* * If interrupts are not enabled on the HFI message queue, From ba215af9c7403b7b5479f4c55a22b0ce78678155 Mon Sep 17 00:00:00 2001 From: Carter Cooper Date: Mon, 13 Nov 2023 11:39:36 -0700 Subject: [PATCH 0579/1016] kgsl: gen8: Add CLX programming infrastructure This change ports commit 6de90019618b ("kgsl: gen7: Add CLX programming infrastructure") from gen7 to gen8. 
Change-Id: I2d987903b857623da863c36331f02912d452fd8e Signed-off-by: Carter Cooper --- adreno_gen8.c | 7 +++++++ adreno_gen8_hfi.c | 37 +++++++++++++++++++++++++++++++++++++ adreno_gen8_hfi.h | 8 ++++++++ adreno_gen8_hwsched_hfi.c | 4 ++++ 4 files changed, 56 insertions(+) diff --git a/adreno_gen8.c b/adreno_gen8.c index 7c75b58b52..cf00eaafe5 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -876,6 +876,9 @@ int gen8_scm_gpu_init_cx_regs(struct adreno_device *adreno_dev) if (ADRENO_FEATURE(adreno_dev, ADRENO_BCL)) gpu_req |= GPU_BCL_EN_REQ; + if (ADRENO_FEATURE(adreno_dev, ADRENO_CLX)) + gpu_req |= GPU_CLX_EN_REQ; + gpu_req |= GPU_TSENSE_EN_REQ; ret = kgsl_scm_gpu_init_regs(&device->pdev->dev, gpu_req); @@ -887,6 +890,10 @@ int gen8_scm_gpu_init_cx_regs(struct adreno_device *adreno_dev) if (!ret && ADRENO_FEATURE(adreno_dev, ADRENO_BCL)) adreno_dev->bcl_enabled = true; + /* If programming TZ CLX was successful, then program KMD owned CLX regs */ + if (!ret && ADRENO_FEATURE(adreno_dev, ADRENO_CLX)) + adreno_dev->clx_enabled = true; + /* * If scm call returned EOPNOTSUPP, either we are on a kernel version * lesser than 6.1 where scm call is not supported or we are sending an diff --git a/adreno_gen8_hfi.c b/adreno_gen8_hfi.c index 6e8944526b..977e5382b7 100644 --- a/adreno_gen8_hfi.c +++ b/adreno_gen8_hfi.c @@ -572,6 +572,39 @@ int gen8_hfi_send_bcl_feature_ctrl(struct adreno_device *adreno_dev) return gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_BCL, 1, adreno_dev->bcl_data); } +int gen8_hfi_send_clx_feature_ctrl(struct adreno_device *adreno_dev) +{ + int ret = 0; + struct hfi_clx_table_v2_cmd cmd = {0}; + + if (!adreno_dev->clx_enabled) + return 0; + + /* Make sure the table is valid before enabling feature */ + ret = CMD_MSG_HDR(cmd, H2F_MSG_CLX_TBL); + if (ret) + return ret; + + ret = gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_CLX, 1, 0); + if (ret) + return ret; + + cmd.version = FIELD_PREP(GENMASK(31, 16), 0x2) | FIELD_PREP(GENMASK(15, 0), 0x1); 
+ /* cmd.domain[0] is never used but needed per hfi spec */ + cmd.domain[1].data0 = FIELD_PREP(GENMASK(31, 29), 1) | + FIELD_PREP(GENMASK(28, 28), 1) | + FIELD_PREP(GENMASK(27, 22), 1) | + FIELD_PREP(GENMASK(21, 16), 40) | + FIELD_PREP(GENMASK(15, 0), 0); + cmd.domain[1].clxt = 0; + cmd.domain[1].clxh = 0; + cmd.domain[1].urgmode = 1; + cmd.domain[1].lkgen = 0; + cmd.domain[1].currbudget = 50; + + return gen8_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd)); +} + #define EVENT_PWR_ACD_THROTTLE_PROF 44 int gen8_hfi_send_acd_feature_ctrl(struct adreno_device *adreno_dev) @@ -731,6 +764,10 @@ int gen8_hfi_start(struct adreno_device *adreno_dev) if (result) goto err; + result = gen8_hfi_send_clx_feature_ctrl(adreno_dev); + if (result) + goto err; + result = gen8_hfi_send_ifpc_feature_ctrl(adreno_dev); if (result) goto err; diff --git a/adreno_gen8_hfi.h b/adreno_gen8_hfi.h index 193889ad03..2bd9211aac 100644 --- a/adreno_gen8_hfi.h +++ b/adreno_gen8_hfi.h @@ -184,6 +184,14 @@ int gen8_hfi_send_generic_req_v5(struct adreno_device *adreno_dev, void *cmd, */ int gen8_hfi_send_bcl_feature_ctrl(struct adreno_device *adreno_dev); +/** + * gen8_hfi_send_clx_feature_ctrl - Send the clx feature hfi packet + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen8_hfi_send_clx_feature_ctrl(struct adreno_device *adreno_dev); + /** * gen8_hfi_send_ifpc_feature_ctrl - Send the ipfc feature hfi packet * @adreno_dev: Pointer to the adreno device diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 2e78749933..a162156be6 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -2396,6 +2396,10 @@ int gen8_hwsched_hfi_start(struct adreno_device *adreno_dev) if (ret) goto err; + ret = gen8_hfi_send_clx_feature_ctrl(adreno_dev); + if (ret) + goto err; + ret = gen8_hfi_send_ifpc_feature_ctrl(adreno_dev); if (ret) goto err; From bc7c48ecb008d4bf89fa2a51d98f64737f864e96 Mon Sep 17 00:00:00 
2001 From: Lynus Vaz Date: Mon, 13 Nov 2023 12:10:12 -0800 Subject: [PATCH 0580/1016] kgsl: gen8: Set up the GPU CX timer on Gen8 devices The GPU CX timer is available on Gen8 devices. Synchronize this timer with the CPU timer at GPU boot time to use it. Change-Id: I9deaa79cb0fdd0dd032bd47005156c7851511268 Signed-off-by: Lynus Vaz --- adreno_gen8.c | 59 +++++++++++++++++++++++++++++++++++++++++++ adreno_gen8.h | 8 ++++++ adreno_gen8_gmu.c | 3 +++ adreno_gen8_hwsched.c | 3 +++ gen8_reg.h | 2 ++ 5 files changed, 75 insertions(+) diff --git a/adreno_gen8.c b/adreno_gen8.c index cf00eaafe5..e3d07ec115 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -282,6 +282,65 @@ int gen8_init(struct adreno_device *adreno_dev) "powerup_register_list"); } +#define CX_TIMER_INIT_SAMPLES 16 +void gen8_cx_timer_init(struct adreno_device *adreno_dev) +{ + u64 seed_val, tmr, skew = 0; + int i; + unsigned long flags; + + /* Set up the CX timer just once */ + if (test_bit(ADRENO_DEVICE_CX_TIMER_INITIALIZED, &adreno_dev->priv)) + return; + + /* Disable irqs to get accurate timings */ + local_irq_save(flags); + + /* Calculate the overhead of timer reads and register writes */ + for (i = 0; i < CX_TIMER_INIT_SAMPLES; i++) { + u64 tmr1, tmr2, tmr3; + + /* Measure time for two reads of the CPU timer */ + tmr1 = arch_timer_read_counter(); + tmr2 = arch_timer_read_counter(); + + /* Write to the register and time it */ + adreno_cx_misc_regwrite(adreno_dev, + GEN8_GPU_CX_MISC_AO_COUNTER_LO, + lower_32_bits(tmr2)); + adreno_cx_misc_regwrite(adreno_dev, + GEN8_GPU_CX_MISC_AO_COUNTER_HI, + upper_32_bits(tmr2)); + + /* Barrier to make sure the write completes before timing it */ + mb(); + tmr3 = arch_timer_read_counter(); + + /* Calculate difference between register write and CPU timer */ + skew += (tmr3 - tmr2) - (tmr2 - tmr1); + } + + local_irq_restore(flags); + + /* Get the average over all our readings, to the closest integer */ + skew = (skew + CX_TIMER_INIT_SAMPLES / 2) / CX_TIMER_INIT_SAMPLES; 
+ + local_irq_save(flags); + tmr = arch_timer_read_counter(); + + seed_val = tmr + skew; + + /* Seed the GPU CX counter with the adjusted timer */ + adreno_cx_misc_regwrite(adreno_dev, + GEN8_GPU_CX_MISC_AO_COUNTER_LO, lower_32_bits(seed_val)); + adreno_cx_misc_regwrite(adreno_dev, + GEN8_GPU_CX_MISC_AO_COUNTER_HI, upper_32_bits(seed_val)); + + local_irq_restore(flags); + + set_bit(ADRENO_DEVICE_CX_TIMER_INITIALIZED, &adreno_dev->priv); +} + void gen8_get_gpu_feature_info(struct adreno_device *adreno_dev) { u32 feature_fuse = 0; diff --git a/adreno_gen8.h b/adreno_gen8.h index 455c755981..53b254eb0d 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -294,6 +294,14 @@ int gen8_start(struct adreno_device *adreno_dev); */ int gen8_init(struct adreno_device *adreno_dev); +/** + * gen8_cx_timer_init - Initialize the CX timer on Gen8 devices + * @adreno_dev: Pointer to the adreno device + * + * Synchronize the GPU CX timer (if we have one) with the CPU timer + */ +void gen8_cx_timer_init(struct adreno_device *adreno_dev); + /** * gen8_get_gpu_feature_info - Get hardware supported feature info * @adreno_dev: Pointer to the adreno device diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index 291698e57c..3b8d6269c1 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -1808,6 +1808,9 @@ static int gen8_gmu_first_boot(struct adreno_device *adreno_dev) if (ret) goto gdsc_off; + /* Initialize the CX timer */ + gen8_cx_timer_init(adreno_dev); + ret = gen8_gmu_load_fw(adreno_dev); if (ret) goto clks_gdsc_off; diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 91d0a5495d..f609dc64d3 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -100,6 +100,9 @@ static int gen8_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) if (ret) goto gdsc_off; + /* Initialize the CX timer */ + gen8_cx_timer_init(adreno_dev); + ret = gen8_gmu_load_fw(adreno_dev); if (ret) goto clks_gdsc_off; diff --git a/gen8_reg.h b/gen8_reg.h index 9d48bf8e6e..7e71e45349 100644 
--- a/gen8_reg.h +++ b/gen8_reg.h @@ -1588,6 +1588,8 @@ #define GEN8_SMMU_BASE 0x28000 /* GPU CX_MISC registers */ +#define GEN8_GPU_CX_MISC_AO_COUNTER_LO 0x80 +#define GEN8_GPU_CX_MISC_AO_COUNTER_HI 0x81 #define GEN8_GPU_CX_MISC_SW_FUSE_VALUE 0x400 /* GPU SW Fuse Feature bit fields */ From 97c04d22817fbf5df661590e41ecd286bf5ae6cb Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Wed, 18 Oct 2023 17:52:59 -0700 Subject: [PATCH 0581/1016] msm: kgsl: Add fault header for A8x A8x needs a different function to read and print the faulting registers since most registers are now behind apertures. Add target specific fault header for this chip. Change-Id: I682be05e8586d92e8b836644ffece8e8486dab05 Signed-off-by: Urvashi Agrawal --- adreno.h | 8 +++ adreno_dispatch.c | 6 +- adreno_gen7.c | 38 +++++++++++++ adreno_gen8.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++ adreno_hwsched.c | 48 ++-------------- 5 files changed, 192 insertions(+), 44 deletions(-) diff --git a/adreno.h b/adreno.h index 3971a40d52..af056e8377 100644 --- a/adreno.h +++ b/adreno.h @@ -977,6 +977,14 @@ struct adreno_gpudev { * @get_uche_trap_base: Return the UCHE_TRAP_BASE value */ u64 (*get_uche_trap_base)(void); + /** + * @fault_header: Print fault header + */ + void (*fault_header)(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj); + /** + * @lpac_fault_header: Print LPAC fault header + */ + void (*lpac_fault_header)(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj); }; /** diff --git a/adreno_dispatch.c b/adreno_dispatch.c index b5e5ed4b02..ed621b8b30 100644 --- a/adreno_dispatch.c +++ b/adreno_dispatch.c @@ -1470,6 +1470,7 @@ static void adreno_fault_header(struct kgsl_device *device, struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); struct adreno_context *drawctxt = drawobj ? 
ADRENO_CONTEXT(drawobj->context) : NULL; + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); unsigned int status, rptr, wptr, ib1sz, ib2sz; uint64_t ib1base, ib2base; bool gx_on = adreno_gx_is_on(adreno_dev); @@ -1492,6 +1493,9 @@ static void adreno_fault_header(struct kgsl_device *device, return; } + if (gpudev->fault_header) + return gpudev->fault_header(adreno_dev, drawobj); + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS, &status); adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, &rptr); adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_WPTR, &wptr); @@ -1502,8 +1506,6 @@ static void adreno_fault_header(struct kgsl_device *device, ADRENO_REG_CP_IB2_BASE_HI, &ib2base); adreno_readreg(adreno_dev, ADRENO_REG_CP_IB2_BUFSZ, &ib2sz); - /* FIXME Add slice and unslice busy status for Gen8 */ - if (drawobj != NULL) { drawctxt->base.total_fault_count++; drawctxt->base.last_faulted_cmd_ts = drawobj->timestamp; diff --git a/adreno_gen7.c b/adreno_gen7.c index 9b44b57da3..1f4fe35b5d 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -2114,6 +2114,42 @@ static void gen7_swfuse_irqctrl(struct adreno_device *adreno_dev, bool state) state ? 
GEN7_SW_FUSE_INT_MASK : 0); } +static void gen7_lpac_fault_header(struct adreno_device *adreno_dev, + struct kgsl_drawobj *drawobj_lpac) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_context *drawctxt_lpac; + u32 status; + u32 lpac_rptr, lpac_wptr, lpac_ib1sz, lpac_ib2sz; + u64 lpac_ib1base, lpac_ib2base; + + kgsl_regread(device, GEN7_RBBM_STATUS, &status); + kgsl_regread(device, GEN7_CP_LPAC_RB_RPTR, &lpac_rptr); + kgsl_regread(device, GEN7_CP_LPAC_RB_WPTR, &lpac_wptr); + kgsl_regread64(device, GEN7_CP_LPAC_IB1_BASE_HI, GEN7_CP_LPAC_IB1_BASE, &lpac_ib1base); + kgsl_regread(device, GEN7_CP_LPAC_IB1_REM_SIZE, &lpac_ib1sz); + kgsl_regread64(device, GEN7_CP_LPAC_IB2_BASE_HI, GEN7_CP_LPAC_IB2_BASE, &lpac_ib2base); + kgsl_regread(device, GEN7_CP_LPAC_IB2_REM_SIZE, &lpac_ib2sz); + + drawctxt_lpac = ADRENO_CONTEXT(drawobj_lpac->context); + drawobj_lpac->context->last_faulted_cmd_ts = drawobj_lpac->timestamp; + drawobj_lpac->context->total_fault_count++; + + pr_context(device, drawobj_lpac->context, + "LPAC ctx %d ctx_type %s ts %d status %8.8X dispatch_queue=%d rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", + drawobj_lpac->context->id, kgsl_context_type(drawctxt_lpac->type), + drawobj_lpac->timestamp, status, + drawobj_lpac->context->gmu_dispatch_queue, lpac_rptr, lpac_wptr, + lpac_ib1base, lpac_ib1sz, lpac_ib2base, lpac_ib2sz); + + pr_context(device, drawobj_lpac->context, "lpac cmdline: %s\n", + drawctxt_lpac->base.proc_priv->cmdline); + + trace_adreno_gpu_fault(drawobj_lpac->context->id, drawobj_lpac->timestamp, status, + lpac_rptr, lpac_wptr, lpac_ib1base, lpac_ib1sz, lpac_ib2base, lpac_ib2sz, + adreno_get_level(drawobj_lpac->context)); +} + const struct gen7_gpudev adreno_gen7_9_0_hwsched_gpudev = { .base = { .reg_offsets = gen7_register_offsets, @@ -2137,6 +2173,7 @@ const struct gen7_gpudev adreno_gen7_9_0_hwsched_gpudev = { .context_destroy = gen7_hwsched_context_destroy, .lpac_store = gen7_9_0_lpac_store, 
.get_uche_trap_base = gen7_get_uche_trap_base, + .lpac_fault_header = gen7_lpac_fault_header, }, .hfi_probe = gen7_hwsched_hfi_probe, .hfi_remove = gen7_hwsched_hfi_remove, @@ -2166,6 +2203,7 @@ const struct gen7_gpudev adreno_gen7_hwsched_gpudev = { .context_destroy = gen7_hwsched_context_destroy, .lpac_store = gen7_lpac_store, .get_uche_trap_base = gen7_get_uche_trap_base, + .lpac_fault_header = gen7_lpac_fault_header, }, .hfi_probe = gen7_hwsched_hfi_probe, .hfi_remove = gen7_hwsched_hfi_remove, diff --git a/adreno_gen8.c b/adreno_gen8.c index e3d07ec115..7ff5873426 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -377,6 +377,20 @@ static void gen8_host_aperture_set(struct adreno_device *adreno_dev, u32 pipe_id gen8_dev->aperture = aperture_val; } +static inline void gen8_regread64_aperture(struct kgsl_device *device, + u32 offsetwords_lo, u32 offsetwords_hi, u64 *value, u32 pipe, + u32 slice_id, u32 use_slice_id) +{ + u32 val_lo = 0, val_hi = 0; + + gen8_host_aperture_set(ADRENO_DEVICE(device), pipe, slice_id, use_slice_id); + + val_lo = kgsl_regmap_read(&device->regmap, offsetwords_lo); + val_hi = kgsl_regmap_read(&device->regmap, offsetwords_hi); + + *value = (((u64)val_hi << 32) | val_lo); +} + static inline void gen8_regread_aperture(struct kgsl_device *device, u32 offsetwords, u32 *value, u32 pipe, u32 slice_id, u32 use_slice_id) { @@ -2207,6 +2221,125 @@ static void gen8_swfuse_irqctrl(struct adreno_device *adreno_dev, bool state) state ? 
GEN8_SW_FUSE_INT_MASK : 0); } +static void gen8_lpac_fault_header(struct adreno_device *adreno_dev, + struct kgsl_drawobj *drawobj) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_context *drawctxt; + u32 status, rptr, wptr, ib1sz, ib2sz, ib3sz; + u64 ib1base, ib2base, ib3base; + + kgsl_regread(device, GEN8_RBBM_LPAC_STATUS, &status); + kgsl_regread(device, GEN8_CP_RB_RPTR_LPAC, &rptr); + kgsl_regread(device, GEN8_CP_RB_WPTR_LPAC, &wptr); + gen8_regread64_aperture(device, GEN8_CP_IB1_BASE_LO_PIPE, + GEN8_CP_IB1_BASE_HI_PIPE, &ib1base, PIPE_LPAC, 0, 0); + gen8_regread_aperture(device, GEN8_CP_IB1_REM_SIZE_PIPE, &ib1sz, PIPE_LPAC, 0, 0); + gen8_regread64_aperture(device, GEN8_CP_IB2_BASE_LO_PIPE, + GEN8_CP_IB2_BASE_HI_PIPE, &ib2base, PIPE_LPAC, 0, 0); + gen8_regread_aperture(device, GEN8_CP_IB2_REM_SIZE_PIPE, &ib2sz, PIPE_LPAC, 0, 0); + gen8_regread64_aperture(device, GEN8_CP_IB3_BASE_LO_PIPE, + GEN8_CP_IB3_BASE_HI_PIPE, &ib3base, PIPE_LPAC, 0, 0); + gen8_regread_aperture(device, GEN8_CP_IB3_REM_SIZE_PIPE, &ib3sz, PIPE_LPAC, 0, 0); + gen8_host_aperture_set(adreno_dev, 0, 0, 0); + + drawctxt = ADRENO_CONTEXT(drawobj->context); + drawobj->context->last_faulted_cmd_ts = drawobj->timestamp; + drawobj->context->total_fault_count++; + + pr_context(device, drawobj->context, + "LPAC ctx %u ctx_type %s ts %u status %8.8X\n", + drawobj->context->id, kgsl_context_type(drawctxt->type), + drawobj->timestamp, status); + + pr_context(device, drawobj->context, + "LPAC: rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n", + rptr, wptr, ib1base, ib1sz, ib2base, ib2sz, ib3base, ib3sz); + + pr_context(device, drawobj->context, "lpac cmdline: %s\n", + drawctxt->base.proc_priv->cmdline); + + trace_adreno_gpu_fault(drawobj->context->id, drawobj->timestamp, status, + rptr, wptr, ib1base, ib1sz, ib2base, ib2sz, + adreno_get_level(drawobj->context)); + +} + +static void gen8_fault_header(struct adreno_device *adreno_dev, + struct 
kgsl_drawobj *drawobj) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_context *drawctxt; + u32 status, rptr, wptr, ib1sz, ib2sz, ib3sz, rptr_bv, ib1sz_bv, ib2sz_bv, ib3sz_bv; + u32 gfx_status, gfx_br_status, gfx_bv_status; + u64 ib1base, ib2base, ib3base, ib1base_bv, ib2base_bv, ib3base_bv; + u32 ctxt_id = 0; + u32 ts = 0; + int rb_id = -1; + + kgsl_regread(device, GEN8_RBBM_STATUS, &status); + kgsl_regread(device, GEN8_RBBM_GFX_STATUS, &gfx_status); + kgsl_regread(device, GEN8_RBBM_GFX_BV_STATUS, &gfx_bv_status); + kgsl_regread(device, GEN8_RBBM_GFX_BR_STATUS, &gfx_br_status); + kgsl_regread(device, GEN8_CP_RB_RPTR_BR, &rptr); + kgsl_regread(device, GEN8_CP_RB_WPTR_GC, &wptr); + kgsl_regread(device, GEN8_CP_RB_RPTR_BV, &rptr_bv); + gen8_regread64_aperture(device, GEN8_CP_IB1_BASE_LO_PIPE, + GEN8_CP_IB1_BASE_HI_PIPE, &ib1base, PIPE_BR, 0, 0); + gen8_regread_aperture(device, GEN8_CP_IB1_REM_SIZE_PIPE, &ib1sz, PIPE_BR, 0, 0); + gen8_regread64_aperture(device, GEN8_CP_IB2_BASE_LO_PIPE, + GEN8_CP_IB2_BASE_HI_PIPE, &ib2base, PIPE_BR, 0, 0); + gen8_regread_aperture(device, GEN8_CP_IB2_REM_SIZE_PIPE, &ib2sz, PIPE_BR, 0, 0); + gen8_regread64_aperture(device, GEN8_CP_IB3_BASE_LO_PIPE, + GEN8_CP_IB3_BASE_HI_PIPE, &ib3base, PIPE_BR, 0, 0); + gen8_regread_aperture(device, GEN8_CP_IB3_REM_SIZE_PIPE, &ib3sz, PIPE_BR, 0, 0); + gen8_regread64_aperture(device, GEN8_CP_IB1_BASE_LO_PIPE, + GEN8_CP_IB1_BASE_HI_PIPE, &ib1base_bv, PIPE_BV, 0, 0); + gen8_regread_aperture(device, GEN8_CP_IB1_REM_SIZE_PIPE, &ib1sz_bv, PIPE_BV, 0, 0); + gen8_regread64_aperture(device, GEN8_CP_IB2_BASE_LO_PIPE, + GEN8_CP_IB2_BASE_HI_PIPE, &ib2base_bv, PIPE_BV, 0, 0); + gen8_regread_aperture(device, GEN8_CP_IB2_REM_SIZE_PIPE, &ib2sz_bv, PIPE_BV, 0, 0); + gen8_regread64_aperture(device, GEN8_CP_IB3_BASE_LO_PIPE, + GEN8_CP_IB3_BASE_HI_PIPE, &ib3base_bv, PIPE_BV, 0, 0); + gen8_regread_aperture(device, GEN8_CP_IB3_REM_SIZE_PIPE, &ib3sz_bv, PIPE_BV, 0, 0); + 
gen8_host_aperture_set(adreno_dev, 0, 0, 0); + + if (drawobj) { + drawctxt = ADRENO_CONTEXT(drawobj->context); + drawobj->context->last_faulted_cmd_ts = drawobj->timestamp; + drawobj->context->total_fault_count++; + ctxt_id = drawobj->context->id; + ts = drawobj->timestamp; + rb_id = adreno_get_level(drawobj->context); + + pr_context(device, drawobj->context, + "ctx %u ctx_type %s ts %u\n", + drawobj->context->id, kgsl_context_type(drawctxt->type), + drawobj->timestamp); + + pr_context(device, drawobj->context, "cmdline: %s\n", + drawctxt->base.proc_priv->cmdline); + + trace_adreno_gpu_fault(drawobj->context->id, drawobj->timestamp, status, + rptr, wptr, ib1base, ib1sz, ib2base, ib2sz, + adreno_get_level(drawobj->context)); + } + dev_err(device->dev, + "status %8.8X gfx_status %8.8X gfx_br_status %8.8X gfx_bv_status %8.8X\n", + status, gfx_status, gfx_br_status, gfx_bv_status); + + dev_err(device->dev, + "BR: rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n", + rptr, wptr, ib1base, ib1sz, ib2base, ib2sz, ib3base, ib3sz); + + dev_err(device->dev, + "BV: rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n", + rptr_bv, wptr, ib1base_bv, ib1sz_bv, ib2base_bv, ib2sz_bv, ib3base_bv, ib3sz_bv); + + trace_adreno_gpu_fault(ctxt_id, ts, status, + rptr, wptr, ib1base, ib1sz, ib2base, ib2sz, rb_id); +} + const struct gen8_gpudev adreno_gen8_hwsched_gpudev = { .base = { .reg_offsets = gen8_register_offsets, @@ -2229,6 +2362,8 @@ const struct gen8_gpudev adreno_gen8_hwsched_gpudev = { .context_destroy = gen8_hwsched_context_destroy, .lpac_store = gen8_lpac_store, .get_uche_trap_base = gen8_get_uche_trap_base, + .fault_header = gen8_fault_header, + .lpac_fault_header = gen8_lpac_fault_header, }, .hfi_probe = gen8_hwsched_hfi_probe, .hfi_remove = gen8_hwsched_hfi_remove, @@ -2259,6 +2394,7 @@ const struct gen8_gpudev adreno_gen8_gmu_gpudev = { .set_isdb_breakpoint_registers = gen8_set_isdb_breakpoint_registers, .swfuse_irqctrl = 
gen8_swfuse_irqctrl, .get_uche_trap_base = gen8_get_uche_trap_base, + .fault_header = gen8_fault_header, }, .hfi_probe = gen8_gmu_hfi_probe, .handle_watchdog = gen8_gmu_handle_watchdog, diff --git a/adreno_hwsched.c b/adreno_hwsched.c index b057237306..9779c85245 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -1428,55 +1428,18 @@ void adreno_hwsched_replay(struct adreno_device *adreno_dev) kgsl_process_event_groups(device); } -static void do_fault_header_lpac(struct adreno_device *adreno_dev, - struct kgsl_drawobj *drawobj_lpac) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct adreno_context *drawctxt_lpac; - u32 status; - u32 lpac_rptr, lpac_wptr, lpac_ib1sz, lpac_ib2sz; - u64 hi, lo, lpac_ib1base, lpac_ib2base; - - adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS, &status); - lpac_rptr = kgsl_regmap_read(&device->regmap, GEN7_CP_LPAC_RB_RPTR); - lpac_wptr = kgsl_regmap_read(&device->regmap, GEN7_CP_LPAC_RB_WPTR); - hi = kgsl_regmap_read(&device->regmap, GEN7_CP_LPAC_IB1_BASE_HI); - lo = kgsl_regmap_read(&device->regmap, GEN7_CP_LPAC_IB1_BASE); - lpac_ib1base = lo | (hi << 32); - lpac_ib1sz = kgsl_regmap_read(&device->regmap, GEN7_CP_LPAC_IB1_REM_SIZE); - hi = kgsl_regmap_read(&device->regmap, GEN7_CP_LPAC_IB2_BASE_HI); - lo = kgsl_regmap_read(&device->regmap, GEN7_CP_LPAC_IB2_BASE); - lpac_ib2base = lo | (hi << 32); - lpac_ib2sz = kgsl_regmap_read(&device->regmap, GEN7_CP_LPAC_IB2_REM_SIZE); - - drawctxt_lpac = ADRENO_CONTEXT(drawobj_lpac->context); - drawobj_lpac->context->last_faulted_cmd_ts = drawobj_lpac->timestamp; - drawobj_lpac->context->total_fault_count++; - - pr_context(device, drawobj_lpac->context, - "LPAC ctx %d ctx_type %s ts %d status %8.8X dispatch_queue=%d rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", - drawobj_lpac->context->id, kgsl_context_type(drawctxt_lpac->type), - drawobj_lpac->timestamp, status, - drawobj_lpac->context->gmu_dispatch_queue, lpac_rptr, lpac_wptr, - lpac_ib1base, lpac_ib1sz, 
lpac_ib2base, lpac_ib2sz); - - pr_context(device, drawobj_lpac->context, "lpac cmdline: %s\n", - drawctxt_lpac->base.proc_priv->cmdline); - - trace_adreno_gpu_fault(drawobj_lpac->context->id, drawobj_lpac->timestamp, status, - lpac_rptr, lpac_wptr, lpac_ib1base, lpac_ib1sz, lpac_ib2base, lpac_ib2sz, - adreno_get_level(drawobj_lpac->context)); - -} - static void do_fault_header(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); struct adreno_context *drawctxt; u32 status, rptr, wptr, ib1sz, ib2sz; u64 ib1base, ib2base; + if (gpudev->fault_header) + return gpudev->fault_header(adreno_dev, drawobj); + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS, &status); adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, &rptr); adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_WPTR, &wptr); @@ -1860,7 +1823,8 @@ static void adreno_hwsched_reset_and_snapshot(struct adreno_device *adreno_dev, if (obj_lpac) { drawobj_lpac = obj_lpac->drawobj; context_lpac = drawobj_lpac->context; - do_fault_header_lpac(adreno_dev, drawobj_lpac); + if (gpudev->lpac_fault_header) + gpudev->lpac_fault_header(adreno_dev, drawobj_lpac); } kgsl_device_snapshot(device, context, context_lpac, false); From 29b5ab0b4cc984139745f11216a56afb27b99dd7 Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Thu, 5 Oct 2023 14:45:08 -0700 Subject: [PATCH 0582/1016] msm: kgsl: Add snapshot code for A8x Add new support for slicing architecture and other sections for A8x snapshot. 
Change-Id: Ic7ffba50b1d680991c3859e65b763388355e2bd7 Signed-off-by: Urvashi Agrawal --- Kbuild | 2 + adreno-gpulist.h | 1 + adreno_gen8.c | 8 +- adreno_gen8.h | 42 + adreno_gen8_0_0_snapshot.h | 2103 ++++++++++++++++++++++++++++++++++++ adreno_gen8_gmu_snapshot.c | 301 ++++++ adreno_gen8_hwsched.c | 390 +++++++ adreno_gen8_hwsched.h | 10 + adreno_gen8_snapshot.c | 1693 +++++++++++++++++++++++++++++ adreno_gen8_snapshot.h | 651 +++++++++++ build/kgsl_defs.bzl | 2 + gen8_reg.h | 2 + kgsl_device.h | 18 + kgsl_snapshot.c | 81 +- kgsl_snapshot.h | 33 + 15 files changed, 5321 insertions(+), 16 deletions(-) create mode 100644 adreno_gen8_0_0_snapshot.h create mode 100644 adreno_gen8_gmu_snapshot.c create mode 100644 adreno_gen8_snapshot.c create mode 100644 adreno_gen8_snapshot.h diff --git a/Kbuild b/Kbuild index 47363980b3..be2529ec98 100644 --- a/Kbuild +++ b/Kbuild @@ -124,6 +124,7 @@ msm_kgsl-y += \ adreno_gen7_snapshot.o \ adreno_gen8.o \ adreno_gen8_gmu.o \ + adreno_gen8_gmu_snapshot.o \ adreno_gen8_hfi.o \ adreno_gen8_hwsched.o \ adreno_gen8_hwsched_hfi.o \ @@ -131,6 +132,7 @@ msm_kgsl-y += \ adreno_gen8_preempt.o \ adreno_gen8_ringbuffer.o \ adreno_gen8_rpmh.o \ + adreno_gen8_snapshot.o \ adreno_hwsched.o \ adreno_ioctl.o \ adreno_perfcounter.o \ diff --git a/adreno-gpulist.h b/adreno-gpulist.h index bbc1c9965b..6f82c1443d 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2654,6 +2654,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .nonctxt_regs = gen8_0_0_nonctxt_regs, .highest_bank_bit = 16, .gmu_hub_clk_freq = 200000000, + .gen8_snapshot_block_list = &gen8_0_0_snapshot_block_list, }; static const struct adreno_gpu_core *adreno_gpulist[] = { diff --git a/adreno_gen8.c b/adreno_gen8.c index 7ff5873426..6a24029904 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -277,6 +277,8 @@ int gen8_init(struct adreno_device *adreno_dev) if (of_fdt_get_ddrtype() == 0x7) adreno_dev->highest_bank_bit = 14; + gen8_crashdump_init(adreno_dev); + 
return adreno_allocate_global(device, &adreno_dev->pwrup_reglist, PAGE_SIZE, 0, 0, KGSL_MEMDESC_PRIVILEGED, "powerup_register_list"); @@ -377,7 +379,7 @@ static void gen8_host_aperture_set(struct adreno_device *adreno_dev, u32 pipe_id gen8_dev->aperture = aperture_val; } -static inline void gen8_regread64_aperture(struct kgsl_device *device, +void gen8_regread64_aperture(struct kgsl_device *device, u32 offsetwords_lo, u32 offsetwords_hi, u64 *value, u32 pipe, u32 slice_id, u32 use_slice_id) { @@ -391,7 +393,7 @@ static inline void gen8_regread64_aperture(struct kgsl_device *device, *value = (((u64)val_hi << 32) | val_lo); } -static inline void gen8_regread_aperture(struct kgsl_device *device, +void gen8_regread_aperture(struct kgsl_device *device, u32 offsetwords, u32 *value, u32 pipe, u32 slice_id, u32 use_slice_id) { gen8_host_aperture_set(ADRENO_DEVICE(device), pipe, slice_id, use_slice_id); @@ -2344,6 +2346,7 @@ const struct gen8_gpudev adreno_gen8_hwsched_gpudev = { .base = { .reg_offsets = gen8_register_offsets, .probe = gen8_hwsched_probe, + .snapshot = gen8_hwsched_snapshot, .irq_handler = gen8_irq_handler, .iommu_fault_block = gen8_iommu_fault_block, .preemption_context_init = gen8_preemption_context_init, @@ -2374,6 +2377,7 @@ const struct gen8_gpudev adreno_gen8_gmu_gpudev = { .base = { .reg_offsets = gen8_register_offsets, .probe = gen8_gmu_device_probe, + .snapshot = gen8_gmu_snapshot, .irq_handler = gen8_irq_handler, .rb_start = gen8_rb_start, .gpu_keepalive = gen8_gpu_keepalive, diff --git a/adreno_gen8.h b/adreno_gen8.h index 53b254eb0d..c29fc0e401 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -497,4 +497,46 @@ void gen8_rdpm_cx_freq_update(struct gen8_gmu_device *gmu, u32 freq); */ int gen8_scm_gpu_init_cx_regs(struct adreno_device *adreno_dev); +/** + * gen8_legacy_snapshot_registers - Dump registers for GPU/GMU + * @device: Handle to the KGSL device + * @buf: Target buffer to copy the data + * @remain: Buffer size remaining for dump + * 
@priv: Private data to dump the registers + * + * Return: Size of the section + */ +size_t gen8_legacy_snapshot_registers(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv); + +/** + * gen8_regread64_aperture - Read 64 bit register values + * @device: Handle to the KGSL device + * @offsetwords_lo: Lower 32 bit address to read + * @offsetwords_hi: Higher 32 bit address to read + * @value: The value of register at offsetwords + * @pipe: Pipe for which the register is to be read + * @slice_id: Slice for which the register is to be read + * @use_slice_id: Set if the value to be read is from a sliced register + * + * This function reads the 64 bit value for registers + */ +void gen8_regread64_aperture(struct kgsl_device *device, + u32 offsetwords_lo, u32 offsetwords_hi, u64 *value, u32 pipe, + u32 slice_id, u32 use_slice_id); + +/** + * gen8_regread_aperture - Read 32 bit register values + * @device: Handle to the KGSL device + * @offsetwords: 32 bit address to read + * @value: The value of register at offsetwords + * @pipe: Pipe for which the register is to be read + * @slice_id: Slice for which the register is to be read + * @use_slice_id: Set if the value to be read is from a sliced register + * + * This function reads the 32 bit value for registers + */ +void gen8_regread_aperture(struct kgsl_device *device, + u32 offsetwords, u32 *value, u32 pipe, u32 slice_id, u32 use_slice_id); + #endif diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h new file mode 100644 index 0000000000..5b5bc0c741 --- /dev/null +++ b/adreno_gen8_0_0_snapshot.h @@ -0,0 +1,2103 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ + +#ifndef __ADRENO_GEN8_0_0_SNAPSHOT_H +#define __ADRENO_GEN8_0_0_SNAPSHOT_H + +#include "adreno_gen8_snapshot.h" + +static struct gen8_shader_block gen8_0_0_shader_blocks[] = { + { TP0_TMO_DATA, 0x0200, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { TP0_SMO_DATA, 0x0080, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { TP0_MIPMAP_BASE_DATA, 0x0080, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_INST_DATA_3, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_INST_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_INST_DATA_1, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_0_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_1_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_2_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_3_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_4_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_5_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_6_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_7_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_CB_RAM, 0x0390, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_13_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_14_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_INST_TAG, 0x0100, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_INST_DATA_2, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_TMO_TAG, 0x0080, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_SMO_TAG, 0x0080, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_STATE_DATA, 0x0040, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_HWAVE_RAM, 0x0100, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_L0_INST_BUF, 0x0080, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_8_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, 
MAX_PHYSICAL_SLICES, 1}, + { SP_LB_9_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_10_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_11_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_12_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { HLSQ_DATAPATH_DSTR_META, 0x0170, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_DATAPATH_DSTR_META, 0x0020, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_DESC_REMAP_META, 0x0018, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_DESC_REMAP_META, 0x000C, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_DESC_REMAP_META, 0x0008, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_SLICE_TOP_META, 0x0048, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_SLICE_TOP_META, 0x0048, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_SLICE_TOP_META, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_L2STC_TAG_RAM, 0x0200, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_L2STC_INFO_CMD, 0x0474, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0080, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_CPS_BE_CTXT_BUF_RAM_TAG, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CPS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_CHUNK_CPS_RAM, 0x0300, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CHUNK_CPS_RAM, 0x0180, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_CHUNK_CVS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CHUNK_CVS_RAM_TAG, 0x0040, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_CHUNK_CPS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CHUNK_CPS_RAM_TAG, 0x0040, 1, 1, PIPE_LPAC, 
HLSQ_STATE, 1, 1}, + { HLSQ_ICB_CVS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_ICB_CVS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_ICB_CPS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_ICB_CPS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_CVS_MISC_RAM, 0x0540, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CVS_MISC_RAM, 0x0540, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_CPS_MISC_RAM, 0x0640, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CPS_MISC_RAM, 0x00B0, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_CPS_MISC_RAM_1, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_INST_RAM, 0x0200, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CPS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CPS_CONST_RAM, 0x0800, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_CVS_MISC_RAM_TAG, 0x0050, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CVS_MISC_RAM_TAG, 0x0050, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_CPS_MISC_RAM_TAG, 0x0050, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CPS_MISC_RAM_TAG, 0x0008, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_INST_RAM_TAG, 0x0014, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_INST_RAM_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_INST_RAM_TAG, 0x0004, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM, 0x03C0, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM, 0x0280, 1, 1, 
PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM, 0x0050, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM_TAG, 0x0008, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_INST_RAM_1, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_STPROC_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_SLICE_BACKEND_META, 0x00C0, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_SLICE_BACKEND_META, 0x00C0, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_SLICE_BACKEND_META, 0x00C0, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_INST_RAM_2, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_DATAPATH_META, 0x0020, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_INDIRECT_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_BACKEND_META, 0x0020, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_BACKEND_META, 0x0020, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_BACKEND_META, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, +}; + +/* + * Block : ['AHB_PRECD'] + * pairs : 1 (Regs:2) + */ +static const u32 gen8_0_0_ahb_precd_gpu_registers[] = { + 0x00012, 0x00013, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_ahb_precd_gpu_registers), 8)); + +/* + * Block : ['AHB_PRECD'] + * REGION : SLICE + * pairs : 1 (Regs:3) + */ +static const u32 gen8_0_0_ahb_precd_gpu_slice_slice_registers[] = { + 0x00580, 0x00582, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_ahb_precd_gpu_slice_slice_registers), 8)); + +/* + * Block : ['AHB_SECURE'] + * pairs : 3 (Regs:7) + */ +static const u32 gen8_0_0_ahb_secure_gpu_registers[] = { + 0x0f400, 0x0f400, 0x0f800, 0x0f803, 0x0fc00, 0x0fc01, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_ahb_secure_gpu_registers), 8)); + +/* + * Block : 
['AHB_SECURE'] + * pairs : 1 (Regs:3) + */ +static const u32 gen8_0_0_ahb_secure_cp_cp_pipe_none_registers[] = { + 0x0f000, 0x0f002, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_ahb_secure_cp_cp_pipe_none_registers), 8)); + +/* + * Block : ['GBIF'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 8 (Regs:59) + */ +static const u32 gen8_0_0_gbif_registers[] = { + 0x03c00, 0x03c0b, 0x03c40, 0x03c42, 0x03c45, 0x03c47, 0x03c49, 0x03c4e, + 0x03c50, 0x03c57, 0x03cc0, 0x03cc4, 0x03cc6, 0x03cd5, 0x03ce0, 0x03ce5, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_gbif_registers), 8)); + +/* + * Block : ['BROADCAST', 'GRAS', 'PC'] + * Block : ['RBBM', 'RDVM', 'UCHE'] + * Block : ['VFD', 'VPC', 'VSC'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 120 (Regs:1161) + */ +static const u32 gen8_0_0_gpu_registers[] = { + 0x00000, 0x00000, 0x00002, 0x00002, 0x00008, 0x0000d, 0x00010, 0x00013, + 0x00015, 0x00016, 0x00018, 0x00018, 0x0001a, 0x0001a, 0x0001c, 0x0001c, + 0x00028, 0x0002b, 0x0002d, 0x00039, 0x00040, 0x00053, 0x00062, 0x00066, + 0x00069, 0x0006e, 0x00071, 0x00072, 0x00074, 0x00074, 0x00076, 0x0007c, + 0x0007f, 0x0009a, 0x0009d, 0x000af, 0x000b2, 0x000d4, 0x000d7, 0x000e2, + 0x000e5, 0x000e6, 0x000e9, 0x000f1, 0x000f4, 0x000f6, 0x000f9, 0x00108, + 0x0010b, 0x0010e, 0x00111, 0x00111, 0x00114, 0x0011c, 0x0011f, 0x00121, + 0x00125, 0x00125, 0x00127, 0x00127, 0x00129, 0x00129, 0x0012b, 0x00131, + 0x00134, 0x00138, 0x0013a, 0x0013a, 0x0013c, 0x0013f, 0x00142, 0x00150, + 0x00153, 0x00155, 0x00158, 0x00159, 0x0015c, 0x0015c, 0x00166, 0x00179, + 0x0019e, 0x001a3, 0x001b0, 0x002c9, 0x002e2, 0x0036b, 0x00380, 0x0039b, + 0x003a4, 0x003ab, 0x003b4, 0x003c5, 0x003ce, 0x003cf, 0x003e0, 0x003e0, + 0x003f0, 0x003f0, 0x00440, 0x00444, 0x00460, 0x00460, 0x00c02, 0x00c04, + 0x00c06, 0x00c06, 0x00c10, 0x00cd9, 0x00ce0, 0x00d0c, 0x00df0, 0x00df4, + 0x00e01, 0x00e04, 0x00e06, 0x00e09, 0x00e0e, 0x00e13, 0x00e15, 0x00e16, + 0x00e20, 
0x00e37, 0x0ec00, 0x0ec01, 0x0ec05, 0x0ec05, 0x0ec07, 0x0ec07, + 0x0ec0a, 0x0ec0a, 0x0ec12, 0x0ec12, 0x0ec26, 0x0ec28, 0x0ec2b, 0x0ec2d, + 0x0ec2f, 0x0ec2f, 0x0ec40, 0x0ec41, 0x0ec45, 0x0ec45, 0x0ec47, 0x0ec47, + 0x0ec4a, 0x0ec4a, 0x0ec52, 0x0ec52, 0x0ec66, 0x0ec68, 0x0ec6b, 0x0ec6d, + 0x0ec6f, 0x0ec6f, 0x0ec80, 0x0ec81, 0x0ec85, 0x0ec85, 0x0ec87, 0x0ec87, + 0x0ec8a, 0x0ec8a, 0x0ec92, 0x0ec92, 0x0eca6, 0x0eca8, 0x0ecab, 0x0ecad, + 0x0ecaf, 0x0ecaf, 0x0ecc0, 0x0ecc1, 0x0ecc5, 0x0ecc5, 0x0ecc7, 0x0ecc7, + 0x0ecca, 0x0ecca, 0x0ecd2, 0x0ecd2, 0x0ece6, 0x0ece8, 0x0eceb, 0x0eced, + 0x0ecef, 0x0ecef, 0x0ed00, 0x0ed01, 0x0ed05, 0x0ed05, 0x0ed07, 0x0ed07, + 0x0ed0a, 0x0ed0a, 0x0ed12, 0x0ed12, 0x0ed26, 0x0ed28, 0x0ed2b, 0x0ed2d, + 0x0ed2f, 0x0ed2f, 0x0ed40, 0x0ed41, 0x0ed45, 0x0ed45, 0x0ed47, 0x0ed47, + 0x0ed4a, 0x0ed4a, 0x0ed52, 0x0ed52, 0x0ed66, 0x0ed68, 0x0ed6b, 0x0ed6d, + 0x0ed6f, 0x0ed6f, 0x0ed80, 0x0ed81, 0x0ed85, 0x0ed85, 0x0ed87, 0x0ed87, + 0x0ed8a, 0x0ed8a, 0x0ed92, 0x0ed92, 0x0eda6, 0x0eda8, 0x0edab, 0x0edad, + 0x0edaf, 0x0edaf, 0x0f400, 0x0f400, 0x0f800, 0x0f803, 0x0fc00, 0x0fc01, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_gpu_registers), 8)); + +/* + * Block : ['BROADCAST', 'GRAS', 'PC'] + * Block : ['RBBM', 'RDVM', 'UCHE'] + * Block : ['VFD', 'VPC', 'VSC'] + * REGION : SLICE + * Pipeline: PIPE_NONE + * pairs : 12 (Regs:89) + */ +static const u32 gen8_0_0_gpu_slice_registers[] = { + 0x00500, 0x00500, 0x00580, 0x00584, 0x00586, 0x0058b, 0x0058f, 0x00599, + 0x005a0, 0x005b3, 0x005c0, 0x005c0, 0x005c2, 0x005c6, 0x005e0, 0x005e3, + 0x005ec, 0x005ec, 0x00f01, 0x00f02, 0x00f04, 0x00f0c, 0x00f20, 0x00f37, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_gpu_slice_registers), 8)); + +/* + * Block : ['GMUAO', 'GMUCX', 'GMUCX_RAM'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 161 (Regs:614) + */ +static const u32 gen8_0_0_gmu_registers[] = { + 0x10001, 0x10001, 0x10003, 0x10003, 0x10401, 0x10401, 0x10403, 0x10403, + 
0x10801, 0x10801, 0x10803, 0x10803, 0x10c01, 0x10c01, 0x10c03, 0x10c03, + 0x11001, 0x11001, 0x11003, 0x11003, 0x11401, 0x11401, 0x11403, 0x11403, + 0x11801, 0x11801, 0x11803, 0x11803, 0x11c01, 0x11c01, 0x11c03, 0x11c03, + 0x1f400, 0x1f40b, 0x1f40f, 0x1f411, 0x1f500, 0x1f500, 0x1f507, 0x1f507, + 0x1f509, 0x1f50b, 0x1f700, 0x1f701, 0x1f704, 0x1f706, 0x1f708, 0x1f709, + 0x1f70c, 0x1f70d, 0x1f710, 0x1f711, 0x1f713, 0x1f716, 0x1f718, 0x1f71d, + 0x1f720, 0x1f725, 0x1f729, 0x1f729, 0x1f730, 0x1f747, 0x1f750, 0x1f75a, + 0x1f75c, 0x1f75c, 0x1f780, 0x1f781, 0x1f784, 0x1f78b, 0x1f790, 0x1f797, + 0x1f7a0, 0x1f7a7, 0x1f7b0, 0x1f7b7, 0x1f7e0, 0x1f7e1, 0x1f7e4, 0x1f7e5, + 0x1f7e8, 0x1f7e9, 0x1f7ec, 0x1f7ed, 0x1f800, 0x1f804, 0x1f807, 0x1f808, + 0x1f80b, 0x1f80c, 0x1f80f, 0x1f80f, 0x1f811, 0x1f811, 0x1f813, 0x1f817, + 0x1f819, 0x1f81c, 0x1f824, 0x1f830, 0x1f840, 0x1f842, 0x1f848, 0x1f848, + 0x1f84c, 0x1f84c, 0x1f850, 0x1f850, 0x1f858, 0x1f859, 0x1f868, 0x1f869, + 0x1f878, 0x1f883, 0x1f930, 0x1f931, 0x1f934, 0x1f935, 0x1f938, 0x1f939, + 0x1f93c, 0x1f93d, 0x1f940, 0x1f941, 0x1f943, 0x1f943, 0x1f948, 0x1f94a, + 0x1f94f, 0x1f951, 0x1f954, 0x1f955, 0x1f95d, 0x1f95d, 0x1f962, 0x1f96b, + 0x1f970, 0x1f970, 0x1f97c, 0x1f97e, 0x1f980, 0x1f981, 0x1f984, 0x1f986, + 0x1f992, 0x1f993, 0x1f996, 0x1f99e, 0x1f9c0, 0x1f9cf, 0x1f9f0, 0x1f9f1, + 0x1f9f8, 0x1f9fa, 0x1f9fc, 0x1f9fc, 0x1fa00, 0x1fa03, 0x1fc00, 0x1fc01, + 0x1fc04, 0x1fc07, 0x1fc10, 0x1fc10, 0x1fc14, 0x1fc14, 0x1fc18, 0x1fc19, + 0x1fc20, 0x1fc20, 0x1fc24, 0x1fc26, 0x1fc30, 0x1fc33, 0x1fc38, 0x1fc3b, + 0x1fc40, 0x1fc49, 0x1fc50, 0x1fc59, 0x1fc60, 0x1fc7f, 0x1fca0, 0x1fcef, + 0x20000, 0x20007, 0x20010, 0x20015, 0x20018, 0x2001a, 0x2001c, 0x2001d, + 0x20020, 0x20021, 0x20024, 0x20025, 0x2002a, 0x2002c, 0x20030, 0x20031, + 0x20034, 0x20036, 0x20080, 0x20087, 0x20300, 0x20301, 0x20304, 0x20305, + 0x20308, 0x2030c, 0x20310, 0x20314, 0x20318, 0x2031a, 0x20320, 0x20322, + 0x20324, 0x20326, 0x20328, 0x2032a, 0x20330, 0x20333, 0x20338, 0x20338, + 
0x20340, 0x20345, 0x20348, 0x20350, 0x20354, 0x2035b, 0x20360, 0x20367, + 0x20370, 0x20377, 0x23801, 0x23801, 0x23803, 0x23803, 0x23805, 0x23805, + 0x23807, 0x23807, 0x23809, 0x23809, 0x2380b, 0x2380b, 0x2380d, 0x2380d, + 0x2380f, 0x2380f, 0x23811, 0x23811, 0x23813, 0x23813, 0x23815, 0x23815, + 0x23817, 0x23817, 0x23819, 0x23819, 0x2381b, 0x2381b, 0x2381d, 0x2381d, + 0x2381f, 0x23820, 0x23822, 0x23822, 0x23824, 0x23824, 0x23826, 0x23826, + 0x23828, 0x23828, 0x2382a, 0x2382a, 0x2382c, 0x2382c, 0x2382e, 0x2382e, + 0x23830, 0x23830, 0x23832, 0x23832, 0x23834, 0x23834, 0x23836, 0x23836, + 0x23838, 0x23838, 0x2383a, 0x2383a, 0x2383c, 0x2383c, 0x2383e, 0x2383e, + 0x23840, 0x23847, 0x23b00, 0x23b01, 0x23b03, 0x23b03, 0x23b05, 0x23b0e, + 0x23b10, 0x23b13, 0x23b15, 0x23b16, 0x23b28, 0x23b28, 0x23b30, 0x23b30, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_gmu_registers), 8)); + +/* + * Block : ['GMUGX'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 15 (Regs:174) + */ +static const u32 gen8_0_0_gmugx_registers[] = { + 0x0dc00, 0x0dc0d, 0x0dc10, 0x0dc11, 0x0dc13, 0x0dc15, 0x0dc18, 0x0dc1a, + 0x0dc1c, 0x0dc2f, 0x0dc40, 0x0dc42, 0x0dc60, 0x0dc7f, 0x0dc88, 0x0dc90, + 0x0dc98, 0x0dc99, 0x0dca0, 0x0dcbf, 0x0dcc8, 0x0dcd0, 0x0dcd8, 0x0dcd9, + 0x0dce0, 0x0dcff, 0x0dd08, 0x0dd10, 0x0dd18, 0x0dd19, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_gmugx_registers), 8)); + +/* + * Block : ['GMUGX'] + * REGION : SLICE + * Pipeline: PIPE_NONE + * pairs : 23 (Regs:312) + */ +static const u32 gen8_0_0_gmugx_slice_registers[] = { + 0x0e400, 0x0e401, 0x0e404, 0x0e40a, 0x0e40e, 0x0e42f, 0x0e438, 0x0e440, + 0x0e448, 0x0e449, 0x0e450, 0x0e46f, 0x0e478, 0x0e480, 0x0e488, 0x0e489, + 0x0e490, 0x0e4af, 0x0e4b8, 0x0e4c0, 0x0e4c8, 0x0e4c9, 0x0e4d0, 0x0e4ef, + 0x0e4f8, 0x0e500, 0x0e508, 0x0e509, 0x0e510, 0x0e52f, 0x0e538, 0x0e540, + 0x0e548, 0x0e549, 0x0e550, 0x0e56f, 0x0e578, 0x0e580, 0x0e588, 0x0e589, + 0x0e590, 0x0e5af, 0x0e5b8, 0x0e5c0, 0x0e5c8, 
0x0e5c9, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_gmugx_slice_registers), 8)); + +/* + * Block : ['CX_MISC'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 7 (Regs:58) + */ +static const u32 gen8_0_0_cx_misc_registers[] = { + 0x27800, 0x27800, 0x27810, 0x27814, 0x27820, 0x27824, 0x27828, 0x2782a, + 0x27832, 0x27857, 0x27880, 0x27883, 0x27c00, 0x27c01, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_cx_misc_registers), 8)); + +/* + * Block : ['DBGC'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 21 (Regs:152) + */ +static const u32 gen8_0_0_dbgc_registers[] = { + 0x00600, 0x0061c, 0x0061e, 0x0063d, 0x00640, 0x00644, 0x00650, 0x00655, + 0x00660, 0x00660, 0x00662, 0x00668, 0x0066a, 0x0066a, 0x00680, 0x00685, + 0x00700, 0x00704, 0x00707, 0x0070a, 0x0070f, 0x00716, 0x00720, 0x00724, + 0x00730, 0x00732, 0x00740, 0x00740, 0x00742, 0x0074a, 0x00750, 0x00755, + 0x00759, 0x0075c, 0x00760, 0x00763, 0x00770, 0x00770, 0x00780, 0x0078d, + 0x00790, 0x00790, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_dbgc_registers), 8)); + +/* + * Block : ['DBGC'] + * REGION : SLICE + * Pipeline: PIPE_NONE + * pairs : 2 (Regs:61) + */ +static const u32 gen8_0_0_dbgc_slice_registers[] = { + 0x007a0, 0x007d5, 0x007e0, 0x007e6, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_dbgc_slice_registers), 8)); + +/* + * Block : ['CX_DBGC'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 6 (Regs:75) + */ +static const u32 gen8_0_0_cx_dbgc_registers[] = { + 0x18400, 0x1841c, 0x1841e, 0x1843d, 0x18440, 0x18444, 0x18450, 0x18455, + 0x1846a, 0x1846a, 0x18580, 0x18581, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_cx_dbgc_registers), 8)); + +/* + * Block : ['CP'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * Cluster : CLUSTER_NONE + * pairs : 15 (Regs:310) + */ +static const u32 gen8_0_0_cp_cp_pipe_none_registers[] = { + 0x00800, 0x0081e, 0x00820, 0x0082d, 0x00838, 
0x0083e, 0x00840, 0x00847, + 0x0084b, 0x0084c, 0x00850, 0x0088f, 0x008b5, 0x008b6, 0x008c0, 0x008cb, + 0x008d0, 0x008e4, 0x008e7, 0x008ee, 0x008fa, 0x008fd, 0x00928, 0x00929, + 0x00958, 0x0095b, 0x00980, 0x009ff, 0x0f000, 0x0f002, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_cp_cp_pipe_none_registers), 8)); + +/* + * Block : ['CP'] + * REGION : UNSLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_NONE + * pairs : 8 (Regs:96) + */ +static const u32 gen8_0_0_cp_cp_pipe_br_registers[] = { + 0x00830, 0x00837, 0x0084d, 0x0084f, 0x008a0, 0x008b4, 0x008b7, 0x008bb, + 0x008f0, 0x008f9, 0x00900, 0x0091e, 0x00920, 0x00926, 0x00930, 0x0093a, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_cp_cp_pipe_br_registers), 8)); + +/* + * Block : ['CP'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_NONE + * pairs : 4 (Regs:23) + */ +static const u32 gen8_0_0_cp_slice_cp_pipe_br_registers[] = { + 0x00b00, 0x00b0c, 0x00b10, 0x00b10, 0x00b80, 0x00b84, 0x00b90, 0x00b93, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_cp_slice_cp_pipe_br_registers), 8)); + +/* + * Block : ['CP'] + * REGION : UNSLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_NONE + * pairs : 9 (Regs:72) + */ +static const u32 gen8_0_0_cp_cp_pipe_bv_registers[] = { + 0x00830, 0x00835, 0x0084d, 0x0084f, 0x008b0, 0x008b4, 0x008b7, 0x008bb, + 0x008f0, 0x008f9, 0x00900, 0x00913, 0x00918, 0x0091d, 0x00920, 0x00925, + 0x00930, 0x0093a, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_cp_cp_pipe_bv_registers), 8)); + +/* + * Block : ['CP'] + * REGION : SLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_NONE + * pairs : 4 (Regs:23) + */ +static const u32 gen8_0_0_cp_slice_cp_pipe_bv_registers[] = { + 0x00b00, 0x00b0c, 0x00b10, 0x00b10, 0x00b80, 0x00b84, 0x00b90, 0x00b93, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_cp_slice_cp_pipe_bv_registers), 8)); + +/* + * Block : ['CP'] + * REGION : UNSLICE + * Pipeline: 
PIPE_LPAC + * Cluster : CLUSTER_NONE + * pairs : 9 (Regs:77) + */ +static const u32 gen8_0_0_cp_cp_pipe_lpac_registers[] = { + 0x00830, 0x00837, 0x0084d, 0x0084f, 0x008a0, 0x008b4, 0x008b7, 0x008bb, + 0x008f0, 0x008f5, 0x008f8, 0x008f9, 0x00900, 0x00913, 0x00918, 0x0091d, + 0x00920, 0x00925, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_cp_cp_pipe_lpac_registers), 8)); + +/* + * Block : ['CP'] + * REGION : UNSLICE + * Pipeline: PIPE_AQE0 + * Cluster : CLUSTER_NONE + * pairs : 10 (Regs:22) + */ +static const u32 gen8_0_0_cp_cp_pipe_aqe0_registers[] = { + 0x0084d, 0x0084d, 0x008b0, 0x008b4, 0x008b7, 0x008b9, 0x008bb, 0x008bb, + 0x008f0, 0x008f1, 0x008f4, 0x008f5, 0x008f8, 0x008f9, 0x00910, 0x00913, + 0x0091d, 0x0091d, 0x00925, 0x00925, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_cp_cp_pipe_aqe0_registers), 8)); + +/* + * Block : ['CP'] + * REGION : UNSLICE + * Pipeline: PIPE_AQE1 + * Cluster : CLUSTER_NONE + * pairs : 10 (Regs:22) + */ +static const u32 gen8_0_0_cp_cp_pipe_aqe1_registers[] = { + 0x0084d, 0x0084d, 0x008b0, 0x008b4, 0x008b7, 0x008b9, 0x008bb, 0x008bb, + 0x008f0, 0x008f1, 0x008f4, 0x008f5, 0x008f8, 0x008f9, 0x00910, 0x00913, + 0x0091d, 0x0091d, 0x00925, 0x00925, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_cp_cp_pipe_aqe1_registers), 8)); + +/* + * Block : ['CP'] + * REGION : UNSLICE + * Pipeline: PIPE_DDE_BR + * Cluster : CLUSTER_NONE + * pairs : 13 (Regs:30) + */ +static const u32 gen8_0_0_cp_cp_pipe_dde_br_registers[] = { + 0x0084d, 0x0084d, 0x008b0, 0x008b4, 0x008b7, 0x008b9, 0x008bb, 0x008bb, + 0x008f0, 0x008f1, 0x008f4, 0x008f5, 0x008f8, 0x008f9, 0x008fe, 0x008ff, + 0x00910, 0x00913, 0x00918, 0x00918, 0x0091c, 0x0091e, 0x00920, 0x00921, + 0x00924, 0x00925, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_cp_cp_pipe_dde_br_registers), 8)); + +/* + * Block : ['CP'] + * REGION : UNSLICE + * Pipeline: PIPE_DDE_BV + * Cluster : CLUSTER_NONE + * pairs : 13 (Regs:29) 
+ */ +static const u32 gen8_0_0_cp_cp_pipe_dde_bv_registers[] = { + 0x0084d, 0x0084d, 0x008b0, 0x008b4, 0x008b7, 0x008b9, 0x008bb, 0x008bb, + 0x008f0, 0x008f1, 0x008f4, 0x008f5, 0x008f8, 0x008f9, 0x008fe, 0x008ff, + 0x00910, 0x00913, 0x00918, 0x00918, 0x0091c, 0x0091d, 0x00920, 0x00921, + 0x00924, 0x00925, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_cp_cp_pipe_dde_bv_registers), 8)); + +/* + * Block : ['BROADCAST', 'CX_DBGC', 'CX_MISC', 'DBGC', 'GBIF', 'GMUAO'] + * Block : ['GMUCX', 'GMUGX', 'GRAS', 'PC', 'RBBM'] + * Block : ['RDVM', 'UCHE', 'VFD', 'VPC', 'VSC'] + * REGION : UNSLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_NONE + * pairs : 11 (Regs:1124) + */ +static const u32 gen8_0_0_non_context_pipe_br_registers[] = { + 0x09600, 0x09605, 0x09610, 0x09617, 0x09620, 0x09627, 0x09670, 0x0967b, + 0x09e00, 0x09e04, 0x09e06, 0x09e15, 0x09e17, 0x09e23, 0x09e30, 0x09e3f, + 0x09e50, 0x09e59, 0x09e60, 0x09e65, 0x0d200, 0x0d5ff, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_pipe_br_registers), 8)); + +/* + * Block : ['BROADCAST', 'CX_DBGC', 'CX_MISC', 'DBGC', 'GBIF', 'GMUAO'] + * Block : ['GMUCX', 'GMUGX', 'GRAS', 'PC', 'RBBM'] + * Block : ['RDVM', 'UCHE', 'VFD', 'VPC', 'VSC'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_NONE + * pairs : 21 (Regs:300) + */ +static const u32 gen8_0_0_non_context_slice_pipe_br_registers[] = { + 0x08600, 0x08602, 0x08610, 0x08613, 0x08700, 0x08703, 0x08710, 0x08713, + 0x08720, 0x08723, 0x08730, 0x08733, 0x08740, 0x08744, 0x09680, 0x09681, + 0x09690, 0x0969b, 0x09740, 0x09745, 0x09750, 0x0975b, 0x09770, 0x097ef, + 0x09f00, 0x09f0f, 0x09f20, 0x09f23, 0x09f30, 0x09f31, 0x0a600, 0x0a600, + 0x0a603, 0x0a603, 0x0a610, 0x0a61f, 0x0a630, 0x0a632, 0x0a638, 0x0a63c, + 0x0a640, 0x0a67f, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_slice_pipe_br_registers), 8)); + +/* + * Block : ['BROADCAST', 'CX_DBGC', 'CX_MISC', 'DBGC', 'GBIF', 'GMUAO'] + 
* Block : ['GMUCX', 'GMUGX', 'GRAS', 'PC', 'RBBM'] + * Block : ['RDVM', 'UCHE', 'VFD', 'VPC', 'VSC'] + * REGION : UNSLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_NONE + * pairs : 11 (Regs:1124) + */ +static const u32 gen8_0_0_non_context_pipe_bv_registers[] = { + 0x09600, 0x09605, 0x09610, 0x09617, 0x09620, 0x09627, 0x09670, 0x0967b, + 0x09e00, 0x09e04, 0x09e06, 0x09e15, 0x09e17, 0x09e23, 0x09e30, 0x09e3f, + 0x09e50, 0x09e59, 0x09e60, 0x09e65, 0x0d200, 0x0d5ff, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_pipe_bv_registers), 8)); + +/* + * Block : ['BROADCAST', 'CX_DBGC', 'CX_MISC', 'DBGC', 'GBIF', 'GMUAO'] + * Block : ['GMUCX', 'GMUGX', 'GRAS', 'PC', 'RBBM'] + * Block : ['RDVM', 'UCHE', 'VFD', 'VPC', 'VSC'] + * REGION : SLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_NONE + * pairs : 21 (Regs:300) + */ +static const u32 gen8_0_0_non_context_slice_pipe_bv_registers[] = { + 0x08600, 0x08602, 0x08610, 0x08613, 0x08700, 0x08703, 0x08710, 0x08713, + 0x08720, 0x08723, 0x08730, 0x08733, 0x08740, 0x08744, 0x09680, 0x09681, + 0x09690, 0x0969b, 0x09740, 0x09745, 0x09750, 0x0975b, 0x09770, 0x097ef, + 0x09f00, 0x09f0f, 0x09f20, 0x09f23, 0x09f30, 0x09f31, 0x0a600, 0x0a600, + 0x0a603, 0x0a603, 0x0a610, 0x0a61f, 0x0a630, 0x0a632, 0x0a638, 0x0a63c, + 0x0a640, 0x0a67f, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_slice_pipe_bv_registers), 8)); + +/* + * Block : ['BROADCAST', 'CX_DBGC', 'CX_MISC', 'DBGC', 'GBIF', 'GMUAO'] + * Block : ['GMUCX', 'GMUGX', 'GRAS', 'PC', 'RBBM'] + * Block : ['RDVM', 'UCHE', 'VFD', 'VPC', 'VSC'] + * REGION : UNSLICE + * Pipeline: PIPE_LPAC + * Cluster : CLUSTER_NONE + * pairs : 1 (Regs:1) + */ +static const u32 gen8_0_0_non_context_pipe_lpac_registers[] = { + 0x00e14, 0x00e14, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_pipe_lpac_registers), 8)); + +/* + * Block : ['RB'] + * REGION : UNSLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_NONE + * 
pairs : 4 (Regs:26) + */ +static const u32 gen8_0_0_non_context_rb_pipe_br_rbp_registers[] = { + 0x08f00, 0x08f07, 0x08f10, 0x08f15, 0x08f20, 0x08f29, 0x08f30, 0x08f31, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_rb_pipe_br_rbp_registers), 8)); + +/* + * Block : ['RB'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_NONE + * pairs : 5 (Regs:32) + */ +static const u32 gen8_0_0_non_context_rb_slice_pipe_br_rac_registers[] = { + 0x08e09, 0x08e0b, 0x08e10, 0x08e17, 0x08e51, 0x08e5a, 0x08e69, 0x08e6f, + 0x08ea0, 0x08ea3, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_rb_slice_pipe_br_rac_registers), 8)); + +/* + * Block : ['RB'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_NONE + * pairs : 9 (Regs:28) + */ +static const u32 gen8_0_0_non_context_rb_slice_pipe_br_rbp_registers[] = { + 0x08e01, 0x08e01, 0x08e04, 0x08e04, 0x08e06, 0x08e08, 0x08e0c, 0x08e0c, + 0x08e18, 0x08e1c, 0x08e3b, 0x08e40, 0x08e50, 0x08e50, 0x08e5d, 0x08e5d, + 0x08e77, 0x08e7f, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_rb_slice_pipe_br_rbp_registers), 8)); + +/* + * Block : ['SP'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * Cluster : CLUSTER_NONE + * Location: HLSQ_STATE + * pairs : 8 (Regs:34) + */ +static const u32 gen8_0_0_non_context_sp_pipe_none_hlsq_state_registers[] = { + 0x0ae05, 0x0ae05, 0x0ae10, 0x0ae13, 0x0ae15, 0x0ae16, 0x0ae52, 0x0ae52, + 0x0ae60, 0x0ae67, 0x0ae69, 0x0ae6e, 0x0ae70, 0x0ae75, 0x0aec0, 0x0aec5, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_sp_pipe_none_hlsq_state_registers), 8)); + +/* + * Block : ['SP'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * Cluster : CLUSTER_NONE + * Location: SP_TOP + * pairs : 6 (Regs:60) + */ +static const u32 gen8_0_0_non_context_sp_pipe_none_sp_top_registers[] = { + 0x0ae00, 0x0ae0c, 0x0ae0f, 0x0ae0f, 0x0ae35, 0x0ae35, 0x0ae3a, 0x0ae3f, + 0x0ae50, 0x0ae52, 0x0ae80, 
0x0aea3, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_sp_pipe_none_sp_top_registers), 8)); + +/* + * Block : ['SP'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * Cluster : CLUSTER_NONE + * Location: USPTP + * pairs : 9 (Regs:64) + */ +static const u32 gen8_0_0_non_context_sp_pipe_none_usptp_registers[] = { + 0x0ae00, 0x0ae0c, 0x0ae0f, 0x0ae0f, 0x0ae17, 0x0ae19, 0x0ae30, 0x0ae32, + 0x0ae35, 0x0ae35, 0x0ae3a, 0x0ae3b, 0x0ae3e, 0x0ae3f, 0x0ae50, 0x0ae52, + 0x0ae80, 0x0aea3, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_sp_pipe_none_usptp_registers), 8)); + +/* + * Block : ['SP'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * Cluster : CLUSTER_NONE + * Location: HLSQ_DP_STR + * pairs : 5 (Regs:18) + */ +static const u32 gen8_0_0_non_context_sp_pipe_none_hlsq_dp_str_registers[] = { + 0x0ae05, 0x0ae05, 0x0ae60, 0x0ae65, 0x0ae6b, 0x0ae6c, 0x0ae73, 0x0ae75, + 0x0aec0, 0x0aec5, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_sp_pipe_none_hlsq_dp_str_registers), 8)); + +/* + * Block : ['TPL1'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * Cluster : CLUSTER_NONE + * Location: USPTP + * pairs : 5 (Regs:48) + */ +static const u32 gen8_0_0_non_context_tpl1_pipe_none_usptp_registers[] = { + 0x0b600, 0x0b600, 0x0b602, 0x0b602, 0x0b604, 0x0b604, 0x0b606, 0x0b61e, + 0x0b620, 0x0b633, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_tpl1_pipe_none_usptp_registers), 8)); + +/* + * Block : ['GRAS'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_VPC_VS + * pairs : 8 (Regs:232) + */ +static const u32 gen8_0_0_gras_slice_pipe_br_cluster_vpc_vs_registers[] = { + 0x08200, 0x08213, 0x08220, 0x08225, 0x08230, 0x0823b, 0x08240, 0x0825f, + 0x08270, 0x0828f, 0x0829f, 0x082b7, 0x082d0, 0x0832f, 0x08500, 0x08508, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_gras_slice_pipe_br_cluster_vpc_vs_registers), 8)); + +/* + * 
Block : ['GRAS'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_GRAS + * pairs : 13 (Regs:326) + */ +static const u32 gen8_0_0_gras_slice_pipe_br_cluster_gras_registers[] = { + 0x08080, 0x08080, 0x08086, 0x08092, 0x080c0, 0x080df, 0x08101, 0x08110, + 0x08130, 0x0814f, 0x08200, 0x08213, 0x08220, 0x08225, 0x08230, 0x0823b, + 0x08240, 0x0825f, 0x08270, 0x0828f, 0x0829f, 0x082b7, 0x082d0, 0x0832f, + 0x08500, 0x08508, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_gras_slice_pipe_br_cluster_gras_registers), 8)); + +/* + * Block : ['GRAS'] + * REGION : SLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_VPC_VS + * pairs : 8 (Regs:232) + */ +static const u32 gen8_0_0_gras_slice_pipe_bv_cluster_vpc_vs_registers[] = { + 0x08200, 0x08213, 0x08220, 0x08225, 0x08230, 0x0823b, 0x08240, 0x0825f, + 0x08270, 0x0828f, 0x0829f, 0x082b7, 0x082d0, 0x0832f, 0x08500, 0x08508, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_gras_slice_pipe_bv_cluster_vpc_vs_registers), 8)); + +/* + * Block : ['GRAS'] + * REGION : SLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_GRAS + * pairs : 13 (Regs:326) + */ +static const u32 gen8_0_0_gras_slice_pipe_bv_cluster_gras_registers[] = { + 0x08080, 0x08080, 0x08086, 0x08092, 0x080c0, 0x080df, 0x08101, 0x08110, + 0x08130, 0x0814f, 0x08200, 0x08213, 0x08220, 0x08225, 0x08230, 0x0823b, + 0x08240, 0x0825f, 0x08270, 0x0828f, 0x0829f, 0x082b7, 0x082d0, 0x0832f, + 0x08500, 0x08508, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_gras_slice_pipe_bv_cluster_gras_registers), 8)); + +/* + * Block : ['PC'] + * REGION : UNSLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_FE_US + * pairs : 6 (Regs:35) + */ +static const u32 gen8_0_0_pc_pipe_br_cluster_fe_us_registers[] = { + 0x09805, 0x09807, 0x0980b, 0x0980b, 0x09812, 0x09817, 0x0981a, 0x0981b, + 0x09b00, 0x09b0d, 0x09b10, 0x09b18, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_pc_pipe_br_cluster_fe_us_registers), 8)); + 
+/* + * Block : ['PC'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_FE_S + * pairs : 2 (Regs:23) + */ +static const u32 gen8_0_0_pc_slice_pipe_br_cluster_fe_s_registers[] = { + 0x09b00, 0x09b0d, 0x09b10, 0x09b18, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_pc_slice_pipe_br_cluster_fe_s_registers), 8)); + +/* + * Block : ['PC'] + * REGION : UNSLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_FE_US + * pairs : 6 (Regs:35) + */ +static const u32 gen8_0_0_pc_pipe_bv_cluster_fe_us_registers[] = { + 0x09805, 0x09807, 0x0980b, 0x0980b, 0x09812, 0x09817, 0x0981a, 0x0981b, + 0x09b00, 0x09b0d, 0x09b10, 0x09b18, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_pc_pipe_bv_cluster_fe_us_registers), 8)); + +/* + * Block : ['PC'] + * REGION : SLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_FE_S + * pairs : 2 (Regs:23) + */ +static const u32 gen8_0_0_pc_slice_pipe_bv_cluster_fe_s_registers[] = { + 0x09b00, 0x09b0d, 0x09b10, 0x09b18, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_pc_slice_pipe_bv_cluster_fe_s_registers), 8)); + +/* + * Block : ['VFD'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_FE_S + * pairs : 2 (Regs:236) + */ +static const u32 gen8_0_0_vfd_slice_pipe_br_cluster_fe_s_registers[] = { + 0x0a000, 0x0a009, 0x0a00e, 0x0a0ef, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_vfd_slice_pipe_br_cluster_fe_s_registers), 8)); + +/* + * Block : ['VFD'] + * REGION : SLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_FE_S + * pairs : 2 (Regs:236) + */ +static const u32 gen8_0_0_vfd_slice_pipe_bv_cluster_fe_s_registers[] = { + 0x0a000, 0x0a009, 0x0a00e, 0x0a0ef, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_vfd_slice_pipe_bv_cluster_fe_s_registers), 8)); + +/* + * Block : ['VPC'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_FE_S + * pairs : 1 (Regs:27) + */ +static const u32 
gen8_0_0_vpc_slice_pipe_br_cluster_fe_s_registers[] = { + 0x09300, 0x0931a, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_vpc_slice_pipe_br_cluster_fe_s_registers), 8)); + +/* + * Block : ['VPC'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_VPC_VS + * pairs : 2 (Regs:29) + */ +static const u32 gen8_0_0_vpc_slice_pipe_br_cluster_vpc_vs_registers[] = { + 0x090c0, 0x090c1, 0x09300, 0x0931a, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_vpc_slice_pipe_br_cluster_vpc_vs_registers), 8)); + +/* + * Block : ['VPC'] + * REGION : UNSLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_VPC_US + * pairs : 3 (Regs:58) + */ +static const u32 gen8_0_0_vpc_pipe_br_cluster_vpc_us_registers[] = { + 0x09180, 0x09180, 0x09182, 0x0919f, 0x09300, 0x0931a, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_vpc_pipe_br_cluster_vpc_us_registers), 8)); + +/* + * Block : ['VPC'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_VPC_PS + * pairs : 4 (Regs:52) + */ +static const u32 gen8_0_0_vpc_slice_pipe_br_cluster_vpc_ps_registers[] = { + 0x09240, 0x0924f, 0x09252, 0x09255, 0x09278, 0x0927c, 0x09300, 0x0931a, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_vpc_slice_pipe_br_cluster_vpc_ps_registers), 8)); + +/* + * Block : ['VPC'] + * REGION : SLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_FE_S + * pairs : 1 (Regs:27) + */ +static const u32 gen8_0_0_vpc_slice_pipe_bv_cluster_fe_s_registers[] = { + 0x09300, 0x0931a, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_vpc_slice_pipe_bv_cluster_fe_s_registers), 8)); + +/* + * Block : ['VPC'] + * REGION : SLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_VPC_VS + * pairs : 2 (Regs:29) + */ +static const u32 gen8_0_0_vpc_slice_pipe_bv_cluster_vpc_vs_registers[] = { + 0x090c0, 0x090c1, 0x09300, 0x0931a, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_vpc_slice_pipe_bv_cluster_vpc_vs_registers), 8)); + 
+/* + * Block : ['VPC'] + * REGION : UNSLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_VPC_US + * pairs : 3 (Regs:58) + */ +static const u32 gen8_0_0_vpc_pipe_bv_cluster_vpc_us_registers[] = { + 0x09180, 0x09180, 0x09182, 0x0919f, 0x09300, 0x0931a, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_vpc_pipe_bv_cluster_vpc_us_registers), 8)); + +/* + * Block : ['VPC'] + * REGION : SLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_VPC_PS + * pairs : 4 (Regs:52) + */ +static const u32 gen8_0_0_vpc_slice_pipe_bv_cluster_vpc_ps_registers[] = { + 0x09240, 0x0924f, 0x09252, 0x09255, 0x09278, 0x0927c, 0x09300, 0x0931a, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_vpc_slice_pipe_bv_cluster_vpc_ps_registers), 8)); + +/* + * Block : ['RB'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_PS + * pairs : 36 (Regs:151) + */ +static const u32 gen8_0_0_rb_slice_pipe_br_cluster_ps_rac_registers[] = { + 0x08802, 0x08802, 0x08804, 0x0880a, 0x0880e, 0x08811, 0x08813, 0x08814, + 0x08818, 0x0881e, 0x08821, 0x08821, 0x08823, 0x08826, 0x08829, 0x08829, + 0x0882b, 0x0882e, 0x08831, 0x08831, 0x08833, 0x08836, 0x08839, 0x08839, + 0x0883b, 0x0883e, 0x08841, 0x08841, 0x08843, 0x08846, 0x08849, 0x08849, + 0x0884b, 0x0884e, 0x08851, 0x08851, 0x08853, 0x08856, 0x08859, 0x08859, + 0x0885b, 0x0885e, 0x08860, 0x08864, 0x08870, 0x08870, 0x08873, 0x08876, + 0x08878, 0x08879, 0x08882, 0x08885, 0x08887, 0x08889, 0x08891, 0x08891, + 0x08898, 0x08899, 0x088b0, 0x088cf, 0x088e5, 0x088e5, 0x088f4, 0x088f5, + 0x08930, 0x08937, 0x08c00, 0x08c01, 0x08c18, 0x08c1f, 0x08c26, 0x08c34, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_rb_slice_pipe_br_cluster_ps_rac_registers), 8)); + +/* + * Block : ['RB'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_PS + * pairs : 36 (Regs:104) + */ +static const u32 gen8_0_0_rb_slice_pipe_br_cluster_ps_rbp_registers[] = { + 0x08800, 0x08801, 0x08803, 0x08803, 0x0880b, 0x0880d, 0x08812, 
0x08812, + 0x08815, 0x08816, 0x08820, 0x08820, 0x08822, 0x08822, 0x08827, 0x08828, + 0x0882a, 0x0882a, 0x0882f, 0x08830, 0x08832, 0x08832, 0x08837, 0x08838, + 0x0883a, 0x0883a, 0x0883f, 0x08840, 0x08842, 0x08842, 0x08847, 0x08848, + 0x0884a, 0x0884a, 0x0884f, 0x08850, 0x08852, 0x08852, 0x08857, 0x08858, + 0x0885a, 0x0885a, 0x0885f, 0x0885f, 0x08865, 0x08865, 0x08871, 0x08872, + 0x08877, 0x08877, 0x08880, 0x08881, 0x08886, 0x08886, 0x08890, 0x08890, + 0x088d0, 0x088e4, 0x088e6, 0x088e6, 0x088e8, 0x088ea, 0x088f0, 0x088f1, + 0x08900, 0x0891a, 0x08927, 0x08928, 0x08c17, 0x08c17, 0x08c20, 0x08c25, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_rb_slice_pipe_br_cluster_ps_rbp_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_VS + * Location: HLSQ_STATE + * pairs : 29 (Regs:114) + */ +static const u32 gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_registers[] = { + 0x0a800, 0x0a801, 0x0a81b, 0x0a81d, 0x0a822, 0x0a822, 0x0a824, 0x0a824, + 0x0a827, 0x0a82a, 0x0a830, 0x0a830, 0x0a832, 0x0a835, 0x0a83a, 0x0a83a, + 0x0a83c, 0x0a83c, 0x0a83f, 0x0a841, 0x0a85b, 0x0a85d, 0x0a862, 0x0a862, + 0x0a864, 0x0a864, 0x0a867, 0x0a867, 0x0a870, 0x0a870, 0x0a872, 0x0a872, + 0x0a88c, 0x0a88e, 0x0a893, 0x0a893, 0x0a895, 0x0a895, 0x0a898, 0x0a898, + 0x0a89a, 0x0a89d, 0x0a8b0, 0x0a8bb, 0x0a8c0, 0x0a8c3, 0x0a974, 0x0a977, + 0x0ab00, 0x0ab03, 0x0ab05, 0x0ab05, 0x0ab09, 0x0ab09, 0x0ab23, 0x0ab23, + 0x0abd0, 0x0abff, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_VS + * Location: HLSQ_STATE + * pairs : 2 (Regs:34) + */ +static const u32 gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_cctx_registers[] = { + 0x0a8a0, 0x0a8af, 0x0ab0a, 0x0ab1b, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED( + 
sizeof(gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_cctx_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_VS + * Location: HLSQ_STATE + * pairs : 1 (Regs:160) + */ +static const u32 gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_shared_const_registers[] = { + 0x0ab30, 0x0abcf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED( + sizeof(gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_shared_const_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_VS + * Location: SP_TOP + * pairs : 23 (Regs:39) + */ +static const u32 gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_registers[] = { + 0x0a800, 0x0a800, 0x0a81c, 0x0a81d, 0x0a822, 0x0a824, 0x0a826, 0x0a826, + 0x0a82d, 0x0a82d, 0x0a82f, 0x0a831, 0x0a834, 0x0a835, 0x0a83a, 0x0a83c, + 0x0a83e, 0x0a83e, 0x0a840, 0x0a840, 0x0a85c, 0x0a85d, 0x0a862, 0x0a864, + 0x0a866, 0x0a866, 0x0a868, 0x0a868, 0x0a870, 0x0a871, 0x0a88d, 0x0a88e, + 0x0a893, 0x0a895, 0x0a897, 0x0a897, 0x0a899, 0x0a899, 0x0ab00, 0x0ab00, + 0x0ab02, 0x0ab02, 0x0ab04, 0x0ab05, 0x0ab09, 0x0ab09, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_VS + * Location: SP_TOP + * pairs : 2 (Regs:34) + */ +static const u32 gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_cctx_registers[] = { + 0x0a8a0, 0x0a8af, 0x0ab0a, 0x0ab1b, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_cctx_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_VS + * Location: USPTP + * pairs : 15 (Regs:145) + */ +static const u32 gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_usptp_registers[] = { + 0x0a800, 0x0a81b, 0x0a81e, 0x0a821, 0x0a823, 0x0a827, 0x0a82d, 0x0a82d, + 0x0a82f, 0x0a833, 0x0a836, 0x0a839, 
0x0a83b, 0x0a85b, 0x0a85e, 0x0a861, + 0x0a863, 0x0a868, 0x0a870, 0x0a88c, 0x0a88f, 0x0a892, 0x0a894, 0x0a899, + 0x0a8c0, 0x0a8c3, 0x0a974, 0x0a977, 0x0ab00, 0x0ab07, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_usptp_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_VS + * Location: USPTP + * pairs : 1 (Regs:160) + */ +static const u32 gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_usptp_shared_const_registers[] = { + 0x0ab30, 0x0abcf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED( + sizeof(gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_usptp_shared_const_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_PS + * Location: HLSQ_STATE + * pairs : 21 (Regs:88) + */ +static const u32 gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_registers[] = { + 0x0a980, 0x0a984, 0x0a99e, 0x0a99e, 0x0a9a7, 0x0a9a7, 0x0a9aa, 0x0a9aa, + 0x0a9af, 0x0a9b0, 0x0a9b2, 0x0a9b5, 0x0a9ba, 0x0a9ba, 0x0a9bc, 0x0a9bc, + 0x0a9c4, 0x0a9c4, 0x0a9c6, 0x0a9c6, 0x0a9cd, 0x0a9cd, 0x0a9fa, 0x0a9fc, + 0x0aa00, 0x0aa00, 0x0aa0d, 0x0aa12, 0x0aa30, 0x0aa31, 0x0aaf2, 0x0aaf3, + 0x0ab00, 0x0ab03, 0x0ab05, 0x0ab05, 0x0ab09, 0x0ab09, 0x0ab23, 0x0ab23, + 0x0abd0, 0x0abff, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_PS + * Location: HLSQ_STATE + * pairs : 2 (Regs:44) + */ +static const u32 gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_cctx_registers[] = { + 0x0a9e0, 0x0a9f9, 0x0ab0a, 0x0ab1b, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED( + sizeof(gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_cctx_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_PS + * Location: HLSQ_STATE + * pairs : 2 (Regs:320) + */ 
+static const u32 gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_shared_const_registers[] = { + 0x0aa40, 0x0aadf, 0x0ab30, 0x0abcf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED( + sizeof(gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_shared_const_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_PS + * Location: HLSQ_DP + * pairs : 2 (Regs:13) + */ +static const u32 gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_dp_registers[] = { + 0x0a9b1, 0x0a9b1, 0x0a9d4, 0x0a9df, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_dp_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_PS + * Location: SP_TOP + * pairs : 17 (Regs:34) + */ +static const u32 gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_registers[] = { + 0x0a980, 0x0a980, 0x0a982, 0x0a984, 0x0a99e, 0x0a9a2, 0x0a9a7, 0x0a9a8, + 0x0a9aa, 0x0a9ab, 0x0a9ae, 0x0a9ae, 0x0a9b0, 0x0a9b1, 0x0a9b3, 0x0a9b5, + 0x0a9ba, 0x0a9bc, 0x0a9be, 0x0a9be, 0x0a9c5, 0x0a9c5, 0x0a9ce, 0x0a9ce, + 0x0aa00, 0x0aa03, 0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab04, 0x0ab05, + 0x0ab09, 0x0ab09, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_PS + * Location: SP_TOP + * pairs : 2 (Regs:44) + */ +static const u32 gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_cctx_registers[] = { + 0x0a9e0, 0x0a9f9, 0x0ab0a, 0x0ab1b, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_cctx_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_PS + * Location: USPTP + * pairs : 15 (Regs:89) + */ +static const u32 gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_usptp_registers[] = { + 0x0a980, 0x0a982, 0x0a985, 0x0a9a6, 0x0a9a8, 
0x0a9a9, 0x0a9ab, 0x0a9ae, + 0x0a9b0, 0x0a9b3, 0x0a9b6, 0x0a9b9, 0x0a9bb, 0x0a9bf, 0x0a9c2, 0x0a9c3, + 0x0a9c5, 0x0a9c5, 0x0a9cd, 0x0a9ce, 0x0a9d0, 0x0a9d3, 0x0aa01, 0x0aa0c, + 0x0aa30, 0x0aa31, 0x0aaf2, 0x0aaf3, 0x0ab00, 0x0ab07, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_usptp_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_PS + * Location: USPTP + * pairs : 2 (Regs:320) + */ +static const u32 gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_usptp_shared_const_registers[] = { + 0x0aa40, 0x0aadf, 0x0ab30, 0x0abcf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED( + sizeof(gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_usptp_shared_const_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_SP_VS + * Location: HLSQ_STATE + * pairs : 28 (Regs:112) + */ +static const u32 gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_registers[] = { + 0x0a800, 0x0a801, 0x0a81b, 0x0a81d, 0x0a822, 0x0a822, 0x0a824, 0x0a824, + 0x0a827, 0x0a82a, 0x0a830, 0x0a830, 0x0a832, 0x0a835, 0x0a83a, 0x0a83a, + 0x0a83c, 0x0a83c, 0x0a83f, 0x0a841, 0x0a85b, 0x0a85d, 0x0a862, 0x0a862, + 0x0a864, 0x0a864, 0x0a867, 0x0a867, 0x0a870, 0x0a870, 0x0a872, 0x0a872, + 0x0a88c, 0x0a88e, 0x0a893, 0x0a893, 0x0a895, 0x0a895, 0x0a898, 0x0a898, + 0x0a89a, 0x0a89d, 0x0a8b0, 0x0a8bb, 0x0a8c0, 0x0a8c3, 0x0a974, 0x0a977, + 0x0ab00, 0x0ab02, 0x0ab09, 0x0ab09, 0x0ab23, 0x0ab23, 0x0abd0, 0x0abff, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_SP_VS + * Location: HLSQ_STATE + * pairs : 2 (Regs:34) + */ +static const u32 gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_cctx_registers[] = { + 0x0a8a0, 0x0a8af, 0x0ab0a, 0x0ab1b, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED( + 
sizeof(gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_cctx_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_SP_VS + * Location: HLSQ_STATE + * pairs : 1 (Regs:160) + */ +static const u32 gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_shared_const_registers[] = { + 0x0ab30, 0x0abcf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED( + sizeof(gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_shared_const_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_SP_VS + * Location: SP_TOP + * pairs : 22 (Regs:37) + */ +static const u32 gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_registers[] = { + 0x0a800, 0x0a800, 0x0a81c, 0x0a81d, 0x0a822, 0x0a824, 0x0a826, 0x0a826, + 0x0a82d, 0x0a82d, 0x0a82f, 0x0a831, 0x0a834, 0x0a835, 0x0a83a, 0x0a83c, + 0x0a83e, 0x0a83e, 0x0a840, 0x0a840, 0x0a85c, 0x0a85d, 0x0a862, 0x0a864, + 0x0a866, 0x0a866, 0x0a868, 0x0a868, 0x0a870, 0x0a871, 0x0a88d, 0x0a88e, + 0x0a893, 0x0a895, 0x0a897, 0x0a897, 0x0a899, 0x0a899, 0x0ab00, 0x0ab00, + 0x0ab02, 0x0ab02, 0x0ab09, 0x0ab09, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_SP_VS + * Location: SP_TOP + * pairs : 2 (Regs:34) + */ +static const u32 gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_cctx_registers[] = { + 0x0a8a0, 0x0a8af, 0x0ab0a, 0x0ab1b, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_cctx_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_SP_VS + * Location: USPTP + * pairs : 16 (Regs:142) + */ +static const u32 gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_usptp_registers[] = { + 0x0a800, 0x0a81b, 0x0a81e, 0x0a821, 0x0a823, 0x0a827, 0x0a82d, 0x0a82d, + 0x0a82f, 0x0a833, 0x0a836, 0x0a839, 0x0a83b, 0x0a85b, 
0x0a85e, 0x0a861, + 0x0a863, 0x0a868, 0x0a870, 0x0a88c, 0x0a88f, 0x0a892, 0x0a894, 0x0a899, + 0x0a8c0, 0x0a8c3, 0x0a974, 0x0a977, 0x0ab00, 0x0ab02, 0x0ab06, 0x0ab07, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_usptp_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_SP_VS + * Location: USPTP + * pairs : 1 (Regs:160) + */ +static const u32 gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_usptp_shared_const_registers[] = { + 0x0ab30, 0x0abcf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED( + sizeof(gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_usptp_shared_const_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_LPAC + * Cluster : CLUSTER_SP_PS + * Location: HLSQ_STATE + * pairs : 14 (Regs:73) + */ +static const u32 gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_registers[] = { + 0x0a9b0, 0x0a9b0, 0x0a9b2, 0x0a9b5, 0x0a9ba, 0x0a9ba, 0x0a9bc, 0x0a9bc, + 0x0a9c4, 0x0a9c4, 0x0a9cd, 0x0a9cd, 0x0a9fa, 0x0a9fc, 0x0aa00, 0x0aa00, + 0x0aa10, 0x0aa12, 0x0aa31, 0x0aa35, 0x0aaf3, 0x0aaf3, 0x0ab00, 0x0ab01, + 0x0ab23, 0x0ab23, 0x0abd0, 0x0abff, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED( + sizeof(gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_LPAC + * Cluster : CLUSTER_SP_PS + * Location: HLSQ_STATE + * pairs : 2 (Regs:22) + */ +static const u32 gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_cctx_registers[] = { + 0x0a9e2, 0x0a9e3, 0x0a9e6, 0x0a9f9, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED( + sizeof(gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_cctx_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_LPAC + * Cluster : CLUSTER_SP_PS + * Location: HLSQ_STATE + * pairs : 2 (Regs:320) + */ +static const u32 gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_shared_const_registers[] = { + 0x0aa40, 
0x0aadf, 0x0ab30, 0x0abcf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED( + sizeof(gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_shared_const_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_LPAC + * Cluster : CLUSTER_SP_PS + * Location: HLSQ_DP + * pairs : 2 (Regs:13) + */ +static const u32 gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_dp_registers[] = { + 0x0a9b1, 0x0a9b1, 0x0a9d4, 0x0a9df, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_dp_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_LPAC + * Cluster : CLUSTER_SP_PS + * Location: SP_TOP + * pairs : 8 (Regs:13) + */ +static const u32 gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_sp_top_registers[] = { + 0x0a9b0, 0x0a9b1, 0x0a9b3, 0x0a9b5, 0x0a9ba, 0x0a9bc, 0x0a9be, 0x0a9be, + 0x0a9c5, 0x0a9c5, 0x0a9ce, 0x0a9ce, 0x0aa00, 0x0aa00, 0x0ab00, 0x0ab00, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_sp_top_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_LPAC + * Cluster : CLUSTER_SP_PS + * Location: SP_TOP + * pairs : 2 (Regs:22) + */ +static const u32 gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_sp_top_cctx_registers[] = { + 0x0a9e2, 0x0a9e3, 0x0a9e6, 0x0a9f9, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED( + sizeof(gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_sp_top_cctx_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_LPAC + * Cluster : CLUSTER_SP_PS + * Location: USPTP + * pairs : 11 (Regs:26) + */ +static const u32 gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_usptp_registers[] = { + 0x0a9b0, 0x0a9b3, 0x0a9b6, 0x0a9b9, 0x0a9bb, 0x0a9be, 0x0a9c2, 0x0a9c3, + 0x0a9c5, 0x0a9c5, 0x0a9cd, 0x0a9ce, 0x0a9d0, 0x0a9d3, 0x0aa31, 0x0aa31, + 0x0aaf3, 0x0aaf3, 0x0ab00, 0x0ab01, 0x0ab06, 0x0ab06, + UINT_MAX, UINT_MAX, +}; 
+static_assert(IS_ALIGNED(sizeof(gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_usptp_registers), 8)); + +/* + * Block : ['SP'] + * REGION : SLICE + * Pipeline: PIPE_LPAC + * Cluster : CLUSTER_SP_PS + * Location: USPTP + * pairs : 2 (Regs:320) + */ +static const u32 gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_usptp_shared_const_registers[] = { + 0x0aa40, 0x0aadf, 0x0ab30, 0x0abcf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED( + sizeof(gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_usptp_shared_const_registers), 8)); + +/* + * Block : ['TPL1'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_VS + * Location: USPTP + * pairs : 3 (Regs:7) + */ +static const u32 gen8_0_0_tpl1_slice_pipe_br_cluster_sp_vs_usptp_registers[] = { + 0x0b300, 0x0b304, 0x0b307, 0x0b307, 0x0b309, 0x0b309, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_tpl1_slice_pipe_br_cluster_sp_vs_usptp_registers), 8)); + +/* + * Block : ['TPL1'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_PS + * Location: USPTP + * pairs : 6 (Regs:41) + */ +static const u32 gen8_0_0_tpl1_slice_pipe_br_cluster_sp_ps_usptp_registers[] = { + 0x0b180, 0x0b183, 0x0b190, 0x0b195, 0x0b2c0, 0x0b2d7, 0x0b300, 0x0b304, + 0x0b307, 0x0b307, 0x0b309, 0x0b309, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_tpl1_slice_pipe_br_cluster_sp_ps_usptp_registers), 8)); + +/* + * Block : ['TPL1'] + * REGION : SLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_SP_VS + * Location: USPTP + * pairs : 3 (Regs:7) + */ +static const u32 gen8_0_0_tpl1_slice_pipe_bv_cluster_sp_vs_usptp_registers[] = { + 0x0b300, 0x0b304, 0x0b307, 0x0b307, 0x0b309, 0x0b309, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_tpl1_slice_pipe_bv_cluster_sp_vs_usptp_registers), 8)); + +/* + * Block : ['TPL1'] + * REGION : SLICE + * Pipeline: PIPE_LPAC + * Cluster : CLUSTER_SP_PS + * Location: USPTP + * pairs : 5 (Regs:7) + */ +static const u32 
gen8_0_0_tpl1_slice_pipe_lpac_cluster_sp_ps_usptp_registers[] = { + 0x0b180, 0x0b181, 0x0b300, 0x0b301, 0x0b304, 0x0b304, 0x0b307, 0x0b307, + 0x0b309, 0x0b309, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_tpl1_slice_pipe_lpac_cluster_sp_ps_usptp_registers), 8)); + +static const struct sel_reg gen8_0_0_rb_rac_sel = { + .host_reg = GEN8_RB_SUB_BLOCK_SEL_CNTL_HOST, + .cd_reg = GEN8_RB_SUB_BLOCK_SEL_CNTL_CD, + .val = 0, +}; + +static const struct sel_reg gen8_0_0_rb_rbp_sel = { + .host_reg = GEN8_RB_SUB_BLOCK_SEL_CNTL_HOST, + .cd_reg = GEN8_RB_SUB_BLOCK_SEL_CNTL_CD, + .val = 0x9, +}; + +static struct gen8_cluster_registers gen8_0_0_cp_clusters[] = { + { CLUSTER_NONE, UNSLICE, PIPE_NONE, STATE_NON_CONTEXT, + gen8_0_0_cp_cp_pipe_none_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_BR, STATE_NON_CONTEXT, + gen8_0_0_cp_cp_pipe_br_registers, }, + { CLUSTER_NONE, SLICE, PIPE_BR, STATE_NON_CONTEXT, + gen8_0_0_cp_slice_cp_pipe_br_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_BV, STATE_NON_CONTEXT, + gen8_0_0_cp_cp_pipe_bv_registers, }, + { CLUSTER_NONE, SLICE, PIPE_BV, STATE_NON_CONTEXT, + gen8_0_0_cp_slice_cp_pipe_bv_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_LPAC, STATE_NON_CONTEXT, + gen8_0_0_cp_cp_pipe_lpac_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_AQE0, STATE_NON_CONTEXT, + gen8_0_0_cp_cp_pipe_aqe0_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_AQE1, STATE_NON_CONTEXT, + gen8_0_0_cp_cp_pipe_aqe1_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_DDE_BR, STATE_NON_CONTEXT, + gen8_0_0_cp_cp_pipe_dde_br_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_DDE_BV, STATE_NON_CONTEXT, + gen8_0_0_cp_cp_pipe_dde_bv_registers, }, +}; + +static struct gen8_cluster_registers gen8_0_0_mvc_clusters[] = { + { CLUSTER_NONE, UNSLICE, PIPE_BR, STATE_NON_CONTEXT, + gen8_0_0_non_context_pipe_br_registers, }, + { CLUSTER_NONE, SLICE, PIPE_BR, STATE_NON_CONTEXT, + gen8_0_0_non_context_slice_pipe_br_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_BV, STATE_NON_CONTEXT, + 
gen8_0_0_non_context_pipe_bv_registers, }, + { CLUSTER_NONE, SLICE, PIPE_BV, STATE_NON_CONTEXT, + gen8_0_0_non_context_slice_pipe_bv_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_LPAC, STATE_NON_CONTEXT, + gen8_0_0_non_context_pipe_lpac_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_BR, STATE_NON_CONTEXT, + gen8_0_0_non_context_rb_pipe_br_rbp_registers, &gen8_0_0_rb_rbp_sel, }, + { CLUSTER_NONE, SLICE, PIPE_BR, STATE_NON_CONTEXT, + gen8_0_0_non_context_rb_slice_pipe_br_rac_registers, &gen8_0_0_rb_rac_sel, }, + { CLUSTER_NONE, SLICE, PIPE_BR, STATE_NON_CONTEXT, + gen8_0_0_non_context_rb_slice_pipe_br_rbp_registers, &gen8_0_0_rb_rbp_sel, }, + { CLUSTER_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_rb_slice_pipe_br_cluster_ps_rac_registers, &gen8_0_0_rb_rac_sel, }, + { CLUSTER_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_rb_slice_pipe_br_cluster_ps_rac_registers, &gen8_0_0_rb_rac_sel, }, + { CLUSTER_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_rb_slice_pipe_br_cluster_ps_rbp_registers, &gen8_0_0_rb_rbp_sel, }, + { CLUSTER_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_rb_slice_pipe_br_cluster_ps_rbp_registers, &gen8_0_0_rb_rbp_sel, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_gras_slice_pipe_br_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_gras_slice_pipe_br_cluster_vpc_vs_registers, }, + { CLUSTER_GRAS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_gras_slice_pipe_br_cluster_gras_registers, }, + { CLUSTER_GRAS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_gras_slice_pipe_br_cluster_gras_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_gras_slice_pipe_bv_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_gras_slice_pipe_bv_cluster_vpc_vs_registers, }, + { CLUSTER_GRAS, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_gras_slice_pipe_bv_cluster_gras_registers, }, + { CLUSTER_GRAS, SLICE, PIPE_BV, 
STATE_FORCE_CTXT_1, + gen8_0_0_gras_slice_pipe_bv_cluster_gras_registers, }, + { CLUSTER_FE_US, UNSLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_pc_pipe_br_cluster_fe_us_registers, }, + { CLUSTER_FE_US, UNSLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_pc_pipe_br_cluster_fe_us_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_pc_slice_pipe_br_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_pc_slice_pipe_br_cluster_fe_s_registers, }, + { CLUSTER_FE_US, UNSLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_pc_pipe_bv_cluster_fe_us_registers, }, + { CLUSTER_FE_US, UNSLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_pc_pipe_bv_cluster_fe_us_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_pc_slice_pipe_bv_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_pc_slice_pipe_bv_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_vfd_slice_pipe_br_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_vfd_slice_pipe_br_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_vfd_slice_pipe_bv_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_vfd_slice_pipe_bv_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_br_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_br_cluster_fe_s_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_br_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_br_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_US, UNSLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_pipe_br_cluster_vpc_us_registers, }, + { CLUSTER_VPC_US, UNSLICE, PIPE_BR, STATE_FORCE_CTXT_1, + 
gen8_0_0_vpc_pipe_br_cluster_vpc_us_registers, }, + { CLUSTER_VPC_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_br_cluster_vpc_ps_registers, }, + { CLUSTER_VPC_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_br_cluster_vpc_ps_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_bv_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_bv_cluster_fe_s_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_bv_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_bv_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_US, UNSLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_pipe_bv_cluster_vpc_us_registers, }, + { CLUSTER_VPC_US, UNSLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_pipe_bv_cluster_vpc_us_registers, }, + { CLUSTER_VPC_PS, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_bv_cluster_vpc_ps_registers, }, + { CLUSTER_VPC_PS, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_bv_cluster_vpc_ps_registers, }, +}; + +static struct gen8_sptp_cluster_registers gen8_0_0_sptp_clusters[] = { + { CLUSTER_NONE, UNSLICE, 2, 2, SP_NCTX_REG, PIPE_NONE, 0, HLSQ_STATE, + gen8_0_0_non_context_sp_pipe_none_hlsq_state_registers, 0xae00}, + { CLUSTER_NONE, UNSLICE, 2, 2, SP_NCTX_REG, PIPE_NONE, 0, SP_TOP, + gen8_0_0_non_context_sp_pipe_none_sp_top_registers, 0xae00}, + { CLUSTER_NONE, UNSLICE, 2, 2, SP_NCTX_REG, PIPE_NONE, 0, USPTP, + gen8_0_0_non_context_sp_pipe_none_usptp_registers, 0xae00}, + { CLUSTER_NONE, UNSLICE, 2, 2, SP_NCTX_REG, PIPE_NONE, 0, HLSQ_DP_STR, + gen8_0_0_non_context_sp_pipe_none_hlsq_dp_str_registers, 0xae00}, + { CLUSTER_NONE, UNSLICE, 2, 2, TP0_NCTX_REG, PIPE_NONE, 0, USPTP, + gen8_0_0_non_context_tpl1_pipe_none_usptp_registers, 0xb600}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE, 
+ gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_cctx_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_shared_const_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, SP_TOP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, SP_TOP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_cctx_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_usptp_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_usptp_shared_const_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_cctx_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_shared_const_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, SP_TOP, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, SP_TOP, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_cctx_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_usptp_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP, + 
gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_usptp_shared_const_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_cctx_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_shared_const_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, SP_TOP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, SP_TOP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_cctx_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_usptp_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_cctx_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_shared_const_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, SP_TOP, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, SP_TOP, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_cctx_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_usptp_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE, + 
gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_cctx_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_shared_const_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_DP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, SP_TOP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, SP_TOP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_cctx_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_usptp_shared_const_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_cctx_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_shared_const_registers, + 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_DP, + gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_dp_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, SP_TOP, + gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_sp_top_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, SP_TOP, + 
gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_sp_top_cctx_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP, + gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_usptp_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP, + gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_usptp_shared_const_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_cctx_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_shared_const_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_DP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, SP_TOP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, SP_TOP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_cctx_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, HLSQ_DP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, SP_TOP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_cctx_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, HLSQ_DP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, SP_TOP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_cctx_registers, 0xa800}, + { 
CLUSTER_SP_VS, SLICE, 2, 2, TP0_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP, + gen8_0_0_tpl1_slice_pipe_br_cluster_sp_vs_usptp_registers, 0xb000}, + { CLUSTER_SP_VS, SLICE, 2, 2, TP0_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP, + gen8_0_0_tpl1_slice_pipe_bv_cluster_sp_vs_usptp_registers, 0xb000}, + { CLUSTER_SP_VS, SLICE, 2, 2, TP0_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP, + gen8_0_0_tpl1_slice_pipe_br_cluster_sp_vs_usptp_registers, 0xb000}, + { CLUSTER_SP_VS, SLICE, 2, 2, TP0_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP, + gen8_0_0_tpl1_slice_pipe_bv_cluster_sp_vs_usptp_registers, 0xb000}, + { CLUSTER_SP_PS, SLICE, 2, 2, TP0_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP, + gen8_0_0_tpl1_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xb000}, + { CLUSTER_SP_PS, SLICE, 2, 2, TP0_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP, + gen8_0_0_tpl1_slice_pipe_lpac_cluster_sp_ps_usptp_registers, 0xb000}, + { CLUSTER_SP_PS, SLICE, 2, 2, TP0_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP, + gen8_0_0_tpl1_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xb000}, + { CLUSTER_SP_PS, SLICE, 2, 2, TP0_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP, + gen8_0_0_tpl1_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xb000}, + { CLUSTER_SP_PS, SLICE, 2, 2, TP0_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP, + gen8_0_0_tpl1_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xb000}, +}; + +/* + * Before dumping the CP MVC + * Program CP_APERTURE_CNTL_* with pipeID={CP_PIPE} + * Then dump corresponding {Register_PIPE} + */ +static struct gen8_cp_indexed_reg gen8_0_0_cp_indexed_reg_list[] = { + { GEN8_CP_SQE_STAT_ADDR_PIPE, GEN8_CP_SQE_STAT_DATA_PIPE, UNSLICE, PIPE_BR, 0x00040}, + { GEN8_CP_SQE_STAT_ADDR_PIPE, GEN8_CP_SQE_STAT_DATA_PIPE, UNSLICE, PIPE_BV, 0x00040}, + { GEN8_CP_SQE_STAT_ADDR_PIPE, GEN8_CP_SQE_STAT_DATA_PIPE, UNSLICE, PIPE_LPAC, 0x00040}, + { GEN8_CP_SQE_STAT_ADDR_PIPE, GEN8_CP_SQE_STAT_DATA_PIPE, UNSLICE, PIPE_AQE0, 0x00040}, + { GEN8_CP_SQE_STAT_ADDR_PIPE, GEN8_CP_SQE_STAT_DATA_PIPE, UNSLICE, PIPE_AQE1, 0x00040}, + { GEN8_CP_SQE_STAT_ADDR_PIPE, GEN8_CP_SQE_STAT_DATA_PIPE, 
UNSLICE, PIPE_DDE_BR, 0x00040}, + { GEN8_CP_SQE_STAT_ADDR_PIPE, GEN8_CP_SQE_STAT_DATA_PIPE, UNSLICE, PIPE_DDE_BV, 0x00040}, + { GEN8_CP_DRAW_STATE_ADDR_PIPE, GEN8_CP_DRAW_STATE_DATA_PIPE, UNSLICE, PIPE_BR, 0x00200}, + { GEN8_CP_DRAW_STATE_ADDR_PIPE, GEN8_CP_DRAW_STATE_DATA_PIPE, UNSLICE, PIPE_BV, 0x00200}, + { GEN8_CP_DRAW_STATE_ADDR_PIPE, GEN8_CP_DRAW_STATE_DATA_PIPE, UNSLICE, PIPE_LPAC, 0x00200}, + { GEN8_CP_ROQ_DBG_ADDR_PIPE, GEN8_CP_ROQ_DBG_DATA_PIPE, UNSLICE, PIPE_BR, 0x00800}, + { GEN8_CP_ROQ_DBG_ADDR_PIPE, GEN8_CP_ROQ_DBG_DATA_PIPE, UNSLICE, PIPE_BV, 0x00800}, + { GEN8_CP_ROQ_DBG_ADDR_PIPE, GEN8_CP_ROQ_DBG_DATA_PIPE, UNSLICE, PIPE_LPAC, 0x00200}, + { GEN8_CP_ROQ_DBG_ADDR_PIPE, GEN8_CP_ROQ_DBG_DATA_PIPE, UNSLICE, PIPE_AQE0, 0x00100}, + { GEN8_CP_ROQ_DBG_ADDR_PIPE, GEN8_CP_ROQ_DBG_DATA_PIPE, UNSLICE, PIPE_AQE1, 0x00100}, + { GEN8_CP_ROQ_DBG_ADDR_PIPE, GEN8_CP_ROQ_DBG_DATA_PIPE, UNSLICE, PIPE_DDE_BR, 0x00100}, + { GEN8_CP_ROQ_DBG_ADDR_PIPE, GEN8_CP_ROQ_DBG_DATA_PIPE, UNSLICE, PIPE_DDE_BV, 0x00100}, + { GEN8_CP_SQE_UCODE_DBG_ADDR_PIPE, GEN8_CP_SQE_UCODE_DBG_DATA_PIPE, + UNSLICE, PIPE_BR, 0x08000}, + { GEN8_CP_SQE_UCODE_DBG_ADDR_PIPE, GEN8_CP_SQE_UCODE_DBG_DATA_PIPE, + UNSLICE, PIPE_BV, 0x08000}, + { GEN8_CP_SQE_UCODE_DBG_ADDR_PIPE, GEN8_CP_SQE_UCODE_DBG_DATA_PIPE, + UNSLICE, PIPE_LPAC, 0x08000}, + { GEN8_CP_SQE_UCODE_DBG_ADDR_PIPE, GEN8_CP_SQE_UCODE_DBG_DATA_PIPE, + UNSLICE, PIPE_AQE0, 0x08000}, + { GEN8_CP_SQE_UCODE_DBG_ADDR_PIPE, GEN8_CP_SQE_UCODE_DBG_DATA_PIPE, + UNSLICE, PIPE_AQE1, 0x08000}, + { GEN8_CP_SQE_UCODE_DBG_ADDR_PIPE, GEN8_CP_SQE_UCODE_DBG_DATA_PIPE, + UNSLICE, PIPE_DDE_BR, 0x08000}, + { GEN8_CP_SQE_UCODE_DBG_ADDR_PIPE, GEN8_CP_SQE_UCODE_DBG_DATA_PIPE, + UNSLICE, PIPE_DDE_BV, 0x08000}, + { GEN8_CP_RESOURCE_TABLE_DBG_ADDR_BV, GEN8_CP_RESOURCE_TABLE_DBG_DATA_BV, + UNSLICE, PIPE_NONE, 0x04100}, + { GEN8_CP_FIFO_DBG_ADDR_LPAC, GEN8_CP_FIFO_DBG_DATA_LPAC, UNSLICE, PIPE_NONE, 0x00040}, + { GEN8_CP_FIFO_DBG_ADDR_DDE_PIPE, GEN8_CP_FIFO_DBG_DATA_DDE_PIPE, + 
UNSLICE, PIPE_DDE_BR, 0x01100}, + { GEN8_CP_FIFO_DBG_ADDR_DDE_PIPE, GEN8_CP_FIFO_DBG_DATA_DDE_PIPE, + UNSLICE, PIPE_DDE_BV, 0x01100}, +}; + + +/* + * Before dumping the CP Mempool over the CP_*_MEM_POOL_DBG_ADDR/DATA + * indexed register pair it must be stabilized. + * for p in [PIPE_BR, PIPE_BV]: + * Program CP_APERTURE_CNTL_* with pipeID={p} sliceID={MAX_UINT} + * Program CP_CHICKEN_DBG_PIPE[crashStabilizeMVC] bit = 1. + * Dump CP_MEM_POOL_DBG_ADDR_PIPE for pipe=p + * Program CP_CHICKEN_DBG_PIPE[crashStabilizeMVC] bit = 0. + * + * same thing for CP_SLICE_MEM_POOL_DBG_ADDR_PIPE + * for p in [PIPE_BR, PIPE_BV]: + * for s in [0,1,2]: + * Program CP_APERTURE_CNTL_* with pipeID={p} sliceID={s} + * Program CP_CHICKEN_DBG_PIPE[crashStabilizeMVC] bit = 1. + * Program CP_SLICE_CHICKEN_DBG[crashStabilizeMVC] bit = 1. + * Dump CP_SLICE_MEM_POOL_DBG_ADDR_PIPE for pipe=p, sliceID=s + * Program CP_CHICKEN_DBG_PIPE[crashStabilizeMVC] bit = 0. + * Program CP_SLICE_CHICKEN_DBG[crashStabilizeMVC] bit = 0. 
+ */ + +static struct gen8_cp_indexed_reg gen8_0_0_cp_mempool_reg_list[] = { + { GEN8_CP_MEM_POOL_DBG_ADDR_PIPE, GEN8_CP_MEM_POOL_DBG_DATA_PIPE, + UNSLICE, PIPE_BR, 0x02400}, + { GEN8_CP_MEM_POOL_DBG_ADDR_PIPE, GEN8_CP_MEM_POOL_DBG_DATA_PIPE, + UNSLICE, PIPE_BV, 0x02400}, + { GEN8_CP_SLICE_MEM_POOL_DBG_ADDR_PIPE, GEN8_CP_SLICE_MEM_POOL_DBG_DATA_PIPE, + SLICE, PIPE_BR, 0x02400}, + { GEN8_CP_SLICE_MEM_POOL_DBG_ADDR_PIPE, GEN8_CP_SLICE_MEM_POOL_DBG_DATA_PIPE, + SLICE, PIPE_BV, 0x02400}, +}; + +/* + * this is just a temp table to give hint of reg pointers + * which are not included so far in MVC and SPTP structs + */ +static struct gen8_reg_list gen8_0_0_reg_list[] = { + { UNSLICE, gen8_0_0_gpu_registers }, + { SLICE, gen8_0_0_gpu_slice_registers }, + { UNSLICE, gen8_0_0_cx_misc_registers }, + { UNSLICE, gen8_0_0_dbgc_registers }, + { SLICE, gen8_0_0_dbgc_slice_registers }, + { UNSLICE, gen8_0_0_cx_dbgc_registers }, + { UNSLICE, NULL}, +}; + +static struct gen8_reg_list gen8_0_0_ahb_registers[] = { + { UNSLICE, gen8_0_0_gbif_registers }, + { UNSLICE, gen8_0_0_ahb_precd_gpu_registers }, + { SLICE, gen8_0_0_ahb_precd_gpu_slice_slice_registers }, + { UNSLICE, gen8_0_0_ahb_secure_gpu_registers }, + { UNSLICE, gen8_0_0_ahb_secure_cp_cp_pipe_none_registers }, +}; + +static struct gen8_reg_list gen8_gmu_registers[] = { + { UNSLICE, gen8_0_0_gmu_registers }, + { UNSLICE, gen8_0_0_gmugx_registers }, + { SLICE, gen8_0_0_gmugx_slice_registers }, +}; + +/* + * Block : ['GDPM_LKG'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 9 (Regs:108) + */ +static const u32 gen8_0_0_gdpm_lkg_registers[] = { + 0x21c00, 0x21c00, 0x21c08, 0x21c09, 0x21c0e, 0x21c0f, 0x21c4f, 0x21c50, + 0x21c52, 0x21c52, 0x21c54, 0x21c56, 0x21c58, 0x21c5a, 0x21c5c, 0x21c60, + 0x22048, 0x220a0, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_gdpm_lkg_registers), 8)); + +/* + * Block : ['GPU_CC_AHB2PHY_BROADCAST_SWMAN'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 1 
(Regs:256) + */ +static const u32 gen8_0_0_gpu_cc_ahb2phy_broadcast_swman_registers[] = { + 0x24c00, 0x24cff, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_gpu_cc_ahb2phy_broadcast_swman_registers), 8)); + +/* + * Block : ['GPU_CC_AHB2PHY_SWMAN'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 1 (Regs:6) + */ +static const u32 gen8_0_0_gpu_cc_ahb2phy_swman_registers[] = { + 0x24800, 0x24805, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_gpu_cc_ahb2phy_swman_registers), 8)); + +/* + * Block : ['GPU_CC_GPU_CC_REG'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 26 (Regs:133) + */ +static const u32 gen8_0_0_gpu_cc_gpu_cc_reg_registers[] = { + 0x25000, 0x25002, 0x25400, 0x25404, 0x25800, 0x25804, 0x25c00, 0x25c04, + 0x26000, 0x26004, 0x26400, 0x26406, 0x26415, 0x2641d, 0x2641f, 0x26440, + 0x26443, 0x26444, 0x26478, 0x2647a, 0x26489, 0x2648a, 0x2649c, 0x2649e, + 0x264a0, 0x264a1, 0x264c5, 0x264c7, 0x264e8, 0x264ea, 0x264f9, 0x264fc, + 0x2650b, 0x2650b, 0x2651c, 0x2651e, 0x26540, 0x2654b, 0x26554, 0x26556, + 0x26558, 0x2655c, 0x2655e, 0x2655f, 0x26563, 0x26563, 0x2656d, 0x26573, + 0x26576, 0x26576, 0x26578, 0x2657a, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_gpu_cc_gpu_cc_reg_registers), 8)); + +/* + * Block : ['GPU_CC_PLL0_CM_PLL_TAYCAN_COMMON'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 1 (Regs:14) + */ +static const u32 gen8_0_0_gpu_cc_pll0_cm_pll_taycan_common_registers[] = { + 0x24000, 0x2400d, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_gpu_cc_pll0_cm_pll_taycan_common_registers), 8)); + +/* + * Block : ['ACD_ACD_MND'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 11 (Regs:68) + */ +static const u32 gen8_0_0_acd_acd_mnd_registers[] = { + 0x1a400, 0x1a416, 0x1a420, 0x1a42d, 0x1a430, 0x1a431, 0x1a435, 0x1a435, + 0x1a437, 0x1a437, 0x1a43a, 0x1a43a, 0x1a442, 0x1a44b, 0x1a44e, 0x1a453, + 0x1a456, 0x1a458, 0x1a45b, 0x1a45d, 0x1a45f, 
0x1a462, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_acd_acd_mnd_registers), 8)); + +/* + * Block : ['GX_CLKCTL_AHB2PHY_BROADCAST_SWMAN'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 1 (Regs:256) + */ +static const u32 gen8_0_0_gx_clkctl_ahb2phy_broadcast_swman_registers[] = { + 0x19c00, 0x19cff, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_gx_clkctl_ahb2phy_broadcast_swman_registers), 8)); + +/* + * Block : ['GX_CLKCTL_AHB2PHY_SWMAN'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 1 (Regs:6) + */ +static const u32 gen8_0_0_gx_clkctl_ahb2phy_swman_registers[] = { + 0x19800, 0x19805, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_gx_clkctl_ahb2phy_swman_registers), 8)); + +/* + * Block : ['GX_CLKCTL_PLL0_CM_PLL_TAYCAN_COMMON'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 1 (Regs:14) + */ +static const u32 gen8_0_0_gx_clkctl_pll0_cm_pll_taycan_common_registers[] = { + 0x19000, 0x1900d, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_gx_clkctl_pll0_cm_pll_taycan_common_registers), 8)); + +/* + * Block : ['GX_CLKCTL_GX_CLKCTL_REG'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 11 (Regs:83) + */ +static const u32 gen8_0_0_gx_clkctl_gx_clkctl_reg_registers[] = { + 0x1a000, 0x1a004, 0x1a008, 0x1a014, 0x1a017, 0x1a017, 0x1a019, 0x1a019, + 0x1a022, 0x1a022, 0x1a024, 0x1a029, 0x1a03f, 0x1a05d, 0x1a060, 0x1a063, + 0x1a065, 0x1a066, 0x1a068, 0x1a076, 0x1a078, 0x1a07b, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_gx_clkctl_gx_clkctl_reg_registers), 8)); + +/* + * Block : ['RSCC_RSC'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 101 (Regs:606) + */ +static const u32 gen8_0_0_rscc_rsc_registers[] = { + 0x14000, 0x14034, 0x14036, 0x14036, 0x14040, 0x14042, 0x14044, 0x14045, + 0x14047, 0x14047, 0x14080, 0x14084, 0x14089, 0x1408c, 0x14091, 0x14094, + 0x14099, 0x1409c, 0x140a1, 0x140a4, 0x140a9, 0x140ac, 0x140b1, 
0x140b4, + 0x140b9, 0x140bc, 0x14100, 0x14104, 0x14114, 0x14119, 0x14124, 0x14132, + 0x14154, 0x1416b, 0x14340, 0x14341, 0x14344, 0x14344, 0x14346, 0x1437c, + 0x143f0, 0x143f8, 0x143fa, 0x143fe, 0x14400, 0x14404, 0x14406, 0x1440a, + 0x1440c, 0x14410, 0x14412, 0x14416, 0x14418, 0x1441c, 0x1441e, 0x14422, + 0x14424, 0x14424, 0x14498, 0x144a0, 0x144a2, 0x144a6, 0x144a8, 0x144ac, + 0x144ae, 0x144b2, 0x144b4, 0x144b8, 0x144ba, 0x144be, 0x144c0, 0x144c4, + 0x144c6, 0x144ca, 0x144cc, 0x144cc, 0x14540, 0x14548, 0x1454a, 0x1454e, + 0x14550, 0x14554, 0x14556, 0x1455a, 0x1455c, 0x14560, 0x14562, 0x14566, + 0x14568, 0x1456c, 0x1456e, 0x14572, 0x14574, 0x14574, 0x145e8, 0x145f0, + 0x145f2, 0x145f6, 0x145f8, 0x145fc, 0x145fe, 0x14602, 0x14604, 0x14608, + 0x1460a, 0x1460e, 0x14610, 0x14614, 0x14616, 0x1461a, 0x1461c, 0x1461c, + 0x14690, 0x14698, 0x1469a, 0x1469e, 0x146a0, 0x146a4, 0x146a6, 0x146aa, + 0x146ac, 0x146b0, 0x146b2, 0x146b6, 0x146b8, 0x146bc, 0x146be, 0x146c2, + 0x146c4, 0x146c4, 0x14738, 0x14740, 0x14742, 0x14746, 0x14748, 0x1474c, + 0x1474e, 0x14752, 0x14754, 0x14758, 0x1475a, 0x1475e, 0x14760, 0x14764, + 0x14766, 0x1476a, 0x1476c, 0x1476c, 0x147e0, 0x147e8, 0x147ea, 0x147ee, + 0x147f0, 0x147f4, 0x147f6, 0x147fa, 0x147fc, 0x14800, 0x14802, 0x14806, + 0x14808, 0x1480c, 0x1480e, 0x14812, 0x14814, 0x14814, 0x14888, 0x14890, + 0x14892, 0x14896, 0x14898, 0x1489c, 0x1489e, 0x148a2, 0x148a4, 0x148a8, + 0x148aa, 0x148ae, 0x148b0, 0x148b4, 0x148b6, 0x148ba, 0x148bc, 0x148bc, + 0x14930, 0x14938, 0x1493a, 0x1493e, 0x14940, 0x14944, 0x14946, 0x1494a, + 0x1494c, 0x14950, 0x14952, 0x14956, 0x14958, 0x1495c, 0x1495e, 0x14962, + 0x14964, 0x14964, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_rscc_rsc_registers), 8)); + +static const u32 *gen8_0_0_external_core_regs[] = { + gen8_0_0_gdpm_lkg_registers, + gen8_0_0_gpu_cc_ahb2phy_broadcast_swman_registers, + gen8_0_0_gpu_cc_ahb2phy_swman_registers, + gen8_0_0_gpu_cc_gpu_cc_reg_registers, + 
gen8_0_0_gpu_cc_pll0_cm_pll_taycan_common_registers, + gen8_0_0_acd_acd_mnd_registers, + gen8_0_0_gx_clkctl_ahb2phy_broadcast_swman_registers, + gen8_0_0_gx_clkctl_ahb2phy_swman_registers, + gen8_0_0_gx_clkctl_pll0_cm_pll_taycan_common_registers, + gen8_0_0_gx_clkctl_gx_clkctl_reg_registers, +}; +#endif /*_ADRENO_GEN8_0_0_SNAPSHOT_H */ diff --git a/adreno_gen8_gmu_snapshot.c b/adreno_gen8_gmu_snapshot.c new file mode 100644 index 0000000000..d005d0ec30 --- /dev/null +++ b/adreno_gen8_gmu_snapshot.c @@ -0,0 +1,301 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#include "adreno.h" +#include "adreno_gen8.h" +#include "adreno_gen8_gmu.h" +#include "adreno_gen8_0_0_snapshot.h" +#include "adreno_snapshot.h" +#include "gen8_reg.h" +#include "kgsl_device.h" + +size_t gen8_snapshot_gmu_mem(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_gmu_mem *mem_hdr = + (struct kgsl_snapshot_gmu_mem *)buf; + u32 *data = (u32 *)(buf + sizeof(*mem_hdr)); + struct gmu_mem_type_desc *desc = priv; + + if (priv == NULL || desc->memdesc->hostptr == NULL) + return 0; + + if (remain < desc->memdesc->size + sizeof(*mem_hdr)) { + dev_err(device->dev, + "snapshot: Not enough memory for the gmu section %d\n", + desc->type); + return 0; + } + + mem_hdr->type = desc->type; + mem_hdr->hostaddr = (u64)(uintptr_t)desc->memdesc->hostptr; + mem_hdr->gmuaddr = desc->memdesc->gmuaddr; + mem_hdr->gpuaddr = 0; + + /* The hw fence queues are mapped as iomem in the kernel */ + if (desc->type == SNAPSHOT_GMU_MEM_HW_FENCE) + memcpy_fromio(data, desc->memdesc->hostptr, desc->memdesc->size); + else + memcpy(data, desc->memdesc->hostptr, desc->memdesc->size); + + return desc->memdesc->size + sizeof(*mem_hdr); +} + +static size_t gen8_gmu_snapshot_dtcm(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) 
+{ + struct kgsl_snapshot_gmu_mem *mem_hdr = + (struct kgsl_snapshot_gmu_mem *)buf; + struct gen8_gmu_device *gmu = (struct gen8_gmu_device *)priv; + u32 *data = (u32 *)(buf + sizeof(*mem_hdr)); + u32 i; + + if (remain < gmu->vma[GMU_DTCM].size + sizeof(*mem_hdr)) { + SNAPSHOT_ERR_NOMEM(device, "GMU DTCM Memory"); + return 0; + } + + mem_hdr->type = SNAPSHOT_GMU_MEM_BIN_BLOCK; + mem_hdr->hostaddr = 0; + mem_hdr->gmuaddr = gmu->vma[GMU_DTCM].start; + mem_hdr->gpuaddr = 0; + + /* + * Read of GMU TCMs over side-band debug controller interface is + * supported on gen8 family + * region [20]: Dump ITCM/DTCM. Select 1 for DTCM. + * autoInc [31]: Autoincrement the address field after each + * access to TCM_DBG_DATA + */ + kgsl_regwrite(device, GEN8_CX_DBGC_TCM_DBG_ADDR, BIT(20) | BIT(31)); + + for (i = 0; i < (gmu->vma[GMU_DTCM].size >> 2); i++) + kgsl_regread(device, GEN8_CX_DBGC_TCM_DBG_DATA, data++); + + return gmu->vma[GMU_DTCM].size + sizeof(*mem_hdr); +} + +static size_t gen8_gmu_snapshot_itcm(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_gmu_mem *mem_hdr = + (struct kgsl_snapshot_gmu_mem *)buf; + void *dest = buf + sizeof(*mem_hdr); + struct gen8_gmu_device *gmu = (struct gen8_gmu_device *)priv; + + if (!gmu->itcm_shadow) { + dev_err(&gmu->pdev->dev, "No memory allocated for ITCM shadow capture\n"); + return 0; + } + + if (remain < gmu->vma[GMU_ITCM].size + sizeof(*mem_hdr)) { + SNAPSHOT_ERR_NOMEM(device, "GMU ITCM Memory"); + return 0; + } + + mem_hdr->type = SNAPSHOT_GMU_MEM_BIN_BLOCK; + mem_hdr->hostaddr = 0; + mem_hdr->gmuaddr = gmu->vma[GMU_ITCM].start; + mem_hdr->gpuaddr = 0; + + memcpy(dest, gmu->itcm_shadow, gmu->vma[GMU_ITCM].size); + + return gmu->vma[GMU_ITCM].size + sizeof(*mem_hdr); +} + +static void gen8_gmu_snapshot_memories(struct kgsl_device *device, + struct gen8_gmu_device *gmu, struct kgsl_snapshot *snapshot) +{ + struct gmu_mem_type_desc desc; + struct kgsl_memdesc *md; + int i; + + for (i = 0; i < 
ARRAY_SIZE(gmu->gmu_globals); i++) { + + md = &gmu->gmu_globals[i]; + if (!md->size) + continue; + + desc.memdesc = md; + if (md == gmu->hfi.hfi_mem) + desc.type = SNAPSHOT_GMU_MEM_HFI; + else if (md == gmu->gmu_log) + desc.type = SNAPSHOT_GMU_MEM_LOG; + else if (md == gmu->dump_mem) + desc.type = SNAPSHOT_GMU_MEM_DEBUG; + else if ((md == gmu->gmu_init_scratch) || (md == gmu->gpu_boot_scratch)) + desc.type = SNAPSHOT_GMU_MEM_WARMBOOT; + else + desc.type = SNAPSHOT_GMU_MEM_BIN_BLOCK; + + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GMU_MEMORY, + snapshot, gen8_snapshot_gmu_mem, &desc); + } +} + +struct kgsl_snapshot_gmu_version { + u32 type; + u32 value; +}; + +static size_t gen8_snapshot_gmu_version(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + u32 *data = (u32 *) (buf + sizeof(*header)); + struct kgsl_snapshot_gmu_version *ver = priv; + + if (remain < DEBUG_SECTION_SZ(1)) { + SNAPSHOT_ERR_NOMEM(device, "GMU Version"); + return 0; + } + + header->type = ver->type; + header->size = 1; + + *data = ver->value; + + return DEBUG_SECTION_SZ(1); +} + +static void gen8_gmu_snapshot_versions(struct kgsl_device *device, + struct gen8_gmu_device *gmu, + struct kgsl_snapshot *snapshot) +{ + int i; + + struct kgsl_snapshot_gmu_version gmu_vers[] = { + { .type = SNAPSHOT_DEBUG_GMU_CORE_VERSION, + .value = gmu->ver.core, }, + { .type = SNAPSHOT_DEBUG_GMU_CORE_DEV_VERSION, + .value = gmu->ver.core_dev, }, + { .type = SNAPSHOT_DEBUG_GMU_PWR_VERSION, + .value = gmu->ver.pwr, }, + { .type = SNAPSHOT_DEBUG_GMU_PWR_DEV_VERSION, + .value = gmu->ver.pwr_dev, }, + { .type = SNAPSHOT_DEBUG_GMU_HFI_VERSION, + .value = gmu->ver.hfi, }, + }; + + for (i = 0; i < ARRAY_SIZE(gmu_vers); i++) + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, gen8_snapshot_gmu_version, + &gmu_vers[i]); +} + +#define RSCC_OFFSET_DWORDS 0x14000 + +static size_t 
gen8_snapshot_rscc_registers(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + const u32 *regs = priv; + u32 *data = (u32 *)buf; + int count = 0, k; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + + /* Figure out how many registers we are going to dump */ + count = adreno_snapshot_regs_count(regs); + + if (remain < (count * 4)) { + SNAPSHOT_ERR_NOMEM(device, "RSCC REGISTERS"); + return 0; + } + + for (regs = priv; regs[0] != UINT_MAX; regs += 2) { + u32 cnt = REG_COUNT(regs); + + if (cnt == 1) { + *data++ = BIT(31) | regs[0]; + *data++ = __raw_readl(gmu->rscc_virt + + ((regs[0] - RSCC_OFFSET_DWORDS) << 2)); + continue; + } + *data++ = regs[0]; + *data++ = cnt; + for (k = regs[0]; k <= regs[1]; k++) + *data++ = __raw_readl(gmu->rscc_virt + + ((k - RSCC_OFFSET_DWORDS) << 2)); + } + + /* Return the size of the section */ + return (count * 4); +} + +/* + * gen8_gmu_device_snapshot() - GEN8 GMU snapshot function + * @device: Device being snapshotted + * @snapshot: Pointer to the snapshot instance + * + * This is where all of the GEN8 GMU specific bits and pieces are grabbed + * into the snapshot memory + */ +static void gen8_gmu_device_snapshot(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + const struct adreno_gen8_core *gpucore = to_gen8_core(ADRENO_DEVICE(device)); + const struct gen8_snapshot_block_list *gen8_snapshot_block_list = + gpucore->gen8_snapshot_block_list; + u32 i, slice, j; + struct gen8_reg_list_info info; + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY, + snapshot, gen8_gmu_snapshot_itcm, gmu); + + gen8_gmu_snapshot_versions(device, gmu, snapshot); + + gen8_gmu_snapshot_memories(device, gmu, snapshot); + + for (i = 0 ; i < gen8_snapshot_block_list->num_gmu_regs; i++) { + struct gen8_reg_list *regs = 
&gen8_snapshot_block_list->gmu_regs[i]; + + slice = regs->slice_region ? MAX_PHYSICAL_SLICES : 1; + for (j = 0 ; j < slice; j++) { + info.regs = regs; + info.slice_id = (slice > 1) ? j : UINT_MAX; + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, + gen8_legacy_snapshot_registers, &info); + } + } + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot, + gen8_snapshot_rscc_registers, (void *) gen8_snapshot_block_list->rscc_regs); + + if (!gen8_gmu_gx_is_on(adreno_dev)) + goto dtcm; + + /* Set fence to ALLOW mode so registers can be read */ + kgsl_regwrite(device, GEN8_GMUAO_AHB_FENCE_CTRL, 0); + /* Make sure the previous write posted before reading */ + wmb(); + +dtcm: + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY, + snapshot, gen8_gmu_snapshot_dtcm, gmu); +} + +void gen8_gmu_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* + * Dump external register first to have GPUCC and other external + * register in snapshot to analyze the system state even in partial + * snapshot dump + */ + gen8_snapshot_external_core_regs(device, snapshot); + + gen8_gmu_device_snapshot(device, snapshot); + + gen8_snapshot(adreno_dev, snapshot); + + gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR, UINT_MAX); + gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_MASK, HFI_IRQ_MASK); +} diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index f609dc64d3..43f13a1b21 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -12,6 +12,7 @@ #include "adreno.h" #include "adreno_gen8.h" #include "adreno_gen8_hwsched.h" +#include "adreno_snapshot.h" #include "kgsl_bus.h" #include "kgsl_device.h" #include "kgsl_trace.h" @@ -62,6 +63,395 @@ void gen8_hwsched_fault(struct adreno_device *adreno_dev, u32 fault) adreno_hwsched_fault(adreno_dev, fault); } +static size_t gen8_hwsched_snapshot_rb(struct kgsl_device *device, u8 
*buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf; + u32 *data = (u32 *)(buf + sizeof(*header)); + struct kgsl_memdesc *rb = (struct kgsl_memdesc *)priv; + + if (remain < rb->size + sizeof(*header)) { + SNAPSHOT_ERR_NOMEM(device, "RB"); + return 0; + } + + header->start = 0; + header->end = rb->size >> 2; + header->rptr = 0; + header->rbsize = rb->size >> 2; + header->count = rb->size >> 2; + header->timestamp_queued = 0; + header->timestamp_retired = 0; + header->gpuaddr = rb->gpuaddr; + header->id = 0; + + memcpy(data, rb->hostptr, rb->size); + + return rb->size + sizeof(*header); +} + +static void gen8_hwsched_snapshot_preemption_record(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md, u64 offset) +{ + struct kgsl_snapshot_section_header *section_header = + (struct kgsl_snapshot_section_header *)snapshot->ptr; + u8 *dest = snapshot->ptr + sizeof(*section_header); + struct kgsl_snapshot_gpu_object_v2 *header = + (struct kgsl_snapshot_gpu_object_v2 *)dest; + const struct adreno_gen8_core *gen8_core = to_gen8_core(ADRENO_DEVICE(device)); + u64 ctxt_record_size = GEN8_CP_CTXRECORD_SIZE_IN_BYTES; + size_t section_size; + + if (gen8_core->ctxt_record_size) + ctxt_record_size = gen8_core->ctxt_record_size; + + ctxt_record_size = min_t(u64, ctxt_record_size, device->snapshot_ctxt_record_size); + + section_size = sizeof(*section_header) + sizeof(*header) + ctxt_record_size; + if (snapshot->remain < section_size) { + SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); + return; + } + + section_header->magic = SNAPSHOT_SECTION_MAGIC; + section_header->id = KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2; + section_header->size = section_size; + + header->size = ctxt_record_size >> 2; + header->gpuaddr = md->gpuaddr + offset; + header->ptbase = + kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); + header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; + + dest += sizeof(*header); + + 
memcpy(dest, md->hostptr + offset, ctxt_record_size); + + snapshot->ptr += section_header->size; + snapshot->remain -= section_header->size; + snapshot->size += section_header->size; +} + +static void snapshot_preemption_records(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md) +{ + const struct adreno_gen8_core *gen8_core = + to_gen8_core(ADRENO_DEVICE(device)); + u64 ctxt_record_size = GEN8_CP_CTXRECORD_SIZE_IN_BYTES; + u64 offset; + + if (gen8_core->ctxt_record_size) + ctxt_record_size = gen8_core->ctxt_record_size; + + /* All preemption records exist as a single mem alloc entry */ + for (offset = 0; offset < md->size; offset += ctxt_record_size) + gen8_hwsched_snapshot_preemption_record(device, snapshot, md, + offset); +} + +static void *get_rb_hostptr(struct adreno_device *adreno_dev, + u64 gpuaddr, u32 size) +{ + struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev); + u64 offset; + u32 i; + + for (i = 0; i < hw_hfi->mem_alloc_entries; i++) { + struct kgsl_memdesc *md = hw_hfi->mem_alloc_table[i].md; + + if (md && (gpuaddr >= md->gpuaddr) && + ((gpuaddr + size) <= (md->gpuaddr + md->size))) { + offset = gpuaddr - md->gpuaddr; + return md->hostptr + offset; + } + } + + return NULL; +} + +static u32 gen8_copy_gpu_global(void *out, void *in, u32 size) +{ + if (out && in) { + memcpy(out, in, size); + return size; + } + + return 0; +} + +static void adreno_hwsched_snapshot_rb_payload(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot, struct payload_section *payload) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_snapshot_section_header *section_header = + (struct kgsl_snapshot_section_header *)snapshot->ptr; + u8 *buf = snapshot->ptr + sizeof(*section_header); + struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf; + u32 *data = (u32 *)(buf + sizeof(*header)); + u32 size = gen8_hwsched_parse_payload(payload, KEY_RB_SIZEDWORDS) << 2; + u64 lo, hi, gpuaddr; 
+ void *rb_hostptr; + char str[16]; + + lo = gen8_hwsched_parse_payload(payload, KEY_RB_GPUADDR_LO); + hi = gen8_hwsched_parse_payload(payload, KEY_RB_GPUADDR_HI); + gpuaddr = hi << 32 | lo; + + /* Sanity check to make sure there is enough for the header */ + if (snapshot->remain < sizeof(*section_header)) + goto err; + + rb_hostptr = get_rb_hostptr(adreno_dev, gpuaddr, size); + + /* If the gpuaddress and size don't match any allocation, then abort */ + if (((snapshot->remain - sizeof(*section_header)) < + (size + sizeof(*header))) || + !gen8_copy_gpu_global(data, rb_hostptr, size)) + goto err; + + if (device->dump_all_ibs) { + u64 rbaddr, lpac_rbaddr; + + kgsl_regread64(device, GEN8_CP_RB_BASE_LO_GC, + GEN8_CP_RB_BASE_HI_GC, &rbaddr); + kgsl_regread64(device, GEN8_CP_RB_BASE_LO_LPAC, + GEN8_CP_RB_BASE_HI_LPAC, &lpac_rbaddr); + + /* Parse all IBs from current RB */ + if ((rbaddr == gpuaddr) || (lpac_rbaddr == gpuaddr)) + adreno_snapshot_dump_all_ibs(device, rb_hostptr, snapshot); + } + + header->start = 0; + header->end = size >> 2; + header->rptr = gen8_hwsched_parse_payload(payload, KEY_RB_RPTR); + header->wptr = gen8_hwsched_parse_payload(payload, KEY_RB_WPTR); + header->rbsize = size >> 2; + header->count = size >> 2; + header->timestamp_queued = gen8_hwsched_parse_payload(payload, + KEY_RB_QUEUED_TS); + header->timestamp_retired = gen8_hwsched_parse_payload(payload, + KEY_RB_RETIRED_TS); + header->gpuaddr = gpuaddr; + header->id = gen8_hwsched_parse_payload(payload, KEY_RB_ID); + + section_header->magic = SNAPSHOT_SECTION_MAGIC; + section_header->id = KGSL_SNAPSHOT_SECTION_RB_V2; + section_header->size = size + sizeof(*header) + sizeof(*section_header); + + snapshot->ptr += section_header->size; + snapshot->remain -= section_header->size; + snapshot->size += section_header->size; + + return; +err: + snprintf(str, sizeof(str), "RB addr:0x%llx", gpuaddr); + SNAPSHOT_ERR_NOMEM(device, str); +} + +static bool parse_payload_rb_legacy(struct adreno_device 
*adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct hfi_context_bad_cmd_legacy *cmd = adreno_dev->hwsched.ctxt_bad; + u32 i = 0, payload_bytes; + void *start; + bool ret = false; + + /* Skip if we didn't receive a context bad HFI */ + if (!cmd->hdr) + return false; + + payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - + offsetof(struct hfi_context_bad_cmd_legacy, payload); + + start = &cmd->payload[0]; + + while (i < payload_bytes) { + struct payload_section *payload = start + i; + + if (payload->type == PAYLOAD_RB) { + adreno_hwsched_snapshot_rb_payload(adreno_dev, + snapshot, payload); + ret = true; + } + + i += sizeof(*payload) + (payload->dwords << 2); + } + + return ret; +} + +static bool parse_payload_rb(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; + u32 i = 0, payload_bytes; + void *start; + bool ret = false; + + /* Skip if we didn't receive a context bad HFI */ + if (!cmd->hdr) + return false; + + payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - + offsetof(struct hfi_context_bad_cmd, payload); + + start = &cmd->payload[0]; + + while (i < payload_bytes) { + struct payload_section *payload = start + i; + + if (payload->type == PAYLOAD_RB) { + adreno_hwsched_snapshot_rb_payload(adreno_dev, + snapshot, payload); + ret = true; + } + + i += sizeof(*payload) + (payload->dwords << 2); + } + + return ret; +} + +static int snapshot_context_queue(int id, void *ptr, void *data) +{ + struct kgsl_snapshot *snapshot = data; + struct kgsl_context *context = ptr; + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + struct gmu_mem_type_desc desc; + + if (!context->gmu_registered) + return 0; + + desc.memdesc = &drawctxt->gmu_context_queue; + desc.type = SNAPSHOT_GMU_MEM_CONTEXT_QUEUE; + kgsl_snapshot_add_section(context->device, + KGSL_SNAPSHOT_SECTION_GMU_MEMORY, + snapshot, gen8_snapshot_gmu_mem, &desc); + + return 0; +} + +/* Snapshot AQE buffer */ +static 
size_t snapshot_aqe_buffer(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_memdesc *memdesc = priv; + + struct kgsl_snapshot_gpu_object_v2 *header = + (struct kgsl_snapshot_gpu_object_v2 *)buf; + + u8 *ptr = buf + sizeof(*header); + + if (IS_ERR_OR_NULL(memdesc) || memdesc->size == 0) + return 0; + + if (remain < (memdesc->size + sizeof(*header))) { + SNAPSHOT_ERR_NOMEM(device, "AQE BUFFER"); + return 0; + } + + header->size = memdesc->size >> 2; + header->gpuaddr = memdesc->gpuaddr; + header->ptbase = MMU_DEFAULT_TTBR0(device); + header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; + + memcpy(ptr, memdesc->hostptr, memdesc->size); + + return memdesc->size + sizeof(*header); +} + +void gen8_hwsched_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev); + bool skip_memkind_rb = false; + u32 i; + bool parse_payload; + + gen8_gmu_snapshot(adreno_dev, snapshot); + + adreno_hwsched_parse_fault_cmdobj(adreno_dev, snapshot); + + /* + * First try to dump ringbuffers using context bad HFI payloads + * because they have all the ringbuffer parameters. 
If ringbuffer + * payloads are not present, fall back to dumping ringbuffers + * based on MEMKIND_RB + */ + if (GMU_VER_MINOR(gmu->ver.hfi) < 2) + parse_payload = parse_payload_rb_legacy(adreno_dev, snapshot); + else + parse_payload = parse_payload_rb(adreno_dev, snapshot); + + if (parse_payload) + skip_memkind_rb = true; + + for (i = 0; i < hw_hfi->mem_alloc_entries; i++) { + struct hfi_mem_alloc_entry *entry = &hw_hfi->mem_alloc_table[i]; + + if (entry->desc.mem_kind == HFI_MEMKIND_RB && !skip_memkind_rb) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_RB_V2, + snapshot, gen8_hwsched_snapshot_rb, + entry->md); + + if (entry->desc.mem_kind == HFI_MEMKIND_SCRATCH) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, adreno_snapshot_global, + entry->md); + + if (entry->desc.mem_kind == HFI_MEMKIND_PROFILE) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, adreno_snapshot_global, + entry->md); + + if (entry->desc.mem_kind == HFI_MEMKIND_CSW_SMMU_INFO) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, adreno_snapshot_global, + entry->md); + + if (entry->desc.mem_kind == HFI_MEMKIND_CSW_PRIV_NON_SECURE) + snapshot_preemption_records(device, snapshot, + entry->md); + + if (entry->desc.mem_kind == HFI_MEMKIND_PREEMPT_SCRATCH) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, adreno_snapshot_global, + entry->md); + + if (entry->desc.mem_kind == HFI_MEMKIND_AQE_BUFFER) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, snapshot_aqe_buffer, + entry->md); + + if (entry->desc.mem_kind == HFI_MEMKIND_HW_FENCE) { + struct gmu_mem_type_desc desc; + + desc.memdesc = entry->md; + desc.type = SNAPSHOT_GMU_MEM_HW_FENCE; + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GMU_MEMORY, + snapshot, gen8_snapshot_gmu_mem, &desc); + } + + } + + if 
(!adreno_hwsched_context_queue_enabled(adreno_dev)) + return; + + read_lock(&device->context_lock); + idr_for_each(&device->context_idr, snapshot_context_queue, snapshot); + read_unlock(&device->context_lock); +} + static int gmu_clock_set_rate(struct adreno_device *adreno_dev) { struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); diff --git a/adreno_gen8_hwsched.h b/adreno_gen8_hwsched.h index 6c790c6698..0d166f8114 100644 --- a/adreno_gen8_hwsched.h +++ b/adreno_gen8_hwsched.h @@ -40,6 +40,16 @@ int gen8_hwsched_probe(struct platform_device *pdev, */ int gen8_hwsched_reset_replay(struct adreno_device *adreno_dev); +/** + * gen8_hwsched_snapshot - take gen8 hwsched snapshot + * @adreno_dev: Pointer to the adreno device + * @snapshot: Pointer to the snapshot instance + * + * Snapshot the faulty ib and then snapshot rest of gen8 gmu things + */ +void gen8_hwsched_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); + /** * gen8_hwsched_handle_watchdog - Handle watchdog interrupt * @adreno_dev: Pointer to the adreno device diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c new file mode 100644 index 0000000000..9c515026c4 --- /dev/null +++ b/adreno_gen8_snapshot.c @@ -0,0 +1,1693 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ + +#include "adreno.h" +#include "adreno_gen8_0_0_snapshot.h" +#include "adreno_snapshot.h" + +static struct kgsl_memdesc *gen8_capturescript; +static struct kgsl_memdesc *gen8_crashdump_registers; +static u32 *gen8_cd_reg_end; +static const struct gen8_snapshot_block_list *gen8_snapshot_block_list; +static bool gen8_crashdump_timedout; + +/* Starting kernel virtual address for QDSS TMC register block */ +static void __iomem *tmc_virt; + +const struct gen8_snapshot_block_list gen8_0_0_snapshot_block_list = { + .pre_crashdumper_regs = gen8_0_0_ahb_registers, + .num_pre_crashdumper_regs = ARRAY_SIZE(gen8_0_0_ahb_registers), + .debugbus_blocks = gen8_debugbus_blocks, + .debugbus_blocks_len = ARRAY_SIZE(gen8_debugbus_blocks), + .gbif_debugbus_blocks = gen8_gbif_debugbus_blocks, + .gbif_debugbus_blocks_len = ARRAY_SIZE(gen8_gbif_debugbus_blocks), + .cx_debugbus_blocks = gen8_cx_debugbus_blocks, + .cx_debugbus_blocks_len = ARRAY_SIZE(gen8_cx_debugbus_blocks), + .external_core_regs = gen8_0_0_external_core_regs, + .num_external_core_regs = ARRAY_SIZE(gen8_0_0_external_core_regs), + .gmu_regs = gen8_gmu_registers, + .num_gmu_regs = ARRAY_SIZE(gen8_gmu_registers), + .rscc_regs = gen8_0_0_rscc_rsc_registers, + .reg_list = gen8_0_0_reg_list, + .shader_blocks = gen8_0_0_shader_blocks, + .num_shader_blocks = ARRAY_SIZE(gen8_0_0_shader_blocks), + .cp_clusters = gen8_0_0_cp_clusters, + .num_cp_clusters = ARRAY_SIZE(gen8_0_0_cp_clusters), + .clusters = gen8_0_0_mvc_clusters, + .num_clusters = ARRAY_SIZE(gen8_0_0_mvc_clusters), + .sptp_clusters = gen8_0_0_sptp_clusters, + .num_sptp_clusters = ARRAY_SIZE(gen8_0_0_sptp_clusters), + .index_registers = gen8_0_0_cp_indexed_reg_list, + .index_registers_len = ARRAY_SIZE(gen8_0_0_cp_indexed_reg_list), + .mempool_index_registers = gen8_0_0_cp_mempool_reg_list, + .mempool_index_registers_len = ARRAY_SIZE(gen8_0_0_cp_mempool_reg_list), +}; + +#define GEN8_SP_READ_SEL_VAL(_sliceid, _location, _pipe, _statetype, _usptp, _sptp) \ + 
(FIELD_PREP(GENMASK(25, 21), _sliceid) | \ + FIELD_PREP(GENMASK(20, 18), _location) | \ + FIELD_PREP(GENMASK(17, 16), _pipe) | \ + FIELD_PREP(GENMASK(15, 8), _statetype) | \ + FIELD_PREP(GENMASK(7, 4), _usptp) | \ + FIELD_PREP(GENMASK(3, 0), _sptp)) + +#define GEN8_CP_APERTURE_REG_VAL(_sliceid, _pipe, _cluster, _context) \ + (FIELD_PREP(GENMASK(23, 23), 1) | \ + FIELD_PREP(GENMASK(18, 16), _sliceid) | \ + FIELD_PREP(GENMASK(15, 12), _pipe) | \ + FIELD_PREP(GENMASK(11, 8), _cluster) | \ + FIELD_PREP(GENMASK(5, 4), _context)) + +#define GEN8_DEBUGBUS_SECTION_SIZE (sizeof(struct kgsl_snapshot_debugbus) \ + + (GEN8_DEBUGBUS_BLOCK_SIZE << 3)) + +#define CD_REG_END 0xaaaaaaaa + +#define NUMBER_OF_SLICES(is_sliced) (is_sliced ? MAX_PHYSICAL_SLICES : 1) +#define SLICE_ID(slices, j) ((slices > 1) ? j : UINT_MAX) + +static u32 CD_WRITE(u64 *ptr, u32 offset, u64 val) +{ + ptr[0] = val; + ptr[1] = FIELD_PREP(GENMASK(63, 44), offset) | BIT(21) | BIT(0); + + return 2; +} + +static u32 CD_READ(u64 *ptr, u32 offset, u32 size, u64 target) +{ + ptr[0] = target; + ptr[1] = FIELD_PREP(GENMASK(63, 44), offset) | size; + + return 2; +} + +static void CD_FINISH(u64 *ptr, u32 offset) +{ + gen8_cd_reg_end = gen8_crashdump_registers->hostptr + offset; + *gen8_cd_reg_end = CD_REG_END; + ptr[0] = gen8_crashdump_registers->gpuaddr + offset; + ptr[1] = FIELD_PREP(GENMASK(63, 44), GEN8_CP_CRASH_DUMP_STATUS) | BIT(0); + ptr[2] = 0; + ptr[3] = 0; +} + +static bool CD_SCRIPT_CHECK(struct kgsl_device *device) +{ + return (adreno_smmu_is_stalled(ADRENO_DEVICE(device)) || + (!device->snapshot_crashdumper) || + IS_ERR_OR_NULL(gen8_capturescript) || + IS_ERR_OR_NULL(gen8_crashdump_registers) || + gen8_crashdump_timedout); +} + +static bool _gen8_do_crashdump(struct kgsl_device *device) +{ + u32 reg = 0; + ktime_t timeout; + + kgsl_regwrite(device, GEN8_CP_CRASH_DUMP_SCRIPT_BASE_LO, + lower_32_bits(gen8_capturescript->gpuaddr)); + kgsl_regwrite(device, GEN8_CP_CRASH_DUMP_SCRIPT_BASE_HI, + 
upper_32_bits(gen8_capturescript->gpuaddr)); + kgsl_regwrite(device, GEN8_CP_CRASH_DUMP_CNTL, 1); + + timeout = ktime_add_ms(ktime_get(), CP_CRASH_DUMPER_TIMEOUT); + + if (!device->snapshot_atomic) + might_sleep(); + for (;;) { + /* make sure we're reading the latest value */ + rmb(); + if ((*gen8_cd_reg_end) != CD_REG_END) + break; + if (ktime_compare(ktime_get(), timeout) > 0) + break; + /* Wait 1msec to avoid unnecessary looping */ + if (!device->snapshot_atomic) + usleep_range(100, 1000); + } + + kgsl_regread(device, GEN8_CP_CRASH_DUMP_STATUS, &reg); + + /* + * Writing to the GEN8_CP_CRASH_DUMP_CNTL also resets the + * GEN8_CP_CRASH_DUMP_STATUS. Make sure the read above is + * complete before we change the value + */ + rmb(); + + kgsl_regwrite(device, GEN8_CP_CRASH_DUMP_CNTL, 0); + + if (WARN(!(reg & 0x2), "Crashdumper timed out\n")) { + /* + * Gen8 crash dumper script is broken down into multiple chunks + * and script will be invoked multiple times to capture snapshot + * of different sections of GPU. If crashdumper fails once, it is + * highly likely it will fail subsequently as well. Hence update + * gen8_crashdump_timedout variable to avoid running crashdumper + * after it fails once. 
+ */ + gen8_crashdump_timedout = true; + return false; + } + + return true; +} + +size_t gen8_legacy_snapshot_registers(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct gen8_reg_list_info *info = (struct gen8_reg_list_info *)priv; + const u32 *ptr = info->regs->regs; + struct kgsl_snapshot_mvc_regs_v3 *header = + (struct kgsl_snapshot_mvc_regs_v3 *)buf; + u32 *data = (u32 *)(buf + sizeof(*header)); + u32 size = adreno_snapshot_regs_count(ptr) * sizeof(*data); + u32 count, k; + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); + return 0; + } + + header->ctxt_id = 0; + header->cluster_id = CLUSTER_NONE; + header->pipe_id = PIPE_NONE; + header->location_id = UINT_MAX; + header->sp_id = UINT_MAX; + header->usptp_id = UINT_MAX; + header->slice_id = info->slice_id; + + if (info->regs->sel) + kgsl_regwrite(device, info->regs->sel->host_reg, info->regs->sel->val); + + kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL + (info->slice_id, 0, 0, 0)); + + for (ptr = info->regs->regs; ptr[0] != UINT_MAX; ptr += 2) { + count = REG_COUNT(ptr); + + if (count == 1) + *data++ = ptr[0]; + else { + *data++ = ptr[0] | (1 << 31); + *data++ = ptr[1]; + } + for (k = ptr[0]; k <= ptr[1]; k++) + kgsl_regread(device, k, data++); + } + + return (size + sizeof(*header)); +} + +static size_t gen8_snapshot_registers(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct gen8_reg_list_info *info = (struct gen8_reg_list_info *)priv; + const u32 *ptr = info->regs->regs; + struct kgsl_snapshot_mvc_regs_v3 *header = + (struct kgsl_snapshot_mvc_regs_v3 *)buf; + u32 *data = (u32 *)(buf + sizeof(*header)); + u32 *src; + u32 cnt; + u32 size = adreno_snapshot_regs_count(ptr) * sizeof(*data); + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); + return 0; + } + + header->ctxt_id = 0; + header->cluster_id = CLUSTER_NONE; + header->pipe_id = PIPE_NONE; + header->location_id = UINT_MAX; + header->sp_id 
= UINT_MAX; + header->usptp_id = UINT_MAX; + header->slice_id = info->slice_id; + + src = gen8_crashdump_registers->hostptr + info->offset; + + for (ptr = info->regs->regs; ptr[0] != UINT_MAX; ptr += 2) { + cnt = REG_COUNT(ptr); + + if (cnt == 1) + *data++ = ptr[0]; + else { + *data++ = BIT(31) | ptr[0]; + *data++ = ptr[1]; + } + memcpy(data, src, cnt << 2); + data += cnt; + src += cnt; + } + + /* Return the size of the section */ + return (size + sizeof(*header)); +} + +static size_t gen8_legacy_snapshot_shader(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_shader_v3 *header = + (struct kgsl_snapshot_shader_v3 *) buf; + struct gen8_shader_block_info *info = (struct gen8_shader_block_info *) priv; + struct gen8_shader_block *block = info->block; + u32 *data = (u32 *)(buf + sizeof(*header)); + u32 read_sel, i; + + if (remain < (sizeof(*header) + (block->size << 2))) { + SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY"); + return 0; + } + + header->type = block->statetype; + header->slice_id = info->slice_id; + header->sp_index = info->sp_id; + header->usptp = info->usptp; + header->pipe_id = block->pipeid; + header->location = block->location; + header->ctxt_id = 1; + header->size = block->size; + + read_sel = GEN8_SP_READ_SEL_VAL(info->slice_id, block->location, block->pipeid, + block->statetype, info->usptp, info->sp_id); + + kgsl_regwrite(device, GEN8_SP_READ_SEL, read_sel); + + /* + * An explicit barrier is needed so that reads do not happen before + * the register write. 
+ */ + mb(); + + for (i = 0; i < block->size; i++) + data[i] = kgsl_regmap_read(&device->regmap, GEN8_SP_AHB_READ_APERTURE + i); + + return (sizeof(*header) + (block->size << 2)); +} + +static size_t gen8_snapshot_shader_memory(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_shader_v3 *header = + (struct kgsl_snapshot_shader_v3 *) buf; + struct gen8_shader_block_info *info = (struct gen8_shader_block_info *) priv; + struct gen8_shader_block *block = info->block; + u32 *data = (u32 *) (buf + sizeof(*header)); + + if (remain < (sizeof(*header) + (block->size << 2))) { + SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY"); + return 0; + } + + header->type = block->statetype; + header->slice_id = info->slice_id; + header->sp_index = info->sp_id; + header->usptp = info->usptp; + header->pipe_id = block->pipeid; + header->location = block->location; + header->ctxt_id = 1; + header->size = block->size; + + memcpy(data, gen8_crashdump_registers->hostptr + info->offset, + (block->size << 2)); + + return (sizeof(*header) + (block->size << 2)); +} + +static void qdss_regwrite(void __iomem *regbase, u32 offsetbytes, u32 value) +{ + void __iomem *reg; + + reg = regbase + offsetbytes; + + /* Ensure previous write is committed */ + wmb(); + __raw_writel(value, reg); +} + +static u32 qdss_regread(void __iomem *regbase, u32 offsetbytes) +{ + void __iomem *reg; + u32 val; + + reg = regbase + offsetbytes; + val = __raw_readl(reg); + + /* Make sure memory is updated before next access */ + rmb(); + return val; +} + +static size_t gen8_snapshot_trace_buffer_gfx_trace(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + u32 start_idx = 0, status = 0, count = 0, wrap_count = 0, write_ptr = 0; + struct kgsl_snapshot_trace_buffer *header = + (struct kgsl_snapshot_trace_buffer *) buf; + u32 *data = (u32 *)(buf + sizeof(*header)); + struct gen8_trace_buffer_info *info = + (struct gen8_trace_buffer_info *) priv; + + if (remain < SZ_2K + 
sizeof(*header)) { + SNAPSHOT_ERR_NOMEM(device, "TRACE 2K BUFFER"); + return 0; + } + + memcpy(header->ping_blk, info->ping_blk, sizeof(header->ping_blk)); + memcpy(header->ping_idx, info->ping_idx, sizeof(header->ping_idx)); + header->granularity = info->granularity; + header->segment = info->segment; + header->dbgc_ctrl = info->dbgc_ctrl; + + /* Read the status of trace buffer to determine if it's full or empty */ + kgsl_regread(device, GEN8_DBGC_TRACE_BUFFER_STATUS, &status); + + /* + * wrap_count and write ptr are part of status. + * if status is 0 => wrap_count = 0 and write ptr = 0 buffer is empty. + * if status is non zero and wrap count is 0 read partial buffer. + * if wrap count in non zero read entier 2k buffer. + * Always read the oldest data available. + */ + + /* if status is 0 then buffer is empty */ + if (!status) { + header->size = 0; + return sizeof(*header); + } + + /* Number of times the circular buffer has wrapped around */ + wrap_count = FIELD_GET(GENMASK(31, 12), status); + write_ptr = FIELD_GET(GENMASK(8, 0), status); + + /* Read partial buffer starting from 0 */ + if (!wrap_count) { + /* No of dwords to read : (write ptr - 0) of indexed register */ + count = write_ptr; + header->size = count << 2; + start_idx = 0; + } else { + /* Read entire 2k buffer starting from write ptr */ + start_idx = write_ptr + 1; + count = SZ_512; + header->size = SZ_2K; + } + + kgsl_regmap_read_indexed_interleaved(&device->regmap, + GEN8_DBGC_DBG_TRACE_BUFFER_RD_ADDR, GEN8_DBGC_DBG_TRACE_BUFFER_RD_DATA, data, + start_idx, count); + + return (sizeof(*header) + header->size); +} + +static size_t gen8_snapshot_trace_buffer_etb(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + u32 read_ptr, count, write_ptr, val, idx = 0; + struct kgsl_snapshot_trace_buffer *header = (struct kgsl_snapshot_trace_buffer *) buf; + u32 *data = (u32 *)(buf + sizeof(*header)); + struct gen8_trace_buffer_info *info = (struct gen8_trace_buffer_info *) priv; + + /* Unlock 
ETB buffer */ + qdss_regwrite(tmc_virt, QDSS_AOSS_APB_TMC_LAR, 0xC5ACCE55); + + /* Make sure unlock goes through before proceeding further */ + mb(); + + /* Flush the QDSS pipeline to ensure completion of pending write to buffer */ + val = qdss_regread(tmc_virt, QDSS_AOSS_APB_TMC_FFCR); + qdss_regwrite(tmc_virt, QDSS_AOSS_APB_TMC_FFCR, val | 0x40); + + /* Make sure pipeline is flushed before we get read and write pointers */ + mb(); + + /* Disable ETB */ + qdss_regwrite(tmc_virt, QDSS_AOSS_APB_TMC_CTRL, 0); + + /* Set to circular mode */ + qdss_regwrite(tmc_virt, QDSS_AOSS_APB_TMC_MODE, 0); + + /* Ensure buffer is set to circular mode before accessing it */ + mb(); + + /* Size of buffer is specified in register TMC_RSZ */ + count = qdss_regread(tmc_virt, QDSS_AOSS_APB_TMC_RSZ) << 2; + read_ptr = qdss_regread(tmc_virt, QDSS_AOSS_APB_TMC_RRP); + write_ptr = qdss_regread(tmc_virt, QDSS_AOSS_APB_TMC_RWP); + + /* ETB buffer if full read_ptr will be equal to write_ptr else write_ptr leads read_ptr */ + count = (read_ptr == write_ptr) ? count : (write_ptr - read_ptr); + + if (remain < count + sizeof(*header)) { + SNAPSHOT_ERR_NOMEM(device, "ETB BUFFER"); + return 0; + } + + /* + * Read pointer is 4 byte aligned and write pointer is 2 byte aligned + * We read 4 bytes of data in one iteration below so aligin it down + * to 4 bytes. 
+ */ + count = ALIGN_DOWN(count, 4); + + header->size = count; + header->dbgc_ctrl = info->dbgc_ctrl; + memcpy(header->ping_blk, info->ping_blk, sizeof(header->ping_blk)); + memcpy(header->ping_idx, info->ping_idx, sizeof(header->ping_idx)); + header->granularity = info->granularity; + header->segment = info->segment; + + while (count != 0) { + /* This indexed register auto increments index as we read */ + data[idx++] = qdss_regread(tmc_virt, QDSS_AOSS_APB_TMC_RRD); + count = count - 4; + } + + return (sizeof(*header) + header->size); +} + +static void gen8_snapshot_trace_buffer(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + u32 val_tmc_ctrl = 0, val_etr_ctrl = 0, val_etr1_ctrl = 0; + u32 i = 0, sel_gx = 0, sel_cx = 0, val_gx = 0, val_cx = 0, val = 0; + struct gen8_trace_buffer_info info; + struct resource *res1, *res2; + struct clk *clk; + int ret; + void __iomem *etr_virt; + + /* + * Data can be collected from CX_DBGC or DBGC and it's mutually exclusive. + * Read the necessary select registers and determine the source of data. + * This loop reads SEL_A to SEL_D of both CX_DBGC and DBGC and accordingly + * updates the header information of trace buffer section. 
+ */ + for (i = 0; i < TRACE_BUF_NUM_SIG; i++) { + kgsl_regread(device, GEN8_DBGC_CFG_DBGBUS_SEL_A + i, &sel_gx); + kgsl_regread(device, GEN8_CX_DBGC_CFG_DBGBUS_SEL_A + i, &sel_cx); + val_gx |= sel_gx; + val_cx |= sel_cx; + info.ping_idx[i] = FIELD_GET(GENMASK(7, 0), (sel_gx | sel_cx)); + info.ping_blk[i] = FIELD_GET(GENMASK(24, 16), (sel_gx | sel_cx)); + } + + /* Zero the header if not programmed to export any buffer */ + if (!val_gx && !val_cx) { + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_TRACE_BUFFER, + snapshot, NULL, &info); + return; + } + + /* Enable APB clock to read data from trace buffer */ + clk = clk_get(&device->pdev->dev, "apb_pclk"); + + if (IS_ERR(clk)) { + dev_err(device->dev, "Unable to get QDSS clock\n"); + return; + } + + ret = clk_prepare_enable(clk); + + if (ret) { + dev_err(device->dev, "QDSS Clock enable error: %d\n", ret); + clk_put(clk); + return; + } + + res1 = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, "qdss_etr"); + res2 = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, "qdss_tmc"); + + if (!res1 || !res2) + goto err_clk_put; + + etr_virt = ioremap(res1->start, resource_size(res1)); + tmc_virt = ioremap(res2->start, resource_size(res2)); + + if (!etr_virt || !tmc_virt) + goto err_unmap; + + /* + * Update header information based on source of data, read necessary CNTLT registers + * for granularity and segment information. + */ + if (val_gx) { + info.dbgc_ctrl = GX_DBGC; + kgsl_regread(device, GEN8_DBGC_CFG_DBGBUS_CNTLT, &val); + } else { + info.dbgc_ctrl = CX_DBGC; + kgsl_regread(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLT, &val); + } + + info.granularity = FIELD_GET(GENMASK(14, 12), val); + info.segment = FIELD_GET(GENMASK(31, 28), val); + + val_tmc_ctrl = qdss_regread(tmc_virt, QDSS_AOSS_APB_TMC_CTRL); + + /* + * Incase TMC CTRL is 0 and val_cx is non zero dump empty buffer. + * Incase TMC CTRL is 0 and val_gx is non zero dump 2k gfx buffer. + * 2k buffer is not present for CX blocks. 
+ * In case both ETR's CTRL is 0 Dump ETB QDSS buffer and disable QDSS. + * In case either ETR's CTRL is 1 Disable QDSS dumping ETB buffer to DDR. + */ + if (!val_tmc_ctrl) { + if (val_gx) + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_TRACE_BUFFER, + snapshot, gen8_snapshot_trace_buffer_gfx_trace, &info); + else + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_TRACE_BUFFER, + snapshot, NULL, &info); + } else { + val_etr_ctrl = qdss_regread(etr_virt, QDSS_AOSS_APB_ETR_CTRL); + val_etr1_ctrl = qdss_regread(etr_virt, QDSS_AOSS_APB_ETR1_CTRL); + if (!val_etr_ctrl && !val_etr1_ctrl) + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_TRACE_BUFFER, + snapshot, gen8_snapshot_trace_buffer_etb, &info); + qdss_regwrite(tmc_virt, QDSS_AOSS_APB_TMC_CTRL, 0); + } + +err_unmap: + iounmap(tmc_virt); + iounmap(etr_virt); + +err_clk_put: + clk_disable_unprepare(clk); + clk_put(clk); +} + +static void gen8_snapshot_shader(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + struct gen8_shader_block_info info = {0}; + u64 *ptr; + u32 offset = 0; + struct gen8_shader_block *shader_blocks = gen8_snapshot_block_list->shader_blocks; + size_t num_shader_blocks = gen8_snapshot_block_list->num_shader_blocks; + u32 i, sp, usptp, slice; + size_t (*func)(struct kgsl_device *device, u8 *buf, size_t remain, + void *priv) = gen8_legacy_snapshot_shader; + + kgsl_regrmw(device, GEN8_SP_DBG_CNTL, GENMASK(1, 0), 3); + + if (CD_SCRIPT_CHECK(device)) { + for (i = 0; i < num_shader_blocks; i++) { + struct gen8_shader_block *block = &shader_blocks[i]; + + for (slice = 0; slice < block->num_slices; slice++) { + for (sp = 0; sp < block->num_sps; sp++) { + for (usptp = 0; usptp < block->num_usptps; usptp++) { + info.block = block; + info.sp_id = sp; + info.usptp = usptp; + info.slice_id = slice; + info.offset = offset; + offset += block->size << 2; + + /* Shader working/shadow memory */ + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SHADER_V3, + snapshot, 
func, &info); + } + } + } + } + + goto done; + } + + for (i = 0; i < num_shader_blocks; i++) { + struct gen8_shader_block *block = &shader_blocks[i]; + + /* Build the crash script */ + ptr = gen8_capturescript->hostptr; + offset = 0; + + for (slice = 0; slice < block->num_slices; slice++) { + for (sp = 0; sp < block->num_sps; sp++) { + for (usptp = 0; usptp < block->num_usptps; usptp++) { + /* Program the aperture */ + ptr += CD_WRITE(ptr, GEN8_SP_READ_SEL, + GEN8_SP_READ_SEL_VAL(slice, block->location, + block->pipeid, block->statetype, usptp, sp)); + + /* Read all the data in one chunk */ + ptr += CD_READ(ptr, GEN8_SP_AHB_READ_APERTURE, block->size, + gen8_crashdump_registers->gpuaddr + offset); + offset += block->size << 2; + } + } + } + /* Marker for end of script */ + CD_FINISH(ptr, offset); + + /* Try to run the crash dumper */ + func = gen8_legacy_snapshot_shader; + if (_gen8_do_crashdump(device)) + func = gen8_snapshot_shader_memory; + + offset = 0; + for (slice = 0; slice < block->num_slices; slice++) { + for (sp = 0; sp < block->num_sps; sp++) { + for (usptp = 0; usptp < block->num_usptps; usptp++) { + info.block = block; + info.sp_id = sp; + info.usptp = usptp; + info.slice_id = slice; + info.offset = offset; + offset += block->size << 2; + + /* Shader working/shadow memory */ + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SHADER_V3, snapshot, func, &info); + } + } + } + } + +done: + kgsl_regrmw(device, GEN8_SP_DBG_CNTL, GENMASK(1, 0), 0x0); +} + +static void gen8_snapshot_mempool(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + struct gen8_cp_indexed_reg *cp_indexed_reg; + size_t mempool_index_registers_len = gen8_snapshot_block_list->mempool_index_registers_len; + u32 i, j, slice; + + for (i = 0; i < mempool_index_registers_len; i++) { + cp_indexed_reg = &gen8_snapshot_block_list->mempool_index_registers[i]; + slice = NUMBER_OF_SLICES(cp_indexed_reg->slice_region); + + for (j = 0; j < slice; j++) { + + 
kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL + (j, cp_indexed_reg->pipe_id, 0, 0)); + + /* set CP_CHICKEN_DBG[StabilizeMVC] to stabilize it while dumping */ + kgsl_regrmw(device, GEN8_CP_CHICKEN_DBG_PIPE, 0x4, 0x4); + + kgsl_snapshot_indexed_registers_v2(device, snapshot, + cp_indexed_reg->addr, cp_indexed_reg->data, + 0, cp_indexed_reg->size, cp_indexed_reg->pipe_id, + ((slice > 1) ? j : UINT_MAX)); + + kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL + (j, cp_indexed_reg->pipe_id, 0, 0)); + + kgsl_regrmw(device, GEN8_CP_CHICKEN_DBG_PIPE, 0x4, 0x0); + } + } +} + +static u32 gen8_read_dbgahb(struct kgsl_device *device, + u32 regbase, u32 reg) +{ + u32 val; + + kgsl_regread(device, (GEN8_SP_AHB_READ_APERTURE + reg - regbase), &val); + return val; +} + +static size_t gen8_legacy_snapshot_cluster_dbgahb(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_mvc_regs_v3 *header = + (struct kgsl_snapshot_mvc_regs_v3 *)buf; + struct gen8_sptp_cluster_registers_info *info = + (struct gen8_sptp_cluster_registers_info *)priv; + const u32 *ptr = info->cluster->regs; + u32 *data = (u32 *)(buf + sizeof(*header)); + u32 read_sel, j; + u32 size = adreno_snapshot_regs_count(ptr) * sizeof(*data); + + if (remain < (sizeof(*header) + size)) { + SNAPSHOT_ERR_NOMEM(device, "MVC REGISTERS"); + return 0; + } + + header->ctxt_id = info->context_id; + header->cluster_id = info->cluster_id; + header->pipe_id = info->pipe_id; + header->location_id = info->location_id; + header->sp_id = info->sp_id; + header->usptp_id = info->usptp_id; + header->slice_id = info->slice_id; + + read_sel = GEN8_SP_READ_SEL_VAL(info->slice_id, info->location_id, + info->pipe_id, info->statetype_id, info->sp_id, info->usptp_id); + + kgsl_regwrite(device, GEN8_SP_READ_SEL, read_sel); + + for (; ptr[0] != UINT_MAX; ptr += 2) { + u32 count = REG_COUNT(ptr); + + if (count == 1) + *data++ = ptr[0]; + else { + *data++ = ptr[0] 
| (1 << 31); + *data++ = ptr[1]; + } + for (j = ptr[0]; j <= ptr[1]; j++) + *data++ = gen8_read_dbgahb(device, info->cluster->regbase, j); + } + + return (size + sizeof(*header)); +} + +static size_t gen8_snapshot_cluster_dbgahb(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_mvc_regs_v3 *header = + (struct kgsl_snapshot_mvc_regs_v3 *)buf; + struct gen8_sptp_cluster_registers_info *info = + (struct gen8_sptp_cluster_registers_info *)priv; + const u32 *ptr = info->cluster->regs; + u32 *data = (u32 *)(buf + sizeof(*header)); + u32 *src; + u32 size = adreno_snapshot_regs_count(ptr) * sizeof(*data); + + if (remain < (sizeof(*header) + size)) { + SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); + return 0; + } + + header->ctxt_id = info->context_id; + header->cluster_id = info->cluster_id; + header->pipe_id = info->pipe_id; + header->location_id = info->location_id; + header->sp_id = info->sp_id; + header->usptp_id = info->usptp_id; + header->slice_id = info->slice_id; + + src = gen8_crashdump_registers->hostptr + info->offset; + + for (ptr = info->cluster->regs; ptr[0] != UINT_MAX; ptr += 2) { + u32 cnt = REG_COUNT(ptr); + + if (cnt == 1) + *data++ = ptr[0]; + else { + *data++ = ptr[0] | (1 << 31); + *data++ = ptr[1]; + } + memcpy(data, src, cnt << 2); + data += cnt; + src += cnt; + } + + return (size + sizeof(*header)); +} + +static void gen8_snapshot_dbgahb_regs(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + u32 i, j, sp, usptp, count, slice; + u64 *ptr, offset = 0; + struct gen8_sptp_cluster_registers_info info = {0}; + struct gen8_sptp_cluster_registers *sptp_clusters = gen8_snapshot_block_list->sptp_clusters; + size_t num_sptp_clusters = gen8_snapshot_block_list->num_sptp_clusters; + size_t (*func)(struct kgsl_device *device, u8 *buf, size_t remain, + void *priv) = gen8_legacy_snapshot_cluster_dbgahb; + + if (CD_SCRIPT_CHECK(device)) { + for (i = 0; i < num_sptp_clusters; i++) { + struct 
gen8_sptp_cluster_registers *cluster = &sptp_clusters[i]; + + slice = NUMBER_OF_SLICES(cluster->slice_region); + for (sp = 0; sp < cluster->num_sps; sp++) { + for (usptp = 0; usptp < cluster->num_usptps; usptp++) { + for (j = 0; j < slice; j++) { + info.cluster = cluster; + info.location_id = cluster->location_id; + info.pipe_id = cluster->pipe_id; + info.usptp_id = usptp; + info.sp_id = sp; + info.slice_id = SLICE_ID(slice, j); + info.cluster_id = cluster->cluster_id; + info.context_id = cluster->context_id; + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, + func, &info); + } + } + } + } + return; + } + + /* Build the crash script */ + ptr = gen8_capturescript->hostptr; + + for (i = 0; i < num_sptp_clusters; i++) { + struct gen8_sptp_cluster_registers *cluster = &sptp_clusters[i]; + + slice = NUMBER_OF_SLICES(cluster->slice_region); + + cluster->offset = offset; + + for (sp = 0; sp < cluster->num_sps; sp++) { + for (usptp = 0; usptp < cluster->num_usptps; usptp++) { + for (j = 0; j < slice; j++) { + const u32 *regs = cluster->regs; + + info.cluster = cluster; + info.location_id = cluster->location_id; + info.pipe_id = cluster->pipe_id; + info.usptp_id = usptp; + info.sp_id = sp; + info.slice_id = SLICE_ID(slice, j); + info.statetype_id = cluster->statetype; + info.cluster_id = cluster->cluster_id; + info.context_id = cluster->context_id; + info.offset = offset; + + /* Program the aperture */ + ptr += CD_WRITE(ptr, GEN8_SP_READ_SEL, GEN8_SP_READ_SEL_VAL + (j, cluster->location_id, cluster->pipe_id, + cluster->statetype, sp, usptp)); + + for (; regs[0] != UINT_MAX; regs += 2) { + count = REG_COUNT(regs); + ptr += CD_READ(ptr, (GEN8_SP_AHB_READ_APERTURE + + regs[0] - cluster->regbase), count, + (gen8_crashdump_registers->gpuaddr + + offset)); + + offset += count * sizeof(u32); + } + /* Marker for end of script */ + CD_FINISH(ptr, offset); + + /* Try to run the crash dumper */ + if (_gen8_do_crashdump(device)) + func = 
gen8_snapshot_cluster_dbgahb; + + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, + func, &info); + } + } + } + } +} + +static size_t gen8_legacy_snapshot_mvc(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_mvc_regs_v3 *header = + (struct kgsl_snapshot_mvc_regs_v3 *)buf; + u32 *data = (u32 *)(buf + sizeof(*header)); + struct gen8_cluster_registers_info *info = + (struct gen8_cluster_registers_info *)priv; + const u32 *ptr = info->cluster->regs; + u32 size = adreno_snapshot_regs_count(ptr) * sizeof(*data); + u32 j; + + if (remain < (sizeof(*header) + size)) { + SNAPSHOT_ERR_NOMEM(device, "MVC REGISTERS"); + return 0; + } + + header->ctxt_id = (info->context_id == STATE_FORCE_CTXT_1) ? 1 : 0; + header->cluster_id = info->cluster_id; + header->pipe_id = info->pipe_id; + header->location_id = UINT_MAX; + header->sp_id = UINT_MAX; + header->usptp_id = UINT_MAX; + header->slice_id = info->slice_id; + + /* + * Set the AHB control for the Host to read from the + * cluster/context for this iteration. 
+ */ + kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL + (info->slice_id, info->pipe_id, info->cluster_id, info->context_id)); + + if (info->cluster->sel) + kgsl_regwrite(device, info->cluster->sel->host_reg, info->cluster->sel->val); + + for (; ptr[0] != UINT_MAX; ptr += 2) { + u32 count = REG_COUNT(ptr); + + if (count == 1) + *data++ = ptr[0]; + else { + *data++ = ptr[0] | (1 << 31); + *data++ = ptr[1]; + } + for (j = ptr[0]; j <= ptr[1]; j++) + kgsl_regread(device, j, data++); + } + + return (size + sizeof(*header)); +} + +static size_t gen8_snapshot_mvc(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_mvc_regs_v3 *header = + (struct kgsl_snapshot_mvc_regs_v3 *)buf; + struct gen8_cluster_registers_info *info = + (struct gen8_cluster_registers_info *)priv; + const u32 *ptr = info->cluster->regs; + u32 *data = (u32 *)(buf + sizeof(*header)); + u32 *src; + u32 cnt; + u32 size = adreno_snapshot_regs_count(ptr) * sizeof(*data); + + if (remain < (sizeof(*header) + size)) { + SNAPSHOT_ERR_NOMEM(device, "MVC REGISTERS"); + return 0; + } + + header->ctxt_id = (info->context_id == STATE_FORCE_CTXT_1) ? 
1 : 0; + header->cluster_id = info->cluster_id; + header->pipe_id = info->pipe_id; + header->location_id = UINT_MAX; + header->sp_id = UINT_MAX; + header->usptp_id = UINT_MAX; + header->slice_id = info->slice_id; + + src = gen8_crashdump_registers->hostptr + info->offset; + + for (; ptr[0] != UINT_MAX; ptr += 2) { + cnt = REG_COUNT(ptr); + + if (cnt == 1) + *data++ = ptr[0]; + else { + *data++ = ptr[0] | (1 << 31); + *data++ = ptr[1]; + } + memcpy(data, src, cnt << 2); + src += cnt; + data += cnt; + } + + return (size + sizeof(*header)); +} + +static void gen8_snapshot_mvc_regs(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + struct gen8_cluster_registers *clusters, + size_t num_cluster) +{ + u32 i, j; + u64 *ptr, offset = 0; + u32 count, slice; + struct gen8_cluster_registers_info info = {0}; + size_t (*func)(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) = gen8_legacy_snapshot_mvc; + + if (CD_SCRIPT_CHECK(device)) { + for (i = 0; i < num_cluster; i++) { + struct gen8_cluster_registers *cluster = &clusters[i]; + + slice = NUMBER_OF_SLICES(cluster->slice_region); + for (j = 0; j < slice; j++) { + info.cluster = cluster; + info.pipe_id = cluster->pipe_id; + info.cluster_id = cluster->cluster_id; + info.context_id = cluster->context_id; + info.slice_id = SLICE_ID(slice, j); + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, func, &info); + } + } + return; + } + + /* Build the crash script */ + ptr = gen8_capturescript->hostptr; + + for (i = 0; i < num_cluster; i++) { + struct gen8_cluster_registers *cluster = &clusters[i]; + + slice = NUMBER_OF_SLICES(cluster->slice_region); + cluster->offset = offset; + + for (j = 0; j < slice; j++) { + const u32 *regs = cluster->regs; + + info.cluster = cluster; + info.pipe_id = cluster->pipe_id; + info.cluster_id = cluster->cluster_id; + info.context_id = cluster->context_id; + info.slice_id = SLICE_ID(slice, j); + info.offset = offset; + + ptr += CD_WRITE(ptr, 
GEN8_CP_APERTURE_CNTL_CD, GEN8_CP_APERTURE_REG_VAL + (j, cluster->pipe_id, cluster->cluster_id, cluster->context_id)); + + if (cluster->sel) + ptr += CD_WRITE(ptr, cluster->sel->cd_reg, cluster->sel->val); + + for (; regs[0] != UINT_MAX; regs += 2) { + count = REG_COUNT(regs); + + ptr += CD_READ(ptr, regs[0], + count, (gen8_crashdump_registers->gpuaddr + offset)); + + offset += count * sizeof(u32); + } + + /* Marker for end of script */ + CD_FINISH(ptr, offset); + + /* Try to run the crash dumper */ + if (_gen8_do_crashdump(device)) + func = gen8_snapshot_mvc; + + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, func, &info); + } + } +} + +/* gen8_dbgc_debug_bus_read() - Read data from trace bus */ +static void gen8_dbgc_debug_bus_read(struct kgsl_device *device, + u32 block_id, u32 index, u32 *val) +{ + u32 reg; + + reg = FIELD_PREP(GENMASK(7, 0), index) | + FIELD_PREP(GENMASK(24, 16), block_id); + + kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_SEL_A, reg); + kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_SEL_B, reg); + kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_SEL_C, reg); + kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_SEL_D, reg); + + /* + * There needs to be a delay of 1 us to ensure enough time for correct + * data is funneled into the trace buffer + */ + udelay(1); + + kgsl_regread(device, GEN8_DBGC_CFG_DBGBUS_TRACE_BUF2, val); + val++; + kgsl_regread(device, GEN8_DBGC_CFG_DBGBUS_TRACE_BUF1, val); +} + +/* gen8_snapshot_dbgc_debugbus_block() - Capture debug data for a gpu block */ +static size_t gen8_snapshot_dbgc_debugbus_block(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debugbus *header = + (struct kgsl_snapshot_debugbus *)buf; + const u32 *block = priv; + u32 i; + u32 *data = (u32 *)(buf + sizeof(*header)); + + if (remain < GEN8_DEBUGBUS_SECTION_SIZE) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + + header->id = *block; + header->count = GEN8_DEBUGBUS_BLOCK_SIZE * 2; + + for 
(i = 0; i < GEN8_DEBUGBUS_BLOCK_SIZE; i++) + gen8_dbgc_debug_bus_read(device, *block, i, &data[i*2]); + + return GEN8_DEBUGBUS_SECTION_SIZE; +} + +static void gen8_dbgc_side_debug_bus_read(struct kgsl_device *device, + u32 block_id, u32 index, u32 *val) +{ + u32 reg = FIELD_PREP(GENMASK(7, 0), index) | + FIELD_PREP(GENMASK(24, 16), block_id); + + kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_SEL_A, reg); + kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_SEL_B, reg); + kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_SEL_C, reg); + kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_SEL_D, reg); + + /* + * There needs to be a delay of 1 us to ensure enough time for correct + * data is funneled into the trace buffer + */ + udelay(1); + + reg = kgsl_regmap_read(&device->regmap, GEN8_DBGC_CFG_DBGBUS_OVER); + + *val = FIELD_GET(GENMASK(27, 24), reg); +} + +static size_t gen8_snapshot_dbgc_side_debugbus_block(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_side_debugbus *header = + (struct kgsl_snapshot_side_debugbus *)buf; + const u32 *block = priv; + int i; + u32 *data = (u32 *)(buf + sizeof(*header)); + size_t size = (GEN8_DEBUGBUS_BLOCK_SIZE * sizeof(u32)) + sizeof(*header); + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + + header->id = *block; + header->size = GEN8_DEBUGBUS_BLOCK_SIZE; + header->valid_data = 0x4; + + for (i = 0; i < GEN8_DEBUGBUS_BLOCK_SIZE; i++) + gen8_dbgc_side_debug_bus_read(device, *block, i, &data[i]); + + return size; +} + +/* gen8_cx_dbgc_debug_bus_read() - Read data from trace bus */ +static void gen8_cx_debug_bus_read(struct kgsl_device *device, + u32 block_id, u32 index, u32 *val) +{ + u32 reg = FIELD_PREP(GENMASK(7, 0), index) | + FIELD_PREP(GENMASK(24, 16), block_id); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_SEL_A, reg); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_SEL_B, reg); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_SEL_C, reg); + kgsl_regwrite(device, 
GEN8_CX_DBGC_CFG_DBGBUS_SEL_D, reg); + + /* + * There needs to be a delay of 1 us to ensure enough time for correct + * data is funneled into the trace buffer + */ + udelay(1); + + kgsl_regread(device, GEN8_CX_DBGC_CFG_DBGBUS_TRACE_BUF2, val); + val++; + kgsl_regread(device, GEN8_CX_DBGC_CFG_DBGBUS_TRACE_BUF1, val); +} + +/* + * gen8_snapshot_cx_dbgc_debugbus_block() - Capture debug data for a gpu + * block from the CX DBGC block + */ +static size_t gen8_snapshot_cx_dbgc_debugbus_block(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debugbus *header = + (struct kgsl_snapshot_debugbus *)buf; + const u32 *block = priv; + int i; + u32 *data = (u32 *)(buf + sizeof(*header)); + + if (remain < GEN8_DEBUGBUS_SECTION_SIZE) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + + header->id = *block; + header->count = GEN8_DEBUGBUS_BLOCK_SIZE * 2; + + for (i = 0; i < GEN8_DEBUGBUS_BLOCK_SIZE; i++) + gen8_cx_debug_bus_read(device, *block, i, &data[i*2]); + + return GEN8_DEBUGBUS_SECTION_SIZE; +} + +/* gen8_cx_side_dbgc_debug_bus_read() - Read data from trace bus */ +static void gen8_cx_side_debug_bus_read(struct kgsl_device *device, + u32 block_id, u32 index, u32 *val) +{ + u32 reg = FIELD_PREP(GENMASK(7, 0), index) | + FIELD_PREP(GENMASK(24, 16), block_id); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_SEL_A, reg); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_SEL_B, reg); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_SEL_C, reg); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_SEL_D, reg); + + /* + * There needs to be a delay of 1 us to ensure enough time for correct + * data is funneled into the trace buffer + */ + udelay(1); + + kgsl_regread(device, GEN8_CX_DBGC_CFG_DBGBUS_OVER, &reg); + *val = FIELD_GET(GENMASK(27, 24), reg); +} + +/* + * gen8_snapshot_cx_dbgc_debugbus_block() - Capture debug data for a gpu + * block from the CX DBGC block + */ +static size_t gen8_snapshot_cx_side_dbgc_debugbus_block(struct 
kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_side_debugbus *header = + (struct kgsl_snapshot_side_debugbus *)buf; + const u32 *block = priv; + int i; + u32 *data = (u32 *)(buf + sizeof(*header)); + size_t size = (GEN8_DEBUGBUS_BLOCK_SIZE * sizeof(u32)) + sizeof(*header); + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + + header->id = *block; + header->size = GEN8_DEBUGBUS_BLOCK_SIZE; + header->valid_data = 0x4; + + for (i = 0; i < GEN8_DEBUGBUS_BLOCK_SIZE; i++) + gen8_cx_side_debug_bus_read(device, *block, i, &data[i]); + + return size; +} + +/* gen8_snapshot_debugbus() - Capture debug bus data */ +static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + u32 i; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_CNTLT, + FIELD_PREP(GENMASK(31, 28), 0xf)); + + kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_CNTLM, + FIELD_PREP(GENMASK(27, 24), 0xf)); + + kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_IVTL_0, 0); + kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_IVTL_1, 0); + kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_IVTL_2, 0); + kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_IVTL_3, 0); + + kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_BYTEL_0, + FIELD_PREP(GENMASK(3, 0), 0x0) | + FIELD_PREP(GENMASK(7, 4), 0x1) | + FIELD_PREP(GENMASK(11, 8), 0x2) | + FIELD_PREP(GENMASK(15, 12), 0x3) | + FIELD_PREP(GENMASK(19, 16), 0x4) | + FIELD_PREP(GENMASK(23, 20), 0x5) | + FIELD_PREP(GENMASK(27, 24), 0x6) | + FIELD_PREP(GENMASK(31, 28), 0x7)); + kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_BYTEL_1, + FIELD_PREP(GENMASK(3, 0), 0x8) | + FIELD_PREP(GENMASK(7, 4), 0x9) | + FIELD_PREP(GENMASK(11, 8), 0xa) | + FIELD_PREP(GENMASK(15, 12), 0xb) | + FIELD_PREP(GENMASK(19, 16), 0xc) | + FIELD_PREP(GENMASK(23, 20), 0xd) | + FIELD_PREP(GENMASK(27, 24), 0xe) | + FIELD_PREP(GENMASK(31, 28), 0xf)); + + kgsl_regwrite(device, 
GEN8_DBGC_CFG_DBGBUS_MASKL_0, 0); + kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_MASKL_1, 0); + kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_MASKL_2, 0); + kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_MASKL_3, 0); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLT, + FIELD_PREP(GENMASK(31, 28), 0xf)); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLM, + FIELD_PREP(GENMASK(27, 24), 0xf)); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_0, + FIELD_PREP(GENMASK(3, 0), 0x0) | + FIELD_PREP(GENMASK(7, 4), 0x1) | + FIELD_PREP(GENMASK(11, 8), 0x2) | + FIELD_PREP(GENMASK(15, 12), 0x3) | + FIELD_PREP(GENMASK(19, 16), 0x4) | + FIELD_PREP(GENMASK(23, 20), 0x5) | + FIELD_PREP(GENMASK(27, 24), 0x6) | + FIELD_PREP(GENMASK(31, 28), 0x7)); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_1, + FIELD_PREP(GENMASK(3, 0), 0x8) | + FIELD_PREP(GENMASK(7, 4), 0x9) | + FIELD_PREP(GENMASK(11, 8), 0xa) | + FIELD_PREP(GENMASK(15, 12), 0xb) | + FIELD_PREP(GENMASK(19, 16), 0xc) | + FIELD_PREP(GENMASK(23, 20), 0xd) | + FIELD_PREP(GENMASK(27, 24), 0xe) | + FIELD_PREP(GENMASK(31, 28), 0xf)); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_2, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_3, 0); + + for (i = 0; i < gen8_snapshot_block_list->debugbus_blocks_len; i++) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, gen8_snapshot_dbgc_debugbus_block, + (void *) &gen8_snapshot_block_list->debugbus_blocks[i]); + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, + snapshot, gen8_snapshot_dbgc_side_debugbus_block, + (void *) 
&gen8_snapshot_block_list->debugbus_blocks[i]); + } + + for (i = 0; i < gen8_snapshot_block_list->gbif_debugbus_blocks_len; i++) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, gen8_snapshot_dbgc_debugbus_block, + (void *) &gen8_snapshot_block_list->gbif_debugbus_blocks[i]); + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, + snapshot, gen8_snapshot_dbgc_side_debugbus_block, + (void *) &gen8_snapshot_block_list->gbif_debugbus_blocks[i]); + } + + /* Dump the CX debugbus data if the block exists */ + if (kgsl_regmap_valid_offset(&device->regmap, GEN8_CX_DBGC_CFG_DBGBUS_SEL_A)) { + for (i = 0; i < gen8_snapshot_block_list->cx_debugbus_blocks_len; i++) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, gen8_snapshot_cx_dbgc_debugbus_block, + (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, + snapshot, gen8_snapshot_cx_side_dbgc_debugbus_block, + (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); + } + } +} + +/* gen8_snapshot_sqe() - Dump SQE data in snapshot */ +static size_t gen8_snapshot_sqe(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + u32 *data = (u32 *)(buf + sizeof(*header)); + struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE); + + if (remain < DEBUG_SECTION_SZ(GEN8_SQE_FW_SNAPSHOT_DWORDS)) { + SNAPSHOT_ERR_NOMEM(device, "SQE VERSION DEBUG"); + return 0; + } + + /* Dump the SQE firmware version */ + header->type = SNAPSHOT_DEBUG_SQE_VERSION; + header->size = GEN8_SQE_FW_SNAPSHOT_DWORDS; + memcpy(data, fw->memdesc->hostptr, (GEN8_SQE_FW_SNAPSHOT_DWORDS * sizeof(u32))); + + return DEBUG_SECTION_SZ(GEN8_SQE_FW_SNAPSHOT_DWORDS); +} + +/* gen8_snapshot_aqe() - Dump AQE data in snapshot */ +static size_t 
gen8_snapshot_aqe(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + u32 *data = (u32 *)(buf + sizeof(*header)); + struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_AQE); + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) + return 0; + + if (remain < DEBUG_SECTION_SZ(1)) { + SNAPSHOT_ERR_NOMEM(device, "AQE VERSION DEBUG"); + return 0; + } + + /* Dump the AQE firmware version */ + header->type = SNAPSHOT_DEBUG_AQE_VERSION; + header->size = 1; + *data = fw->version; + + return DEBUG_SECTION_SZ(1); +} + +/* Snapshot the preemption related buffers */ +static size_t snapshot_preemption_record(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_memdesc *memdesc = priv; + struct kgsl_snapshot_gpu_object_v2 *header = + (struct kgsl_snapshot_gpu_object_v2 *)buf; + u8 *ptr = buf + sizeof(*header); + const struct adreno_gen8_core *gpucore = to_gen8_core(ADRENO_DEVICE(device)); + u64 ctxt_record_size = GEN8_CP_CTXRECORD_SIZE_IN_BYTES; + + if (gpucore->ctxt_record_size) + ctxt_record_size = gpucore->ctxt_record_size; + + ctxt_record_size = min_t(u64, ctxt_record_size, device->snapshot_ctxt_record_size); + + if (remain < (ctxt_record_size + sizeof(*header))) { + SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); + return 0; + } + + header->size = ctxt_record_size >> 2; + header->gpuaddr = memdesc->gpuaddr; + header->ptbase = + kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); + header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; + + memcpy(ptr, memdesc->hostptr, ctxt_record_size); + + return ctxt_record_size + sizeof(*header); +} + +static void gen8_reglist_snapshot(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + u64 *ptr, offset = 0; + u32 i, j, r, slices; + struct gen8_reg_list *reg_list = gen8_snapshot_block_list->reg_list; + size_t (*func)(struct kgsl_device *device, 
u8 *buf, size_t remain, + void *priv) = gen8_legacy_snapshot_registers; + struct gen8_reg_list_info info = {0}; + + if (CD_SCRIPT_CHECK(device)) { + for (i = 0; reg_list[i].regs; i++) { + struct gen8_reg_list *regs = &reg_list[i]; + + slices = NUMBER_OF_SLICES(regs->slice_region); + for (j = 0; j < slices; j++) { + info.regs = regs; + info.slice_id = SLICE_ID(slices, j); + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3, + snapshot, func, &info); + } + } + return; + } + + /* Build the crash script */ + ptr = (u64 *)gen8_capturescript->hostptr; + + for (i = 0; reg_list[i].regs; i++) { + struct gen8_reg_list *regs = &reg_list[i]; + + slices = NUMBER_OF_SLICES(regs->slice_region); + regs->offset = offset; + + for (j = 0; j < slices; j++) { + const u32 *regs_ptr = regs->regs; + + ptr += CD_WRITE(ptr, GEN8_CP_APERTURE_CNTL_CD, GEN8_CP_APERTURE_REG_VAL + (j, 0, 0, 0)); + /* Program the SEL_CNTL_CD register appropriately */ + if (regs->sel) + ptr += CD_WRITE(ptr, regs->sel->cd_reg, regs->sel->val); + info.regs = regs; + info.slice_id = SLICE_ID(slices, j); + info.offset = offset; + + for (; regs_ptr[0] != UINT_MAX; regs_ptr += 2) { + r = REG_COUNT(regs_ptr); + ptr += CD_READ(ptr, regs_ptr[0], r, + (gen8_crashdump_registers->gpuaddr + offset)); + offset += r * sizeof(u32); + } + + /* Marker for end of script */ + CD_FINISH(ptr, offset); + + /* Try to run the crash dumper */ + if (_gen8_do_crashdump(device)) + func = gen8_snapshot_registers; + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3, + snapshot, func, &info); + } + } +} + +void gen8_snapshot_external_core_regs(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + const u32 **external_core_regs; + u32 i, num_external_core_regs; + const struct adreno_gen8_core *gpucore = to_gen8_core(ADRENO_DEVICE(device)); + + gen8_snapshot_block_list = gpucore->gen8_snapshot_block_list; + external_core_regs = gen8_snapshot_block_list->external_core_regs; + num_external_core_regs = 
gen8_snapshot_block_list->num_external_core_regs; + + for (i = 0; i < num_external_core_regs; i++) + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, + snapshot, adreno_snapshot_registers_v2, + (void *) external_core_regs[i]); +} + +/* + * gen8_snapshot() - GEN8 GPU snapshot function + * @adreno_dev: Device being snapshotted + * @snapshot: Pointer to the snapshot instance + * + * This is where all of the GEN8 specific bits and pieces are grabbed + * into the snapshot memory + */ +void gen8_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_ringbuffer *rb; + u32 i; + const struct adreno_gen8_core *gpucore = to_gen8_core(ADRENO_DEVICE(device)); + int is_current_rt; + + gen8_crashdump_timedout = false; + gen8_snapshot_block_list = gpucore->gen8_snapshot_block_list; + + /* External registers are dumped in the beginning of gmu snapshot */ + if (!gmu_core_isenabled(device)) + gen8_snapshot_external_core_regs(device, snapshot); + + gen8_snapshot_trace_buffer(device, snapshot); + + gen8_snapshot_debugbus(adreno_dev, snapshot); + + if (!adreno_gx_is_on(adreno_dev)) + return; + + is_current_rt = rt_task(current); + + if (is_current_rt) + sched_set_normal(current, 0); + + gen8_regread64_aperture(device, GEN8_CP_IB1_BASE_LO_PIPE, + GEN8_CP_IB1_BASE_HI_PIPE, &snapshot->ib1base, PIPE_BR, 0, 0); + + gen8_regread64_aperture(device, GEN8_CP_IB2_BASE_LO_PIPE, + GEN8_CP_IB2_BASE_HI_PIPE, &snapshot->ib2base, PIPE_BR, 0, 0); + + gen8_regread_aperture(device, GEN8_CP_IB1_REM_SIZE_PIPE, + &snapshot->ib1size, PIPE_BR, 0, 0); + gen8_regread_aperture(device, GEN8_CP_IB2_REM_SIZE_PIPE, + &snapshot->ib2size, PIPE_BR, 0, 0); + + gen8_regread64_aperture(device, GEN8_CP_IB1_BASE_LO_PIPE, + GEN8_CP_IB1_BASE_HI_PIPE, &snapshot->ib1base_lpac, PIPE_LPAC, 0, 0); + + gen8_regread64_aperture(device, GEN8_CP_IB2_BASE_LO_PIPE, + GEN8_CP_IB2_BASE_HI_PIPE, &snapshot->ib2base_lpac, PIPE_LPAC, 
0, 0); + + gen8_regread_aperture(device, GEN8_CP_IB1_REM_SIZE_PIPE, + &snapshot->ib1size_lpac, PIPE_LPAC, 0, 0); + gen8_regread_aperture(device, GEN8_CP_IB2_REM_SIZE_PIPE, + &snapshot->ib2size_lpac, PIPE_LPAC, 0, 0); + + /* Assert the isStatic bit before triggering snapshot */ + kgsl_regwrite(device, GEN8_RBBM_SNAPSHOT_STATUS, 0x1); + + /* Dump the registers which get affected by crash dumper trigger */ + for (i = 0; i < gen8_snapshot_block_list->num_pre_crashdumper_regs; i++) { + struct gen8_reg_list *regs = &gen8_snapshot_block_list->pre_crashdumper_regs[i]; + struct gen8_reg_list_info info = {0}; + u32 j, slices; + + slices = NUMBER_OF_SLICES(regs->slice_region); + + for (j = 0; j < slices; j++) { + info.regs = regs; + info.slice_id = SLICE_ID(slices, j); + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3, + snapshot, gen8_legacy_snapshot_registers, &info); + } + } + + gen8_reglist_snapshot(device, snapshot); + + for (i = 0; i < gen8_snapshot_block_list->index_registers_len; i++) { + kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL + (0, gen8_snapshot_block_list->index_registers[i].pipe_id, 0, 0)); + + kgsl_snapshot_indexed_registers_v2(device, snapshot, + gen8_snapshot_block_list->index_registers[i].addr, + gen8_snapshot_block_list->index_registers[i].data, 0, + gen8_snapshot_block_list->index_registers[i].size, + gen8_snapshot_block_list->index_registers[i].pipe_id, UINT_MAX); + } + + /* SQE Firmware */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, gen8_snapshot_sqe, NULL); + + /* AQE Firmware */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, gen8_snapshot_aqe, NULL); + + /* Mempool debug data */ + gen8_snapshot_mempool(device, snapshot); + + /* CP MVC register section */ + gen8_snapshot_mvc_regs(device, snapshot, + gen8_snapshot_block_list->cp_clusters, gen8_snapshot_block_list->num_cp_clusters); + + /* MVC register section */ + gen8_snapshot_mvc_regs(device, 
snapshot, + gen8_snapshot_block_list->clusters, gen8_snapshot_block_list->num_clusters); + + /* registers dumped through DBG AHB */ + gen8_snapshot_dbgahb_regs(device, snapshot); + + /* Shader memory */ + gen8_snapshot_shader(device, snapshot); + + kgsl_regwrite(device, GEN8_RBBM_SNAPSHOT_STATUS, 0x0); + + /* Preemption record */ + if (adreno_is_preemption_enabled(adreno_dev)) { + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, snapshot_preemption_record, + rb->preemption_desc); + } + } + if (is_current_rt) + sched_set_fifo(current); +} + +void gen8_crashdump_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + ret = adreno_allocate_global(device, &gen8_capturescript, + 50 * PAGE_SIZE, 0, KGSL_MEMFLAGS_GPUREADONLY, + KGSL_MEMDESC_PRIVILEGED, "capturescript"); + + if (!ret) + ret = adreno_allocate_global(device, &gen8_crashdump_registers, + 200 * PAGE_SIZE, 0, 0, + KGSL_MEMDESC_PRIVILEGED, "capturescript_regs"); + + if (ret) + dev_err(device->dev, "Failed to init crashdumper err = %d\n", ret); +} diff --git a/adreno_gen8_snapshot.h b/adreno_gen8_snapshot.h new file mode 100644 index 0000000000..83090b67de --- /dev/null +++ b/adreno_gen8_snapshot.h @@ -0,0 +1,651 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ +#ifndef __ADRENO_GEN8_SNAPSHOT_H +#define __ADRENO_GEN8_SNAPSHOT_H + +#include "adreno.h" +#include "adreno_gen8.h" +#include "kgsl_regmap.h" +#include "kgsl_snapshot.h" + +enum cluster_id { + CLUSTER_NONE = 0, + CLUSTER_FE_US = 1, + CLUSTER_FE_S = 2, + CLUSTER_SP_VS = 3, + CLUSTER_VPC_VS = 4, + CLUSTER_VPC_US = 5, + CLUSTER_GRAS = 6, + CLUSTER_SP_PS = 7, + CLUSTER_VPC_PS = 8, + CLUSTER_PS = 9, +}; + +enum location_id { + HLSQ_STATE = 0, + HLSQ_DP = 1, + SP_TOP = 2, + USPTP = 3, + HLSQ_DP_STR = 4, +}; + +#define STATE_NON_CONTEXT 0 +#define STATE_TOGGLE_CTXT 1 +#define STATE_FORCE_CTXT_0 2 +#define STATE_FORCE_CTXT_1 3 + +#define UNSLICE 0 +#define SLICE 1 + +#define MAX_PHYSICAL_SLICES 3 + +#define GEN8_DEBUGBUS_BLOCK_SIZE 0x100 + +/* Number of dword to dump in snapshot for CP SQE */ +#define GEN8_SQE_FW_SNAPSHOT_DWORDS 5 + +struct sel_reg { + u32 host_reg; + u32 cd_reg; + u32 val; +}; + +struct gen8_shader_block_info { + struct gen8_shader_block *block; + u32 sp_id; + u32 usptp; + u32 slice_id; + u32 location_id; + u32 context_id; + u32 bank; + u64 offset; +}; + +struct gen8_shader_block { + /* statetype: Type identifier for the block */ + u32 statetype; + /* size: Size of the block (in dwords) */ + u32 size; + /* num_sps: The number of SPs to dump */ + u32 num_sps; + /* num_usptps: The number of USPTPs to dump */ + u32 num_usptps; + /* pipeid: Pipe identifier for the block data */ + u32 pipeid; + /* location: Location identifier for the block data */ + u32 location; + /* num_slices: the number of slices to dump */ + u32 num_slices; + /* num_ctx: repeat id to loop */ + u32 num_ctx; + /* offset: The offset in the snasphot dump */ + u64 offset; +}; + +struct gen8_cluster_registers_info { + struct gen8_cluster_registers *cluster; + u32 cluster_id; + u32 slice_id; + u32 pipe_id; + u32 context_id; + u64 offset; +}; + +struct gen8_cluster_registers { + /* cluster_id: Cluster identifier */ + u32 cluster_id; + /* slice_region: is it slice or unslice */ + u32 
slice_region; + /* pipe_id: Pipe Identifier */ + u32 pipe_id; + /* context_id: one of STATE_ that identifies the context to dump */ + u32 context_id; + /* regs: Pointer to an array of register pairs */ + const u32 *regs; + /* sel: Pointer to a selector register to write before reading */ + const struct sel_reg *sel; + /* offset: Internal variable to track the state of the crashdump */ + u32 offset; +}; + +struct gen8_reg_list_info { + struct gen8_reg_list *regs; + u32 cluster_id; + u32 slice_id; + u32 pipe_id; + u32 sp_id; + u32 usptp_id; + u32 context_id; + u64 offset; +}; + +struct gen8_sptp_cluster_registers_info { + struct gen8_sptp_cluster_registers *cluster; + u32 cluster_id; + u32 slice_id; + u32 pipe_id; + u32 sp_id; + u32 usptp_id; + u32 location_id; + u32 context_id; + u32 statetype_id; + u64 offset; +}; + +struct gen8_sptp_cluster_registers { + /* cluster_id: Cluster identifier */ + u32 cluster_id; + /* slice_region: is it slice or unslice */ + u32 slice_region; + /* num_sps: The number of SPs to dump */ + u32 num_sps; + /* num_usptps: The number of USPs to dump */ + u32 num_usptps; + /* statetype: SP block state type for the cluster */ + u32 statetype; + /* pipe_id: Pipe identifier */ + u32 pipe_id; + /* context_id: Context identifier */ + u32 context_id; + /* location_id: Location identifier */ + u32 location_id; + /* regs: Pointer to the list of register pairs to read */ + const u32 *regs; + /* regbase: Dword offset of the register block in the GPu register space */ + u32 regbase; + /* offset: Internal variable used to track the crashdump state */ + u32 offset; +}; + +struct gen8_cp_indexed_reg { + u32 addr; + u32 data; + u32 slice_region; + u32 pipe_id; + u32 size; +}; + +struct gen8_reg_list { + u32 slice_region; + const u32 *regs; + const struct sel_reg *sel; + u64 offset; +}; + +struct gen8_trace_buffer_info { + u16 dbgc_ctrl; + u16 segment; + u16 granularity; + u16 ping_blk[TRACE_BUF_NUM_SIG]; + u16 ping_idx[TRACE_BUF_NUM_SIG]; +}; + +enum 
gen8_debugbus_ids { + DEBUGBUS_GBIF_CX_GC_US_I_0 = 1, + DEBUGBUS_GMU_CX_GC_US_I_0 = 2, + DEBUGBUS_CX_GC_US_I_0 = 3, + DEBUGBUS_GBIF_GX_GC_US_I_0 = 8, + DEBUGBUS_GMU_GX_GC_US_I_0 = 9, + DEBUGBUS_DBGC_GC_US_I_0 = 10, + DEBUGBUS_RBBM_GC_US_I_0 = 11, + DEBUGBUS_LARC_GC_US_I_0 = 12, + DEBUGBUS_COM_GC_US_I_0 = 13, + DEBUGBUS_HLSQ_GC_US_I_0 = 14, + DEBUGBUS_CGC_GC_US_I_0 = 15, + DEBUGBUS_VSC_GC_US_I_0_0 = 20, + DEBUGBUS_VSC_GC_US_I_0_1 = 21, + DEBUGBUS_UFC_GC_US_I_0 = 24, + DEBUGBUS_UFC_GC_US_I_1 = 25, + DEBUGBUS_CP_GC_US_I_0_0 = 40, + DEBUGBUS_CP_GC_US_I_0_1 = 41, + DEBUGBUS_CP_GC_US_I_0_2 = 42, + DEBUGBUS_PC_BR_US_I_0 = 56, + DEBUGBUS_PC_BV_US_I_0 = 57, + DEBUGBUS_GPC_BR_US_I_0 = 58, + DEBUGBUS_GPC_BV_US_I_0 = 59, + DEBUGBUS_VPC_BR_US_I_0 = 60, + DEBUGBUS_VPC_BV_US_I_0 = 61, + DEBUGBUS_UCHE_WRAPPER_GC_US_I_0 = 80, + DEBUGBUS_UCHE_GC_US_I_0 = 81, + DEBUGBUS_UCHE_GC_US_I_1 = 82, + DEBUGBUS_CP_GC_S_0_I_0 = 128, + DEBUGBUS_PC_BR_S_0_I_0 = 129, + DEBUGBUS_PC_BV_S_0_I_0 = 130, + DEBUGBUS_TESS_GC_S_0_I_0 = 131, + DEBUGBUS_TSEFE_GC_S_0_I_0 = 132, + DEBUGBUS_TSEBE_GC_S_0_I_0 = 133, + DEBUGBUS_RAS_GC_S_0_I_0 = 134, + DEBUGBUS_LRZ_BR_S_0_I_0 = 135, + DEBUGBUS_LRZ_BV_S_0_I_0 = 136, + DEBUGBUS_VFDP_GC_S_0_I_0 = 137, + DEBUGBUS_GPC_BR_S_0_I_0 = 138, + DEBUGBUS_GPC_BV_S_0_I_0 = 139, + DEBUGBUS_VPCFE_BR_S_0_I_0 = 140, + DEBUGBUS_VPCFE_BV_S_0_I_0 = 141, + DEBUGBUS_VPCBE_BR_S_0_I_0 = 142, + DEBUGBUS_VPCBE_BV_S_0_I_0 = 143, + DEBUGBUS_CCHE_GC_S_0_I_0 = 144, + DEBUGBUS_DBGC_GC_S_0_I_0 = 145, + DEBUGBUS_LARC_GC_S_0_I_0 = 146, + DEBUGBUS_RBBM_GC_S_0_I_0 = 147, + DEBUGBUS_CCRE_GC_S_0_I_0 = 148, + DEBUGBUS_CGC_GC_S_0_I_0 = 149, + DEBUGBUS_GMU_GC_S_0_I_0 = 150, + DEBUGBUS_SLICE_GC_S_0_I_0 = 151, + DEBUGBUS_HLSQ_SPTP_STAR_GC_S_0_I_0 = 152, + DEBUGBUS_USP_GC_S_0_I_0 = 160, + DEBUGBUS_USP_GC_S_0_I_1 = 161, + DEBUGBUS_USPTP_GC_S_0_I_0 = 166, + DEBUGBUS_USPTP_GC_S_0_I_1 = 167, + DEBUGBUS_USPTP_GC_S_0_I_2 = 168, + DEBUGBUS_USPTP_GC_S_0_I_3 = 169, + DEBUGBUS_TP_GC_S_0_I_0 = 178, + 
DEBUGBUS_TP_GC_S_0_I_1 = 179, + DEBUGBUS_TP_GC_S_0_I_2 = 180, + DEBUGBUS_TP_GC_S_0_I_3 = 181, + DEBUGBUS_RB_GC_S_0_I_0 = 190, + DEBUGBUS_RB_GC_S_0_I_1 = 191, + DEBUGBUS_CCU_GC_S_0_I_0 = 196, + DEBUGBUS_CCU_GC_S_0_I_1 = 197, + DEBUGBUS_HLSQ_GC_S_0_I_0 = 202, + DEBUGBUS_HLSQ_GC_S_0_I_1 = 203, + DEBUGBUS_VFD_GC_S_0_I_0 = 208, + DEBUGBUS_VFD_GC_S_0_I_1 = 209, + DEBUGBUS_CP_GC_S_1_I_0 = 256, + DEBUGBUS_PC_BR_S_1_I_0 = 257, + DEBUGBUS_PC_BV_S_1_I_0 = 258, + DEBUGBUS_TESS_GC_S_1_I_0 = 259, + DEBUGBUS_TSEFE_GC_S_1_I_0 = 260, + DEBUGBUS_TSEBE_GC_S_1_I_0 = 261, + DEBUGBUS_RAS_GC_S_1_I_0 = 262, + DEBUGBUS_LRZ_BR_S_1_I_0 = 263, + DEBUGBUS_LRZ_BV_S_1_I_0 = 264, + DEBUGBUS_VFDP_GC_S_1_I_0 = 265, + DEBUGBUS_GPC_BR_S_1_I_0 = 266, + DEBUGBUS_GPC_BV_S_1_I_0 = 267, + DEBUGBUS_VPCFE_BR_S_1_I_0 = 268, + DEBUGBUS_VPCFE_BV_S_1_I_0 = 269, + DEBUGBUS_VPCBE_BR_S_1_I_0 = 270, + DEBUGBUS_VPCBE_BV_S_1_I_0 = 271, + DEBUGBUS_CCHE_GC_S_1_I_0 = 272, + DEBUGBUS_DBGC_GC_S_1_I_0 = 273, + DEBUGBUS_LARC_GC_S_1_I_0 = 274, + DEBUGBUS_RBBM_GC_S_1_I_0 = 275, + DEBUGBUS_CCRE_GC_S_1_I_0 = 276, + DEBUGBUS_CGC_GC_S_1_I_0 = 277, + DEBUGBUS_GMU_GC_S_1_I_0 = 278, + DEBUGBUS_SLICE_GC_S_1_I_0 = 279, + DEBUGBUS_HLSQ_SPTP_STAR_GC_S_1_I_0 = 280, + DEBUGBUS_USP_GC_S_1_I_0 = 288, + DEBUGBUS_USP_GC_S_1_I_1 = 289, + DEBUGBUS_USPTP_GC_S_1_I_0 = 294, + DEBUGBUS_USPTP_GC_S_1_I_1 = 295, + DEBUGBUS_USPTP_GC_S_1_I_2 = 296, + DEBUGBUS_USPTP_GC_S_1_I_3 = 297, + DEBUGBUS_TP_GC_S_1_I_0 = 306, + DEBUGBUS_TP_GC_S_1_I_1 = 307, + DEBUGBUS_TP_GC_S_1_I_2 = 308, + DEBUGBUS_TP_GC_S_1_I_3 = 309, + DEBUGBUS_RB_GC_S_1_I_0 = 318, + DEBUGBUS_RB_GC_S_1_I_1 = 319, + DEBUGBUS_CCU_GC_S_1_I_0 = 324, + DEBUGBUS_CCU_GC_S_1_I_1 = 325, + DEBUGBUS_HLSQ_GC_S_1_I_0 = 330, + DEBUGBUS_HLSQ_GC_S_1_I_1 = 331, + DEBUGBUS_VFD_GC_S_1_I_0 = 336, + DEBUGBUS_VFD_GC_S_1_I_1 = 337, + DEBUGBUS_CP_GC_S_2_I_0 = 384, + DEBUGBUS_PC_BR_S_2_I_0 = 385, + DEBUGBUS_PC_BV_S_2_I_0 = 386, + DEBUGBUS_TESS_GC_S_2_I_0 = 387, + DEBUGBUS_TSEFE_GC_S_2_I_0 = 388, + 
DEBUGBUS_TSEBE_GC_S_2_I_0 = 389, + DEBUGBUS_RAS_GC_S_2_I_0 = 390, + DEBUGBUS_LRZ_BR_S_2_I_0 = 391, + DEBUGBUS_LRZ_BV_S_2_I_0 = 392, + DEBUGBUS_VFDP_GC_S_2_I_0 = 393, + DEBUGBUS_GPC_BR_S_2_I_0 = 394, + DEBUGBUS_GPC_BV_S_2_I_0 = 395, + DEBUGBUS_VPCFE_BR_S_2_I_0 = 396, + DEBUGBUS_VPCFE_BV_S_2_I_0 = 397, + DEBUGBUS_VPCBE_BR_S_2_I_0 = 398, + DEBUGBUS_VPCBE_BV_S_2_I_0 = 399, + DEBUGBUS_CCHE_GC_S_2_I_0 = 400, + DEBUGBUS_DBGC_GC_S_2_I_0 = 401, + DEBUGBUS_LARC_GC_S_2_I_0 = 402, + DEBUGBUS_RBBM_GC_S_2_I_0 = 403, + DEBUGBUS_CCRE_GC_S_2_I_0 = 404, + DEBUGBUS_CGC_GC_S_2_I_0 = 405, + DEBUGBUS_GMU_GC_S_2_I_0 = 406, + DEBUGBUS_SLICE_GC_S_2_I_0 = 407, + DEBUGBUS_HLSQ_SPTP_STAR_GC_S_2_I_0 = 408, + DEBUGBUS_USP_GC_S_2_I_0 = 416, + DEBUGBUS_USP_GC_S_2_I_1 = 417, + DEBUGBUS_USPTP_GC_S_2_I_0 = 422, + DEBUGBUS_USPTP_GC_S_2_I_1 = 423, + DEBUGBUS_USPTP_GC_S_2_I_2 = 424, + DEBUGBUS_USPTP_GC_S_2_I_3 = 425, + DEBUGBUS_TP_GC_S_2_I_0 = 434, + DEBUGBUS_TP_GC_S_2_I_1 = 435, + DEBUGBUS_TP_GC_S_2_I_2 = 436, + DEBUGBUS_TP_GC_S_2_I_3 = 437, + DEBUGBUS_RB_GC_S_2_I_0 = 446, + DEBUGBUS_RB_GC_S_2_I_1 = 447, + DEBUGBUS_CCU_GC_S_2_I_0 = 452, + DEBUGBUS_CCU_GC_S_2_I_1 = 453, + DEBUGBUS_HLSQ_GC_S_2_I_0 = 458, + DEBUGBUS_HLSQ_GC_S_2_I_1 = 459, + DEBUGBUS_VFD_GC_S_2_I_0 = 464, + DEBUGBUS_VFD_GC_S_2_I_1 = 465, +}; + +static const u32 gen8_debugbus_blocks[] = { + DEBUGBUS_GMU_GX_GC_US_I_0, + DEBUGBUS_DBGC_GC_US_I_0, + DEBUGBUS_RBBM_GC_US_I_0, + DEBUGBUS_LARC_GC_US_I_0, + DEBUGBUS_COM_GC_US_I_0, + DEBUGBUS_HLSQ_GC_US_I_0, + DEBUGBUS_CGC_GC_US_I_0, + DEBUGBUS_VSC_GC_US_I_0_0, + DEBUGBUS_VSC_GC_US_I_0_1, + DEBUGBUS_UFC_GC_US_I_0, + DEBUGBUS_UFC_GC_US_I_1, + DEBUGBUS_CP_GC_US_I_0_0, + DEBUGBUS_CP_GC_US_I_0_1, + DEBUGBUS_CP_GC_US_I_0_2, + DEBUGBUS_PC_BR_US_I_0, + DEBUGBUS_PC_BV_US_I_0, + DEBUGBUS_GPC_BR_US_I_0, + DEBUGBUS_GPC_BV_US_I_0, + DEBUGBUS_VPC_BR_US_I_0, + DEBUGBUS_VPC_BV_US_I_0, + DEBUGBUS_UCHE_WRAPPER_GC_US_I_0, + DEBUGBUS_UCHE_GC_US_I_0, + DEBUGBUS_UCHE_GC_US_I_1, + DEBUGBUS_CP_GC_S_0_I_0, + 
DEBUGBUS_PC_BR_S_0_I_0, + DEBUGBUS_PC_BV_S_0_I_0, + DEBUGBUS_TESS_GC_S_0_I_0, + DEBUGBUS_TSEFE_GC_S_0_I_0, + DEBUGBUS_TSEBE_GC_S_0_I_0, + DEBUGBUS_RAS_GC_S_0_I_0, + DEBUGBUS_LRZ_BR_S_0_I_0, + DEBUGBUS_LRZ_BV_S_0_I_0, + DEBUGBUS_VFDP_GC_S_0_I_0, + DEBUGBUS_GPC_BR_S_0_I_0, + DEBUGBUS_GPC_BV_S_0_I_0, + DEBUGBUS_VPCFE_BR_S_0_I_0, + DEBUGBUS_VPCFE_BV_S_0_I_0, + DEBUGBUS_VPCBE_BR_S_0_I_0, + DEBUGBUS_VPCBE_BV_S_0_I_0, + DEBUGBUS_CCHE_GC_S_0_I_0, + DEBUGBUS_DBGC_GC_S_0_I_0, + DEBUGBUS_LARC_GC_S_0_I_0, + DEBUGBUS_RBBM_GC_S_0_I_0, + DEBUGBUS_CCRE_GC_S_0_I_0, + DEBUGBUS_CGC_GC_S_0_I_0, + DEBUGBUS_GMU_GC_S_0_I_0, + DEBUGBUS_SLICE_GC_S_0_I_0, + DEBUGBUS_HLSQ_SPTP_STAR_GC_S_0_I_0, + DEBUGBUS_USP_GC_S_0_I_0, + DEBUGBUS_USP_GC_S_0_I_1, + DEBUGBUS_USPTP_GC_S_0_I_0, + DEBUGBUS_USPTP_GC_S_0_I_1, + DEBUGBUS_USPTP_GC_S_0_I_2, + DEBUGBUS_USPTP_GC_S_0_I_3, + DEBUGBUS_TP_GC_S_0_I_0, + DEBUGBUS_TP_GC_S_0_I_1, + DEBUGBUS_TP_GC_S_0_I_2, + DEBUGBUS_TP_GC_S_0_I_3, + DEBUGBUS_RB_GC_S_0_I_0, + DEBUGBUS_RB_GC_S_0_I_1, + DEBUGBUS_CCU_GC_S_0_I_0, + DEBUGBUS_CCU_GC_S_0_I_1, + DEBUGBUS_HLSQ_GC_S_0_I_0, + DEBUGBUS_HLSQ_GC_S_0_I_1, + DEBUGBUS_VFD_GC_S_0_I_0, + DEBUGBUS_VFD_GC_S_0_I_1, + DEBUGBUS_CP_GC_S_1_I_0, + DEBUGBUS_PC_BR_S_1_I_0, + DEBUGBUS_PC_BV_S_1_I_0, + DEBUGBUS_TESS_GC_S_1_I_0, + DEBUGBUS_TSEFE_GC_S_1_I_0, + DEBUGBUS_TSEBE_GC_S_1_I_0, + DEBUGBUS_RAS_GC_S_1_I_0, + DEBUGBUS_LRZ_BR_S_1_I_0, + DEBUGBUS_LRZ_BV_S_1_I_0, + DEBUGBUS_VFDP_GC_S_1_I_0, + DEBUGBUS_GPC_BR_S_1_I_0, + DEBUGBUS_GPC_BV_S_1_I_0, + DEBUGBUS_VPCFE_BR_S_1_I_0, + DEBUGBUS_VPCFE_BV_S_1_I_0, + DEBUGBUS_VPCBE_BR_S_1_I_0, + DEBUGBUS_VPCBE_BV_S_1_I_0, + DEBUGBUS_CCHE_GC_S_1_I_0, + DEBUGBUS_DBGC_GC_S_1_I_0, + DEBUGBUS_LARC_GC_S_1_I_0, + DEBUGBUS_RBBM_GC_S_1_I_0, + DEBUGBUS_CCRE_GC_S_1_I_0, + DEBUGBUS_CGC_GC_S_1_I_0, + DEBUGBUS_GMU_GC_S_1_I_0, + DEBUGBUS_SLICE_GC_S_1_I_0, + DEBUGBUS_HLSQ_SPTP_STAR_GC_S_1_I_0, + DEBUGBUS_USP_GC_S_1_I_0, + DEBUGBUS_USP_GC_S_1_I_1, + DEBUGBUS_USPTP_GC_S_1_I_0, + DEBUGBUS_USPTP_GC_S_1_I_1, + 
DEBUGBUS_USPTP_GC_S_1_I_2, + DEBUGBUS_USPTP_GC_S_1_I_3, + DEBUGBUS_TP_GC_S_1_I_0, + DEBUGBUS_TP_GC_S_1_I_1, + DEBUGBUS_TP_GC_S_1_I_2, + DEBUGBUS_TP_GC_S_1_I_3, + DEBUGBUS_RB_GC_S_1_I_0, + DEBUGBUS_RB_GC_S_1_I_1, + DEBUGBUS_CCU_GC_S_1_I_0, + DEBUGBUS_CCU_GC_S_1_I_1, + DEBUGBUS_HLSQ_GC_S_1_I_0, + DEBUGBUS_HLSQ_GC_S_1_I_1, + DEBUGBUS_VFD_GC_S_1_I_0, + DEBUGBUS_VFD_GC_S_1_I_1, + DEBUGBUS_CP_GC_S_2_I_0, + DEBUGBUS_PC_BR_S_2_I_0, + DEBUGBUS_PC_BV_S_2_I_0, + DEBUGBUS_TESS_GC_S_2_I_0, + DEBUGBUS_TSEFE_GC_S_2_I_0, + DEBUGBUS_TSEBE_GC_S_2_I_0, + DEBUGBUS_RAS_GC_S_2_I_0, + DEBUGBUS_LRZ_BR_S_2_I_0, + DEBUGBUS_LRZ_BV_S_2_I_0, + DEBUGBUS_VFDP_GC_S_2_I_0, + DEBUGBUS_GPC_BR_S_2_I_0, + DEBUGBUS_GPC_BV_S_2_I_0, + DEBUGBUS_VPCFE_BR_S_2_I_0, + DEBUGBUS_VPCFE_BV_S_2_I_0, + DEBUGBUS_VPCBE_BR_S_2_I_0, + DEBUGBUS_VPCBE_BV_S_2_I_0, + DEBUGBUS_CCHE_GC_S_2_I_0, + DEBUGBUS_DBGC_GC_S_2_I_0, + DEBUGBUS_LARC_GC_S_2_I_0, + DEBUGBUS_RBBM_GC_S_2_I_0, + DEBUGBUS_CCRE_GC_S_2_I_0, + DEBUGBUS_CGC_GC_S_2_I_0, + DEBUGBUS_GMU_GC_S_2_I_0, + DEBUGBUS_SLICE_GC_S_2_I_0, + DEBUGBUS_HLSQ_SPTP_STAR_GC_S_2_I_0, + DEBUGBUS_USP_GC_S_2_I_0, + DEBUGBUS_USP_GC_S_2_I_1, + DEBUGBUS_USPTP_GC_S_2_I_0, + DEBUGBUS_USPTP_GC_S_2_I_1, + DEBUGBUS_USPTP_GC_S_2_I_2, + DEBUGBUS_USPTP_GC_S_2_I_3, + DEBUGBUS_TP_GC_S_2_I_0, + DEBUGBUS_TP_GC_S_2_I_1, + DEBUGBUS_TP_GC_S_2_I_2, + DEBUGBUS_TP_GC_S_2_I_3, + DEBUGBUS_RB_GC_S_2_I_0, + DEBUGBUS_RB_GC_S_2_I_1, + DEBUGBUS_CCU_GC_S_2_I_0, + DEBUGBUS_CCU_GC_S_2_I_1, + DEBUGBUS_HLSQ_GC_S_2_I_0, + DEBUGBUS_HLSQ_GC_S_2_I_1, + DEBUGBUS_VFD_GC_S_2_I_0, + DEBUGBUS_VFD_GC_S_2_I_1, +}; + +static const u32 gen8_gbif_debugbus_blocks[] = { + DEBUGBUS_GBIF_GX_GC_US_I_0, +}; + +static const u32 gen8_cx_debugbus_blocks[] = { + DEBUGBUS_GBIF_CX_GC_US_I_0, + DEBUGBUS_GMU_CX_GC_US_I_0, + DEBUGBUS_CX_GC_US_I_0, +}; + +enum gen8_statetype_ids { + TP0_NCTX_REG = 0, + TP0_CTX0_3D_CVS_REG = 1, + TP0_CTX0_3D_CPS_REG = 2, + TP0_CTX1_3D_CVS_REG = 3, + TP0_CTX1_3D_CPS_REG = 4, + TP0_CTX2_3D_CPS_REG = 5, + 
TP0_CTX3_3D_CPS_REG = 6, + TP0_TMO_DATA = 9, + TP0_SMO_DATA = 10, + TP0_MIPMAP_BASE_DATA = 11, + SP_INST_DATA_3 = 31, + SP_NCTX_REG = 32, + SP_CTX0_3D_CVS_REG = 33, + SP_CTX0_3D_CPS_REG = 34, + SP_CTX1_3D_CVS_REG = 35, + SP_CTX1_3D_CPS_REG = 36, + SP_CTX2_3D_CPS_REG = 37, + SP_CTX3_3D_CPS_REG = 38, + SP_INST_DATA = 39, + SP_INST_DATA_1 = 40, + SP_LB_0_DATA = 41, + SP_LB_1_DATA = 42, + SP_LB_2_DATA = 43, + SP_LB_3_DATA = 44, + SP_LB_4_DATA = 45, + SP_LB_5_DATA = 46, + SP_LB_6_DATA = 47, + SP_LB_7_DATA = 48, + SP_CB_RAM = 49, + SP_LB_13_DATA = 50, + SP_LB_14_DATA = 51, + SP_INST_TAG = 52, + SP_INST_DATA_2 = 53, + SP_TMO_TAG = 54, + SP_SMO_TAG = 55, + SP_STATE_DATA = 56, + SP_HWAVE_RAM = 57, + SP_L0_INST_BUF = 58, + SP_LB_8_DATA = 59, + SP_LB_9_DATA = 60, + SP_LB_10_DATA = 61, + SP_LB_11_DATA = 62, + SP_LB_12_DATA = 63, + HLSQ_DATAPATH_DSTR_META = 64, + HLSQ_DESC_REMAP_META = 65, + HLSQ_SLICE_TOP_META = 66, + HLSQ_L2STC_TAG_RAM = 67, + HLSQ_L2STC_INFO_CMD = 68, + HLSQ_CVS_BE_CTXT_BUF_RAM_TAG = 69, + HLSQ_CPS_BE_CTXT_BUF_RAM_TAG = 70, + HLSQ_GFX_CVS_BE_CTXT_BUF_RAM = 71, + HLSQ_GFX_CPS_BE_CTXT_BUF_RAM = 72, + HLSQ_CHUNK_CVS_RAM = 73, + HLSQ_CHUNK_CPS_RAM = 74, + HLSQ_CHUNK_CVS_RAM_TAG = 75, + HLSQ_CHUNK_CPS_RAM_TAG = 76, + HLSQ_ICB_CVS_CB_BASE_TAG = 77, + HLSQ_ICB_CPS_CB_BASE_TAG = 78, + HLSQ_CVS_MISC_RAM = 79, + HLSQ_CPS_MISC_RAM = 80, + HLSQ_CPS_MISC_RAM_1 = 81, + HLSQ_INST_RAM = 82, + HLSQ_GFX_CVS_CONST_RAM = 83, + HLSQ_GFX_CPS_CONST_RAM = 84, + HLSQ_CVS_MISC_RAM_TAG = 85, + HLSQ_CPS_MISC_RAM_TAG = 86, + HLSQ_INST_RAM_TAG = 87, + HLSQ_GFX_CVS_CONST_RAM_TAG = 88, + HLSQ_GFX_CPS_CONST_RAM_TAG = 89, + HLSQ_GFX_LOCAL_MISC_RAM = 90, + HLSQ_GFX_LOCAL_MISC_RAM_TAG = 91, + HLSQ_INST_RAM_1 = 92, + HLSQ_STPROC_META = 93, + HLSQ_SLICE_BACKEND_META = 94, + HLSQ_INST_RAM_2 = 95, + HLSQ_DATAPATH_META = 96, + HLSQ_FRONTEND_META = 97, + HLSQ_INDIRECT_META = 98, + HLSQ_BACKEND_META = 99, +}; + +struct gen8_snapshot_block_list { + /* pre_crashdumper_regs : Registers which need to be 
dumped before CD runs */ + struct gen8_reg_list *pre_crashdumper_regs; + /* pre_crashdumper_regs_size : Size of registers which need to be dumped before CD runs */ + size_t num_pre_crashdumper_regs; + /* debugbus_blocks : List of debugbus blocks */ + const u32 *debugbus_blocks; + /* debugbus_blocks_len : Length of the debugbus list */ + size_t debugbus_blocks_len; + /* gbif_debugbus_blocks : List of GBIF debugbus blocks */ + const u32 *gbif_debugbus_blocks; + /* gbif_debugbus_blocks_len : Length of GBIF debugbus list */ + size_t gbif_debugbus_blocks_len; + /* cx_debugbus_blocks : List of CX debugbus blocks */ + const u32 *cx_debugbus_blocks; + /* cx_debugbus_blocks_len : Length of the CX debugbus list */ + size_t cx_debugbus_blocks_len; + /* external_core_regs : List of external core registers */ + const u32 **external_core_regs; + /* num_external_core_regs : length of external core registers list */ + size_t num_external_core_regs; + /* gmu_registers : List of GMU registers */ + struct gen8_reg_list *gmu_regs; + /* num_gmu_regs : Length of GMU registers list */ + size_t num_gmu_regs; + /* rscc_regs : List of RSCC registers */ + const u32 *rscc_regs; + /* reg_list : List of GPU internal registers */ + struct gen8_reg_list *reg_list; + /* reg_list : List of cx_misc registers */ + const u32 *cx_misc_regs; + /* shader_blocks : List of GPU shader memory */ + struct gen8_shader_block *shader_blocks; + /* num_shader_blocks : Length of the shader memory list */ + size_t num_shader_blocks; + /* cp_cluster_registers : List of GPU CP cluster registers */ + struct gen8_cluster_registers *cp_clusters; + /* num_cp_clusters : Length of GPU CP cluster registers list */ + size_t num_cp_clusters; + /* cluster_registers : List of GPU cluster registers */ + struct gen8_cluster_registers *clusters; + /* num_clusters : Length of GPU cluster registers list */ + size_t num_clusters; + /* spstp_cluster_registers : List of GPU SPTP cluster registers */ + struct gen8_sptp_cluster_registers 
*sptp_clusters; + /* num_sptp_clusters : Length of GPU SPTP cluster registers list */ + size_t num_sptp_clusters; + /* post_crashdumper_regs : Registers which need to be dumped after CD runs */ + const u32 *post_crashdumper_regs; + /* index_registers : List of index_registers */ + struct gen8_cp_indexed_reg *index_registers; + /* index_registers_len : Length of the index registers */ + size_t index_registers_len; + /* mempool_index_registers : List of CP mempool_index_registers */ + struct gen8_cp_indexed_reg *mempool_index_registers; + /* mempool_index_registers_len : Length of the mempool index registers */ + size_t mempool_index_registers_len; +}; + +#endif /*__ADRENO_GEN8_SNAPSHOT_H */ diff --git a/build/kgsl_defs.bzl b/build/kgsl_defs.bzl index 5a4514d2d8..5dcdb6d9a9 100644 --- a/build/kgsl_defs.bzl +++ b/build/kgsl_defs.bzl @@ -43,6 +43,7 @@ def kgsl_get_srcs(): "adreno_gen7_snapshot.c", "adreno_gen8.c", "adreno_gen8_gmu.c", + "adreno_gen8_gmu_snapshot.c", "adreno_gen8_hfi.c", "adreno_gen8_hwsched.c", "adreno_gen8_hwsched_hfi.c", @@ -50,6 +51,7 @@ def kgsl_get_srcs(): "adreno_gen8_preempt.c", "adreno_gen8_ringbuffer.c", "adreno_gen8_rpmh.c", + "adreno_gen8_snapshot.c", "adreno_hwsched.c", "adreno_ioctl.c", "adreno_perfcounter.c", diff --git a/gen8_reg.h b/gen8_reg.h index 7e71e45349..37dfe11361 100644 --- a/gen8_reg.h +++ b/gen8_reg.h @@ -879,6 +879,8 @@ #define GEN8_CP_ROQ_SDS_STATUS_PIPE 0x924 #define GEN8_CP_ROQ_MRB_STATUS_PIPE 0x925 #define GEN8_CP_ROQ_VSD_STATUS_PIPE 0x926 +#define GEN8_CP_SLICE_MEM_POOL_DBG_ADDR_PIPE 0xb00 +#define GEN8_CP_SLICE_MEM_POOL_DBG_DATA_PIPE 0xb01 /* UCHE registers */ #define GEN8_UCHE_MODE_CNTL 0xe01 diff --git a/kgsl_device.h b/kgsl_device.h index f1687b305e..ce37644142 100644 --- a/kgsl_device.h +++ b/kgsl_device.h @@ -985,6 +985,24 @@ void kgsl_snapshot_indexed_registers(struct kgsl_device *device, struct kgsl_snapshot *snapshot, unsigned int index, unsigned int data, unsigned int start, unsigned int count); +/** + * 
kgsl_snapshot_indexed_registers_v2 - Add a set of indexed registers to the + * snapshot + * @device: Pointer to the KGSL device being snapshotted + * @snapshot: Snapshot instance + * @index: Offset for the index register + * @data: Offset for the data register + * @start: Index to start reading + * @count: Number of entries to read + * @pipe_id: Pipe ID to be dumped + * @slice_id: Slice ID to be dumped + * + * Dump the values from an indexed register group into the snapshot + */ +void kgsl_snapshot_indexed_registers_v2(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, u32 index, u32 data, + u32 start, u32 count, u32 pipe_id, u32 slice_id); + int kgsl_snapshot_get_object(struct kgsl_snapshot *snapshot, struct kgsl_process_private *process, uint64_t gpuaddr, uint64_t size, unsigned int type); diff --git a/kgsl_snapshot.c b/kgsl_snapshot.c index 9213ab8e15..bf1b544705 100644 --- a/kgsl_snapshot.c +++ b/kgsl_snapshot.c @@ -163,13 +163,13 @@ int kgsl_snapshot_have_object(struct kgsl_snapshot *snapshot, */ int kgsl_snapshot_get_object(struct kgsl_snapshot *snapshot, struct kgsl_process_private *process, uint64_t gpuaddr, - uint64_t size, unsigned int type) + uint64_t size, u32 type) { struct kgsl_mem_entry *entry; struct kgsl_snapshot_object *obj; uint64_t offset; int ret = -EINVAL; - unsigned int mem_type; + u32 mem_type; if (!gpuaddr) return 0; @@ -290,7 +290,7 @@ size_t kgsl_snapshot_dump_registers(struct kgsl_device *device, u8 *buf, { struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf; struct kgsl_snapshot_registers *regs = priv; - unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + u32 *data = (u32 *)(buf + sizeof(*header)); int count = 0, j, k; /* Figure out how many registers we are going to dump */ @@ -308,11 +308,11 @@ size_t kgsl_snapshot_dump_registers(struct kgsl_device *device, u8 *buf, } for (j = 0; j < regs->count; j++) { - unsigned int start = regs->regs[j * 2]; - unsigned int end = regs->regs[j * 2 + 1]; + u32 
start = regs->regs[j * 2]; + u32 end = regs->regs[j * 2 + 1]; for (k = start; k <= end; k++) { - unsigned int val; + u32 val; kgsl_regread(device, k, &val); *data++ = k; @@ -327,10 +327,10 @@ size_t kgsl_snapshot_dump_registers(struct kgsl_device *device, u8 *buf, } struct kgsl_snapshot_indexed_registers { - unsigned int index; - unsigned int data; - unsigned int start; - unsigned int count; + u32 index; + u32 data; + u32 start; + u32 count; }; static size_t kgsl_snapshot_dump_indexed_regs(struct kgsl_device *device, @@ -339,7 +339,7 @@ static size_t kgsl_snapshot_dump_indexed_regs(struct kgsl_device *device, struct kgsl_snapshot_indexed_registers *iregs = priv; struct kgsl_snapshot_indexed_regs *header = (struct kgsl_snapshot_indexed_regs *)buf; - unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + u32 *data = (u32 *)(buf + sizeof(*header)); if (remain < (iregs->count * 4) + sizeof(*header)) { SNAPSHOT_ERR_NOMEM(device, "INDEXED REGS"); @@ -371,9 +371,9 @@ static size_t kgsl_snapshot_dump_indexed_regs(struct kgsl_device *device, */ void kgsl_snapshot_indexed_registers(struct kgsl_device *device, struct kgsl_snapshot *snapshot, - unsigned int index, unsigned int data, - unsigned int start, - unsigned int count) + u32 index, u32 data, + u32 start, + u32 count) { struct kgsl_snapshot_indexed_registers iregs; @@ -386,6 +386,59 @@ void kgsl_snapshot_indexed_registers(struct kgsl_device *device, snapshot, kgsl_snapshot_dump_indexed_regs, &iregs); } +struct kgsl_snapshot_indexed_registers_v2 { + u32 index; + u32 data; + u32 start; + u32 count; + u32 pipe_id; + u32 slice_id; +}; + +static size_t kgsl_snapshot_dump_indexed_regs_v2(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_indexed_registers_v2 *iregs = priv; + struct kgsl_snapshot_indexed_regs_v2 *header = + (struct kgsl_snapshot_indexed_regs_v2 *)buf; + u32 *data = (u32 *)(buf + sizeof(*header)); + + if (remain < (iregs->count * 4 * 3) + sizeof(*header)) { + 
SNAPSHOT_ERR_NOMEM(device, "INDEXED REGS"); + return 0; + } + + header->index_reg = iregs->index; + header->data_reg = iregs->data; + header->count = iregs->count; + header->start = iregs->start; + header->pipe_id = iregs->pipe_id; + header->slice_id = iregs->slice_id; + + kgsl_regmap_read_indexed_interleaved(&device->regmap, iregs->index, + iregs->data, data, iregs->start, iregs->count); + + return (iregs->count * 4 * 3) + sizeof(*header); +} + +void kgsl_snapshot_indexed_registers_v2(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + u32 index, u32 data, u32 start, u32 count, + u32 pipe_id, u32 slice_id) +{ + struct kgsl_snapshot_indexed_registers_v2 iregs; + + iregs.index = index; + iregs.data = data; + iregs.start = start; + iregs.count = count; + iregs.pipe_id = pipe_id; + iregs.slice_id = slice_id; + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_INDEXED_REGS_V2, + snapshot, kgsl_snapshot_dump_indexed_regs_v2, &iregs); +} + /** * kgsl_snapshot_add_section() - Add a new section to the GPU snapshot * @device: the KGSL device being snapshotted diff --git a/kgsl_snapshot.h b/kgsl_snapshot.h index 50f245dbf6..fa8471f5fe 100644 --- a/kgsl_snapshot.h +++ b/kgsl_snapshot.h @@ -44,6 +44,7 @@ struct kgsl_snapshot_section_header { #define KGSL_SNAPSHOT_SECTION_IB 0x0401 #define KGSL_SNAPSHOT_SECTION_IB_V2 0x0402 #define KGSL_SNAPSHOT_SECTION_INDEXED_REGS 0x0501 +#define KGSL_SNAPSHOT_SECTION_INDEXED_REGS_V2 0x0502 #define KGSL_SNAPSHOT_SECTION_ISTORE 0x0801 #define KGSL_SNAPSHOT_SECTION_DEBUG 0x0901 #define KGSL_SNAPSHOT_SECTION_DEBUGBUS 0x0A01 @@ -53,8 +54,10 @@ struct kgsl_snapshot_section_header { #define KGSL_SNAPSHOT_SECTION_MEMLIST_V2 0x0E02 #define KGSL_SNAPSHOT_SECTION_SHADER 0x1201 #define KGSL_SNAPSHOT_SECTION_SHADER_V2 0x1202 +#define KGSL_SNAPSHOT_SECTION_SHADER_V3 0x1203 #define KGSL_SNAPSHOT_SECTION_MVC 0x1501 #define KGSL_SNAPSHOT_SECTION_MVC_V2 0x1502 +#define KGSL_SNAPSHOT_SECTION_MVC_V3 0x1503 #define KGSL_SNAPSHOT_SECTION_GMU 
0x1601 #define KGSL_SNAPSHOT_SECTION_GMU_MEMORY 0x1701 #define KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS 0x1801 @@ -240,6 +243,15 @@ struct kgsl_snapshot_indexed_regs { int count; /* Number of dwords in the data */ } __packed; +struct kgsl_snapshot_indexed_regs_v2 { + u32 index_reg; /* Offset of the index register for this section */ + u32 data_reg; /* Offset of the data register for this section */ + u32 start; /* Starting index */ + u32 count; /* Number of dwords in the data */ + u32 pipe_id; /* Id of pipe, BV, Br etc */ + u32 slice_id; /* Slice ID to be dumped */ +} __packed; + /* MVC register sub-section header */ struct kgsl_snapshot_mvc_regs { int ctxt_id; @@ -253,6 +265,16 @@ struct kgsl_snapshot_mvc_regs_v2 { int location_id; } __packed; +struct kgsl_snapshot_mvc_regs_v3 { + u32 ctxt_id; + u32 cluster_id; + u32 pipe_id; + u32 location_id; + u32 slice_id; + u32 sp_id; + u32 usptp_id; +} __packed; + /* Istore sub-section header */ struct kgsl_snapshot_istore { int count; /* Number of instructions in the istore */ @@ -316,6 +338,17 @@ struct kgsl_snapshot_shader_v2 { u32 size; /* Number of dwords in the dump */ } __packed; +struct kgsl_snapshot_shader_v3 { + u32 type; /* SP/TP statetype */ + u32 slice_id; /* Slice ID */ + u32 sp_index; /* SP/TP index */ + u32 usptp; /* USPTP index */ + u32 pipe_id; /* Pipe id */ + u32 location; /* Location value */ + u32 ctxt_id; /* Context ID */ + u32 size; /* Number of dwords in the dump */ +} __packed; + #define TRACE_BUF_NUM_SIG 4 /** From 6ed1229fd32500de7183eef64052446504df7e7d Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Mon, 13 Nov 2023 11:20:59 -0800 Subject: [PATCH 0583/1016] kgsl: gen8: Add support for GMU virtual register bank This change ports commit e4cc41d5da1a ("kgsl: gmu: Add support for GMU virtual register bank") from gen7 to gen8. 
Change-Id: I4308fb1b1d40264a2bf036adf46cc3481fdefcc1 Signed-off-by: Hareesh Gundu --- adreno_gen8_gmu.c | 4 +++ adreno_gen8_gmu.h | 2 ++ adreno_gen8_gmu_snapshot.c | 2 ++ adreno_gen8_hwsched.c | 53 ++++++++++++++++++++++++++++++-------- gen8_reg.h | 1 + 5 files changed, 51 insertions(+), 11 deletions(-) diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index 3b8d6269c1..554d0c3cfe 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -882,6 +882,10 @@ void gen8_gmu_register_config(struct adreno_device *adreno_dev) */ kgsl_regwrite(device, GEN8_GBIF_HALT, BIT(3)); + /* Set vrb address before starting GMU */ + if (!IS_ERR_OR_NULL(gmu->vrb)) + gmu_core_regwrite(device, GEN8_GMUCX_GENERAL_11, gmu->vrb->gmuaddr); + /* Set the log wptr index */ gmu_core_regwrite(device, GEN8_GMUCX_GENERAL_9, gmu->log_wptr_retention); diff --git a/adreno_gen8_gmu.h b/adreno_gen8_gmu.h index 5b2c260a4c..19d90058fc 100644 --- a/adreno_gen8_gmu.h +++ b/adreno_gen8_gmu.h @@ -53,6 +53,8 @@ struct gen8_gmu_device { const struct firmware *fw_image; struct kgsl_memdesc *dump_mem; struct kgsl_memdesc *gmu_log; + /** @vrb: GMU virtual register bank memory */ + struct kgsl_memdesc *vrb; /** @gmu_init_scratch: Memory to store the initial HFI messages */ struct kgsl_memdesc *gmu_init_scratch; /** @gpu_boot_scratch: Memory to store the bootup HFI messages */ diff --git a/adreno_gen8_gmu_snapshot.c b/adreno_gen8_gmu_snapshot.c index d005d0ec30..d578fcfe06 100644 --- a/adreno_gen8_gmu_snapshot.c +++ b/adreno_gen8_gmu_snapshot.c @@ -128,6 +128,8 @@ static void gen8_gmu_snapshot_memories(struct kgsl_device *device, desc.type = SNAPSHOT_GMU_MEM_DEBUG; else if ((md == gmu->gmu_init_scratch) || (md == gmu->gpu_boot_scratch)) desc.type = SNAPSHOT_GMU_MEM_WARMBOOT; + else if (md == gmu->vrb) + desc.type = SNAPSHOT_GMU_MEM_VRB; else desc.type = SNAPSHOT_GMU_MEM_BIN_BLOCK; diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 43f13a1b21..029fda1acb 100644 --- a/adreno_gen8_hwsched.c +++ 
b/adreno_gen8_hwsched.c @@ -858,6 +858,24 @@ static int gen8_gmu_warmboot_init(struct adreno_device *adreno_dev) return ret; } +static int gen8_hwsched_gmu_memory_init(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + + /* GMU Virtual register bank */ + if (IS_ERR_OR_NULL(gmu->vrb)) { + gmu->vrb = gen8_reserve_gmu_kernel_block(gmu, 0, GMU_VRB_SIZE, + GMU_NONCACHED_KERNEL, 0); + + /* Populate size of the virtual register bank */ + if (!IS_ERR(gmu->vrb)) + gmu_core_set_vrb_register(gmu->vrb->hostptr, + VRB_SIZE_IDX, gmu->vrb->size >> 2); + } + + return PTR_ERR_OR_ZERO(gmu->vrb); +} + static int gen8_hwsched_gmu_init(struct adreno_device *adreno_dev) { int ret; @@ -874,6 +892,10 @@ static int gen8_hwsched_gmu_init(struct adreno_device *adreno_dev) if (ret) return ret; + ret = gen8_hwsched_gmu_memory_init(adreno_dev); + if (ret) + return ret; + return gen8_hwsched_hfi_init(adreno_dev); } @@ -1813,6 +1835,15 @@ int gen8_hwsched_add_to_minidump(struct adreno_device *adreno_dev) return ret; } + if (!IS_ERR_OR_NULL(gen8_dev->gmu.vrb)) { + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, + KGSL_GMU_VRB_ENTRY, + gen8_dev->gmu.vrb->hostptr, + gen8_dev->gmu.vrb->size); + if (ret) + return ret; + } + /* Dump HFI hwsched global mem alloc entries */ for (i = 0; i < hw_hfi->mem_alloc_entries; i++) { struct hfi_mem_alloc_entry *entry = &hw_hfi->mem_alloc_table[i]; @@ -1820,12 +1851,12 @@ int gen8_hwsched_add_to_minidump(struct adreno_device *adreno_dev) u32 rb_id = 0; if (!hfi_get_minidump_string(entry->desc.mem_kind, - &hfi_minidump_str[0], - sizeof(hfi_minidump_str), &rb_id)) { + &hfi_minidump_str[0], + sizeof(hfi_minidump_str), &rb_id)) { ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, - hfi_minidump_str, - entry->md->hostptr, - entry->md->size); + hfi_minidump_str, + entry->md->hostptr, + entry->md->size); if (ret) return ret; } @@ -1833,18 +1864,18 @@ int gen8_hwsched_add_to_minidump(struct adreno_device *adreno_dev) if 
(!IS_ERR_OR_NULL(hw_hfi->big_ib)) { ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, - KGSL_HFI_BIG_IB_ENTRY, - hw_hfi->big_ib->hostptr, - hw_hfi->big_ib->size); + KGSL_HFI_BIG_IB_ENTRY, + hw_hfi->big_ib->hostptr, + hw_hfi->big_ib->size); if (ret) return ret; } if (!IS_ERR_OR_NULL(hw_hfi->big_ib_recurring)) ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, - KGSL_HFI_BIG_IB_REC_ENTRY, - hw_hfi->big_ib_recurring->hostptr, - hw_hfi->big_ib_recurring->size); + KGSL_HFI_BIG_IB_REC_ENTRY, + hw_hfi->big_ib_recurring->hostptr, + hw_hfi->big_ib_recurring->size); return ret; } diff --git a/gen8_reg.h b/gen8_reg.h index 37dfe11361..1985e758a6 100644 --- a/gen8_reg.h +++ b/gen8_reg.h @@ -1539,6 +1539,7 @@ #define GEN8_GMUCX_GENERAL_8 0x1f9c8 #define GEN8_GMUCX_GENERAL_9 0x1f9c9 #define GEN8_GMUCX_GENERAL_10 0x1f9ca +#define GEN8_GMUCX_GENERAL_11 0x1f9cb /* Always on registers */ #define GEN8_GMUAO_AO_INTERRUPT_EN 0x23b03 From 8e7439662088986fe1fbf4c3520e68c83c995908 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Mon, 13 Nov 2023 11:22:29 -0800 Subject: [PATCH 0584/1016] kgsl: gen8: Add support for GMU tracepoints logging This change ports commit e2dc259daa3f ("kgsl: hwsched: Add support for GMU tracepoints logging") to gen8. 
Change-Id: I53f7d1c1a338911ffa0727477277904269596021 Signed-off-by: Hareesh Gundu --- adreno_gen8_gmu.c | 5 +++++ adreno_gen8_gmu.h | 2 ++ adreno_gen8_gmu_snapshot.c | 2 ++ adreno_gen8_hwsched.c | 36 ++++++++++++++++++++++++++++++++---- adreno_gen8_hwsched_hfi.c | 13 ++++++++++--- 5 files changed, 51 insertions(+), 7 deletions(-) diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index 554d0c3cfe..efcecb58bf 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -279,6 +279,8 @@ int gen8_gmu_device_start(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + gmu_core_reset_trace_header(&gmu->trace); + gmu_ao_sync_event(adreno_dev); /* Bring GMU out of reset */ @@ -2462,6 +2464,9 @@ int gen8_gmu_probe(struct kgsl_device *device, gmu->log_stream_enable = false; gmu->log_group_mask = 0x3; + /* Initialize to zero to detect trace packet loss */ + gmu->trace.seq_num = 0; + /* Disabled by default */ gmu->stats_enable = false; /* Set default to CM3 busy cycles countable */ diff --git a/adreno_gen8_gmu.h b/adreno_gen8_gmu.h index 19d90058fc..1368e883c8 100644 --- a/adreno_gen8_gmu.h +++ b/adreno_gen8_gmu.h @@ -55,6 +55,8 @@ struct gen8_gmu_device { struct kgsl_memdesc *gmu_log; /** @vrb: GMU virtual register bank memory */ struct kgsl_memdesc *vrb; + /** @trace: gmu trace container */ + struct kgsl_gmu_trace trace; /** @gmu_init_scratch: Memory to store the initial HFI messages */ struct kgsl_memdesc *gmu_init_scratch; /** @gpu_boot_scratch: Memory to store the bootup HFI messages */ diff --git a/adreno_gen8_gmu_snapshot.c b/adreno_gen8_gmu_snapshot.c index d578fcfe06..4d6250efb5 100644 --- a/adreno_gen8_gmu_snapshot.c +++ b/adreno_gen8_gmu_snapshot.c @@ -130,6 +130,8 @@ static void gen8_gmu_snapshot_memories(struct kgsl_device *device, desc.type = SNAPSHOT_GMU_MEM_WARMBOOT; else if (md == gmu->vrb) desc.type = SNAPSHOT_GMU_MEM_VRB; + else if (md == gmu->trace.md) + desc.type = 
SNAPSHOT_GMU_MEM_TRACE; else desc.type = SNAPSHOT_GMU_MEM_BIN_BLOCK; diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 029fda1acb..4d3054e29a 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -867,13 +867,32 @@ static int gen8_hwsched_gmu_memory_init(struct adreno_device *adreno_dev) gmu->vrb = gen8_reserve_gmu_kernel_block(gmu, 0, GMU_VRB_SIZE, GMU_NONCACHED_KERNEL, 0); + if (IS_ERR(gmu->vrb)) + return PTR_ERR(gmu->vrb); + /* Populate size of the virtual register bank */ - if (!IS_ERR(gmu->vrb)) - gmu_core_set_vrb_register(gmu->vrb->hostptr, - VRB_SIZE_IDX, gmu->vrb->size >> 2); + gmu_core_set_vrb_register(gmu->vrb->hostptr, VRB_SIZE_IDX, + gmu->vrb->size >> 2); } - return PTR_ERR_OR_ZERO(gmu->vrb); + /* GMU trace log */ + if (IS_ERR_OR_NULL(gmu->trace.md)) { + gmu->trace.md = gen8_reserve_gmu_kernel_block(gmu, 0, + GMU_TRACE_SIZE, GMU_NONCACHED_KERNEL, 0); + + if (IS_ERR(gmu->trace.md)) + return PTR_ERR(gmu->trace.md); + + /* Pass trace buffer address to GMU through the VRB */ + gmu_core_set_vrb_register(gmu->vrb->hostptr, + VRB_TRACE_BUFFER_ADDR_IDX, + gmu->trace.md->gmuaddr); + + /* Initialize the GMU trace buffer header */ + gmu_core_trace_header_init(&gmu->trace); + } + + return 0; } static int gen8_hwsched_gmu_init(struct adreno_device *adreno_dev) @@ -1844,6 +1863,15 @@ int gen8_hwsched_add_to_minidump(struct adreno_device *adreno_dev) return ret; } + if (!IS_ERR_OR_NULL(gen8_dev->gmu.trace.md)) { + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, + KGSL_GMU_TRACE_ENTRY, + gen8_dev->gmu.trace.md->hostptr, + gen8_dev->gmu.trace.md->size); + if (ret) + return ret; + } + /* Dump HFI hwsched global mem alloc entries */ for (i = 0; i < hw_hfi->mem_alloc_entries; i++) { struct hfi_mem_alloc_entry *entry = &hw_hfi->mem_alloc_table[i]; diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index a162156be6..ab29ae7a37 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -2641,17 +2641,24 @@ static 
int hfi_f2h_main(void *arg) { struct adreno_device *adreno_dev = arg; struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); while (!kthread_should_stop()) { wait_event_interruptible(hfi->f2h_wq, kthread_should_stop() || - (!(is_queue_empty(adreno_dev, HFI_MSG_ID) && - is_queue_empty(adreno_dev, HFI_DBG_ID)) && - (hfi->irq_mask & HFI_IRQ_MSGQ_MASK))); + /* If msgq irq is enabled and msgq has messages to process */ + (((hfi->irq_mask & HFI_IRQ_MSGQ_MASK) && + !is_queue_empty(adreno_dev, HFI_MSG_ID)) || + /* Trace buffer has messages to process */ + !gmu_core_is_trace_empty(gmu->trace.md->hostptr) || + /* Dbgq has messages to process */ + !is_queue_empty(adreno_dev, HFI_DBG_ID))); if (kthread_should_stop()) break; gen8_hwsched_process_msgq(adreno_dev); + gmu_core_process_trace_data(KGSL_DEVICE(adreno_dev), + &gmu->pdev->dev, &gmu->trace); gen8_hwsched_process_dbgq(adreno_dev, true); } From ddf0d30ba15ad6ac3b4ba8d69be098f874218d2e Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 14 Nov 2023 23:39:20 +0530 Subject: [PATCH 0585/1016] kgsl: gen8: Enable fast bus hint for gen8_0_0 GPU Enable fast bus hint for gen8_0_0 GPU to increase IB vote on high ddr stall. 
Change-Id: I354e958a088c53907af88e187e6071fc99332a59 Signed-off-by: Kamal Agrawal --- adreno-gpulist.h | 1 + 1 file changed, 1 insertion(+) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 6f82c1443d..a254eacee5 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2655,6 +2655,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .highest_bank_bit = 16, .gmu_hub_clk_freq = 200000000, .gen8_snapshot_block_list = &gen8_0_0_snapshot_block_list, + .fast_bus_hint = true, }; static const struct adreno_gpu_core *adreno_gpulist[] = { From ab66719d57e040e5c6c378ad06adca1710934e64 Mon Sep 17 00:00:00 2001 From: Kaushal Sanadhya Date: Wed, 11 Oct 2023 16:34:43 +0530 Subject: [PATCH 0586/1016] kgsl: gen7: Add support for Gen7_14_0 gpu Add support for Gen7_14_0 gpu. Change-Id: Icc95af5b7721459ec2b830717f4072dc027fc078 Signed-off-by: Kaushal Sanadhya --- adreno-gpulist.h | 34 +++ adreno.c | 6 +- adreno.h | 4 +- adreno_gen7.c | 24 +- adreno_gen7_14_0_snapshot.h | 462 ++++++++++++++++++++++++++++++++++++ adreno_gen7_snapshot.c | 46 +++- 6 files changed, 557 insertions(+), 19 deletions(-) create mode 100644 adreno_gen7_14_0_snapshot.h diff --git a/adreno-gpulist.h b/adreno-gpulist.h index a254eacee5..b4008e7954 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2358,6 +2358,39 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_9_1 = { .fast_bus_hint = true, }; +extern const struct gen7_snapshot_block_list gen7_14_0_snapshot_block_list; + +static const struct adreno_gen7_core adreno_gpu_core_gen7_14_0 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_GEN7_14_0, + UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), + .compatible = "qcom,adreno-gpu-gen7-14-0", + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | + ADRENO_PREEMPTION | ADRENO_IFPC | ADRENO_BCL | ADRENO_ACD, + .gpudev = &adreno_gen7_gmu_gpudev.base, + .perfcounters = &adreno_gen7_perfcounters, + .uche_gmem_alignment = 0, + .gmem_size = SZ_1M, + .bus_width = 32, + 
.snapshot_size = SZ_4M, + }, + .sqefw_name = "gen70e00_sqe.fw", + .gmufw_name = "gmu_gen70e00.bin", + .zap_name = "gen70e00_zap.mbn", + .hwcg = gen7_0_0_hwcg_regs, + .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), + .ao_hwcg = gen7_0_0_ao_hwcg_regs, + .ao_hwcg_count = ARRAY_SIZE(gen7_0_0_ao_hwcg_regs), + .gbif = gen7_0_0_gbif_regs, + .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = gen7_0_0_protected_regs, + .highest_bank_bit = 15, + .gen7_snapshot_block_list = &gen7_14_0_snapshot_block_list, + .preempt_level = 1, + .fast_bus_hint = false, +}; + static const struct kgsl_regmap_list a663_hwcg_regs[] = { {A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, {A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, @@ -2705,6 +2738,7 @@ static const struct adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_gen7_4_0.base, &adreno_gpu_core_gen7_9_0.base, &adreno_gpu_core_gen7_9_1.base, + &adreno_gpu_core_gen7_14_0.base, &adreno_gpu_core_gen8_0_0.base, }; diff --git a/adreno.c b/adreno.c index c7e8cd434d..7feae3d9b9 100644 --- a/adreno.c +++ b/adreno.c @@ -1296,10 +1296,12 @@ int adreno_device_probe(struct platform_device *pdev, /* * Force no write allocate for A5x, A6x and all gen7 targets - * except gen_7_9_x. gen_7_9_x uses write allocate + * except gen_7_9_x and gen_7_14_0. gen_7_9_x and gen_7_14_0 + * use write allocate. */ if (adreno_is_a5xx(adreno_dev) || adreno_is_a6xx(adreno_dev) || - (adreno_is_gen7(adreno_dev) && !adreno_is_gen7_9_x(adreno_dev))) + (adreno_is_gen7(adreno_dev) && !adreno_is_gen7_9_x(adreno_dev) && + !adreno_is_gen7_14_0(adreno_dev))) kgsl_mmu_set_feature(device, KGSL_MMU_FORCE_LLCC_NWA); /* Bind the components before doing the KGSL platform probe. 
*/ diff --git a/adreno.h b/adreno.h index af056e8377..90838bfa85 100644 --- a/adreno.h +++ b/adreno.h @@ -239,6 +239,7 @@ enum adreno_gpurev { ADRENO_REV_GEN7_4_0 = ADRENO_GPUREV_VALUE(7, 4, 0), ADRENO_REV_GEN7_9_0 = ADRENO_GPUREV_VALUE(7, 9, 0), ADRENO_REV_GEN7_9_1 = ADRENO_GPUREV_VALUE(7, 9, 1), + ADRENO_REV_GEN7_14_0 = ADRENO_GPUREV_VALUE(7, 14, 0), ADRENO_REV_GEN8_0_0 = ADRENO_GPUREV_VALUE(8, 0, 0), }; @@ -1259,6 +1260,7 @@ ADRENO_TARGET(gen7_2_1, ADRENO_REV_GEN7_2_1) ADRENO_TARGET(gen7_4_0, ADRENO_REV_GEN7_4_0) ADRENO_TARGET(gen7_9_0, ADRENO_REV_GEN7_9_0) ADRENO_TARGET(gen7_9_1, ADRENO_REV_GEN7_9_1) +ADRENO_TARGET(gen7_14_0, ADRENO_REV_GEN7_14_0) static inline int adreno_is_gen7_9_x(struct adreno_device *adreno_dev) { @@ -1274,7 +1276,7 @@ static inline int adreno_is_gen7_0_x_family(struct adreno_device *adreno_dev) static inline int adreno_is_gen7_2_x_family(struct adreno_device *adreno_dev) { return adreno_is_gen7_2_0(adreno_dev) || adreno_is_gen7_2_1(adreno_dev) || - adreno_is_gen7_9_x(adreno_dev); + adreno_is_gen7_9_x(adreno_dev) || adreno_is_gen7_14_0(adreno_dev); } /* diff --git a/adreno_gen7.c b/adreno_gen7.c index 1f4fe35b5d..93127f2fe2 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -529,7 +529,7 @@ static void gen7_patch_pwrup_reglist(struct adreno_device *adreno_dev) u32 *dest = ptr + sizeof(*lock); /* Static IFPC-only registers */ - if (adreno_is_gen7_0_x_family(adreno_dev)) { + if (adreno_is_gen7_0_x_family(adreno_dev) || adreno_is_gen7_14_0(adreno_dev)) { reglist[items].regs = gen7_0_0_ifpc_pwrup_reglist; reglist[items].count = ARRAY_SIZE(gen7_0_0_ifpc_pwrup_reglist); } else { @@ -547,7 +547,7 @@ static void gen7_patch_pwrup_reglist(struct adreno_device *adreno_dev) } /* Static IFPC + preemption registers */ - if (adreno_is_gen7_0_x_family(adreno_dev)) { + if (adreno_is_gen7_0_x_family(adreno_dev) || adreno_is_gen7_14_0(adreno_dev)) { reglist[items].regs = gen7_0_0_pwrup_reglist; reglist[items].count = ARRAY_SIZE(gen7_0_0_pwrup_reglist); } else 
{ @@ -825,8 +825,12 @@ int gen7_start(struct adreno_device *adreno_dev) _llc_gpuhtw_slice_activate(adreno_dev); kgsl_regwrite(device, GEN7_CP_APRIV_CNTL, GEN7_BR_APRIV_DEFAULT); - kgsl_regwrite(device, GEN7_CP_BV_APRIV_CNTL, GEN7_APRIV_DEFAULT); - kgsl_regwrite(device, GEN7_CP_LPAC_APRIV_CNTL, GEN7_APRIV_DEFAULT); + + /* gen7_14_0 does not have BV and LPAC hence skip regwrite */ + if (!adreno_is_gen7_14_0(adreno_dev)) { + kgsl_regwrite(device, GEN7_CP_BV_APRIV_CNTL, GEN7_APRIV_DEFAULT); + kgsl_regwrite(device, GEN7_CP_LPAC_APRIV_CNTL, GEN7_APRIV_DEFAULT); + } /* Marking AQE Instruction cache fetches as privileged */ if (ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) @@ -835,6 +839,9 @@ int gen7_start(struct adreno_device *adreno_dev) if (adreno_is_gen7_9_x(adreno_dev)) kgsl_regrmw(device, GEN7_GBIF_CX_CONFIG, GENMASK(31, 29), FIELD_PREP(GENMASK(31, 29), 1)); + else if (adreno_is_gen7_14_0(adreno_dev)) + kgsl_regrmw(device, GEN7_GBIF_CX_CONFIG, GENMASK(31, 29), + FIELD_PREP(GENMASK(31, 29), 2)); /* * CP Icache prefetch brings no benefit on few gen7 variants because of @@ -1082,9 +1089,12 @@ int gen7_rb_start(struct adreno_device *adreno_dev) kgsl_regwrite(device, GEN7_CP_RB_RPTR_ADDR_LO, lower_32_bits(addr)); kgsl_regwrite(device, GEN7_CP_RB_RPTR_ADDR_HI, upper_32_bits(addr)); - addr = SCRATCH_RB_GPU_ADDR(device, rb->id, bv_rptr); - kgsl_regwrite(device, GEN7_CP_BV_RB_RPTR_ADDR_LO, lower_32_bits(addr)); - kgsl_regwrite(device, GEN7_CP_BV_RB_RPTR_ADDR_HI, upper_32_bits(addr)); + /* gen7_14_0 does not have BV hence skip regwrite */ + if (!adreno_is_gen7_14_0(adreno_dev)) { + addr = SCRATCH_RB_GPU_ADDR(device, rb->id, bv_rptr); + kgsl_regwrite(device, GEN7_CP_BV_RB_RPTR_ADDR_LO, lower_32_bits(addr)); + kgsl_regwrite(device, GEN7_CP_BV_RB_RPTR_ADDR_HI, upper_32_bits(addr)); + } kgsl_regwrite(device, GEN7_CP_RB_CNTL, GEN7_CP_RB_CNTL_DEFAULT); diff --git a/adreno_gen7_14_0_snapshot.h b/adreno_gen7_14_0_snapshot.h new file mode 100644 index 0000000000..cdacf04192 --- /dev/null 
+++ b/adreno_gen7_14_0_snapshot.h @@ -0,0 +1,462 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + */ +#ifndef __ADRENO_GEN7_14_0_SNAPSHOT_H +#define __ADRENO_GEN7_14_0_SNAPSHOT_H + +#include "adreno_gen7_snapshot.h" + +static const u32 gen7_14_0_debugbus_blocks[] = { + DEBUGBUS_CP_0_0, + DEBUGBUS_CP_0_1, + DEBUGBUS_RBBM, + DEBUGBUS_HLSQ, + DEBUGBUS_UCHE_0, + DEBUGBUS_TESS_BR, + DEBUGBUS_PC_BR, + DEBUGBUS_VFDP_BR, + DEBUGBUS_VPC_BR, + DEBUGBUS_TSE_BR, + DEBUGBUS_RAS_BR, + DEBUGBUS_VSC, + DEBUGBUS_COM_0, + DEBUGBUS_LRZ_BR, + DEBUGBUS_UFC_0, + DEBUGBUS_UFC_1, + DEBUGBUS_GMU_GX, + DEBUGBUS_DBGC, + DEBUGBUS_GPC_BR, + DEBUGBUS_LARC, + DEBUGBUS_HLSQ_SPTP, + DEBUGBUS_RB_0, + DEBUGBUS_RB_1, + DEBUGBUS_UCHE_WRAPPER, + DEBUGBUS_CCU_0, + DEBUGBUS_CCU_1, + DEBUGBUS_VFD_BR_0, + DEBUGBUS_VFD_BR_1, + DEBUGBUS_VFD_BR_2, + DEBUGBUS_VFD_BR_3, + DEBUGBUS_USP_0, + DEBUGBUS_USP_1, + DEBUGBUS_TP_0, + DEBUGBUS_TP_1, + DEBUGBUS_TP_2, + DEBUGBUS_TP_3, + DEBUGBUS_USPTP_0, + DEBUGBUS_USPTP_1, + DEBUGBUS_USPTP_2, + DEBUGBUS_USPTP_3, +}; + +static const struct gen7_sel_reg gen7_14_0_rb_rac_sel = { + .host_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_HOST, + .cd_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_CD, + .val = 0x0, +}; + +static const struct gen7_sel_reg gen7_14_0_rb_rbp_sel = { + .host_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_HOST, + .cd_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_CD, + .val = 0x9, +}; + +static const u32 gen7_14_0_post_crashdumper_registers[] = { + 0x00535, 0x00535, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_14_0_post_crashdumper_registers), 8)); + +static const u32 gen7_14_0_gpu_registers[] = { + 0x00000, 0x00000, 0x00002, 0x00002, 0x00011, 0x00012, 0x00016, 0x0001b, + 0x0001f, 0x00032, 0x00038, 0x0003c, 0x00042, 0x00042, 0x00044, 0x00044, + 0x00047, 0x00047, 0x00049, 0x0004a, 0x0004c, 0x0004c, 0x00050, 0x00050, + 0x00056, 0x00056, 0x00073, 0x00075, 0x000ad, 0x000ae, 0x000b0, 0x000b0, + 0x000b4, 0x000b4, 
0x000b8, 0x000b8, 0x000bc, 0x000bc, 0x000c0, 0x000c0, + 0x000c4, 0x000c4, 0x000c8, 0x000c8, 0x000cc, 0x000cc, 0x000d0, 0x000d0, + 0x000d4, 0x000d4, 0x000d8, 0x000d8, 0x000dc, 0x000dc, 0x000e0, 0x000e0, + 0x000e4, 0x000e4, 0x000e8, 0x000e8, 0x000ec, 0x000ec, 0x000f0, 0x000f0, + 0x000f4, 0x000f4, 0x000f8, 0x000f8, 0x00100, 0x00100, 0x00104, 0x0010b, + 0x0010f, 0x0011d, 0x0012f, 0x0012f, 0x00200, 0x0020d, 0x00215, 0x00243, + 0x00260, 0x00268, 0x00272, 0x00274, 0x00286, 0x00286, 0x0028a, 0x0028a, + 0x0028c, 0x0028c, 0x00300, 0x00401, 0x00500, 0x00500, 0x00507, 0x0050b, + 0x0050f, 0x0050f, 0x00511, 0x00511, 0x00533, 0x00534, 0x00540, 0x00555, + 0x00564, 0x00567, 0x00800, 0x00808, 0x00810, 0x00813, 0x00820, 0x00821, + 0x00823, 0x00827, 0x00830, 0x00834, 0x00840, 0x00841, 0x00843, 0x00847, + 0x0084f, 0x00886, 0x008a0, 0x008ab, 0x008c0, 0x008c0, 0x008c4, 0x008c5, + 0x008d0, 0x008dd, 0x008f0, 0x008f3, 0x00900, 0x00903, 0x00908, 0x00911, + 0x00928, 0x0093e, 0x00942, 0x0094d, 0x00980, 0x00984, 0x0098d, 0x0098f, + 0x009b0, 0x009b4, 0x009c2, 0x009c9, 0x009ce, 0x009d7, 0x00a00, 0x00a00, + 0x00a02, 0x00a03, 0x00a10, 0x00a4f, 0x00a67, 0x00a6c, 0x00a9c, 0x00a9f, + 0x00c00, 0x00c00, 0x00c02, 0x00c04, 0x00c06, 0x00c06, 0x00c10, 0x00cd9, + 0x00ce0, 0x00d0c, 0x00df0, 0x00df4, 0x00e01, 0x00e02, 0x00e07, 0x00e0e, + 0x00e10, 0x00e12, 0x00e17, 0x00e17, 0x00e19, 0x00e19, 0x00e1b, 0x00e2b, + 0x00e30, 0x00e32, 0x00e38, 0x00e3c, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_14_0_gpu_registers), 8)); + +static const u32 gen7_14_0_dbgc_registers[] = { + 0x00600, 0x0061c, 0x0061e, 0x00634, 0x00640, 0x0065a, 0x00679, 0x0067a, + 0x00699, 0x00699, 0x0069b, 0x0069e, 0x18400, 0x1841c, 0x1841e, 0x18434, + 0x18440, 0x1845c, 0x18479, 0x1847c, 0x18580, 0x18581, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_14_0_dbgc_registers), 8)); + +static const u32 gen7_14_0_cx_misc_registers[] = { + 0x27800, 0x27800, 0x27810, 0x27814, 0x27820, 0x27824, 0x27828, 0x2782a, + 0x27832, 
0x27857, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_14_0_cx_misc_registers), 8)); + +static const u32 gen7_14_0_gmu_registers[] = { + 0x10001, 0x10001, 0x10003, 0x10003, 0x10401, 0x10401, 0x10403, 0x10403, + 0x10801, 0x10801, 0x10803, 0x10803, 0x10c01, 0x10c01, 0x10c03, 0x10c03, + 0x11001, 0x11001, 0x11003, 0x11003, 0x11401, 0x11401, 0x11403, 0x11403, + 0x11801, 0x11801, 0x11803, 0x11803, 0x11c01, 0x11c01, 0x11c03, 0x11c03, + 0x1f400, 0x1f40b, 0x1f40f, 0x1f411, 0x1f500, 0x1f500, 0x1f507, 0x1f507, + 0x1f509, 0x1f50b, 0x1f800, 0x1f804, 0x1f807, 0x1f808, 0x1f80b, 0x1f80c, + 0x1f80f, 0x1f80f, 0x1f811, 0x1f811, 0x1f813, 0x1f817, 0x1f819, 0x1f81c, + 0x1f824, 0x1f82a, 0x1f82d, 0x1f830, 0x1f840, 0x1f853, 0x1f860, 0x1f860, + 0x1f870, 0x1f879, 0x1f87f, 0x1f87f, 0x1f888, 0x1f889, 0x1f8a0, 0x1f8a2, + 0x1f8a4, 0x1f8af, 0x1f8c0, 0x1f8c1, 0x1f8c3, 0x1f8c4, 0x1f8d0, 0x1f8d0, + 0x1f8ec, 0x1f8ec, 0x1f8f0, 0x1f8f1, 0x1f910, 0x1f911, 0x1f920, 0x1f921, + 0x1f924, 0x1f925, 0x1f928, 0x1f929, 0x1f92c, 0x1f92d, 0x1f940, 0x1f940, + 0x1f942, 0x1f944, 0x1f948, 0x1f94a, 0x1f980, 0x1f981, 0x1f984, 0x1f986, + 0x1f992, 0x1f993, 0x1f996, 0x1f99e, 0x1f9c0, 0x1f9c0, 0x1f9c5, 0x1f9d4, + 0x1f9f1, 0x1f9f1, 0x1f9f8, 0x1f9fa, 0x1fa00, 0x1fa03, 0x20000, 0x20005, + 0x20008, 0x20009, 0x20010, 0x20012, 0x20018, 0x20018, 0x20020, 0x20024, + 0x20030, 0x20031, 0x23801, 0x23801, 0x23803, 0x23803, 0x23805, 0x23805, + 0x23807, 0x23807, 0x23809, 0x23809, 0x2380b, 0x2380b, 0x2380d, 0x2380d, + 0x2380f, 0x2380f, 0x23811, 0x23811, 0x23813, 0x23813, 0x23815, 0x23815, + 0x23817, 0x23817, 0x23819, 0x23819, 0x2381b, 0x2381b, 0x2381d, 0x2381d, + 0x2381f, 0x23820, 0x23822, 0x23822, 0x23824, 0x23824, 0x23826, 0x23826, + 0x23828, 0x23828, 0x2382a, 0x2382a, 0x2382c, 0x2382c, 0x2382e, 0x2382e, + 0x23830, 0x23830, 0x23832, 0x23832, 0x23834, 0x23834, 0x23836, 0x23836, + 0x23838, 0x23838, 0x2383a, 0x2383a, 0x2383c, 0x2383c, 0x2383e, 0x2383e, + 0x23840, 0x23847, 0x23b00, 0x23b01, 0x23b03, 0x23b03, 0x23b05, 
0x23b0e, + 0x23b10, 0x23b13, 0x23b15, 0x23b16, 0x23b20, 0x23b20, 0x23b28, 0x23b28, + 0x23b30, 0x23b30, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_14_0_gmu_registers), 8)); + +static const u32 gen7_14_0_gmu_gx_registers[] = { + 0x1a802, 0x1a802, 0x1a883, 0x1a884, 0x1a900, 0x1a92b, 0x1a940, 0x1a940, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_14_0_gmu_gx_registers), 8)); + +static const u32 gen7_14_0_rscc_registers[] = { + 0x14000, 0x14034, 0x14036, 0x14036, 0x14040, 0x14042, 0x14044, 0x14045, + 0x14047, 0x14047, 0x14080, 0x14084, 0x14089, 0x1408c, 0x14091, 0x14094, + 0x14099, 0x1409c, 0x140a1, 0x140a4, 0x140a9, 0x140ac, 0x14100, 0x14104, + 0x14114, 0x14119, 0x14124, 0x14132, 0x14154, 0x1416b, 0x14340, 0x14341, + 0x14344, 0x14344, 0x14346, 0x1437c, 0x143f0, 0x143f8, 0x143fa, 0x143fe, + 0x14400, 0x14404, 0x14406, 0x1440a, 0x1440c, 0x14410, 0x14412, 0x14416, + 0x14418, 0x1441c, 0x1441e, 0x14422, 0x14424, 0x14424, 0x14498, 0x144a0, + 0x144a2, 0x144a6, 0x144a8, 0x144ac, 0x144ae, 0x144b2, 0x144b4, 0x144b8, + 0x144ba, 0x144be, 0x144c0, 0x144c4, 0x144c6, 0x144ca, 0x144cc, 0x144cc, + 0x14540, 0x14548, 0x1454a, 0x1454e, 0x14550, 0x14554, 0x14556, 0x1455a, + 0x1455c, 0x14560, 0x14562, 0x14566, 0x14568, 0x1456c, 0x1456e, 0x14572, + 0x14574, 0x14574, 0x145e8, 0x145f0, 0x145f2, 0x145f6, 0x145f8, 0x145fc, + 0x145fe, 0x14602, 0x14604, 0x14608, 0x1460a, 0x1460e, 0x14610, 0x14614, + 0x14616, 0x1461a, 0x1461c, 0x1461c, 0x14690, 0x14698, 0x1469a, 0x1469e, + 0x146a0, 0x146a4, 0x146a6, 0x146aa, 0x146ac, 0x146b0, 0x146b2, 0x146b6, + 0x146b8, 0x146bc, 0x146be, 0x146c2, 0x146c4, 0x146c4, 0x14738, 0x14740, + 0x14742, 0x14746, 0x14748, 0x1474c, 0x1474e, 0x14752, 0x14754, 0x14758, + 0x1475a, 0x1475e, 0x14760, 0x14764, 0x14766, 0x1476a, 0x1476c, 0x1476c, + 0x147e0, 0x147e8, 0x147ea, 0x147ee, 0x147f0, 0x147f4, 0x147f6, 0x147fa, + 0x147fc, 0x14800, 0x14802, 0x14806, 0x14808, 0x1480c, 0x1480e, 0x14812, + 0x14814, 0x14814, 0x14888, 0x14890, 0x14892, 0x14896, 
0x14898, 0x1489c, + 0x1489e, 0x148a2, 0x148a4, 0x148a8, 0x148aa, 0x148ae, 0x148b0, 0x148b4, + 0x148b6, 0x148ba, 0x148bc, 0x148bc, 0x14930, 0x14938, 0x1493a, 0x1493e, + 0x14940, 0x14944, 0x14946, 0x1494a, 0x1494c, 0x14950, 0x14952, 0x14956, + 0x14958, 0x1495c, 0x1495e, 0x14962, 0x14964, 0x14964, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_14_0_rscc_registers), 8)); + +static const u32 gen7_14_0_cpr_registers[] = { + 0x26800, 0x26805, 0x26808, 0x2680c, 0x26814, 0x26814, 0x2681c, 0x2681c, + 0x26820, 0x26838, 0x26840, 0x26840, 0x26848, 0x26848, 0x26850, 0x26850, + 0x26880, 0x2688e, 0x26980, 0x269b0, 0x269c0, 0x269c8, 0x269e0, 0x269ee, + 0x269fb, 0x269ff, 0x26a02, 0x26a07, 0x26a09, 0x26a0b, 0x26a10, 0x26b0f, + 0x27440, 0x27441, 0x27444, 0x27444, 0x27480, 0x274a2, 0x274ac, 0x274ad, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_14_0_cpr_registers), 8)); + +static const u32 gen7_14_0_gpucc_registers[] = { + 0x24000, 0x2400f, 0x24400, 0x2440f, 0x24c00, 0x24cff, 0x24800, 0x24805, + 0x25400, 0x25404, 0x25800, 0x25804, 0x25c00, 0x25c04, 0x26000, 0x26004, + 0x26400, 0x26405, 0x26414, 0x2641d, 0x2642a, 0x26430, 0x26432, 0x26433, + 0x26441, 0x2644b, 0x2644d, 0x26457, 0x26466, 0x26468, 0x26478, 0x2647a, + 0x26489, 0x2648a, 0x2649c, 0x2649e, 0x264a0, 0x264a4, 0x264c5, 0x264c7, + 0x264d6, 0x264d8, 0x264e8, 0x264e9, 0x264f9, 0x264fc, 0x2651c, 0x2651e, + 0x26540, 0x26576, 0x26600, 0x26616, 0x26620, 0x2662d, 0x26630, 0x26631, + 0x26635, 0x26635, 0x26637, 0x26637, 0x2663a, 0x2663a, 0x26642, 0x26642, + 0x26656, 0x26658, 0x2665b, 0x2665d, 0x2665f, 0x26662, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_14_0_gpucc_registers), 8)); + +static const u32 gen7_14_0_noncontext_pipe_br_registers[] = { + 0x00887, 0x0088c, 0x08600, 0x08600, 0x08602, 0x08602, 0x08610, 0x0861b, + 0x08620, 0x08620, 0x08630, 0x08630, 0x08637, 0x08639, 0x08640, 0x08640, + 0x09600, 0x09600, 0x09602, 0x09603, 0x0960a, 0x09616, 0x09624, 0x0963a, + 0x09640, 0x09640, 
0x09e00, 0x09e00, 0x09e02, 0x09e07, 0x09e0a, 0x09e16, + 0x09e19, 0x09e19, 0x09e1c, 0x09e1c, 0x09e20, 0x09e25, 0x09e30, 0x09e31, + 0x09e40, 0x09e51, 0x09e64, 0x09e64, 0x09e70, 0x09e72, 0x09e78, 0x09e79, + 0x09e80, 0x09fff, 0x0a600, 0x0a600, 0x0a603, 0x0a603, 0x0a610, 0x0a61f, + 0x0a630, 0x0a631, 0x0a638, 0x0a638, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_14_0_noncontext_pipe_br_registers), 8)); + +static const u32 gen7_14_0_noncontext_pipe_lpac_registers[] = { + 0x00f80, 0x00f80, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_14_0_noncontext_pipe_lpac_registers), 8)); + +static const u32 gen7_14_0_noncontext_rb_rac_pipe_br_registers[] = { + 0x08e10, 0x08e1c, 0x08e20, 0x08e25, 0x08e51, 0x08e54, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_14_0_noncontext_rb_rac_pipe_br_registers), 8)); + +static const u32 gen7_14_0_noncontext_rb_rbp_pipe_br_registers[] = { + 0x08e01, 0x08e01, 0x08e04, 0x08e04, 0x08e06, 0x08e09, 0x08e0c, 0x08e0c, + 0x08e28, 0x08e28, 0x08e2c, 0x08e35, 0x08e3b, 0x08e3f, 0x08e50, 0x08e50, + 0x08e5b, 0x08e5d, 0x08e5f, 0x08e5f, 0x08e61, 0x08e61, 0x08e63, 0x08e65, + 0x08e68, 0x08e68, 0x08e70, 0x08e79, 0x08e80, 0x08e8f, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_14_0_noncontext_rb_rbp_pipe_br_registers), 8)); + +static const u32 gen7_14_0_pc_cluster_fe_pipe_br_registers[] = { + 0x09800, 0x09804, 0x09806, 0x0980a, 0x09810, 0x09811, 0x09884, 0x09886, + 0x09970, 0x09972, 0x09b00, 0x09b08, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_14_0_pc_cluster_fe_pipe_br_registers), 8)); + +static const u32 gen7_14_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers[] = { + 0x0aa40, 0x0aabf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_14_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers), 8)); + +static const u32 gen7_14_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers[] = { + 0x0aa40, 0x0aabf, + UINT_MAX, UINT_MAX, +}; 
+static_assert(IS_ALIGNED(sizeof(gen7_14_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers), 8)); + +static const u32 gen7_14_0_non_context_tpl1_pipe_none_usptp_registers[] = { + 0x0b602, 0x0b602, 0x0b604, 0x0b604, 0x0b608, 0x0b60c, 0x0b60f, 0x0b621, + 0x0b630, 0x0b633, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_14_0_non_context_tpl1_pipe_none_usptp_registers), 8)); + +static const u32 gen7_14_0_non_context_tpl1_pipe_br_usptp_registers[] = { + 0x0b600, 0x0b600, + UINT_MAX, UINT_MAX, +}; + +static const u32 gen7_14_0_tpl1_cluster_sp_vs_pipe_br_usptp_registers[] = { + 0x0b300, 0x0b307, 0x0b309, 0x0b309, 0x0b310, 0x0b310, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_14_0_tpl1_cluster_sp_vs_pipe_br_usptp_registers), 8)); + +static const u32 gen7_14_0_tpl1_cluster_sp_ps_pipe_br_usptp_registers[] = { + 0x0b180, 0x0b183, 0x0b190, 0x0b195, 0x0b2c0, 0x0b2d5, 0x0b300, 0x0b307, + 0x0b309, 0x0b309, 0x0b310, 0x0b310, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_14_0_tpl1_cluster_sp_ps_pipe_br_usptp_registers), 8)); + +static struct gen7_cluster_registers gen7_14_0_clusters[] = { + { CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT, + gen7_14_0_noncontext_pipe_br_registers, }, + { CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT, + gen7_14_0_noncontext_rb_rac_pipe_br_registers, &gen7_14_0_rb_rac_sel, }, + { CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT, + gen7_14_0_noncontext_rb_rbp_pipe_br_registers, &gen7_14_0_rb_rbp_sel, }, + { CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_0_0_rb_rac_cluster_ps_pipe_br_registers, &gen7_14_0_rb_rac_sel, }, + { CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_0_0_rb_rac_cluster_ps_pipe_br_registers, &gen7_14_0_rb_rac_sel, }, + { CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers, &gen7_14_0_rb_rbp_sel, }, + { CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers, &gen7_14_0_rb_rbp_sel, }, + { CLUSTER_GRAS, PIPE_BR, STATE_FORCE_CTXT_0, + 
gen7_0_0_gras_cluster_gras_pipe_br_registers, }, + { CLUSTER_GRAS, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_0_0_gras_cluster_gras_pipe_br_registers, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_14_0_pc_cluster_fe_pipe_br_registers, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_14_0_pc_cluster_fe_pipe_br_registers, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_0_0_vfd_cluster_fe_pipe_bv_registers, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_0_0_vfd_cluster_fe_pipe_bv_registers, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_0_0_vpc_cluster_fe_pipe_br_registers, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_0_0_vpc_cluster_fe_pipe_br_registers, }, + { CLUSTER_PC_VS, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers, }, + { CLUSTER_PC_VS, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers, }, + { CLUSTER_VPC_PS, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers, }, + { CLUSTER_VPC_PS, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers, }, +}; + +static struct gen7_sptp_cluster_registers gen7_14_0_sptp_clusters[] = { + { CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, HLSQ_STATE, + gen7_0_0_sp_noncontext_pipe_br_hlsq_state_registers, 0xae00}, + { CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, SP_TOP, + gen7_0_0_sp_noncontext_pipe_br_sp_top_registers, 0xae00}, + { CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, USPTP, + gen7_0_0_sp_noncontext_pipe_br_usptp_registers, 0xae00}, + { CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE, + gen7_0_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, SP_TOP, + gen7_0_0_sp_cluster_sp_vs_pipe_br_sp_top_registers, 0xa800}, + { CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP, + gen7_0_0_sp_cluster_sp_vs_pipe_br_usptp_registers, 0xa800}, + { CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE, + 
gen7_0_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, SP_TOP, + gen7_0_0_sp_cluster_sp_vs_pipe_br_sp_top_registers, 0xa800}, + { CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP, + gen7_0_0_sp_cluster_sp_vs_pipe_br_usptp_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE, + gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_DP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, SP_TOP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE, + gen7_14_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP, + gen7_14_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE, + gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_DP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, SP_TOP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, HLSQ_DP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, SP_TOP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, HLSQ_DP, + 
gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, SP_TOP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800}, + { CLUSTER_NONE, TP0_NCTX_REG, PIPE_NONE, 0, USPTP, + gen7_14_0_non_context_tpl1_pipe_none_usptp_registers, 0xb600}, + { CLUSTER_NONE, TP0_NCTX_REG, PIPE_BR, 0, USPTP, + gen7_14_0_non_context_tpl1_pipe_br_usptp_registers, 0xb600}, + { CLUSTER_SP_VS, TP0_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP, + gen7_14_0_tpl1_cluster_sp_ps_pipe_br_usptp_registers, 0xb000}, + { CLUSTER_SP_VS, TP0_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP, + gen7_14_0_tpl1_cluster_sp_ps_pipe_br_usptp_registers, 0xb000}, + { CLUSTER_SP_PS, TP0_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP, + gen7_14_0_tpl1_cluster_sp_ps_pipe_br_usptp_registers, 0xb000}, + { CLUSTER_SP_PS, TP0_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP, + gen7_14_0_tpl1_cluster_sp_ps_pipe_br_usptp_registers, 0xb000}, + { CLUSTER_SP_PS, TP0_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP, + gen7_14_0_tpl1_cluster_sp_ps_pipe_br_usptp_registers, 0xb000}, + { CLUSTER_SP_PS, TP0_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP, + gen7_14_0_tpl1_cluster_sp_ps_pipe_br_usptp_registers, 0xb000}, +}; + +static struct gen7_shader_block gen7_14_0_shader_blocks[] = { + { TP0_TMO_DATA, 0x0200, 2, 2, PIPE_BR, USPTP }, + { TP0_SMO_DATA, 0x0080, 2, 2, PIPE_BR, USPTP }, + { TP0_MIPMAP_BASE_DATA, 0x03C0, 2, 2, PIPE_BR, USPTP }, + { SP_INST_DATA, 0x0800, 2, 2, PIPE_BR, USPTP }, + { SP_INST_DATA_1, 0x0800, 2, 2, PIPE_BR, USPTP }, + { SP_LB_0_DATA, 0x0800, 2, 2, PIPE_BR, USPTP }, + { SP_LB_1_DATA, 0x0800, 2, 2, PIPE_BR, USPTP }, + { SP_LB_2_DATA, 0x0800, 2, 2, PIPE_BR, USPTP }, + { SP_LB_3_DATA, 0x0800, 2, 2, PIPE_BR, USPTP }, + { SP_LB_4_DATA, 0x0800, 2, 2, PIPE_BR, USPTP }, + { SP_LB_5_DATA, 0x0800, 2, 2, PIPE_BR, USPTP }, + { SP_CB_RAM, 0x0390, 2, 2, PIPE_BR, USPTP }, + { SP_INST_TAG, 0x0090, 2, 2, PIPE_BR, 
USPTP }, + { SP_TMO_TAG, 0x0080, 2, 2, PIPE_BR, USPTP }, + { SP_SMO_TAG, 0x0080, 2, 2, PIPE_BR, USPTP }, + { SP_STATE_DATA, 0x0040, 2, 2, PIPE_BR, USPTP }, + { SP_HWAVE_RAM, 0x0100, 2, 2, PIPE_BR, USPTP }, + { SP_L0_INST_BUF, 0x0050, 2, 2, PIPE_BR, USPTP }, + { HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_CPS_BE_CTXT_BUF_RAM_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0300, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_GFX_CPS_BE_CTXT_BUF_RAM, 0x0300, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_CHUNK_CPS_RAM, 0x0300, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_CHUNK_CVS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_CHUNK_CPS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_ICB_CVS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_ICB_CPS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_CVS_MISC_RAM, 0x0280, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_CPS_MISC_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_CPS_MISC_RAM_1, 0x0200, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_GFX_CPS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_CVS_MISC_RAM_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_CPS_MISC_RAM_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_INST_RAM_TAG, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0064, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0064, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_INST_RAM_1, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_STPROC_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_BV_BE_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_DATAPATH_META, 0x0020, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_FRONTEND_META, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_INDIRECT_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE }, + { 
HLSQ_BACKEND_META, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE }, +}; + +static struct gen7_reg_list gen7_14_0_reg_list[] = { + { gen7_14_0_gpu_registers, NULL }, + { gen7_14_0_cx_misc_registers, NULL }, + { gen7_14_0_dbgc_registers, NULL }, + { NULL, NULL }, +}; + +static struct gen7_cp_indexed_reg gen7_14_0_cp_indexed_reg_list[] = { + { GEN7_CP_SQE_STAT_ADDR, GEN7_CP_SQE_STAT_DATA, 0x40}, + { GEN7_CP_DRAW_STATE_ADDR, GEN7_CP_DRAW_STATE_DATA, 0x100}, + { GEN7_CP_ROQ_DBG_ADDR, GEN7_CP_ROQ_DBG_DATA, 0x800}, + { GEN7_CP_SQE_UCODE_DBG_ADDR, GEN7_CP_SQE_UCODE_DBG_DATA, 0x8000}, +}; + +static const u32 *gen7_14_0_external_core_regs[] = { + gen7_14_0_gpucc_registers, + gen7_14_0_cpr_registers, +}; +#endif /*_ADRENO_GEN7_14_0_SNAPSHOT_H */ diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c index 2fd728852c..4c53773d14 100644 --- a/adreno_gen7_snapshot.c +++ b/adreno_gen7_snapshot.c @@ -9,6 +9,7 @@ #include "adreno_gen7_0_0_snapshot.h" #include "adreno_gen7_2_0_snapshot.h" #include "adreno_gen7_9_0_snapshot.h" +#include "adreno_gen7_14_0_snapshot.h" static struct kgsl_memdesc *gen7_capturescript; static struct kgsl_memdesc *gen7_crashdump_registers; @@ -97,6 +98,32 @@ const struct gen7_snapshot_block_list gen7_9_0_snapshot_block_list = { .index_registers_len = ARRAY_SIZE(gen7_9_0_cp_indexed_reg_list), }; +const struct gen7_snapshot_block_list gen7_14_0_snapshot_block_list = { + .pre_crashdumper_regs = gen7_9_0_pre_crashdumper_gpu_registers, + .debugbus_blocks = gen7_14_0_debugbus_blocks, + .debugbus_blocks_len = ARRAY_SIZE(gen7_14_0_debugbus_blocks), + .gbif_debugbus_blocks = gen7_gbif_debugbus_blocks, + .gbif_debugbus_blocks_len = ARRAY_SIZE(gen7_gbif_debugbus_blocks), + .cx_debugbus_blocks = gen7_cx_dbgc_debugbus_blocks, + .cx_debugbus_blocks_len = ARRAY_SIZE(gen7_cx_dbgc_debugbus_blocks), + .external_core_regs = gen7_14_0_external_core_regs, + .num_external_core_regs = ARRAY_SIZE(gen7_14_0_external_core_regs), + .gmu_regs = gen7_14_0_gmu_registers, + .gmu_gx_regs = 
gen7_14_0_gmu_gx_registers, + .rscc_regs = gen7_14_0_rscc_registers, + .reg_list = gen7_14_0_reg_list, + .cx_misc_regs = gen7_14_0_cx_misc_registers, + .shader_blocks = gen7_14_0_shader_blocks, + .num_shader_blocks = ARRAY_SIZE(gen7_14_0_shader_blocks), + .clusters = gen7_14_0_clusters, + .num_clusters = ARRAY_SIZE(gen7_14_0_clusters), + .sptp_clusters = gen7_14_0_sptp_clusters, + .num_sptp_clusters = ARRAY_SIZE(gen7_14_0_sptp_clusters), + .post_crashdumper_regs = gen7_14_0_post_crashdumper_registers, + .index_registers = gen7_14_0_cp_indexed_reg_list, + .index_registers_len = ARRAY_SIZE(gen7_14_0_cp_indexed_reg_list), +}; + #define GEN7_SP_READ_SEL_VAL(_location, _pipe, _statetype, _usptp, _sptp) \ (FIELD_PREP(GENMASK(19, 18), _location) | \ FIELD_PREP(GENMASK(17, 16), _pipe) | \ @@ -269,8 +296,7 @@ static size_t gen7_legacy_snapshot_shader(struct kgsl_device *device, * AHB path might fail. Hence, skip SP_INST_TAG and SP_INST_DATA* * state types during snapshot dump in legacy flow. */ - if (adreno_is_gen7_0_0(adreno_dev) || adreno_is_gen7_0_1(adreno_dev) || - adreno_is_gen7_4_0(adreno_dev)) { + if (adreno_is_gen7_0_x_family(adreno_dev) || adreno_is_gen7_14_0(adreno_dev)) { if (block->statetype == SP_INST_TAG || block->statetype == SP_INST_DATA || block->statetype == SP_INST_DATA_1 || @@ -685,18 +711,20 @@ static void gen7_snapshot_mempool(struct kgsl_device *device, { /* set CP_CHICKEN_DBG[StabilizeMVC] to stabilize it while dumping */ kgsl_regrmw(device, GEN7_CP_CHICKEN_DBG, 0x4, 0x4); - kgsl_regrmw(device, GEN7_CP_BV_CHICKEN_DBG, 0x4, 0x4); kgsl_snapshot_indexed_registers(device, snapshot, GEN7_CP_MEM_POOL_DBG_ADDR, GEN7_CP_MEM_POOL_DBG_DATA, 0, 0x2200); - kgsl_snapshot_indexed_registers(device, snapshot, - GEN7_CP_BV_MEM_POOL_DBG_ADDR, GEN7_CP_BV_MEM_POOL_DBG_DATA, - 0, 0x2200); + if (!adreno_is_gen7_14_0(ADRENO_DEVICE(device))) { + kgsl_regrmw(device, GEN7_CP_BV_CHICKEN_DBG, 0x4, 0x4); + kgsl_snapshot_indexed_registers(device, snapshot, + 
GEN7_CP_BV_MEM_POOL_DBG_ADDR, GEN7_CP_BV_MEM_POOL_DBG_DATA, + 0, 0x2200); + kgsl_regrmw(device, GEN7_CP_BV_CHICKEN_DBG, 0x4, 0x0); + } kgsl_regrmw(device, GEN7_CP_CHICKEN_DBG, 0x4, 0x0); - kgsl_regrmw(device, GEN7_CP_BV_CHICKEN_DBG, 0x4, 0x0); } static unsigned int gen7_read_dbgahb(struct kgsl_device *device, @@ -1720,11 +1748,11 @@ void gen7_snapshot(struct adreno_device *adreno_dev, gen7_snapshot_block_list->index_registers[i].data, 0, gen7_snapshot_block_list->index_registers[i].size); - if (!adreno_is_gen7_9_x(adreno_dev)) { + if (!adreno_is_gen7_9_x(adreno_dev)) gen7_snapshot_br_roq(device, snapshot); + if (!adreno_is_gen7_9_x(adreno_dev) && !adreno_is_gen7_14_0(adreno_dev)) { gen7_snapshot_bv_roq(device, snapshot); - gen7_snapshot_lpac_roq(device, snapshot); } From e70e479801d25a71caa87c1eb01bc7c8b43d4288 Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Thu, 16 Nov 2023 20:28:41 -0800 Subject: [PATCH 0587/1016] msm: kgsl: Remove inaccessible registers from snapshot Some registers are not accessible from HLOS, hence we cannot dump them in snapshot. Omit such registers from snapshot. 1. CP_SECVID 2. GDPM_LKG (some registers) 3. GPUCC (some registers). 
Change-Id: Idd6673f093da35b91d68f467f48b36d72937d085 Signed-off-by: Urvashi Agrawal --- adreno_gen8_0_0_snapshot.h | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h index 5b5bc0c741..b3614ae557 100644 --- a/adreno_gen8_0_0_snapshot.h +++ b/adreno_gen8_0_0_snapshot.h @@ -142,16 +142,6 @@ static const u32 gen8_0_0_ahb_secure_gpu_registers[] = { }; static_assert(IS_ALIGNED(sizeof(gen8_0_0_ahb_secure_gpu_registers), 8)); -/* - * Block : ['AHB_SECURE'] - * pairs : 1 (Regs:3) - */ -static const u32 gen8_0_0_ahb_secure_cp_cp_pipe_none_registers[] = { - 0x0f000, 0x0f002, - UINT_MAX, UINT_MAX, -}; -static_assert(IS_ALIGNED(sizeof(gen8_0_0_ahb_secure_cp_cp_pipe_none_registers), 8)); - /* * Block : ['GBIF'] * REGION : UNSLICE @@ -1910,7 +1900,6 @@ static struct gen8_reg_list gen8_0_0_ahb_registers[] = { { UNSLICE, gen8_0_0_ahb_precd_gpu_registers }, { SLICE, gen8_0_0_ahb_precd_gpu_slice_slice_registers }, { UNSLICE, gen8_0_0_ahb_secure_gpu_registers }, - { UNSLICE, gen8_0_0_ahb_secure_cp_cp_pipe_none_registers }, }; static struct gen8_reg_list gen8_gmu_registers[] = { @@ -1928,7 +1917,6 @@ static struct gen8_reg_list gen8_gmu_registers[] = { static const u32 gen8_0_0_gdpm_lkg_registers[] = { 0x21c00, 0x21c00, 0x21c08, 0x21c09, 0x21c0e, 0x21c0f, 0x21c4f, 0x21c50, 0x21c52, 0x21c52, 0x21c54, 0x21c56, 0x21c58, 0x21c5a, 0x21c5c, 0x21c60, - 0x22048, 0x220a0, UINT_MAX, UINT_MAX, }; static_assert(IS_ALIGNED(sizeof(gen8_0_0_gdpm_lkg_registers), 8)); @@ -1964,13 +1952,13 @@ static_assert(IS_ALIGNED(sizeof(gen8_0_0_gpu_cc_ahb2phy_swman_registers), 8)); * pairs : 26 (Regs:133) */ static const u32 gen8_0_0_gpu_cc_gpu_cc_reg_registers[] = { - 0x25000, 0x25002, 0x25400, 0x25404, 0x25800, 0x25804, 0x25c00, 0x25c04, - 0x26000, 0x26004, 0x26400, 0x26406, 0x26415, 0x2641d, 0x2641f, 0x26440, - 0x26443, 0x26444, 0x26478, 0x2647a, 0x26489, 0x2648a, 0x2649c, 0x2649e, - 0x264a0, 0x264a1, 0x264c5, 
0x264c7, 0x264e8, 0x264ea, 0x264f9, 0x264fc, - 0x2650b, 0x2650b, 0x2651c, 0x2651e, 0x26540, 0x2654b, 0x26554, 0x26556, - 0x26558, 0x2655c, 0x2655e, 0x2655f, 0x26563, 0x26563, 0x2656d, 0x26573, - 0x26576, 0x26576, 0x26578, 0x2657a, + 0x25400, 0x25404, 0x25800, 0x25804, 0x25c00, 0x25c04, 0x26000, 0x26004, + 0x26400, 0x26406, 0x26415, 0x2641d, 0x2641f, 0x26440, 0x26443, 0x26444, + 0x26478, 0x2647a, 0x26489, 0x2648a, 0x2649c, 0x2649e, 0x264a0, 0x264a1, + 0x264c5, 0x264c7, 0x264e8, 0x264ea, 0x264f9, 0x264fc, 0x2650b, 0x2650b, + 0x2651c, 0x2651e, 0x26540, 0x2654b, 0x26554, 0x26556, 0x26558, 0x2655c, + 0x2655e, 0x2655f, 0x26563, 0x26563, 0x2656d, 0x26573, 0x26576, 0x26576, + 0x26578, 0x2657a, UINT_MAX, UINT_MAX, }; static_assert(IS_ALIGNED(sizeof(gen8_0_0_gpu_cc_gpu_cc_reg_registers), 8)); From 1b41ec070a6a20f1ed2da83f106121c8b69eb5f9 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Thu, 16 Nov 2023 18:59:58 -0800 Subject: [PATCH 0588/1016] kgsl: gen8: Enable LPAC and AQE features for gen8_0_0 Enable LPAC and AQE feature for gen8_0_0 to support additional graphics functionality. 
Change-Id: Idcfba1877106f1594b71c2b6bc1c6147646e1865 Signed-off-by: Hareesh Gundu --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index a254eacee5..2b97597c44 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2633,7 +2633,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), .compatible = "qcom,adreno-gpu-gen8-0-0", .features = ADRENO_APRIV | ADRENO_IOCOHERENT | - ADRENO_CONTENT_PROTECTION, + ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_16M, From 74b4ce2ff65ece8487cdabc6274d402c4b9a43db Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Tue, 28 Nov 2023 11:07:32 -0800 Subject: [PATCH 0589/1016] kgsl: Fix undefined references of kgsl_pwrscale_fast_bus_hint() kgsl can compile with the kernel in-tree versions of the Adreno GPU and bandwidth governors. Hence make sure the kgsl_pwrscale_fast_bus_hint() symbol is available when kgsl compiles with the in-tree governors. Change-Id: Ia68601f357339674f81788fb2f51d17a77f66e55 Signed-off-by: Hareesh Gundu --- kgsl_pwrscale.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kgsl_pwrscale.h b/kgsl_pwrscale.h index fd24760901..92f7dd5fae 100644 --- a/kgsl_pwrscale.h +++ b/kgsl_pwrscale.h @@ -135,8 +135,8 @@ static inline void devfreq_gpubw_exit(void) int devfreq_gpubw_init(void); void devfreq_gpubw_exit(void); +#endif void kgsl_pwrscale_fast_bus_hint(bool on); -#endif #endif From df8c98a52e359b9c8355f4ca920387f312e9e287 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Wed, 19 Jul 2023 16:02:54 -0600 Subject: [PATCH 0590/1016] kgsl: hwsched: Add gmu_hw_fence_ready_ts variable Hitherto, we have tracked the per context last submitted timestamp with the internal timestamp in hwsched targets. This gets reset to zero during SLUMBER entry and during reset/recovery. 
This can cause kgsl to send a rogue H2F_HW_FENCE_INFO packet to GMU either during a concurrent SLUMBER entry or reset/recovery. Hence, introduce a new variable which will be used in the hardware fence creation path. Change-Id: Ie720268c20e6747fc17339b82b638cb0ce12ad32 Signed-off-by: Harshdeep Dhatt --- adreno_a6xx_hwsched_hfi.c | 3 --- adreno_drawctxt.h | 8 ++++++++ adreno_gen7_hwsched_hfi.c | 11 +++++++---- adreno_gen8_hwsched_hfi.c | 11 +++++++---- 4 files changed, 22 insertions(+), 11 deletions(-) diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index 5e3d21cfef..f7582c2f93 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -1918,9 +1918,6 @@ skipib: gmu_core_regwrite(KGSL_DEVICE(adreno_dev), A6XX_GMU_HOST2GMU_INTR_SET, DISPQ_IRQ_BIT(drawobj->context->gmu_dispatch_queue)); - /* - * We don't need the drawctxt spinlock here because hardware fences are not enabled for a6x - */ drawctxt->internal_timestamp = drawobj->timestamp; return ret; diff --git a/adreno_drawctxt.h b/adreno_drawctxt.h index ed3e6fb405..3ff571b806 100644 --- a/adreno_drawctxt.h +++ b/adreno_drawctxt.h @@ -88,6 +88,14 @@ struct adreno_context { u32 hw_fence_count; /** @syncobj_timestamp: Timestamp to check whether GMU has consumed a syncobj */ u32 syncobj_timestamp; + /** + * @gmu_hw_fence_ready_ts: This timestamp is used to figure out whether a hardware fence + * is ready to be submitted to GMU at the time of its creation or not. This timestamp + * tracks the timestamp of the most recently submitted cmdbatch submission to the GMU + * context queue for this context. This is different from the internal_timestamp (which gets + * reset to 0 in some cases). 
+ */ + u32 gmu_hw_fence_ready_ts; }; /* Flag definitions for flag field in adreno_context */ diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 10ede8e356..207b09ed9f 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -3570,7 +3570,7 @@ void gen7_hwsched_create_hw_fence(struct adreno_device *adreno_dev, * If this ts hasn't been submitted yet, then store it in the drawctxt hardware fence * list and return. This fence will be sent to GMU when this ts is dispatched to GMU. */ - if (timestamp_cmp(kfence->timestamp, drawctxt->internal_timestamp) > 0) { + if (timestamp_cmp(kfence->timestamp, drawctxt->gmu_hw_fence_ready_ts) > 0) { drawctxt_queue_hw_fence(drawctxt, entry); destroy_hw_fence = false; goto done; @@ -3843,13 +3843,16 @@ skipib: gmu_core_regwrite(KGSL_DEVICE(adreno_dev), GEN7_GMU_HOST2GMU_INTR_SET, DISPQ_IRQ_BIT(get_irq_bit(adreno_dev, drawobj))); + drawctxt->internal_timestamp = drawobj->timestamp; + spin_lock(&drawctxt->lock); process_hw_fence_queue(adreno_dev, drawctxt, drawobj->timestamp); /* - * We need to update the internal timestamp while holding the drawctxt lock since we have to - * check it in the hardware fence creation path, where we are not taking the device mutex. + * We need to update the gmu_hw_fence_ready_ts while holding the drawctxt lock since we + * have to check it in the hardware fence creation path, where we are not taking the device + * mutex. */ - drawctxt->internal_timestamp = drawobj->timestamp; + drawctxt->gmu_hw_fence_ready_ts = drawobj->timestamp; spin_unlock(&drawctxt->lock); return 0; diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index ab29ae7a37..1e4a8598fa 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -3572,7 +3572,7 @@ void gen8_hwsched_create_hw_fence(struct adreno_device *adreno_dev, * If this ts hasn't been submitted yet, then store it in the drawctxt hardware fence * list and return. 
This fence will be sent to GMU when this ts is dispatched to GMU. */ - if (timestamp_cmp(kfence->timestamp, drawctxt->internal_timestamp) > 0) { + if (timestamp_cmp(kfence->timestamp, drawctxt->gmu_hw_fence_ready_ts) > 0) { drawctxt_queue_hw_fence(drawctxt, entry); destroy_hw_fence = false; goto done; @@ -3841,13 +3841,16 @@ skipib: gmu_core_regwrite(KGSL_DEVICE(adreno_dev), GEN8_GMUCX_HOST2GMU_INTR_SET, DISPQ_IRQ_BIT(get_irq_bit(adreno_dev, drawobj))); + drawctxt->internal_timestamp = drawobj->timestamp; + spin_lock(&drawctxt->lock); process_hw_fence_queue(adreno_dev, drawctxt, drawobj->timestamp); /* - * We need to update the internal timestamp while holding the drawctxt lock since we have to - * check it in the hardware fence creation path, where we are not taking the device mutex. + * We need to update the gmu_hw_fence_ready_ts while holding the drawctxt lock since we + * have to check it in the hardware fence creation path, where we are not taking the + * device mutex. */ - drawctxt->internal_timestamp = drawobj->timestamp; + drawctxt->gmu_hw_fence_ready_ts = drawobj->timestamp; spin_unlock(&drawctxt->lock); return 0; From 8aa6a8fe5e1a1cb41f62e3f410b7258f23a01371 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Tue, 19 Sep 2023 15:08:48 -0600 Subject: [PATCH 0591/1016] kgsl: hwsched: Introduce a SYNC dispatch irq bit If a submission is behind a pending SYNC object, then raise the SYNC irq bit. This fixes a power regression seen with SYNC object submissions. 
Change-Id: Ie30135d0ff060e99d10a75391646333278b19f0b Signed-off-by: Harshdeep Dhatt --- adreno_gen7_hwsched_hfi.c | 14 +++++++++++--- adreno_gen8_hwsched_hfi.c | 14 +++++++++++--- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 207b09ed9f..c269f5d886 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -3016,6 +3016,7 @@ static void populate_ibs(struct adreno_device *adreno_dev, #define HFI_DSP_IRQ_BASE 2 #define DISPQ_IRQ_BIT(_idx) BIT((_idx) + HFI_DSP_IRQ_BASE) +#define DISPQ_SYNC_IRQ_BIT(_idx) ((DISPQ_IRQ_BIT(_idx) << (KGSL_PRIORITY_MAX_RB_LEVELS + 1))) int gen7_gmu_context_queue_write(struct adreno_device *adreno_dev, struct kgsl_memdesc *gmu_context_queue, u32 *msg, u32 size_bytes, @@ -3751,6 +3752,7 @@ int gen7_hwsched_submit_drawobj(struct adreno_device *adreno_dev, struct kgsl_dr struct adreno_submit_time time = {0}; struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context); static void *cmdbuf; + struct gmu_context_queue_header *hdr = NULL; if (cmdbuf == NULL) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -3839,9 +3841,15 @@ skipib: if (ret) return ret; - /* Send interrupt to GMU to receive the message */ - gmu_core_regwrite(KGSL_DEVICE(adreno_dev), GEN7_GMU_HOST2GMU_INTR_SET, - DISPQ_IRQ_BIT(get_irq_bit(adreno_dev, drawobj))); + hdr = drawctxt->gmu_context_queue.hostptr; + /* The last sync object has been retired by the GMU */ + if (timestamp_cmp(hdr->sync_obj_ts, drawctxt->syncobj_timestamp) >= 0) + /* Send interrupt to GMU to receive the message */ + gmu_core_regwrite(KGSL_DEVICE(adreno_dev), GEN7_GMU_HOST2GMU_INTR_SET, + DISPQ_IRQ_BIT(get_irq_bit(adreno_dev, drawobj))); + else + gmu_core_regwrite(KGSL_DEVICE(adreno_dev), GEN7_GMU_HOST2GMU_INTR_SET, + DISPQ_SYNC_IRQ_BIT(get_irq_bit(adreno_dev, drawobj))); drawctxt->internal_timestamp = drawobj->timestamp; diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 
1e4a8598fa..83048906a2 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -3022,6 +3022,7 @@ static void populate_ibs(struct adreno_device *adreno_dev, #define HFI_DSP_IRQ_BASE 2 #define DISPQ_IRQ_BIT(_idx) BIT((_idx) + HFI_DSP_IRQ_BASE) +#define DISPQ_SYNC_IRQ_BIT(_idx) ((DISPQ_IRQ_BIT(_idx) << (KGSL_PRIORITY_MAX_RB_LEVELS + 1))) int gen8_gmu_context_queue_write(struct adreno_device *adreno_dev, struct adreno_context *drawctxt, u32 *msg, u32 size_bytes, @@ -3749,6 +3750,7 @@ int gen8_hwsched_submit_drawobj(struct adreno_device *adreno_dev, struct kgsl_dr struct adreno_submit_time time = {0}; struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context); static void *cmdbuf; + struct gmu_context_queue_header *hdr = NULL; if (cmdbuf == NULL) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -3837,9 +3839,15 @@ skipib: if (ret) return ret; - /* Send interrupt to GMU to receive the message */ - gmu_core_regwrite(KGSL_DEVICE(adreno_dev), GEN8_GMUCX_HOST2GMU_INTR_SET, - DISPQ_IRQ_BIT(get_irq_bit(adreno_dev, drawobj))); + hdr = drawctxt->gmu_context_queue.hostptr; + /* The last sync object has been retired by the GMU */ + if (timestamp_cmp(hdr->sync_obj_ts, drawctxt->syncobj_timestamp) >= 0) + /* Send interrupt to GMU to receive the message */ + gmu_core_regwrite(KGSL_DEVICE(adreno_dev), GEN8_GMUCX_HOST2GMU_INTR_SET, + DISPQ_IRQ_BIT(get_irq_bit(adreno_dev, drawobj))); + else + gmu_core_regwrite(KGSL_DEVICE(adreno_dev), GEN8_GMUCX_HOST2GMU_INTR_SET, + DISPQ_SYNC_IRQ_BIT(get_irq_bit(adreno_dev, drawobj))); drawctxt->internal_timestamp = drawobj->timestamp; From 8fcec31f972e9f09f04e36788e806fe2ddf294c3 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Mon, 27 Nov 2023 15:21:13 -0800 Subject: [PATCH 0592/1016] kgsl: gen8: Dump cx_misc register in snapshot Add support for cx_misc register dumping in gen8 snapshot. 
Also having below changes, where we can dump even GX is OFF i ) Update firmware dump sequence in gen8 snapshot ii) Update CX_MISC register dump for gen7 snapshot Change-Id: Ib516e1908a7b979b63a962fa4e05f217b7d7fdd2 Signed-off-by: Hareesh Gundu --- adreno_gen7_snapshot.c | 4 +- adreno_gen8_snapshot.c | 88 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 82 insertions(+), 10 deletions(-) diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c index 2fd728852c..23ae60c668 100644 --- a/adreno_gen7_snapshot.c +++ b/adreno_gen7_snapshot.c @@ -1643,6 +1643,8 @@ void gen7_snapshot(struct adreno_device *adreno_dev, kgsl_regwrite(device, GEN7_RBBM_CLOCK_CNTL3_TP0, cgc2); } + gen7_cx_misc_regs_snapshot(device, snapshot); + /* SQE Firmware */ kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, gen7_snapshot_sqe, NULL); @@ -1695,8 +1697,6 @@ void gen7_snapshot(struct adreno_device *adreno_dev, gen7_reglist_snapshot(device, snapshot); - gen7_cx_misc_regs_snapshot(device, snapshot); - /* * Need to program and save this register before capturing resource table * to workaround a CGC issue diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 9c515026c4..94e029a497 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -32,6 +32,7 @@ const struct gen8_snapshot_block_list gen8_0_0_snapshot_block_list = { .num_gmu_regs = ARRAY_SIZE(gen8_gmu_registers), .rscc_regs = gen8_0_0_rscc_rsc_registers, .reg_list = gen8_0_0_reg_list, + .cx_misc_regs = gen8_0_0_cx_misc_registers, .shader_blocks = gen8_0_0_shader_blocks, .num_shader_blocks = ARRAY_SIZE(gen8_0_0_shader_blocks), .cp_clusters = gen8_0_0_cp_clusters, @@ -1527,6 +1528,75 @@ static void gen8_reglist_snapshot(struct kgsl_device *device, } } +static size_t gen8_snapshot_cx_misc_registers(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + const u32 *ptr = (u32 *)priv; + u32 *src, *data = (unsigned int *)buf; + size_t size = adreno_snapshot_regs_count(ptr) * 
sizeof(u32); + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "CX_MISC REGISTERS"); + return 0; + } + + src = gen8_crashdump_registers->hostptr; + + for (; ptr[0] != UINT_MAX; ptr += 2) { + u32 cnt = REG_COUNT(ptr); + + if (cnt == 1) + *data++ = BIT(31) | ptr[0]; + else { + *data++ = ptr[0]; + *data++ = cnt; + } + memcpy(data, src, cnt << 2); + data += cnt; + src += cnt; + } + + /* Return the size of the section */ + return size; +} + +static void gen8_cx_misc_regs_snapshot(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + u64 *ptr, offset = 0; + const u32 *regs_ptr = (const u32 *)gen8_snapshot_block_list->cx_misc_regs; + + if (CD_SCRIPT_CHECK(device)) + goto legacy_snapshot; + + /* Build the crash script */ + ptr = (u64 *)gen8_capturescript->hostptr; + + for (; regs_ptr[0] != UINT_MAX; regs_ptr += 2) { + u32 r = REG_COUNT(regs_ptr); + + ptr += CD_READ(ptr, regs_ptr[0], r, + (gen8_crashdump_registers->gpuaddr + offset)); + offset += r * sizeof(u32); + } + + /* Marker for end of script */ + CD_FINISH(ptr, offset); + + /* Try to run the crash dumper */ + if (_gen8_do_crashdump(device)) { + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, + snapshot, gen8_snapshot_cx_misc_registers, + (void *)gen8_snapshot_block_list->cx_misc_regs); + return; + } + +legacy_snapshot: + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, + snapshot, adreno_snapshot_cx_misc_registers, + (void *)gen8_snapshot_block_list->cx_misc_regs); +} + void gen8_snapshot_external_core_regs(struct kgsl_device *device, struct kgsl_snapshot *snapshot) { @@ -1572,6 +1642,16 @@ void gen8_snapshot(struct adreno_device *adreno_dev, gen8_snapshot_debugbus(adreno_dev, snapshot); + gen8_cx_misc_regs_snapshot(device, snapshot); + + /* SQE Firmware */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, gen8_snapshot_sqe, NULL); + + /* AQE Firmware */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, 
gen8_snapshot_aqe, NULL); + if (!adreno_gx_is_on(adreno_dev)) return; @@ -1634,14 +1714,6 @@ void gen8_snapshot(struct adreno_device *adreno_dev, gen8_snapshot_block_list->index_registers[i].pipe_id, UINT_MAX); } - /* SQE Firmware */ - kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, - snapshot, gen8_snapshot_sqe, NULL); - - /* AQE Firmware */ - kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, - snapshot, gen8_snapshot_aqe, NULL); - /* Mempool debug data */ gen8_snapshot_mempool(device, snapshot); From a9c4d552c51073e6ad3d6a7ab8501d265fbcea15 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Tue, 24 Oct 2023 11:52:28 -0700 Subject: [PATCH 0593/1016] kgsl: gen8: Add support for external powerup reglist For gen8 there are pipe specific registers which need to be part of the static powerup list for CP to restore at IFPC interval. Hence add support to include all the pipe register to external powerup reglist. Change-Id: I9db3738fe2657c0a2265a13a5035bc406cbf602d Signed-off-by: Hareesh Gundu --- adreno_gen8.c | 96 ++++++++++++++++++++++++++++++++++++++++----------- adreno_gen8.h | 12 +++++++ 2 files changed, 88 insertions(+), 20 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index 6a24029904..1413992e84 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -123,6 +123,27 @@ static const u32 gen8_ifpc_pwrup_reglist[] = { GEN8_CP_PROTECT_REG_PIPE + 15, }; +static const struct gen8_pwrup_extlist gen8_0_0_pwrup_extlist[] = { + { GEN8_GRAS_TSEFE_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_GRAS_NC_MODE_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_GRAS_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_RB_CCU_CNTL, BIT(PIPE_BR)}, + { GEN8_RB_CCU_NC_MODE_CNTL, BIT(PIPE_BR)}, + { GEN8_RB_RESOLVE_PREFETCH_CNTL, BIT(PIPE_BR)}, + { GEN8_RB_CMP_DBG_ECO_CNTL, BIT(PIPE_BR)}, + { GEN8_RB_GC_GMEM_PROTECT, BIT(PIPE_BR)}, + { GEN8_RB_LPAC_GMEM_PROTECT, BIT(PIPE_BR)}, + { GEN8_VPC_FLATSHADE_MODE_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { 
GEN8_PC_CHICKEN_BITS_1, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_PC_CHICKEN_BITS_2, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_PC_CHICKEN_BITS_3, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_PC_CHICKEN_BITS_4, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_VFD_CB_BV_THRESHOLD, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_VFD_CB_BR_THRESHOLD, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_VFD_CB_BUSY_REQ_CNT, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_VFD_CB_LP_REQ_CNT, BIT(PIPE_BV) | BIT(PIPE_BR)}, +}; + static int acd_calibrate_set(void *data, u64 val) { struct kgsl_device *device = data; @@ -548,13 +569,16 @@ static void gen8_hwcg_set(struct adreno_device *adreno_dev, bool on) static void gen8_patch_pwrup_reglist(struct adreno_device *adreno_dev) { + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_device *gen8_dev = container_of(adreno_dev, + struct gen8_device, adreno_dev); struct adreno_reglist_list reglist[3]; void *ptr = adreno_dev->pwrup_reglist->hostptr; struct cpu_gpu_lock *lock = ptr; - u32 items = 0, i, j; + u32 items = 0, i, j, pipe_id; u32 *dest = ptr + sizeof(*lock); - /* Static IFPC-only registers */ + /* Static IFPC restore only registers */ reglist[items].regs = gen8_ifpc_pwrup_reglist; reglist[items].count = ARRAY_SIZE(gen8_ifpc_pwrup_reglist); lock->ifpc_list_len = reglist[items].count; @@ -575,7 +599,7 @@ static void gen8_patch_pwrup_reglist(struct adreno_device *adreno_dev) for (j = 0; j < reglist[i].count; j++) { *dest++ = r[j]; - kgsl_regread(KGSL_DEVICE(adreno_dev), r[j], dest++); + kgsl_regread(device, r[j], dest++); } } @@ -593,8 +617,35 @@ static void gen8_patch_pwrup_reglist(struct adreno_device *adreno_dev) * dynamic list with triplets as * (
), and the length is * stored as number for triplets in dynamic_list_len. + * + * Starting with Gen8, some of the registers that are initialized statically + * by the kernel are pipe-specific. Because only the dynamic list is able to + * support specifying a pipe ID, these registers are bundled along with any + * dynamic entries such as perf counter selects into a single dynamic list. */ - lock->dynamic_list_len = 0; + + gen8_dev->ext_pwrup_list_len = 0; + + /* + * Write external pipe specific regs (
- triplets) + * offset and the current value into GPU buffer + */ + for (pipe_id = PIPE_BR; pipe_id <= PIPE_BV; pipe_id++) { + for (i = 0; i < ARRAY_SIZE(gen8_0_0_pwrup_extlist); i++) { + unsigned long pipe = (unsigned long)gen8_0_0_pwrup_extlist[i].pipelines; + + if (!test_bit(pipe_id, &pipe)) + continue; + + *dest++ = FIELD_PREP(GENMASK(15, 12), pipe_id); + *dest++ = gen8_0_0_pwrup_extlist[i].offset; + gen8_regread_aperture(device, gen8_0_0_pwrup_extlist[i].offset, + dest++, pipe_id, 0, 0); + gen8_dev->ext_pwrup_list_len++; + } + } + + lock->dynamic_list_len = gen8_dev->ext_pwrup_list_len; } /* _llc_configure_gpu_scid() - Program the sub-cache ID for all GPU blocks */ @@ -755,12 +806,11 @@ int gen8_start(struct adreno_device *adreno_dev) kgsl_regwrite(device, GEN8_UCHE_WRITE_THRU_BASE_LO, lower_32_bits(uche_trap_base)); kgsl_regwrite(device, GEN8_UCHE_WRITE_THRU_BASE_HI, upper_32_bits(uche_trap_base)); - /* * CP takes care of the restore during IFPC exit. We need to restore at slumber * boundary as well */ - if (pwrup_lock->dynamic_list_len > 0) { + if (pwrup_lock->dynamic_list_len - gen8_dev->ext_pwrup_list_len > 0) { kgsl_regwrite(device, GEN8_RBBM_PERFCTR_CNTL, 0x1); kgsl_regwrite(device, GEN8_RBBM_SLICE_PERFCTR_CNTL, 0x1); } @@ -1848,21 +1898,23 @@ int gen8_perfcounter_remove(struct adreno_device *adreno_dev, struct adreno_perfcount_register *reg, u32 groupid) { const struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + struct gen8_device *gen8_dev = container_of(adreno_dev, struct gen8_device, adreno_dev); const struct adreno_perfcount_group *group; void *ptr = adreno_dev->pwrup_reglist->hostptr; struct cpu_gpu_lock *lock = ptr; - u32 *data = ptr + sizeof(*lock); - int offset = (lock->ifpc_list_len + lock->preemption_list_len) * 2; + u32 offset = ((lock->ifpc_list_len + lock->preemption_list_len) * 2) + + (gen8_dev->ext_pwrup_list_len * 3); int i, last_offset, num_removed, start_offset = -1; - u32 pipe = FIELD_PREP(GENMASK(13, 12), 
_get_pipeid(groupid)); + u32 *data = ptr + sizeof(*lock), pipe = FIELD_PREP(GENMASK(13, 12), _get_pipeid(groupid)); + u16 perfcntr_list_len = lock->dynamic_list_len - gen8_dev->ext_pwrup_list_len; - if (!lock->dynamic_list_len) + if (!perfcntr_list_len) return -EINVAL; group = &(counters->groups[groupid]); if (!(group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE)) { - if (lock->dynamic_list_len != 2) + if (perfcntr_list_len != 2) return 0; if (kgsl_hwlock(lock)) { @@ -1872,10 +1924,10 @@ int gen8_perfcounter_remove(struct adreno_device *adreno_dev, goto disable_perfcounter; } - last_offset = offset + lock->dynamic_list_len * 3; + last_offset = offset + (perfcntr_list_len * 3); /* Look for the perfcounter to remove in the list */ - for (i = 0; i < lock->dynamic_list_len - 2; i++) { + for (i = 0; i < perfcntr_list_len - 2; i++) { if ((data[offset + 1] == reg->select) && (data[offset] == pipe)) { start_offset = offset; break; @@ -1911,9 +1963,9 @@ disable_perfcounter: * If dynamic list length is 2 and no_restore_count is 0, then we can remove * the perfcounter controls from the list. 
*/ - if (lock->dynamic_list_len == 2 && !adreno_dev->no_restore_count) { + if (perfcntr_list_len == 2 && !adreno_dev->no_restore_count) { memset(&data[offset], 0, 6 * sizeof(u32)); - lock->dynamic_list_len = 0; + lock->dynamic_list_len = gen8_dev->ext_pwrup_list_len; } kgsl_hwunlock(lock); @@ -1923,13 +1975,17 @@ disable_perfcounter: int gen8_perfcounter_update(struct adreno_device *adreno_dev, struct adreno_perfcount_register *reg, bool update_reg, u32 pipe, unsigned long flags) { + struct gen8_device *gen8_dev = container_of(adreno_dev, struct gen8_device, adreno_dev); void *ptr = adreno_dev->pwrup_reglist->hostptr; struct cpu_gpu_lock *lock = ptr; + u32 offset = ((lock->ifpc_list_len + lock->preemption_list_len) * 2) + + (gen8_dev->ext_pwrup_list_len * 3); u32 *data = ptr + sizeof(*lock); - int i, start_offset = -1, offset = (lock->ifpc_list_len + lock->preemption_list_len) * 2; + int i, start_offset = -1; + u16 perfcntr_list_len = lock->dynamic_list_len - gen8_dev->ext_pwrup_list_len; if (flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) { - for (i = 0; i < lock->dynamic_list_len - 2; i++) { + for (i = 0; i < perfcntr_list_len - 2; i++) { if ((data[offset + 1] == reg->select) && (data[offset] == pipe)) { start_offset = offset; break; @@ -1937,7 +1993,7 @@ int gen8_perfcounter_update(struct adreno_device *adreno_dev, offset += 3; } - } else if (lock->dynamic_list_len) { + } else if (perfcntr_list_len) { goto update; } @@ -1962,8 +2018,8 @@ int gen8_perfcounter_update(struct adreno_device *adreno_dev, } /* Initialize the lock->dynamic_list_len to account for perfcounter controls */ - if (!lock->dynamic_list_len) - lock->dynamic_list_len = 2; + if (!perfcntr_list_len) + lock->dynamic_list_len = gen8_dev->ext_pwrup_list_len + 2; /* * For all targets GEN8_SLICE_RBBM_PERFCTR_CNTL needs to be the last entry, diff --git a/adreno_gen8.h b/adreno_gen8.h index c29fc0e401..9aeff7d805 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -39,6 +39,18 @@ struct gen8_device { struct 
adreno_device adreno_dev; /** @aperture: The last value that the host aperture register was programmed to */ u32 aperture; + /** @ext_pwrup_list_len: External pwrup reglist length */ + u16 ext_pwrup_list_len; +}; + +/** + * struct gen8_pwrup_extlist - container for a powerup external reglist + */ +struct gen8_pwrup_extlist { + /** offset: Dword offset of the register to write */ + u32 offset; + /** pipelines: pipelines to write */ + u32 pipelines; }; /** From 5053eb6ebea7adeb88709c5aa30d9665b404779a Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Fri, 17 Nov 2023 12:18:53 -0800 Subject: [PATCH 0594/1016] msm: kgsl: Enable warmboot feature for gen8_0_0 Warmboot saves a lot of back and forth hfi to and from GMU while doing slumber transitions. Enable this feature for gen8_0_0 chip. Change-Id: I1caffa8da0469de2fb020d9355bb0c3fb55f1750 Signed-off-by: Urvashi Agrawal --- adreno-gpulist.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 2b97597c44..95e4fc708e 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2633,7 +2633,8 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), .compatible = "qcom,adreno-gpu-gen8-0-0", .features = ADRENO_APRIV | ADRENO_IOCOHERENT | - ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE, + ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | + ADRENO_GMU_WARMBOOT, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_16M, From b2befe40625bfeb98717669038275f1dad371099 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 21 Nov 2023 13:57:33 +0530 Subject: [PATCH 0595/1016] kgsl: gen8: Enable L3 vote support for gen8_0_0 GPU Gen8_0_0 supports L3 voting to be able to successfully pin L3 frequency to a certain value during L3 characterization, debug scenarios and perf hints. 
Change-Id: Iae82e9a329de18640bf7d8272f2bdf1cf2f9701b Signed-off-by: Kamal Agrawal --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 95e4fc708e..51cd3d2b98 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2634,7 +2634,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .compatible = "qcom,adreno-gpu-gen8-0-0", .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | - ADRENO_GMU_WARMBOOT, + ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_16M, From c056537751d3ebe8e2c80365da1465812ddf5ad1 Mon Sep 17 00:00:00 2001 From: Mohammed Mirza Mandayappurath Manzoor Date: Wed, 29 Nov 2023 11:39:54 -0800 Subject: [PATCH 0596/1016] kgsl: gen8: Enable BCL feature for gen8_0_0 GPU Battery Current Limiter prevents under voltage and provides overcurrent protection. 
Change-Id: Ic755f80bcc417d5da2eaf4b2570eaf6b47d647fd Signed-off-by: Mohammed Mirza Mandayappurath Manzoor --- adreno-gpulist.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 51cd3d2b98..e6df16cb41 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2634,7 +2634,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .compatible = "qcom,adreno-gpu-gen8-0-0", .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | - ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE, + ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_BCL, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_16M, @@ -2657,6 +2657,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .gmu_hub_clk_freq = 200000000, .gen8_snapshot_block_list = &gen8_0_0_snapshot_block_list, .fast_bus_hint = true, + .bcl_data = 1, }; static const struct adreno_gpu_core *adreno_gpulist[] = { From c2e352976e4caf4a9312659a2ca6f407ee96b82a Mon Sep 17 00:00:00 2001 From: Mohammed Mirza Mandayappurath Manzoor Date: Thu, 30 Nov 2023 16:01:28 -0800 Subject: [PATCH 0597/1016] kgsl: gen8: Enable IFPC on gen8_0_0 GPU Inter-Frame Power Collapse is a power saving feature for GPU. 
Change-Id: Ib9c69ca155c4ea7faacf28210d60a79936e441c8 Signed-off-by: Mohammed Mirza Mandayappurath Manzoor --- adreno-gpulist.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index e6df16cb41..7fa5ff97f4 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2634,7 +2634,8 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .compatible = "qcom,adreno-gpu-gen8-0-0", .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | - ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_BCL, + ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_BCL | + ADRENO_IFPC, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_16M, From bb870d934a75b5a3be8245eabba581923333e220 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Tue, 21 Nov 2023 14:45:42 +0530 Subject: [PATCH 0598/1016] msm: kgsl: use linux/spinlock.h instead of linux/rwlock.h Directly including linux/rwlock.h breaks RT build failure. Fix this by including linux/spinlock.h which includes the correct rwlock header based on the selected PREEMPT configuration. Change-Id: I42d3f7ec9a136f41626ca2678da00d325b896289 Signed-off-by: Kassey Li Signed-off-by: Pankaj Gupta --- kgsl_events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kgsl_events.c b/kgsl_events.c index 24c56e1b9c..c536ee0496 100644 --- a/kgsl_events.c +++ b/kgsl_events.c @@ -5,7 +5,7 @@ */ #include -#include +#include #include "kgsl_debugfs.h" #include "kgsl_device.h" From 28a4481ff536522224657b8c58aa36410aa97dd5 Mon Sep 17 00:00:00 2001 From: Archana Sriram Date: Wed, 25 Oct 2023 09:37:25 +0530 Subject: [PATCH 0599/1016] kgsl: build: Add changes to compile graphics-kernel for QCS605 Add changes to compile graphics kernel code for QCS605. 
Change-Id: Id307905d5144e6ae325809c02ebc1c7689c00349 Signed-off-by: Archana Sriram --- Kbuild | 3 +++ config/gki_qcs605.conf | 14 ++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 config/gki_qcs605.conf diff --git a/Kbuild b/Kbuild index 6090df63db..65f12ce88e 100644 --- a/Kbuild +++ b/Kbuild @@ -58,6 +58,9 @@ endif ifeq ($(CONFIG_ARCH_HOLI), y) include $(KGSL_PATH)/config/gki_blair.conf endif +ifeq ($(CONFIG_ARCH_SDM670), y) + include $(KGSL_PATH)/config/gki_qcs605.conf +endif ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERNEL_SRC)/drivers/devfreq diff --git a/config/gki_qcs605.conf b/config/gki_qcs605.conf new file mode 100644 index 0000000000..177fc8cbd2 --- /dev/null +++ b/config/gki_qcs605.conf @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + +CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 +CONFIG_QCOM_KGSL_SORT_POOL = y +CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y +CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT = y +CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR = \"msm-adreno-tz\" + +ccflags-y += -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ + -DCONFIG_QCOM_KGSL_SORT_POOL=1 \ + -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ + -DCONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=1 \ + -DCONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR=\"msm-adreno-tz\" From c28d6350c11b1c425ed342b77411221a829fcd2c Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Thu, 30 Nov 2023 12:38:42 -0800 Subject: [PATCH 0600/1016] kgsl: gen8: Update static powerup reglist Add KGSL programming noncontext register to the static powerup reglist to restore register values during ifpc and preemption. 
Change-Id: Ic211b7de1dbe978e7ce6a4d5382adc60a19a54b8 Signed-off-by: Hareesh Gundu --- adreno_gen8.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index 1413992e84..2feaa5d6a0 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -25,9 +25,9 @@ /* IFPC & Preemption static powerup restore list */ static const u32 gen8_pwrup_reglist[] = { GEN8_UCHE_MODE_CNTL, - GEN8_UCHE_CACHE_WAYS, GEN8_UCHE_VARB_IDLE_TIMEOUT, GEN8_UCHE_GBIF_GX_CONFIG, + GEN8_UCHE_CACHE_WAYS, GEN8_UCHE_CCHE_MODE_CNTL, GEN8_UCHE_CCHE_CACHE_WAYS, GEN8_UCHE_CCHE_GC_GMEM_RANGE_MIN_LO, @@ -38,23 +38,34 @@ static const u32 gen8_pwrup_reglist[] = { GEN8_UCHE_WRITE_THRU_BASE_HI, GEN8_UCHE_TRAP_BASE_LO, GEN8_UCHE_TRAP_BASE_HI, + GEN8_UCHE_CLIENT_PF, + GEN8_UCHE_CCHE_LPAC_GMEM_RANGE_MIN_LO, + GEN8_UCHE_CCHE_LPAC_GMEM_RANGE_MIN_HI, + GEN8_VSC_BIN_SIZE, + GEN8_VSC_KMD_DBG_ECO_CNTL, + GEN8_RB_CMP_NC_MODE_CNTL, + GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, + GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_LO, + GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_HI, }; /* IFPC only static powerup restore list */ static const u32 gen8_ifpc_pwrup_reglist[] = { - GEN8_CP_PROTECT_CNTL_PIPE, GEN8_RBBM_NC_MODE_CNTL, GEN8_RBBM_SLICE_NC_MODE_CNTL, + GEN8_RBBM_GBIF_CLIENT_QOS_CNTL, GEN8_SP_NC_MODE_CNTL, - GEN8_SP_CHICKEN_BITS_2, - GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, GEN8_SP_READ_SEL, - GEN8_TPL1_DBG_ECO_CNTL1, - GEN8_TPL1_NC_MODE_CNTL, GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_LO, GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_HI, GEN8_SP_HLSQ_LPAC_GMEM_RANGE_MIN_LO, GEN8_SP_HLSQ_LPAC_GMEM_RANGE_MIN_HI, + GEN8_SP_CHICKEN_BITS_1, + GEN8_SP_CHICKEN_BITS_2, + GEN8_SP_CHICKEN_BITS_3, + GEN8_TPL1_NC_MODE_CNTL, + GEN8_TPL1_DBG_ECO_CNTL, + GEN8_TPL1_DBG_ECO_CNTL1, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_1, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_2, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_3, @@ -73,6 +84,8 @@ static const u32 gen8_ifpc_pwrup_reglist[] = { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_16, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_17, 
GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_18, + GEN8_CP_PROTECT_CNTL_PIPE, + GEN8_CP_SMMU_STREAM_ID_LPAC, GEN8_CP_PROTECT_REG_GLOBAL, GEN8_CP_PROTECT_REG_GLOBAL + 1, GEN8_CP_PROTECT_REG_GLOBAL + 2, @@ -129,19 +142,25 @@ static const struct gen8_pwrup_extlist gen8_0_0_pwrup_extlist[] = { { GEN8_GRAS_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR)}, { GEN8_RB_CCU_CNTL, BIT(PIPE_BR)}, { GEN8_RB_CCU_NC_MODE_CNTL, BIT(PIPE_BR)}, + { GEN8_RB_CMP_NC_MODE_CNTL, BIT(PIPE_BR)}, { GEN8_RB_RESOLVE_PREFETCH_CNTL, BIT(PIPE_BR)}, { GEN8_RB_CMP_DBG_ECO_CNTL, BIT(PIPE_BR)}, { GEN8_RB_GC_GMEM_PROTECT, BIT(PIPE_BR)}, { GEN8_RB_LPAC_GMEM_PROTECT, BIT(PIPE_BR)}, + { GEN8_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE, BIT(PIPE_BR)}, { GEN8_VPC_FLATSHADE_MODE_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR)}, { GEN8_PC_CHICKEN_BITS_1, BIT(PIPE_BV) | BIT(PIPE_BR)}, { GEN8_PC_CHICKEN_BITS_2, BIT(PIPE_BV) | BIT(PIPE_BR)}, { GEN8_PC_CHICKEN_BITS_3, BIT(PIPE_BV) | BIT(PIPE_BR)}, { GEN8_PC_CHICKEN_BITS_4, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_PC_AUTO_VERTEX_STRIDE, BIT(PIPE_BR) | BIT(PIPE_BV)}, + { GEN8_PC_VIS_STREAM_CNTL, BIT(PIPE_BR) | BIT(PIPE_BV)}, + { GEN8_PC_CONTEXT_SWITCH_STABILIZE_CNTL_1, BIT(PIPE_BR) | BIT(PIPE_BV)}, { GEN8_VFD_CB_BV_THRESHOLD, BIT(PIPE_BV) | BIT(PIPE_BR)}, { GEN8_VFD_CB_BR_THRESHOLD, BIT(PIPE_BV) | BIT(PIPE_BR)}, { GEN8_VFD_CB_BUSY_REQ_CNT, BIT(PIPE_BV) | BIT(PIPE_BR)}, { GEN8_VFD_CB_LP_REQ_CNT, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_VFD_DBG_ECO_CNTL, BIT(PIPE_BR) | BIT(PIPE_BV)}, }; static int acd_calibrate_set(void *data, u64 val) From de92277182e70178f4736181e9d0245756d3044f Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Thu, 30 Nov 2023 11:37:15 -0800 Subject: [PATCH 0601/1016] kgsl: gen8: Force coldboot after a perfcounter release This change ports commit c92706b1f39b ("kgsl: hwsched: Force coldboot after a perfcounter release") from gen7 to gen8. 
Change-Id: I72864facd2dbb7dcc8a0e4f0430f5d68d73f7c5e Signed-off-by: Urvashi Agrawal --- adreno_gen8_hwsched_hfi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 83048906a2..79fa9c07cc 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -1292,6 +1292,8 @@ void gen8_hwsched_process_msgq(struct adreno_device *adreno_dev) adreno_perfcounter_put(adreno_dev, cmd->group_id, cmd->countable, PERFCOUNTER_FLAG_KERNEL); + + adreno_mark_for_coldboot(adreno_dev); } break; } From e06f329b01d96df089e68a9c610e9b6591bf85c1 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Thu, 23 Nov 2023 22:00:42 +0530 Subject: [PATCH 0602/1016] kgsl: Use correct format specifier for unsigned int in dcvs_set Currently format specifier %d is being used for a unsigned int while printing error in dcvs_set(). Fix it by using correct format specifier. Change-Id: I580967a47c70a51700eadfae879701dd7d4f99f0 Signed-off-by: Pankaj Gupta --- adreno_a6xx_gmu.c | 2 +- adreno_a6xx_hwsched.c | 2 +- adreno_gen7_gmu.c | 2 +- adreno_gen7_hwsched.c | 2 +- adreno_gen8_gmu.c | 2 +- adreno_gen8_hwsched.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index b13ca9c9a4..2ed1437f04 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -2046,7 +2046,7 @@ static int a6xx_gmu_dcvs_set(struct adreno_device *adreno_dev, if (ret) { dev_err_ratelimited(&gmu->pdev->dev, - "Failed to set GPU perf idx %d, bw idx %d\n", + "Failed to set GPU perf idx %u, bw idx %u\n", req.freq, req.bw); /* diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index 0e7efc21f4..8ad713e7c5 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -1035,7 +1035,7 @@ static int a6xx_hwsched_dcvs_set(struct adreno_device *adreno_dev, if (ret) { dev_err_ratelimited(&gmu->pdev->dev, - "Failed to set GPU perf idx %d, bw idx %d\n", + "Failed to set GPU perf idx %u, bw idx %u\n", req.freq, req.bw); /* 
diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 49f0257702..6676eeb37a 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -1617,7 +1617,7 @@ static int gen7_gmu_dcvs_set(struct adreno_device *adreno_dev, ret = gen7_hfi_send_generic_req(adreno_dev, &req, sizeof(req)); if (ret) { dev_err_ratelimited(&gmu->pdev->dev, - "Failed to set GPU perf idx %d, bw idx %d\n", + "Failed to set GPU perf idx %u, bw idx %u\n", req.freq, req.bw); /* diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index b3e6475394..46366919c3 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -1378,7 +1378,7 @@ static int gen7_hwsched_dcvs_set(struct adreno_device *adreno_dev, if (ret) { dev_err_ratelimited(&gmu->pdev->dev, - "Failed to set GPU perf idx %d, bw idx %d\n", + "Failed to set GPU perf idx %u, bw idx %u\n", req.freq, req.bw); /* diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index efcecb58bf..e2fc4c6a15 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -1519,7 +1519,7 @@ static int gen8_gmu_dcvs_set(struct adreno_device *adreno_dev, ret = gen8_hfi_send_generic_req(adreno_dev, &req, sizeof(req)); if (ret) { dev_err_ratelimited(&gmu->pdev->dev, - "Failed to set GPU perf idx %d, bw idx %d\n", + "Failed to set GPU perf idx %u, bw idx %u\n", req.freq, req.bw); /* diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 4d3054e29a..1d220be64b 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -1367,7 +1367,7 @@ static int gen8_hwsched_dcvs_set(struct adreno_device *adreno_dev, if (ret) { dev_err_ratelimited(&gmu->pdev->dev, - "Failed to set GPU perf idx %d, bw idx %d\n", + "Failed to set GPU perf idx %u, bw idx %u\n", req.freq, req.bw); /* From f3f577ac0a17415caeb7ce3604243564ee653be4 Mon Sep 17 00:00:00 2001 From: Amit Kushwaha Date: Mon, 31 Jul 2023 18:23:31 +0530 Subject: [PATCH 0603/1016] msm: kgsl: Add hibernation support in KGSL for A7x Add handlers for dev_pm_ops, enable BCL after CB register set and during 
hibernation save and restore gmu pdc config. Change-Id: I32d992a1b4eaae2ff13f014f1b09967fc8122959 Signed-off-by: Abhishek Barman Signed-off-by: Amit Kushwaha --- adreno_gen7_gmu.c | 84 ++++++++++++++++++++++++++++++++++++------- adreno_gen7_gmu.h | 2 ++ adreno_gen7_hwsched.c | 22 +++++++++++- 3 files changed, 95 insertions(+), 13 deletions(-) diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 49f0257702..3b783333a8 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -242,6 +242,7 @@ struct adreno_device *gen7_gmu_to_adreno(struct gen7_gmu_device *gmu) } #define RSC_CMD_OFFSET 2 +#define GEN7_PDC_ENABLE_REG_VALUE 0x80000001 static void _regwrite(void __iomem *regbase, unsigned int offsetwords, unsigned int value) @@ -289,24 +290,32 @@ void gen7_load_rsc_ucode(struct adreno_device *adreno_dev) int gen7_load_pdc_ucode(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - struct resource *res_cfg; - void __iomem *cfg = NULL; - res_cfg = platform_get_resource_byname(gmu->pdev, IORESOURCE_MEM, + if (gmu->pdc_cfg_base == NULL) { + struct resource *res_cfg; + + res_cfg = platform_get_resource_byname(gmu->pdev, IORESOURCE_MEM, "gmu_pdc"); - if (res_cfg) - cfg = ioremap(res_cfg->start, resource_size(res_cfg)); - if (!cfg) { - dev_err(&gmu->pdev->dev, "Failed to map PDC CFG\n"); - return -ENODEV; + if (res_cfg) + gmu->pdc_cfg_base = devm_ioremap(&gmu->pdev->dev, + res_cfg->start, resource_size(res_cfg)); + + if (!gmu->pdc_cfg_base) { + dev_err(&gmu->pdev->dev, "Failed to map PDC CFG\n"); + return -ENODEV; + } } /* Setup GPU PDC */ - _regwrite(cfg, GEN7_PDC_GPU_SEQ_START_ADDR, 0); - _regwrite(cfg, GEN7_PDC_GPU_ENABLE_PDC, 0x80000001); + _regwrite(gmu->pdc_cfg_base, GEN7_PDC_GPU_SEQ_START_ADDR, 0); + _regwrite(gmu->pdc_cfg_base, GEN7_PDC_GPU_ENABLE_PDC, + GEN7_PDC_ENABLE_REG_VALUE); - iounmap(cfg); + if (!IS_ENABLED(CONFIG_QCOM_KGSL_HIBERNATION)) { + devm_iounmap(&gmu->pdev->dev, gmu->pdc_cfg_base); + gmu->pdc_cfg_base = NULL; 
+ } return 0; } @@ -1893,6 +1902,34 @@ int gen7_gmu_enable_clks(struct adreno_device *adreno_dev, u32 level) return 0; } +#if IS_ENABLED(CONFIG_QCOM_KGSL_HIBERNATION) +static void gen7_gmu_force_first_boot(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + u32 val = 0; + + if (gmu->pdc_cfg_base) { + kgsl_pwrctrl_enable_cx_gdsc(device); + gen7_gmu_enable_clks(adreno_dev, 0); + + val = __raw_readl(gmu->pdc_cfg_base + (GEN7_PDC_GPU_ENABLE_PDC << 2)); + + /* Make sure we read val before disabling clks. */ + mb(); + + clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); + kgsl_pwrctrl_disable_cx_gdsc(device); + gen7_rdpm_cx_freq_update(gmu, 0); + } + + if (val != GEN7_PDC_ENABLE_REG_VALUE) { + clear_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags); + clear_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags); + } +} +#endif + static int gen7_gmu_first_boot(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -2149,6 +2186,9 @@ static const struct gmu_dev_ops gen7_gmudev = { .bcl_sid_set = gen7_bcl_sid_set, .bcl_sid_get = gen7_bcl_sid_get, .send_nmi = gen7_gmu_send_nmi, +#if IS_ENABLED(CONFIG_QCOM_KGSL_HIBERNATION) + .force_first_boot = gen7_gmu_force_first_boot, +#endif }; static int gen7_gmu_bus_set(struct adreno_device *adreno_dev, int buslevel, @@ -2815,6 +2855,7 @@ static int gen7_boot(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + bool bcl_state = adreno_dev->bcl_enabled; int ret; if (WARN_ON(test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))) @@ -2822,7 +2863,23 @@ static int gen7_boot(struct adreno_device *adreno_dev) kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE); - ret = gen7_gmu_boot(adreno_dev); + if (IS_ENABLED(CONFIG_QCOM_KGSL_HIBERNATION) && + !test_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags)) { + /* + * During hibernation entry ZAP was 
unloaded and + * CBCAST BCL register is in reset state. + * Set bcl_enabled to false to skip KMD's HFI request + * to GMU for BCL feature, send BCL feature request to + * GMU after ZAP load at GPU boot. This ensures that + * Central Broadcast register was programmed before + * enabling BCL. + */ + adreno_dev->bcl_enabled = false; + ret = gen7_gmu_first_boot(adreno_dev); + } else { + ret = gen7_gmu_boot(adreno_dev); + } + if (ret) return ret; @@ -2835,6 +2892,9 @@ static int gen7_boot(struct adreno_device *adreno_dev) set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + if (IS_ENABLED(CONFIG_QCOM_KGSL_HIBERNATION)) + adreno_dev->bcl_enabled = bcl_state; + device->pwrctrl.last_stat_updated = ktime_get(); kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE); diff --git a/adreno_gen7_gmu.h b/adreno_gen7_gmu.h index 95fa6a6abf..5eaa36e766 100644 --- a/adreno_gen7_gmu.h +++ b/adreno_gen7_gmu.h @@ -103,6 +103,8 @@ struct gen7_gmu_device { void __iomem *rdpm_cx_virt; /** @rdpm_mx_virt: Pointer where the RDPM MX block is mapped */ void __iomem *rdpm_mx_virt; + /** @pdc_cfg_base: Base address of PDC cfg registers */ + void __iomem *pdc_cfg_base; /** @num_oob_perfcntr: Number of active oob_perfcntr requests */ u32 num_oob_perfcntr; /** @acd_debug_val: DVM value to calibrate ACD for a level */ diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index b3e6475394..fcf4e955a8 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -979,6 +979,7 @@ static int gen7_hwsched_boot(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + bool bcl_state = adreno_dev->bcl_enabled; int ret; if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) @@ -988,7 +989,23 @@ static int gen7_hwsched_boot(struct adreno_device *adreno_dev) adreno_hwsched_start(adreno_dev); - ret = gen7_hwsched_gmu_boot(adreno_dev); + if (IS_ENABLED(CONFIG_QCOM_KGSL_HIBERNATION) && + !test_bit(GMU_PRIV_PDC_RSC_LOADED, 
&gmu->flags)) { + /* + * During hibernation entry ZAP was unloaded and + * CBCAST BCL register is in reset state. + * Set bcl_enabled to false to skip KMD's HFI request + * to GMU for BCL feature, send BCL feature request to + * GMU after ZAP load at GPU boot. This ensures that + * Central Broadcast register was programmed before + * enabling BCL. + */ + adreno_dev->bcl_enabled = false; + ret = gen7_hwsched_gmu_first_boot(adreno_dev); + } else { + ret = gen7_hwsched_gmu_boot(adreno_dev); + } + if (ret) return ret; @@ -1001,6 +1018,9 @@ static int gen7_hwsched_boot(struct adreno_device *adreno_dev) set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + if (IS_ENABLED(CONFIG_QCOM_KGSL_HIBERNATION)) + adreno_dev->bcl_enabled = bcl_state; + device->pwrctrl.last_stat_updated = ktime_get(); kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE); From 52b7c63222a2c77de45ec075824c73aa150d7747 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Tue, 5 Dec 2023 12:32:33 -0800 Subject: [PATCH 0604/1016] kgsl: gen8: Fix debug read of SP non-context registers in snapshot Always use PIPE_BR in state_read for HLSQ/SP/TP non-context registers to get correct values. 
Change-Id: Ie26feed88a8c6d60e4c0fbb700ccd58970ee988f Signed-off-by: Hareesh Gundu --- adreno_gen8_0_0_snapshot.h | 40 +++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h index b3614ae557..118ad517e1 100644 --- a/adreno_gen8_0_0_snapshot.h +++ b/adreno_gen8_0_0_snapshot.h @@ -638,12 +638,12 @@ static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_rb_slice_pipe_br_rbp_regist * Location: HLSQ_STATE * pairs : 8 (Regs:34) */ -static const u32 gen8_0_0_non_context_sp_pipe_none_hlsq_state_registers[] = { +static const u32 gen8_0_0_non_context_sp_pipe_br_hlsq_state_registers[] = { 0x0ae05, 0x0ae05, 0x0ae10, 0x0ae13, 0x0ae15, 0x0ae16, 0x0ae52, 0x0ae52, 0x0ae60, 0x0ae67, 0x0ae69, 0x0ae6e, 0x0ae70, 0x0ae75, 0x0aec0, 0x0aec5, UINT_MAX, UINT_MAX, }; -static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_sp_pipe_none_hlsq_state_registers), 8)); +static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_sp_pipe_br_hlsq_state_registers), 8)); /* * Block : ['SP'] @@ -653,12 +653,12 @@ static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_sp_pipe_none_hlsq_state_reg * Location: SP_TOP * pairs : 6 (Regs:60) */ -static const u32 gen8_0_0_non_context_sp_pipe_none_sp_top_registers[] = { +static const u32 gen8_0_0_non_context_sp_pipe_br_sp_top_registers[] = { 0x0ae00, 0x0ae0c, 0x0ae0f, 0x0ae0f, 0x0ae35, 0x0ae35, 0x0ae3a, 0x0ae3f, 0x0ae50, 0x0ae52, 0x0ae80, 0x0aea3, UINT_MAX, UINT_MAX, }; -static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_sp_pipe_none_sp_top_registers), 8)); +static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_sp_pipe_br_sp_top_registers), 8)); /* * Block : ['SP'] @@ -668,13 +668,13 @@ static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_sp_pipe_none_sp_top_registe * Location: USPTP * pairs : 9 (Regs:64) */ -static const u32 gen8_0_0_non_context_sp_pipe_none_usptp_registers[] = { +static const u32 gen8_0_0_non_context_sp_pipe_br_usptp_registers[] = { 0x0ae00, 0x0ae0c, 
0x0ae0f, 0x0ae0f, 0x0ae17, 0x0ae19, 0x0ae30, 0x0ae32, 0x0ae35, 0x0ae35, 0x0ae3a, 0x0ae3b, 0x0ae3e, 0x0ae3f, 0x0ae50, 0x0ae52, 0x0ae80, 0x0aea3, UINT_MAX, UINT_MAX, }; -static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_sp_pipe_none_usptp_registers), 8)); +static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_sp_pipe_br_usptp_registers), 8)); /* * Block : ['SP'] @@ -684,12 +684,12 @@ static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_sp_pipe_none_usptp_register * Location: HLSQ_DP_STR * pairs : 5 (Regs:18) */ -static const u32 gen8_0_0_non_context_sp_pipe_none_hlsq_dp_str_registers[] = { +static const u32 gen8_0_0_non_context_sp_pipe_br_hlsq_dp_str_registers[] = { 0x0ae05, 0x0ae05, 0x0ae60, 0x0ae65, 0x0ae6b, 0x0ae6c, 0x0ae73, 0x0ae75, 0x0aec0, 0x0aec5, UINT_MAX, UINT_MAX, }; -static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_sp_pipe_none_hlsq_dp_str_registers), 8)); +static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_sp_pipe_br_hlsq_dp_str_registers), 8)); /* * Block : ['TPL1'] @@ -699,12 +699,12 @@ static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_sp_pipe_none_hlsq_dp_str_re * Location: USPTP * pairs : 5 (Regs:48) */ -static const u32 gen8_0_0_non_context_tpl1_pipe_none_usptp_registers[] = { +static const u32 gen8_0_0_non_context_tpl1_pipe_br_usptp_registers[] = { 0x0b600, 0x0b600, 0x0b602, 0x0b602, 0x0b604, 0x0b604, 0x0b606, 0x0b61e, 0x0b620, 0x0b633, UINT_MAX, UINT_MAX, }; -static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_tpl1_pipe_none_usptp_registers), 8)); +static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_tpl1_pipe_br_usptp_registers), 8)); /* * Block : ['GRAS'] @@ -1666,16 +1666,16 @@ static struct gen8_cluster_registers gen8_0_0_mvc_clusters[] = { }; static struct gen8_sptp_cluster_registers gen8_0_0_sptp_clusters[] = { - { CLUSTER_NONE, UNSLICE, 2, 2, SP_NCTX_REG, PIPE_NONE, 0, HLSQ_STATE, - gen8_0_0_non_context_sp_pipe_none_hlsq_state_registers, 0xae00}, - { CLUSTER_NONE, UNSLICE, 2, 2, SP_NCTX_REG, PIPE_NONE, 0, SP_TOP, - 
gen8_0_0_non_context_sp_pipe_none_sp_top_registers, 0xae00}, - { CLUSTER_NONE, UNSLICE, 2, 2, SP_NCTX_REG, PIPE_NONE, 0, USPTP, - gen8_0_0_non_context_sp_pipe_none_usptp_registers, 0xae00}, - { CLUSTER_NONE, UNSLICE, 2, 2, SP_NCTX_REG, PIPE_NONE, 0, HLSQ_DP_STR, - gen8_0_0_non_context_sp_pipe_none_hlsq_dp_str_registers, 0xae00}, - { CLUSTER_NONE, UNSLICE, 2, 2, TP0_NCTX_REG, PIPE_NONE, 0, USPTP, - gen8_0_0_non_context_tpl1_pipe_none_usptp_registers, 0xb600}, + { CLUSTER_NONE, UNSLICE, 2, 2, SP_NCTX_REG, PIPE_BR, 0, HLSQ_STATE, + gen8_0_0_non_context_sp_pipe_br_hlsq_state_registers, 0xae00}, + { CLUSTER_NONE, UNSLICE, 2, 2, SP_NCTX_REG, PIPE_BR, 0, SP_TOP, + gen8_0_0_non_context_sp_pipe_br_sp_top_registers, 0xae00}, + { CLUSTER_NONE, UNSLICE, 2, 2, SP_NCTX_REG, PIPE_BR, 0, USPTP, + gen8_0_0_non_context_sp_pipe_br_usptp_registers, 0xae00}, + { CLUSTER_NONE, UNSLICE, 2, 2, SP_NCTX_REG, PIPE_BR, 0, HLSQ_DP_STR, + gen8_0_0_non_context_sp_pipe_br_hlsq_dp_str_registers, 0xae00}, + { CLUSTER_NONE, UNSLICE, 2, 2, TP0_NCTX_REG, PIPE_BR, 0, USPTP, + gen8_0_0_non_context_tpl1_pipe_br_usptp_registers, 0xb600}, { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE, gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_registers, 0xa800}, { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE, From b6c9f571ea28fe8a7d7384c25728fc1a4dcb707d Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Tue, 5 Dec 2023 10:25:09 -0800 Subject: [PATCH 0605/1016] kgsl: power: Remove last_governor from the kgsl_pwrscale struct The last_governor member in the kgsl_pwrscale struct is no longer used. Remove it and remove the use of the governor.h file in kgsl since we no longer require it. 
Change-Id: I18da8040fa258db02ced89a456d11c95ce1d8637 Signed-off-by: Lynus Vaz --- kgsl_pwrscale.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/kgsl_pwrscale.h b/kgsl_pwrscale.h index 92f7dd5fae..5b9c3d4d43 100644 --- a/kgsl_pwrscale.h +++ b/kgsl_pwrscale.h @@ -7,7 +7,6 @@ #ifndef __KGSL_PWRSCALE_H #define __KGSL_PWRSCALE_H -#include "governor.h" #include "kgsl_pwrctrl.h" #if IS_ENABLED(CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ) @@ -31,7 +30,6 @@ struct kgsl_power_stats { * @gpu_profile - GPU profile data for the devfreq device * @bus_profile - Bus specific data for the bus devfreq device * @freq_table - GPU frequencies for the DCVS algorithm - * @last_governor - Prior devfreq governor * @accum_stats - Accumulated statistics for various frequency calculations * @enabled - Whether or not power scaling is enabled * @time - Last submitted sample timestamp @@ -54,7 +52,6 @@ struct kgsl_pwrscale { struct msm_adreno_extended_profile gpu_profile; struct msm_busmon_extended_profile bus_profile; unsigned long freq_table[KGSL_MAX_PWRLEVELS]; - char last_governor[DEVFREQ_NAME_LEN]; struct kgsl_power_stats accum_stats; bool enabled; ktime_t time; From 88ebde5def43ada25656e889bbdce85f1f96d6d5 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sat, 26 Aug 2023 13:48:15 +0530 Subject: [PATCH 0606/1016] kgsl: Add support to control GDSCs using genpd GDSCs were modeled as regulators till now. But all future chipsets will have gdscs modeled as power domains. So, add support for power-domains along with support for regulators on legacy chipsets. 
Change-Id: Iacc119e53ef0b52bd8b10e0d7724a9adad9468b3 Signed-off-by: Kamal Agrawal --- adreno.c | 23 +++-- adreno_a6xx.c | 5 +- adreno_a6xx_gmu.c | 79 ++++++-------- adreno_a6xx_rgmu.c | 44 ++++---- adreno_gen7_gmu.c | 40 +++---- adreno_gen8_gmu.c | 40 +++---- kgsl_iommu.c | 35 +++++-- kgsl_iommu.h | 4 +- kgsl_pwrctrl.c | 252 +++++++++++++++++++++++++++++++++++---------- kgsl_pwrctrl.h | 39 +++++-- kgsl_util.c | 23 ++--- kgsl_util.h | 12 +-- 12 files changed, 363 insertions(+), 233 deletions(-) diff --git a/adreno.c b/adreno.c index c7e8cd434d..51e3358c82 100644 --- a/adreno.c +++ b/adreno.c @@ -1646,19 +1646,24 @@ static int adreno_init(struct kgsl_device *device) return 0; } -static bool regulators_left_on(struct kgsl_device *device) +static bool gdscs_left_on(struct kgsl_device *device) { struct kgsl_pwrctrl *pwr = &device->pwrctrl; if (gmu_core_gpmu_isenabled(device)) return false; - if (!IS_ERR_OR_NULL(pwr->cx_gdsc)) - if (regulator_is_enabled(pwr->cx_gdsc)) - return true; + if (pwr->cx_regulator) + return regulator_is_enabled(pwr->cx_regulator); - if (!IS_ERR_OR_NULL(pwr->gx_gdsc)) - return regulator_is_enabled(pwr->gx_gdsc); + if (pwr->gx_regulator) + return regulator_is_enabled(pwr->gx_regulator); + + if (pwr->cx_pd) + return kgsl_genpd_is_enabled(pwr->cx_pd); + + if (pwr->gx_pd) + return kgsl_genpd_is_enabled(pwr->gx_pd); return false; } @@ -1878,12 +1883,12 @@ static int _adreno_start(struct adreno_device *adreno_dev) const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); int status; unsigned int state = device->state; - bool regulator_left_on; + bool gdsc_left_on; /* make sure ADRENO_DEVICE_STARTED is not set here */ WARN_ON(test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv)); - regulator_left_on = regulators_left_on(device); + gdsc_left_on = gdscs_left_on(device); /* Clear any GPU faults that might have been left over */ adreno_clear_gpu_fault(adreno_dev); @@ -1903,7 +1908,7 @@ static int _adreno_start(struct adreno_device *adreno_dev) 
memset(&adreno_dev->busy_data, 0, sizeof(adreno_dev->busy_data)); /* Soft reset the GPU if a regulator is stuck on*/ - if (regulator_left_on) + if (gdsc_left_on) _soft_reset(adreno_dev); /* Start the GPU */ diff --git a/adreno_a6xx.c b/adreno_a6xx.c index 6a142c4d24..15abcc059c 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -534,7 +534,10 @@ bool a6xx_gx_is_on(struct adreno_device *adreno_dev) clk_on = __clk_is_enabled(pwr->grp_clks[0]); - gdsc_on = regulator_is_enabled(pwr->gx_gdsc); + if (pwr->gx_pd) + gdsc_on = kgsl_genpd_is_enabled(pwr->gx_pd); + else + gdsc_on = regulator_is_enabled(pwr->gx_regulator); return (gdsc_on & clk_on); } diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index b13ca9c9a4..bce91d4ccd 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -598,13 +598,16 @@ void a6xx_gmu_disable_gdsc(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct kgsl_pwrctrl *pwr = &device->pwrctrl; - if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)) - regulator_set_mode(pwr->cx_gdsc, REGULATOR_MODE_IDLE); + /* ADRENO_QUIRK_CX_GDSC quirk is not supported for genpd */ + WARN_ON_ONCE(pwr->cx_pd && ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)); + + if (pwr->cx_regulator && ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)) + regulator_set_mode(pwr->cx_regulator, REGULATOR_MODE_IDLE); kgsl_pwrctrl_disable_cx_gdsc(device); - if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)) - regulator_set_mode(pwr->cx_gdsc, REGULATOR_MODE_NORMAL); + if (pwr->cx_regulator && ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)) + regulator_set_mode(pwr->cx_regulator, REGULATOR_MODE_NORMAL); } int a6xx_gmu_device_start(struct adreno_device *adreno_dev) @@ -1836,10 +1839,8 @@ static int a6xx_gmu_init(struct adreno_device *adreno_dev) static void a6xx_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) { - int ret = 0; struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct 
kgsl_pwrctrl *pwr = &device->pwrctrl; /* If SPTP_RAC is on, turn off SPTP_RAC HS */ a6xx_gmu_sptprac_disable(adreno_dev); @@ -1881,47 +1882,35 @@ static void a6xx_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) /* * This is based on the assumption that GMU is the only one controlling - * the GX HS. This code path is the only client voting for GX through - * the regulator interface. + * the GX HS. This code path is the only client voting for GX from linux + * kernel. */ - if (pwr->gx_gdsc) { - if (a6xx_gmu_gx_is_on(adreno_dev)) { - /* Switch gx gdsc control from GMU to CPU - * force non-zero reference count in clk driver - * so next disable call will turn - * off the GDSC - */ - ret = regulator_enable(pwr->gx_gdsc); - if (ret) - dev_err(&gmu->pdev->dev, - "suspend fail: gx enable %d\n", ret); + if (!a6xx_gmu_gx_is_on(adreno_dev)) + return; - /* - * Toggle the loop_en bit, across disabling the gx gdsc, - * with a delay of 10 XO cycles before disabling gx - * gdsc. This is to prevent CPR measurements from - * failing. - */ - if (adreno_is_a660(adreno_dev)) { - gmu_core_regrmw(device, A6XX_GPU_CPR_FSM_CTL, - 1, 0); - ndelay(520); - } + /* + * Switch gx gdsc control from GMU to CPU force non-zero reference + * count in clk driver so next disable call will turn off the GDSC + */ + kgsl_pwrctrl_enable_gx_gdsc(device); - ret = regulator_disable(pwr->gx_gdsc); - if (ret) - dev_err(&gmu->pdev->dev, - "suspend fail: gx disable %d\n", ret); - - if (adreno_is_a660(adreno_dev)) - gmu_core_regrmw(device, A6XX_GPU_CPR_FSM_CTL, - 1, 1); - - if (a6xx_gmu_gx_is_on(adreno_dev)) - dev_err(&gmu->pdev->dev, - "gx is stuck on\n"); - } + /* + * Toggle the loop_en bit, across disabling the gx gdsc, with a delay + * of 10 XO cycles before disabling gx gdsc. This is to prevent CPR + * measurements from failing. 
+ */ + if (adreno_is_a660(adreno_dev)) { + gmu_core_regrmw(device, A6XX_GPU_CPR_FSM_CTL, 1, 0); + ndelay(520); } + + kgsl_pwrctrl_disable_gx_gdsc(device); + + if (adreno_is_a660(adreno_dev)) + gmu_core_regrmw(device, A6XX_GPU_CPR_FSM_CTL, 1, 1); + + if (a6xx_gmu_gx_is_on(adreno_dev)) + dev_err(&gmu->pdev->dev, "gx is stuck on\n"); } /* @@ -2904,8 +2893,8 @@ int a6xx_gmu_probe(struct kgsl_device *device, /* Setup any rdpm register ranges */ a6xx_gmu_rdpm_probe(gmu, device); - /* Set up GMU regulators */ - ret = kgsl_pwrctrl_probe_regulators(device, pdev); + /* Set up GMU gdscs */ + ret = kgsl_pwrctrl_probe_gdscs(device, pdev); if (ret) return ret; diff --git a/adreno_a6xx_rgmu.c b/adreno_a6xx_rgmu.c index dc988f34be..2ab389baa1 100644 --- a/adreno_a6xx_rgmu.c +++ b/adreno_a6xx_rgmu.c @@ -484,34 +484,26 @@ static void a6xx_rgmu_disable_clks(struct adreno_device *adreno_dev) { struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct kgsl_pwrctrl *pwr = &device->pwrctrl; - int ret; - /* Check GX GDSC is status */ - if (a6xx_rgmu_gx_is_on(adreno_dev)) { + /* + * This is based on the assumption that GMU is the only one controlling + * the GX HS. This code path is the only client voting for GX from linux + * kernel. + */ + if (!a6xx_rgmu_gx_is_on(adreno_dev)) + goto done; - if (IS_ERR_OR_NULL(pwr->gx_gdsc)) - return; + /* + * Switch gx gdsc control from GMU to CPU force non-zero reference + * count in clk driver so next disable call will turn off the GDSC + */ + kgsl_pwrctrl_enable_gx_gdsc(device); + kgsl_pwrctrl_disable_gx_gdsc(device); - /* - * Switch gx gdsc control from RGMU to CPU. Force non-zero - * reference count in clk driver so next disable call will - * turn off the GDSC. 
- */ - ret = regulator_enable(pwr->gx_gdsc); - if (ret) - dev_err(&rgmu->pdev->dev, - "Fail to enable gx gdsc:%d\n", ret); - - ret = regulator_disable(pwr->gx_gdsc); - if (ret) - dev_err(&rgmu->pdev->dev, - "Fail to disable gx gdsc:%d\n", ret); - - if (a6xx_rgmu_gx_is_on(adreno_dev)) - dev_err(&rgmu->pdev->dev, "gx is stuck on\n"); - } + if (a6xx_rgmu_gx_is_on(adreno_dev)) + dev_err(&rgmu->pdev->dev, "gx is stuck on\n"); +done: clk_bulk_disable_unprepare(rgmu->num_clks, rgmu->clks); } @@ -1339,8 +1331,8 @@ static int a6xx_rgmu_probe(struct kgsl_device *device, rgmu->pdev = pdev; - /* Set up RGMU regulators */ - ret = kgsl_pwrctrl_probe_regulators(device, pdev); + /* Set up RGMU gdscs */ + ret = kgsl_pwrctrl_probe_gdscs(device, pdev); if (ret) return ret; diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 49f0257702..c78276cbf6 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -1459,10 +1459,8 @@ static void _do_gbif_halt(struct kgsl_device *device, u32 reg, u32 ack_reg, static void gen7_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) { - int ret = 0; struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct kgsl_pwrctrl *pwr = &device->pwrctrl; /* Disconnect GPU from BUS is not needed if CX GDSC goes off later */ @@ -1497,31 +1495,21 @@ static void gen7_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) /* * This is based on the assumption that GMU is the only one controlling - * the GX HS. This code path is the only client voting for GX through - * the regulator interface. + * the GX HS. This code path is the only client voting for GX from linux + * kernel. 
*/ - if (pwr->gx_gdsc) { - if (gen7_gmu_gx_is_on(adreno_dev)) { - /* Switch gx gdsc control from GMU to CPU - * force non-zero reference count in clk driver - * so next disable call will turn - * off the GDSC - */ - ret = regulator_enable(pwr->gx_gdsc); - if (ret) - dev_err(&gmu->pdev->dev, - "suspend fail: gx enable %d\n", ret); + if (!gen7_gmu_gx_is_on(adreno_dev)) + return; - ret = regulator_disable(pwr->gx_gdsc); - if (ret) - dev_err(&gmu->pdev->dev, - "suspend fail: gx disable %d\n", ret); + /* + * Switch gx gdsc control from GMU to CPU force non-zero reference + * count in clk driver so next disable call will turn off the GDSC + */ + kgsl_pwrctrl_enable_gx_gdsc(device); + kgsl_pwrctrl_disable_gx_gdsc(device); - if (gen7_gmu_gx_is_on(adreno_dev)) - dev_err(&gmu->pdev->dev, - "gx is stuck on\n"); - } - } + if (gen7_gmu_gx_is_on(adreno_dev)) + dev_err(&gmu->pdev->dev, "gx is stuck on\n"); } /* @@ -2526,8 +2514,8 @@ int gen7_gmu_probe(struct kgsl_device *device, /* Setup any rdpm register ranges */ gen7_gmu_rdpm_probe(gmu, device); - /* Set up GMU regulators */ - ret = kgsl_pwrctrl_probe_regulators(device, pdev); + /* Set up GMU gdscs */ + ret = kgsl_pwrctrl_probe_gdscs(device, pdev); if (ret) return ret; diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index efcecb58bf..51959ac192 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -1361,10 +1361,8 @@ static void _do_gbif_halt(struct kgsl_device *device, u32 reg, u32 ack_reg, static void gen8_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) { - int ret = 0; struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct kgsl_pwrctrl *pwr = &device->pwrctrl; /* Disconnect GPU from BUS is not needed if CX GDSC goes off later */ @@ -1399,31 +1397,21 @@ static void gen8_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) /* * This is based on the assumption that GMU is the only one controlling - * the GX HS. 
This code path is the only client voting for GX through - * the regulator interface. + * the GX HS. This code path is the only client voting for GX from linux + * kernel. */ - if (pwr->gx_gdsc) { - if (gen8_gmu_gx_is_on(adreno_dev)) { - /* Switch gx gdsc control from GMU to CPU - * force non-zero reference count in clk driver - * so next disable call will turn - * off the GDSC - */ - ret = regulator_enable(pwr->gx_gdsc); - if (ret) - dev_err(&gmu->pdev->dev, - "suspend fail: gx enable %d\n", ret); + if (!gen8_gmu_gx_is_on(adreno_dev)) + return; - ret = regulator_disable(pwr->gx_gdsc); - if (ret) - dev_err(&gmu->pdev->dev, - "suspend fail: gx disable %d\n", ret); + /* + * Switch gx gdsc control from GMU to CPU force non-zero reference + * count in clk driver so next disable call will turn off the GDSC + */ + kgsl_pwrctrl_enable_gx_gdsc(device); + kgsl_pwrctrl_disable_gx_gdsc(device); - if (gen8_gmu_gx_is_on(adreno_dev)) - dev_err(&gmu->pdev->dev, - "gx is stuck on\n"); - } - } + if (gen8_gmu_gx_is_on(adreno_dev)) + dev_err(&gmu->pdev->dev, "gx is stuck on\n"); } /* @@ -2413,8 +2401,8 @@ int gen8_gmu_probe(struct kgsl_device *device, /* Setup any rdpm register ranges */ gen8_gmu_rdpm_probe(gmu, device); - /* Set up GMU regulators */ - ret = kgsl_pwrctrl_probe_regulators(device, pdev); + /* Set up GMU gdscs */ + ret = kgsl_pwrctrl_probe_gdscs(device, pdev); if (ret) return ret; diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 8750b946a7..e740aaddeb 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #if (KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE) #include @@ -1227,8 +1228,11 @@ static void kgsl_iommu_disable_clk(struct kgsl_mmu *mmu) clk_bulk_disable_unprepare(iommu->num_clks, iommu->clks); - if (!IS_ERR_OR_NULL(iommu->cx_gdsc)) - regulator_disable(iommu->cx_gdsc); + if (!IS_ERR_OR_NULL(iommu->cx_regulator)) + regulator_disable(iommu->cx_regulator); + + if (pm_runtime_enabled(&iommu->pdev->dev)) + 
pm_runtime_put_sync(&iommu->pdev->dev); } /* @@ -1239,8 +1243,11 @@ static void kgsl_iommu_enable_clk(struct kgsl_mmu *mmu) { struct kgsl_iommu *iommu = &mmu->iommu; - if (!IS_ERR_OR_NULL(iommu->cx_gdsc)) - WARN_ON(regulator_enable(iommu->cx_gdsc)); + if (!IS_ERR_OR_NULL(iommu->cx_regulator)) + WARN_ON(regulator_enable(iommu->cx_regulator)); + + if (pm_runtime_enabled(&iommu->pdev->dev)) + WARN_ON(pm_runtime_resume_and_get(&iommu->pdev->dev)); WARN_ON(clk_bulk_prepare_enable(iommu->num_clks, iommu->clks)); @@ -1622,6 +1629,9 @@ static void kgsl_iommu_close(struct kgsl_mmu *mmu) kgsl_guard_page = NULL; } + if (pm_runtime_enabled(&iommu->pdev->dev)) + pm_runtime_disable(&iommu->pdev->dev); + kmem_cache_destroy(addr_entry_cache); addr_entry_cache = NULL; } @@ -2637,8 +2647,18 @@ int kgsl_iommu_bind(struct kgsl_device *device, struct platform_device *pdev) iommu->clks[iommu->num_clks++].clk = c; } - /* Get the CX regulator if it is available */ - iommu->cx_gdsc = devm_regulator_get(&pdev->dev, "vddcx"); + /* + * IOMMU device is already bound to power domain by core framework + * as there is only single power domain + */ + if (of_property_read_bool(pdev->dev.of_node, "power-domains")) { + pm_runtime_enable(&pdev->dev); + } else { + struct regulator *cx_regulator = devm_regulator_get(&pdev->dev, "vddcx"); + + if (!IS_ERR(cx_regulator)) + iommu->cx_regulator = cx_regulator; + } set_bit(KGSL_MMU_PAGED, &mmu->features); @@ -2701,6 +2721,9 @@ int kgsl_iommu_bind(struct kgsl_device *device, struct platform_device *pdev) return 0; err: + if (pm_runtime_enabled(&pdev->dev)) + pm_runtime_disable(&pdev->dev); + kmem_cache_destroy(addr_entry_cache); addr_entry_cache = NULL; diff --git a/kgsl_iommu.h b/kgsl_iommu.h index 6bce555ff7..b6f8d362bf 100644 --- a/kgsl_iommu.h +++ b/kgsl_iommu.h @@ -173,8 +173,8 @@ struct kgsl_iommu { u32 cb0_offset; /** @pagesize: Size of each context bank register space */ u32 pagesize; - /** @cx_gdsc: CX GDSC handle in case the IOMMU needs it */ - 
struct regulator *cx_gdsc; + /** @cx_regulator: CX regulator handle in case the IOMMU needs it */ + struct regulator *cx_regulator; }; /* diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index dc289a926d..52a2c477e9 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +27,8 @@ #define KGSL_MAX_BUSLEVELS 20 +#define GX_GDSC_TIMEOUT_MS 200 + /* Order deeply matters here because reasons. New entries go on the end */ static const char * const clocks[KGSL_MAX_CLKS] = { "src_clk", @@ -1300,70 +1303,132 @@ int kgsl_pwrctrl_axi(struct kgsl_device *device, bool state) return 0; } +static int kgsl_genpd_disable_wait(struct device *dev, u32 timeout) +{ + ktime_t tout = ktime_add_us(ktime_get(), timeout * USEC_PER_MSEC); + int ret; + + ret = pm_runtime_put_sync(dev); + if (ret < 0) + return ret; + + for (;;) { + if (!kgsl_genpd_is_enabled(dev)) + return 0; + + if (ktime_compare(ktime_get(), tout) > 0) + return (!kgsl_genpd_is_enabled(dev) ? 0 : -ETIMEDOUT); + + usleep_range((100 >> 2) + 1, 100); + } +} + +int kgsl_regulator_disable_wait(struct regulator *reg, u32 timeout) +{ + ktime_t tout = ktime_add_us(ktime_get(), timeout * USEC_PER_MSEC); + int ret; + + ret = regulator_disable(reg); + if (ret) + return ret; + + for (;;) { + if (!regulator_is_enabled(reg)) + return 0; + + if (ktime_compare(ktime_get(), tout) > 0) + return (!regulator_is_enabled(reg) ? 0 : -ETIMEDOUT); + + usleep_range((100 >> 2) + 1, 100); + } +} + int kgsl_pwrctrl_enable_cx_gdsc(struct kgsl_device *device) { struct kgsl_pwrctrl *pwr = &device->pwrctrl; - struct regulator *regulator = pwr->cx_gdsc; int ret; - if (IS_ERR_OR_NULL(regulator)) + if (!pwr->cx_regulator && !pwr->cx_pd) return 0; ret = wait_for_completion_timeout(&pwr->cx_gdsc_gate, msecs_to_jiffies(5000)); if (!ret) { - dev_err(device->dev, "GPU CX wait timeout. 
Dumping CX votes:\n"); /* Dump the cx regulator consumer list */ - qcom_clk_dump(NULL, regulator, false); + if (pwr->cx_regulator) { + dev_err(device->dev, "GPU CX wait timeout. Dumping CX votes:\n"); + qcom_clk_dump(NULL, pwr->cx_regulator, false); + } else { + dev_err(device->dev, "GPU CX wait timeout\n"); + } } - ret = regulator_enable(regulator); + if (pwr->cx_regulator) + ret = regulator_enable(pwr->cx_regulator); + else + ret = pm_runtime_resume_and_get(pwr->cx_pd); + if (ret) - dev_err(device->dev, "Failed to enable CX regulator: %d\n", ret); + dev_err(device->dev, "Failed to enable CX gdsc, error %d\n", ret); kgsl_mmu_send_tlb_hint(&device->mmu, false); pwr->cx_gdsc_wait = false; return ret; } -static int kgsl_pwtctrl_enable_gx_gdsc(struct kgsl_device *device) +int kgsl_pwrctrl_enable_gx_gdsc(struct kgsl_device *device) { - struct regulator *regulator = device->pwrctrl.gx_gdsc; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; int ret; - if (IS_ERR_OR_NULL(regulator)) + if (!pwr->gx_regulator && !pwr->gx_pd) return 0; - ret = regulator_enable(regulator); + if (pwr->gx_regulator) + ret = regulator_enable(pwr->gx_regulator); + else + ret = pm_runtime_resume_and_get(pwr->gx_pd); + if (ret) - dev_err(device->dev, "Failed to enable GX regulator: %d\n", ret); + dev_err(device->dev, "Failed to enable GX gdsc, error %d\n", ret); + return ret; } void kgsl_pwrctrl_disable_cx_gdsc(struct kgsl_device *device) { - struct regulator *regulator = device->pwrctrl.cx_gdsc; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; - if (IS_ERR_OR_NULL(regulator)) + if (!pwr->cx_regulator && !pwr->cx_pd) return; kgsl_mmu_send_tlb_hint(&device->mmu, true); - reinit_completion(&device->pwrctrl.cx_gdsc_gate); - device->pwrctrl.cx_gdsc_wait = true; - regulator_disable(regulator); + reinit_completion(&pwr->cx_gdsc_gate); + pwr->cx_gdsc_wait = true; + + if (pwr->cx_regulator) + regulator_disable(pwr->cx_regulator); + else + pm_runtime_put_sync(pwr->cx_pd); } -static void 
kgsl_pwrctrl_disable_gx_gdsc(struct kgsl_device *device) +void kgsl_pwrctrl_disable_gx_gdsc(struct kgsl_device *device) { - struct regulator *regulator = device->pwrctrl.gx_gdsc; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int ret; - if (IS_ERR_OR_NULL(regulator)) + if (!pwr->gx_regulator && !pwr->gx_pd) return; - if (!kgsl_regulator_disable_wait(regulator, 200)) - dev_err(device->dev, "Regulator vdd is stuck on\n"); + if (pwr->gx_regulator) + ret = kgsl_regulator_disable_wait(pwr->gx_regulator, GX_GDSC_TIMEOUT_MS); + else + ret = kgsl_genpd_disable_wait(pwr->gx_pd, GX_GDSC_TIMEOUT_MS); + + if (ret) + dev_err(device->dev, "vdd is stuck on, error %d\n", ret); } -static int enable_regulators(struct kgsl_device *device) +static int enable_gdscs(struct kgsl_device *device) { struct kgsl_pwrctrl *pwr = &device->pwrctrl; int ret; @@ -1375,10 +1440,10 @@ static int enable_regulators(struct kgsl_device *device) if (!ret) { /* Set parent in retention voltage to power up vdd supply */ ret = kgsl_regulator_set_voltage(device->dev, - pwr->gx_gdsc_parent, - pwr->gx_gdsc_parent_min_corner); + pwr->gx_regulator_parent, + pwr->gx_regulator_parent_min_corner); if (!ret) - ret = kgsl_pwtctrl_enable_gx_gdsc(device); + ret = kgsl_pwrctrl_enable_gx_gdsc(device); } if (ret) { @@ -1390,28 +1455,80 @@ static int enable_regulators(struct kgsl_device *device) return 0; } -int kgsl_pwrctrl_probe_regulators(struct kgsl_device *device, - struct platform_device *pdev) +static int kgsl_pwrctrl_probe_cx_gdsc(struct kgsl_device *device, struct platform_device *pdev) { struct kgsl_pwrctrl *pwr = &device->pwrctrl; - pwr->cx_gdsc = devm_regulator_get(&pdev->dev, "vddcx"); - if (IS_ERR(pwr->cx_gdsc)) { - if (PTR_ERR(pwr->cx_gdsc) != -EPROBE_DEFER) - dev_err(&pdev->dev, "Couldn't get the vddcx gdsc\n"); - return PTR_ERR(pwr->cx_gdsc); - } + if (of_property_read_bool(pdev->dev.of_node, "power-domains")) { + /* Get virtual device handle for CX GDSC to control it */ + struct device *cx_pd = 
dev_pm_domain_attach_by_name(&pdev->dev, "cx"); - pwr->gx_gdsc = devm_regulator_get(&pdev->dev, "vdd"); - if (IS_ERR(pwr->gx_gdsc)) { - if (PTR_ERR(pwr->gx_gdsc) != -EPROBE_DEFER) - dev_err(&pdev->dev, "Couldn't get the vdd gdsc\n"); - return PTR_ERR(pwr->gx_gdsc); + if (IS_ERR_OR_NULL(cx_pd)) { + dev_err_probe(&pdev->dev, PTR_ERR(cx_pd), + "Failed to attach cx power domain\n"); + return IS_ERR(cx_pd) ? PTR_ERR(cx_pd) : -EINVAL; + } + pwr->cx_pd = cx_pd; + } else { + struct regulator *cx_regulator = devm_regulator_get(&pdev->dev, "vddcx"); + + if (IS_ERR(cx_regulator)) { + dev_err_probe(&pdev->dev, PTR_ERR(cx_regulator), + "Couldn't get the vddcx\n"); + return PTR_ERR(cx_regulator); + } + pwr->cx_regulator = cx_regulator; } return 0; } +static int kgsl_pwrctrl_probe_gx_gdsc(struct kgsl_device *device, struct platform_device *pdev) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + if (of_property_read_bool(pdev->dev.of_node, "power-domains")) { + /* Get virtual device handle for GX GDSC to control it */ + struct device *gx_pd = dev_pm_domain_attach_by_name(&pdev->dev, "gx"); + + if (IS_ERR_OR_NULL(gx_pd)) { + dev_err_probe(&pdev->dev, PTR_ERR(gx_pd), + "Failed to attach gx power domain\n"); + return IS_ERR(gx_pd) ? 
PTR_ERR(gx_pd) : -EINVAL; + } + pwr->gx_pd = gx_pd; + } else { + struct regulator *gx_regulator = devm_regulator_get(&pdev->dev, "vdd"); + + if (IS_ERR(gx_regulator)) { + dev_err_probe(&pdev->dev, PTR_ERR(gx_regulator), + "Couldn't get the vdd\n"); + return PTR_ERR(gx_regulator); + } + pwr->gx_regulator = gx_regulator; + } + + return 0; +} + +int kgsl_pwrctrl_probe_gdscs(struct kgsl_device *device, struct platform_device *pdev) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int ret; + + ret = kgsl_pwrctrl_probe_cx_gdsc(device, pdev); + if (ret) + return ret; + + ret = kgsl_pwrctrl_probe_gx_gdsc(device, pdev); + if (ret && pwr->cx_pd) { + dev_pm_domain_detach(pwr->cx_pd, false); + pwr->cx_pd = NULL; + } + + return ret; +} + static int kgsl_cx_gdsc_event(struct notifier_block *nb, unsigned long event, void *data) { @@ -1419,7 +1536,13 @@ static int kgsl_cx_gdsc_event(struct notifier_block *nb, struct kgsl_device *device = container_of(pwr, struct kgsl_device, pwrctrl); u32 val; - if (!(event & REGULATOR_EVENT_DISABLE) || !pwr->cx_gdsc_wait) + if (!pwr->cx_gdsc_wait) + return 0; + + if (pwr->cx_pd && (event != GENPD_NOTIFY_OFF)) + return 0; + + if (pwr->cx_regulator && !(event & REGULATOR_EVENT_DISABLE)) return 0; if (pwr->cx_gdsc_offset) { @@ -1434,16 +1557,18 @@ static int kgsl_cx_gdsc_event(struct notifier_block *nb, return 0; } -int kgsl_register_gdsc_notifier(struct kgsl_device *device) +static int kgsl_register_gdsc_notifier(struct kgsl_device *device) { struct kgsl_pwrctrl *pwr = &device->pwrctrl; - if (!IS_ERR_OR_NULL(pwr->cx_gdsc)) { - pwr->cx_gdsc_nb.notifier_call = kgsl_cx_gdsc_event; - return devm_regulator_register_notifier(pwr->cx_gdsc, &pwr->cx_gdsc_nb); - } + if (!pwr->cx_regulator && !pwr->cx_pd) + return 0; - return 0; + pwr->cx_gdsc_nb.notifier_call = kgsl_cx_gdsc_event; + if (pwr->cx_regulator) + return devm_regulator_register_notifier(pwr->cx_regulator, &pwr->cx_gdsc_nb); + + return dev_pm_genpd_add_notifier(pwr->cx_pd, &pwr->cx_gdsc_nb); } 
static int kgsl_pwrctrl_pwrrail(struct kgsl_device *device, bool state) @@ -1468,18 +1593,18 @@ static int kgsl_pwrctrl_pwrrail(struct kgsl_device *device, bool state) trace_kgsl_rail(device, state); /* Set the parent in retention voltage to disable CPR interrupts */ - kgsl_regulator_set_voltage(device->dev, pwr->gx_gdsc_parent, - pwr->gx_gdsc_parent_min_corner); + kgsl_regulator_set_voltage(device->dev, pwr->gx_regulator_parent, + pwr->gx_regulator_parent_min_corner); kgsl_pwrctrl_disable_gx_gdsc(device); /* Remove the vote for the vdd parent supply */ - kgsl_regulator_set_voltage(device->dev, pwr->gx_gdsc_parent, 0); + kgsl_regulator_set_voltage(device->dev, pwr->gx_regulator_parent, 0); kgsl_pwrctrl_disable_cx_gdsc(device); } } else { - status = enable_regulators(device); + status = enable_gdscs(device); kgsl_mmu_send_tlb_hint(&device->mmu, false); } @@ -1663,24 +1788,26 @@ int kgsl_pwrctrl_init(struct kgsl_device *device) _isense_clk_set_rate(pwr, pwr->num_pwrlevels - 1); - if (of_property_read_bool(pdev->dev.of_node, "vddcx-supply")) - pwr->cx_gdsc = devm_regulator_get(&pdev->dev, "vddcx"); + if (of_property_read_bool(pdev->dev.of_node, "vddcx-supply") || + (of_property_match_string(pdev->dev.of_node, "power-domain-names", "cx") >= 0)) + kgsl_pwrctrl_probe_cx_gdsc(device, pdev); - if (of_property_read_bool(pdev->dev.of_node, "vdd-supply")) - pwr->gx_gdsc = devm_regulator_get(&pdev->dev, "vdd"); + if (of_property_read_bool(pdev->dev.of_node, "vdd-supply") || + (of_property_match_string(pdev->dev.of_node, "power-domain-names", "gx") >= 0)) + kgsl_pwrctrl_probe_gx_gdsc(device, pdev); if (of_property_read_bool(pdev->dev.of_node, "vdd-parent-supply")) { - pwr->gx_gdsc_parent = devm_regulator_get(&pdev->dev, + pwr->gx_regulator_parent = devm_regulator_get(&pdev->dev, "vdd-parent"); - if (IS_ERR(pwr->gx_gdsc_parent)) { + if (IS_ERR(pwr->gx_regulator_parent)) { dev_err(device->dev, "Failed to get vdd-parent regulator:%ld\n", - PTR_ERR(pwr->gx_gdsc_parent)); + 
PTR_ERR(pwr->gx_regulator_parent)); return -ENODEV; } if (of_property_read_u32(pdev->dev.of_node, "vdd-parent-min-corner", - &pwr->gx_gdsc_parent_min_corner)) { + &pwr->gx_regulator_parent_min_corner)) { dev_err(device->dev, "vdd-parent-min-corner not found\n"); return -ENODEV; @@ -1713,6 +1840,17 @@ void kgsl_pwrctrl_close(struct kgsl_device *device) dev_pm_qos_remove_request(&pwr->sysfs_thermal_req); pm_runtime_disable(&device->pdev->dev); + + if (pwr->cx_pd) { + dev_pm_genpd_remove_notifier(pwr->cx_pd); + dev_pm_domain_detach(pwr->cx_pd, false); + pwr->cx_pd = NULL; + } + + if (pwr->gx_pd) { + dev_pm_domain_detach(pwr->gx_pd, false); + pwr->gx_pd = NULL; + } } void kgsl_idle_check(struct work_struct *work) diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index a14229609b..383d85ee99 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -103,14 +103,18 @@ struct kgsl_pwrctrl { int interrupt_num; struct clk *grp_clks[KGSL_MAX_CLKS]; struct clk *gpu_bimc_int_clk; - /** @cx_gdsc: Pointer to the CX domain regulator if applicable */ - struct regulator *cx_gdsc; - /** @gx_gdsc: Pointer to the GX domain regulator if applicable */ - struct regulator *gx_gdsc; - /** @gx_gdsc: Pointer to the GX domain parent supply */ - struct regulator *gx_gdsc_parent; - /** @gx_gdsc_parent_min_corner: Minimum supply voltage for GX parent */ - u32 gx_gdsc_parent_min_corner; + /** @cx_regulator: Pointer to the CX domain regulator if applicable */ + struct regulator *cx_regulator; + /** @gx_regulator: Pointer to the GX domain regulator if applicable */ + struct regulator *gx_regulator; + /** @cx_pd: Power domain for controlling CX GDSC */ + struct device *cx_pd; + /** @gx_pd: Power domain for controlling GX GDSC */ + struct device *gx_pd; + /** @gx_regulator_parent: Pointer to the GX domain parent supply */ + struct regulator *gx_regulator_parent; + /** @gx_regulator_parent_min_corner: Minimum supply voltage for GX parent */ + u32 gx_regulator_parent_min_corner; /** @cx_gdsc_nb: Notifier block 
for cx gdsc regulator */ struct notifier_block cx_gdsc_nb; /** @cx_gdsc_gate: Completion to signal cx gdsc collapse status */ @@ -292,12 +296,25 @@ int kgsl_pwrctrl_enable_cx_gdsc(struct kgsl_device *device); void kgsl_pwrctrl_disable_cx_gdsc(struct kgsl_device *device); /** - * kgsl_pwrctrl_probe_regulators - Probe regulators + * kgsl_pwrctrl_enable_gx_gdsc - Enable gx gdsc + * @device: Pointer to the kgsl device + * + * Return: 0 on success or negative error on failure + */ +int kgsl_pwrctrl_enable_gx_gdsc(struct kgsl_device *device); + +/** + * kgsl_pwrctrl_disable_gx_gdsc - Disable gx gdsc + * @device: Pointer to the kgsl device + */ +void kgsl_pwrctrl_disable_gx_gdsc(struct kgsl_device *device); + +/** + * kgsl_pwrctrl_probe_gdscs - Probe gdscs * @device: Pointer to the kgsl device * @pdev: Pointer to the platform device * * Return: 0 on success or negative error on failure */ -int kgsl_pwrctrl_probe_regulators(struct kgsl_device *device, - struct platform_device *pdev); +int kgsl_pwrctrl_probe_gdscs(struct kgsl_device *device, struct platform_device *pdev); #endif /* __KGSL_PWRCTRL_H */ diff --git a/kgsl_util.c b/kgsl_util.c index a6ba2ae922..718171e7bc 100644 --- a/kgsl_util.c +++ b/kgsl_util.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #if (KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE) #include @@ -22,30 +23,20 @@ #include #include #include -#include #include #include "adreno.h" #include "kgsl_util.h" -bool kgsl_regulator_disable_wait(struct regulator *reg, u32 timeout) +bool kgsl_genpd_is_enabled(struct device *dev) { - ktime_t tout = ktime_add_us(ktime_get(), timeout * 1000); + struct generic_pm_domain *genpd; - if (IS_ERR_OR_NULL(reg)) - return true; + if (IS_ERR_OR_NULL(dev) || IS_ERR_OR_NULL(dev->pm_domain)) + return false; - regulator_disable(reg); - - for (;;) { - if (!regulator_is_enabled(reg)) - return true; - - if (ktime_compare(ktime_get(), tout) > 0) - return (!regulator_is_enabled(reg)); - - usleep_range((100 >> 2) + 1, 
100); - } + genpd = pd_to_genpd(dev->pm_domain); + return (READ_ONCE(genpd->status) == GENPD_STATE_ON); } struct clk *kgsl_of_clk_by_name(struct clk_bulk_data *clks, int count, diff --git a/kgsl_util.h b/kgsl_util.h index 67e28e4b5d..74b622e27e 100644 --- a/kgsl_util.h +++ b/kgsl_util.h @@ -107,16 +107,12 @@ int kgsl_hwlock(struct cpu_gpu_lock *lock); void kgsl_hwunlock(struct cpu_gpu_lock *lock); /** - * kgsl_regulator_disable_wait - Disable a regulator and wait for it - * @reg: A &struct regulator handle - * @timeout: Time to wait (in milliseconds) + * kgsl_genpd_is_enabled - Check whether genpd is enabled or not + * @dev: Power domain handle * - * Disable the regulator and wait @timeout milliseconds for it to enter the - * disabled state. - * - * Return: True if the regulator was disabled or false if it timed out + * Return: True if genpd is enabled otherwise false. */ -bool kgsl_regulator_disable_wait(struct regulator *reg, u32 timeout); +bool kgsl_genpd_is_enabled(struct device *dev); /** * kgsl_of_clk_by_name - Return a clock device for a given name From 6f2a236d3fa399ba7416f7d1982a3b0d2770a729 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Fri, 1 Dec 2023 12:37:09 -0800 Subject: [PATCH 0607/1016] kgsl: gen8: Disable BR throttling In certain scenarios, enabling BR throttling results in counter overflow. This overflow can cause the counter to get corrupted, which results in the throttle threshold blocking the BR pipe and causes a fault. Hence disable BR throttling.
Change-Id: I9a0b9d0eb8de4207e00bd329a1cbab9429649310 Signed-off-by: Hareesh Gundu --- adreno-gpulist.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index e6df16cb41..1ff0ad1169 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2526,7 +2526,8 @@ static const struct gen8_nonctxt_regs gen8_0_0_nonctxt_regs[] = { { GEN8_PC_CONTEXT_SWITCH_STABILIZE_CNTL_1, 0x00000002, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_PC_CHICKEN_BITS_1, 0x00000003, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_PC_CHICKEN_BITS_2, 0x00000200, BIT(PIPE_BR) | BIT(PIPE_BV) }, - { GEN8_PC_CHICKEN_BITS_3, 0x00500000, BIT(PIPE_BR) | BIT(PIPE_BV) }, + /* Disable BR throttling */ + { GEN8_PC_CHICKEN_BITS_3, 0x00400000, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_PC_CHICKEN_BITS_4, 0x00500050, BIT(PIPE_BR) | BIT(PIPE_BV) }, /* Configure GBIF GX registers */ { GEN8_UCHE_GBIF_GX_CONFIG, 0x010240e0, BIT(PIPE_NONE) }, From db956ec79756effa49b865ab7e62b004716f1341 Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Fri, 1 Dec 2023 10:22:14 -0800 Subject: [PATCH 0608/1016] msm: kgsl: Add minor fixes in snapshot When kgsl runs out of memory it prints the error logs; ratelimit the logs so that we do not spam the logs. Omit the secure registers from snapshot which hold the secure firmware address. Add extra parentheses around some size checks.
Change-Id: I6bb824bd8889b4689f760da7bf1191dacbdd2fa4 Signed-off-by: Urvashi Agrawal --- adreno_gen8_0_0_snapshot.h | 2 +- adreno_gen8_snapshot.c | 2 +- kgsl_device.h | 2 +- kgsl_snapshot.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h index b3614ae557..601e5ba38e 100644 --- a/adreno_gen8_0_0_snapshot.h +++ b/adreno_gen8_0_0_snapshot.h @@ -362,7 +362,7 @@ static const u32 gen8_0_0_cp_cp_pipe_none_registers[] = { 0x00800, 0x0081e, 0x00820, 0x0082d, 0x00838, 0x0083e, 0x00840, 0x00847, 0x0084b, 0x0084c, 0x00850, 0x0088f, 0x008b5, 0x008b6, 0x008c0, 0x008cb, 0x008d0, 0x008e4, 0x008e7, 0x008ee, 0x008fa, 0x008fd, 0x00928, 0x00929, - 0x00958, 0x0095b, 0x00980, 0x009ff, 0x0f000, 0x0f002, + 0x00958, 0x0095b, 0x00980, 0x009ff, UINT_MAX, UINT_MAX, }; static_assert(IS_ALIGNED(sizeof(gen8_0_0_cp_cp_pipe_none_registers), 8)); diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 94e029a497..75ee03f6ec 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -600,7 +600,7 @@ static void gen8_snapshot_shader(struct kgsl_device *device, for (i = 0; i < num_shader_blocks; i++) { struct gen8_shader_block *block = &shader_blocks[i]; - for (slice = 0; sp < block->num_slices; slice++) { + for (slice = 0; slice < block->num_slices; slice++) { for (sp = 0; sp < block->num_sps; sp++) { for (usptp = 0; usptp < block->num_usptps; usptp++) { info.block = block; diff --git a/kgsl_device.h b/kgsl_device.h index ce37644142..33fca3e031 100644 --- a/kgsl_device.h +++ b/kgsl_device.h @@ -965,7 +965,7 @@ struct kgsl_process_private *kgsl_process_private_find(pid_t pid); * the number of strings in the binary */ #define SNAPSHOT_ERR_NOMEM(_d, _s) \ - dev_err((_d)->dev, \ + dev_err_ratelimited((_d)->dev, \ "snapshot: not enough snapshot memory for section %s\n", (_s)) /** diff --git a/kgsl_snapshot.c b/kgsl_snapshot.c index bf1b544705..dd04963d3f 100644 --- a/kgsl_snapshot.c +++ b/kgsl_snapshot.c @@ 
-403,7 +403,7 @@ static size_t kgsl_snapshot_dump_indexed_regs_v2(struct kgsl_device *device, (struct kgsl_snapshot_indexed_regs_v2 *)buf; u32 *data = (u32 *)(buf + sizeof(*header)); - if (remain < (iregs->count * 4 * 3) + sizeof(*header)) { + if (remain < ((iregs->count * 4) + sizeof(*header))) { SNAPSHOT_ERR_NOMEM(device, "INDEXED REGS"); return 0; } @@ -418,7 +418,7 @@ static size_t kgsl_snapshot_dump_indexed_regs_v2(struct kgsl_device *device, kgsl_regmap_read_indexed_interleaved(&device->regmap, iregs->index, iregs->data, data, iregs->start, iregs->count); - return (iregs->count * 4 * 3) + sizeof(*header); + return (iregs->count * 4) + sizeof(*header); } void kgsl_snapshot_indexed_registers_v2(struct kgsl_device *device, From a44a4dd5e23da9738491d2463373c67b5aa4cb45 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Fri, 17 Nov 2023 12:21:48 +0530 Subject: [PATCH 0609/1016] kgsl: Do not allow allocations for zero size VBOs Add size check in gpumem_alloc_vbo_entry(). Requesting allocation of zero size VBO will leave a mementry with invalid gpuaddr having size 0. Change-Id: Ica1780fd0ba0e921383e2af8e4cbbb41f33bfbf5 Signed-off-by: Pankaj Gupta --- kgsl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kgsl.c b/kgsl.c index c1b3a281f8..e6f3926ce9 100644 --- a/kgsl.c +++ b/kgsl.c @@ -4096,6 +4096,9 @@ gpumem_alloc_vbo_entry(struct kgsl_device_private *dev_priv, struct kgsl_mem_entry *entry; int ret; + if (!size) + return ERR_PTR(-EINVAL); + /* Disallow specific flags */ if (flags & (KGSL_MEMFLAGS_GPUREADONLY | KGSL_CACHEMODE_MASK)) return ERR_PTR(-EINVAL); From 2df1d1303e307c4a339bdd7f79b543056286ec62 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sat, 16 Sep 2023 17:10:26 +0530 Subject: [PATCH 0610/1016] kgsl: Add support to compile graphics driver for niobe Add support to compile graphics driver for niobe. 
Change-Id: If37c5b0daf901f69c30048f9f67f9eeccf8ec8a5 Signed-off-by: Kamal Agrawal --- Kbuild | 3 +++ config/gki_niobe.conf | 24 ++++++++++++++++++++++++ config/niobe_consolidate_gpuconf | 9 +++++++++ config/niobe_gki_gpuconf | 9 +++++++++ 4 files changed, 45 insertions(+) create mode 100644 config/gki_niobe.conf create mode 100644 config/niobe_consolidate_gpuconf create mode 100644 config/niobe_gki_gpuconf diff --git a/Kbuild b/Kbuild index 65f12ce88e..6b8cee33d1 100644 --- a/Kbuild +++ b/Kbuild @@ -61,6 +61,9 @@ endif ifeq ($(CONFIG_ARCH_SDM670), y) include $(KGSL_PATH)/config/gki_qcs605.conf endif +ifeq ($(CONFIG_ARCH_NIOBE), y) + include $(KGSL_PATH)/config/gki_niobe.conf +endif ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERNEL_SRC)/drivers/devfreq diff --git a/config/gki_niobe.conf b/config/gki_niobe.conf new file mode 100644 index 0000000000..04506a8f0d --- /dev/null +++ b/config/gki_niobe.conf @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. 
+ +CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 +CONFIG_QCOM_KGSL_SORT_POOL = y +CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y +CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT = y +CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR = \"msm-adreno-tz\" +CONFIG_QTI_HW_FENCE=y + +ifneq ($(CONFIG_CORESIGHT),) + CONFIG_QCOM_KGSL_CORESIGHT = y +endif + +ccflags-y += -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ + -DCONFIG_QCOM_KGSL_SORT_POOL=1 \ + -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ + -DCONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=1 \ + -DCONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR=\"msm-adreno-tz\" \ + -DCONFIG_QTI_HW_FENCE=1 # This is needed to enable the hw-fence driver APIs in kernel headers + +ifneq ($(CONFIG_CORESIGHT),) + ccflags-y += -DCONFIG_QCOM_KGSL_CORESIGHT=1 +endif diff --git a/config/niobe_consolidate_gpuconf b/config/niobe_consolidate_gpuconf new file mode 100644 index 0000000000..837c2df56d --- /dev/null +++ b/config/niobe_consolidate_gpuconf @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + +CONFIG_QCOM_KGSL=m +CONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 +CONFIG_QCOM_KGSL_SORT_POOL=y +CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y +CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=y +CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR="msm-adreno-tz" diff --git a/config/niobe_gki_gpuconf b/config/niobe_gki_gpuconf new file mode 100644 index 0000000000..837c2df56d --- /dev/null +++ b/config/niobe_gki_gpuconf @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. 
+ +CONFIG_QCOM_KGSL=m +CONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 +CONFIG_QCOM_KGSL_SORT_POOL=y +CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y +CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=y +CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR="msm-adreno-tz" From 099ba87324095a749b206e0c82d0601fba64d072 Mon Sep 17 00:00:00 2001 From: Raviteja Narayanam Date: Tue, 7 Nov 2023 19:32:49 +0530 Subject: [PATCH 0611/1016] kgsl: gen7: Add support for Gen7_11_0 gpu Add required code support to identify and get snapshot for the Gen7_11_0 GPU. Change-Id: I36e71ba8cc155095a07579e9a2fc3674196d30ef Signed-off-by: Raviteja Narayanam --- adreno-gpulist.h | 37 + adreno.h | 5 +- adreno_gen7.c | 9 +- adreno_gen7_11_0_snapshot.h | 1276 +++++++++++++++++++++++++++++++++++ adreno_gen7_snapshot.c | 27 + gen7_reg.h | 1 + 6 files changed, 1351 insertions(+), 4 deletions(-) create mode 100644 adreno_gen7_11_0_snapshot.h diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 32c9643e27..0f8afdce46 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2391,6 +2391,42 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_14_0 = { .fast_bus_hint = false, }; +extern const struct gen7_snapshot_block_list gen7_11_0_snapshot_block_list; + +static const struct adreno_gen7_core adreno_gpu_core_gen7_11_0 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_GEN7_11_0, + UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), + .compatible = "qcom,adreno-gpu-gen7-11-0", + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | + ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_L3_VOTE | + ADRENO_DMS, + .gpudev = &adreno_gen7_hwsched_gpudev.base, + .perfcounters = &adreno_gen7_hwsched_perfcounters, + .uche_gmem_alignment = SZ_16M, + .gmem_size = SZ_1M + SZ_512K, + .bus_width = 32, + .snapshot_size = SZ_4M, + }, + .gmu_fw_version = GMU_VERSION(4, 5, 0), + .sqefw_name = "gen71100_sqe.fw", + .gmufw_name = "gen71100_gmu.bin", + .zap_name = "gen71100_zap.mbn", + .hwcg = gen7_2_0_hwcg_regs, + .hwcg_count = ARRAY_SIZE(gen7_2_0_hwcg_regs), + .ao_hwcg = 
gen7_2_0_ao_hwcg_regs, + .ao_hwcg_count = ARRAY_SIZE(gen7_2_0_ao_hwcg_regs), + .gbif = gen7_2_0_gbif_regs, + .gbif_count = ARRAY_SIZE(gen7_2_0_gbif_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = gen7_0_0_protected_regs, + .highest_bank_bit = 16, + .gmu_hub_clk_freq = 200000000, + .gen7_snapshot_block_list = &gen7_11_0_snapshot_block_list, + .preempt_level = 1, + .fast_bus_hint = true, +}; + static const struct kgsl_regmap_list a663_hwcg_regs[] = { {A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, {A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, @@ -2741,6 +2777,7 @@ static const struct adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_gen7_9_0.base, &adreno_gpu_core_gen7_9_1.base, &adreno_gpu_core_gen7_14_0.base, + &adreno_gpu_core_gen7_11_0.base, &adreno_gpu_core_gen8_0_0.base, }; diff --git a/adreno.h b/adreno.h index 90838bfa85..4af079020a 100644 --- a/adreno.h +++ b/adreno.h @@ -240,6 +240,7 @@ enum adreno_gpurev { ADRENO_REV_GEN7_9_0 = ADRENO_GPUREV_VALUE(7, 9, 0), ADRENO_REV_GEN7_9_1 = ADRENO_GPUREV_VALUE(7, 9, 1), ADRENO_REV_GEN7_14_0 = ADRENO_GPUREV_VALUE(7, 14, 0), + ADRENO_REV_GEN7_11_0 = ADRENO_GPUREV_VALUE(7, 11, 0), ADRENO_REV_GEN8_0_0 = ADRENO_GPUREV_VALUE(8, 0, 0), }; @@ -1261,6 +1262,7 @@ ADRENO_TARGET(gen7_4_0, ADRENO_REV_GEN7_4_0) ADRENO_TARGET(gen7_9_0, ADRENO_REV_GEN7_9_0) ADRENO_TARGET(gen7_9_1, ADRENO_REV_GEN7_9_1) ADRENO_TARGET(gen7_14_0, ADRENO_REV_GEN7_14_0) +ADRENO_TARGET(gen7_11_0, ADRENO_REV_GEN7_11_0) static inline int adreno_is_gen7_9_x(struct adreno_device *adreno_dev) { @@ -1276,7 +1278,8 @@ static inline int adreno_is_gen7_0_x_family(struct adreno_device *adreno_dev) static inline int adreno_is_gen7_2_x_family(struct adreno_device *adreno_dev) { return adreno_is_gen7_2_0(adreno_dev) || adreno_is_gen7_2_1(adreno_dev) || - adreno_is_gen7_9_x(adreno_dev) || adreno_is_gen7_14_0(adreno_dev); + adreno_is_gen7_9_x(adreno_dev) || adreno_is_gen7_14_0(adreno_dev) || + adreno_is_gen7_11_0(adreno_dev); } /* diff --git a/adreno_gen7.c 
b/adreno_gen7.c index a37b805c74..ee32898ba4 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -847,10 +847,12 @@ int gen7_start(struct adreno_device *adreno_dev) */ if (adreno_is_gen7_0_0(adreno_dev) || adreno_is_gen7_0_1(adreno_dev) || adreno_is_gen7_4_0(adreno_dev) || adreno_is_gen7_2_0(adreno_dev) - || adreno_is_gen7_2_1(adreno_dev)) { + || adreno_is_gen7_2_1(adreno_dev) || adreno_is_gen7_11_0(adreno_dev)) { kgsl_regwrite(device, GEN7_CP_CHICKEN_DBG, 0x1); kgsl_regwrite(device, GEN7_CP_BV_CHICKEN_DBG, 0x1); - kgsl_regwrite(device, GEN7_CP_LPAC_CHICKEN_DBG, 0x1); + /* Avoid configuring LPAC pipe on targets which do not have LPAC. */ + if (adreno_dev->lpac_enabled) + kgsl_regwrite(device, GEN7_CP_LPAC_CHICKEN_DBG, 0x1); } _set_secvid(device); @@ -1626,7 +1628,8 @@ int gen7_probe_common(struct platform_device *pdev, kgsl_pwrscale_fast_bus_hint(gen7_core->fast_bus_hint); device->pwrctrl.rt_bus_hint = gen7_core->rt_bus_hint; - device->pwrctrl.cx_gdsc_offset = GEN7_GPU_CC_CX_GDSCR; + device->pwrctrl.cx_gdsc_offset = adreno_is_gen7_11_0(adreno_dev) ? + GEN7_11_0_GPU_CC_CX_GDSCR : GEN7_GPU_CC_CX_GDSCR; ret = adreno_device_probe(pdev, adreno_dev); if (ret) diff --git a/adreno_gen7_11_0_snapshot.h b/adreno_gen7_11_0_snapshot.h new file mode 100644 index 0000000000..3276436df9 --- /dev/null +++ b/adreno_gen7_11_0_snapshot.h @@ -0,0 +1,1276 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ +#ifndef __ADRENO_GEN7_11_0_SNAPSHOT_H +#define __ADRENO_GEN7_11_0_SNAPSHOT_H + +#include "adreno_gen7_snapshot.h" + +static const u32 gen7_11_0_debugbus_blocks[] = { + DEBUGBUS_CP_0_0, + DEBUGBUS_CP_0_1, + DEBUGBUS_RBBM, + DEBUGBUS_HLSQ, + DEBUGBUS_UCHE_0, + DEBUGBUS_TESS_BR, + DEBUGBUS_TESS_BV, + DEBUGBUS_PC_BR, + DEBUGBUS_PC_BV, + DEBUGBUS_VFDP_BR, + DEBUGBUS_VFDP_BV, + DEBUGBUS_VPC_BR, + DEBUGBUS_VPC_BV, + DEBUGBUS_TSE_BR, + DEBUGBUS_TSE_BV, + DEBUGBUS_RAS_BR, + DEBUGBUS_RAS_BV, + DEBUGBUS_VSC, + DEBUGBUS_COM_0, + DEBUGBUS_LRZ_BR, + DEBUGBUS_LRZ_BV, + DEBUGBUS_UFC_0, + DEBUGBUS_UFC_1, + DEBUGBUS_GMU_GX, + DEBUGBUS_DBGC, + DEBUGBUS_CX, + DEBUGBUS_GMU_CX, + DEBUGBUS_GPC_BR, + DEBUGBUS_GPC_BV, + DEBUGBUS_LARC, + DEBUGBUS_HLSQ_SPTP, + DEBUGBUS_RB_0, + DEBUGBUS_RB_1, + DEBUGBUS_RB_2, + DEBUGBUS_UCHE_WRAPPER, + DEBUGBUS_CCU_0, + DEBUGBUS_CCU_1, + DEBUGBUS_CCU_2, + DEBUGBUS_VFD_BR_0, + DEBUGBUS_VFD_BR_1, + DEBUGBUS_VFD_BR_2, + DEBUGBUS_VFD_BV_0, + DEBUGBUS_USP_0, + DEBUGBUS_USP_1, + DEBUGBUS_USP_2, + DEBUGBUS_TP_0, + DEBUGBUS_TP_1, + DEBUGBUS_TP_2, + DEBUGBUS_TP_3, + DEBUGBUS_TP_4, + DEBUGBUS_TP_5, + DEBUGBUS_USPTP_0, + DEBUGBUS_USPTP_1, + DEBUGBUS_USPTP_2, + DEBUGBUS_USPTP_3, + DEBUGBUS_USPTP_4, + DEBUGBUS_USPTP_5, +}; + +static const u32 gen7_11_0_gbif_debugbus_blocks[] = { + DEBUGBUS_GBIF_GX, +}; + +static const u32 gen7_11_0_cx_debugbus_blocks[] = { + DEBUGBUS_CX, + DEBUGBUS_GMU_CX, + DEBUGBUS_GBIF_CX, +}; + +static struct gen7_shader_block gen7_11_0_shader_blocks[] = { + { TP0_TMO_DATA, 0x0200, 3, 2, PIPE_BR, USPTP }, + { TP0_SMO_DATA, 0x0080, 3, 2, PIPE_BR, USPTP }, + { TP0_MIPMAP_BASE_DATA, 0x03C0, 3, 2, PIPE_BR, USPTP }, + { SP_INST_DATA, 0x0800, 3, 2, PIPE_BR, USPTP }, + { SP_INST_DATA_1, 0x0800, 3, 2, PIPE_BR, USPTP }, + { SP_LB_0_DATA, 0x0800, 3, 2, PIPE_BR, USPTP }, + { SP_LB_1_DATA, 0x0800, 3, 2, PIPE_BR, USPTP }, + { SP_LB_2_DATA, 0x0800, 3, 2, PIPE_BR, USPTP }, + { SP_LB_3_DATA, 0x0800, 3, 2, PIPE_BR, USPTP }, + { SP_LB_4_DATA, 0x0800, 3, 2, 
PIPE_BR, USPTP }, + { SP_LB_5_DATA, 0x0800, 3, 2, PIPE_BR, USPTP }, + { SP_LB_6_DATA, 0x0800, 3, 2, PIPE_BR, USPTP }, + { SP_LB_7_DATA, 0x0800, 3, 2, PIPE_BR, USPTP }, + { SP_CB_RAM, 0x0390, 3, 2, PIPE_BR, USPTP }, + { SP_LB_13_DATA, 0x0800, 3, 2, PIPE_BR, USPTP }, + { SP_LB_14_DATA, 0x0800, 3, 2, PIPE_BR, USPTP }, + { SP_INST_TAG, 0x00C0, 3, 2, PIPE_BR, USPTP }, + { SP_INST_DATA_2, 0x0800, 3, 2, PIPE_BR, USPTP }, + { SP_TMO_TAG, 0x0080, 3, 2, PIPE_BR, USPTP }, + { SP_SMO_TAG, 0x0080, 3, 2, PIPE_BR, USPTP }, + { SP_STATE_DATA, 0x0040, 3, 2, PIPE_BR, USPTP }, + { SP_HWAVE_RAM, 0x0100, 3, 2, PIPE_BR, USPTP }, + { SP_L0_INST_BUF, 0x0050, 3, 2, PIPE_BR, USPTP }, + { SP_LB_8_DATA, 0x0800, 3, 2, PIPE_BR, USPTP }, + { SP_LB_9_DATA, 0x0800, 3, 2, PIPE_BR, USPTP }, + { SP_LB_10_DATA, 0x0800, 3, 2, PIPE_BR, USPTP }, + { SP_LB_11_DATA, 0x0800, 3, 2, PIPE_BR, USPTP }, + { SP_LB_12_DATA, 0x0800, 3, 2, PIPE_BR, USPTP }, + { HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE }, + { HLSQ_CPS_BE_CTXT_BUF_RAM_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0300, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0300, 1, 1, PIPE_BV, HLSQ_STATE }, + { HLSQ_GFX_CPS_BE_CTXT_BUF_RAM, 0x0300, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BV, HLSQ_STATE }, + { HLSQ_CHUNK_CPS_RAM, 0x0300, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_CHUNK_CVS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_CHUNK_CVS_RAM_TAG, 0x0040, 1, 1, PIPE_BV, HLSQ_STATE }, + { HLSQ_CHUNK_CPS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_ICB_CVS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_ICB_CVS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE }, + { HLSQ_ICB_CPS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_CVS_MISC_RAM, 0x0280, 1, 1, PIPE_BR, HLSQ_STATE }, + { 
HLSQ_CVS_MISC_RAM, 0x0280, 1, 1, PIPE_BV, HLSQ_STATE }, + { HLSQ_CPS_MISC_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_CPS_MISC_RAM_1, 0x01C0, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BV, HLSQ_STATE }, + { HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BV, HLSQ_STATE }, + { HLSQ_GFX_CPS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_CVS_MISC_RAM_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_CVS_MISC_RAM_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE }, + { HLSQ_CPS_MISC_RAM_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_INST_RAM_TAG, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_INST_RAM_TAG, 0x0080, 1, 1, PIPE_BV, HLSQ_STATE }, + { HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0064, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0038, 1, 1, PIPE_BV, HLSQ_STATE }, + { HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0064, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_INST_RAM_1, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_STPROC_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_BV_BE_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_BV_BE_META, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE }, + { HLSQ_DATAPATH_META, 0x0020, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_BV, HLSQ_STATE }, + { HLSQ_INDIRECT_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_BACKEND_META, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE }, + { HLSQ_BACKEND_META, 0x0040, 1, 1, PIPE_BV, HLSQ_STATE }, +}; + +/* + * Block : ['GBIF'] + * Pipeline: PIPE_NONE + * pairs : 5 (Regs:38) + */ +static const u32 gen7_11_0_gbif_registers[] = { + 0x03c00, 0x03c0b, 0x03c40, 0x03c42, 0x03c45, 0x03c47, 0x03c49, 0x03c4a, + 0x03cc0, 0x03cd1, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_gbif_registers), 8)); + +/* + * Block : ['BROADCAST', 'CP', 'GRAS', 'PC'] + * Block : ['RBBM', 'RDVM', 
'UCHE'] + * Block : ['VFD', 'VPC', 'VSC'] + * Pipeline: PIPE_NONE + * pairs : 162 (Regs:1489) + */ +static const u32 gen7_11_0_gpu_registers[] = { + 0x00000, 0x00000, 0x00002, 0x00002, 0x00011, 0x00012, 0x00016, 0x0001b, + 0x0001f, 0x00032, 0x00038, 0x0003c, 0x00042, 0x00042, 0x00044, 0x00044, + 0x00047, 0x00047, 0x00049, 0x0004a, 0x0004c, 0x0004c, 0x00050, 0x00050, + 0x00056, 0x00056, 0x000ad, 0x000ae, 0x000b0, 0x000b0, 0x000b4, 0x000b4, + 0x000b8, 0x000b8, 0x000bc, 0x000bc, 0x000c0, 0x000c0, 0x000c4, 0x000c4, + 0x000c8, 0x000c8, 0x000cc, 0x000cc, 0x000d0, 0x000d0, 0x000d4, 0x000d4, + 0x000d8, 0x000d8, 0x000dc, 0x000dc, 0x000e0, 0x000e0, 0x000e4, 0x000e4, + 0x000e8, 0x000e8, 0x000ec, 0x000ec, 0x000f0, 0x000f0, 0x000f4, 0x000f4, + 0x000f8, 0x000f8, 0x00100, 0x00100, 0x00104, 0x0010c, 0x0010f, 0x0011d, + 0x0012f, 0x0012f, 0x00200, 0x0020d, 0x00210, 0x00213, 0x00215, 0x0023d, + 0x00260, 0x00270, 0x00272, 0x00274, 0x00281, 0x0028d, 0x00300, 0x00401, + 0x00410, 0x00451, 0x00460, 0x004a3, 0x004c0, 0x004d1, 0x00500, 0x00500, + 0x00507, 0x0050b, 0x0050f, 0x0050f, 0x00511, 0x00511, 0x00533, 0x00536, + 0x00540, 0x00555, 0x00564, 0x00567, 0x00574, 0x00577, 0x00584, 0x0059b, + 0x00800, 0x00808, 0x00810, 0x00813, 0x00820, 0x00821, 0x00823, 0x00827, + 0x00830, 0x00834, 0x0083f, 0x00841, 0x00843, 0x00847, 0x0084f, 0x00886, + 0x008a0, 0x008ab, 0x008c0, 0x008c0, 0x008c4, 0x008c6, 0x008d0, 0x008dd, + 0x008e0, 0x008e6, 0x008f0, 0x008f3, 0x00900, 0x00903, 0x00908, 0x00911, + 0x00928, 0x0093e, 0x00942, 0x0094d, 0x00980, 0x00984, 0x0098d, 0x0098f, + 0x009b0, 0x009b4, 0x009c2, 0x009c9, 0x009ce, 0x009d7, 0x009e0, 0x009e7, + 0x00a00, 0x00a00, 0x00a02, 0x00a03, 0x00a10, 0x00a4f, 0x00a61, 0x00a9f, + 0x00ad0, 0x00adb, 0x00b00, 0x00b31, 0x00b35, 0x00b3c, 0x00b40, 0x00b40, + 0x00c00, 0x00c00, 0x00c02, 0x00c04, 0x00c06, 0x00c06, 0x00c10, 0x00cd9, + 0x00ce0, 0x00d0c, 0x00df0, 0x00df4, 0x00e01, 0x00e02, 0x00e07, 0x00e0e, + 0x00e10, 0x00e13, 0x00e17, 0x00e19, 0x00e1b, 0x00e2b, 0x00e30, 0x00e32, + 
0x00e38, 0x00e3c, 0x00e40, 0x00e4b, 0x0ec00, 0x0ec01, 0x0ec05, 0x0ec05, + 0x0ec07, 0x0ec07, 0x0ec0a, 0x0ec0a, 0x0ec12, 0x0ec12, 0x0ec26, 0x0ec28, + 0x0ec2b, 0x0ec2d, 0x0ec2f, 0x0ec2f, 0x0ec40, 0x0ec41, 0x0ec45, 0x0ec45, + 0x0ec47, 0x0ec47, 0x0ec4a, 0x0ec4a, 0x0ec52, 0x0ec52, 0x0ec66, 0x0ec68, + 0x0ec6b, 0x0ec6d, 0x0ec6f, 0x0ec6f, 0x0ec80, 0x0ec81, 0x0ec85, 0x0ec85, + 0x0ec87, 0x0ec87, 0x0ec8a, 0x0ec8a, 0x0ec92, 0x0ec92, 0x0eca6, 0x0eca8, + 0x0ecab, 0x0ecad, 0x0ecaf, 0x0ecaf, 0x0ecc0, 0x0ecc1, 0x0ecc5, 0x0ecc5, + 0x0ecc7, 0x0ecc7, 0x0ecca, 0x0ecca, 0x0ecd2, 0x0ecd2, 0x0ece6, 0x0ece8, + 0x0eceb, 0x0eced, 0x0ecef, 0x0ecef, 0x0ed00, 0x0ed01, 0x0ed05, 0x0ed05, + 0x0ed07, 0x0ed07, 0x0ed0a, 0x0ed0a, 0x0ed12, 0x0ed12, 0x0ed26, 0x0ed28, + 0x0ed2b, 0x0ed2d, 0x0ed2f, 0x0ed2f, 0x0ed40, 0x0ed41, 0x0ed45, 0x0ed45, + 0x0ed47, 0x0ed47, 0x0ed4a, 0x0ed4a, 0x0ed52, 0x0ed52, 0x0ed66, 0x0ed68, + 0x0ed6b, 0x0ed6d, 0x0ed6f, 0x0ed6f, 0x0ed80, 0x0ed81, 0x0ed85, 0x0ed85, + 0x0ed87, 0x0ed87, 0x0ed8a, 0x0ed8a, 0x0ed92, 0x0ed92, 0x0eda6, 0x0eda8, + 0x0edab, 0x0edad, 0x0edaf, 0x0edaf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_gpu_registers), 8)); + +/* + * Block : ['GMUAO', 'GMUCX', 'GMUCX_RAM'] + * Pipeline: PIPE_NONE + * pairs : 126 (Regs:334) + */ +static const u32 gen7_11_0_gmu_registers[] = { + 0x10001, 0x10001, 0x10003, 0x10003, 0x10401, 0x10401, 0x10403, 0x10403, + 0x10801, 0x10801, 0x10803, 0x10803, 0x10c01, 0x10c01, 0x10c03, 0x10c03, + 0x11001, 0x11001, 0x11003, 0x11003, 0x11401, 0x11401, 0x11403, 0x11403, + 0x11801, 0x11801, 0x11803, 0x11803, 0x11c01, 0x11c01, 0x11c03, 0x11c03, + 0x1f400, 0x1f40b, 0x1f40f, 0x1f411, 0x1f500, 0x1f500, 0x1f507, 0x1f507, + 0x1f509, 0x1f50b, 0x1f705, 0x1f705, 0x1f710, 0x1f711, 0x1f713, 0x1f716, + 0x1f720, 0x1f724, 0x1f729, 0x1f729, 0x1f730, 0x1f747, 0x1f800, 0x1f804, + 0x1f807, 0x1f808, 0x1f80b, 0x1f80c, 0x1f80f, 0x1f80f, 0x1f811, 0x1f811, + 0x1f813, 0x1f817, 0x1f819, 0x1f81c, 0x1f824, 0x1f82a, 0x1f82d, 0x1f830, + 0x1f840, 
0x1f853, 0x1f860, 0x1f860, 0x1f862, 0x1f864, 0x1f868, 0x1f868, + 0x1f870, 0x1f879, 0x1f87f, 0x1f87f, 0x1f888, 0x1f889, 0x1f890, 0x1f892, + 0x1f894, 0x1f896, 0x1f8a0, 0x1f8a2, 0x1f8a4, 0x1f8af, 0x1f8b8, 0x1f8b9, + 0x1f8c0, 0x1f8c1, 0x1f8c3, 0x1f8c4, 0x1f8d0, 0x1f8d0, 0x1f8ec, 0x1f8ec, + 0x1f8f0, 0x1f8f1, 0x1f910, 0x1f913, 0x1f920, 0x1f921, 0x1f924, 0x1f925, + 0x1f928, 0x1f929, 0x1f92c, 0x1f92d, 0x1f940, 0x1f940, 0x1f942, 0x1f944, + 0x1f948, 0x1f94a, 0x1f951, 0x1f951, 0x1f95d, 0x1f95d, 0x1f962, 0x1f962, + 0x1f973, 0x1f973, 0x1f980, 0x1f981, 0x1f984, 0x1f986, 0x1f992, 0x1f993, + 0x1f996, 0x1f99e, 0x1f9c0, 0x1f9c0, 0x1f9c5, 0x1f9d4, 0x1f9f0, 0x1f9f1, + 0x1f9f8, 0x1f9fa, 0x1f9fc, 0x1f9fc, 0x1fa00, 0x1fa03, 0x20000, 0x2000b, + 0x20010, 0x20012, 0x20018, 0x20018, 0x2001a, 0x2001a, 0x20020, 0x20021, + 0x20024, 0x20024, 0x20030, 0x20031, 0x20034, 0x20036, 0x23801, 0x23801, + 0x23803, 0x23803, 0x23805, 0x23805, 0x23807, 0x23807, 0x23809, 0x23809, + 0x2380b, 0x2380b, 0x2380d, 0x2380d, 0x2380f, 0x2380f, 0x23811, 0x23811, + 0x23813, 0x23813, 0x23815, 0x23815, 0x23817, 0x23817, 0x23819, 0x23819, + 0x2381b, 0x2381b, 0x2381d, 0x2381d, 0x2381f, 0x23820, 0x23822, 0x23822, + 0x23824, 0x23824, 0x23826, 0x23826, 0x23828, 0x23828, 0x2382a, 0x2382a, + 0x2382c, 0x2382c, 0x2382e, 0x2382e, 0x23830, 0x23830, 0x23832, 0x23832, + 0x23834, 0x23834, 0x23836, 0x23836, 0x23838, 0x23838, 0x2383a, 0x2383a, + 0x2383c, 0x2383c, 0x2383e, 0x2383e, 0x23840, 0x23847, 0x23b00, 0x23b01, + 0x23b03, 0x23b03, 0x23b05, 0x23b0e, 0x23b10, 0x23b13, 0x23b15, 0x23b16, + 0x23b28, 0x23b28, 0x23b30, 0x23b30, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_gmu_registers), 8)); + +/* + * Block : ['GMUGX'] + * Pipeline: PIPE_NONE + * pairs : 4 (Regs:48) + */ +static const u32 gen7_11_0_gmugx_registers[] = { + 0x1a802, 0x1a802, 0x1a883, 0x1a884, 0x1a900, 0x1a92b, 0x1a940, 0x1a940, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_gmugx_registers), 8)); + +/* + * Block : ['CX_MISC'] + 
* Pipeline: PIPE_NONE + * pairs : 5 (Regs:52) + */ +static const u32 gen7_11_0_cx_misc_registers[] = { + 0x27800, 0x27800, 0x27810, 0x27814, 0x27820, 0x27824, 0x27828, 0x2782a, + 0x27832, 0x27857, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_cx_misc_registers), 8)); + +/* + * Block : ['DBGC'] + * Pipeline: PIPE_NONE + * pairs : 8 (Regs:94) + */ +static const u32 gen7_11_0_dbgc_registers[] = { + 0x00600, 0x0061c, 0x0061e, 0x00634, 0x00640, 0x0065b, 0x00679, 0x0067b, + 0x00699, 0x00699, 0x0069b, 0x0069e, 0x006a0, 0x006a3, 0x006c0, 0x006c1, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_dbgc_registers), 8)); + +/* + * Block : ['CX_DBGC'] + * Pipeline: PIPE_NONE + * pairs : 5 (Regs:85) + */ +static const u32 gen7_11_0_cx_dbgc_registers[] = { + 0x18400, 0x1841c, 0x1841e, 0x18434, 0x18440, 0x1845b, 0x18479, 0x1847b, + 0x18580, 0x18581, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_cx_dbgc_registers), 8)); + +/* + * Block : ['BROADCAST', 'CP', 'CX_DBGC', 'CX_MISC', 'DBGC', 'GBIF'] + * Block : ['GMUAO', 'GMUCX', 'GMUGX', 'GRAS', 'PC', 'RBBM'] + * Block : ['RDVM', 'UCHE', 'VFD', 'VPC', 'VSC'] + * Pipeline: PIPE_BR + * Cluster : CLUSTER_NONE + * pairs : 30 (Regs:529) + */ +static const u32 gen7_11_0_non_context_pipe_br_registers[] = { + 0x00887, 0x0088c, 0x08600, 0x08600, 0x08602, 0x08602, 0x08610, 0x0861b, + 0x08620, 0x08620, 0x08630, 0x08630, 0x08637, 0x08639, 0x08640, 0x08640, + 0x09600, 0x09600, 0x09602, 0x09603, 0x0960a, 0x09616, 0x09624, 0x0963a, + 0x09640, 0x09640, 0x09e00, 0x09e00, 0x09e02, 0x09e07, 0x09e0a, 0x09e16, + 0x09e19, 0x09e19, 0x09e1c, 0x09e1c, 0x09e20, 0x09e25, 0x09e30, 0x09e31, + 0x09e40, 0x09e51, 0x09e64, 0x09e64, 0x09e70, 0x09e72, 0x09e78, 0x09e79, + 0x09e80, 0x09fff, 0x0a600, 0x0a600, 0x0a603, 0x0a603, 0x0a610, 0x0a61f, + 0x0a630, 0x0a631, 0x0a638, 0x0a63c, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_pipe_br_registers), 8)); + +/* + * Block : 
['BROADCAST', 'CP', 'CX_DBGC', 'CX_MISC', 'DBGC', 'GBIF'] + * Block : ['GMUAO', 'GMUCX', 'GMUGX', 'GRAS', 'PC', 'RBBM'] + * Block : ['RDVM', 'UCHE', 'VFD', 'VPC', 'VSC'] + * Pipeline: PIPE_BV + * Cluster : CLUSTER_NONE + * pairs : 30 (Regs:529) + */ +static const u32 gen7_11_0_non_context_pipe_bv_registers[] = { + 0x00887, 0x0088c, 0x08600, 0x08600, 0x08602, 0x08602, 0x08610, 0x0861b, + 0x08620, 0x08620, 0x08630, 0x08630, 0x08637, 0x08639, 0x08640, 0x08640, + 0x09600, 0x09600, 0x09602, 0x09603, 0x0960a, 0x09616, 0x09624, 0x0963a, + 0x09640, 0x09640, 0x09e00, 0x09e00, 0x09e02, 0x09e07, 0x09e0a, 0x09e16, + 0x09e19, 0x09e19, 0x09e1c, 0x09e1c, 0x09e20, 0x09e25, 0x09e30, 0x09e31, + 0x09e40, 0x09e51, 0x09e64, 0x09e64, 0x09e70, 0x09e72, 0x09e78, 0x09e79, + 0x09e80, 0x09fff, 0x0a600, 0x0a600, 0x0a603, 0x0a603, 0x0a610, 0x0a61f, + 0x0a630, 0x0a631, 0x0a638, 0x0a63c, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_pipe_bv_registers), 8)); + +/* + * Block : ['BROADCAST', 'CP', 'CX_DBGC', 'CX_MISC', 'DBGC', 'GBIF'] + * Block : ['GMUAO', 'GMUCX', 'GMUGX', 'GRAS', 'PC', 'RBBM'] + * Block : ['RDVM', 'UCHE', 'VFD', 'VPC', 'VSC'] + * Pipeline: PIPE_LPAC + * Cluster : CLUSTER_NONE + * pairs : 2 (Regs:7) + */ +static const u32 gen7_11_0_non_context_pipe_lpac_registers[] = { + 0x00887, 0x0088c, 0x00f80, 0x00f80, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_pipe_lpac_registers), 8)); + +/* + * Block : ['RB'] + * Pipeline: PIPE_BR + * Cluster : CLUSTER_NONE + * pairs : 4 (Regs:33) + */ +static const u32 gen7_11_0_non_context_rb_pipe_br_rac_registers[] = { + 0x08e10, 0x08e1c, 0x08e20, 0x08e25, 0x08e51, 0x08e5a, 0x08ea0, 0x08ea3, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_rb_pipe_br_rac_registers), 8)); + +/* + * Block : ['RB'] + * Pipeline: PIPE_BR + * Cluster : CLUSTER_NONE + * pairs : 15 (Regs:62) + */ +static const u32 gen7_11_0_non_context_rb_pipe_br_rbp_registers[] = { + 
0x08e01, 0x08e01, 0x08e04, 0x08e04, 0x08e06, 0x08e09, 0x08e0c, 0x08e0c, + 0x08e28, 0x08e28, 0x08e2c, 0x08e35, 0x08e3b, 0x08e40, 0x08e50, 0x08e50, + 0x08e5b, 0x08e5d, 0x08e5f, 0x08e5f, 0x08e61, 0x08e61, 0x08e63, 0x08e66, + 0x08e68, 0x08e69, 0x08e70, 0x08e79, 0x08e80, 0x08e8f, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_rb_pipe_br_rbp_registers), 8)); + +/* + * Block : ['SP'] + * Pipeline: PIPE_BR + * Cluster : CLUSTER_NONE + * Location: HLSQ_STATE + * pairs : 3 (Regs:20) + */ +static const u32 gen7_11_0_non_context_sp_pipe_br_hlsq_state_registers[] = { + 0x0ae52, 0x0ae52, 0x0ae60, 0x0ae67, 0x0ae69, 0x0ae73, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_sp_pipe_br_hlsq_state_registers), 8)); + +/* + * Block : ['SP'] + * Pipeline: PIPE_BR + * Cluster : CLUSTER_NONE + * Location: SP_TOP + * pairs : 10 (Regs:60) + */ +static const u32 gen7_11_0_non_context_sp_pipe_br_sp_top_registers[] = { + 0x0ae00, 0x0ae00, 0x0ae02, 0x0ae04, 0x0ae06, 0x0ae09, 0x0ae0c, 0x0ae0c, + 0x0ae0f, 0x0ae0f, 0x0ae28, 0x0ae2b, 0x0ae35, 0x0ae35, 0x0ae3a, 0x0ae3f, + 0x0ae50, 0x0ae52, 0x0ae80, 0x0aea3, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_sp_pipe_br_sp_top_registers), 8)); + +/* + * Block : ['SP'] + * Pipeline: PIPE_BR + * Cluster : CLUSTER_NONE + * Location: USPTP + * pairs : 10 (Regs:21) + */ +static const u32 gen7_11_0_non_context_sp_pipe_br_usptp_registers[] = { + 0x0ae00, 0x0ae00, 0x0ae02, 0x0ae04, 0x0ae06, 0x0ae09, 0x0ae0c, 0x0ae0c, + 0x0ae0f, 0x0ae0f, 0x0ae30, 0x0ae32, 0x0ae35, 0x0ae35, 0x0ae3a, 0x0ae3b, + 0x0ae3e, 0x0ae3f, 0x0ae50, 0x0ae52, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_sp_pipe_br_usptp_registers), 8)); + +/* + * Block : ['SP'] + * Pipeline: PIPE_LPAC + * Cluster : CLUSTER_NONE + * Location: HLSQ_STATE + * pairs : 1 (Regs:4) + */ +static const u32 gen7_11_0_non_context_sp_pipe_lpac_hlsq_state_registers[] = { + 0x0af88, 0x0af8b, 
+ UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_sp_pipe_lpac_hlsq_state_registers), 8)); + +/* + * Block : ['SP'] + * Pipeline: PIPE_LPAC + * Cluster : CLUSTER_NONE + * Location: SP_TOP + * pairs : 1 (Regs:5) + */ +static const u32 gen7_11_0_non_context_sp_pipe_lpac_sp_top_registers[] = { + 0x0af80, 0x0af84, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_sp_pipe_lpac_sp_top_registers), 8)); + +/* + * Block : ['SP'] + * Pipeline: PIPE_LPAC + * Cluster : CLUSTER_NONE + * Location: USPTP + * pairs : 2 (Regs:8) + */ +static const u32 gen7_11_0_non_context_sp_pipe_lpac_usptp_registers[] = { + 0x0af80, 0x0af84, 0x0af90, 0x0af92, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_sp_pipe_lpac_usptp_registers), 8)); + +/* + * Block : ['TPL1'] + * Pipeline: PIPE_NONE + * Cluster : CLUSTER_NONE + * Location: USPTP + * pairs : 5 (Regs:30) + */ +static const u32 gen7_11_0_non_context_tpl1_pipe_none_usptp_registers[] = { + 0x0b602, 0x0b602, 0x0b604, 0x0b604, 0x0b608, 0x0b60c, 0x0b60f, 0x0b621, + 0x0b630, 0x0b633, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_tpl1_pipe_none_usptp_registers), 8)); + +/* + * Block : ['TPL1'] + * Pipeline: PIPE_BR + * Cluster : CLUSTER_NONE + * Location: USPTP + * pairs : 1 (Regs:1) + */ +static const u32 gen7_11_0_non_context_tpl1_pipe_br_usptp_registers[] = { + 0x0b600, 0x0b600, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_tpl1_pipe_br_usptp_registers), 8)); + +/* + * Block : ['TPL1'] + * Pipeline: PIPE_LPAC + * Cluster : CLUSTER_NONE + * Location: USPTP + * pairs : 1 (Regs:1) + */ +static const u32 gen7_11_0_non_context_tpl1_pipe_lpac_usptp_registers[] = { + 0x0b780, 0x0b780, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_tpl1_pipe_lpac_usptp_registers), 8)); + +/* + * Block : ['GRAS'] + * Pipeline: PIPE_BR + * Cluster : CLUSTER_GRAS + * pairs : 
14 (Regs:290) + */ +static const u32 gen7_11_0_gras_pipe_br_cluster_gras_registers[] = { + 0x08000, 0x0800c, 0x08010, 0x08092, 0x08094, 0x08099, 0x0809b, 0x0809d, + 0x080a0, 0x080a7, 0x080af, 0x080f1, 0x080f4, 0x080f6, 0x080f8, 0x080fa, + 0x08100, 0x08107, 0x08109, 0x0810b, 0x08110, 0x08113, 0x08120, 0x0813f, + 0x08400, 0x08406, 0x0840a, 0x0840b, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_gras_pipe_br_cluster_gras_registers), 8)); + +/* + * Block : ['GRAS'] + * Pipeline: PIPE_BV + * Cluster : CLUSTER_GRAS + * pairs : 14 (Regs:290) + */ +static const u32 gen7_11_0_gras_pipe_bv_cluster_gras_registers[] = { + 0x08000, 0x0800c, 0x08010, 0x08092, 0x08094, 0x08099, 0x0809b, 0x0809d, + 0x080a0, 0x080a7, 0x080af, 0x080f1, 0x080f4, 0x080f6, 0x080f8, 0x080fa, + 0x08100, 0x08107, 0x08109, 0x0810b, 0x08110, 0x08113, 0x08120, 0x0813f, + 0x08400, 0x08406, 0x0840a, 0x0840b, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_gras_pipe_bv_cluster_gras_registers), 8)); + +/* + * Block : ['PC'] + * Pipeline: PIPE_BR + * Cluster : CLUSTER_FE + * pairs : 6 (Regs:27) + */ +static const u32 gen7_11_0_pc_pipe_br_cluster_fe_registers[] = { + 0x09800, 0x09804, 0x09806, 0x0980a, 0x09810, 0x09811, 0x09884, 0x09886, + 0x09970, 0x09972, 0x09b00, 0x09b08, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_pc_pipe_br_cluster_fe_registers), 8)); + +/* + * Block : ['PC'] + * Pipeline: PIPE_BV + * Cluster : CLUSTER_FE + * pairs : 6 (Regs:27) + */ +static const u32 gen7_11_0_pc_pipe_bv_cluster_fe_registers[] = { + 0x09800, 0x09804, 0x09806, 0x0980a, 0x09810, 0x09811, 0x09884, 0x09886, + 0x09970, 0x09972, 0x09b00, 0x09b08, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_pc_pipe_bv_cluster_fe_registers), 8)); + +/* + * Block : ['VFD'] + * Pipeline: PIPE_BR + * Cluster : CLUSTER_FE + * pairs : 2 (Regs:236) + */ +static const u32 gen7_11_0_vfd_pipe_br_cluster_fe_registers[] = { + 0x0a000, 0x0a009, 0x0a00e, 0x0a0ef, + 
UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_vfd_pipe_br_cluster_fe_registers), 8)); + +/* + * Block : ['VFD'] + * Pipeline: PIPE_BV + * Cluster : CLUSTER_FE + * pairs : 2 (Regs:236) + */ +static const u32 gen7_11_0_vfd_pipe_bv_cluster_fe_registers[] = { + 0x0a000, 0x0a009, 0x0a00e, 0x0a0ef, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_vfd_pipe_bv_cluster_fe_registers), 8)); + +/* + * Block : ['VPC'] + * Pipeline: PIPE_BR + * Cluster : CLUSTER_FE + * pairs : 1 (Regs:8) + */ +static const u32 gen7_11_0_vpc_pipe_br_cluster_fe_registers[] = { + 0x09300, 0x09307, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_vpc_pipe_br_cluster_fe_registers), 8)); + +/* + * Block : ['VPC'] + * Pipeline: PIPE_BR + * Cluster : CLUSTER_PC_VS + * pairs : 2 (Regs:20) + */ +static const u32 gen7_11_0_vpc_pipe_br_cluster_pc_vs_registers[] = { + 0x09101, 0x0910c, 0x09300, 0x09307, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_vpc_pipe_br_cluster_pc_vs_registers), 8)); + +/* + * Block : ['VPC'] + * Pipeline: PIPE_BR + * Cluster : CLUSTER_VPC_PS + * pairs : 4 (Regs:60) + */ +static const u32 gen7_11_0_vpc_pipe_br_cluster_vpc_ps_registers[] = { + 0x09200, 0x0920f, 0x09212, 0x09216, 0x09218, 0x09236, 0x09300, 0x09307, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_vpc_pipe_br_cluster_vpc_ps_registers), 8)); + +/* + * Block : ['VPC'] + * Pipeline: PIPE_BV + * Cluster : CLUSTER_FE + * pairs : 1 (Regs:8) + */ +static const u32 gen7_11_0_vpc_pipe_bv_cluster_fe_registers[] = { + 0x09300, 0x09307, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_vpc_pipe_bv_cluster_fe_registers), 8)); + +/* + * Block : ['VPC'] + * Pipeline: PIPE_BV + * Cluster : CLUSTER_PC_VS + * pairs : 2 (Regs:20) + */ +static const u32 gen7_11_0_vpc_pipe_bv_cluster_pc_vs_registers[] = { + 0x09101, 0x0910c, 0x09300, 0x09307, + UINT_MAX, UINT_MAX, +}; 
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_vpc_pipe_bv_cluster_pc_vs_registers), 8)); + +/* + * Block : ['VPC'] + * Pipeline: PIPE_BV + * Cluster : CLUSTER_VPC_PS + * pairs : 4 (Regs:60) + */ +static const u32 gen7_11_0_vpc_pipe_bv_cluster_vpc_ps_registers[] = { + 0x09200, 0x0920f, 0x09212, 0x09216, 0x09218, 0x09236, 0x09300, 0x09307, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_vpc_pipe_bv_cluster_vpc_ps_registers), 8)); + +/* + * Block : ['RB'] + * Pipeline: PIPE_BR + * Cluster : CLUSTER_PS + * pairs : 39 (Regs:133) + */ +static const u32 gen7_11_0_rb_pipe_br_cluster_ps_rac_registers[] = { + 0x08802, 0x08802, 0x08804, 0x08806, 0x08809, 0x0880a, 0x0880e, 0x08811, + 0x08818, 0x0881e, 0x08821, 0x08821, 0x08823, 0x08826, 0x08829, 0x08829, + 0x0882b, 0x0882e, 0x08831, 0x08831, 0x08833, 0x08836, 0x08839, 0x08839, + 0x0883b, 0x0883e, 0x08841, 0x08841, 0x08843, 0x08846, 0x08849, 0x08849, + 0x0884b, 0x0884e, 0x08851, 0x08851, 0x08853, 0x08856, 0x08859, 0x08859, + 0x0885b, 0x0885e, 0x08860, 0x08864, 0x08870, 0x08870, 0x08873, 0x08876, + 0x08878, 0x08879, 0x08882, 0x08885, 0x08887, 0x08889, 0x08891, 0x08891, + 0x08898, 0x08899, 0x088c0, 0x088c1, 0x088e5, 0x088e5, 0x088f4, 0x088f5, + 0x08a00, 0x08a05, 0x08a10, 0x08a15, 0x08a20, 0x08a25, 0x08a30, 0x08a35, + 0x08c00, 0x08c01, 0x08c18, 0x08c1f, 0x08c26, 0x08c34, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_rb_pipe_br_cluster_ps_rac_registers), 8)); + +/* + * Block : ['RB'] + * Pipeline: PIPE_BR + * Cluster : CLUSTER_PS + * pairs : 34 (Regs:100) + */ +static const u32 gen7_11_0_rb_pipe_br_cluster_ps_rbp_registers[] = { + 0x08800, 0x08801, 0x08803, 0x08803, 0x0880b, 0x0880d, 0x08812, 0x08812, + 0x08820, 0x08820, 0x08822, 0x08822, 0x08827, 0x08828, 0x0882a, 0x0882a, + 0x0882f, 0x08830, 0x08832, 0x08832, 0x08837, 0x08838, 0x0883a, 0x0883a, + 0x0883f, 0x08840, 0x08842, 0x08842, 0x08847, 0x08848, 0x0884a, 0x0884a, + 0x0884f, 0x08850, 0x08852, 0x08852, 0x08857, 0x08858, 0x0885a, 0x0885a, 
+ 0x0885f, 0x0885f, 0x08865, 0x08865, 0x08871, 0x08872, 0x08877, 0x08877, + 0x08880, 0x08881, 0x08886, 0x08886, 0x08890, 0x08890, 0x088d0, 0x088e4, + 0x088e8, 0x088ea, 0x088f0, 0x088f0, 0x08900, 0x0891a, 0x08927, 0x08928, + 0x08c17, 0x08c17, 0x08c20, 0x08c25, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_rb_pipe_br_cluster_ps_rbp_registers), 8)); + +/* + * Block : ['SP'] + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_VS + * Location: HLSQ_STATE + * pairs : 28 (Regs:211) + */ +static const u32 gen7_11_0_sp_pipe_br_cluster_sp_vs_hlsq_state_registers[] = { + 0x0a800, 0x0a801, 0x0a81b, 0x0a81d, 0x0a822, 0x0a822, 0x0a824, 0x0a824, + 0x0a827, 0x0a82a, 0x0a830, 0x0a830, 0x0a832, 0x0a835, 0x0a83a, 0x0a83a, + 0x0a83c, 0x0a83c, 0x0a83f, 0x0a841, 0x0a85b, 0x0a85d, 0x0a862, 0x0a862, + 0x0a864, 0x0a864, 0x0a867, 0x0a867, 0x0a870, 0x0a870, 0x0a872, 0x0a872, + 0x0a88c, 0x0a88e, 0x0a893, 0x0a893, 0x0a895, 0x0a895, 0x0a898, 0x0a898, + 0x0a89a, 0x0a89d, 0x0a8a0, 0x0a8af, 0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab03, + 0x0ab05, 0x0ab05, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_br_cluster_sp_vs_hlsq_state_registers), 8)); + +/* + * Block : ['SP'] + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_VS + * Location: SP_TOP + * pairs : 21 (Regs:69) + */ +static const u32 gen7_11_0_sp_pipe_br_cluster_sp_vs_sp_top_registers[] = { + 0x0a800, 0x0a800, 0x0a81c, 0x0a81d, 0x0a822, 0x0a824, 0x0a82d, 0x0a82d, + 0x0a82f, 0x0a831, 0x0a834, 0x0a835, 0x0a83a, 0x0a83c, 0x0a840, 0x0a840, + 0x0a85c, 0x0a85d, 0x0a862, 0x0a864, 0x0a868, 0x0a868, 0x0a870, 0x0a871, + 0x0a88d, 0x0a88e, 0x0a893, 0x0a895, 0x0a899, 0x0a899, 0x0a8a0, 0x0a8af, + 0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab04, 0x0ab05, 0x0ab0a, 0x0ab1b, + 0x0ab20, 0x0ab20, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_br_cluster_sp_vs_sp_top_registers), 8)); + +/* + * Block : ['SP'] + * Pipeline: PIPE_BR + * Cluster : 
CLUSTER_SP_VS + * Location: USPTP + * pairs : 16 (Regs:269) + */ +static const u32 gen7_11_0_sp_pipe_br_cluster_sp_vs_usptp_registers[] = { + 0x0a800, 0x0a81b, 0x0a81e, 0x0a821, 0x0a823, 0x0a827, 0x0a82d, 0x0a82d, + 0x0a82f, 0x0a833, 0x0a836, 0x0a839, 0x0a83b, 0x0a85b, 0x0a85e, 0x0a861, + 0x0a863, 0x0a868, 0x0a870, 0x0a88c, 0x0a88f, 0x0a892, 0x0a894, 0x0a899, + 0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab05, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_br_cluster_sp_vs_usptp_registers), 8)); + +/* + * Block : ['SP'] + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_PS + * Location: HLSQ_STATE + * pairs : 19 (Regs:331) + */ +static const u32 gen7_11_0_sp_pipe_br_cluster_sp_ps_hlsq_state_registers[] = { + 0x0a980, 0x0a984, 0x0a99e, 0x0a99e, 0x0a9a7, 0x0a9a7, 0x0a9aa, 0x0a9aa, + 0x0a9ae, 0x0a9b0, 0x0a9b2, 0x0a9b5, 0x0a9ba, 0x0a9ba, 0x0a9bc, 0x0a9bc, + 0x0a9c4, 0x0a9c4, 0x0a9cd, 0x0a9cd, 0x0a9e0, 0x0a9fc, 0x0aa00, 0x0aa00, + 0x0aa30, 0x0aa31, 0x0aa40, 0x0aabf, 0x0ab00, 0x0ab03, 0x0ab05, 0x0ab05, + 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_br_cluster_sp_ps_hlsq_state_registers), 8)); + +/* + * Block : ['SP'] + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_PS + * Location: HLSQ_DP + * pairs : 3 (Regs:19) + */ +static const u32 gen7_11_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_registers[] = { + 0x0a9b1, 0x0a9b1, 0x0a9c6, 0x0a9cb, 0x0a9d4, 0x0a9df, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_registers), 8)); + +/* + * Block : ['SP'] + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_PS + * Location: SP_TOP + * pairs : 17 (Regs:73) + */ +static const u32 gen7_11_0_sp_pipe_br_cluster_sp_ps_sp_top_registers[] = { + 0x0a980, 0x0a980, 0x0a982, 0x0a984, 0x0a99e, 0x0a9a2, 0x0a9a7, 0x0a9a8, + 0x0a9aa, 0x0a9aa, 0x0a9ae, 0x0a9ae, 0x0a9b0, 0x0a9b1, 0x0a9b3, 0x0a9b5, + 0x0a9ba, 0x0a9bc, 0x0a9c5, 
0x0a9c5, 0x0a9e0, 0x0a9f9, 0x0aa00, 0x0aa01, + 0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab04, 0x0ab05, 0x0ab0a, 0x0ab1b, + 0x0ab20, 0x0ab20, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_br_cluster_sp_ps_sp_top_registers), 8)); + +/* + * Block : ['SP'] + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_PS + * Location: USPTP + * pairs : 17 (Regs:331) + */ +static const u32 gen7_11_0_sp_pipe_br_cluster_sp_ps_usptp_registers[] = { + 0x0a980, 0x0a982, 0x0a985, 0x0a9a6, 0x0a9a8, 0x0a9a9, 0x0a9ab, 0x0a9ae, + 0x0a9b0, 0x0a9b3, 0x0a9b6, 0x0a9b9, 0x0a9bb, 0x0a9bf, 0x0a9c2, 0x0a9c3, + 0x0a9c5, 0x0a9c5, 0x0a9cd, 0x0a9cd, 0x0a9d0, 0x0a9d3, 0x0aa01, 0x0aa01, + 0x0aa30, 0x0aa31, 0x0aa40, 0x0aabf, 0x0ab00, 0x0ab05, 0x0ab21, 0x0ab22, + 0x0ab40, 0x0abbf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_br_cluster_sp_ps_usptp_registers), 8)); + +/* + * Block : ['SP'] + * Pipeline: PIPE_BV + * Cluster : CLUSTER_SP_VS + * Location: HLSQ_STATE + * pairs : 27 (Regs:209) + */ +static const u32 gen7_11_0_sp_pipe_bv_cluster_sp_vs_hlsq_state_registers[] = { + 0x0a800, 0x0a801, 0x0a81b, 0x0a81d, 0x0a822, 0x0a822, 0x0a824, 0x0a824, + 0x0a827, 0x0a82a, 0x0a830, 0x0a830, 0x0a832, 0x0a835, 0x0a83a, 0x0a83a, + 0x0a83c, 0x0a83c, 0x0a83f, 0x0a841, 0x0a85b, 0x0a85d, 0x0a862, 0x0a862, + 0x0a864, 0x0a864, 0x0a867, 0x0a867, 0x0a870, 0x0a870, 0x0a872, 0x0a872, + 0x0a88c, 0x0a88e, 0x0a893, 0x0a893, 0x0a895, 0x0a895, 0x0a898, 0x0a898, + 0x0a89a, 0x0a89d, 0x0a8a0, 0x0a8af, 0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab02, + 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_bv_cluster_sp_vs_hlsq_state_registers), 8)); + +/* + * Block : ['SP'] + * Pipeline: PIPE_BV + * Cluster : CLUSTER_SP_VS + * Location: SP_TOP + * pairs : 20 (Regs:67) + */ +static const u32 gen7_11_0_sp_pipe_bv_cluster_sp_vs_sp_top_registers[] = { + 0x0a800, 0x0a800, 0x0a81c, 0x0a81d, 0x0a822, 0x0a824, 0x0a82d, 0x0a82d, 
+ 0x0a82f, 0x0a831, 0x0a834, 0x0a835, 0x0a83a, 0x0a83c, 0x0a840, 0x0a840, + 0x0a85c, 0x0a85d, 0x0a862, 0x0a864, 0x0a868, 0x0a868, 0x0a870, 0x0a871, + 0x0a88d, 0x0a88e, 0x0a893, 0x0a895, 0x0a899, 0x0a899, 0x0a8a0, 0x0a8af, + 0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_bv_cluster_sp_vs_sp_top_registers), 8)); + +/* + * Block : ['SP'] + * Pipeline: PIPE_BV + * Cluster : CLUSTER_SP_VS + * Location: USPTP + * pairs : 16 (Regs:266) + */ +static const u32 gen7_11_0_sp_pipe_bv_cluster_sp_vs_usptp_registers[] = { + 0x0a800, 0x0a81b, 0x0a81e, 0x0a821, 0x0a823, 0x0a827, 0x0a82d, 0x0a82d, + 0x0a82f, 0x0a833, 0x0a836, 0x0a839, 0x0a83b, 0x0a85b, 0x0a85e, 0x0a861, + 0x0a863, 0x0a868, 0x0a870, 0x0a88c, 0x0a88f, 0x0a892, 0x0a894, 0x0a899, + 0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab02, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_bv_cluster_sp_vs_usptp_registers), 8)); + +/* + * Block : ['SP'] + * Pipeline: PIPE_LPAC + * Cluster : CLUSTER_SP_PS + * Location: HLSQ_STATE + * pairs : 13 (Regs:294) + */ +static const u32 gen7_11_0_sp_pipe_lpac_cluster_sp_ps_hlsq_state_registers[] = { + 0x0a9b0, 0x0a9b0, 0x0a9b2, 0x0a9b5, 0x0a9ba, 0x0a9ba, 0x0a9bc, 0x0a9bc, + 0x0a9c4, 0x0a9c4, 0x0a9cd, 0x0a9cd, 0x0a9e2, 0x0a9e3, 0x0a9e6, 0x0a9fc, + 0x0aa00, 0x0aa00, 0x0aa31, 0x0aa31, 0x0aa40, 0x0aabf, 0x0ab00, 0x0ab01, + 0x0ab40, 0x0abbf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_lpac_cluster_sp_ps_hlsq_state_registers), 8)); + +/* + * Block : ['SP'] + * Pipeline: PIPE_LPAC + * Cluster : CLUSTER_SP_PS + * Location: HLSQ_DP + * pairs : 2 (Regs:13) + */ +static const u32 gen7_11_0_sp_pipe_lpac_cluster_sp_ps_hlsq_dp_registers[] = { + 0x0a9b1, 0x0a9b1, 0x0a9d4, 0x0a9df, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_lpac_cluster_sp_ps_hlsq_dp_registers), 8)); + +/* + * Block : ['SP'] 
+ * Pipeline: PIPE_LPAC + * Cluster : CLUSTER_SP_PS + * Location: SP_TOP + * pairs : 8 (Regs:33) + */ +static const u32 gen7_11_0_sp_pipe_lpac_cluster_sp_ps_sp_top_registers[] = { + 0x0a9b0, 0x0a9b1, 0x0a9b3, 0x0a9b5, 0x0a9ba, 0x0a9bc, 0x0a9c5, 0x0a9c5, + 0x0a9e2, 0x0a9e3, 0x0a9e6, 0x0a9f9, 0x0aa00, 0x0aa00, 0x0ab00, 0x0ab00, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_lpac_cluster_sp_ps_sp_top_registers), 8)); + +/* + * Block : ['SP'] + * Pipeline: PIPE_LPAC + * Cluster : CLUSTER_SP_PS + * Location: USPTP + * pairs : 11 (Regs:279) + */ +static const u32 gen7_11_0_sp_pipe_lpac_cluster_sp_ps_usptp_registers[] = { + 0x0a9b0, 0x0a9b3, 0x0a9b6, 0x0a9b9, 0x0a9bb, 0x0a9be, 0x0a9c2, 0x0a9c3, + 0x0a9c5, 0x0a9c5, 0x0a9cd, 0x0a9cd, 0x0a9d0, 0x0a9d3, 0x0aa31, 0x0aa31, + 0x0aa40, 0x0aabf, 0x0ab00, 0x0ab01, 0x0ab40, 0x0abbf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_lpac_cluster_sp_ps_usptp_registers), 8)); + +/* + * Block : ['TPL1'] + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_VS + * Location: USPTP + * pairs : 3 (Regs:10) + */ +static const u32 gen7_11_0_tpl1_pipe_br_cluster_sp_vs_usptp_registers[] = { + 0x0b300, 0x0b307, 0x0b309, 0x0b309, 0x0b310, 0x0b310, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_tpl1_pipe_br_cluster_sp_vs_usptp_registers), 8)); + +/* + * Block : ['TPL1'] + * Pipeline: PIPE_BR + * Cluster : CLUSTER_SP_PS + * Location: USPTP + * pairs : 6 (Regs:42) + */ +static const u32 gen7_11_0_tpl1_pipe_br_cluster_sp_ps_usptp_registers[] = { + 0x0b180, 0x0b183, 0x0b190, 0x0b195, 0x0b2c0, 0x0b2d5, 0x0b300, 0x0b307, + 0x0b309, 0x0b309, 0x0b310, 0x0b310, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_tpl1_pipe_br_cluster_sp_ps_usptp_registers), 8)); + +/* + * Block : ['TPL1'] + * Pipeline: PIPE_BV + * Cluster : CLUSTER_SP_VS + * Location: USPTP + * pairs : 3 (Regs:10) + */ +static const u32 gen7_11_0_tpl1_pipe_bv_cluster_sp_vs_usptp_registers[] = { + 
0x0b300, 0x0b307, 0x0b309, 0x0b309, 0x0b310, 0x0b310, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_tpl1_pipe_bv_cluster_sp_vs_usptp_registers), 8)); + +/* + * Block : ['TPL1'] + * Pipeline: PIPE_LPAC + * Cluster : CLUSTER_SP_PS + * Location: USPTP + * pairs : 5 (Regs:7) + */ +static const u32 gen7_11_0_tpl1_pipe_lpac_cluster_sp_ps_usptp_registers[] = { + 0x0b180, 0x0b181, 0x0b300, 0x0b301, 0x0b307, 0x0b307, 0x0b309, 0x0b309, + 0x0b310, 0x0b310, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_tpl1_pipe_lpac_cluster_sp_ps_usptp_registers), 8)); + +static const struct gen7_sel_reg gen7_11_0_rb_rac_sel = { + .host_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_HOST, + .cd_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_CD, + .val = 0, +}; + +static const struct gen7_sel_reg gen7_11_0_rb_rbp_sel = { + .host_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_HOST, + .cd_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_CD, + .val = 0x9, +}; + +static struct gen7_cluster_registers gen7_11_0_clusters[] = { + { CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT, + gen7_11_0_non_context_pipe_br_registers, }, + { CLUSTER_NONE, PIPE_BV, STATE_NON_CONTEXT, + gen7_11_0_non_context_pipe_bv_registers, }, + { CLUSTER_NONE, PIPE_LPAC, STATE_NON_CONTEXT, + gen7_11_0_non_context_pipe_lpac_registers, }, + { CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT, + gen7_11_0_non_context_rb_pipe_br_rac_registers, &gen7_11_0_rb_rac_sel, }, + { CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT, + gen7_11_0_non_context_rb_pipe_br_rbp_registers, &gen7_11_0_rb_rbp_sel, }, + { CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_11_0_rb_pipe_br_cluster_ps_rac_registers, &gen7_11_0_rb_rac_sel, }, + { CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_11_0_rb_pipe_br_cluster_ps_rac_registers, &gen7_11_0_rb_rac_sel, }, + { CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_11_0_rb_pipe_br_cluster_ps_rbp_registers, &gen7_11_0_rb_rbp_sel, }, + { CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_11_0_rb_pipe_br_cluster_ps_rbp_registers, &gen7_11_0_rb_rbp_sel, }, 
+ { CLUSTER_GRAS, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_11_0_gras_pipe_br_cluster_gras_registers, }, + { CLUSTER_GRAS, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_11_0_gras_pipe_br_cluster_gras_registers, }, + { CLUSTER_GRAS, PIPE_BV, STATE_FORCE_CTXT_0, + gen7_11_0_gras_pipe_bv_cluster_gras_registers, }, + { CLUSTER_GRAS, PIPE_BV, STATE_FORCE_CTXT_1, + gen7_11_0_gras_pipe_bv_cluster_gras_registers, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_11_0_pc_pipe_br_cluster_fe_registers, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_11_0_pc_pipe_br_cluster_fe_registers, }, + { CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0, + gen7_11_0_pc_pipe_bv_cluster_fe_registers, }, + { CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1, + gen7_11_0_pc_pipe_bv_cluster_fe_registers, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_11_0_vfd_pipe_br_cluster_fe_registers, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_11_0_vfd_pipe_br_cluster_fe_registers, }, + { CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0, + gen7_11_0_vfd_pipe_bv_cluster_fe_registers, }, + { CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1, + gen7_11_0_vfd_pipe_bv_cluster_fe_registers, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_11_0_vpc_pipe_br_cluster_fe_registers, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_11_0_vpc_pipe_br_cluster_fe_registers, }, + { CLUSTER_PC_VS, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_11_0_vpc_pipe_br_cluster_pc_vs_registers, }, + { CLUSTER_PC_VS, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_11_0_vpc_pipe_br_cluster_pc_vs_registers, }, + { CLUSTER_VPC_PS, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_11_0_vpc_pipe_br_cluster_vpc_ps_registers, }, + { CLUSTER_VPC_PS, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_11_0_vpc_pipe_br_cluster_vpc_ps_registers, }, + { CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0, + gen7_11_0_vpc_pipe_bv_cluster_fe_registers, }, + { CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1, + gen7_11_0_vpc_pipe_bv_cluster_fe_registers, }, + { CLUSTER_PC_VS, PIPE_BV, STATE_FORCE_CTXT_0, + 
gen7_11_0_vpc_pipe_bv_cluster_pc_vs_registers, }, + { CLUSTER_PC_VS, PIPE_BV, STATE_FORCE_CTXT_1, + gen7_11_0_vpc_pipe_bv_cluster_pc_vs_registers, }, + { CLUSTER_VPC_PS, PIPE_BV, STATE_FORCE_CTXT_0, + gen7_11_0_vpc_pipe_bv_cluster_vpc_ps_registers, }, + { CLUSTER_VPC_PS, PIPE_BV, STATE_FORCE_CTXT_1, + gen7_11_0_vpc_pipe_bv_cluster_vpc_ps_registers, }, +}; + +static struct gen7_sptp_cluster_registers gen7_11_0_sptp_clusters[] = { + { CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, HLSQ_STATE, + gen7_11_0_non_context_sp_pipe_br_hlsq_state_registers, 0xae00}, + { CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, SP_TOP, + gen7_11_0_non_context_sp_pipe_br_sp_top_registers, 0xae00}, + { CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, USPTP, + gen7_11_0_non_context_sp_pipe_br_usptp_registers, 0xae00}, + { CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, HLSQ_STATE, + gen7_11_0_non_context_sp_pipe_lpac_hlsq_state_registers, 0xaf80}, + { CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, SP_TOP, + gen7_11_0_non_context_sp_pipe_lpac_sp_top_registers, 0xaf80}, + { CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, USPTP, + gen7_11_0_non_context_sp_pipe_lpac_usptp_registers, 0xaf80}, + { CLUSTER_NONE, TP0_NCTX_REG, PIPE_NONE, 0, USPTP, + gen7_11_0_non_context_tpl1_pipe_none_usptp_registers, 0xb600}, + { CLUSTER_NONE, TP0_NCTX_REG, PIPE_BR, 0, USPTP, + gen7_11_0_non_context_tpl1_pipe_br_usptp_registers, 0xb600}, + { CLUSTER_NONE, TP0_NCTX_REG, PIPE_LPAC, 0, USPTP, + gen7_11_0_non_context_tpl1_pipe_lpac_usptp_registers, 0xb780}, + { CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE, + gen7_11_0_sp_pipe_br_cluster_sp_vs_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, SP_TOP, + gen7_11_0_sp_pipe_br_cluster_sp_vs_sp_top_registers, 0xa800}, + { CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP, + gen7_11_0_sp_pipe_br_cluster_sp_vs_usptp_registers, 0xa800}, + { CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_STATE, + gen7_11_0_sp_pipe_bv_cluster_sp_vs_hlsq_state_registers, 0xa800}, + { 
CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, SP_TOP, + gen7_11_0_sp_pipe_bv_cluster_sp_vs_sp_top_registers, 0xa800}, + { CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP, + gen7_11_0_sp_pipe_bv_cluster_sp_vs_usptp_registers, 0xa800}, + { CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE, + gen7_11_0_sp_pipe_br_cluster_sp_vs_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, SP_TOP, + gen7_11_0_sp_pipe_br_cluster_sp_vs_sp_top_registers, 0xa800}, + { CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP, + gen7_11_0_sp_pipe_br_cluster_sp_vs_usptp_registers, 0xa800}, + { CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_STATE, + gen7_11_0_sp_pipe_bv_cluster_sp_vs_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, SP_TOP, + gen7_11_0_sp_pipe_bv_cluster_sp_vs_sp_top_registers, 0xa800}, + { CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP, + gen7_11_0_sp_pipe_bv_cluster_sp_vs_usptp_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE, + gen7_11_0_sp_pipe_br_cluster_sp_ps_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_DP, + gen7_11_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, SP_TOP, + gen7_11_0_sp_pipe_br_cluster_sp_ps_sp_top_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP, + gen7_11_0_sp_pipe_br_cluster_sp_ps_usptp_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE, + gen7_11_0_sp_pipe_lpac_cluster_sp_ps_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_DP, + gen7_11_0_sp_pipe_lpac_cluster_sp_ps_hlsq_dp_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, SP_TOP, + gen7_11_0_sp_pipe_lpac_cluster_sp_ps_sp_top_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP, + gen7_11_0_sp_pipe_lpac_cluster_sp_ps_usptp_registers, 
0xa800}, + { CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE, + gen7_11_0_sp_pipe_br_cluster_sp_ps_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_DP, + gen7_11_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, SP_TOP, + gen7_11_0_sp_pipe_br_cluster_sp_ps_sp_top_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP, + gen7_11_0_sp_pipe_br_cluster_sp_ps_usptp_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, HLSQ_DP, + gen7_11_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, SP_TOP, + gen7_11_0_sp_pipe_br_cluster_sp_ps_sp_top_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP, + gen7_11_0_sp_pipe_br_cluster_sp_ps_usptp_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, HLSQ_DP, + gen7_11_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, SP_TOP, + gen7_11_0_sp_pipe_br_cluster_sp_ps_sp_top_registers, 0xa800}, + { CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP, + gen7_11_0_sp_pipe_br_cluster_sp_ps_usptp_registers, 0xa800}, + { CLUSTER_SP_VS, TP0_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP, + gen7_11_0_tpl1_pipe_br_cluster_sp_vs_usptp_registers, 0xb000}, + { CLUSTER_SP_VS, TP0_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP, + gen7_11_0_tpl1_pipe_bv_cluster_sp_vs_usptp_registers, 0xb000}, + { CLUSTER_SP_VS, TP0_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP, + gen7_11_0_tpl1_pipe_br_cluster_sp_vs_usptp_registers, 0xb000}, + { CLUSTER_SP_VS, TP0_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP, + gen7_11_0_tpl1_pipe_bv_cluster_sp_vs_usptp_registers, 0xb000}, + { CLUSTER_SP_PS, TP0_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP, + gen7_11_0_tpl1_pipe_br_cluster_sp_ps_usptp_registers, 0xb000}, + { CLUSTER_SP_PS, TP0_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP, + gen7_11_0_tpl1_pipe_lpac_cluster_sp_ps_usptp_registers, 0xb000}, + { 
CLUSTER_SP_PS, TP0_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP, + gen7_11_0_tpl1_pipe_br_cluster_sp_ps_usptp_registers, 0xb000}, + { CLUSTER_SP_PS, TP0_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP, + gen7_11_0_tpl1_pipe_br_cluster_sp_ps_usptp_registers, 0xb000}, + { CLUSTER_SP_PS, TP0_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP, + gen7_11_0_tpl1_pipe_br_cluster_sp_ps_usptp_registers, 0xb000}, +}; + +/* + * Before dumping the CP MVC + * Program CP_APERTURE_CNTL_* with pipeID={CP_PIPE} + * Then dump corresponding {Register_PIPE} + */ + +static struct gen7_cp_indexed_reg gen7_11_0_cp_indexed_reg_list[] = { + { GEN7_CP_SQE_STAT_ADDR, GEN7_CP_SQE_STAT_DATA, 0x00040}, + { GEN7_CP_DRAW_STATE_ADDR, GEN7_CP_DRAW_STATE_DATA, 0x00100}, + { GEN7_CP_ROQ_DBG_ADDR, GEN7_CP_ROQ_DBG_DATA, 0x00800}, + { GEN7_CP_SQE_UCODE_DBG_ADDR, GEN7_CP_SQE_UCODE_DBG_DATA, 0x08000}, + { GEN7_CP_BV_DRAW_STATE_ADDR, GEN7_CP_BV_DRAW_STATE_DATA, 0x00100}, + { GEN7_CP_BV_ROQ_DBG_ADDR, GEN7_CP_BV_ROQ_DBG_DATA, 0x00800}, + { GEN7_CP_BV_SQE_UCODE_DBG_ADDR, GEN7_CP_BV_SQE_UCODE_DBG_DATA, 0x08000}, + { GEN7_CP_BV_SQE_STAT_ADDR, GEN7_CP_BV_SQE_STAT_DATA, 0x00040}, + { GEN7_CP_RESOURCE_TABLE_DBG_ADDR, GEN7_CP_RESOURCE_TABLE_DBG_DATA, 0x04100}, + { GEN7_CP_LPAC_DRAW_STATE_ADDR, GEN7_CP_LPAC_DRAW_STATE_DATA, 0x00100}, + { GEN7_CP_LPAC_ROQ_DBG_ADDR, GEN7_CP_LPAC_ROQ_DBG_DATA, 0x00200}, + { GEN7_CP_SQE_AC_UCODE_DBG_ADDR, GEN7_CP_SQE_AC_UCODE_DBG_DATA, 0x08000}, + { GEN7_CP_SQE_AC_STAT_ADDR, GEN7_CP_SQE_AC_STAT_DATA, 0x00040}, + { GEN7_CP_LPAC_FIFO_DBG_ADDR, GEN7_CP_LPAC_FIFO_DBG_DATA, 0x00040}, +}; + +/* + * Block : ['DPM_LEAKAGE'] + * Pipeline: PIPE_NONE + * pairs : 9 (Regs:26) + */ +static const u32 gen7_11_0_dpm_lkg_registers[] = { + 0x21c00, 0x21c00, 0x21c08, 0x21c09, 0x21c0e, 0x21c0f, 0x21c4f, 0x21c50, + 0x21c52, 0x21c52, 0x21c54, 0x21c56, 0x21c58, 0x21c5a, 0x21c5c, 0x21c60, + UINT_MAX, UINT_MAX, +}; + +/* + * Block : ['GPU_CC_GPU_CC_REG'] + * Pipeline: PIPE_NONE + */ +static const u32 gen7_11_0_gpucc_registers[] = { + 0x24000, 
0x2400f, 0x24400, 0x2440f, 0x24800, 0x24805, 0x24c00, 0x24cff, + 0x25400, 0x25404, 0x25800, 0x25804, 0x25c00, 0x25c04, 0x26000, 0x26004, + 0x26400, 0x26405, 0x2640a, 0x26413, 0x26418, 0x26448, 0x2644d, 0x2644e, + 0x26450, 0x26452, 0x26454, 0x2645b, 0x26460, 0x26468, 0x2646d, 0x2646f, + 0x26540, 0x2654e, 0x26554, 0x26573, 0x26576, 0x26576, 0x26600, 0x26616, + 0x26620, 0x2662d, 0x26630, 0x26631, 0x26635, 0x26635, 0x26637, 0x26637, + 0x2663a, 0x2663a, 0x26642, 0x26642, 0x26656, 0x26658, 0x2665b, 0x2665d, + 0x2665f, 0x26662, UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_gpucc_registers), 8)); + +/* + * Block : ['CPR'] + * Pipeline: PIPE_NONE + * pairs : 20 (Regs:471) + */ +static const u32 gen7_11_0_cpr_registers[] = { + 0x26800, 0x26805, 0x26808, 0x2680c, 0x26814, 0x26814, 0x2681c, 0x2681c, + 0x26820, 0x26838, 0x26840, 0x26840, 0x26848, 0x26848, 0x26850, 0x26850, + 0x26880, 0x26897, 0x26980, 0x269b0, 0x269c0, 0x269c8, 0x269e0, 0x269ee, + 0x269fb, 0x269ff, 0x26a02, 0x26a07, 0x26a09, 0x26a0b, 0x26a10, 0x26b0f, + 0x27440, 0x27441, 0x27444, 0x27444, 0x27480, 0x274a2, 0x274ac, 0x274c4, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_cpr_registers), 8)); + +/* + * Block : ['RSCC_RSC'] + * Pipeline: PIPE_NONE + * pairs : 99 (Regs:598) + */ +static const u32 gen7_11_0_rscc_registers[] = { + 0x14000, 0x14034, 0x14036, 0x14036, 0x14040, 0x14042, 0x14044, 0x14045, + 0x14047, 0x14047, 0x14080, 0x14084, 0x14089, 0x1408c, 0x14091, 0x14094, + 0x14099, 0x1409c, 0x140a1, 0x140a4, 0x140a9, 0x140ac, 0x14100, 0x14104, + 0x14114, 0x14119, 0x14124, 0x14132, 0x14154, 0x1416b, 0x14340, 0x14341, + 0x14344, 0x14344, 0x14346, 0x1437c, 0x143f0, 0x143f8, 0x143fa, 0x143fe, + 0x14400, 0x14404, 0x14406, 0x1440a, 0x1440c, 0x14410, 0x14412, 0x14416, + 0x14418, 0x1441c, 0x1441e, 0x14422, 0x14424, 0x14424, 0x14498, 0x144a0, + 0x144a2, 0x144a6, 0x144a8, 0x144ac, 0x144ae, 0x144b2, 0x144b4, 0x144b8, + 0x144ba, 0x144be, 0x144c0, 0x144c4, 0x144c6, 0x144ca, 0x144cc, 
0x144cc, + 0x14540, 0x14548, 0x1454a, 0x1454e, 0x14550, 0x14554, 0x14556, 0x1455a, + 0x1455c, 0x14560, 0x14562, 0x14566, 0x14568, 0x1456c, 0x1456e, 0x14572, + 0x14574, 0x14574, 0x145e8, 0x145f0, 0x145f2, 0x145f6, 0x145f8, 0x145fc, + 0x145fe, 0x14602, 0x14604, 0x14608, 0x1460a, 0x1460e, 0x14610, 0x14614, + 0x14616, 0x1461a, 0x1461c, 0x1461c, 0x14690, 0x14698, 0x1469a, 0x1469e, + 0x146a0, 0x146a4, 0x146a6, 0x146aa, 0x146ac, 0x146b0, 0x146b2, 0x146b6, + 0x146b8, 0x146bc, 0x146be, 0x146c2, 0x146c4, 0x146c4, 0x14738, 0x14740, + 0x14742, 0x14746, 0x14748, 0x1474c, 0x1474e, 0x14752, 0x14754, 0x14758, + 0x1475a, 0x1475e, 0x14760, 0x14764, 0x14766, 0x1476a, 0x1476c, 0x1476c, + 0x147e0, 0x147e8, 0x147ea, 0x147ee, 0x147f0, 0x147f4, 0x147f6, 0x147fa, + 0x147fc, 0x14800, 0x14802, 0x14806, 0x14808, 0x1480c, 0x1480e, 0x14812, + 0x14814, 0x14814, 0x14888, 0x14890, 0x14892, 0x14896, 0x14898, 0x1489c, + 0x1489e, 0x148a2, 0x148a4, 0x148a8, 0x148aa, 0x148ae, 0x148b0, 0x148b4, + 0x148b6, 0x148ba, 0x148bc, 0x148bc, 0x14930, 0x14938, 0x1493a, 0x1493e, + 0x14940, 0x14944, 0x14946, 0x1494a, 0x1494c, 0x14950, 0x14952, 0x14956, + 0x14958, 0x1495c, 0x1495e, 0x14962, 0x14964, 0x14964, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_11_0_rscc_registers), 8)); + +static struct gen7_reg_list gen7_11_0_reg_list[] = { + { gen7_11_0_gpu_registers, NULL }, + { gen7_11_0_dbgc_registers, NULL }, + { gen7_11_0_cx_dbgc_registers, NULL }, + { NULL, NULL }, +}; + +static const u32 *gen7_11_0_external_core_regs[] = { + gen7_11_0_gpucc_registers, + gen7_11_0_cpr_registers, + gen7_11_0_dpm_lkg_registers, +}; +#endif diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c index c4e8da849e..eebfb624d9 100644 --- a/adreno_gen7_snapshot.c +++ b/adreno_gen7_snapshot.c @@ -10,6 +10,7 @@ #include "adreno_gen7_2_0_snapshot.h" #include "adreno_gen7_9_0_snapshot.h" #include "adreno_gen7_14_0_snapshot.h" +#include "adreno_gen7_11_0_snapshot.h" static struct kgsl_memdesc *gen7_capturescript; static 
struct kgsl_memdesc *gen7_crashdump_registers; @@ -124,6 +125,32 @@ const struct gen7_snapshot_block_list gen7_14_0_snapshot_block_list = { .index_registers_len = ARRAY_SIZE(gen7_14_0_cp_indexed_reg_list), }; +const struct gen7_snapshot_block_list gen7_11_0_snapshot_block_list = { + .pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers, + .debugbus_blocks = gen7_11_0_debugbus_blocks, + .debugbus_blocks_len = ARRAY_SIZE(gen7_11_0_debugbus_blocks), + .gbif_debugbus_blocks = gen7_11_0_gbif_debugbus_blocks, + .gbif_debugbus_blocks_len = ARRAY_SIZE(gen7_11_0_gbif_debugbus_blocks), + .cx_debugbus_blocks = gen7_11_0_cx_debugbus_blocks, + .cx_debugbus_blocks_len = ARRAY_SIZE(gen7_11_0_cx_debugbus_blocks), + .external_core_regs = gen7_11_0_external_core_regs, + .num_external_core_regs = ARRAY_SIZE(gen7_11_0_external_core_regs), + .gmu_regs = gen7_11_0_gmu_registers, + .gmu_gx_regs = gen7_11_0_gmugx_registers, + .rscc_regs = gen7_11_0_rscc_registers, + .reg_list = gen7_11_0_reg_list, + .cx_misc_regs = gen7_11_0_cx_misc_registers, + .shader_blocks = gen7_11_0_shader_blocks, + .num_shader_blocks = ARRAY_SIZE(gen7_11_0_shader_blocks), + .clusters = gen7_11_0_clusters, + .num_clusters = ARRAY_SIZE(gen7_11_0_clusters), + .sptp_clusters = gen7_11_0_sptp_clusters, + .num_sptp_clusters = ARRAY_SIZE(gen7_11_0_sptp_clusters), + .post_crashdumper_regs = gen7_0_0_post_crashdumper_registers, + .index_registers = gen7_11_0_cp_indexed_reg_list, + .index_registers_len = ARRAY_SIZE(gen7_11_0_cp_indexed_reg_list), +}; + #define GEN7_SP_READ_SEL_VAL(_location, _pipe, _statetype, _usptp, _sptp) \ (FIELD_PREP(GENMASK(19, 18), _location) | \ FIELD_PREP(GENMASK(17, 16), _pipe) | \ diff --git a/gen7_reg.h b/gen7_reg.h index 4c8b64d675..fc005f7166 100644 --- a/gen7_reg.h +++ b/gen7_reg.h @@ -1254,6 +1254,7 @@ /* GPUCC registers */ #define GEN7_GPU_CC_GX_DOMAIN_MISC3 0x26541 #define GEN7_GPU_CC_CX_GDSCR 0x26442 +#define GEN7_11_0_GPU_CC_CX_GDSCR 0x26423 /* GPU RSC sequencer registers */ 
#define GEN7_GPU_RSCC_RSC_STATUS0_DRV0 0x00004 From 698f4b90fafadf2a5a1fe39ebc7d76d95c9a66e2 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Tue, 26 Sep 2023 15:38:59 -0600 Subject: [PATCH 0612/1016] kgsl: hwsched: Abstract out hardware fence APIs Introduce a local CONFIG_QCOM_KGSL_SYNX config option and set it for targets that use synx APIs for hardware fences. Also, modify/fix the paths that need to signal kgsl hardware fences in situations where GMU is turned off. Change-Id: I33d9bb83cb911b2f32a000e06aca37a23fcf9a9a Signed-off-by: Harshdeep Dhatt --- Android.mk | 4 + Kconfig | 8 + adreno_gen7_hwsched.c | 2 +- adreno_gen7_hwsched_hfi.c | 66 ++------- adreno_gen7_hwsched_hfi.h | 11 -- adreno_gen8_hwsched.c | 2 +- adreno_gen8_hwsched_hfi.c | 67 ++------- adreno_gen8_hwsched_hfi.h | 11 -- adreno_hwsched.c | 57 ++----- adreno_hwsched.h | 22 +-- build/kgsl_defs.bzl | 9 +- config/sun_consolidate_gpuconf | 1 + kgsl_sync.c | 261 ++++++++++++++++++++++++++++++++- kgsl_sync.h | 72 ++++++++- 14 files changed, 383 insertions(+), 210 deletions(-) diff --git a/Android.mk b/Android.mk index 25ecfdfc12..b8dcdce788 100644 --- a/Android.mk +++ b/Android.mk @@ -36,6 +36,10 @@ ifeq ($(TARGET_BOARD_PLATFORM), pineapple) KBUILD_OPTIONS += KBUILD_EXTRA_SYMBOLS+=$(PWD)/$(call intermediates-dir-for,DLKM,hw-fence-module-symvers)/Module.symvers endif +ifeq ($(TARGET_BOARD_PLATFORM), sun) + KBUILD_OPTIONS += KBUILD_EXTRA_SYMBOLS+=$(PWD)/$(call intermediates-dir-for,DLKM,synx-driver-symvers)/synx-driver-symvers +endif + include $(CLEAR_VARS) # For incremental compilation LOCAL_SRC_FILES := $(wildcard $(LOCAL_PATH)/**/*) $(wildcard $(LOCAL_PATH)/*) diff --git a/Kconfig b/Kconfig index 506bf6eac7..ad02251e31 100644 --- a/Kconfig +++ b/Kconfig @@ -100,3 +100,11 @@ config QCOM_KGSL_HIBERNATION Say 'Y' to enable hibernation support in kgsl. If enabled, kgsl will register necessary power manager callbacks to support hibernation. 
+ +config QCOM_KGSL_SYNX + bool "Use synx APIs in KGSL for HW fences" + help + Say 'Y' to enable synx API in kgsl. If enabled, kgsl will use + synx API for HW fence feature. This allows hardware fence inter-op + between HW fence clients and SYNX clients. If not enabled, then + kgsl will use msm_hw_fence APIs for hardware fence feature. diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index b3e6475394..faab5eb6d0 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -1107,7 +1107,7 @@ static void drain_ctx_hw_fences_cpu(struct adreno_device *adreno_dev, spin_lock(&drawctxt->lock); list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_inflight_list, node) { - gen7_trigger_hw_fence_cpu(adreno_dev, entry); + kgsl_hw_fence_trigger_cpu(KGSL_DEVICE(adreno_dev), entry->kfence); gen7_remove_hw_fence_entry(adreno_dev, entry); } spin_unlock(&drawctxt->lock); diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index c269f5d886..21a0d3d074 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -4,7 +4,6 @@ * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ -#include #include #include #include @@ -22,12 +21,6 @@ #include "kgsl_trace.h" #include "kgsl_util.h" -#if (KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE) -#include -#else -#include -#endif - #define HFI_QUEUE_MAX (HFI_QUEUE_DEFAULT_CNT + HFI_QUEUE_DISPATCH_MAX_CNT) #define DEFINE_QHDR(gmuaddr, id, prio) \ @@ -1644,7 +1637,7 @@ static struct hfi_mem_alloc_entry *get_mem_alloc_entry( */ switch (desc->mem_kind) { case HFI_MEMKIND_HW_FENCE: - entry->md = &adreno_dev->hwsched.hw_fence.memdesc; + entry->md = &adreno_dev->hwsched.hw_fence_md; break; case HFI_MEMKIND_MEMSTORE: entry->md = device->memstore; @@ -2075,7 +2068,7 @@ static int gen7_hfi_send_hw_fence_feature_ctrl(struct adreno_device *adreno_dev) ret = gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_HW_FENCE, 1, 0); if (ret && (ret == -ENOENT)) { dev_err(&gmu->pdev->dev, "GMU doesn't support HW_FENCE feature\n"); - adreno_hwsched_deregister_hw_fence(hwsched->hw_fence.handle); + adreno_hwsched_deregister_hw_fence(adreno_dev); return 0; } @@ -3100,21 +3093,6 @@ static u32 get_irq_bit(struct adreno_device *adreno_dev, struct kgsl_drawobj *dr return 0; } -static int add_gmu_waiter(struct adreno_device *adreno_dev, - struct dma_fence *fence) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - int ret = msm_hw_fence_wait_update(adreno_dev->hwsched.hw_fence.handle, - &fence, 1, true); - - if (ret) - dev_err_ratelimited(device->dev, - "Failed to add GMU as waiter ret:%d fence ctx:%llu ts:%llu\n", - ret, fence->context, fence->seqno); - - return ret; -} - static void populate_kgsl_fence(struct hfi_syncobj *obj, struct dma_fence *fence) { @@ -3146,6 +3124,7 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); struct hfi_submit_syncobj *cmd; struct hfi_syncobj *obj = NULL; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 seqnum; /* Add hfi_syncobj struct for sync object */ @@ -3186,7 +3165,7 @@ static int _submit_hw_fence(struct 
adreno_device *adreno_dev, * If this sync object has a software only fence, make sure that it is * already signaled so that we can skip sending this fence to the GMU. */ - if (!test_bit(MSM_HW_FENCE_FLAG_ENABLED_BIT, &fences[j]->flags)) { + if (!kgsl_is_hw_fence(fences[j])) { if (WARN(!dma_fence_is_signaled(fences[j]), "sync object has unsignaled software fence")) return -EINVAL; @@ -3196,14 +3175,14 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, if (is_kgsl_fence(fences[j])) { populate_kgsl_fence(obj, fences[j]); } else { - int ret = add_gmu_waiter(adreno_dev, fences[j]); + int ret = kgsl_hw_fence_add_waiter(device, fences[j]); if (ret) { syncobj->flags &= ~KGSL_SYNCOBJ_HW; return ret; } - if (test_bit(MSM_HW_FENCE_FLAG_SIGNALED_BIT, &fences[j]->flags) || + if (kgsl_hw_fence_signaled(fences[j]) || test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fences[j]->flags)) obj->flags |= BIT(GMU_SYNCOBJ_FLAG_SIGNALED_BIT); @@ -3522,7 +3501,6 @@ void gen7_hwsched_create_hw_fence(struct adreno_device *adreno_dev, struct kgsl_context *context = ktimeline->context; struct adreno_context *drawctxt = ADRENO_CONTEXT(context); struct adreno_hw_fence_entry *entry = NULL; - struct msm_hw_fence_create_params params = {0}; /* Only allow a single log in a second */ static DEFINE_RATELIMIT_STATE(_rs, HZ, 1); struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev); @@ -3531,17 +3509,8 @@ void gen7_hwsched_create_hw_fence(struct adreno_device *adreno_dev, int ret = 0; bool destroy_hw_fence = true; - params.fence = &kfence->fence; - params.handle = &kfence->hw_fence_index; - kfence->hw_fence_handle = adreno_dev->hwsched.hw_fence.handle; - - ret = msm_hw_fence_create(kfence->hw_fence_handle, ¶ms); - if ((ret || IS_ERR_OR_NULL(params.handle))) { - if (__ratelimit(&_rs)) - dev_err(device->dev, "Failed to create ctx:%d ts:%d hardware fence:%d\n", - kfence->context_id, kfence->timestamp, ret); + if (kgsl_hw_fence_create(device, kfence)) return; - } spin_lock(&drawctxt->lock); 
spin_lock(&hw_hfi->hw_fence.lock); @@ -3608,7 +3577,7 @@ void gen7_hwsched_create_hw_fence(struct adreno_device *adreno_dev, done: if (destroy_hw_fence) { - msm_hw_fence_destroy(kfence->hw_fence_handle, &kfence->fence); + kgsl_hw_fence_destroy(kfence); if (entry) gen7_remove_hw_fence_entry(adreno_dev, entry); } @@ -3955,23 +3924,6 @@ int gen7_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, return ret; } -void gen7_trigger_hw_fence_cpu(struct adreno_device *adreno_dev, - struct adreno_hw_fence_entry *entry) -{ - int ret = msm_hw_fence_update_txq(adreno_dev->hwsched.hw_fence.handle, - entry->cmd.hash_index, 0, 0); - - if (ret) { - dev_err_ratelimited(adreno_dev->dev.dev, - "Failed to trigger hw fence via cpu: ctx:%d ts:%d ret:%d\n", - entry->drawctxt->base.id, (u32)entry->cmd.ts, ret); - return; - } - - msm_hw_fence_trigger_signal(adreno_dev->hwsched.hw_fence.handle, IPCC_CLIENT_GPU, - IPCC_CLIENT_APSS, 0); -} - /* We don't want to unnecessarily wake the GMU to trigger hardware fences */ static void drain_context_hw_fence_cpu(struct adreno_device *adreno_dev, struct adreno_context *drawctxt) @@ -3980,7 +3932,7 @@ static void drain_context_hw_fence_cpu(struct adreno_device *adreno_dev, list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) { - gen7_trigger_hw_fence_cpu(adreno_dev, entry); + kgsl_hw_fence_trigger_cpu(KGSL_DEVICE(adreno_dev), entry->kfence); gen7_remove_hw_fence_entry(adreno_dev, entry); } diff --git a/adreno_gen7_hwsched_hfi.h b/adreno_gen7_hwsched_hfi.h index 80afb5798c..80f52c94a3 100644 --- a/adreno_gen7_hwsched_hfi.h +++ b/adreno_gen7_hwsched_hfi.h @@ -319,17 +319,6 @@ int gen7_hwsched_check_context_inflight_hw_fences(struct adreno_device *adreno_d void gen7_remove_hw_fence_entry(struct adreno_device *adreno_dev, struct adreno_hw_fence_entry *entry); -/** - * gen7_trigger_hw_fence_cpu - Trigger hardware fence from cpu - * @adreno_dev: pointer to the adreno device - * @fence: hardware fence entry to be triggered - * - * 
Trigger the hardware fence by sending it to GMU's TxQueue and raise the - * interrupt from GMU to APPS - */ -void gen7_trigger_hw_fence_cpu(struct adreno_device *adreno_dev, - struct adreno_hw_fence_entry *fence); - /** * gen7_hwsched_disable_hw_fence_throttle - Disable hardware fence throttling after reset * @adreno_dev: pointer to the adreno device diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 4d3054e29a..78095d404e 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -1096,7 +1096,7 @@ static void drain_ctx_hw_fences_cpu(struct adreno_device *adreno_dev, spin_lock(&drawctxt->lock); list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_inflight_list, node) { - gen8_trigger_hw_fence_cpu(adreno_dev, entry); + kgsl_hw_fence_trigger_cpu(KGSL_DEVICE(adreno_dev), entry->kfence); gen8_remove_hw_fence_entry(adreno_dev, entry); } spin_unlock(&drawctxt->lock); diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 83048906a2..014fa9f1a1 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -4,7 +4,6 @@ * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ -#include #include #include #include @@ -22,13 +21,6 @@ #include "kgsl_trace.h" #include "kgsl_util.h" -#if (KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE) -#include -#else -#include -#endif - - #define HFI_QUEUE_MAX (HFI_QUEUE_DEFAULT_CNT + HFI_QUEUE_DISPATCH_MAX_CNT) #define DEFINE_QHDR(gmuaddr, id, prio) \ @@ -1656,7 +1648,7 @@ static struct hfi_mem_alloc_entry *get_mem_alloc_entry( */ switch (desc->mem_kind) { case HFI_MEMKIND_HW_FENCE: - entry->md = &adreno_dev->hwsched.hw_fence.memdesc; + entry->md = &adreno_dev->hwsched.hw_fence_md; break; case HFI_MEMKIND_MEMSTORE: entry->md = device->memstore; @@ -2085,7 +2077,7 @@ static int gen8_hfi_send_hw_fence_feature_ctrl(struct adreno_device *adreno_dev) ret = gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_HW_FENCE, 1, 0); if (ret && (ret == -ENOENT)) { dev_err(&gmu->pdev->dev, "GMU doesn't support HW_FENCE feature\n"); - adreno_hwsched_deregister_hw_fence(hwsched->hw_fence.handle); + adreno_hwsched_deregister_hw_fence(adreno_dev); return 0; } @@ -3103,21 +3095,6 @@ static u32 get_irq_bit(struct adreno_device *adreno_dev, struct kgsl_drawobj *dr return 0; } -static int add_gmu_waiter(struct adreno_device *adreno_dev, - struct dma_fence *fence) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - int ret = msm_hw_fence_wait_update(adreno_dev->hwsched.hw_fence.handle, - &fence, 1, true); - - if (ret) - dev_err_ratelimited(device->dev, - "Failed to add GMU as waiter ret:%d fence ctx:%llu ts:%llu\n", - ret, fence->context, fence->seqno); - - return ret; -} - static void populate_kgsl_fence(struct hfi_syncobj *obj, struct dma_fence *fence) { @@ -3149,6 +3126,7 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); struct hfi_submit_syncobj *cmd; struct hfi_syncobj *obj = NULL; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); /* Add hfi_syncobj struct for sync object */ cmd_sizebytes = sizeof(*cmd) + @@ -3188,7 +3166,7 @@ static int 
_submit_hw_fence(struct adreno_device *adreno_dev, * If this sync object has a software only fence, make sure that it is * already signaled so that we can skip sending this fence to the GMU. */ - if (!test_bit(MSM_HW_FENCE_FLAG_ENABLED_BIT, &fences[j]->flags)) { + if (!kgsl_is_hw_fence(fences[j])) { if (WARN(!dma_fence_is_signaled(fences[j]), "sync object has unsignaled software fence")) return -EINVAL; @@ -3198,14 +3176,14 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, if (is_kgsl_fence(fences[j])) { populate_kgsl_fence(obj, fences[j]); } else { - int ret = add_gmu_waiter(adreno_dev, fences[j]); + int ret = kgsl_hw_fence_add_waiter(device, fences[j]); if (ret) { syncobj->flags &= ~KGSL_SYNCOBJ_HW; return ret; } - if (test_bit(MSM_HW_FENCE_FLAG_SIGNALED_BIT, &fences[j]->flags) || + if (kgsl_hw_fence_signaled(fences[j]) || test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fences[j]->flags)) obj->flags |= BIT(GMU_SYNCOBJ_FLAG_SIGNALED_BIT); @@ -3524,7 +3502,6 @@ void gen8_hwsched_create_hw_fence(struct adreno_device *adreno_dev, struct kgsl_context *context = ktimeline->context; struct adreno_context *drawctxt = ADRENO_CONTEXT(context); struct adreno_hw_fence_entry *entry = NULL; - struct msm_hw_fence_create_params params = {0}; /* Only allow a single log in a second */ static DEFINE_RATELIMIT_STATE(_rs, HZ, 1); struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev); @@ -3533,17 +3510,8 @@ void gen8_hwsched_create_hw_fence(struct adreno_device *adreno_dev, int ret = 0; bool destroy_hw_fence = true; - params.fence = &kfence->fence; - params.handle = &kfence->hw_fence_index; - kfence->hw_fence_handle = adreno_dev->hwsched.hw_fence.handle; - - ret = msm_hw_fence_create(kfence->hw_fence_handle, ¶ms); - if ((ret || IS_ERR_OR_NULL(params.handle))) { - if (__ratelimit(&_rs)) - dev_err(device->dev, "Failed to create ctx:%d ts:%d hardware fence:%d\n", - kfence->context_id, kfence->timestamp, ret); + if (kgsl_hw_fence_create(device, kfence)) return; - } 
spin_lock(&drawctxt->lock); spin_lock(&hw_hfi->hw_fence.lock); @@ -3610,7 +3578,7 @@ void gen8_hwsched_create_hw_fence(struct adreno_device *adreno_dev, done: if (destroy_hw_fence) { - msm_hw_fence_destroy(kfence->hw_fence_handle, &kfence->fence); + kgsl_hw_fence_destroy(kfence); if (entry) gen8_remove_hw_fence_entry(adreno_dev, entry); } @@ -3953,23 +3921,6 @@ int gen8_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, return ret; } -void gen8_trigger_hw_fence_cpu(struct adreno_device *adreno_dev, - struct adreno_hw_fence_entry *entry) -{ - int ret = msm_hw_fence_update_txq(adreno_dev->hwsched.hw_fence.handle, - entry->cmd.hash_index, 0, 0); - - if (ret) { - dev_err_ratelimited(adreno_dev->dev.dev, - "Failed to trigger hw fence via cpu: ctx:%d ts:%d ret:%d\n", - entry->drawctxt->base.id, (u32)entry->cmd.ts, ret); - return; - } - - msm_hw_fence_trigger_signal(adreno_dev->hwsched.hw_fence.handle, IPCC_CLIENT_GPU, - IPCC_CLIENT_APSS, 0); -} - /* We don't want to unnecessarily wake the GMU to trigger hardware fences */ static void drain_context_hw_fence_cpu(struct adreno_device *adreno_dev, struct adreno_context *drawctxt) @@ -3978,7 +3929,7 @@ static void drain_context_hw_fence_cpu(struct adreno_device *adreno_dev, list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) { - gen8_trigger_hw_fence_cpu(adreno_dev, entry); + kgsl_hw_fence_trigger_cpu(KGSL_DEVICE(adreno_dev), entry->kfence); gen8_remove_hw_fence_entry(adreno_dev, entry); } diff --git a/adreno_gen8_hwsched_hfi.h b/adreno_gen8_hwsched_hfi.h index b1dc120e0d..00cbb44fc9 100644 --- a/adreno_gen8_hwsched_hfi.h +++ b/adreno_gen8_hwsched_hfi.h @@ -315,17 +315,6 @@ int gen8_hwsched_check_context_inflight_hw_fences(struct adreno_device *adreno_d void gen8_remove_hw_fence_entry(struct adreno_device *adreno_dev, struct adreno_hw_fence_entry *entry); -/** - * gen8_trigger_hw_fence_cpu - Trigger hardware fence from cpu - * @adreno_dev: pointer to the adreno device - * @fence: hardware fence 
entry to be triggered - * - * Trigger the hardware fence by sending it to GMU's TxQueue and raise the - * interrupt from GMU to APPS - */ -void gen8_trigger_hw_fence_cpu(struct adreno_device *adreno_dev, - struct adreno_hw_fence_entry *fence); - /** * gen8_hwsched_disable_hw_fence_throttle - Disable hardware fence throttling after reset * @adreno_dev: pointer to the adreno device diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 9779c85245..14363e859f 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -1297,17 +1297,16 @@ static const struct attribute *_hwsched_attr_list[] = { void adreno_hwsched_deregister_hw_fence(struct adreno_device *adreno_dev) { struct adreno_hwsched *hwsched = &adreno_dev->hwsched; - struct adreno_hw_fence *hw_fence = &hwsched->hw_fence; if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &hwsched->flags)) return; - msm_hw_fence_deregister(hwsched->hw_fence.handle); + kgsl_hw_fence_close(KGSL_DEVICE(adreno_dev)); - if (hw_fence->memdesc.sgt) - sg_free_table(hw_fence->memdesc.sgt); + if (hwsched->hw_fence_md.sgt) + sg_free_table(hwsched->hw_fence_md.sgt); - memset(&hw_fence->memdesc, 0x0, sizeof(hw_fence->memdesc)); + memset(&hwsched->hw_fence_md, 0x0, sizeof(hwsched->hw_fence_md)); kmem_cache_destroy(hwsched->hw_fence_cache); @@ -1940,27 +1939,6 @@ void adreno_hwsched_fault(struct adreno_device *adreno_dev, adreno_hwsched_trigger(adreno_dev); } -static bool is_tx_slot_available(struct adreno_device *adreno_dev) -{ - struct adreno_hwsched *hwsched = &adreno_dev->hwsched; - void *ptr = hwsched->hw_fence.mem_descriptor.virtual_addr; - struct msm_hw_fence_hfi_queue_header *hdr = (struct msm_hw_fence_hfi_queue_header *) - (ptr + sizeof(struct msm_hw_fence_hfi_queue_table_header)); - u32 queue_size_dwords = hdr->queue_size / sizeof(u32); - u32 payload_size_dwords = hdr->pkt_size / sizeof(u32); - u32 free_dwords, write_idx = hdr->write_index, read_idx = hdr->read_index; - u32 reserved_dwords = atomic_read(&hwsched->hw_fence_count) * 
payload_size_dwords; - - free_dwords = read_idx <= write_idx ? - queue_size_dwords - (write_idx - read_idx) : - read_idx - write_idx; - - if (free_dwords - reserved_dwords <= payload_size_dwords) - return false; - - return true; -} - static void adreno_hwsched_create_hw_fence(struct adreno_device *adreno_dev, struct kgsl_sync_fence *kfence) { @@ -1976,7 +1954,8 @@ static void adreno_hwsched_create_hw_fence(struct adreno_device *adreno_dev, if (kgsl_context_is_bad(context)) return; - if (!is_tx_slot_available(adreno_dev)) + if (!kgsl_hw_fence_tx_slot_available(KGSL_DEVICE(adreno_dev), + &adreno_dev->hwsched.hw_fence_count)) return; hwsched_ops->create_hw_fence(adreno_dev, kfence); @@ -2249,7 +2228,6 @@ int adreno_hwsched_idle(struct adreno_device *adreno_dev) void adreno_hwsched_register_hw_fence(struct adreno_device *adreno_dev) { struct adreno_hwsched *hwsched = &adreno_dev->hwsched; - struct adreno_hw_fence *hw_fence = &hwsched->hw_fence; struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret; @@ -2263,32 +2241,23 @@ void adreno_hwsched_register_hw_fence(struct adreno_device *adreno_dev) if (test_bit(ADRENO_HWSCHED_HW_FENCE, &hwsched->flags)) return; - hw_fence->handle = msm_hw_fence_register(HW_FENCE_CLIENT_ID_CTX0, - &hw_fence->mem_descriptor); - if (IS_ERR_OR_NULL(hw_fence->handle)) { - dev_err(device->dev, "HW fences not supported: %d\n", - PTR_ERR_OR_ZERO(hw_fence->handle)); - hw_fence->handle = NULL; + if (kgsl_hw_fence_init(device)) return; - } /* * We need to set up the memory descriptor with the physical address of the Tx/Rx Queues so * that these buffers can be imported in to GMU VA space */ - kgsl_memdesc_init(device, &hw_fence->memdesc, 0); - hw_fence->memdesc.physaddr = hw_fence->mem_descriptor.device_addr; - hw_fence->memdesc.size = hw_fence->mem_descriptor.size; - hw_fence->memdesc.hostptr = hw_fence->mem_descriptor.virtual_addr; + kgsl_memdesc_init(device, &hwsched->hw_fence_md, 0); + kgsl_hw_fence_populate_md(device, 
&hwsched->hw_fence_md); - ret = kgsl_memdesc_sg_dma(&hw_fence->memdesc, hw_fence->memdesc.physaddr, - hw_fence->memdesc.size); + ret = kgsl_memdesc_sg_dma(&hwsched->hw_fence_md, hwsched->hw_fence_md.physaddr, + hwsched->hw_fence_md.size); if (ret) { dev_err(device->dev, "Failed to setup HW fences memdesc: %d\n", ret); - msm_hw_fence_deregister(hw_fence->handle); - hw_fence->handle = NULL; - memset(&hw_fence->memdesc, 0x0, sizeof(hw_fence->memdesc)); + kgsl_hw_fence_close(device); + memset(&hwsched->hw_fence_md, 0x0, sizeof(hwsched->hw_fence_md)); return; } diff --git a/adreno_hwsched.h b/adreno_hwsched.h index d0b315e49e..ff84ee8f24 100644 --- a/adreno_hwsched.h +++ b/adreno_hwsched.h @@ -7,12 +7,6 @@ #ifndef _ADRENO_HWSCHED_H_ #define _ADRENO_HWSCHED_H_ -#if (KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE) -#include -#else -#include -#endif - #include "kgsl_sync.h" /* This structure represents inflight command object */ @@ -61,18 +55,6 @@ struct adreno_hwsched_ops { }; -/** - * struct adreno_hw_fence - Container for hardware fences instance - */ -struct adreno_hw_fence { - /** @handle: Handle for hardware fences */ - void *handle; - /** @descriptor: Memory descriptor for hardware fences */ - struct msm_hw_fence_mem_addr mem_descriptor; - /** @memdesc: Kgsl memory descriptor for hardware fences queue */ - struct kgsl_memdesc memdesc; -}; - /** * struct adreno_hwsched - Container for the hardware scheduler */ @@ -109,8 +91,6 @@ struct adreno_hwsched { /** @lsr_check_ws: Lsr work to update power stats */ struct work_struct lsr_check_ws; /** @hw_fence: Container for the hw fences instance */ - struct adreno_hw_fence hw_fence; - /** @hw_fence_cache: kmem cache for storing hardware output fences */ struct kmem_cache *hw_fence_cache; /** @hw_fence_count: Number of hardware fences that haven't yet been sent to Tx Queue */ atomic_t hw_fence_count; @@ -123,6 +103,8 @@ struct adreno_hwsched { struct kgsl_memdesc global_ctxtq; /** @global_ctxt_gmu_registered: Whether global 
context is registered with gmu */ bool global_ctxt_gmu_registered; + /** @hw_fence_md: Kgsl memory descriptor for hardware fences queue */ + struct kgsl_memdesc hw_fence_md; }; /* diff --git a/build/kgsl_defs.bzl b/build/kgsl_defs.bzl index 5dcdb6d9a9..f70d05fe23 100644 --- a/build/kgsl_defs.bzl +++ b/build/kgsl_defs.bzl @@ -88,7 +88,7 @@ def external_deps(target, variant): defconfigs = [] # Add msm_hw_fence in the dependency and defconfig lists for targets that use it - if target in [ "pineapple", "sun" ]: + if target in [ "pineapple" ]: deplist = deplist + [ "//vendor/qcom/opensource/mm-drivers/hw_fence:{}_msm_hw_fence".format(tv), "//vendor/qcom/opensource/mm-drivers/hw_fence:hw_fence_headers" @@ -97,6 +97,13 @@ def external_deps(target, variant): "//vendor/qcom/opensource/mm-drivers/hw_fence:defconfig" ] + # Add synx-kernel in the dependency list for targets that use it for hardware fences + if target in [ "sun" ]: + deplist = deplist + [ + "//vendor/qcom/opensource/synx-kernel:{}_modules".format(tv), + "//vendor/qcom/opensource/synx-kernel:synx_headers" + ] + native.genrule( name = "{}_defconfig".format(tv), srcs = defconfigs + [ "config/{}_gpuconf".format(tv) ], diff --git a/config/sun_consolidate_gpuconf b/config/sun_consolidate_gpuconf index 837c2df56d..097b6806d5 100644 --- a/config/sun_consolidate_gpuconf +++ b/config/sun_consolidate_gpuconf @@ -7,3 +7,4 @@ CONFIG_QCOM_KGSL_SORT_POOL=y CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=y CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR="msm-adreno-tz" +CONFIG_QCOM_KGSL_SYNX=y diff --git a/kgsl_sync.c b/kgsl_sync.c index ff9aa4c375..b4cff282d3 100644 --- a/kgsl_sync.c +++ b/kgsl_sync.c @@ -4,6 +4,7 @@ * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ +#include #include #include #include @@ -11,13 +12,265 @@ #include "kgsl_device.h" #include "kgsl_sync.h" +static const struct dma_fence_ops kgsl_sync_fence_ops; +/* Only allow a single log in a second */ +static DEFINE_RATELIMIT_STATE(_rs, HZ, 1); + +#ifdef CONFIG_QCOM_KGSL_SYNX + +#include + +static struct synx_hw_fence_descriptor { + /** @handle: Handle for hardware fences */ + struct synx_session *handle; + /** @descriptor: Memory descriptor for hardware fences */ + struct synx_queue_desc mem_descriptor; +} kgsl_synx; + +int kgsl_hw_fence_init(struct kgsl_device *device) +{ + struct synx_initialization_params params; + + params.id = (enum synx_client_id)SYNX_CLIENT_HW_FENCE_GFX_CTX0; + params.ptr = &kgsl_synx.mem_descriptor; + kgsl_synx.handle = synx_initialize(¶ms); + + if (IS_ERR_OR_NULL(kgsl_synx.handle)) { + dev_err(device->dev, "HW fences not supported: %d\n", + PTR_ERR_OR_ZERO(kgsl_synx.handle)); + kgsl_synx.handle = NULL; + return -EINVAL; + } + + return 0; +} + +void kgsl_hw_fence_close(struct kgsl_device *device) +{ + synx_uninitialize(kgsl_synx.handle); +} + +void kgsl_hw_fence_populate_md(struct kgsl_device *device, struct kgsl_memdesc *md) +{ + md->physaddr = kgsl_synx.mem_descriptor.dev_addr; + md->size = kgsl_synx.mem_descriptor.size; + md->hostptr = kgsl_synx.mem_descriptor.vaddr; +} + +int kgsl_hw_fence_create(struct kgsl_device *device, struct kgsl_sync_fence *kfence) +{ + struct synx_create_params params = {0}; + int ret; + + params.fence = &kfence->fence; + params.h_synx = (u32 *)&kfence->hw_fence_index; + params.flags = SYNX_CREATE_DMA_FENCE; + + ret = synx_create(kgsl_synx.handle, ¶ms); + if (!ret) + return 0; + + if (__ratelimit(&_rs)) + dev_err(device->dev, "Failed to create ctx:%d ts:%d hardware fence:%d\n", + kfence->context_id, kfence->timestamp, ret); + + return -EINVAL; +} + +int kgsl_hw_fence_add_waiter(struct kgsl_device *device, struct dma_fence *fence) +{ + struct synx_import_params params; + u32 handle = 0; + int ret; + + 
params.indv.fence = fence; + params.type = SYNX_IMPORT_INDV_PARAMS; + params.indv.new_h_synx = &handle; + params.indv.flags = SYNX_IMPORT_DMA_FENCE; + + ret = synx_import(kgsl_synx.handle, ¶ms); + if (ret) { + dev_err_ratelimited(device->dev, + "Failed to add GMU as waiter ret:%d fence ctx:%llu ts:%llu\n", + ret, fence->context, fence->seqno); + return ret; + } + + /* release reference held by synx_import */ + ret = synx_release(kgsl_synx.handle, handle); + if (ret) + dev_err_ratelimited(device->dev, + "Failed to release wait fences ret:%d fence ctx:%llu ts:%llu\n", + ret, fence->context, fence->seqno); + + return ret; +} + +bool kgsl_hw_fence_tx_slot_available(struct kgsl_device *device, const atomic_t *hw_fence_count) +{ + void *ptr = kgsl_synx.mem_descriptor.vaddr; + struct synx_hw_fence_hfi_queue_header *hdr = (struct synx_hw_fence_hfi_queue_header *) + (ptr + sizeof(struct synx_hw_fence_hfi_queue_table_header)); + u32 queue_size_dwords = hdr->queue_size / sizeof(u32); + u32 payload_size_dwords = hdr->pkt_size / sizeof(u32); + u32 free_dwords, write_idx = hdr->write_index, read_idx = hdr->read_index; + u32 reserved_dwords = atomic_read(hw_fence_count) * payload_size_dwords; + + free_dwords = read_idx <= write_idx ? 
+ queue_size_dwords - (write_idx - read_idx) : + read_idx - write_idx; + + if (free_dwords - reserved_dwords <= payload_size_dwords) + return false; + + return true; +} + +void kgsl_hw_fence_destroy(struct kgsl_sync_fence *kfence) +{ + synx_release(kgsl_synx.handle, kfence->hw_fence_index); +} + +void kgsl_hw_fence_trigger_cpu(struct kgsl_device *device, struct kgsl_sync_fence *kfence) +{ + synx_signal(kgsl_synx.handle, (u32)kfence->hw_fence_index, SYNX_STATE_SIGNALED_SUCCESS); +} + +bool kgsl_hw_fence_signaled(struct dma_fence *fence) +{ + return test_bit(SYNX_HW_FENCE_FLAG_SIGNALED_BIT, &fence->flags); +} + +bool kgsl_is_hw_fence(struct dma_fence *fence) +{ + return test_bit(SYNX_HW_FENCE_FLAG_ENABLED_BIT, &fence->flags); +} + +#else + #if (KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE) #include #else #include #endif -static const struct dma_fence_ops kgsl_sync_fence_ops; +static struct msm_hw_fence_descriptor { + /** @handle: Handle for hardware fences */ + void *handle; + /** @descriptor: Memory descriptor for hardware fences */ + struct msm_hw_fence_mem_addr mem_descriptor; +} kgsl_msm_hw_fence; + +int kgsl_hw_fence_init(struct kgsl_device *device) +{ + kgsl_msm_hw_fence.handle = msm_hw_fence_register(HW_FENCE_CLIENT_ID_CTX0, + &kgsl_msm_hw_fence.mem_descriptor); + + if (IS_ERR_OR_NULL(kgsl_msm_hw_fence.handle)) { + dev_err(device->dev, "HW fences not supported: %d\n", + PTR_ERR_OR_ZERO(kgsl_msm_hw_fence.handle)); + kgsl_msm_hw_fence.handle = NULL; + return -EINVAL; + } + + return 0; +} + +void kgsl_hw_fence_close(struct kgsl_device *device) +{ + msm_hw_fence_deregister(kgsl_msm_hw_fence.handle); +} + +void kgsl_hw_fence_populate_md(struct kgsl_device *device, struct kgsl_memdesc *md) +{ + md->physaddr = kgsl_msm_hw_fence.mem_descriptor.device_addr; + md->size = kgsl_msm_hw_fence.mem_descriptor.size; + md->hostptr = kgsl_msm_hw_fence.mem_descriptor.virtual_addr; +} + +int kgsl_hw_fence_create(struct kgsl_device *device, struct kgsl_sync_fence *kfence) +{ + 
struct msm_hw_fence_create_params params = {0}; + int ret; + + params.fence = &kfence->fence; + params.handle = &kfence->hw_fence_index; + + ret = msm_hw_fence_create(kgsl_msm_hw_fence.handle, ¶ms); + if ((ret || IS_ERR_OR_NULL(params.handle))) { + if (__ratelimit(&_rs)) + dev_err(device->dev, "Failed to create ctx:%d ts:%d hardware fence:%d\n", + kfence->context_id, kfence->timestamp, ret); + return -EINVAL; + } + + return 0; +} + +int kgsl_hw_fence_add_waiter(struct kgsl_device *device, struct dma_fence *fence) +{ + int ret = msm_hw_fence_wait_update(kgsl_msm_hw_fence.handle, &fence, 1, true); + + if (ret) + dev_err_ratelimited(device->dev, + "Failed to add GMU as waiter ret:%d fence ctx:%llu ts:%llu\n", + ret, fence->context, fence->seqno); + + return ret; +} + +bool kgsl_hw_fence_tx_slot_available(struct kgsl_device *device, const atomic_t *hw_fence_count) +{ + void *ptr = kgsl_msm_hw_fence.mem_descriptor.virtual_addr; + struct msm_hw_fence_hfi_queue_header *hdr = (struct msm_hw_fence_hfi_queue_header *) + (ptr + sizeof(struct msm_hw_fence_hfi_queue_table_header)); + u32 queue_size_dwords = hdr->queue_size / sizeof(u32); + u32 payload_size_dwords = hdr->pkt_size / sizeof(u32); + u32 free_dwords, write_idx = hdr->write_index, read_idx = hdr->read_index; + u32 reserved_dwords = atomic_read(hw_fence_count) * payload_size_dwords; + + free_dwords = read_idx <= write_idx ? 
+ queue_size_dwords - (write_idx - read_idx) : + read_idx - write_idx; + + if (free_dwords - reserved_dwords <= payload_size_dwords) + return false; + + return true; +} + +void kgsl_hw_fence_destroy(struct kgsl_sync_fence *kfence) +{ + msm_hw_fence_destroy(kgsl_msm_hw_fence.handle, &kfence->fence); +} + +#define IPCC_GPU_PHYS_ID 4 +void kgsl_hw_fence_trigger_cpu(struct kgsl_device *device, struct kgsl_sync_fence *kfence) +{ + int ret = msm_hw_fence_update_txq(kgsl_msm_hw_fence.handle, kfence->hw_fence_index, + 0, 0); + if (ret) { + dev_err_ratelimited(device->dev, + "Failed to trigger hw fence via cpu: ctx:%d ts:%d ret:%d\n", + kfence->context_id, kfence->timestamp, ret); + return; + } + + msm_hw_fence_trigger_signal(kgsl_msm_hw_fence.handle, IPCC_GPU_PHYS_ID, + IPCC_CLIENT_APSS, 0); +} + +bool kgsl_hw_fence_signaled(struct dma_fence *fence) +{ + return test_bit(MSM_HW_FENCE_FLAG_SIGNALED_BIT, &fence->flags); +} + +bool kgsl_is_hw_fence(struct dma_fence *fence) +{ + return test_bit(MSM_HW_FENCE_FLAG_ENABLED_BIT, &fence->flags); +} + +#endif static struct kgsl_sync_fence *kgsl_sync_fence_create( struct kgsl_context *context, @@ -68,8 +321,8 @@ static void kgsl_sync_fence_release(struct dma_fence *fence) { struct kgsl_sync_fence *kfence = (struct kgsl_sync_fence *)fence; - if (test_bit(MSM_HW_FENCE_FLAG_ENABLED_BIT, &fence->flags)) - msm_hw_fence_destroy(kfence->hw_fence_handle, fence); + if (kgsl_is_hw_fence(fence)) + kgsl_hw_fence_destroy(kfence); kgsl_sync_timeline_put(kfence->parent); kfree(kfence); @@ -447,7 +700,7 @@ static void kgsl_count_hw_fences(struct kgsl_drawobj_sync_event *event, struct d if (event->syncobj->flags & KGSL_SYNCOBJ_SW) return; - if (!test_bit(MSM_HW_FENCE_FLAG_ENABLED_BIT, &fence->flags)) { + if (!kgsl_is_hw_fence(fence)) { /* Ignore software fences that are already signaled */ if (!dma_fence_is_signaled(fence)) event->syncobj->flags |= KGSL_SYNCOBJ_SW; diff --git a/kgsl_sync.h b/kgsl_sync.h index 5962318ce0..33cf060d8d 100644 --- 
a/kgsl_sync.h +++ b/kgsl_sync.h @@ -54,8 +54,6 @@ struct kgsl_sync_fence { unsigned int timestamp; /** @hw_fence_index: Index of hw fence in hw fence table */ u64 hw_fence_index; - /** @hw_fence_handle: Handle to the hw fence client */ - void *hw_fence_handle; }; /** @@ -115,6 +113,26 @@ bool is_kgsl_fence(struct dma_fence *f); void kgsl_sync_timeline_signal(struct kgsl_sync_timeline *ktimeline, u32 timestamp); +int kgsl_hw_fence_init(struct kgsl_device *device); + +void kgsl_hw_fence_close(struct kgsl_device *device); + +void kgsl_hw_fence_populate_md(struct kgsl_device *device, struct kgsl_memdesc *md); + +int kgsl_hw_fence_create(struct kgsl_device *device, struct kgsl_sync_fence *kfence); + +int kgsl_hw_fence_add_waiter(struct kgsl_device *device, struct dma_fence *fence); + +bool kgsl_hw_fence_tx_slot_available(struct kgsl_device *device, const atomic_t *hw_fence_count); + +void kgsl_hw_fence_destroy(struct kgsl_sync_fence *kfence); + +void kgsl_hw_fence_trigger_cpu(struct kgsl_device *device, struct kgsl_sync_fence *kfence); + +bool kgsl_hw_fence_signaled(struct dma_fence *fence); + +bool kgsl_is_hw_fence(struct dma_fence *fence); + #else static inline int kgsl_add_fence_event(struct kgsl_device *device, u32 context_id, u32 timestamp, void __user *data, int len, @@ -203,6 +221,56 @@ void kgsl_sync_timeline_signal(struct kgsl_sync_timeline *ktimeline, } +int kgsl_hw_fence_init(struct kgsl_device *device) +{ + return -EINVAL; +} + +void kgsl_hw_fence_close(struct kgsl_device *device) +{ + +} + +void kgsl_hw_fence_populate_md(struct kgsl_device *device, struct kgsl_memdesc *md) +{ + +} + +int kgsl_hw_fence_create(struct kgsl_device *device, struct kgsl_sync_fence *kfence) +{ + return -EINVAL; +} + +int kgsl_hw_fence_add_waiter(struct kgsl_device *device, struct dma_fence *fence) +{ + return -EINVAL; +} + +bool kgsl_hw_fence_tx_slot_available(struct kgsl_device *device, const atomic_t *hw_fence_count) +{ + return false; +} + +void kgsl_hw_fence_destroy(struct 
kgsl_sync_fence *kfence) +{ + +} + +void kgsl_hw_fence_trigger_cpu(struct kgsl_device *device, struct kgsl_sync_fence *kfence) +{ + +} + +bool kgsl_hw_fence_signaled(struct dma_fence *fence) +{ + return false; +} + +bool kgsl_is_hw_fence(struct dma_fence *fence) +{ + return false; +} + #endif /* CONFIG_SYNC_FILE */ #endif /* __KGSL_SYNC_H */ From 3c5204bc60fc5c1ef3c66726f9fc68cd2a952fb6 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Fri, 15 Dec 2023 12:00:14 +0530 Subject: [PATCH 0613/1016] kgsl: gen7: Remove redundant BV_PC perfcounter group definition Regular perfcounter group macro is used to program the select registers directly without programming aperture. But, aperture programming is needed for BV_PC group and it is already taken care. Thus, remove regular BV_PC perfcounter group definition. Change-Id: I7b6c9c6350cc9ba1f351cea56c12008d00483042 Signed-off-by: Kamal Agrawal --- adreno_gen7_perfcounter.c | 1 - 1 file changed, 1 deletion(-) diff --git a/adreno_gen7_perfcounter.c b/adreno_gen7_perfcounter.c index 4f0fefabb8..80f4095712 100644 --- a/adreno_gen7_perfcounter.c +++ b/adreno_gen7_perfcounter.c @@ -1134,7 +1134,6 @@ static const struct adreno_perfcount_group gen7_9_0_hwsched_perfcounter_groups gen7_counter_gmu_perf_enable, gen7_counter_read_norestore), GEN7_REGULAR_PERFCOUNTER_GROUP(UFC, ufc), GEN7_BV_REGULAR_PERFCOUNTER_GROUP(CP, cp), - GEN7_BV_REGULAR_PERFCOUNTER_GROUP(PC, pc), GEN7_BV_PERFCOUNTER_GROUP(PC, pc, gen7_counter_bv_enable, gen7_counter_read), GEN7_BV_REGULAR_PERFCOUNTER_GROUP(VFD, vfd), GEN7_BV_PERFCOUNTER_GROUP(VPC, vpc, gen7_counter_bv_enable, gen7_counter_read), From 575280759df253e64ca883474cc07b1fd807b154 Mon Sep 17 00:00:00 2001 From: Deepak Kumar Date: Fri, 15 Dec 2023 13:43:20 +0530 Subject: [PATCH 0614/1016] kgsl: Use unlocked variant of dma_buf_map/unmap_attachment() APIs dma-buf clients that don't take the reservation lock explicitly for dma_buf_map/unmap_attachment() APIs need to use unlocked variant of these APIs on 
kernel v6.2 and onwards. Change-Id: I01662d30ffb3862a6e6a65d3d56c454c665ee6f4 Signed-off-by: Deepak Kumar --- kgsl.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/kgsl.c b/kgsl.c index c1b3a281f8..d0ce0c57b5 100644 --- a/kgsl.c +++ b/kgsl.c @@ -332,7 +332,12 @@ static void kgsl_destroy_ion(struct kgsl_memdesc *memdesc) if (metadata != NULL) { remove_dmabuf_list(metadata); +#if (KERNEL_VERSION(6, 2, 0) <= LINUX_VERSION_CODE) + dma_buf_unmap_attachment_unlocked(metadata->attach, memdesc->sgt, + DMA_BIDIRECTIONAL); +#else dma_buf_unmap_attachment(metadata->attach, memdesc->sgt, DMA_BIDIRECTIONAL); +#endif dma_buf_detach(metadata->dmabuf, metadata->attach); dma_buf_put(metadata->dmabuf); kfree(metadata); @@ -3386,8 +3391,11 @@ static int kgsl_setup_dma_buf(struct kgsl_device *device, entry->memdesc.flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP); entry->memdesc.flags |= (uint64_t)KGSL_MEMFLAGS_USERMEM_ION; +#if (KERNEL_VERSION(6, 2, 0) <= LINUX_VERSION_CODE) + sg_table = dma_buf_map_attachment_unlocked(attach, DMA_BIDIRECTIONAL); +#else sg_table = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); - +#endif if (IS_ERR_OR_NULL(sg_table)) { ret = PTR_ERR(sg_table); goto out; @@ -3416,8 +3424,11 @@ static int kgsl_setup_dma_buf(struct kgsl_device *device, out: if (ret) { if (!IS_ERR_OR_NULL(sg_table)) +#if (KERNEL_VERSION(6, 2, 0) <= LINUX_VERSION_CODE) + dma_buf_unmap_attachment_unlocked(attach, sg_table, DMA_BIDIRECTIONAL); +#else dma_buf_unmap_attachment(attach, sg_table, DMA_BIDIRECTIONAL); - +#endif if (!IS_ERR_OR_NULL(attach)) dma_buf_detach(dmabuf, attach); From 45576e1960d7365264860ef5764877a2b3190829 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sat, 16 Dec 2023 16:36:20 +0530 Subject: [PATCH 0615/1016] kgsl: gen8: Update static IFPC powerup reglist Update static IFPC power up register list based on latest recommendation. 
Change-Id: I8d180c14524798dd424b75424cb3870d6855de7f Signed-off-by: Kamal Agrawal --- adreno_gen8.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index 2feaa5d6a0..70cc3244de 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -53,11 +53,8 @@ static const u32 gen8_pwrup_reglist[] = { static const u32 gen8_ifpc_pwrup_reglist[] = { GEN8_RBBM_NC_MODE_CNTL, GEN8_RBBM_SLICE_NC_MODE_CNTL, - GEN8_RBBM_GBIF_CLIENT_QOS_CNTL, GEN8_SP_NC_MODE_CNTL, GEN8_SP_READ_SEL, - GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_LO, - GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_HI, GEN8_SP_HLSQ_LPAC_GMEM_RANGE_MIN_LO, GEN8_SP_HLSQ_LPAC_GMEM_RANGE_MIN_HI, GEN8_SP_CHICKEN_BITS_1, @@ -85,7 +82,6 @@ static const u32 gen8_ifpc_pwrup_reglist[] = { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_17, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_18, GEN8_CP_PROTECT_CNTL_PIPE, - GEN8_CP_SMMU_STREAM_ID_LPAC, GEN8_CP_PROTECT_REG_GLOBAL, GEN8_CP_PROTECT_REG_GLOBAL + 1, GEN8_CP_PROTECT_REG_GLOBAL + 2, From ddf5c1a2e1facca0f592670109714168f42594aa Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 2 Nov 2023 23:43:47 +0530 Subject: [PATCH 0616/1016] kgsl: Add support for gen8_4_0 GPU Add an entry in the adreno gpulist to support gen8_4_0 GPU. 
Change-Id: I60304325d49d012d697e2d550ba8b0f3bfb969ef Signed-off-by: Kamal Agrawal --- adreno-gpulist.h | 32 +++++++++++++++++++++++++++++++- adreno.h | 1 + 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 8b705a803e..943cba3c23 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2695,6 +2695,36 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .bcl_data = 1, }; +static const struct adreno_gen8_core adreno_gpu_core_gen8_4_0 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_GEN8_4_0, + UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), + .compatible = "qcom,adreno-gpu-gen8-4-0", + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | + ADRENO_CONTENT_PROTECTION, + .gpudev = &adreno_gen8_hwsched_gpudev.base, + .perfcounters = &adreno_gen8_perfcounters, + .uche_gmem_alignment = SZ_64M, + .gmem_size = 12 * SZ_1M, + .bus_width = 32, + .snapshot_size = SZ_8M, + }, + .aqefw_name = "gen80000_aqe.fw", + .sqefw_name = "gen80000_sqe.fw", + .gmufw_name = "gen80000_gmu.bin", + .zap_name = "gen80000_zap.mbn", + .ao_hwcg = gen8_ao_hwcg_regs, + .ao_hwcg_count = ARRAY_SIZE(gen8_ao_hwcg_regs), + .gbif = gen8_0_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen8_0_0_gbif_cx_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = gen8_0_0_protected_regs, + .nonctxt_regs = gen8_0_0_nonctxt_regs, + .highest_bank_bit = 16, + .gmu_hub_clk_freq = 200000000, + .gen8_snapshot_block_list = &gen8_0_0_snapshot_block_list, +}; + static const struct adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_a306, /* Deprecated */ &adreno_gpu_core_a306a, /* Deprecated */ @@ -2744,5 +2774,5 @@ static const struct adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_gen7_9_1.base, &adreno_gpu_core_gen7_14_0.base, &adreno_gpu_core_gen8_0_0.base, - + &adreno_gpu_core_gen8_4_0.base, }; diff --git a/adreno.h b/adreno.h index 90838bfa85..231942d0d7 100644 --- a/adreno.h +++ b/adreno.h @@ -241,6 +241,7 @@ enum adreno_gpurev { ADRENO_REV_GEN7_9_1 = 
ADRENO_GPUREV_VALUE(7, 9, 1), ADRENO_REV_GEN7_14_0 = ADRENO_GPUREV_VALUE(7, 14, 0), ADRENO_REV_GEN8_0_0 = ADRENO_GPUREV_VALUE(8, 0, 0), + ADRENO_REV_GEN8_4_0 = ADRENO_GPUREV_VALUE(8, 4, 0), }; #define ADRENO_SOFT_FAULT BIT(0) From 516ac6ecb050e96554383e8781cc560750d91076 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Wed, 20 Dec 2023 13:52:55 -0800 Subject: [PATCH 0617/1016] msm: kgsl: Do not release dma and anon buffers if unmap fails If iommu unmap fails and leaves dma or anon buffers still mapped in the iommu, do not free them. Change-Id: Ice0e1a59c1ac0ee7a9d62d8899966b84fa63d5ca Signed-off-by: Lynus Vaz --- kgsl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kgsl.c b/kgsl.c index c1b3a281f8..e37dfae6e9 100644 --- a/kgsl.c +++ b/kgsl.c @@ -330,6 +330,9 @@ static void kgsl_destroy_ion(struct kgsl_memdesc *memdesc) struct kgsl_mem_entry, memdesc); struct kgsl_dma_buf_meta *metadata = entry->priv_data; + if (memdesc->priv & KGSL_MEMDESC_MAPPED) + return; + if (metadata != NULL) { remove_dmabuf_list(metadata); dma_buf_unmap_attachment(metadata->attach, memdesc->sgt, DMA_BIDIRECTIONAL); @@ -353,6 +356,9 @@ static void kgsl_destroy_anon(struct kgsl_memdesc *memdesc) struct scatterlist *sg; struct page *page; + if (memdesc->priv & KGSL_MEMDESC_MAPPED) + return; + for_each_sg(memdesc->sgt->sgl, sg, memdesc->sgt->nents, i) { page = sg_page(sg); for (j = 0; j < (sg->length >> PAGE_SHIFT); j++) { From 0375598f96fe1f1e22f4cb393aa84b5c3a4c2a39 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 21 Dec 2023 15:05:13 +0530 Subject: [PATCH 0618/1016] msm: kgsl: Hold mutex lock while mapping VBO buffer Currently, VBO buffers are mapped after releasing the target memdesc's mutex lock. This could introduce a race which can lead to use after free of VBO buffers. Thus, map VBO buffers inside mutex lock. 
Change-Id: I79e6f446005245aac56bb799585dcc1e523da5ff Signed-off-by: Kamal Agrawal --- kgsl_vbo.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kgsl_vbo.c b/kgsl_vbo.c index c7ef7d11e2..92d3d84c94 100644 --- a/kgsl_vbo.c +++ b/kgsl_vbo.c @@ -238,6 +238,11 @@ static int kgsl_memdesc_add_range(struct kgsl_mem_entry *target, } } + ret = kgsl_mmu_map_child(memdesc->pagetable, memdesc, start, + &entry->memdesc, offset, last - start + 1); + if (ret) + goto error; + /* Add the new range */ interval_tree_insert(&range->range, &memdesc->ranges); @@ -245,8 +250,7 @@ static int kgsl_memdesc_add_range(struct kgsl_mem_entry *target, range->entry, bind_range_len(range)); mutex_unlock(&memdesc->ranges_lock); - return kgsl_mmu_map_child(memdesc->pagetable, memdesc, start, - &entry->memdesc, offset, last - start + 1); + return ret; error: kgsl_mem_entry_put(range->entry); From 425c645fd3e8171ad587e7cd932d30d20859ce4e Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Mon, 25 Dec 2023 16:36:38 +0530 Subject: [PATCH 0619/1016] kgsl: Log current IB and AB in kgsl_buslevel trace During AB vote update, IB is logged as 0xFF (INVALID_DCVS_IDX) in kgsl_buslevel trace and vice-versa. Log current IB and AB information instead of invalid value. 
Change-Id: I96852d81fa5f6daed74a474ac35f8fb5dc1a6064 Signed-off-by: Kamal Agrawal --- adreno_a6xx_gmu.c | 2 +- adreno_a6xx_hwsched.c | 2 +- adreno_gen7_gmu.c | 2 +- adreno_gen7_hwsched.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index e3f8d29383..5e58de3d2f 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -2601,7 +2601,7 @@ static int a6xx_gmu_bus_set(struct adreno_device *adreno_dev, int buslevel, pwr->cur_ab = ab; } - trace_kgsl_buslevel(device, pwr->active_pwrlevel, buslevel, ab); + trace_kgsl_buslevel(device, pwr->active_pwrlevel, pwr->cur_buslevel, pwr->cur_ab); return ret; } diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index 8ad713e7c5..4c96710cc4 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -1118,7 +1118,7 @@ static int a6xx_hwsched_bus_set(struct adreno_device *adreno_dev, int buslevel, pwr->cur_ab = ab; } - trace_kgsl_buslevel(device, pwr->active_pwrlevel, buslevel, ab); + trace_kgsl_buslevel(device, pwr->active_pwrlevel, pwr->cur_buslevel, pwr->cur_ab); return ret; } diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index abe02eee8f..1eb89aa08b 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -2213,7 +2213,7 @@ static int gen7_gmu_bus_set(struct adreno_device *adreno_dev, int buslevel, pwr->cur_ab = ab; } - trace_kgsl_buslevel(device, pwr->active_pwrlevel, buslevel, ab); + trace_kgsl_buslevel(device, pwr->active_pwrlevel, pwr->cur_buslevel, pwr->cur_ab); return ret; } diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index fbaf606281..da13a6078e 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -1492,7 +1492,7 @@ static int gen7_hwsched_bus_set(struct adreno_device *adreno_dev, int buslevel, pwr->cur_ab = ab; } - trace_kgsl_buslevel(device, pwr->active_pwrlevel, buslevel, ab); + trace_kgsl_buslevel(device, pwr->active_pwrlevel, pwr->cur_buslevel, pwr->cur_ab); return ret; } From 
0ff2b80206bd5faa71dffd52466a17d04da85d29 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Mon, 25 Dec 2023 14:27:44 +0530 Subject: [PATCH 0620/1016] kgsl: Fix GMU based AB voting logic Current logic can compute incorrect AB vote. 1. When raw AB is greater than or equal to theoretical max_ab, quantized AB could either be truncated or calculated as 0xFFFF which is an invalid vote. 2. When raw AB is a very low value, quantized value could be rounded off to 0. But, AB is not expected to be 0 in above scenarios. Fix this by setting AB as: Case-1: maximum allowed AB value Case-2: minimum allowed non-zero AB value. Change-Id: I94a45b4f3d1733c3d9283e995d7bb5658ff14012 Signed-off-by: Kamal Agrawal --- adreno_gen7_gmu.c | 49 +++++++++++++++++++++++++++-------------------- adreno_gen8_gmu.c | 49 +++++++++++++++++++++++++++-------------------- kgsl_gmu_core.h | 3 ++- 3 files changed, 58 insertions(+), 43 deletions(-) diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index abe02eee8f..03d21b3257 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -2222,34 +2222,41 @@ static int gen7_gmu_bus_set(struct adreno_device *adreno_dev, int buslevel, u32 gen7_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab) { u16 vote = 0; + u32 max_bw, max_ab; struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct kgsl_pwrctrl *pwr = &device->pwrctrl; if (!adreno_dev->gmu_ab || (ab == INVALID_AB_VALUE)) return (FIELD_PREP(GENMASK(31, 16), INVALID_AB_VALUE)); - if (pwr->ddr_table[pwr->ddr_table_count - 1]) { - /* - * if ab is calculated as higher than theoretical max bandwidth, set ab as - * theoretical max to prevent truncation during quantization. 
- * - * max ddr bandwidth (kbps) = (Max bw in kbps per channel * number of channel) - * max ab (Mbps) = max ddr bandwidth (kbps) / 1000 - */ - u32 max_bw = pwr->ddr_table[pwr->ddr_table_count - 1] * NUM_CHANNELS; - u32 max_ab = max_bw / 1000; + /* + * max ddr bandwidth (kbps) = (Max bw in kbps per channel * number of channel) + * max ab (Mbps) = max ddr bandwidth (kbps) / 1000 + */ + max_bw = pwr->ddr_table[pwr->ddr_table_count - 1] * NUM_CHANNELS; + max_ab = max_bw / 1000; - ab = min_t(u32, ab, max_ab); - - /* - * Power FW supports a 16 bit AB BW level. We can quantize the entire vote-able BW - * range to a 16 bit space and the quantized value can be used to vote for AB though - * GMU. Quantization can be performed as below. - * - * quantized_vote = (ab vote (kbps) * 2^16) / max ddr bandwidth (kbps) - */ + /* + * If requested AB is higher than theoretical max bandwidth, set AB vote as max + * allowable quantized AB value. + * + * Power FW supports a 16 bit AB BW level. We can quantize the entire vote-able BW + * range to a 16 bit space and the quantized value can be used to vote for AB though + * GMU. Quantization can be performed as below. + * + * quantized_vote = (ab vote (kbps) * 2^16) / max ddr bandwidth (kbps) + */ + if (ab >= max_ab) + vote = MAX_AB_VALUE; + else vote = (u16)(((u64)ab * 1000 * (1 << 16)) / max_bw); - } + + /* + * Vote will be calculated as 0 for smaller AB values. + * Set a minimum non-zero vote in such cases. + */ + if (ab && !vote) + vote = 0x1; /* * Set ab enable mask and valid AB vote. req.bw is 32 bit value 0xABABENIB diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index fcef28361d..b57a745350 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -2071,34 +2071,41 @@ static int gen8_gmu_bus_set(struct adreno_device *adreno_dev, int buslevel, u32 gen8_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab) { u16 vote = 0; + u32 max_bw, max_ab; struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct kgsl_pwrctrl *pwr = &device->pwrctrl; if (!adreno_dev->gmu_ab || (ab == INVALID_AB_VALUE)) return (FIELD_PREP(GENMASK(31, 16), INVALID_AB_VALUE)); - if (pwr->ddr_table[pwr->ddr_table_count - 1]) { - /* - * if ab is calculated as higher than theoretical max bandwidth, set ab as - * theoretical max to prevent truncation during quantization. - * - * max ddr bandwidth (kbps) = (Max bw in kbps per channel * number of channel) - * max ab (Mbps) = max ddr bandwidth (kbps) / 1000 - */ - u32 max_bw = pwr->ddr_table[pwr->ddr_table_count - 1] * NUM_CHANNELS; - u32 max_ab = max_bw / 1000; + /* + * max ddr bandwidth (kbps) = (Max bw in kbps per channel * number of channel) + * max ab (Mbps) = max ddr bandwidth (kbps) / 1000 + */ + max_bw = pwr->ddr_table[pwr->ddr_table_count - 1] * NUM_CHANNELS; + max_ab = max_bw / 1000; - ab = min_t(u32, ab, max_ab); - - /* - * Power FW supports a 16 bit AB BW level. We can quantize the entire vote-able BW - * range to a 16 bit space and the quantized value can be used to vote for AB though - * GMU. Quantization can be performed as below. - * - * quantized_vote = (ab vote (kbps) * 2^16) / max ddr bandwidth (kbps) - */ + /* + * If requested AB is higher than theoretical max bandwidth, set AB vote as max + * allowable quantized AB value. + * + * Power FW supports a 16 bit AB BW level. We can quantize the entire vote-able BW + * range to a 16 bit space and the quantized value can be used to vote for AB though + * GMU. Quantization can be performed as below. 
+ * + * quantized_vote = (ab vote (kbps) * 2^16) / max ddr bandwidth (kbps) + */ + if (ab >= max_ab) + vote = MAX_AB_VALUE; + else vote = (u16)(((u64)ab * 1000 * (1 << 16)) / max_bw); - } + + /* + * Vote will be calculated as 0 for smaller AB values. + * Set a minimum non-zero vote in such cases. + */ + if (ab && !vote) + vote = 0x1; /* * Set ab enable mask and valid AB vote. req.bw is 32 bit value 0xABABENIB diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h index cba1b31130..cb30bc73d5 100644 --- a/kgsl_gmu_core.h +++ b/kgsl_gmu_core.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef __KGSL_GMU_CORE_H #define __KGSL_GMU_CORE_H @@ -21,6 +21,7 @@ #define MAX_BW_CMDS 8 #define INVALID_DCVS_IDX 0xFF #define INVALID_AB_VALUE 0xFFFF +#define MAX_AB_VALUE (0xFFFF - 1) #define INVALID_BW_VOTE (INVALID_DCVS_IDX | \ (FIELD_PREP(GENMASK(31, 16), INVALID_AB_VALUE))) #if MAX_CNOC_LEVELS > MAX_GX_LEVELS From 7f72d59f5e6ba226097dbd1fca7668787c110b10 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 19 Dec 2023 15:00:41 +0530 Subject: [PATCH 0621/1016] kgsl: gen8: Update uche gmem alignment for gen8_0_0 UCHE GMEM address should be 64M aligned for gen8_0_0. 
Change-Id: I04d97c00cb1f1a3e0972f4e767b182ea7b16ce88 Signed-off-by: Kamal Agrawal --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 6c69404a89..942987ff83 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2708,7 +2708,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { ADRENO_IFPC, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, - .uche_gmem_alignment = SZ_16M, + .uche_gmem_alignment = SZ_64M, .gmem_size = 12 * SZ_1M, .bus_width = 32, .snapshot_size = SZ_8M, From 5207f319ce87c2d25ad577f6f553c693c00cbdc0 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sun, 24 Dec 2023 12:03:25 +0530 Subject: [PATCH 0622/1016] kgsl: Discard force bus/clk/rail sysfs requests for gmu targets For GMU targets, GMU controls the bus/clk/rail voting. Currently, forcing clock and rail state doesn't have any impact for GMU targets. But, force bus on request can lead to invalid bus voting. Since these sysfs are not functional for GMU targets, discard the force bus/clk/rail sysfs requests. Change-Id: Icd5e0ab6f8f21bd6a339d75b3c73365195e9ac55 Signed-off-by: Kamal Agrawal --- kgsl_pwrctrl.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 52a2c477e9..9f72977988 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2010-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -704,6 +704,9 @@ static ssize_t __force_on_store(struct device *dev, struct kgsl_device *device = dev_get_drvdata(dev); int ret; + if (gmu_core_gpmu_isenabled(device)) + return -EOPNOTSUPP; + ret = kstrtou32(buf, 0, &val); if (ret) return ret; From 0acfb3e6c3eaf896a096b9c400bd1408f86603e5 Mon Sep 17 00:00:00 2001 From: Kaushal Sanadhya Date: Tue, 26 Dec 2023 11:57:21 +0530 Subject: [PATCH 0623/1016] kgsl: gen7: Update perfcounter list for CB unsupported targets Update perfcounter list for the targets with concurrent binning disabled. Currently this affects the Gen7_14_0 target. Change-Id: Idab3199b632630b1a3fc9753acc804541e540b78 Signed-off-by: Kaushal Sanadhya --- adreno-gpulist.h | 2 +- adreno_gen7.h | 1 + adreno_gen7_perfcounter.c | 42 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 32c9643e27..44c8657837 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2368,7 +2368,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_14_0 = { .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | ADRENO_PREEMPTION | ADRENO_IFPC | ADRENO_BCL | ADRENO_ACD, .gpudev = &adreno_gen7_gmu_gpudev.base, - .perfcounters = &adreno_gen7_perfcounters, + .perfcounters = &adreno_gen7_no_cb_perfcounters, .uche_gmem_alignment = 0, .gmem_size = SZ_1M, .bus_width = 32, diff --git a/adreno_gen7.h b/adreno_gen7.h index 5834a836f0..116977ac45 100644 --- a/adreno_gen7.h +++ b/adreno_gen7.h @@ -19,6 +19,7 @@ extern const struct adreno_power_ops gen7_gmu_power_ops; extern const struct adreno_power_ops gen7_hwsched_power_ops; extern const struct adreno_perfcounters adreno_gen7_perfcounters; extern const struct adreno_perfcounters adreno_gen7_hwsched_perfcounters; +extern const struct adreno_perfcounters adreno_gen7_no_cb_perfcounters; extern const struct adreno_perfcounters adreno_gen7_9_0_hwsched_perfcounters; struct gen7_gpudev { diff --git 
a/adreno_gen7_perfcounter.c b/adreno_gen7_perfcounter.c index 4f0fefabb8..231a2367d8 100644 --- a/adreno_gen7_perfcounter.c +++ b/adreno_gen7_perfcounter.c @@ -1194,6 +1194,43 @@ static const struct adreno_perfcount_group gen7_perfcounter_groups GEN7_BV_PERFCOUNTER_GROUP(HLSQ, hlsq, gen7_counter_bv_enable, gen7_counter_read), }; +/* These perfcounter groups are applicable to gen7 targets with concurrent binning disabled.*/ +static const struct adreno_perfcount_group gen7_no_cb_perfcounter_groups + [KGSL_PERFCOUNTER_GROUP_MAX] = { + GEN7_REGULAR_PERFCOUNTER_GROUP(CP, cp), + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, RBBM, rbbm, 0, + gen7_counter_enable, gen7_counter_read), + GEN7_PERFCOUNTER_GROUP(PC, pc, gen7_counter_enable, gen7_counter_read), + GEN7_PERFCOUNTER_GROUP(VFD, vfd, gen7_counter_inline_enable, gen7_counter_read), + GEN7_PERFCOUNTER_GROUP(HLSQ, hlsq, gen7_counter_enable, gen7_counter_read), + GEN7_PERFCOUNTER_GROUP(VPC, vpc, gen7_counter_enable, gen7_counter_read), + GEN7_REGULAR_PERFCOUNTER_GROUP(CCU, ccu), + GEN7_REGULAR_PERFCOUNTER_GROUP(CMP, cmp), + GEN7_PERFCOUNTER_GROUP(TSE, tse, gen7_counter_enable, gen7_counter_read), + GEN7_PERFCOUNTER_GROUP(RAS, ras, gen7_counter_enable, gen7_counter_read), + GEN7_PERFCOUNTER_GROUP(LRZ, lrz, gen7_counter_enable, gen7_counter_read), + GEN7_REGULAR_PERFCOUNTER_GROUP(UCHE, uche), + GEN7_PERFCOUNTER_GROUP(TP, tp, gen7_counter_inline_enable, gen7_counter_read), + GEN7_PERFCOUNTER_GROUP(SP, sp, gen7_counter_inline_enable, gen7_counter_read), + GEN7_REGULAR_PERFCOUNTER_GROUP(RB, rb), + GEN7_REGULAR_PERFCOUNTER_GROUP(VSC, vsc), + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, VBIF, gbif, 0, + gen7_counter_gbif_enable, gen7_counter_read_norestore), + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, VBIF_PWR, gbif_pwr, + ADRENO_PERFCOUNTER_GROUP_FIXED, + gen7_counter_gbif_pwr_enable, gen7_counter_read_norestore), + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, ALWAYSON, alwayson, + ADRENO_PERFCOUNTER_GROUP_FIXED, + gen7_counter_alwayson_enable, 
gen7_counter_alwayson_read), + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, GMU_XOCLK, gmu_xoclk, 0, + gen7_counter_gmu_xoclk_enable, gen7_counter_read_norestore), + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, GMU_GMUCLK, gmu_gmuclk, 0, + gen7_counter_gmu_gmuclk_enable, gen7_counter_read_norestore), + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, GMU_PERF, gmu_perf, 0, + gen7_counter_gmu_perf_enable, gen7_counter_read_norestore), + GEN7_REGULAR_PERFCOUNTER_GROUP(UFC, ufc), +}; + const struct adreno_perfcounters adreno_gen7_perfcounters = { gen7_perfcounter_groups, ARRAY_SIZE(gen7_perfcounter_groups), @@ -1204,6 +1241,11 @@ const struct adreno_perfcounters adreno_gen7_hwsched_perfcounters = { ARRAY_SIZE(gen7_hwsched_perfcounter_groups), }; +const struct adreno_perfcounters adreno_gen7_no_cb_perfcounters = { + gen7_no_cb_perfcounter_groups, + ARRAY_SIZE(gen7_no_cb_perfcounter_groups), +}; + const struct adreno_perfcounters adreno_gen7_9_0_hwsched_perfcounters = { gen7_9_0_hwsched_perfcounter_groups, ARRAY_SIZE(gen7_9_0_hwsched_perfcounter_groups), From 322d09f1c590c99feaae3b916645378f8636e931 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Fri, 5 Jan 2024 11:25:39 -0800 Subject: [PATCH 0624/1016] kgsl: Fix in_range macro redefined compilation error The in_range() macro is added in kernel v6.6. Rename local in_range() with kgsl_regmap_in_range() to fix compile error when -Wmacro-redefined compile flag is used. Change-Id: I13b0e3e29c9d0007716c4648982fe0749e69fcda Signed-off-by: Hareesh Gundu --- kgsl_regmap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kgsl_regmap.c b/kgsl_regmap.c index f16ec93947..6fcf305b95 100644 --- a/kgsl_regmap.c +++ b/kgsl_regmap.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -84,7 +84,7 @@ int kgsl_regmap_add_region(struct kgsl_regmap *regmap, struct platform_device *p return ret; } -#define in_range(a, base, len) \ +#define kgsl_regmap_in_range(a, base, len) \ (((a) >= (base)) && ((a) < ((base) + (len)))) struct kgsl_regmap_region *kgsl_regmap_get_region(struct kgsl_regmap *regmap, @@ -95,7 +95,7 @@ struct kgsl_regmap_region *kgsl_regmap_get_region(struct kgsl_regmap *regmap, for (i = 0; i < regmap->count; i++) { struct kgsl_regmap_region *region = ®map->region[i]; - if (in_range(offset, region->offset, region->size)) + if (kgsl_regmap_in_range(offset, region->offset, region->size)) return region; } From 23c1c3e4652e369313ae9e454a2de9b5e02f4535 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sun, 7 Jan 2024 21:44:04 +0530 Subject: [PATCH 0625/1016] kgsl: Dump cx_misc registers once in snapshot For few GPUs, cx_misc registers are dumped multiple times in snapshot: 1. As part of GPU register list 2. As part of snapshot block list Thus, remove it from GPU register list. Change-Id: I3d159748b0c1670ca827430e11e25e239fec698c Signed-off-by: Kamal Agrawal --- adreno_gen7_14_0_snapshot.h | 3 +-- adreno_gen8_0_0_snapshot.h | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/adreno_gen7_14_0_snapshot.h b/adreno_gen7_14_0_snapshot.h index cdacf04192..b46ba73182 100644 --- a/adreno_gen7_14_0_snapshot.h +++ b/adreno_gen7_14_0_snapshot.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __ADRENO_GEN7_14_0_SNAPSHOT_H #define __ADRENO_GEN7_14_0_SNAPSHOT_H @@ -443,7 +443,6 @@ static struct gen7_shader_block gen7_14_0_shader_blocks[] = { static struct gen7_reg_list gen7_14_0_reg_list[] = { { gen7_14_0_gpu_registers, NULL }, - { gen7_14_0_cx_misc_registers, NULL }, { gen7_14_0_dbgc_registers, NULL }, { NULL, NULL }, }; diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h index 479935ed34..b9e9caba6f 100644 --- a/adreno_gen8_0_0_snapshot.h +++ b/adreno_gen8_0_0_snapshot.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef __ADRENO_GEN8_0_0_SNAPSHOT_H @@ -1888,7 +1888,6 @@ static struct gen8_cp_indexed_reg gen8_0_0_cp_mempool_reg_list[] = { static struct gen8_reg_list gen8_0_0_reg_list[] = { { UNSLICE, gen8_0_0_gpu_registers }, { SLICE, gen8_0_0_gpu_slice_registers }, - { UNSLICE, gen8_0_0_cx_misc_registers }, { UNSLICE, gen8_0_0_dbgc_registers }, { SLICE, gen8_0_0_dbgc_slice_registers }, { UNSLICE, gen8_0_0_cx_dbgc_registers }, From 6259936a5580a89b5dae82d63b0de1ee66247b69 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sun, 7 Jan 2024 23:20:21 +0530 Subject: [PATCH 0626/1016] kgsl: Send NMI to GMU for GMU faults NMI is sent to GMU only when ADRENO_GMU_FAULT fault flag is set. But, currently driver sets ADRENO_HARD_FAULT for few GMU faults. Fix this by setting ADRENO_GMU_FAULT flag for GMU faults. 
Change-Id: I4bbb7de3033ef92189d20f9706f7d32db8ca1110 Signed-off-by: Kamal Agrawal --- adreno.h | 7 +------ adreno_a6xx_hwsched.c | 6 +++--- adreno_a6xx_hwsched_hfi.c | 12 ++++++------ adreno_gen7_hwsched.c | 6 +++--- adreno_gen7_hwsched_hfi.c | 12 ++++++------ adreno_gen8_hwsched.c | 6 +++--- adreno_gen8_hwsched_hfi.c | 12 ++++++------ 7 files changed, 28 insertions(+), 33 deletions(-) diff --git a/adreno.h b/adreno.h index 1c52fa2d68..166394a710 100644 --- a/adreno.h +++ b/adreno.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef __ADRENO_H #define __ADRENO_H @@ -1410,11 +1410,6 @@ static inline void adreno_set_gpu_fault(struct adreno_device *adreno_dev, smp_wmb(); } -static inline bool adreno_gmu_gpu_fault(struct adreno_device *adreno_dev) -{ - return adreno_gpu_fault(adreno_dev) & ADRENO_GMU_FAULT; -} - /** * adreno_clear_gpu_fault() - Clear the GPU fault register * @adreno_dev: A pointer to an adreno_device structure diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index 8ad713e7c5..0b248a3a27 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -1043,7 +1043,7 @@ static int a6xx_hwsched_dcvs_set(struct adreno_device *adreno_dev, * dispatcher based reset and recovery. 
*/ if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) - adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); } if (req.freq != INVALID_DCVS_IDX) @@ -1183,7 +1183,7 @@ void a6xx_hwsched_handle_watchdog(struct adreno_device *adreno_dev) dev_err_ratelimited(&gmu->pdev->dev, "GMU watchdog expired interrupt received\n"); - adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); } static void a6xx_hwsched_pm_resume(struct adreno_device *adreno_dev) diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index f7582c2f93..42889498f6 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -551,7 +551,7 @@ static void a6xx_hwsched_process_dbgq(struct adreno_device *adreno_dev, bool lim if (!recovery) return; - adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); } /* HFI interrupt handler */ @@ -591,7 +591,7 @@ static irqreturn_t a6xx_hwsched_hfi_handler(int irq, void *data) dev_err_ratelimited(&gmu->pdev->dev, "GMU CM3 fault interrupt received\n"); - adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); } /* Ignore OOB bits */ @@ -1715,7 +1715,7 @@ static int hfi_context_register(struct adreno_device *adreno_dev, context->id, ret); if (device->gmu_fault) - adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); return ret; } @@ -1727,7 +1727,7 @@ static int hfi_context_register(struct adreno_device *adreno_dev, context->id, ret); if (device->gmu_fault) - adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); return ret; } @@ -2074,7 +2074,7 @@ static int send_context_unregister_hfi(struct adreno_device *adreno_dev, */ adreno_drawctxt_set_guilty(device, context); - adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); goto done; } diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index fbaf606281..97f0e6ba2a 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -1406,7 +1406,7 @@ static int gen7_hwsched_dcvs_set(struct adreno_device *adreno_dev, * dispatcher based reset and recovery. 
*/ if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) - gen7_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); } if (req.freq != INVALID_DCVS_IDX) @@ -1572,7 +1572,7 @@ void gen7_hwsched_handle_watchdog(struct adreno_device *adreno_dev) dev_err_ratelimited(&gmu->pdev->dev, "GMU watchdog expired interrupt received\n"); - gen7_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); } static void gen7_hwsched_drain_ctxt_unregister(struct adreno_device *adreno_dev) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 21a0d3d074..ef6a00ffb2 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -908,7 +908,7 @@ static void gen7_process_syncobj_query_work(struct kthread_work *work) dev_err(&gmu->pdev->dev, "Missing sync object ctx:%d ts:%d retired:%d\n", context->id, cmd->sync_obj_ts, hdr->sync_obj_ts); gmu_core_fault_snapshot(device); - gen7_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); } } @@ -1366,7 +1366,7 @@ static irqreturn_t gen7_hwsched_hfi_handler(int irq, void *data) dev_err_ratelimited(&gmu->pdev->dev, "GMU CM3 fault interrupt received\n"); - gen7_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); } /* Ignore OOB bits */ @@ -2947,7 +2947,7 @@ static int hfi_context_register(struct adreno_device *adreno_dev, context->id, ret); if (device->gmu_fault) - gen7_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); return ret; } @@ -2959,7 +2959,7 @@ static int hfi_context_register(struct adreno_device *adreno_dev, context->id, ret); if (device->gmu_fault) - gen7_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); return ret; } @@ -4021,7 +4021,7 @@ static int send_context_unregister_hfi(struct adreno_device *adreno_dev, * Make sure we send all fences from this context to the TxQueue after recovery */ move_detached_context_hardware_fences(adreno_dev, drawctxt); - gen7_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); goto done; } diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index c6f5b35646..db1426896a 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -1375,7 +1375,7 @@ static int gen8_hwsched_dcvs_set(struct adreno_device *adreno_dev, * dispatcher based reset and recovery. */ if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) - gen8_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); } if (req.freq != INVALID_DCVS_IDX) @@ -1537,7 +1537,7 @@ void gen8_hwsched_handle_watchdog(struct adreno_device *adreno_dev) dev_err_ratelimited(&gmu->pdev->dev, "GMU watchdog expired interrupt received\n"); - gen8_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); } static void gen8_hwsched_drain_ctxt_unregister(struct adreno_device *adreno_dev) diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 1af4ea8ce5..106e4d7fef 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -921,7 +921,7 @@ static void gen8_process_syncobj_query_work(struct kthread_work *work) dev_err(&gmu->pdev->dev, "Missing sync object ctx:%d ts:%d retired:%d\n", context->id, cmd->sync_obj_ts, hdr->sync_obj_ts); gmu_core_fault_snapshot(device); - gen8_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); } } @@ -1379,7 +1379,7 @@ static irqreturn_t gen8_hwsched_hfi_handler(int irq, void *data) dev_err_ratelimited(&gmu->pdev->dev, "GMU CM3 fault interrupt received\n"); - gen8_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); } /* Ignore OOB bits */ @@ -2954,7 +2954,7 @@ static int hfi_context_register(struct adreno_device *adreno_dev, context->id, ret); if (device->gmu_fault) - gen8_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); return ret; } @@ -2966,7 +2966,7 @@ static int hfi_context_register(struct adreno_device *adreno_dev, context->id, ret); if (device->gmu_fault) - gen8_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); return ret; } @@ -4020,7 +4020,7 @@ static int send_context_unregister_hfi(struct adreno_device *adreno_dev, * Make sure we send all fences from this context to the TxQueue after recovery */ move_detached_context_hardware_fences(adreno_dev, drawctxt); - gen8_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); goto done; } From ae3c12de4b16b5e6aa75e2ea051b38d9db6be982 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Wed, 3 Jan 2024 23:37:06 +0530 Subject: [PATCH 0627/1016] kgsl: gen8: Add non context register list for gen8_4_0 GPU Value of RBBM_GBIF_CLIENT_QOS_CNTL is different for gen8_4_0 GPU as compared to gen8_0_0 GPU. Thus, use separate non-context register list for gen8_4_0. 
Change-Id: I012a2f363c4f0a34c5f3d41be872dd4397ae3cf5 Signed-off-by: Kamal Agrawal --- adreno-gpulist.h | 63 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 64289bce75..12e3b03011 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #define ANY_ID (~0) @@ -2731,6 +2731,65 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .bcl_data = 1, }; +/* GEN8_4_0 noncontext register list */ +static const struct gen8_nonctxt_regs gen8_4_0_nonctxt_regs[] = { + { GEN8_CP_SMMU_STREAM_ID_LPAC, 0x00000101, BIT(PIPE_NONE) }, + { GEN8_DBGC_CFG_DBGBUS_CNTLT, 0xf0004000, BIT(PIPE_NONE) }, + { GEN8_DBGC_CFG_DBGBUS_MASKL_0, 0x00000003, BIT(PIPE_NONE) }, + { GEN8_DBGC_CFG_DBGBUS_MASKL_2, 0xffffffff, BIT(PIPE_NONE) }, + { GEN8_DBGC_CFG_DBGBUS_MASKL_3, 0xffffffff, BIT(PIPE_NONE) }, + { GEN8_DBGC_CFG_DBGBUS_BYTEL_0, 0x00000008, BIT(PIPE_NONE) }, + { GEN8_DBGC_CFG_DBGBUS_BYTEL_1, 0x76543210, BIT(PIPE_NONE) }, + { GEN8_GRAS_DBG_ECO_CNTL, 0x00000800, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_PC_AUTO_VERTEX_STRIDE, 0x00000001, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_PC_VIS_STREAM_CNTL, 0x10010000, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_PC_CONTEXT_SWITCH_STABILIZE_CNTL_1, 0x00000002, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_PC_CHICKEN_BITS_1, 0x00000003, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_PC_CHICKEN_BITS_2, 0x00000200, BIT(PIPE_BR) | BIT(PIPE_BV) }, + /* Disable BR throttling */ + { GEN8_PC_CHICKEN_BITS_3, 0x00400000, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_PC_CHICKEN_BITS_4, 0x00500050, BIT(PIPE_BR) | BIT(PIPE_BV) }, + /* Configure GBIF GX registers */ + { 
GEN8_UCHE_GBIF_GX_CONFIG, 0x010240e0, BIT(PIPE_NONE) }, + /* Reset value of RBBM_GBIF_CLIENT_QOS_CNTL is 0. So, no need to program explicitly. */ + /* Enable full concurrent resolve and unresolves */ + { GEN8_RB_CCU_CNTL, 0x00000068, BIT(PIPE_BR) }, + { GEN8_RB_GC_GMEM_PROTECT, 0x0c000000, BIT(PIPE_BR) }, + /* Configure number of outstanding transactions to 32 */ + { GEN8_RB_RESOLVE_PREFETCH_CNTL, 0x00000007, BIT(PIPE_BR) }, + /* Disable ubwc merged UFC request feature */ + { GEN8_RB_CMP_DBG_ECO_CNTL, 0x00004000, BIT(PIPE_BR) }, + { GEN8_RBBM_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, + { GEN8_RBBM_SLICE_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, + /* Limit the number of wave-slots for Eviction buffer to 1 per ALU GRP */ + { GEN8_SP_CHICKEN_BITS_1, BIT(26), BIT(PIPE_NONE) }, + { GEN8_SP_CHICKEN_BITS_2, 0x00800000, BIT(PIPE_NONE) }, + { GEN8_SP_CHICKEN_BITS_3, 0x00300000, BIT(PIPE_NONE) }, + { GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, 0x00000080, BIT(PIPE_NONE) }, + { GEN8_SP_READ_SEL, 0x0001ff00, BIT(PIPE_NONE) }, + { GEN8_TPL1_DBG_ECO_CNTL, 0x10000000, BIT(PIPE_NONE) }, + /* Enable cubemap small miplevel optimization settings */ + { GEN8_TPL1_DBG_ECO_CNTL1, 0x00000724, BIT(PIPE_NONE) }, + /* Disable tag bank id hashing */ + { GEN8_UCHE_MODE_CNTL, 0x000a0000, BIT(PIPE_NONE) }, + { GEN8_UCHE_CCHE_MODE_CNTL, 0x00001000, BIT(PIPE_NONE) }, + /* Limit gmem number of ways for GMEM requests in each set */ + { GEN8_UCHE_CCHE_CACHE_WAYS, 0x00000800, BIT(PIPE_NONE)}, + { GEN8_UCHE_CACHE_WAYS, 0x00080000, BIT(PIPE_NONE) }, + /* Configure UCHE to CCU switchthreshold timeout cycles */ + { GEN8_UCHE_VARB_IDLE_TIMEOUT, 0x00000020, BIT(PIPE_NONE) }, + { GEN8_VFD_DBG_ECO_CNTL, 0x00008000, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_VFD_CB_BV_THRESHOLD, 0x00500050, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_VFD_CB_BR_THRESHOLD, 0x00600060, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_VFD_CB_BUSY_REQ_CNT, 0x00200020, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_VFD_CB_LP_REQ_CNT, 0x00000020, BIT(PIPE_BR) 
| BIT(PIPE_BV) }, + { GEN8_VPC_FLATSHADE_MODE_CNTL, 0x00000001, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_VSC_BIN_SIZE, 0x00010001, BIT(PIPE_NONE) }, + /* Disable redundant tile data optimization */ + { GEN8_VSC_KMD_DBG_ECO_CNTL, BIT(11), BIT(PIPE_NONE)}, + { 0 }, +}; + static const struct adreno_gen8_core adreno_gpu_core_gen8_4_0 = { .base = { DEFINE_ADRENO_REV(ADRENO_REV_GEN8_4_0, @@ -2755,7 +2814,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_4_0 = { .gbif_count = ARRAY_SIZE(gen8_0_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = gen8_0_0_protected_regs, - .nonctxt_regs = gen8_0_0_nonctxt_regs, + .nonctxt_regs = gen8_4_0_nonctxt_regs, .highest_bank_bit = 16, .gmu_hub_clk_freq = 200000000, .gen8_snapshot_block_list = &gen8_0_0_snapshot_block_list, From a23d029719c7a174c90b2be7f129e9ae2a7ab9cd Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Fri, 12 Jan 2024 23:21:03 +0530 Subject: [PATCH 0628/1016] kgsl: gen8: Add STCHE to UCHE clients list A new UCHE client named STCHE is added in gen8. Thus, update the UCHE clients list. Change-Id: I931009fca689e91f5706b4e93b889e29198792ca Signed-off-by: Kamal Agrawal --- adreno_gen8.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index 70cc3244de..470e984522 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -1521,7 +1521,8 @@ static const char *const uche_client[] = { "BR_VFD", "BR_SP", "BR_VSC", "BR_VPC", "BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP", "BV_VFD", "BV_SP", "BV_VSC", "BV_VPC", - "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP" + "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP", + "STCHE", }; static const char *const uche_lpac_client[] = { From 4131ef0ad2d38cb9a438756de94bf9fca0d51f0e Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Tue, 2 Jan 2024 19:39:16 +0530 Subject: [PATCH 0629/1016] kgsl: Prevent wrapped around VA range allocation Sanitize size param in gpumem_alloc_vbo_entry() to avoid integer overflow in _get_unmapped_area(). This overflow may end up in allocating a wrapped-around VA range, which can overlap with already in use VA. Change-Id: I5b4e74ce8c8e8e3323822efe40abf1f355fd3bc3 Signed-off-by: Pankaj Gupta --- kgsl.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/kgsl.c b/kgsl.c index c142097789..1ed26ae85f 100644 --- a/kgsl.c +++ b/kgsl.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -4097,6 +4097,14 @@ static u64 cap_alignment(struct kgsl_device *device, u64 flags) return flags | FIELD_PREP(KGSL_MEMALIGN_MASK, align); } +static u64 gpumem_max_va_size(struct kgsl_pagetable *pt, u64 flags) +{ + if (flags & KGSL_MEMFLAGS_FORCE_32BIT) + return pt->compat_va_end - pt->compat_va_start; + + return pt->va_end - pt->va_start; +} + static struct kgsl_mem_entry * gpumem_alloc_vbo_entry(struct kgsl_device_private *dev_priv, u64 size, u64 flags) @@ -4105,11 +4113,9 @@ gpumem_alloc_vbo_entry(struct kgsl_device_private *dev_priv, struct kgsl_device *device = dev_priv->device; struct kgsl_memdesc *memdesc; struct kgsl_mem_entry *entry; + struct kgsl_pagetable *pt; int ret; - if (!size) - return ERR_PTR(-EINVAL); - /* Disallow specific flags */ if (flags & (KGSL_MEMFLAGS_GPUREADONLY | KGSL_CACHEMODE_MASK)) return ERR_PTR(-EINVAL); @@ -4128,6 +4134,12 @@ gpumem_alloc_vbo_entry(struct kgsl_device_private *dev_priv, if ((flags & KGSL_MEMFLAGS_SECURE) && !check_and_warn_secured(device)) return ERR_PTR(-EOPNOTSUPP); + pt = (flags & KGSL_MEMFLAGS_SECURE) ? + device->mmu.securepagetable : private->pagetable; + + if (!size || (size > gpumem_max_va_size(pt, flags))) + return ERR_PTR(-EINVAL); + flags = cap_alignment(device, flags); entry = kgsl_mem_entry_create(); From cf287a02f748ceb44c09a93f3b69900639b3c8f2 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sat, 6 Jan 2024 17:14:47 +0530 Subject: [PATCH 0630/1016] kgsl: Increase size of global address range Size of preemption record global buffer has increased from ~16MB in gen7 to ~55MB in gen8. Thus, increase the size of global address range from 20MB to 60MB. 
Change-Id: I68982e07f58f4e400fd4614b271f9fd09c841253 Signed-off-by: Kamal Agrawal --- kgsl_iommu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kgsl_iommu.h b/kgsl_iommu.h index b6f8d362bf..4f70ef5c34 100644 --- a/kgsl_iommu.h +++ b/kgsl_iommu.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef __KGSL_IOMMU_H #define __KGSL_IOMMU_H @@ -12,7 +12,7 @@ * These defines control the address range for allocations that * are mapped into all pagetables. */ -#define KGSL_IOMMU_GLOBAL_MEM_SIZE (20 * SZ_1M) +#define KGSL_IOMMU_GLOBAL_MEM_SIZE (60 * SZ_1M) #define KGSL_IOMMU_GLOBAL_MEM_BASE32 0xf8000000 #define KGSL_IOMMU_GLOBAL_MEM_BASE64 \ (KGSL_MEMSTORE_TOKEN_ADDRESS - KGSL_IOMMU_GLOBAL_MEM_SIZE) From dec46e88db598fbead21f6ef262d3d70865ed581 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Fri, 19 Jan 2024 08:38:17 -0800 Subject: [PATCH 0631/1016] msm: kgsl: Keep the timeline fence valid for logging The timeline fence needs to remain valid for logging purposes. Take an extra refcount on the timeline dma_fence to make sure it doesn't go away till we're done with it. Change-Id: I6670ef7add099a72684c1fe20ed009dff85d1f27 Signed-off-by: Lynus Vaz --- kgsl_drawobj.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kgsl_drawobj.c b/kgsl_drawobj.c index 82233b30bf..ab98fa2170 100644 --- a/kgsl_drawobj.c +++ b/kgsl_drawobj.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2016-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ /* @@ -588,6 +588,8 @@ static int drawobj_add_sync_timeline(struct kgsl_device *device, /* Set pending flag before adding callback to avoid race */ set_bit(event->id, &syncobj->pending); + /* Get a dma_fence refcount to hand over to the callback */ + dma_fence_get(event->fence); ret = dma_fence_add_callback(event->fence, &event->cb, drawobj_sync_timeline_fence_callback); @@ -602,11 +604,16 @@ static int drawobj_add_sync_timeline(struct kgsl_device *device, ret = 0; } + /* Put the refcount from fence creation */ + dma_fence_put(event->fence); kgsl_drawobj_put(drawobj); return ret; } trace_syncpoint_timeline_fence(event->syncobj, event->fence, false); + + /* Put the refcount from fence creation */ + dma_fence_put(event->fence); return 0; } From 7123a05cddc5f5e0dbae417f945bded1ea0b2381 Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Fri, 12 Jan 2024 11:17:07 -0800 Subject: [PATCH 0632/1016] kgsl: gen8: Update the reset value of VFD_CB_LP_REQ_CNT for gen8_0_0 VFD_CB_LP_REQ_CNT holds a buffer reservation value which was insufficient to allow out of order execution (between BR and BV) of one full wave. Hence increase this buffer size to 16 to ensure switch between BR and BV is allowed only when VFD has enough space to execute at least 1 full wave. Change-Id: Ia70370b7706df58848957b47d3f5d01a21291ffa Signed-off-by: Urvashi Agrawal --- adreno-gpulist.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 64289bce75..00bd11ed3f 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #define ANY_ID (~0) @@ -2631,7 +2631,7 @@ static const struct gen8_nonctxt_regs gen8_0_0_nonctxt_regs[] = { { GEN8_VFD_CB_BV_THRESHOLD, 0x00500050, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_VFD_CB_BR_THRESHOLD, 0x00600060, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_VFD_CB_BUSY_REQ_CNT, 0x00200020, BIT(PIPE_BR) | BIT(PIPE_BV) }, - { GEN8_VFD_CB_LP_REQ_CNT, 0x00000020, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_VFD_CB_LP_REQ_CNT, 0x00100020, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_VPC_FLATSHADE_MODE_CNTL, 0x00000001, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_VSC_BIN_SIZE, 0x00010001, BIT(PIPE_NONE) }, /* Disable redundant tile data optimization */ From 669ceb2b4e3d099ca463363c62d852957faff871 Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Thu, 4 Jan 2024 13:41:41 -0800 Subject: [PATCH 0633/1016] msm: kgsl: Remove some SP registers from snapshot for gen8_0_0 Some SP registers under the HLSQ_DP_STR location are already included in the HLSQ_DP location, so we do not need to dump them again with a new location tag. Remove these from the snapshot. Change-Id: I6cc8da349383e42709c81114e9bf615e15def369 Signed-off-by: Urvashi Agrawal --- adreno_gen8_0_0_snapshot.h | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h index 479935ed34..d413b08826 100644 --- a/adreno_gen8_0_0_snapshot.h +++ b/adreno_gen8_0_0_snapshot.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __ADRENO_GEN8_0_0_SNAPSHOT_H @@ -676,21 +676,6 @@ static const u32 gen8_0_0_non_context_sp_pipe_br_usptp_registers[] = { }; static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_sp_pipe_br_usptp_registers), 8)); -/* - * Block : ['SP'] - * REGION : UNSLICE - * Pipeline: PIPE_NONE - * Cluster : CLUSTER_NONE - * Location: HLSQ_DP_STR - * pairs : 5 (Regs:18) - */ -static const u32 gen8_0_0_non_context_sp_pipe_br_hlsq_dp_str_registers[] = { - 0x0ae05, 0x0ae05, 0x0ae60, 0x0ae65, 0x0ae6b, 0x0ae6c, 0x0ae73, 0x0ae75, - 0x0aec0, 0x0aec5, - UINT_MAX, UINT_MAX, -}; -static_assert(IS_ALIGNED(sizeof(gen8_0_0_non_context_sp_pipe_br_hlsq_dp_str_registers), 8)); - /* * Block : ['TPL1'] * REGION : UNSLICE @@ -1672,8 +1657,6 @@ static struct gen8_sptp_cluster_registers gen8_0_0_sptp_clusters[] = { gen8_0_0_non_context_sp_pipe_br_sp_top_registers, 0xae00}, { CLUSTER_NONE, UNSLICE, 2, 2, SP_NCTX_REG, PIPE_BR, 0, USPTP, gen8_0_0_non_context_sp_pipe_br_usptp_registers, 0xae00}, - { CLUSTER_NONE, UNSLICE, 2, 2, SP_NCTX_REG, PIPE_BR, 0, HLSQ_DP_STR, - gen8_0_0_non_context_sp_pipe_br_hlsq_dp_str_registers, 0xae00}, { CLUSTER_NONE, UNSLICE, 2, 2, TP0_NCTX_REG, PIPE_BR, 0, USPTP, gen8_0_0_non_context_tpl1_pipe_br_usptp_registers, 0xb600}, { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE, From 8f9074d42f5576fd4cbb179535bb039472542408 Mon Sep 17 00:00:00 2001 From: NISARG SHETH Date: Fri, 5 Jan 2024 11:53:34 +0530 Subject: [PATCH 0634/1016] kgsl: gen8: Enable LPAC and AQE features for gen8_4_0 Enable LPAC and AQE feature for gen8_4_0 GPU to support additional graphics functionality. 
Change-Id: I66887d0508ca4b74d08921e1c5d2ac0eded25626 Signed-off-by: NISARG SHETH --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 12e3b03011..73d1255b8a 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2796,7 +2796,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_4_0 = { UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), .compatible = "qcom,adreno-gpu-gen8-4-0", .features = ADRENO_APRIV | ADRENO_IOCOHERENT | - ADRENO_CONTENT_PROTECTION, + ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_64M, From 9f8f4005bae480c548ec718c138435faaef08c1f Mon Sep 17 00:00:00 2001 From: Carter Cooper Date: Wed, 1 Nov 2023 14:06:51 -0600 Subject: [PATCH 0635/1016] kgsl: gen8: Add support for non context overrides Allow overriding specific non context registers through debugfs. Example usage of the non context overrides i) To overwrite noncontext register: echo ":" > /sys/kernel/debug/kgsl/kgsl-3d0/nc_override" ii) To view overridden noncontext register list cat /sys/kernel/debug/kgsl/kgsl-3d0/nc_override Change-Id: I21589bc15e1757c60c35c3fe52da7d6869aeee75 Signed-off-by: Carter Cooper Signed-off-by: Hareesh Gundu --- adreno_gen8.c | 241 +++++++++++++++++++++++++++++++++++++++++++++++++- adreno_gen8.h | 29 +++++- gen8_reg.h | 10 ++- 3 files changed, 277 insertions(+), 3 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index 70cc3244de..89900419f8 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -159,6 +159,48 @@ static const struct gen8_pwrup_extlist gen8_0_0_pwrup_extlist[] = { { GEN8_VFD_DBG_ECO_CNTL, BIT(PIPE_BR) | BIT(PIPE_BV)}, }; +struct gen8_nonctxt_overrides gen8_nc_overrides[] = { + { GEN8_UCHE_MODE_CNTL, BIT(PIPE_NONE), 0, 0, 0, }, + { GEN8_UCHE_CACHE_WAYS, BIT(PIPE_NONE), 0, 0, 0, }, + { GEN8_UCHE_CLIENT_PF, BIT(PIPE_NONE), 0, 0, 0, }, + { GEN8_UCHE_DBG_ECO_CNTL_0, BIT(PIPE_NONE), 0, 0, 2, }, + { GEN8_UCHE_HW_DBG_CNTL, BIT(PIPE_NONE), 0, 0, 2, }, + { GEN8_UCHE_CCHE_HW_DBG_CNTL, BIT(PIPE_NONE), 0, 0, 2, }, + { GEN8_RB_DBG_ECO_CNTL, BIT(PIPE_BR), 0, 0, 3, }, + { GEN8_RB_CCU_DBG_ECO_CNTL, BIT(PIPE_BR), 0, 0, 3, }, + { GEN8_RB_CCU_CNTL, BIT(PIPE_BR), 0, 0, 0, }, + { GEN8_RB_CCU_NC_MODE_CNTL, BIT(PIPE_BR), 0, 0, 0, }, + { GEN8_RB_SLICE_UFC_PREFETCH_CNTL, BIT(PIPE_BR), 0, 0, 3, }, + { GEN8_RB_SLICE_UFC_DBG_CNTL, BIT(PIPE_BR), 0, 0, 3, }, + { GEN8_RB_CMP_NC_MODE_CNTL, BIT(PIPE_BR), 0, 0, 0, }, + { GEN8_RB_RESOLVE_PREFETCH_CNTL, BIT(PIPE_BR), 0, 0, 0, }, + { GEN8_RB_CMP_DBG_ECO_CNTL, BIT(PIPE_BR), 0, 0, 0, }, + { GEN8_RB_UFC_DBG_CNTL, BIT(PIPE_BR), 0, 0, 3, }, + { GEN8_PC_CHICKEN_BITS_1, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, + { GEN8_PC_CHICKEN_BITS_2, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, + { GEN8_PC_CHICKEN_BITS_3, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, + { GEN8_PC_CHICKEN_BITS_4, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, + { GEN8_PC_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 3, }, + { GEN8_VFD_CB_LP_REQ_CNT, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, }, + { GEN8_VFD_CB_BUSY_REQ_CNT, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, }, + { GEN8_VFD_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, }, + { GEN8_SP_DBG_ECO_CNTL, BIT(PIPE_NONE), 0, 0, 1, }, + { GEN8_SP_NC_MODE_CNTL, BIT(PIPE_NONE), 0, 0, 0, }, + { GEN8_SP_CHICKEN_BITS, BIT(PIPE_NONE), 0, 0, 1, }, + { GEN8_SP_NC_MODE_CNTL_2, BIT(PIPE_NONE), 0, 0, 1, }, + { GEN8_SP_CHICKEN_BITS_1, BIT(PIPE_NONE), 0, 0, 0, }, + { GEN8_SP_CHICKEN_BITS_2, BIT(PIPE_NONE), 0, 0, 0, }, + { 
GEN8_SP_CHICKEN_BITS_3, BIT(PIPE_NONE), 0, 0, 0, }, + { GEN8_SP_CHICKEN_BITS_4, BIT(PIPE_NONE), 0, 0, 1, }, + { GEN8_SP_DISPATCH_CNTL, BIT(PIPE_NONE), 0, 0, 1, }, + { GEN8_SP_HLSQ_DBG_ECO_CNTL, BIT(PIPE_NONE), 0, 0, 1, }, + { GEN8_SP_DBG_CNTL, BIT(PIPE_NONE), 0, 0, 1, }, + { GEN8_TPL1_NC_MODE_CNTL, BIT(PIPE_NONE), 0, 0, 1, }, + { GEN8_TPL1_DBG_ECO_CNTL, BIT(PIPE_NONE), 0, 0, 0, }, + { GEN8_TPL1_DBG_ECO_CNTL1, BIT(PIPE_NONE), 0, 0, 0, }, + { 0 } +}; + static int acd_calibrate_set(void *data, u64 val) { struct kgsl_device *device = data; @@ -196,6 +238,121 @@ static int acd_calibrate_get(void *data, u64 *val) DEFINE_DEBUGFS_ATTRIBUTE(acd_cal_fops, acd_calibrate_get, acd_calibrate_set, "%llu\n"); +static ssize_t nc_override_get(struct file *filep, + char __user *user_buf, size_t len, loff_t *off) +{ + struct kgsl_device *device = (struct kgsl_device *) filep->private_data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen8_device *gen8_dev = container_of(adreno_dev, + struct gen8_device, adreno_dev); + struct gen8_nonctxt_overrides *nc_overrides = gen8_dev->nc_overrides; + u32 i, max_size = PAGE_SIZE; + char *buf, *pos; + ssize_t size = 0; + + if (!gen8_dev->nc_overrides_enabled || !nc_overrides) + return 0; + + buf = kzalloc(max_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + pos = buf; + + mutex_lock(&gen8_dev->nc_mutex); + /* Copy all assignments from list to str */ + for (i = 0; nc_overrides[i].offset; i++) { + if (nc_overrides[i].set) { + len = scnprintf(pos, max_size, "0x%x:0x%8.8x\n", + nc_overrides[i].offset, nc_overrides[i].val); + /* If we run out of space len will be zero */ + if (len == 0) + break; + max_size -= len; + pos += len; + } + } + mutex_unlock(&gen8_dev->nc_mutex); + + size = simple_read_from_buffer(user_buf, len, off, buf, pos - buf); + + kfree(buf); + return size; +} + +static void nc_override_cb(struct adreno_device *adreno_dev, void *priv) +{ + struct gen8_device *gen8_dev = container_of(adreno_dev, struct 
gen8_device, adreno_dev); + + gen8_dev->nc_overrides_enabled = true; +} + +static ssize_t nc_override_set(struct file *filep, + const char __user *user_buf, size_t len, loff_t *off) +{ + struct kgsl_device *device = (struct kgsl_device *) filep->private_data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen8_device *gen8_dev = container_of(adreno_dev, struct gen8_device, adreno_dev); + struct gen8_nonctxt_overrides *nc_overrides = gen8_dev->nc_overrides; + u32 i, offset, val; + int ret = -EINVAL; + ssize_t size = 0; + char *buf; + + if (!nc_overrides) + return 0; + + if ((len >= PAGE_SIZE) || (len == 0)) + return -EINVAL; + + buf = kzalloc(len + 1, GFP_KERNEL); + if (buf == NULL) + return -ENOMEM; + + if (copy_from_user(buf, user_buf, len)) { + ret = -EFAULT; + goto err; + } + + /* For sanity and parsing, ensure it is null terminated */ + buf[len] = '\0'; + + size = sscanf(buf, "0x%x:0x%x", &offset, &val); + if (size == 0) + goto err; + + size = 0; + + mutex_lock(&gen8_dev->nc_mutex); + for (i = 0; nc_overrides[i].offset; i++) { + if (nc_overrides[i].offset == offset) { + nc_overrides[i].val = val; + nc_overrides[i].set = true; + size = len; + break; + } + } + mutex_unlock(&gen8_dev->nc_mutex); + + if (size > 0) { + ret = adreno_power_cycle(ADRENO_DEVICE(device), nc_override_cb, NULL); + if (!ret) + ret = size; + } + +err: + kfree(buf); + return ret; +} + +static const struct file_operations nc_override_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = nc_override_get, + .write = nc_override_set, + .llseek = noop_llseek, +}; + void gen8_cp_init_cmds(struct adreno_device *adreno_dev, u32 *cmds) { u32 i = 0, mask = 0; @@ -299,6 +456,8 @@ int gen8_fenced_write(struct adreno_device *adreno_dev, u32 offset, int gen8_init(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen8_device *gen8_dev = container_of(adreno_dev, + struct gen8_device, adreno_dev); const struct adreno_gen8_core 
*gen8_core = to_gen8_core(adreno_dev); u64 freq = gen8_core->gmu_hub_clk_freq; @@ -315,6 +474,12 @@ int gen8_init(struct adreno_device *adreno_dev) gen8_crashdump_init(adreno_dev); + gen8_dev->nc_overrides = gen8_nc_overrides; + mutex_init(&gen8_dev->nc_mutex); + + /* Debugfs node for noncontext registers override */ + debugfs_create_file("nc_override", 0644, device->d_debugfs, device, &nc_override_fops); + return adreno_allocate_global(device, &adreno_dev->pwrup_reglist, PAGE_SIZE, 0, 0, KGSL_MEMDESC_PRIVILEGED, "powerup_register_list"); @@ -515,6 +680,8 @@ static void gen8_nonctxt_regconfig(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); const struct gen8_nonctxt_regs *regs = gen8_core->nonctxt_regs; + struct gen8_device *gen8_dev = container_of(adreno_dev, + struct gen8_device, adreno_dev); u32 i, pipe_id; unsigned long pipe; @@ -535,6 +702,32 @@ static void gen8_nonctxt_regconfig(struct adreno_device *adreno_dev) } } + /* Program non context registers overrides for all pipes */ + if (gen8_dev->nc_overrides_enabled) { + struct gen8_nonctxt_overrides *nc_overrides = gen8_dev->nc_overrides; + + mutex_lock(&gen8_dev->nc_mutex); + for (pipe_id = PIPE_NONE; pipe_id <= PIPE_AQE1; pipe_id++) { + + if ((pipe_id == PIPE_LPAC) && !ADRENO_FEATURE(adreno_dev, ADRENO_LPAC)) + continue; + else if (((pipe_id == PIPE_AQE0) || (pipe_id == PIPE_AQE1)) && + !ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) + continue; + + for (i = 0; nc_overrides[i].offset; i++) { + if (!nc_overrides[i].set) + continue; + + pipe = (unsigned long)nc_overrides[i].pipelines; + if (test_bit(pipe_id, &pipe)) + gen8_regwrite_aperture(device, nc_overrides[i].offset, + nc_overrides[i].val, pipe_id, 0, 0); + } + } + mutex_unlock(&gen8_dev->nc_mutex); + } + /* Clear aperture register */ gen8_host_aperture_set(adreno_dev, 0, 0, 0); } @@ -592,6 +785,7 @@ static void gen8_patch_pwrup_reglist(struct adreno_device 
*adreno_dev) struct cpu_gpu_lock *lock = ptr; u32 items = 0, i, j, pipe_id; u32 *dest = ptr + sizeof(*lock); + struct gen8_nonctxt_overrides *nc_overrides = gen8_dev->nc_overrides; /* Static IFPC restore only registers */ reglist[items].regs = gen8_ifpc_pwrup_reglist; @@ -616,6 +810,29 @@ static void gen8_patch_pwrup_reglist(struct adreno_device *adreno_dev) *dest++ = r[j]; kgsl_regread(device, r[j], dest++); } + + mutex_lock(&gen8_dev->nc_mutex); + for (j = 0; j < nc_overrides[j].offset; j++) { + unsigned long pipe = (unsigned long)nc_overrides[j].pipelines; + + if (!(test_bit(PIPE_NONE, &pipe) && nc_overrides[j].set && + nc_overrides[j].list_type)) + continue; + + if ((reglist[i].regs == gen8_ifpc_pwrup_reglist) && + (nc_overrides[j].list_type == 1)) { + *dest++ = nc_overrides[j].offset; + kgsl_regread(device, nc_overrides[j].offset, dest++); + lock->ifpc_list_len++; + } else if ((reglist[i].regs == gen8_pwrup_reglist) && + (nc_overrides[j].list_type == 2)) { + *dest++ = nc_overrides[j].offset; + kgsl_regread(device, nc_overrides[j].offset, dest++); + lock->preemption_list_len++; + } + } + mutex_unlock(&gen8_dev->nc_mutex); + } /* @@ -660,6 +877,28 @@ static void gen8_patch_pwrup_reglist(struct adreno_device *adreno_dev) } } + /* + * Write noncontext override pipe specific regs (
- triplets) + * offset and the current value into GPU buffer + */ + mutex_lock(&gen8_dev->nc_mutex); + for (pipe_id = PIPE_BR; pipe_id <= PIPE_BV; pipe_id++) { + for (i = 0; i < nc_overrides[i].offset; i++) { + unsigned long pipe = (unsigned long)nc_overrides[i].pipelines; + + if (!(test_bit(pipe_id, &pipe) && nc_overrides[i].set && + nc_overrides[i].list_type)) + continue; + + *dest++ = FIELD_PREP(GENMASK(15, 12), pipe_id); + *dest++ = nc_overrides[i].offset; + gen8_regread_aperture(device, nc_overrides[i].offset, + dest++, pipe_id, 0, 0); + gen8_dev->ext_pwrup_list_len++; + } + } + mutex_unlock(&gen8_dev->nc_mutex); + lock->dynamic_list_len = gen8_dev->ext_pwrup_list_len; } diff --git a/adreno_gen8.h b/adreno_gen8.h index 9aeff7d805..08c4e41859 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef _ADRENO_GEN8_H_ @@ -29,6 +29,24 @@ struct gen8_gpudev { extern const struct gen8_gpudev adreno_gen8_gmu_gpudev; extern const struct gen8_gpudev adreno_gen8_hwsched_gpudev; +struct gen8_nonctxt_overrides { + /** offset: Dword offset of the register to write */ + u32 offset; + /** pipelines: Pipelines to write */ + u32 pipelines; + /** val: Value to be written to the register */ + u32 val; + /** set: True for user override request */ + bool set; + /** + * list_type: 0 If the register already present in any of exisiting static pwrup list + 1 if the register fits into IFPC static pwrup only list + 2 if the register fits into IFPC + preemption static list + 3 if the register fits into external powerup list + */ + u32 list_type; +}; + /** * struct gen8_device - Container for the gen8_device */ @@ -41,6 +59,15 @@ struct gen8_device { u32 aperture; /** @ext_pwrup_list_len: External pwrup reglist length */ u16 ext_pwrup_list_len; + /** + * @nc_overrides: Noncontext registers overrides whitelist if defined, + * must be null terminated + */ + struct gen8_nonctxt_overrides *nc_overrides; + /** @nc_mutex: Mutex to protect nc_overrides updates */ + struct mutex nc_mutex; + /** @nc_overrides_enabled: Set through debugfs path when any override is enabled */ + bool nc_overrides_enabled; }; /** diff --git a/gen8_reg.h b/gen8_reg.h index 1985e758a6..584c138046 100644 --- a/gen8_reg.h +++ b/gen8_reg.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef _GEN8_REG_H @@ -956,6 +956,8 @@ #define GEN8_GRAS_PERFCTR_LRZ_SEL_3 0x8733 /* RB registers */ +#define GEN8_RB_DBG_ECO_CNTL 0x8e04 +#define GEN8_RB_CCU_DBG_ECO_CNTL 0x8e06 #define GEN8_RB_CCU_CNTL 0x8e07 #define GEN8_RB_CCU_NC_MODE_CNTL 0x8e08 #define GEN8_RB_GC_GMEM_PROTECT 0x8e09 @@ -976,6 +978,8 @@ #define GEN8_RB_SUB_BLOCK_SEL_CNTL_HOST 0x8e3b #define GEN8_RB_SUB_BLOCK_SEL_CNTL_CD 0x8e3d #define GEN8_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE 0x8e50 +#define GEN8_RB_SLICE_UFC_PREFETCH_CNTL 0x8e77 +#define GEN8_RB_SLICE_UFC_DBG_CNTL 0x8e78 #define GEN8_RB_CMP_NC_MODE_CNTL 0x8f00 #define GEN8_RB_RESOLVE_PREFETCH_CNTL 0x8f01 #define GEN8_RB_CMP_DBG_ECO_CNTL 0x8f02 @@ -990,6 +994,7 @@ #define GEN8_RB_PERFCTR_UFC_SEL_3 0x8f13 #define GEN8_RB_PERFCTR_UFC_SEL_4 0x8f14 #define GEN8_RB_PERFCTR_UFC_SEL_5 0x8f15 +#define GEN8_RB_UFC_DBG_CNTL 0x8f29 /* VPC registers */ #define GEN8_VPC_PERFCTR_VPC_SEL_2_0 0x9670 @@ -1055,6 +1060,8 @@ #define GEN8_PC_PERFCTR_PC_SEL_14 0x9e3e #define GEN8_PC_PERFCTR_PC_SEL_15 0x9e3f #define GEN8_PC_CHICKEN_BITS_1 0x9e50 +#define GEN8_PC_DBG_ECO_CNTL 0x9e53 + #define GEN8_PC_CONTEXT_SWITCH_STABILIZE_CNTL_1 0x9e64 #define GEN8_PC_SLICE_PERFCTR_PC_SEL_0 0x9f00 #define GEN8_PC_SLICE_PERFCTR_PC_SEL_1 0x9f01 @@ -1132,6 +1139,7 @@ #define GEN8_SP_ISDB_DEBUG_ADDR_LO 0xae3e #define GEN8_SP_ISDB_DEBUG_ADDR_HI 0xae3f #define GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP 0xae6b +#define GEN8_SP_HLSQ_DBG_ECO_CNTL 0xae6c #define GEN8_SP_READ_SEL 0xae6d #define GEN8_SP_DBG_CNTL 0xae71 From 2b8bf1ca1c89f8924bbeccae38f3525d82778e04 Mon Sep 17 00:00:00 2001 From: NISARG SHETH Date: Fri, 5 Jan 2024 11:58:11 +0530 Subject: [PATCH 0636/1016] kgsl: gen8: Enable warmboot feature for gen8_4_0 GPU Warmboot saves a lot of back and forth hfi to and from GMU while doing slumber transitions. Enable this feature for gen8_4_0 GPU. 
Change-Id: Ic00ac709d2018c2adbc9b55643d9fb1c7a9f83a7 Signed-off-by: NISARG SHETH --- adreno-gpulist.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 73d1255b8a..5162a46dbc 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2796,7 +2796,8 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_4_0 = { UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), .compatible = "qcom,adreno-gpu-gen8-4-0", .features = ADRENO_APRIV | ADRENO_IOCOHERENT | - ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE, + ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | + ADRENO_GMU_WARMBOOT, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_64M, From 44c83ec5c0e685272126a5b3380d4d83f3ee1d33 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Tue, 19 Dec 2023 08:18:11 -0800 Subject: [PATCH 0637/1016] msm: kgsl: Put VBO child refcount if unmap succeeds If the VBO range cannot be unmapped the underlying physical memory should not be freed. Enforce this by not decrementing the mem entry's refcount. Change-Id: Ia9d12589fbfa849ae7e2bef8e1cdbb9cf8f33364 Signed-off-by: Lynus Vaz --- kgsl_vbo.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/kgsl_vbo.c b/kgsl_vbo.c index 92d3d84c94..a19a01b9f3 100644 --- a/kgsl_vbo.c +++ b/kgsl_vbo.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -46,6 +46,12 @@ static struct kgsl_memdesc_bind_range *bind_range_create(u64 start, u64 last, return range; } +static void bind_range_destroy(struct kgsl_memdesc_bind_range *range) +{ + kgsl_mem_entry_put(range->entry); + kfree(range); +} + static u64 bind_range_len(struct kgsl_memdesc_bind_range *range) { return (range->range.last - range->range.start) + 1; @@ -114,8 +120,7 @@ static void kgsl_memdesc_remove_range(struct kgsl_mem_entry *target, kgsl_mmu_map_zero_page_to_range(memdesc->pagetable, memdesc, range->range.start, bind_range_len(range)); - kgsl_mem_entry_put(range->entry); - kfree(range); + bind_range_destroy(range); } } @@ -175,8 +180,7 @@ static int kgsl_memdesc_add_range(struct kgsl_mem_entry *target, } } - kgsl_mem_entry_put(cur->entry); - kfree(cur); + bind_range_destroy(cur); continue; } @@ -253,8 +257,7 @@ static int kgsl_memdesc_add_range(struct kgsl_mem_entry *target, return ret; error: - kgsl_mem_entry_put(range->entry); - kfree(range); + bind_range_destroy(range); mutex_unlock(&memdesc->ranges_lock); return ret; } @@ -264,6 +267,7 @@ static void kgsl_sharedmem_vbo_put_gpuaddr(struct kgsl_memdesc *memdesc) struct interval_tree_node *node, *next; struct kgsl_memdesc_bind_range *range; int ret = 0; + bool unmap_fail; /* * If the VBO maps the zero range then we can unmap the entire @@ -273,6 +277,8 @@ static void kgsl_sharedmem_vbo_put_gpuaddr(struct kgsl_memdesc *memdesc) ret = kgsl_mmu_unmap_range(memdesc->pagetable, memdesc, 0, memdesc->size); + unmap_fail = ret; + /* * FIXME: do we have a use after free potential here? 
We might need to * lock this and set a "do not update" bit @@ -294,15 +300,16 @@ static void kgsl_sharedmem_vbo_put_gpuaddr(struct kgsl_memdesc *memdesc) range->range.start, range->range.last - range->range.start + 1); - /* If unmap failed, mark the child memdesc as still mapped */ - if (ret) - range->entry->memdesc.priv |= KGSL_MEMDESC_MAPPED; + /* Put the child's refcount if unmap succeeds */ + if (!ret) + bind_range_destroy(range); + else + kfree(range); - kgsl_mem_entry_put(range->entry); - kfree(range); + unmap_fail = unmap_fail || ret; } - if (ret) + if (unmap_fail) return; /* Put back the GPU address */ From 067e452019d96137286c9ba976222e9ec6ba5c8f Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Fri, 12 Jan 2024 11:16:42 -0800 Subject: [PATCH 0638/1016] msm: kgsl: Do not reclaim pages mapped in a VBO Pages mapped into a VBO can be accessible both through the memdesc as well as through the parent VBO. Reclaiming these pages would require a lot of overhead to update iommu mappings. Instead, do not reclaim the pages of a memdesc that is currently mapped in a VBO. Change-Id: Ic2787f09081c5dc3a66c3582b98266937c8ce1e5 Signed-off-by: Lynus Vaz --- kgsl.c | 3 ++- kgsl.h | 4 +++- kgsl_reclaim.c | 9 ++++++++- kgsl_vbo.c | 15 +++++++++++++-- 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/kgsl.c b/kgsl.c index c142097789..c2fc48732d 100644 --- a/kgsl.c +++ b/kgsl.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -259,6 +259,7 @@ static struct kgsl_mem_entry *kgsl_mem_entry_create(void) /* put this ref in userspace memory alloc and map ioctls */ kref_get(&entry->refcount); atomic_set(&entry->map_count, 0); + atomic_set(&entry->vbo_count, 0); } return entry; diff --git a/kgsl.h b/kgsl.h index ad10700b32..335f50e6d1 100644 --- a/kgsl.h +++ b/kgsl.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef __KGSL_H #define __KGSL_H @@ -362,6 +362,8 @@ struct kgsl_mem_entry { * debugfs accounting */ atomic_t map_count; + /** @vbo_count: Count how many VBO ranges this entry is mapped in */ + atomic_t vbo_count; }; struct kgsl_device_private; diff --git a/kgsl_reclaim.c b/kgsl_reclaim.c index 91713b7604..60823b95b2 100644 --- a/kgsl_reclaim.c +++ b/kgsl_reclaim.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -267,6 +267,13 @@ static u32 kgsl_reclaim_process(struct kgsl_process_private *process, continue; } + /* Do not reclaim pages mapped into a VBO */ + if (atomic_read(&valid_entry->vbo_count)) { + kgsl_mem_entry_put(entry); + next++; + continue; + } + if ((atomic_read(&process->unpinned_page_count) + memdesc->page_count) > kgsl_reclaim_max_page_limit) { kgsl_mem_entry_put(entry); diff --git a/kgsl_vbo.c b/kgsl_vbo.c index a19a01b9f3..bf72c139db 100644 --- a/kgsl_vbo.c +++ b/kgsl_vbo.c @@ -43,12 +43,16 @@ static struct kgsl_memdesc_bind_range *bind_range_create(u64 start, u64 last, return ERR_PTR(-EINVAL); } + atomic_inc(&entry->vbo_count); return range; } static void bind_range_destroy(struct kgsl_memdesc_bind_range *range) { - kgsl_mem_entry_put(range->entry); + struct kgsl_mem_entry *entry = range->entry; + + atomic_dec(&entry->vbo_count); + kgsl_mem_entry_put(entry); kfree(range); } @@ -361,8 +365,12 @@ static void kgsl_sharedmem_free_bind_op(struct kgsl_sharedmem_bind_op *op) if (IS_ERR_OR_NULL(op)) return; - for (i = 0; i < op->nr_ops; i++) + for (i = 0; i < op->nr_ops; i++) { + /* Decrement the vbo_count we added when creating the bind_op */ + if (op->ops[i].entry) + atomic_dec(&op->ops[i].entry->vbo_count); kgsl_mem_entry_put(op->ops[i].entry); + } kgsl_mem_entry_put(op->target); @@ -468,6 +476,9 @@ kgsl_sharedmem_create_bind_op(struct kgsl_process_private *private, goto err; } + /* Keep the child pinned in memory */ + atomic_inc(&entry->vbo_count); + /* Make sure the child is not a VBO */ if ((entry->memdesc.flags & KGSL_MEMFLAGS_VBO)) { ret = -EINVAL; From a05f37ff54077259da42151001c461a000c6b24c Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Tue, 16 Jan 2024 16:58:01 -0700 Subject: [PATCH 0639/1016] kgsl: hwfence: Take detached context refcount When we encounter a failure during context detach, we move the pending hardware fences from this context to a list. After reset is complete, we trigger these fences one by one. 
As part of freeing these hardware fences, we decrement the drawctxt's hw_fence count. This can cause a use-after-free of the drawctxt structure since this context is already detached and may have been freed. To fix this, take the context refcount for each of these hardware fences in the detach path, and put them back when each of these hardware fences are triggered after recovery. Change-Id: I3077be6b07cc55c77329c06330433d9ece9569c8 Signed-off-by: Harshdeep Dhatt --- adreno_gen7_hwsched.c | 7 ++++++- adreno_gen7_hwsched_hfi.c | 4 +++- adreno_gen8_hwsched.c | 7 ++++++- adreno_gen8_hwsched_hfi.c | 4 +++- 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index da13a6078e..acf8cd29b3 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -1678,6 +1678,7 @@ static int process_detached_hw_fences_after_reset(struct adreno_device *adreno_d { struct adreno_hw_fence_entry *entry, *tmp; struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + struct kgsl_context *context = NULL; int ret = 0; list_for_each_entry_safe(entry, tmp, &hfi->detached_hw_fence_list, node) { @@ -1691,7 +1692,11 @@ static int process_detached_hw_fences_after_reset(struct adreno_device *adreno_d if (ret) return ret; + context = &entry->drawctxt->base; + gen7_remove_hw_fence_entry(adreno_dev, entry); + + kgsl_context_put(context); } return ret; diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 21a0d3d074..41f380b5b6 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
- * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -3254,6 +3254,7 @@ static void move_detached_context_hardware_fences(struct adreno_device *adreno_d struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; if ((timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0)) { + _kgsl_context_get(&drawctxt->base); list_move_tail(&entry->node, &hfi->detached_hw_fence_list); continue; } @@ -3263,6 +3264,7 @@ static void move_detached_context_hardware_fences(struct adreno_device *adreno_d /* Also grab all the hardware fences which were never sent to GMU */ list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) { + _kgsl_context_get(&drawctxt->base); list_move_tail(&entry->node, &hfi->detached_hw_fence_list); } } diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index c6f5b35646..795201c4a9 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -1643,6 +1643,7 @@ static int process_detached_hw_fences_after_reset(struct adreno_device *adreno_d { struct adreno_hw_fence_entry *entry, *tmp; struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + struct kgsl_context *context = NULL; int ret = 0; list_for_each_entry_safe(entry, tmp, &hfi->detached_hw_fence_list, node) { @@ -1656,7 +1657,11 @@ static int process_detached_hw_fences_after_reset(struct adreno_device *adreno_d if (ret) return ret; + context = &entry->drawctxt->base; + gen8_remove_hw_fence_entry(adreno_dev, entry); + + kgsl_context_put(context); } return ret; diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 1af4ea8ce5..f9d5f12b5f 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -3257,6 +3257,7 @@ static void move_detached_context_hardware_fences(struct adreno_device *adreno_d struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; if ((timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0)) { + _kgsl_context_get(&drawctxt->base); list_move_tail(&entry->node, &hfi->detached_hw_fence_list); continue; } @@ -3266,6 +3267,7 @@ static void move_detached_context_hardware_fences(struct adreno_device *adreno_d /* Also grab all the hardware fences which were never sent to GMU */ list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) { + _kgsl_context_get(&drawctxt->base); list_move_tail(&entry->node, &hfi->detached_hw_fence_list); } } From 82c42c6aa0e8ff3d2ae71ae99f741ce771573e38 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Fri, 12 Jan 2024 12:03:58 -0700 Subject: [PATCH 0640/1016] kgsl: hwfence: Call fd_install after creating hw fence Once fd_install is done, userspace can predict the dma fence fd and call close(fd). And say the dma fence get signaled at the same time as well. Then both these operations will reduce the refcount of the dma fence to zero thereby freeing the kfence. This can cause use-after-free of the kfence in the hardware fence creation path. To avoid this, do fd_install after creating hw fence. Change-Id: Ib9c446562ff5199e469c7db9581518fb5a695e3f Signed-off-by: Harshdeep Dhatt --- kgsl_sync.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kgsl_sync.c b/kgsl_sync.c index b4cff282d3..6810860618 100644 --- a/kgsl_sync.c +++ b/kgsl_sync.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2012-2019, 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -487,11 +487,12 @@ int kgsl_add_fence_event(struct kgsl_device *device, ret = -EFAULT; goto out; } - fd_install(priv.fence_fd, kfence->sync_file->file); if (!retired) device->ftbl->create_hw_fence(device, kfence); + fd_install(priv.fence_fd, kfence->sync_file->file); + out: kgsl_context_put(context); if (ret) { From ddb0f03444b4e85763ca58fea3b530d686cf6250 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Mon, 8 Jan 2024 14:59:39 -0700 Subject: [PATCH 0641/1016] kgsl: gen8: Remove dispatch queue support gen8 hwsched only supports context queues. So remove dispatch queue support from gen8 files. Change-Id: I8213dfef26ac4e28191f4d341b677613fbf6160a Signed-off-by: Harshdeep Dhatt --- adreno_gen7_hwsched_hfi.c | 9 +-- adreno_gen8_hwsched.c | 5 +- adreno_gen8_hwsched_hfi.c | 162 ++------------------------------------ adreno_hwsched.c | 6 +- 4 files changed, 12 insertions(+), 170 deletions(-) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 41f380b5b6..85e9bb4d69 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -2045,13 +2045,14 @@ done: return pending_ack.results[2]; } -static void _context_queue_enable(struct adreno_device *adreno_dev) +static void _context_queue_hw_fence_enable(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); if (GMU_VER_MINOR(gmu->ver.hfi) >= 3) { if (gen7_hfi_send_get_value(adreno_dev, HFI_VALUE_CONTEXT_QUEUE, 0) == 1) { set_bit(ADRENO_HWSCHED_CONTEXT_QUEUE, &adreno_dev->hwsched.flags); + adreno_hwsched_register_hw_fence(adreno_dev); } } } @@ -2440,10 +2441,8 @@ int gen7_hwsched_hfi_start(struct adreno_device *adreno_dev) * HFI_VALUE_CONTEXT_QUEUE can only be queried after GMU has initialized some of the * required resources as part of handling gen7_hfi_send_core_fw_start() */ - if (!test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) { - _context_queue_enable(adreno_dev); - adreno_hwsched_register_hw_fence(adreno_dev); - } + if 
(!test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) + _context_queue_hw_fence_enable(adreno_dev); ret = gen7_hfi_send_hw_fence_feature_ctrl(adreno_dev); if (ret) diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 795201c4a9..d82bd88670 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -444,9 +444,6 @@ void gen8_hwsched_snapshot(struct adreno_device *adreno_dev, } - if (!adreno_hwsched_context_queue_enabled(adreno_dev)) - return; - read_lock(&device->context_lock); idr_for_each(&device->context_idr, snapshot_context_queue, snapshot); read_unlock(&device->context_lock); @@ -521,6 +518,8 @@ static int gen8_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) icc_set_bw(pwr->icc_path, 0, kBps_to_icc(pwr->ddr_table[level])); + adreno_hwsched_register_hw_fence(adreno_dev); + ret = gen8_gmu_device_start(adreno_dev); if (ret) goto err; diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index f9d5f12b5f..9717e3ca7b 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -21,7 +21,7 @@ #include "kgsl_trace.h" #include "kgsl_util.h" -#define HFI_QUEUE_MAX (HFI_QUEUE_DEFAULT_CNT + HFI_QUEUE_DISPATCH_MAX_CNT) +#define HFI_QUEUE_MAX (HFI_QUEUE_DEFAULT_CNT) #define DEFINE_QHDR(gmuaddr, id, prio) \ {\ @@ -39,26 +39,6 @@ .write_index = 0, \ } -static struct dq_info { - /** @max_dq: Maximum number of dispatch queues per RB level */ - u32 max_dq; - /** @base_dq_id: Base dqid for level */ - u32 base_dq_id; - /** @offset: Next dqid to use for roundrobin context assignment */ - u32 offset; -} gen8_hfi_dqs[KGSL_PRIORITY_MAX_RB_LEVELS] = { - { 4, 0, }, /* RB0 */ - { 4, 4, }, /* RB1 */ - { 3, 8, }, /* RB2 */ - { 3, 11, }, /* RB3 */ -}, gen8_hfi_dqs_lpac[KGSL_PRIORITY_MAX_RB_LEVELS + 1] = { - { 4, 0, }, /* RB0 */ - { 4, 4, }, /* RB1 */ - { 3, 8, }, /* RB2 */ - { 2, 11, }, /* RB3 */ - { 1, 13, }, /* RB LPAC */ -}; - struct pending_cmd gen8_hw_fence_ack; struct gen8_hwsched_hfi *to_gen8_hwsched_hfi( @@ -1459,25 +1439,6 @@ 
static void init_queues(struct gen8_hfi *hfi) DEFINE_QHDR(gmuaddr, HFI_CMD_ID, 0), DEFINE_QHDR(gmuaddr, HFI_MSG_ID, 0), DEFINE_QHDR(gmuaddr, HFI_DBG_ID, 0), - /* 4 DQs for RB priority 0 */ - DEFINE_QHDR(gmuaddr, 3, 0), - DEFINE_QHDR(gmuaddr, 4, 0), - DEFINE_QHDR(gmuaddr, 5, 0), - DEFINE_QHDR(gmuaddr, 6, 0), - /* 4 DQs for RB priority 1 */ - DEFINE_QHDR(gmuaddr, 7, 1), - DEFINE_QHDR(gmuaddr, 8, 1), - DEFINE_QHDR(gmuaddr, 9, 1), - DEFINE_QHDR(gmuaddr, 10, 1), - /* 3 DQs for RB priority 2 */ - DEFINE_QHDR(gmuaddr, 11, 2), - DEFINE_QHDR(gmuaddr, 12, 2), - DEFINE_QHDR(gmuaddr, 13, 2), - /* 2 DQs for RB priority 3 */ - DEFINE_QHDR(gmuaddr, 14, 3), - DEFINE_QHDR(gmuaddr, 15, 3), - /* 1 DQ for LPAC RB priority 4 */ - DEFINE_QHDR(gmuaddr, 16, 4), }, }; @@ -2057,16 +2018,6 @@ done: return pending_ack.results[2]; } -static void _context_queue_enable(struct adreno_device *adreno_dev) -{ - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - - if (GMU_VER_MINOR(gmu->ver.hfi) >= 3) { - if (gen8_hfi_send_get_value(adreno_dev, HFI_VALUE_CONTEXT_QUEUE, 0) == 1) - set_bit(ADRENO_HWSCHED_CONTEXT_QUEUE, &adreno_dev->hwsched.flags); - } -} - static int gen8_hfi_send_hw_fence_feature_ctrl(struct adreno_device *adreno_dev) { struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); @@ -2443,15 +2394,6 @@ int gen8_hwsched_hfi_start(struct adreno_device *adreno_dev) if (ret) goto err; - /* - * HFI_VALUE_CONTEXT_QUEUE can only be queried after GMU has initialized some of the - * required resources as part of handling gen8_hfi_send_core_fw_start() - */ - if (!test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) { - _context_queue_enable(adreno_dev); - adreno_hwsched_register_hw_fence(adreno_dev); - } - ret = gen8_hfi_send_hw_fence_feature_ctrl(adreno_dev); if (ret) goto err; @@ -2822,32 +2764,11 @@ static void init_gmu_context_queue(struct adreno_context *drawctxt) hdr->hw_fence_buffer_size = drawctxt->gmu_hw_fence_queue.size; } -static u32 get_dq_id(struct adreno_device *adreno_dev, struct 
kgsl_context *context) -{ - struct dq_info *info; - u32 next; - u32 priority = adreno_get_level(context); - - if (adreno_dev->lpac_enabled) - info = &gen8_hfi_dqs_lpac[priority]; - else - info = &gen8_hfi_dqs[priority]; - - next = info->base_dq_id + info->offset; - - info->offset = (info->offset + 1) % info->max_dq; - - return next; -} - static int allocate_context_queues(struct adreno_device *adreno_dev, struct adreno_context *drawctxt) { int ret = 0; - if (!adreno_hwsched_context_queue_enabled(adreno_dev)) - return 0; - if (test_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags) && !drawctxt->gmu_hw_fence_queue.gmuaddr) { ret = gen8_alloc_gmu_kernel_block( @@ -2931,8 +2852,7 @@ static int send_context_pointers(struct adreno_device *adreno_dev, cmd.user_ctxt_record_addr = context->user_ctxt_record->memdesc.gpuaddr; - if (adreno_hwsched_context_queue_enabled(adreno_dev)) - cmd.gmu_context_queue_addr = drawctxt->gmu_context_queue.gmuaddr; + cmd.gmu_context_queue_addr = drawctxt->gmu_context_queue.gmuaddr; return gen8_hfi_send_cmd_async(adreno_dev, &cmd, sizeof(cmd)); } @@ -2972,10 +2892,7 @@ static int hfi_context_register(struct adreno_device *adreno_dev, } context->gmu_registered = true; - if (adreno_hwsched_context_queue_enabled(adreno_dev)) - context->gmu_dispatch_queue = UINT_MAX; - else - context->gmu_dispatch_queue = get_dq_id(adreno_dev, context); + context->gmu_dispatch_queue = UINT_MAX; return 0; } @@ -3085,9 +3002,6 @@ done: static u32 get_irq_bit(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj) { - if (!adreno_hwsched_context_queue_enabled(adreno_dev)) - return drawobj->context->gmu_dispatch_queue; - if (adreno_is_preemption_enabled(adreno_dev)) return adreno_get_level(drawobj->context); @@ -3655,64 +3569,6 @@ static void process_hw_fence_queue(struct adreno_device *adreno_dev, } } -/* Size in below functions are in unit of dwords */ -static int gen8_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, u32 queue_idx, - u32 
*msg, u32 size_bytes, struct kgsl_drawobj_cmd *cmdobj, struct adreno_submit_time *time) -{ - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - struct hfi_queue_table *tbl = gmu->hfi.hfi_mem->hostptr; - struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx]; - u32 *queue; - u32 i, write, empty_space; - u32 size_dwords = size_bytes >> 2; - u32 align_size = ALIGN(size_dwords, SZ_4); - u32 id = MSG_HDR_GET_ID(*msg); - - if (hdr->status == HFI_QUEUE_STATUS_DISABLED || !IS_ALIGNED(size_bytes, sizeof(u32))) - return -EINVAL; - - queue = HOST_QUEUE_START_ADDR(gmu->hfi.hfi_mem, queue_idx); - - empty_space = (hdr->write_index >= hdr->read_index) ? - (hdr->queue_size - (hdr->write_index - hdr->read_index)) - : (hdr->read_index - hdr->write_index); - - if (empty_space <= align_size) - return -ENOSPC; - - write = hdr->write_index; - - for (i = 0; i < size_dwords; i++) { - queue[write] = msg[i]; - write = (write + 1) % hdr->queue_size; - } - - /* Cookify any non used data at the end of the write buffer */ - for (; i < align_size; i++) { - queue[write] = 0xfafafafa; - write = (write + 1) % hdr->queue_size; - } - - /* Ensure packet is written out before proceeding */ - wmb(); - - gen8_add_profile_events(adreno_dev, cmdobj, time); - - /* - * Put the profiling information in the user profiling buffer. - * The hfi_update_write_idx below has a wmb() before the actual - * write index update to ensure that the GMU does not see the - * packet before the profile data is written out. 
- */ - adreno_profile_submit_time(time); - - trace_kgsl_hfi_send(id, size_dwords, MSG_HDR_GET_SEQNUM(*msg)); - - hfi_update_write_idx(&hdr->write_index, write); - - return 0; -} - int gen8_hwsched_submit_drawobj(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj) { int ret = 0; @@ -3801,13 +3657,8 @@ skipib: seqnum = atomic_inc_return(&adreno_dev->hwsched.submission_seqnum); cmd->hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd->hdr, seqnum, cmd_sizebytes >> 2); - if (adreno_hwsched_context_queue_enabled(adreno_dev)) - ret = gen8_gmu_context_queue_write(adreno_dev, - drawctxt, (u32 *)cmd, cmd_sizebytes, drawobj, &time); - else - ret = gen8_hfi_dispatch_queue_write(adreno_dev, - HFI_DSP_ID_0 + drawobj->context->gmu_dispatch_queue, - (u32 *)cmd, cmd_sizebytes, cmdobj, &time); + ret = gen8_gmu_context_queue_write(adreno_dev, drawctxt, (u32 *)cmd, cmd_sizebytes, drawobj, + &time); if (ret) return ret; @@ -4084,9 +3935,6 @@ u32 gen8_hwsched_preempt_count_get(struct adreno_device *adreno_dev) void gen8_hwsched_context_destroy(struct adreno_device *adreno_dev, struct adreno_context *drawctxt) { - if (!adreno_hwsched_context_queue_enabled(adreno_dev)) - return; - if (drawctxt->gmu_context_queue.gmuaddr) gen8_free_gmu_block(to_gen8_gmu(adreno_dev), &drawctxt->gmu_context_queue); diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 14363e859f..bcb4157893 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -2234,10 +2234,6 @@ void adreno_hwsched_register_hw_fence(struct adreno_device *adreno_dev) if (!ADRENO_FEATURE(adreno_dev, ADRENO_HW_FENCE)) return; - /* Enable hardware fences only if context queues are enabled */ - if (!adreno_hwsched_context_queue_enabled(adreno_dev)) - return; - if (test_bit(ADRENO_HWSCHED_HW_FENCE, &hwsched->flags)) return; From a8b15346bdaa4bd4c1d7b4cc0e3fdae915f5380e Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Fri, 15 Dec 2023 10:53:11 -0700 Subject: [PATCH 0642/1016] kgsl: hwsched: Add hw fence shadow table support GMU FW needs to allocate a shadow table to cache external fences that it receives in its rx queue. It will send a mem_alloc request of type MEMKIND_HW_FENCE_SHADOW. The number of entries in this table is derived from a hw-fence dt property and communicated to GMU via VRB. Additionally, the hash index of an input fence must be communicated to GMU via the hfi_syncobj packet. This allows for constant time look up when checking whether an external input fence is signaled or not in the GMU FW. Hardware fence feature requires that GMU has the shadow table support. Hence, check for the GMU FW version when enabling this feature. 
Change-Id: Ic63500481c484941c4f02942bfe5b2226690c9a4 Signed-off-by: Harshdeep Dhatt --- adreno_gen7_hwsched_hfi.c | 13 +++++++------ adreno_gen8_hwsched.c | 31 ++++++++++++++++++++++++++++++- adreno_gen8_hwsched_hfi.c | 4 +++- adreno_hfi.h | 27 +++++++++++++++++++++++++-- adreno_trace.h | 22 +++++++++++++++++++++- kgsl_gmu_core.c | 10 +++++++++- kgsl_gmu_core.h | 9 +++++++++ kgsl_sync.c | 18 ++++++++++++++---- kgsl_sync.h | 6 +++--- 9 files changed, 121 insertions(+), 19 deletions(-) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 85e9bb4d69..2bc8c10d4f 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -291,7 +291,8 @@ struct syncobj_flags { const char *name; }; -static void _get_syncobj_string(char *str, u32 max_size, struct hfi_syncobj *syncobj, u32 index) +static void _get_syncobj_string(char *str, u32 max_size, struct hfi_syncobj_legacy *syncobj, + u32 index) { u32 count = scnprintf(str, max_size, "syncobj[%d] ctxt_id:%llu seqno:%llu flags:", index, syncobj->ctxt_id, syncobj->seq_no); @@ -315,7 +316,7 @@ static void _get_syncobj_string(char *str, u32 max_size, struct hfi_syncobj *syn static void log_syncobj(struct gen7_gmu_device *gmu, struct hfi_submit_syncobj *cmd) { - struct hfi_syncobj *syncobj = (struct hfi_syncobj *)&cmd[1]; + struct hfi_syncobj_legacy *syncobj = (struct hfi_syncobj_legacy *)&cmd[1]; char str[128]; u32 i = 0; @@ -3092,7 +3093,7 @@ static u32 get_irq_bit(struct adreno_device *adreno_dev, struct kgsl_drawobj *dr return 0; } -static void populate_kgsl_fence(struct hfi_syncobj *obj, +static void populate_kgsl_fence(struct hfi_syncobj_legacy *obj, struct dma_fence *fence) { struct kgsl_sync_fence *kfence = (struct kgsl_sync_fence *)fence; @@ -3122,7 +3123,7 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, u32 cmd_sizebytes; struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); struct hfi_submit_syncobj *cmd; - struct hfi_syncobj *obj = NULL; + struct hfi_syncobj_legacy *obj = 
NULL; struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 seqnum; @@ -3137,7 +3138,7 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, memset(cmdbuf, 0x0, cmd_sizebytes); cmd = cmdbuf; cmd->num_syncobj = syncobj->num_hw_fence; - obj = (struct hfi_syncobj *)&cmd[1]; + obj = (struct hfi_syncobj_legacy *)&cmd[1]; for (i = 0; i < syncobj->numsyncs; i++) { struct kgsl_drawobj_sync_event *event = &syncobj->synclist[i]; @@ -3174,7 +3175,7 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, if (is_kgsl_fence(fences[j])) { populate_kgsl_fence(obj, fences[j]); } else { - int ret = kgsl_hw_fence_add_waiter(device, fences[j]); + int ret = kgsl_hw_fence_add_waiter(device, fences[j], NULL); if (ret) { syncobj->flags &= ~KGSL_SYNCOBJ_HW; diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index d82bd88670..0154579328 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -468,6 +468,31 @@ static int gmu_clock_set_rate(struct adreno_device *adreno_dev) return ret; } +static void _get_hw_fence_entries(struct adreno_device *adreno_dev) +{ + struct device_node *node = NULL; + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + u32 shadow_num_entries = 0; + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_HW_FENCE)) + return; + + node = of_find_node_by_name(NULL, "qcom,hw-fence"); + if (!node) + return; + + if (of_property_read_u32(node, "qcom,hw-fence-table-entries", + &shadow_num_entries)) { + dev_err(&gmu->pdev->dev, "qcom,hw-fence-table-entries property not found\n"); + shadow_num_entries = 8192; + } + + of_node_put(node); + + gmu_core_set_vrb_register(gmu->vrb->hostptr, VRB_HW_FENCE_SHADOW_NUM_ENTRIES, + shadow_num_entries); +} + static int gen8_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -502,6 +527,8 @@ static int gen8_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) if (ret) goto clks_gdsc_off; + _get_hw_fence_entries(adreno_dev); + 
gen8_gmu_register_config(adreno_dev); ret = gen8_gmu_version_info(adreno_dev); @@ -518,7 +545,9 @@ static int gen8_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) icc_set_bw(pwr->icc_path, 0, kBps_to_icc(pwr->ddr_table[level])); - adreno_hwsched_register_hw_fence(adreno_dev); + /* This is the minimum GMU FW HFI version required to enable hw fences */ + if (GMU_VER_MINOR(gmu->ver.hfi) >= 7) + adreno_hwsched_register_hw_fence(adreno_dev); ret = gen8_gmu_device_start(adreno_dev); if (ret) diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 9717e3ca7b..47c1495b98 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -3092,7 +3092,8 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, if (is_kgsl_fence(fences[j])) { populate_kgsl_fence(obj, fences[j]); } else { - int ret = kgsl_hw_fence_add_waiter(device, fences[j]); + int ret = kgsl_hw_fence_add_waiter(device, fences[j], + &obj->hash_index); if (ret) { syncobj->flags &= ~KGSL_SYNCOBJ_HW; @@ -3110,6 +3111,7 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, obj->seq_no, obj->flags, fences[j]->ops->get_timeline_name ? fences[j]->ops->get_timeline_name(fences[j]) : "unknown"); + obj->header = FIELD_PREP(GENMASK(15, 0), sizeof(*obj) >> 2); obj++; } } diff --git a/adreno_hfi.h b/adreno_hfi.h index e12eea2444..75215e97f5 100644 --- a/adreno_hfi.h +++ b/adreno_hfi.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __ADRENO_HFI_H #define __ADRENO_HFI_H @@ -244,6 +244,8 @@ enum hfi_mem_kind { * between LPAC and GC */ HFI_MEMKIND_AQE_BUFFER, + /** @HFI_MEMKIND_HW_FENCE_SHADOW: Shadow memory used for caching external input fences */ + HFI_MEMKIND_HW_FENCE_SHADOW, HFI_MEMKIND_MAX, }; @@ -274,6 +276,7 @@ static const char * const hfi_memkind_strings[] = { [HFI_MEMKIND_HW_FENCE] = "GMU HW FENCE", [HFI_MEMKIND_PREEMPT_SCRATCH] = "GMU PREEMPTION", [HFI_MEMKIND_AQE_BUFFER] = "GMU AQE BUFFER", + [HFI_MEMKIND_HW_FENCE_SHADOW] = "GMU HW FENCE SHADOW", [HFI_MEMKIND_MAX] = "GMU UNKNOWN", }; @@ -955,9 +958,29 @@ struct hfi_submit_cmd { u32 big_ib_gmu_va; } __packed; -struct hfi_syncobj { +/* This structure is only used for hw fence feature on gen7 hwsched targets */ +struct hfi_syncobj_legacy { + /** @ctxt_id: dma fence context id for external fence and gmu context id for kgsl fence */ u64 ctxt_id; + /** @seq_no: Sequence number (or timestamp) of this fence */ u64 seq_no; + /** @flags: Flags for this fence */ + u64 flags; +} __packed; + +struct hfi_syncobj { + /** + * @header: bits[0:15]: size of this packet in dwords, bits[15:23]: version, + * bits[24:31] unused + */ + u32 header; + /** @hash_index: hash index of external input fence */ + u32 hash_index; + /** @ctxt_id: dma fence context id for external fence and gmu context id for kgsl fence */ + u64 ctxt_id; + /** @seq_no: Sequence number (or timestamp) of this fence */ + u64 seq_no; + /** @flags: Flags for this fence */ u64 flags; } __packed; diff --git a/adreno_trace.h b/adreno_trace.h index 62ee64f626..b2d55224a5 100644 --- a/adreno_trace.h +++ b/adreno_trace.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2013-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #if !defined(_ADRENO_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) @@ -951,6 +951,26 @@ TRACE_EVENT(adreno_preempt_done, ) ); +TRACE_EVENT(adreno_ext_hw_fence_signal, + TP_PROTO(u64 context, u64 seq_no, u32 flags, u64 gmu_ticks), + TP_ARGS(context, seq_no, flags, gmu_ticks), + TP_STRUCT__entry( + __field(u64, context) + __field(u64, seq_no) + __field(u32, flags) + __field(u64, ticks) + ), + TP_fast_assign( + __entry->context = context; + __entry->seq_no = seq_no; + __entry->flags = flags; + __entry->ticks = gmu_ticks; + ), + TP_printk("ctx=%llu seqno=%llu flags=0x%x ticks=%llu", + __entry->context, __entry->seq_no, __entry->flags, __entry->ticks + ) +); + TRACE_EVENT(adreno_ifpc_count, TP_PROTO(unsigned int ifpc_count), TP_ARGS(ifpc_count), diff --git a/kgsl_gmu_core.c b/kgsl_gmu_core.c index dfa635f50e..02b1b341f3 100644 --- a/kgsl_gmu_core.c +++ b/kgsl_gmu_core.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -248,6 +248,14 @@ static void stream_trace_data(struct gmu_trace_packet *pkt) data->ctx_switch_cntl, pkt->ticks); break; } + case GMU_TRACE_EXTERNAL_HW_FENCE_SIGNAL: { + struct trace_ext_hw_fence_signal *data = + (struct trace_ext_hw_fence_signal *)pkt->payload; + + trace_adreno_ext_hw_fence_signal(data->context, data->seq_no, + data->flags, pkt->ticks); + break; + } default: { char str[64]; diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h index cb30bc73d5..c21c81de9b 100644 --- a/kgsl_gmu_core.h +++ b/kgsl_gmu_core.h @@ -180,6 +180,8 @@ enum gmu_vrb_idx { VRB_WARMBOOT_SCRATCH_IDX = 1, /* Contains the address of GMU trace buffer */ VRB_TRACE_BUFFER_ADDR_IDX = 2, + /* Contains the number of hw fence shadow table entries */ + VRB_HW_FENCE_SHADOW_NUM_ENTRIES = 3, }; /* For GMU Trace */ @@ -271,6 +273,7 @@ struct gmu_trace_header { enum gmu_trace_id { GMU_TRACE_PREEMPT_TRIGGER = 1, GMU_TRACE_PREEMPT_DONE = 2, + GMU_TRACE_EXTERNAL_HW_FENCE_SIGNAL = 3, GMU_TRACE_MAX, }; @@ -286,6 +289,12 @@ struct trace_preempt_done { u32 ctx_switch_cntl; } __packed; +struct trace_ext_hw_fence_signal { + u64 context; + u64 seq_no; + u32 flags; +} __packed; + /** * struct kgsl_gmu_trace - wrapper for gmu trace memory object */ diff --git a/kgsl_sync.c b/kgsl_sync.c index 6810860618..518089b389 100644 --- a/kgsl_sync.c +++ b/kgsl_sync.c @@ -77,7 +77,7 @@ int kgsl_hw_fence_create(struct kgsl_device *device, struct kgsl_sync_fence *kfe return -EINVAL; } -int kgsl_hw_fence_add_waiter(struct kgsl_device *device, struct dma_fence *fence) +int kgsl_hw_fence_add_waiter(struct kgsl_device *device, struct dma_fence *fence, u32 *hash_index) { struct synx_import_params params; u32 handle = 0; @@ -98,10 +98,14 @@ int kgsl_hw_fence_add_waiter(struct kgsl_device *device, struct dma_fence *fence /* release reference held by synx_import */ ret = synx_release(kgsl_synx.handle, handle); - if (ret) + if (ret) { dev_err_ratelimited(device->dev, "Failed to release wait fences ret:%d fence ctx:%llu 
ts:%llu\n", ret, fence->context, fence->seqno); + } else { + if (hash_index) + *hash_index = handle; + } return ret; } @@ -207,14 +211,20 @@ int kgsl_hw_fence_create(struct kgsl_device *device, struct kgsl_sync_fence *kfe return 0; } -int kgsl_hw_fence_add_waiter(struct kgsl_device *device, struct dma_fence *fence) +int kgsl_hw_fence_add_waiter(struct kgsl_device *device, struct dma_fence *fence, u32 *hash_index) { - int ret = msm_hw_fence_wait_update(kgsl_msm_hw_fence.handle, &fence, 1, true); + u64 handle = 0; + int ret = msm_hw_fence_wait_update_v2(kgsl_msm_hw_fence.handle, &fence, &handle, NULL, 1, + true); if (ret) dev_err_ratelimited(device->dev, "Failed to add GMU as waiter ret:%d fence ctx:%llu ts:%llu\n", ret, fence->context, fence->seqno); + else { + if (hash_index) + *hash_index = (u32)handle; + } return ret; } diff --git a/kgsl_sync.h b/kgsl_sync.h index 33cf060d8d..7be684006f 100644 --- a/kgsl_sync.h +++ b/kgsl_sync.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2012-2014,2018-2019, 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __KGSL_SYNC_H #define __KGSL_SYNC_H @@ -121,7 +121,7 @@ void kgsl_hw_fence_populate_md(struct kgsl_device *device, struct kgsl_memdesc * int kgsl_hw_fence_create(struct kgsl_device *device, struct kgsl_sync_fence *kfence); -int kgsl_hw_fence_add_waiter(struct kgsl_device *device, struct dma_fence *fence); +int kgsl_hw_fence_add_waiter(struct kgsl_device *device, struct dma_fence *fence, u32 *hash_index); bool kgsl_hw_fence_tx_slot_available(struct kgsl_device *device, const atomic_t *hw_fence_count); @@ -241,7 +241,7 @@ int kgsl_hw_fence_create(struct kgsl_device *device, struct kgsl_sync_fence *kfe return -EINVAL; } -int kgsl_hw_fence_add_waiter(struct kgsl_device *device, struct dma_fence *fence) +int kgsl_hw_fence_add_waiter(struct kgsl_device *device, struct dma_fence *fence, u32 *hash_index) { return -EINVAL; } From a21a1489b2b6fc62486289071dd5a9b5ea3efa95 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Wed, 27 Dec 2023 12:30:42 -0800 Subject: [PATCH 0643/1016] kgsl: reclaim: Use folio_batch and related API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Latest kernel v6.2 onwards pagevec support is removed. Hence add support for folio_batch and it’s API to replace pagevec usage. Change-Id: I04e8d833b47a9c3c5fc14ec8f8b4697c4bec438a Signed-off-by: Hareesh Gundu --- kgsl_reclaim.c | 145 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 103 insertions(+), 42 deletions(-) diff --git a/kgsl_reclaim.c b/kgsl_reclaim.c index 91713b7604..caf4c9140b 100644 --- a/kgsl_reclaim.c +++ b/kgsl_reclaim.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -31,21 +31,115 @@ struct work_struct reclaim_work; static atomic_t kgsl_nr_to_reclaim; +#if (KERNEL_VERSION(6, 2, 0) <= LINUX_VERSION_CODE) +static void kgsl_memdesc_clear_unevictable(struct kgsl_process_private *process, + struct kgsl_memdesc *memdesc) +{ + struct folio_batch fbatch; + int i; + + /* + * Pages that are first allocated are by default added to + * unevictable list. To reclaim them, we first clear the + * AS_UNEVICTABLE flag of the shmem file address space thus + * check_move_unevictable_folios() places them on the + * evictable list. + * + * Once reclaim is done, hint that further shmem allocations + * will have to be on the unevictable list. + */ + mapping_clear_unevictable(memdesc->shmem_filp->f_mapping); + folio_batch_init(&fbatch); + for (i = 0; i < memdesc->page_count; i++) { + set_page_dirty_lock(memdesc->pages[i]); + spin_lock(&memdesc->lock); + folio_batch_add(&fbatch, page_folio(memdesc->pages[i])); + memdesc->pages[i] = NULL; + atomic_inc(&process->unpinned_page_count); + spin_unlock(&memdesc->lock); + if (folio_batch_count(&fbatch) == PAGEVEC_SIZE) { + check_move_unevictable_folios(&fbatch); + __folio_batch_release(&fbatch); + } + } + + if (folio_batch_count(&fbatch)) { + check_move_unevictable_folios(&fbatch); + __folio_batch_release(&fbatch); + } +} + +static int kgsl_read_mapping(struct kgsl_memdesc *memdesc, struct page **page, int i) +{ + struct folio *folio = shmem_read_folio_gfp(memdesc->shmem_filp->f_mapping, + i, kgsl_gfp_mask(0)); + + if (!IS_ERR(folio)) { + *page = folio_page(folio, 0); + return 0; + } + + return PTR_ERR(folio); +} +#else +static void kgsl_memdesc_clear_unevictable(struct kgsl_process_private *process, + struct kgsl_memdesc *memdesc) +{ + struct pagevec pvec; + int i; + + /* + * Pages that are first allocated are by default added to + * unevictable list. 
To reclaim them, we first clear the + * AS_UNEVICTABLE flag of the shmem file address space thus + * check_move_unevictable_pages() places them on the + * evictable list. + * + * Once reclaim is done, hint that further shmem allocations + * will have to be on the unevictable list. + */ + mapping_clear_unevictable(memdesc->shmem_filp->f_mapping); + pagevec_init(&pvec); + for (i = 0; i < memdesc->page_count; i++) { + set_page_dirty_lock(memdesc->pages[i]); + spin_lock(&memdesc->lock); + pagevec_add(&pvec, memdesc->pages[i]); + memdesc->pages[i] = NULL; + atomic_inc(&process->unpinned_page_count); + spin_unlock(&memdesc->lock); + if (pagevec_count(&pvec) == PAGEVEC_SIZE) { + check_move_unevictable_pages(&pvec); + __pagevec_release(&pvec); + } + } + + if (pagevec_count(&pvec)) { + check_move_unevictable_pages(&pvec); + __pagevec_release(&pvec); + } +} + +static int kgsl_read_mapping(struct kgsl_memdesc *memdesc, struct page **page, int i) +{ + *page = shmem_read_mapping_page_gfp(memdesc->shmem_filp->f_mapping, + i, kgsl_gfp_mask(0)); + return PTR_ERR_OR_ZERO(*page); +} +#endif + static int kgsl_memdesc_get_reclaimed_pages(struct kgsl_mem_entry *entry) { struct kgsl_memdesc *memdesc = &entry->memdesc; int i, ret; - struct page *page; + struct page *page = NULL; for (i = 0; i < memdesc->page_count; i++) { if (memdesc->pages[i]) continue; - page = shmem_read_mapping_page_gfp( - memdesc->shmem_filp->f_mapping, i, kgsl_gfp_mask(0)); - - if (IS_ERR(page)) - return PTR_ERR(page); + ret = kgsl_read_mapping(memdesc, &page, i); + if (ret) + return ret; kgsl_page_sync(memdesc->dev, page, PAGE_SIZE, DMA_BIDIRECTIONAL); @@ -211,12 +305,6 @@ ssize_t kgsl_nr_to_scan_show(struct device *dev, return scnprintf(buf, PAGE_SIZE, "%d\n", kgsl_nr_to_scan); } -static void kgsl_release_page_vec(struct pagevec *pvec) -{ - check_move_unevictable_pages(pvec); - __pagevec_release(pvec); -} - static u32 kgsl_reclaim_process(struct kgsl_process_private *process, u32 pages_to_reclaim) { @@ -281,35 
+369,8 @@ static u32 kgsl_reclaim_process(struct kgsl_process_private *process, } if (!kgsl_mmu_unmap(memdesc->pagetable, memdesc)) { - int i; - struct pagevec pvec; - - /* - * Pages that are first allocated are by default added to - * unevictable list. To reclaim them, we first clear the - * AS_UNEVICTABLE flag of the shmem file address space thus - * check_move_unevictable_pages() places them on the - * evictable list. - * - * Once reclaim is done, hint that further shmem allocations - * will have to be on the unevictable list. - */ - mapping_clear_unevictable(memdesc->shmem_filp->f_mapping); - pagevec_init(&pvec); - for (i = 0; i < memdesc->page_count; i++) { - set_page_dirty_lock(memdesc->pages[i]); - spin_lock(&memdesc->lock); - pagevec_add(&pvec, memdesc->pages[i]); - memdesc->pages[i] = NULL; - atomic_inc(&process->unpinned_page_count); - spin_unlock(&memdesc->lock); - if (pagevec_count(&pvec) == PAGEVEC_SIZE) - kgsl_release_page_vec(&pvec); - remaining--; - } - if (pagevec_count(&pvec)) - kgsl_release_page_vec(&pvec); - + kgsl_memdesc_clear_unevictable(process, memdesc); + remaining -= memdesc->page_count; reclaim_shmem_address_space(memdesc->shmem_filp->f_mapping); mapping_set_unevictable(memdesc->shmem_filp->f_mapping); memdesc->priv |= KGSL_MEMDESC_RECLAIMED; From 9f7e0c0e01b5c6f6316a0523ac0752127f28ba43 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Wed, 3 Jan 2024 20:13:25 -0800 Subject: [PATCH 0644/1016] kgsl: build: Enable kgsl process reclaim for sun Enable kgsl process reclaim to make driver pages available for reclaiming. This would swap kgsl allocated pages to zram so they can be reclaimed by shrinker. 
Change-Id: I257f6c06ebe04fd353bfdb3c51d81f719f6be207 Signed-off-by: Hareesh Gundu --- config/sun_consolidate_gpuconf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/config/sun_consolidate_gpuconf b/config/sun_consolidate_gpuconf index 097b6806d5..79dc8d883a 100644 --- a/config/sun_consolidate_gpuconf +++ b/config/sun_consolidate_gpuconf @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -# Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. +# Copyright (c) 2023-2024 Qualcomm Innovation Center, Inc. All rights reserved. CONFIG_QCOM_KGSL=m CONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 @@ -8,3 +8,5 @@ CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=y CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR="msm-adreno-tz" CONFIG_QCOM_KGSL_SYNX=y +CONFIG_QCOM_KGSL_USE_SHMEM=y +CONFIG_QCOM_KGSL_PROCESS_RECLAIM=y From 996e3620c345795547ccb51f1b3e92f6f7db3347 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Tue, 9 Jan 2024 15:05:59 -0700 Subject: [PATCH 0645/1016] kgsl: gen8: Vote for soccp for hardware fences Every client needs to vote for soccp power for hardware fence feature. In case the API returns an error, disable hardware fence feature and log any hardware fences that were created. This is needed for debug because these fences may never be signaled. 
Change-Id: I2800ca9ec2e6caf7b797066e024124c689a4b472 Signed-off-by: Harshdeep Dhatt --- adreno_gen8_hwsched.c | 56 ++++++++++++++++++++++++++++++++++++++ adreno_gen8_hwsched_hfi.h | 4 ++- adreno_hwsched.c | 36 +++++++++++++++++++++++++ adreno_hwsched.h | 10 ++++++- kgsl_gmu_core.c | 57 +++++++++++++++++++++++++++++++++++++++ kgsl_gmu_core.h | 27 ++++++++++++++++++- 6 files changed, 187 insertions(+), 3 deletions(-) diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 0154579328..5be66385e4 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -493,6 +493,52 @@ static void _get_hw_fence_entries(struct adreno_device *adreno_dev) shadow_num_entries); } +static void gen8_hwsched_soccp_vote_init(struct adreno_device *adreno_dev) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev); + + if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags)) + return; + + if (hw_hfi->hw_fence.soccp_rproc) + return; + + hw_hfi->hw_fence.soccp_rproc = gmu_core_soccp_vote_init(&gmu->pdev->dev); + if (!IS_ERR(hw_hfi->hw_fence.soccp_rproc)) + return; + + /* Disable hw fences */ + clear_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags); +} + +static void gen8_hwsched_soccp_vote(struct adreno_device *adreno_dev, bool pwr_on) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev); + + if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags)) + return; + + if (!gmu_core_soccp_vote(&gmu->pdev->dev, &gmu->flags, hw_hfi->hw_fence.soccp_rproc, + pwr_on)) + return; + + /* Make sure no more hardware fences are created */ + spin_lock(&hw_hfi->hw_fence.lock); + set_bit(GEN8_HWSCHED_HW_FENCE_ABORT_BIT, &hw_hfi->hw_fence.flags); + spin_unlock(&hw_hfi->hw_fence.lock); + + clear_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags); + + /* + * It is possible that some hardware fences were created while we 
were in slumber. Since + * soccp power vote failed, these hardware fences may never be signaled. Hence, log them + * for debug purposes. + */ + adreno_hwsched_log_pending_hw_fences(adreno_dev, &gmu->pdev->dev); +} + static int gen8_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -549,6 +595,10 @@ static int gen8_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) if (GMU_VER_MINOR(gmu->ver.hfi) >= 7) adreno_hwsched_register_hw_fence(adreno_dev); + gen8_hwsched_soccp_vote_init(adreno_dev); + + gen8_hwsched_soccp_vote(adreno_dev, true); + ret = gen8_gmu_device_start(adreno_dev); if (ret) goto err; @@ -580,6 +630,7 @@ static int gen8_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) err: gen8_gmu_irq_disable(adreno_dev); + gen8_hwsched_soccp_vote(adreno_dev, false); if (device->gmu_fault) { gen8_gmu_suspend(adreno_dev); @@ -626,6 +677,8 @@ static int gen8_hwsched_gmu_boot(struct adreno_device *adreno_dev) gen8_gmu_irq_enable(adreno_dev); + gen8_hwsched_soccp_vote(adreno_dev, true); + ret = gen8_gmu_device_start(adreno_dev); if (ret) goto err; @@ -647,6 +700,7 @@ static int gen8_hwsched_gmu_boot(struct adreno_device *adreno_dev) return 0; err: gen8_gmu_irq_disable(adreno_dev); + gen8_hwsched_soccp_vote(adreno_dev, false); if (device->gmu_fault) { gen8_gmu_suspend(adreno_dev); @@ -1232,6 +1286,8 @@ no_gx_power: kgsl_pwrctrl_clear_l3_vote(device); + gen8_hwsched_soccp_vote(adreno_dev, false); + kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER); return ret; diff --git a/adreno_gen8_hwsched_hfi.h b/adreno_gen8_hwsched_hfi.h index 00cbb44fc9..5b08a315c8 100644 --- a/adreno_gen8_hwsched_hfi.h +++ b/adreno_gen8_hwsched_hfi.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. 
All rights reserved. */ #ifndef _ADRENO_GEN8_HWSCHED_HFI_H_ @@ -85,6 +85,8 @@ struct gen8_hwsched_hfi { unsigned long flags; /** @seqnum: Sequence number for hardware fence packet header */ atomic_t seqnum; + /** @soccp_rproc: rproc handle for soccp */ + struct rproc *soccp_rproc; } hw_fence; /** * @hw_fence_timer: Timer to trigger fault if unack'd hardware fence count does'nt drop diff --git a/adreno_hwsched.c b/adreno_hwsched.c index bcb4157893..0f4d3794eb 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -2324,3 +2324,39 @@ u32 adreno_hwsched_parse_payload(struct payload_section *payload, u32 key) return 0; } + +void adreno_hwsched_log_pending_hw_fences(struct adreno_device *adreno_dev, struct device *dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_hw_fence_entry entries[5]; + struct adreno_hw_fence_entry *entry, *next; + struct kgsl_context *context; + int id, i; + u32 count = 0; + + read_lock(&device->context_lock); + idr_for_each_entry(&device->context_idr, context, id) { + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + + spin_lock(&drawctxt->lock); + + list_for_each_entry_safe(entry, next, &drawctxt->hw_fence_list, node) { + if (count < ARRAY_SIZE(entries)) + memcpy(&entries[count], entry, sizeof(*entry)); + count++; + } + + spin_unlock(&drawctxt->lock); + } + read_unlock(&device->context_lock); + + if (!count) + return; + + dev_err(dev, "%d hw fences may not be signaled. %s are:\n", count, + count > 5 ? "First 5" : "They"); + + for (i = 0; (i < count) && (i < ARRAY_SIZE(entries)); i++) + dev_err(dev, "%d: ctx=%llu seqno=%llu\n", i, entries[i].cmd.ctxt_id, + entries[i].cmd.ts); +} diff --git a/adreno_hwsched.h b/adreno_hwsched.h index ff84ee8f24..72acdd82b0 100644 --- a/adreno_hwsched.h +++ b/adreno_hwsched.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. 
All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _ADRENO_HWSCHED_H_ @@ -235,4 +235,12 @@ u32 adreno_hwsched_parse_payload(struct payload_section *payload, u32 key); * Returns zero for hwsched fault else non zero value */ u32 adreno_hwsched_gpu_fault(struct adreno_device *adreno_dev); + +/** + * adreno_hwsched_log_pending_hw_fences - Log any pending hardware fences if soccp vote failed + * @adreno_dev: pointer to the adreno device + * @dev: Pointer to the gmu pdev device + */ +void adreno_hwsched_log_pending_hw_fences(struct adreno_device *adreno_dev, struct device *dev); + #endif diff --git a/kgsl_gmu_core.c b/kgsl_gmu_core.c index 02b1b341f3..7d25684820 100644 --- a/kgsl_gmu_core.c +++ b/kgsl_gmu_core.c @@ -369,3 +369,60 @@ void gmu_core_reset_trace_header(struct kgsl_gmu_trace *trace) gmu_core_trace_header_init(trace); trace->reset_hdr = false; } + +#if (KERNEL_VERSION(6, 2, 0) <= LINUX_VERSION_CODE) +struct rproc *gmu_core_soccp_vote_init(struct device *dev) +{ + u32 soccp_handle; + struct rproc *soccp_rproc; + + if (of_property_read_u32(dev->of_node, "qcom,soccp-controller", &soccp_handle)) + return NULL; + + soccp_rproc = rproc_get_by_phandle(soccp_handle); + if (!IS_ERR_OR_NULL(soccp_rproc)) + return soccp_rproc; + + dev_err(dev, "Failed to get rproc for phandle:%u ret:%ld Disabling hw fences\n", + soccp_handle, soccp_rproc ? PTR_ERR(soccp_rproc) : -ENOENT); + + return soccp_rproc ? soccp_rproc : ERR_PTR(-ENOENT); + } + +int gmu_core_soccp_vote(struct device *dev, unsigned long *gmu_flags, struct rproc *soccp_rproc, + bool pwr_on) +{ + int ret; + + if (!soccp_rproc) + return 0; + + if (!(test_bit(GMU_PRIV_SOCCP_VOTE_ON, gmu_flags) ^ pwr_on)) + return 0; + + ret = rproc_set_state(soccp_rproc, pwr_on); + if (!ret) { + change_bit(GMU_PRIV_SOCCP_VOTE_ON, gmu_flags); + return 0; + } + + dev_err(dev, "soccp power %s failed: %d. Disabling hw fences\n", + pwr_on ? 
"on" : "off", ret); + + return ret; +} + +#else + +struct rproc *gmu_core_soccp_vote_init(struct device *dev) +{ + return ERR_PTR(-ENOENT); +} + +int gmu_core_soccp_vote(struct device *dev, unsigned long *gmu_flags, struct rproc *soccp_rproc, + bool pwr_on) +{ + return -EINVAL; +} + +#endif diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h index c21c81de9b..fcfd4945a6 100644 --- a/kgsl_gmu_core.h +++ b/kgsl_gmu_core.h @@ -6,8 +6,11 @@ #ifndef __KGSL_GMU_CORE_H #define __KGSL_GMU_CORE_H -#include #include +#include +#if (KERNEL_VERSION(6, 2, 0) <= LINUX_VERSION_CODE) +#include +#endif /* GMU_DEVICE - Given an KGSL device return the GMU specific struct */ #define GMU_DEVICE_OPS(_a) ((_a)->gmu_core.dev_ops) @@ -373,6 +376,8 @@ enum { GMU_PRIV_WARMBOOT_GMU_INIT_DONE, /* Indicates if GPU BOOT HFI messages are recorded successfully */ GMU_PRIV_WARMBOOT_GPU_BOOT_DONE, + /* Indicates if soccp was voted on for hardware fences */ + GMU_PRIV_SOCCP_VOTE_ON, }; struct device_node; @@ -532,4 +537,24 @@ void gmu_core_trace_header_init(struct kgsl_gmu_trace *trace); */ void gmu_core_reset_trace_header(struct kgsl_gmu_trace *trace); +/** + * gmu_core_soccp_vote_init - Initialize soccp rproc handle + * @dev: Pointer to gmu pdev device + * + * Return: Error pointer on failure and NULL or valid rproc handle on success + */ +struct rproc *gmu_core_soccp_vote_init(struct device *dev); + +/** + * gmu_core_soccp_vote - vote for soccp power + * @dev: Pointer to gmu pdev device + * @flags: Pointer to gmu flags + * @soccp_rproc: Pointer to soccp rproc + * @pwr_on: Boolean to indicate vote on or off + + * Return: Negative error on failure and zero on success. 
+ */ +int gmu_core_soccp_vote(struct device *dev, unsigned long *flags, + struct rproc *soccp_rproc, bool pwr_on); + #endif /* __KGSL_GMU_CORE_H */ From 36d146f15323e66ba09961cb2eeb8372f34da042 Mon Sep 17 00:00:00 2001 From: Archana Sriram Date: Tue, 19 Dec 2023 13:06:44 +0530 Subject: [PATCH 0646/1016] kgsl: Increase snapshot size for A615 GPU Increase snapshot size for A615 GPU in adreno-gpulist header. Change-Id: I494054666969dd96831eb86a8fce5a40c097e6c7 Signed-off-by: Archana Sriram --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 64289bce75..542c1816b1 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -834,7 +834,7 @@ static const struct adreno_a6xx_core adreno_gpu_core_a615 = { .uche_gmem_alignment = SZ_1M, .gmem_size = SZ_512K, .bus_width = 32, - .snapshot_size = 600 * SZ_1K, + .snapshot_size = SZ_1M, }, .prim_fifo_threshold = 0x0018000, .gmu_major = 1, From 6109f8b3ccd9e9229b2f2ee01223845aa5ea41bf Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Wed, 31 Jan 2024 22:49:00 +0530 Subject: [PATCH 0647/1016] kgsl: gen8: Update the reset value of VFD_CB_LP_REQ_CNT for gen8_4_0 VFD_CB_LP_REQ_CNT holds a buffer reservation value which was insufficient to allow out of order execution (between BR and BV) of one full wave. Hence, increase this buffer size to 16 to ensure switch between BR and BV is allowed only when VFD has enough space to execute at least 1 full wave. 
Change-Id: I9d449018b75e75ffe4e9f8f84e5ca5223581b281 Signed-off-by: Kamal Agrawal --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 12e3b03011..9afb2cbed5 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2782,7 +2782,7 @@ static const struct gen8_nonctxt_regs gen8_4_0_nonctxt_regs[] = { { GEN8_VFD_CB_BV_THRESHOLD, 0x00500050, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_VFD_CB_BR_THRESHOLD, 0x00600060, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_VFD_CB_BUSY_REQ_CNT, 0x00200020, BIT(PIPE_BR) | BIT(PIPE_BV) }, - { GEN8_VFD_CB_LP_REQ_CNT, 0x00000020, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_VFD_CB_LP_REQ_CNT, 0x00100020, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_VPC_FLATSHADE_MODE_CNTL, 0x00000001, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_VSC_BIN_SIZE, 0x00010001, BIT(PIPE_NONE) }, /* Disable redundant tile data optimization */ From bd9046571ebf74d2c717cf8a67d2f3eefc483f18 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Thu, 1 Feb 2024 18:34:20 -0800 Subject: [PATCH 0648/1016] kgsl: gen8: Don't dump secvid registers through crash dumper Secvid register are not accessible to crash dumper. So dump all secvid registers in legacy AHB path. 
Change-Id: Ie174255f8308eaefcc70ddc6081408a1965e5b21 Signed-off-by: Hareesh Gundu --- adreno_gen8_0_0_snapshot.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h index b9e9caba6f..bee127a670 100644 --- a/adreno_gen8_0_0_snapshot.h +++ b/adreno_gen8_0_0_snapshot.h @@ -193,8 +193,7 @@ static const u32 gen8_0_0_gpu_registers[] = { 0x0ed4a, 0x0ed4a, 0x0ed52, 0x0ed52, 0x0ed66, 0x0ed68, 0x0ed6b, 0x0ed6d, 0x0ed6f, 0x0ed6f, 0x0ed80, 0x0ed81, 0x0ed85, 0x0ed85, 0x0ed87, 0x0ed87, 0x0ed8a, 0x0ed8a, 0x0ed92, 0x0ed92, 0x0eda6, 0x0eda8, 0x0edab, 0x0edad, - 0x0edaf, 0x0edaf, 0x0f400, 0x0f400, 0x0f800, 0x0f803, 0x0fc00, 0x0fc01, - UINT_MAX, UINT_MAX, + 0x0edaf, 0x0edaf, UINT_MAX, UINT_MAX, }; static_assert(IS_ALIGNED(sizeof(gen8_0_0_gpu_registers), 8)); From a7042280d88e68b74cef8ba5b0b035fae71743e4 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sat, 3 Feb 2024 14:50:22 +0530 Subject: [PATCH 0649/1016] kgsl: gen8: Update static powerup register list SP_READ_SEL register is not restored across preemption boundary. RBBM_SLICE_INTERFACE_HANG_INT_CNTL is not retained across IFPC. Thus, update static power up register list so that the registers are restored across IFPC/preemption boundary. Also, remove duplicate entries of UCHE_CCHE_LPAC_GMEM_RANGE_MIN_* register. 
Change-Id: I29e56eda9d415d775793b8b558e4b595dff7deaf Signed-off-by: Kamal Agrawal --- adreno_gen8.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index 470e984522..19475b5636 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -39,22 +39,21 @@ static const u32 gen8_pwrup_reglist[] = { GEN8_UCHE_TRAP_BASE_LO, GEN8_UCHE_TRAP_BASE_HI, GEN8_UCHE_CLIENT_PF, - GEN8_UCHE_CCHE_LPAC_GMEM_RANGE_MIN_LO, - GEN8_UCHE_CCHE_LPAC_GMEM_RANGE_MIN_HI, GEN8_VSC_BIN_SIZE, GEN8_VSC_KMD_DBG_ECO_CNTL, GEN8_RB_CMP_NC_MODE_CNTL, GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_LO, GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_HI, + GEN8_SP_READ_SEL, }; /* IFPC only static powerup restore list */ static const u32 gen8_ifpc_pwrup_reglist[] = { GEN8_RBBM_NC_MODE_CNTL, + GEN8_RBBM_SLICE_INTERFACE_HANG_INT_CNTL, GEN8_RBBM_SLICE_NC_MODE_CNTL, GEN8_SP_NC_MODE_CNTL, - GEN8_SP_READ_SEL, GEN8_SP_HLSQ_LPAC_GMEM_RANGE_MIN_LO, GEN8_SP_HLSQ_LPAC_GMEM_RANGE_MIN_HI, GEN8_SP_CHICKEN_BITS_1, From 52a804f6b461f5b0503c1177070ff7f2db73cd7a Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Tue, 23 Jan 2024 11:49:10 -0800 Subject: [PATCH 0650/1016] kgsl: hwsched: Enable TSBWRITEERROR interrupt Enable TSBWRITEERROR interrupt to handle trusted steering block errors and detect security violation reports. Change-Id: Id4f88b24abde9d690f89919865fd8d1532d0b6e6 Signed-off-by: Hareesh Gundu --- adreno_gen7.h | 5 +++-- adreno_gen8.h | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/adreno_gen7.h b/adreno_gen7.h index 116977ac45..a0061df324 100644 --- a/adreno_gen7.h +++ b/adreno_gen7.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef _ADRENO_GEN7_H_ @@ -215,7 +215,8 @@ struct gen7_cp_smmu_info { (1 << GEN7_INT_ATBASYNCFIFOOVERFLOW) | \ (1 << GEN7_INT_ATBBUSOVERFLOW) | \ (1 << GEN7_INT_OUTOFBOUNDACCESS) | \ - (1 << GEN7_INT_UCHETRAPINTERRUPT)) + (1 << GEN7_INT_UCHETRAPINTERRUPT) | \ + (1 << GEN7_INT_TSBWRITEERROR)) /** * to_gen7_core - return the gen7 specific GPU core struct diff --git a/adreno_gen8.h b/adreno_gen8.h index 08c4e41859..a5e50ce374 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -257,7 +257,8 @@ struct gen8_cp_smmu_info { (1 << GEN8_INT_ATBASYNCFIFOOVERFLOW) | \ (1 << GEN8_INT_ATBBUSOVERFLOW) | \ (1 << GEN8_INT_OUTOFBOUNDACCESS) | \ - (1 << GEN8_INT_UCHETRAPINTERRUPT)) + (1 << GEN8_INT_UCHETRAPINTERRUPT) | \ + (1 << GEN8_INT_TSBWRITEERROR)) /** * to_gen8_core - return the gen8 specific GPU core struct From bb9feffebe780d66ece53c7b5dfc9efc591cbca5 Mon Sep 17 00:00:00 2001 From: Rakesh Naidu Bhaviripudi Date: Wed, 24 Jan 2024 14:10:52 +0530 Subject: [PATCH 0651/1016] kgsl: Remove unused gmu always on counter definitions GMU_CX_GMU_ALWAYS_ON_COUNTER_* are not used anymore on a6xx and gen7 targets. Thus, remove these registers. Change-Id: I8a0bcb9e366dd176f8b86348bc4e589d0b8e1f27 Signed-off-by: Rakesh Naidu Bhaviripudi --- a6xx_reg.h | 4 +--- gen7_reg.h | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/a6xx_reg.h b/a6xx_reg.h index eb38411c84..60e3789fa6 100644 --- a/a6xx_reg.h +++ b/a6xx_reg.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2017-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef _A6XX_REG_H @@ -1019,8 +1019,6 @@ #define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_8_H 0x1F875 #define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_9_L 0x1F876 #define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_9_H 0x1F877 -#define A6XX_GMU_CX_GMU_ALWAYS_ON_COUNTER_L 0x1F888 -#define A6XX_GMU_CX_GMU_ALWAYS_ON_COUNTER_H 0x1F889 #define A6XX_GMU_CX_GMU_PERF_COUNTER_ENABLE 0x1F8A0 #define A6XX_GMU_CX_GMU_PERF_COUNTER_SELECT_0 0x1F8A1 #define A6XX_GMU_CX_GMU_PERF_COUNTER_SELECT_1 0x1F8A2 diff --git a/gen7_reg.h b/gen7_reg.h index fc005f7166..f2c905dfcf 100644 --- a/gen7_reg.h +++ b/gen7_reg.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _GEN7_REG_H @@ -1159,8 +1159,6 @@ #define GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_3 0x1f87f #define GEN7_GMU_CX_AO_COUNTER_LO 0x1f880 #define GEN7_GMU_CX_AO_COUNTER_HI 0x1f881 -#define GEN7_GMU_CX_GMU_ALWAYS_ON_COUNTER_L 0x1f888 -#define GEN7_GMU_CX_GMU_ALWAYS_ON_COUNTER_H 0x1f889 #define GEN7_GMU_PWR_COL_INTER_FRAME_CTRL 0x1f8c0 #define GEN7_GMU_PWR_COL_INTER_FRAME_HYST 0x1f8c1 #define GEN7_GMU_GFX_PWR_CLK_STATUS 0x1f8d0 From 27dd88ec676095f0684ac0e74bc0fcc6dc4cc9cd Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 28 Dec 2023 18:02:06 +0530 Subject: [PATCH 0652/1016] kgsl: Add warning for missing/invalid default GPU power level It is recommended to specify "qcom,initial-pwrlevel" in device tree. Also, the specified value should be a valid gpu power level. Thus, log a warning if default power level is not specified properly in device tree. 
Change-Id: I6e5515a9221566d1f946ef03444aab030b1946ad Signed-off-by: Kamal Agrawal --- adreno.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/adreno.c b/adreno.c index 6186f15f3e..0d3b6c4987 100644 --- a/adreno.c +++ b/adreno.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #include #include @@ -707,10 +707,8 @@ static void adreno_of_get_initial_pwrlevels(struct kgsl_pwrctrl *pwr, int level; /* Get and set the initial power level */ - if (of_property_read_u32(node, "qcom,initial-pwrlevel", &level)) - level = 1; - - if (level < 0 || level >= pwr->num_pwrlevels) + if (WARN_ON(of_property_read_u32(node, "qcom,initial-pwrlevel", &level) || + level < 0 || level >= pwr->num_pwrlevels)) level = 1; pwr->active_pwrlevel = level; From e8c1784fd39266051536076d68b03aa5c3499500 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Mon, 5 Feb 2024 11:17:23 -0800 Subject: [PATCH 0653/1016] =?UTF-8?q?kgsl:=20hwsched:=20Don=E2=80=99t=20pu?= =?UTF-8?q?t=20keepalive=20vote=20in=20irq=5Fhandler?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GMU does GX vote on request for command submission which will be retained till command retired. For hwsched few HW errors handled at host side where GMU already has the GX vote. Introduce new interrupt handlers for hwsched specific targets which don't execute the keep alive vote. 
Change-Id: I0a319787afb1874a78994b869f42911d8574e2b8 Signed-off-by: Hareesh Gundu --- adreno_a6xx.c | 26 ++++++++++++++++++++++++-- adreno_gen7.c | 28 +++++++++++++++++++++++++--- adreno_gen8.c | 24 +++++++++++++++++++++++- 3 files changed, 72 insertions(+), 6 deletions(-) diff --git a/adreno_a6xx.c b/adreno_a6xx.c index 15abcc059c..c45fac390a 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2017-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -1853,6 +1853,28 @@ static int a6xx_irq_poll_fence(struct adreno_device *adreno_dev) return 0; } +static irqreturn_t a6xx_hwsched_irq_handler(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + irqreturn_t ret = IRQ_NONE; + u32 status; + + if (a6xx_irq_poll_fence(adreno_dev)) { + adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); + return ret; + } + + kgsl_regread(device, A6XX_RBBM_INT_0_STATUS, &status); + + kgsl_regwrite(device, A6XX_RBBM_INT_CLEAR_CMD, status); + + ret = adreno_irq_callbacks(adreno_dev, a6xx_irq_funcs, status); + + trace_kgsl_a5xx_irq_status(adreno_dev, status); + + return ret; +} + static irqreturn_t a6xx_irq_handler(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -2340,7 +2362,7 @@ const struct a6xx_gpudev adreno_a6xx_hwsched_gpudev = { .reg_offsets = a6xx_register_offsets, .probe = a6xx_hwsched_probe, .snapshot = a6xx_hwsched_snapshot, - .irq_handler = a6xx_irq_handler, + .irq_handler = a6xx_hwsched_irq_handler, .iommu_fault_block = a6xx_iommu_fault_block, .preemption_context_init = a6xx_preemption_context_init, .context_detach = a6xx_hwsched_context_detach, diff --git a/adreno_gen7.c b/adreno_gen7.c index ee32898ba4..2924599c6c 100644 --- a/adreno_gen7.c +++ 
b/adreno_gen7.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -1572,6 +1572,28 @@ static int gen7_irq_poll_fence(struct adreno_device *adreno_dev) return 0; } +static irqreturn_t gen7_hwsched_irq_handler(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + irqreturn_t ret = IRQ_NONE; + u32 status; + + if (gen7_irq_poll_fence(adreno_dev)) { + adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); + return ret; + } + + kgsl_regread(device, GEN7_RBBM_INT_0_STATUS, &status); + + kgsl_regwrite(device, GEN7_RBBM_INT_CLEAR_CMD, status); + + ret = adreno_irq_callbacks(adreno_dev, gen7_irq_funcs, status); + + trace_kgsl_gen7_irq_status(adreno_dev, status); + + return ret; +} + static irqreturn_t gen7_irq_handler(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -2166,7 +2188,7 @@ const struct gen7_gpudev adreno_gen7_9_0_hwsched_gpudev = { .reg_offsets = gen7_register_offsets, .probe = gen7_hwsched_probe, .snapshot = gen7_hwsched_snapshot, - .irq_handler = gen7_irq_handler, + .irq_handler = gen7_hwsched_irq_handler, .iommu_fault_block = gen7_iommu_fault_block, .preemption_context_init = gen7_preemption_context_init, .context_detach = gen7_hwsched_context_detach, @@ -2196,7 +2218,7 @@ const struct gen7_gpudev adreno_gen7_hwsched_gpudev = { .reg_offsets = gen7_register_offsets, .probe = gen7_hwsched_probe, .snapshot = gen7_hwsched_snapshot, - .irq_handler = gen7_irq_handler, + .irq_handler = gen7_hwsched_irq_handler, .iommu_fault_block = gen7_iommu_fault_block, .preemption_context_init = gen7_preemption_context_init, .context_detach = gen7_hwsched_context_detach, diff --git a/adreno_gen8.c b/adreno_gen8.c index 470e984522..0fae8a5089 100644 
--- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -1768,6 +1768,28 @@ static int gen8_irq_poll_fence(struct adreno_device *adreno_dev) return 0; } +static irqreturn_t gen8_hwsched_irq_handler(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + irqreturn_t ret = IRQ_NONE; + u32 status; + + if (gen8_irq_poll_fence(adreno_dev)) { + adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); + return ret; + } + + kgsl_regread(device, GEN8_RBBM_INT_0_STATUS, &status); + + kgsl_regwrite(device, GEN8_RBBM_INT_CLEAR_CMD, status); + + ret = adreno_irq_callbacks(adreno_dev, gen8_irq_funcs, status); + + trace_kgsl_gen8_irq_status(adreno_dev, status); + + return ret; +} + static irqreturn_t gen8_irq_handler(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -2419,7 +2441,7 @@ const struct gen8_gpudev adreno_gen8_hwsched_gpudev = { .reg_offsets = gen8_register_offsets, .probe = gen8_hwsched_probe, .snapshot = gen8_hwsched_snapshot, - .irq_handler = gen8_irq_handler, + .irq_handler = gen8_hwsched_irq_handler, .iommu_fault_block = gen8_iommu_fault_block, .preemption_context_init = gen8_preemption_context_init, .context_detach = gen8_hwsched_context_detach, From 638887d971504ad2c019991d16eac2d8aa0a7830 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Fri, 9 Feb 2024 12:11:09 +0530 Subject: [PATCH 0654/1016] kgsl: Clear GPU wake on touch flag on slumber entry The wake on touch flag is only cleared when work is submitted to the GPU after wakeup from a touch event. If the GPU is woken up as a result of a write to the touch_wake sysfs node and no work is submitted, this flag remains set even though the GPU enters SLUMBER. This prevents the next touch event from triggering early GPU wakeup. So, clear the flag when GPU enters SLUMBER. 
Change-Id: I5cf7fe4950602f96d87cfd646d73dc43e7db1ee2 Signed-off-by: Sushmita Susheelendra Signed-off-by: Kamal Agrawal --- adreno.c | 8 ++++---- adreno.h | 2 -- kgsl_pwrctrl.c | 3 +++ kgsl_pwrctrl.h | 4 +++- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/adreno.c b/adreno.c index 6186f15f3e..57fdcdf5b3 100644 --- a/adreno.c +++ b/adreno.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #include #include @@ -210,7 +210,7 @@ static void adreno_input_work(struct work_struct *work) mutex_lock(&device->mutex); - adreno_dev->wake_on_touch = true; + device->pwrctrl.wake_on_touch = true; ops->touch_wakeup(adreno_dev); @@ -227,7 +227,7 @@ void adreno_touch_wake(struct kgsl_device *device) * here before */ - if (adreno_dev->wake_on_touch) + if (device->pwrctrl.wake_on_touch) return; if (gmu_core_isenabled(device) || (device->state == KGSL_STATE_SLUMBER)) @@ -3106,7 +3106,7 @@ int adreno_verify_cmdobj(struct kgsl_device_private *dev_priv, * been submitted since the last time we set it. * But only clear it when we have rendering commands. 
*/ - ADRENO_DEVICE(device)->wake_on_touch = false; + device->pwrctrl.wake_on_touch = false; } } diff --git a/adreno.h b/adreno.h index 166394a710..fe1f24cc6e 100644 --- a/adreno.h +++ b/adreno.h @@ -680,8 +680,6 @@ struct adreno_device { struct kgsl_memdesc *critpkts_secure; /** @irq_mask: The current interrupt mask for the GPU device */ u32 irq_mask; - /** @wake_on_touch: If true our last wakeup was due to a touch event */ - bool wake_on_touch; /* @dispatch_ops: A pointer to a set of adreno dispatch ops */ const struct adreno_dispatch_ops *dispatch_ops; /** @hwsched: Container for the hardware dispatcher */ diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 9f72977988..b0f48c092d 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -2271,6 +2271,9 @@ void kgsl_pwrctrl_set_state(struct kgsl_device *device, device->state = state; device->requested_state = KGSL_STATE_NONE; + if (state == KGSL_STATE_SLUMBER) + device->pwrctrl.wake_on_touch = false; + spin_lock(&device->submit_lock); if (state == KGSL_STATE_ACTIVE) device->skip_inline_submit = false; diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index 383d85ee99..a077c7b9c4 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2010-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __KGSL_PWRCTRL_H #define __KGSL_PWRCTRL_H @@ -175,6 +175,8 @@ struct kgsl_pwrctrl { u32 rt_bus_hint; /** @rt_bus_hint_active: Boolean flag to indicate if RT bus hint is active */ bool rt_bus_hint_active; + /** @wake_on_touch: If true our last wakeup was due to a touch event */ + bool wake_on_touch; }; int kgsl_pwrctrl_init(struct kgsl_device *device); From 5c1d5fe088574a4d10d85dccbde9c0dcc86eb7f7 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sat, 13 Jan 2024 15:22:12 +0530 Subject: [PATCH 0655/1016] kgsl: Make ACV perf mode level target specific Currently, ACV perf mode level is hard-coded for each generation of GPUs. But, DDR frequency plan and recommended perf mode level can vary from target to target within same generation. Thus, introduce a new field in gpulist to specify the recommended perf mode level for each target. Change-Id: I264f40c444463cbedbe53d2e7c5f61bc116e935e Signed-off-by: Kamal Agrawal --- adreno-gpulist.h | 4 ++++ adreno_gen7.h | 7 ++++++- adreno_gen7_rpmh.c | 7 +++---- adreno_gen8.h | 7 ++++++- adreno_gen8_rpmh.c | 7 +++---- kgsl_pwrctrl.c | 17 +++++++++++++++++ kgsl_pwrctrl.h | 11 ++++++++++- 7 files changed, 49 insertions(+), 11 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 12e3b03011..1301e937cb 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -16,6 +16,8 @@ static const struct adreno_gpu_core adreno_gpu_core_##_name = { \ .features = ADRENO_DEPRECATED, \ } +#define MHZ_TO_KBPS(mhz, w) ((u64)(mhz * 1000000ULL * w) / (1024)) + DEFINE_DEPRECATED_CORE(a304, ADRENO_REV_A304, 4, 0, 5, ANY_ID); DEFINE_DEPRECATED_CORE(a306, ADRENO_REV_A306, 4, 0, 5, ANY_ID); DEFINE_DEPRECATED_CORE(a306a, ADRENO_REV_A306A, 4, 0, 5, ANY_ID); @@ -2316,6 +2318,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_9_0 = { .gen7_snapshot_block_list = &gen7_9_0_snapshot_block_list, .bcl_data = 1, .acv_perfmode_vote = BIT(2), + .acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4), .ctxt_record_size = (3572 * SZ_1K), 
.preempt_level = 1, .fast_bus_hint = true, @@ -2353,6 +2356,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_9_1 = { .gen7_snapshot_block_list = &gen7_9_0_snapshot_block_list, .bcl_data = 1, .acv_perfmode_vote = BIT(2), + .acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4), .ctxt_record_size = (3572 * SZ_1K), .preempt_level = 1, .fast_bus_hint = true, diff --git a/adreno_gen7.h b/adreno_gen7.h index 116977ac45..9e891a2b00 100644 --- a/adreno_gen7.h +++ b/adreno_gen7.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _ADRENO_GEN7_H_ @@ -115,6 +115,11 @@ struct adreno_gen7_core { u32 preempt_level; /** @qos_value: GPU qos value to set for each RB. */ const u32 *qos_value; + /** + * @acv_perfmode_ddr_freq: Vote perfmode when DDR frequency >= acv_perfmode_ddr_freq. + * If not specified, vote perfmode for highest DDR level only. + */ + u32 acv_perfmode_ddr_freq; /** @acv_perfmode_vote: ACV vote for GPU perfmode */ u32 acv_perfmode_vote; /** @rt_bus_hint: IB level hint for real time clients i.e. RB-0 */ diff --git a/adreno_gen7_rpmh.c b/adreno_gen7_rpmh.c index 62d46b703c..be6cac5b7c 100644 --- a/adreno_gen7_rpmh.c +++ b/adreno_gen7_rpmh.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -452,8 +452,6 @@ static void build_bw_table_cmd(struct hfi_bwtable_cmd *cmd, cmd->cnoc_cmd_data[i][j] = (u32) cnoc->cmds[i][j]; } -#define GEN7_9_0_DDR_NOM_IDX 6 - static int build_bw_table(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); @@ -462,7 +460,8 @@ static int build_bw_table(struct adreno_device *adreno_dev) struct kgsl_pwrctrl *pwr = &device->pwrctrl; struct rpmh_bw_votes *ddr, *cnoc = NULL; u32 perfmode_vote = gen7_core->acv_perfmode_vote; - u32 perfmode_lvl = adreno_is_gen7_9_x(adreno_dev) ? GEN7_9_0_DDR_NOM_IDX : 1; + u32 perfmode_lvl = perfmode_vote ? kgsl_pwrctrl_get_acv_perfmode_lvl(device, + gen7_core->acv_perfmode_ddr_freq) : 1; u32 *cnoc_table; u32 count; int ret; diff --git a/adreno_gen8.h b/adreno_gen8.h index 9aeff7d805..2134a932af 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _ADRENO_GEN8_H_ @@ -130,6 +130,11 @@ struct adreno_gen8_core { u32 preempt_level; /** @qos_value: GPU qos value to set for each RB. */ const u32 *qos_value; + /** + * @acv_perfmode_ddr_freq: Vote perfmode when DDR frequency >= acv_perfmode_ddr_freq. + * If not specified, vote perfmode for highest DDR level only. + */ + u32 acv_perfmode_ddr_freq; /** @acv_perfmode_vote: ACV vote for GPU perfmode */ u32 acv_perfmode_vote; /** @rt_bus_hint: IB level hint for real time clients i.e. RB-0 */ diff --git a/adreno_gen8_rpmh.c b/adreno_gen8_rpmh.c index a8ee118517..b6024d9fcc 100644 --- a/adreno_gen8_rpmh.c +++ b/adreno_gen8_rpmh.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. 
+ * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -452,8 +452,6 @@ static void build_bw_table_cmd(struct hfi_bwtable_cmd *cmd, cmd->cnoc_cmd_data[i][j] = (u32) cnoc->cmds[i][j]; } -#define GEN8_DDR_NOM_IDX 6 - static int build_bw_table(struct adreno_device *adreno_dev) { struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); @@ -462,7 +460,8 @@ static int build_bw_table(struct adreno_device *adreno_dev) struct kgsl_pwrctrl *pwr = &device->pwrctrl; struct rpmh_bw_votes *ddr, *cnoc = NULL; u32 perfmode_vote = gen8_core->acv_perfmode_vote; - u32 perfmode_lvl = GEN8_DDR_NOM_IDX; + u32 perfmode_lvl = perfmode_vote ? kgsl_pwrctrl_get_acv_perfmode_lvl(device, + gen8_core->acv_perfmode_ddr_freq) : 1; u32 *cnoc_table; u32 count; int ret; diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 9f72977988..0f4661843f 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -2362,6 +2362,23 @@ int kgsl_pwrctrl_set_default_gpu_pwrlevel(struct kgsl_device *device) return device->ftbl->gpu_clock_set(device, pwr->active_pwrlevel); } +u32 kgsl_pwrctrl_get_acv_perfmode_lvl(struct kgsl_device *device, u32 ddr_freq) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int i; + + if (!ddr_freq) + return (pwr->ddr_table_count - 1); + + for (i = 0; i < pwr->ddr_table_count; i++) { + if (pwr->ddr_table[i] >= ddr_freq) + return i; + } + + /* If DDR frequency is not found, vote perfmode for highest DDR level */ + return (pwr->ddr_table_count - 1); +} + int kgsl_gpu_num_freqs(void) { struct kgsl_device *device = kgsl_get_device(0); diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index 383d85ee99..5286e74244 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2010-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __KGSL_PWRCTRL_H #define __KGSL_PWRCTRL_H @@ -317,4 +317,13 @@ void kgsl_pwrctrl_disable_gx_gdsc(struct kgsl_device *device); * Return: 0 on success or negative error on failure */ int kgsl_pwrctrl_probe_gdscs(struct kgsl_device *device, struct platform_device *pdev); + +/** + * kgsl_pwrctrl_get_acv_perfmode_lvl - Retrieve DDR level for GPU performance mode + * @device: Pointer to the kgsl device + * @ddr_freq: Target specific DDR frequency from where GPU needs to vote for perf mode + * + * Return: DDR vote level from where GPU should vote for performance mode + */ +u32 kgsl_pwrctrl_get_acv_perfmode_lvl(struct kgsl_device *device, u32 ddr_freq); #endif /* __KGSL_PWRCTRL_H */ From 20e205f68bb70e91c34081324e150e886a68af53 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 8 Feb 2024 10:57:19 +0530 Subject: [PATCH 0656/1016] kgsl: gen8: Enable ACV perfmode vote for gen8_0_0 Enable ACV perfmode vote for gen8_0_0 for improved performance. Change-Id: If6eaa9e0d0129297c4d8dde19d5b933e9ffe279d Signed-off-by: Kamal Agrawal --- adreno-gpulist.h | 1 + adreno_gen8.h | 2 -- adreno_gen8_rpmh.c | 14 ++++++-------- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 1301e937cb..acdf048741 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2733,6 +2733,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .gen8_snapshot_block_list = &gen8_0_0_snapshot_block_list, .fast_bus_hint = true, .bcl_data = 1, + .acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4), }; /* GEN8_4_0 noncontext register list */ diff --git a/adreno_gen8.h b/adreno_gen8.h index 2134a932af..1dd902123c 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -135,8 +135,6 @@ struct adreno_gen8_core { * If not specified, vote perfmode for highest DDR level only. */ u32 acv_perfmode_ddr_freq; - /** @acv_perfmode_vote: ACV vote for GPU perfmode */ - u32 acv_perfmode_vote; /** @rt_bus_hint: IB level hint for real time clients i.e. 
RB-0 */ const u32 rt_bus_hint; /** @fast_bus_hint: Whether or not to increase IB vote on high ddr stall */ diff --git a/adreno_gen8_rpmh.c b/adreno_gen8_rpmh.c index b6024d9fcc..c3d59bec5b 100644 --- a/adreno_gen8_rpmh.c +++ b/adreno_gen8_rpmh.c @@ -452,6 +452,9 @@ static void build_bw_table_cmd(struct hfi_bwtable_cmd *cmd, cmd->cnoc_cmd_data[i][j] = (u32) cnoc->cmds[i][j]; } +/* BIT(2) is used to vote for GPU performance mode through GMU */ +#define ACV_GPU_PERFMODE_VOTE BIT(2) + static int build_bw_table(struct adreno_device *adreno_dev) { struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); @@ -459,19 +462,14 @@ static int build_bw_table(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct kgsl_pwrctrl *pwr = &device->pwrctrl; struct rpmh_bw_votes *ddr, *cnoc = NULL; - u32 perfmode_vote = gen8_core->acv_perfmode_vote; - u32 perfmode_lvl = perfmode_vote ? kgsl_pwrctrl_get_acv_perfmode_lvl(device, - gen8_core->acv_perfmode_ddr_freq) : 1; + u32 perfmode_lvl = kgsl_pwrctrl_get_acv_perfmode_lvl(device, + gen8_core->acv_perfmode_ddr_freq); u32 *cnoc_table; u32 count; int ret; - /* If perfmode vote is not defined, use default value as 0x8 */ - if (!perfmode_vote) - perfmode_vote = BIT(3); - ddr = build_rpmh_bw_votes(gen8_ddr_bcms, ARRAY_SIZE(gen8_ddr_bcms), - pwr->ddr_table, pwr->ddr_table_count, perfmode_vote, perfmode_lvl); + pwr->ddr_table, pwr->ddr_table_count, ACV_GPU_PERFMODE_VOTE, perfmode_lvl); if (IS_ERR(ddr)) return PTR_ERR(ddr); From a08967a47d3c29c172b05fb2bf152f0e1338dd5e Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Fri, 2 Feb 2024 16:24:22 -0800 Subject: [PATCH 0657/1016] kgsl: hwsched: Fix Fault header logging Add gx_on check before accessing the GX register in fault header. Also dump GX state information in fault header. 
Change-Id: I5aebd400d5772032ba9b8ca67143befdbf1f7cae Signed-off-by: Hareesh Gundu --- adreno_gen7.c | 40 +++++++++++++--------- adreno_gen8.c | 86 +++++++++++++++++++++++++----------------------- adreno_hwsched.c | 77 ++++++++++++++++++++++++++----------------- 3 files changed, 115 insertions(+), 88 deletions(-) diff --git a/adreno_gen7.c b/adreno_gen7.c index ee32898ba4..a2a743e4fc 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -2130,32 +2130,40 @@ static void gen7_lpac_fault_header(struct adreno_device *adreno_dev, { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct adreno_context *drawctxt_lpac; - u32 status; - u32 lpac_rptr, lpac_wptr, lpac_ib1sz, lpac_ib2sz; - u64 lpac_ib1base, lpac_ib2base; - - kgsl_regread(device, GEN7_RBBM_STATUS, &status); - kgsl_regread(device, GEN7_CP_LPAC_RB_RPTR, &lpac_rptr); - kgsl_regread(device, GEN7_CP_LPAC_RB_WPTR, &lpac_wptr); - kgsl_regread64(device, GEN7_CP_LPAC_IB1_BASE_HI, GEN7_CP_LPAC_IB1_BASE, &lpac_ib1base); - kgsl_regread(device, GEN7_CP_LPAC_IB1_REM_SIZE, &lpac_ib1sz); - kgsl_regread64(device, GEN7_CP_LPAC_IB2_BASE_HI, GEN7_CP_LPAC_IB2_BASE, &lpac_ib2base); - kgsl_regread(device, GEN7_CP_LPAC_IB2_REM_SIZE, &lpac_ib2sz); + u32 status = 0, lpac_rptr = 0, lpac_wptr = 0, lpac_ib1sz = 0, lpac_ib2sz = 0; + u64 lpac_ib1base = 0, lpac_ib2base = 0; + bool gx_on = adreno_gx_is_on(adreno_dev); drawctxt_lpac = ADRENO_CONTEXT(drawobj_lpac->context); drawobj_lpac->context->last_faulted_cmd_ts = drawobj_lpac->timestamp; drawobj_lpac->context->total_fault_count++; pr_context(device, drawobj_lpac->context, - "LPAC ctx %d ctx_type %s ts %d status %8.8X dispatch_queue=%d rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", + 
"LPAC ctx %d ctx_type %s ts %d dispatch_queue=%d\n", drawobj_lpac->context->id, kgsl_context_type(drawctxt_lpac->type), - drawobj_lpac->timestamp, status, - drawobj_lpac->context->gmu_dispatch_queue, lpac_rptr, lpac_wptr, - lpac_ib1base, lpac_ib1sz, lpac_ib2base, lpac_ib2sz); + drawobj_lpac->timestamp, drawobj_lpac->context->gmu_dispatch_queue); pr_context(device, drawobj_lpac->context, "lpac cmdline: %s\n", drawctxt_lpac->base.proc_priv->cmdline); + if (!gx_on) + goto done; + kgsl_regread(device, GEN7_RBBM_STATUS, &status); + kgsl_regread(device, GEN7_CP_LPAC_RB_RPTR, &lpac_rptr); + kgsl_regread(device, GEN7_CP_LPAC_RB_WPTR, &lpac_wptr); + kgsl_regread64(device, GEN7_CP_LPAC_IB1_BASE_HI, + GEN7_CP_LPAC_IB1_BASE, &lpac_ib1base); + kgsl_regread(device, GEN7_CP_LPAC_IB1_REM_SIZE, &lpac_ib1sz); + kgsl_regread64(device, GEN7_CP_LPAC_IB2_BASE_HI, + GEN7_CP_LPAC_IB2_BASE, &lpac_ib2base); + kgsl_regread(device, GEN7_CP_LPAC_IB2_REM_SIZE, &lpac_ib2sz); + + pr_context(device, drawobj_lpac->context, + "LPAC: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", + status, lpac_rptr, lpac_wptr, lpac_ib1base, + lpac_ib1sz, lpac_ib2base, lpac_ib2sz); + +done: trace_adreno_gpu_fault(drawobj_lpac->context->id, drawobj_lpac->timestamp, status, lpac_rptr, lpac_wptr, lpac_ib1base, lpac_ib1sz, lpac_ib2base, lpac_ib2sz, adreno_get_level(drawobj_lpac->context)); diff --git a/adreno_gen8.c b/adreno_gen8.c index 470e984522..38418b26ee 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -2300,8 +2300,23 @@ static void gen8_lpac_fault_header(struct adreno_device *adreno_dev, { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct adreno_context *drawctxt; - u32 status, rptr, wptr, ib1sz, ib2sz, ib3sz; - u64 ib1base, ib2base, ib3base; + u32 status = 0, rptr = 0, wptr = 0, ib1sz = 0, ib2sz = 0, ib3sz = 0; + u64 ib1base = 0, ib2base = 0, ib3base = 0; + bool gx_on = adreno_gx_is_on(adreno_dev); + + drawctxt = ADRENO_CONTEXT(drawobj->context); + 
drawobj->context->last_faulted_cmd_ts = drawobj->timestamp; + drawobj->context->total_fault_count++; + + pr_context(device, drawobj->context, "LPAC ctx %u ctx_type %s ts %u dispatch_queue=%d\n", + drawobj->context->id, kgsl_context_type(drawctxt->type), + drawobj->timestamp, drawobj->context->gmu_dispatch_queue); + + pr_context(device, drawobj->context, "lpac cmdline: %s\n", + drawctxt->base.proc_priv->cmdline); + + if (!gx_on) + goto done; kgsl_regread(device, GEN8_RBBM_LPAC_STATUS, &status); kgsl_regread(device, GEN8_CP_RB_RPTR_LPAC, &rptr); @@ -2317,22 +2332,11 @@ static void gen8_lpac_fault_header(struct adreno_device *adreno_dev, gen8_regread_aperture(device, GEN8_CP_IB3_REM_SIZE_PIPE, &ib3sz, PIPE_LPAC, 0, 0); gen8_host_aperture_set(adreno_dev, 0, 0, 0); - drawctxt = ADRENO_CONTEXT(drawobj->context); - drawobj->context->last_faulted_cmd_ts = drawobj->timestamp; - drawobj->context->total_fault_count++; - pr_context(device, drawobj->context, - "LPAC ctx %u ctx_type %s ts %u status %8.8X\n", - drawobj->context->id, kgsl_context_type(drawctxt->type), - drawobj->timestamp, status); - - pr_context(device, drawobj->context, - "LPAC: rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n", - rptr, wptr, ib1base, ib1sz, ib2base, ib2sz, ib3base, ib3sz); - - pr_context(device, drawobj->context, "lpac cmdline: %s\n", - drawctxt->base.proc_priv->cmdline); + "LPAC: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n", + status, rptr, wptr, ib1base, ib1sz, ib2base, ib2sz, ib3base, ib3sz); +done: trace_adreno_gpu_fault(drawobj->context->id, drawobj->timestamp, status, rptr, wptr, ib1base, ib1sz, ib2base, ib2sz, adreno_get_level(drawobj->context)); @@ -2344,12 +2348,31 @@ static void gen8_fault_header(struct adreno_device *adreno_dev, { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct adreno_context *drawctxt; - u32 status, rptr, wptr, ib1sz, ib2sz, ib3sz, rptr_bv, ib1sz_bv, ib2sz_bv, ib3sz_bv; - u32 
gfx_status, gfx_br_status, gfx_bv_status; - u64 ib1base, ib2base, ib3base, ib1base_bv, ib2base_bv, ib3base_bv; - u32 ctxt_id = 0; - u32 ts = 0; + u32 status = 0, rptr = 0, wptr = 0, ib1sz = 0, ib2sz = 0, ib3sz, rptr_bv = 0; + u32 ib1sz_bv = 0, ib2sz_bv = 0, ib3sz_bv, gfx_status, gfx_br_status, gfx_bv_status; + u64 ib1base = 0, ib2base = 0, ib3base, ib1base_bv = 0, ib2base_bv, ib3base_bv; + u32 ctxt_id = 0, ts = 0; int rb_id = -1; + bool gx_on = adreno_gx_is_on(adreno_dev); + + if (drawobj) { + drawctxt = ADRENO_CONTEXT(drawobj->context); + drawobj->context->last_faulted_cmd_ts = drawobj->timestamp; + drawobj->context->total_fault_count++; + ctxt_id = drawobj->context->id; + ts = drawobj->timestamp; + rb_id = adreno_get_level(drawobj->context); + + pr_context(device, drawobj->context, "ctx %u ctx_type %s ts %u\n", + drawobj->context->id, kgsl_context_type(drawctxt->type), + drawobj->timestamp); + + pr_context(device, drawobj->context, "cmdline: %s\n", + drawctxt->base.proc_priv->cmdline); + } + + if (!gx_on) + goto done; kgsl_regread(device, GEN8_RBBM_STATUS, &status); kgsl_regread(device, GEN8_RBBM_GFX_STATUS, &gfx_status); @@ -2378,26 +2401,6 @@ static void gen8_fault_header(struct adreno_device *adreno_dev, gen8_regread_aperture(device, GEN8_CP_IB3_REM_SIZE_PIPE, &ib3sz_bv, PIPE_BV, 0, 0); gen8_host_aperture_set(adreno_dev, 0, 0, 0); - if (drawobj) { - drawctxt = ADRENO_CONTEXT(drawobj->context); - drawobj->context->last_faulted_cmd_ts = drawobj->timestamp; - drawobj->context->total_fault_count++; - ctxt_id = drawobj->context->id; - ts = drawobj->timestamp; - rb_id = adreno_get_level(drawobj->context); - - pr_context(device, drawobj->context, - "ctx %u ctx_type %s ts %u\n", - drawobj->context->id, kgsl_context_type(drawctxt->type), - drawobj->timestamp); - - pr_context(device, drawobj->context, "cmdline: %s\n", - drawctxt->base.proc_priv->cmdline); - - trace_adreno_gpu_fault(drawobj->context->id, drawobj->timestamp, status, - rptr, wptr, ib1base, ib1sz, ib2base, 
ib2sz, - adreno_get_level(drawobj->context)); - } dev_err(device->dev, "status %8.8X gfx_status %8.8X gfx_br_status %8.8X gfx_bv_status %8.8X\n", status, gfx_status, gfx_br_status, gfx_bv_status); @@ -2410,6 +2413,7 @@ static void gen8_fault_header(struct adreno_device *adreno_dev, "BV: rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n", rptr_bv, wptr, ib1base_bv, ib1sz_bv, ib2base_bv, ib2sz_bv, ib3base_bv, ib3sz_bv); +done: trace_adreno_gpu_fault(ctxt_id, ts, status, rptr, wptr, ib1base, ib1sz, ib2base, ib2sz, rb_id); } diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 14363e859f..52a0cdddbf 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -1428,45 +1428,60 @@ void adreno_hwsched_replay(struct adreno_device *adreno_dev) } static void do_fault_header(struct adreno_device *adreno_dev, - struct kgsl_drawobj *drawobj) + struct kgsl_drawobj *drawobj, int fault) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); struct adreno_context *drawctxt; - u32 status, rptr, wptr, ib1sz, ib2sz; - u64 ib1base, ib2base; + u32 status = 0, rptr = 0, wptr = 0, ib1sz = 0, ib2sz = 0; + u64 ib1base = 0, ib2base = 0; + bool gx_on = adreno_gx_is_on(adreno_dev); + u32 ctxt_id = 0, ts = 0; + int rb_id = -1; + + dev_err(device->dev, "Fault id:%d and GX is %s\n", fault, gx_on ? 
"ON" : "OFF"); + + if (!gx_on && !drawobj) + return; if (gpudev->fault_header) return gpudev->fault_header(adreno_dev, drawobj); - adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS, &status); - adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, &rptr); - adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_WPTR, &wptr); - adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE, - ADRENO_REG_CP_IB1_BASE_HI, &ib1base); - adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BUFSZ, &ib1sz); - adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB2_BASE, - ADRENO_REG_CP_IB2_BASE_HI, &ib2base); - adreno_readreg(adreno_dev, ADRENO_REG_CP_IB2_BUFSZ, &ib2sz); + if (gx_on) { + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS, &status); + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, &rptr); + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_WPTR, &wptr); + adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE, + ADRENO_REG_CP_IB1_BASE_HI, &ib1base); + adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BUFSZ, &ib1sz); + adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB2_BASE, + ADRENO_REG_CP_IB2_BASE_HI, &ib2base); + adreno_readreg(adreno_dev, ADRENO_REG_CP_IB2_BUFSZ, &ib2sz); - drawctxt = ADRENO_CONTEXT(drawobj->context); - drawobj->context->last_faulted_cmd_ts = drawobj->timestamp; - drawobj->context->total_fault_count++; + dev_err(device->dev, + "status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", + status, rptr, wptr, ib1base, ib1sz, ib2base, ib2sz); + } - pr_context(device, drawobj->context, - "ctx %u ctx_type %s ts %u status %8.8X dispatch_queue=%d rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", - drawobj->context->id, kgsl_context_type(drawctxt->type), - drawobj->timestamp, status, - drawobj->context->gmu_dispatch_queue, rptr, wptr, - ib1base, ib1sz, ib2base, ib2sz); + if (drawobj) { + drawctxt = ADRENO_CONTEXT(drawobj->context); + drawobj->context->last_faulted_cmd_ts = drawobj->timestamp; + drawobj->context->total_fault_count++; + ctxt_id = drawobj->context->id; + ts = 
drawobj->timestamp; + rb_id = adreno_get_level(drawobj->context); - pr_context(device, drawobj->context, "cmdline: %s\n", - drawctxt->base.proc_priv->cmdline); + pr_context(device, drawobj->context, + "ctx %u ctx_type %s ts %u dispatch_queue=%d\n", + drawobj->context->id, kgsl_context_type(drawctxt->type), + drawobj->timestamp, drawobj->context->gmu_dispatch_queue); - trace_adreno_gpu_fault(drawobj->context->id, drawobj->timestamp, status, - rptr, wptr, ib1base, ib1sz, ib2base, ib2sz, - adreno_get_level(drawobj->context)); + pr_context(device, drawobj->context, + "cmdline: %s\n", drawctxt->base.proc_priv->cmdline); + } + trace_adreno_gpu_fault(ctxt_id, ts, status, rptr, wptr, ib1base, ib1sz, + ib2base, ib2sz, rb_id); } static struct cmd_list_obj *get_active_cmdobj_lpac( @@ -1732,7 +1747,7 @@ static void adreno_hwsched_reset_and_snapshot_legacy(struct adreno_device *adren context = drawobj->context; - do_fault_header(adreno_dev, drawobj); + do_fault_header(adreno_dev, drawobj, fault); kgsl_device_snapshot(device, context, NULL, false); @@ -1803,6 +1818,8 @@ static void adreno_hwsched_reset_and_snapshot(struct adreno_device *adreno_dev, drawobj = NULL; } + do_fault_header(adreno_dev, drawobj, fault); + if (!obj_lpac && (fault & ADRENO_IOMMU_PAGE_FAULT)) obj_lpac = get_active_cmdobj_lpac(adreno_dev); @@ -1814,10 +1831,8 @@ static void adreno_hwsched_reset_and_snapshot(struct adreno_device *adreno_dev, goto done; } - if (obj) { + if (obj) context = drawobj->context; - do_fault_header(adreno_dev, drawobj); - } if (obj_lpac) { drawobj_lpac = obj_lpac->drawobj; From 10aa42412b01b55f0d5fc13e0af4ea4fd4677b33 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Wed, 7 Feb 2024 15:52:26 -0800 Subject: [PATCH 0658/1016] =?UTF-8?q?kgsl:=20snapshot:=20Don=E2=80=99t=20u?= =?UTF-8?q?se=20CP=20crash=20dumper=20when=20GX=20is=20OFF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CP crash dumper resides under GPU GX domain which can’t be 
accessible without GX ON. Hence skip usage of CP crash dumper for cx_misc region when GX is OFF. Change-Id: Icc2d9465f82a8d40c372c81c59b28471600f5d2d Signed-off-by: Hareesh Gundu --- adreno_gen7_snapshot.c | 4 ++-- adreno_gen8_snapshot.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c index eebfb624d9..40f3eb8ea9 100644 --- a/adreno_gen7_snapshot.c +++ b/adreno_gen7_snapshot.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include "adreno.h" @@ -1536,7 +1536,7 @@ static void gen7_cx_misc_regs_snapshot(struct kgsl_device *device, u64 *ptr, offset = 0; const u32 *regs_ptr = (const u32 *)gen7_snapshot_block_list->cx_misc_regs; - if (CD_SCRIPT_CHECK(device)) + if (CD_SCRIPT_CHECK(device) || !adreno_gx_is_on(ADRENO_DEVICE(device))) goto done; /* Build the crash script */ diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 75ee03f6ec..0b96821ac0 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include "adreno.h" @@ -1566,7 +1566,7 @@ static void gen8_cx_misc_regs_snapshot(struct kgsl_device *device, u64 *ptr, offset = 0; const u32 *regs_ptr = (const u32 *)gen8_snapshot_block_list->cx_misc_regs; - if (CD_SCRIPT_CHECK(device)) + if (CD_SCRIPT_CHECK(device) || !adreno_gx_is_on(ADRENO_DEVICE(device))) goto legacy_snapshot; /* Build the crash script */ From 78b9c8993dab5b79cd649a1ca674296d808e886c Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Thu, 11 Jan 2024 17:22:00 +0530 Subject: [PATCH 0659/1016] kgsl: Add memory barrier after RBBM_SW_RESET Add wmb() after triggering RBBM_SW_RESET to make sure all register writes are posted before we turn off the power resources. Change-Id: I8f9704a393cb2244859ee0f761d0def6f866ab9e Signed-off-by: Pankaj Gupta --- adreno_a6xx_gmu.c | 5 ++++- adreno_gen7_gmu.c | 3 +++ adreno_gen8_gmu.c | 3 +++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index 5e58de3d2f..250d9bbbf8 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -1877,6 +1877,9 @@ static void a6xx_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) if (a6xx_gmu_gx_is_on(adreno_dev)) kgsl_regwrite(device, A6XX_RBBM_SW_RESET_CMD, 0x1); + /* Make sure above writes are posted before turning off power resources */ + wmb(); + /* Allow the software reset to complete */ udelay(100); diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 2f592d29bb..937cba6922 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -1499,6 +1499,9 @@ static void gen7_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) if (gen7_gmu_gx_is_on(adreno_dev)) kgsl_regwrite(device, GEN7_RBBM_SW_RESET_CMD, 0x1); + /* Make sure above writes are posted before turning off power resources */ + wmb(); + /* Allow the software reset to complete */ udelay(100); diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index b57a745350..2b59b7e3ac 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -1392,6 +1392,9 @@ static void gen8_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) if (gen8_gmu_gx_is_on(adreno_dev)) kgsl_regwrite(device, GEN8_RBBM_SW_RESET_CMD, 0x1); + /* Make sure above writes are posted before turning off power resources */ + wmb(); + /* Allow the software reset to complete */ udelay(100); From c3880b4799866f94bd3a4da0c235015c695547c9 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Wed, 14 Feb 2024 14:21:09 +0530 Subject: [PATCH 0660/1016] kgsl: gen8: Disable LPAC auto promotion When SP runs LPAC workload in auto-promotion stage, there is a possibility of deadlock with graphics workload. Thus, disable LPAC auto promotion to avoid the hang. 
Change-Id: I6531013ff35f5d3a3636d0008dea1f7596de0294 Signed-off-by: Kamal Agrawal --- adreno-gpulist.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 12e3b03011..834971c0bc 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2610,8 +2610,11 @@ static const struct gen8_nonctxt_regs gen8_0_0_nonctxt_regs[] = { { GEN8_RB_CMP_DBG_ECO_CNTL, 0x00004000, BIT(PIPE_BR) }, { GEN8_RBBM_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, { GEN8_RBBM_SLICE_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, - /* Limit the number of wave-slots for Eviction buffer to 1 per ALU GRP */ - { GEN8_SP_CHICKEN_BITS_1, BIT(26), BIT(PIPE_NONE) }, + /* + * BIT(26): Limit the number of wave-slots for Eviction buffer to 1 per ALU GRP + * BIT(30): Disable LPAC auto-promotion + */ + { GEN8_SP_CHICKEN_BITS_1, BIT(26) | BIT(30), BIT(PIPE_NONE) }, { GEN8_SP_CHICKEN_BITS_2, 0x00800000, BIT(PIPE_NONE) }, { GEN8_SP_CHICKEN_BITS_3, 0x00300000, BIT(PIPE_NONE) }, { GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, 0x00000080, BIT(PIPE_NONE) }, @@ -2761,8 +2764,11 @@ static const struct gen8_nonctxt_regs gen8_4_0_nonctxt_regs[] = { { GEN8_RB_CMP_DBG_ECO_CNTL, 0x00004000, BIT(PIPE_BR) }, { GEN8_RBBM_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, { GEN8_RBBM_SLICE_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, - /* Limit the number of wave-slots for Eviction buffer to 1 per ALU GRP */ - { GEN8_SP_CHICKEN_BITS_1, BIT(26), BIT(PIPE_NONE) }, + /* + * BIT(26): Limit the number of wave-slots for Eviction buffer to 1 per ALU GRP + * BIT(30): Disable LPAC auto-promotion + */ + { GEN8_SP_CHICKEN_BITS_1, BIT(26) | BIT(30), BIT(PIPE_NONE) }, { GEN8_SP_CHICKEN_BITS_2, 0x00800000, BIT(PIPE_NONE) }, { GEN8_SP_CHICKEN_BITS_3, 0x00300000, BIT(PIPE_NONE) }, { GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, 0x00000080, BIT(PIPE_NONE) }, From 176e7d577051084fbe272a5f7d125f29f0385c07 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sun, 18 Feb 2024 11:59:21 +0530 Subject: [PATCH 0661/1016] 
kgsl: Enable QCOM_KGSL_FENCE_TRACE on niobe consolidate build Enable QCOM_KGSL_FENCE_TRACE config on niobe consolidate build. Change-Id: I2b074189023272057c907effeedc7bac8f95bd7c Signed-off-by: Kamal Agrawal --- config/niobe_consolidate_gpuconf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/config/niobe_consolidate_gpuconf b/config/niobe_consolidate_gpuconf index 837c2df56d..07a106aa48 100644 --- a/config/niobe_consolidate_gpuconf +++ b/config/niobe_consolidate_gpuconf @@ -1,9 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only -# Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. +# Copyright (c) 2023-2024 Qualcomm Innovation Center, Inc. All rights reserved. CONFIG_QCOM_KGSL=m CONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 CONFIG_QCOM_KGSL_SORT_POOL=y +CONFIG_QCOM_KGSL_FENCE_TRACE=y CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=y CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR="msm-adreno-tz" From 55baa1b8fcb6db4c5502572eab7e7c4c49432a4c Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Thu, 25 Jan 2024 18:49:44 +0530 Subject: [PATCH 0662/1016] kgsl: Poll CX CFG GDSC register to ensure CX GDSC collapse Currently, KGSL polls for the PWR_ON bit transition from 1 to 0 to ensure CX GDSC collapse. However, this bit only indicates that the power-down sequence has begun and doesn't guarantee that the CX collapsed at the hardware level. To address this, we need to poll for the POWER_DOWN_COMPLETE bit of the CX CFG GDSC register. This ensures that the CX collapses properly at the hardware level. Thus, update the CX GDSC polling logic accordingly. 
Change-Id: I6cee228e50e6c9ca051163137e2c00399ee9f3b9 Signed-off-by: Sanjay Yadav --- a6xx_reg.h | 16 ++++++++-------- adreno_a6xx.c | 8 ++++---- adreno_gen7.c | 6 +++--- adreno_gen8.c | 2 +- gen7_reg.h | 6 +++--- gen8_reg.h | 4 ++-- kgsl_pwrctrl.c | 6 +++--- kgsl_pwrctrl.h | 6 +++--- 8 files changed, 27 insertions(+), 27 deletions(-) diff --git a/a6xx_reg.h b/a6xx_reg.h index eb38411c84..2adfd6aeaa 100644 --- a/a6xx_reg.h +++ b/a6xx_reg.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2017-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _A6XX_REG_H @@ -1128,15 +1128,15 @@ #define A6XX_GMU_CM3_BUSY_CYCLES 0 /* GPUCC registers */ -#define A6XX_GPU_CC_GX_GDSCR 0x24403 -#define A6XX_GPU_CC_GX_DOMAIN_MISC 0x24542 -#define A6XX_GPU_CC_GX_DOMAIN_MISC3 0x24563 -#define A6XX_GPU_CC_CX_GDSCR 0x2441B +#define A6XX_GPU_CC_GX_GDSCR 0x24403 +#define A6XX_GPU_CC_GX_DOMAIN_MISC 0x24542 +#define A6XX_GPU_CC_GX_DOMAIN_MISC3 0x24563 +#define A6XX_GPU_CC_CX_CFG_GDSCR 0x2441C /* GPUCC offsets are different for A662 */ -#define A662_GPU_CC_GX_GDSCR 0x26417 -#define A662_GPU_CC_GX_DOMAIN_MISC3 0x26541 -#define A662_GPU_CC_CX_GDSCR 0x26442 +#define A662_GPU_CC_GX_GDSCR 0x26417 +#define A662_GPU_CC_GX_DOMAIN_MISC3 0x26541 +#define A662_GPU_CC_CX_CFG_GDSCR 0x26443 /* GPU CPR registers */ #define A6XX_GPU_CPR_FSM_CTL 0x26801 diff --git a/adreno_a6xx.c b/adreno_a6xx.c index 15abcc059c..6dad2201b3 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2017-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -1901,9 +1901,9 @@ int a6xx_probe_common(struct platform_device *pdev, adreno_reg_offset_init(gpudev->reg_offsets); if (gmu_core_isenabled(device) && (gpudev != &adreno_a6xx_rgmu_gpudev)) - device->pwrctrl.cx_gdsc_offset = (adreno_is_a662(adreno_dev) || - adreno_is_a621(adreno_dev)) ? A662_GPU_CC_CX_GDSCR : - A6XX_GPU_CC_CX_GDSCR; + device->pwrctrl.cx_cfg_gdsc_offset = (adreno_is_a662(adreno_dev) || + adreno_is_a621(adreno_dev)) ? A662_GPU_CC_CX_CFG_GDSCR : + A6XX_GPU_CC_CX_CFG_GDSCR; adreno_dev->hwcg_enabled = true; adreno_dev->uche_client_pf = 1; diff --git a/adreno_gen7.c b/adreno_gen7.c index ee32898ba4..15b3cde4fe 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -1628,8 +1628,8 @@ int gen7_probe_common(struct platform_device *pdev, kgsl_pwrscale_fast_bus_hint(gen7_core->fast_bus_hint); device->pwrctrl.rt_bus_hint = gen7_core->rt_bus_hint; - device->pwrctrl.cx_gdsc_offset = adreno_is_gen7_11_0(adreno_dev) ? - GEN7_11_0_GPU_CC_CX_GDSCR : GEN7_GPU_CC_CX_GDSCR; + device->pwrctrl.cx_cfg_gdsc_offset = adreno_is_gen7_11_0(adreno_dev) ? 
+ GEN7_11_0_GPU_CC_CX_CFG_GDSCR : GEN7_GPU_CC_CX_CFG_GDSCR; ret = adreno_device_probe(pdev, adreno_dev); if (ret) diff --git a/adreno_gen8.c b/adreno_gen8.c index 470e984522..f1baf7f479 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -1821,7 +1821,7 @@ int gen8_probe_common(struct platform_device *pdev, adreno_dev->uche_client_pf = 1; kgsl_pwrscale_fast_bus_hint(gen8_core->fast_bus_hint); - device->pwrctrl.cx_gdsc_offset = GEN8_GPU_CC_CX_GDSCR; + device->pwrctrl.cx_cfg_gdsc_offset = GEN8_GPU_CC_CX_CFG_GDSCR; device->pwrctrl.rt_bus_hint = gen8_core->rt_bus_hint; diff --git a/gen7_reg.h b/gen7_reg.h index fc005f7166..8a7f19e9c3 100644 --- a/gen7_reg.h +++ b/gen7_reg.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _GEN7_REG_H @@ -1252,9 +1252,9 @@ #define GEN7_GMU_CM3_BUSY_CYCLES 0 /* GPUCC registers */ +#define GEN7_11_0_GPU_CC_CX_CFG_GDSCR 0x26424 +#define GEN7_GPU_CC_CX_CFG_GDSCR 0x26443 #define GEN7_GPU_CC_GX_DOMAIN_MISC3 0x26541 -#define GEN7_GPU_CC_CX_GDSCR 0x26442 -#define GEN7_11_0_GPU_CC_CX_GDSCR 0x26423 /* GPU RSC sequencer registers */ #define GEN7_GPU_RSCC_RSC_STATUS0_DRV0 0x00004 diff --git a/gen8_reg.h b/gen8_reg.h index 1985e758a6..95ec09009e 100644 --- a/gen8_reg.h +++ b/gen8_reg.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef _GEN8_REG_H @@ -1571,8 +1571,8 @@ #define GEN8_GMU_CM3_BUSY_CYCLES 0 /* GPUCC registers */ +#define GEN8_GPU_CC_CX_CFG_GDSCR 0x26421 #define GEN8_GPU_CC_GX_DOMAIN_MISC3 0x26541 -#define GEN8_GPU_CC_CX_GDSCR 0x26420 /* GPU RSC sequencer registers */ #define GEN8_GPU_RSCC_RSC_STATUS0_DRV0 0x00004 diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 9f72977988..a3ff073442 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -1548,9 +1548,9 @@ static int kgsl_cx_gdsc_event(struct notifier_block *nb, if (pwr->cx_regulator && !(event & REGULATOR_EVENT_DISABLE)) return 0; - if (pwr->cx_gdsc_offset) { - if (kgsl_regmap_read_poll_timeout(&device->regmap, pwr->cx_gdsc_offset, - val, !(val & BIT(31)), 100, 100 * 1000)) + if (pwr->cx_cfg_gdsc_offset) { + if (kgsl_regmap_read_poll_timeout(&device->regmap, pwr->cx_cfg_gdsc_offset, + val, (val & BIT(15)), 100, 100 * 1000)) dev_err(device->dev, "GPU CX wait timeout.\n"); } diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index 383d85ee99..abcef54056 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2010-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __KGSL_PWRCTRL_H #define __KGSL_PWRCTRL_H @@ -121,8 +121,8 @@ struct kgsl_pwrctrl { struct completion cx_gdsc_gate; /** @cx_gdsc_wait: Whether to wait for cx gdsc to turn off */ bool cx_gdsc_wait; - /** @cx_gdsc_offset: Offset of CX GDSC register */ - u32 cx_gdsc_offset; + /** @cx_cfg_gdsc_offset: Offset of CX CFG GDSC register */ + u32 cx_cfg_gdsc_offset; int isense_clk_indx; int isense_clk_on_level; unsigned long power_flags; From f50f0b77e149ac7e0a6f9b9ffa711d6263c0e8d3 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Mon, 5 Feb 2024 11:00:39 -0800 Subject: [PATCH 0663/1016] kgsl: gen8: Log CP_PIPE_STATUS_PIPE on wait for lowest idle failure Add CP_PIPE_STATUS_PIPE register logging on wait for lowest idle failure to have more information of hardware state. Change-Id: Ia64e617cbab339277451c7c7b38d9619ea2343c8 Signed-off-by: Hareesh Gundu --- adreno_gen8.c | 2 +- adreno_gen8.h | 12 ++++++++++++ adreno_gen8_gmu.c | 17 +++++++++-------- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index 470e984522..c77d7a3fd5 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -397,7 +397,7 @@ void gen8_get_gpu_feature_info(struct adreno_device *adreno_dev) adreno_dev->feature_fuse = feature_fuse; } -static void gen8_host_aperture_set(struct adreno_device *adreno_dev, u32 pipe_id, +void gen8_host_aperture_set(struct adreno_device *adreno_dev, u32 pipe_id, u32 slice_id, u32 use_slice_id) { struct gen8_device *gen8_dev = container_of(adreno_dev, diff --git a/adreno_gen8.h b/adreno_gen8.h index 2134a932af..bd7d4310a6 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -556,4 +556,16 @@ void gen8_regread64_aperture(struct kgsl_device *device, void gen8_regread_aperture(struct kgsl_device *device, u32 offsetwords, u32 *value, u32 pipe, u32 slice_id, u32 use_slice_id); + +/** + * gen8_host_aperture_set - Program CP aperture register + * @adreno_dev: Handle to the adreno device + * @pipe_id: Pipe for which the register is to be set + 
 * @slice_id: Slice for which the register is to be set + * @use_slice_id: Set if the value to be read is from a sliced register + * + * This function programs CP aperture register + */ +void gen8_host_aperture_set(struct adreno_device *adreno_dev, u32 pipe_id, + u32 slice_id, u32 use_slice_id); #endif diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index b57a745350..47ffe77f29 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -688,7 +688,7 @@ int gen8_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); - u32 reg, reg1, reg2, reg3, reg4, reg5,/* reg6,*/ reg7, reg8; + u32 reg, reg1, reg2, reg3, reg4, reg5; unsigned long t; u64 ts1, ts2; @@ -750,14 +750,15 @@ int gen8_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) /* Access GX registers only when GX is ON */ if (is_on(reg1)) { - //kgsl_regread(device, GEN8_CP_STATUS_1, &reg6);// fEIXME - kgsl_regread(device, GEN8_CP_CP2GMU_STATUS, &reg7); - kgsl_regread(device, GEN8_CP_CONTEXT_SWITCH_CNTL, &reg8); + gen8_regread_aperture(device, GEN8_CP_PIPE_STATUS_PIPE, &reg, PIPE_BV, 0, 0); + gen8_regread_aperture(device, GEN8_CP_PIPE_STATUS_PIPE, &reg1, PIPE_BR, 0, 0); + /* Clear aperture register */ + gen8_host_aperture_set(adreno_dev, 0, 0, 0); + kgsl_regread(device, GEN8_CP_CP2GMU_STATUS, &reg2); + kgsl_regread(device, GEN8_CP_CONTEXT_SWITCH_CNTL, &reg3); - //dev_err(&gmu->pdev->dev, "GEN8_CP_STATUS_1=%x\n", reg6); - dev_err(&gmu->pdev->dev, - "CP2GMU_STATUS=%x CONTEXT_SWITCH_CNTL=%x\n", - reg7, reg8); + dev_err(&gmu->pdev->dev, "GEN8_CP_PIPE_STATUS_PIPE BV:%x BR:%x\n", reg, reg1); + dev_err(&gmu->pdev->dev, "CP2GMU_STATUS=%x CONTEXT_SWITCH_CNTL=%x\n", reg2, reg3); } WARN_ON(1); From 29d6bd8d19bee7ae4866dcc16e987e9b9693e14c Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Thu, 18 Jan 2024 12:45:55 -0800 Subject: [PATCH 0664/1016] kgsl: gen8:
Fix snapshot preemption record dump issue Currently, preemption record is not dumped properly for gen8 hwsched targets due to incorrect offset calculation of preemption record mementry. Fix this by getting correct offset for preemption record and also increase default gen8 preemption record size as per hardware requirement. Change-Id: I27eaa18a986c215df2664a73f56a671eda769b44 Signed-off-by: Hareesh Gundu --- adreno_gen8.c | 5 +++-- adreno_gen8.h | 4 +++- adreno_gen8_hwsched.c | 16 ++++------------ adreno_gen8_snapshot.c | 11 +++-------- 4 files changed, 13 insertions(+), 23 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index c77d7a3fd5..f819f4f3c0 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -1839,9 +1839,10 @@ int gen8_probe_common(struct platform_device *pdev, /* debugfs node for ACD calibration */ debugfs_create_file("acd_calibrate", 0644, device->d_debugfs, device, &acd_cal_fops); - /* Dump additional AQE 16KB data on top of default 96KB(48(BR)+48(BV)) */ + /* Dump additional AQE 16KB data on top of default 128KB(64(BR)+64(BV)) */ device->snapshot_ctxt_record_size = ADRENO_FEATURE(adreno_dev, ADRENO_AQE) ? 
- 112 * SZ_1K : 96 * SZ_1K; + (GEN8_SNAPSHOT_CTXRECORD_SIZE_IN_BYTES + SZ_16K) : + GEN8_SNAPSHOT_CTXRECORD_SIZE_IN_BYTES; return 0; } diff --git a/adreno_gen8.h b/adreno_gen8.h index bd7d4310a6..0291763bfa 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -201,7 +201,9 @@ struct gen8_cp_smmu_info { #define GEN8_CP_CTXRECORD_MAGIC_REF 0xae399d6eUL /* Size of each CP preemption record */ -#define GEN8_CP_CTXRECORD_SIZE_IN_BYTES (4192 * 1024) +#define GEN8_CP_CTXRECORD_SIZE_IN_BYTES (13536 * SZ_1K) +/* Size of preemption record to be dumped in snapshot */ +#define GEN8_SNAPSHOT_CTXRECORD_SIZE_IN_BYTES (128 * 1024) /* Size of the user context record block (in bytes) */ #define GEN8_CP_CTXRECORD_USER_RESTORE_SIZE (192 * 1024) /* Size of the performance counter save/restore block (in bytes) */ diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index db1426896a..f54aa56bc3 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -98,15 +98,10 @@ static void gen8_hwsched_snapshot_preemption_record(struct kgsl_device *device, u8 *dest = snapshot->ptr + sizeof(*section_header); struct kgsl_snapshot_gpu_object_v2 *header = (struct kgsl_snapshot_gpu_object_v2 *)dest; - const struct adreno_gen8_core *gen8_core = to_gen8_core(ADRENO_DEVICE(device)); - u64 ctxt_record_size = GEN8_CP_CTXRECORD_SIZE_IN_BYTES; + u64 ctxt_record_size = max_t(u64, GEN8_SNAPSHOT_CTXRECORD_SIZE_IN_BYTES, + device->snapshot_ctxt_record_size); size_t section_size; - if (gen8_core->ctxt_record_size) - ctxt_record_size = gen8_core->ctxt_record_size; - - ctxt_record_size = min_t(u64, ctxt_record_size, device->snapshot_ctxt_record_size); - section_size = sizeof(*section_header) + sizeof(*header) + ctxt_record_size; if (snapshot->remain < section_size) { SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); @@ -135,13 +130,10 @@ static void gen8_hwsched_snapshot_preemption_record(struct kgsl_device *device, static void snapshot_preemption_records(struct kgsl_device *device, struct kgsl_snapshot 
*snapshot, struct kgsl_memdesc *md) { - const struct adreno_gen8_core *gen8_core = - to_gen8_core(ADRENO_DEVICE(device)); - u64 ctxt_record_size = GEN8_CP_CTXRECORD_SIZE_IN_BYTES; + u64 ctxt_record_size = md->size; u64 offset; - if (gen8_core->ctxt_record_size) - ctxt_record_size = gen8_core->ctxt_record_size; + do_div(ctxt_record_size, KGSL_PRIORITY_MAX_RB_LEVELS); /* All preemption records exist as a single mem alloc entry */ for (offset = 0; offset < md->size; offset += ctxt_record_size) diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 75ee03f6ec..aeea668889 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include "adreno.h" @@ -1438,13 +1438,8 @@ static size_t snapshot_preemption_record(struct kgsl_device *device, struct kgsl_snapshot_gpu_object_v2 *header = (struct kgsl_snapshot_gpu_object_v2 *)buf; u8 *ptr = buf + sizeof(*header); - const struct adreno_gen8_core *gpucore = to_gen8_core(ADRENO_DEVICE(device)); - u64 ctxt_record_size = GEN8_CP_CTXRECORD_SIZE_IN_BYTES; - - if (gpucore->ctxt_record_size) - ctxt_record_size = gpucore->ctxt_record_size; - - ctxt_record_size = min_t(u64, ctxt_record_size, device->snapshot_ctxt_record_size); + u64 ctxt_record_size = max_t(u64, GEN8_SNAPSHOT_CTXRECORD_SIZE_IN_BYTES, + device->snapshot_ctxt_record_size); if (remain < (ctxt_record_size + sizeof(*header))) { SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); From 17d29f1cfe88eebb1368b330665bf5cc5787c3ea Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Fri, 2 Feb 2024 14:23:27 +0530 Subject: [PATCH 0665/1016] kgsl: gen8: Add support for frequency limiter interrupt In gen8, frequency limiter violations are reported through CX_MISC interrupt line. 
Add required changes to handle the frequency limiter interrupt. Change-Id: Ia5ec3014aad3ce185beb38c9fd5f2108f0ab7e69 Signed-off-by: Kamal Agrawal --- adreno.c | 10 ++-------- adreno_gen8.c | 22 ++++++++++++++++++++++ adreno_gen8.h | 5 +++++ adreno_gen8_gmu.c | 16 ++++++++++++++++ gen8_reg.h | 5 ++++- kgsl_device.h | 4 +++- kgsl_pwrctrl.h | 7 +++++++ 7 files changed, 59 insertions(+), 10 deletions(-) diff --git a/adreno.c b/adreno.c index 6186f15f3e..3481fa80e9 100644 --- a/adreno.c +++ b/adreno.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #include #include @@ -419,14 +419,8 @@ static irqreturn_t adreno_irq_handler(int irq, void *data) static irqreturn_t adreno_freq_limiter_irq_handler(int irq, void *data) { struct kgsl_device *device = data; - struct kgsl_pwrctrl *pwr = &device->pwrctrl; - dev_err_ratelimited(device->dev, - "GPU req freq %u from prev freq %u unsupported for speed_bin: %d, soc_code: 0x%x\n", - pwr->pwrlevels[pwr->active_pwrlevel].gpu_freq, - pwr->pwrlevels[pwr->previous_pwrlevel].gpu_freq, - device->speed_bin, - device->soc_code); + KGSL_PWRCTRL_LOG_FREQLIM(device); reset_control_reset(device->freq_limiter_irq_clear); diff --git a/adreno_gen8.c b/adreno_gen8.c index f819f4f3c0..b8752c0dab 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -1802,6 +1802,25 @@ done: return ret; } +static irqreturn_t gen8_cx_host_irq_handler(int irq, void *data) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + u32 status; + + adreno_cx_misc_regread(adreno_dev, GEN8_GPU_CX_MISC_INT_0_STATUS, &status); + adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_INT_CLEAR_CMD, status); + + if (status & BIT(GEN8_CX_MISC_GPU_CC_IRQ)) + KGSL_PWRCTRL_LOG_FREQLIM(device); + + 
if (status & ~GEN8_CX_MISC_INT_MASK) + dev_err_ratelimited(device->dev, "Unhandled CX MISC interrupts 0x%lx\n", + status & ~GEN8_CX_MISC_INT_MASK); + + return IRQ_HANDLED; +} + int gen8_probe_common(struct platform_device *pdev, struct adreno_device *adreno_dev, u32 chipid, const struct adreno_gpu_core *gpucore) @@ -1825,6 +1844,9 @@ int gen8_probe_common(struct platform_device *pdev, device->pwrctrl.rt_bus_hint = gen8_core->rt_bus_hint; + device->cx_host_irq_num = kgsl_request_irq_optional(pdev, + "cx_host_irq", gen8_cx_host_irq_handler, device); + ret = adreno_device_probe(pdev, adreno_dev); if (ret) return ret; diff --git a/adreno_gen8.h b/adreno_gen8.h index 0291763bfa..dbe27b75c6 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -239,6 +239,11 @@ struct gen8_cp_smmu_info { (1 << GEN8_INT_OUTOFBOUNDACCESS) | \ (1 << GEN8_INT_UCHETRAPINTERRUPT)) +/* GEN8 CX MISC interrupt bits */ +#define GEN8_CX_MISC_GPU_CC_IRQ 31 + +#define GEN8_CX_MISC_INT_MASK BIT(GEN8_CX_MISC_GPU_CC_IRQ) + /** * to_gen8_core - return the gen8 specific GPU core struct * @adreno_dev: An Adreno GPU device handle diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index 47ffe77f29..79426887ce 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -599,6 +599,14 @@ void gen8_gmu_irq_enable(struct adreno_device *adreno_dev) /* Enable all IRQs on host */ enable_irq(hfi->irq); enable_irq(gmu->irq); + + if (device->cx_host_irq_num <= 0) + return; + + /* Clear pending IRQs, unmask needed interrupts and enable CX host IRQ */ + adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_INT_CLEAR_CMD, UINT_MAX); + adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_INT_0_MASK, GEN8_CX_MISC_INT_MASK); + enable_irq(device->cx_host_irq_num); } void gen8_gmu_irq_disable(struct adreno_device *adreno_dev) @@ -617,6 +625,14 @@ void gen8_gmu_irq_disable(struct adreno_device *adreno_dev) gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR, UINT_MAX); gmu_core_regwrite(device, GEN8_GMUAO_AO_HOST_INTERRUPT_CLR, 
UINT_MAX); + + if (device->cx_host_irq_num <= 0) + return; + + /* Disable CX host IRQ, mask all interrupts and clear pending IRQs */ + disable_irq(device->cx_host_irq_num); + adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_INT_0_MASK, UINT_MAX); + adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_INT_CLEAR_CMD, UINT_MAX); } static int gen8_gmu_hfi_start_msg(struct adreno_device *adreno_dev) diff --git a/gen8_reg.h b/gen8_reg.h index 1985e758a6..36989a66b7 100644 --- a/gen8_reg.h +++ b/gen8_reg.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _GEN8_REG_H @@ -1591,6 +1591,9 @@ #define GEN8_SMMU_BASE 0x28000 /* GPU CX_MISC registers */ +#define GEN8_GPU_CX_MISC_INT_CLEAR_CMD 0x31 +#define GEN8_GPU_CX_MISC_INT_0_MASK 0x33 +#define GEN8_GPU_CX_MISC_INT_0_STATUS 0x34 #define GEN8_GPU_CX_MISC_AO_COUNTER_LO 0x80 #define GEN8_GPU_CX_MISC_AO_COUNTER_HI 0x81 #define GEN8_GPU_CX_MISC_SW_FUSE_VALUE 0x400 diff --git a/kgsl_device.h b/kgsl_device.h index 33fca3e031..dd567fba02 100644 --- a/kgsl_device.h +++ b/kgsl_device.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __KGSL_DEVICE_H #define __KGSL_DEVICE_H @@ -342,6 +342,8 @@ struct kgsl_device { struct reset_control *freq_limiter_irq_clear; /** @freq_limiter_intr_num: The interrupt number for freq limiter */ int freq_limiter_intr_num; + /** @cx_host_irq_num: Interrupt number for cx_host_irq */ + int cx_host_irq_num; }; #define KGSL_MMU_DEVICE(_mmu) \ diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index 5286e74244..523b4af1a2 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -30,6 +30,13 @@ #define KGSL_XO_CLK_FREQ 19200000 #define KGSL_ISENSE_CLK_FREQ 200000000 +#define KGSL_PWRCTRL_LOG_FREQLIM(device) dev_err_ratelimited(device->dev, \ + "GPU req freq %u from prev freq %u unsupported for speed_bin: %d, soc_code: 0x%x\n", \ + device->pwrctrl.pwrlevels[device->pwrctrl.active_pwrlevel].gpu_freq, \ + device->pwrctrl.pwrlevels[device->pwrctrl.previous_pwrlevel].gpu_freq, \ + device->speed_bin, \ + device->soc_code) + struct platform_device; struct icc_path; From d195c2142f35fcbf28cdb210663b89bbd7f121df Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Fri, 5 Jan 2024 22:49:24 +0530 Subject: [PATCH 0666/1016] kgsl: gen8: Program QoS value for gen8_4_0 GPU Program the recommended QoS value required to enable dynamic QoS control feature on gen8_4_0 gpu. 
Change-Id: Ibb64035b1a9f99da6591721269f0325316387684 Signed-off-by: Kamal Agrawal --- adreno-gpulist.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 12e3b03011..a404f21bff 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2790,6 +2790,10 @@ static const struct gen8_nonctxt_regs gen8_4_0_nonctxt_regs[] = { { 0 }, }; +static const u32 gen8_4_0_gbif_client_qos_values[KGSL_PRIORITY_MAX_RB_LEVELS] = { + 0x33233323, +}; + static const struct adreno_gen8_core adreno_gpu_core_gen8_4_0 = { .base = { DEFINE_ADRENO_REV(ADRENO_REV_GEN8_4_0, @@ -2818,6 +2822,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_4_0 = { .highest_bank_bit = 16, .gmu_hub_clk_freq = 200000000, .gen8_snapshot_block_list = &gen8_0_0_snapshot_block_list, + .qos_value = gen8_4_0_gbif_client_qos_values, }; static const struct adreno_gpu_core *adreno_gpulist[] = { From 5571e039497be570fd048336d013bef415a9ff0c Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 20 Feb 2024 21:37:34 +0530 Subject: [PATCH 0667/1016] kgsl: gen8: Enable ACV perfmode vote for gen8_4_0 Enable ACV perfmode vote for gen8_4_0 for improved performance. 
Change-Id: Ibf253c001b1d49adc01302a1f909b55c02dc25ad Signed-off-by: Kamal Agrawal --- adreno-gpulist.h | 1 + 1 file changed, 1 insertion(+) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index a404f21bff..120fa422c6 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2823,6 +2823,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_4_0 = { .gmu_hub_clk_freq = 200000000, .gen8_snapshot_block_list = &gen8_0_0_snapshot_block_list, .qos_value = gen8_4_0_gbif_client_qos_values, + .acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4), }; static const struct adreno_gpu_core *adreno_gpulist[] = { From b11e1ca3c2996eb812256433a8ff8f5b1da48bbe Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Mon, 29 Jan 2024 12:28:52 -0800 Subject: [PATCH 0668/1016] msm: kgsl: DEBUG: Add debug logs for NoC error Add delay and debug logs in snapshot Signed-off-by: Urvashi Agrawal Change-Id: I0088ab497e8f4ad656fd0cc2b6d64ee436d05d82 Signed-off-by: Vaishali Gupta --- adreno_gen8.c | 3 +++ adreno_gen8_gmu_snapshot.c | 1 + 2 files changed, 4 insertions(+) diff --git a/adreno_gen8.c b/adreno_gen8.c index 70cc3244de..1e4e319974 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -412,6 +412,9 @@ static void gen8_host_aperture_set(struct adreno_device *adreno_dev, u32 pipe_id kgsl_regwrite(KGSL_DEVICE(adreno_dev), GEN8_CP_APERTURE_CNTL_HOST, aperture_val); + /* Add a barrier for memory writes to complete */ + mb(); + gen8_dev->aperture = aperture_val; } diff --git a/adreno_gen8_gmu_snapshot.c b/adreno_gen8_gmu_snapshot.c index 4d6250efb5..cf4eb5e7ad 100644 --- a/adreno_gen8_gmu_snapshot.c +++ b/adreno_gen8_gmu_snapshot.c @@ -294,6 +294,7 @@ void gen8_gmu_snapshot(struct adreno_device *adreno_dev, * register in snapshot to analyze the system state even in partial * snapshot dump */ + dev_err(device->dev, "Snapshot capture started \n"); gen8_snapshot_external_core_regs(device, snapshot); gen8_gmu_device_snapshot(device, snapshot); From e859160503d9cfaddcf10cac6b6fcbc336c97bb9 Mon Sep 17 
00:00:00 2001 From: Lynus Vaz Date: Tue, 19 Dec 2023 08:18:11 -0800 Subject: [PATCH 0669/1016] msm: kgsl: Put VBO child refcount if unmap succeeds If the VBO range cannot be unmapped the underlying physical memory should not be freed. Enforce this by not decrementing the mem entry's refcount. Change-Id: Ia9d12589fbfa849ae7e2bef8e1cdbb9cf8f33364 Signed-off-by: Lynus Vaz Signed-off-by: Vaishali Gupta --- kgsl_vbo.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/kgsl_vbo.c b/kgsl_vbo.c index 92d3d84c94..a19a01b9f3 100644 --- a/kgsl_vbo.c +++ b/kgsl_vbo.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -46,6 +46,12 @@ static struct kgsl_memdesc_bind_range *bind_range_create(u64 start, u64 last, return range; } +static void bind_range_destroy(struct kgsl_memdesc_bind_range *range) +{ + kgsl_mem_entry_put(range->entry); + kfree(range); +} + static u64 bind_range_len(struct kgsl_memdesc_bind_range *range) { return (range->range.last - range->range.start) + 1; @@ -114,8 +120,7 @@ static void kgsl_memdesc_remove_range(struct kgsl_mem_entry *target, kgsl_mmu_map_zero_page_to_range(memdesc->pagetable, memdesc, range->range.start, bind_range_len(range)); - kgsl_mem_entry_put(range->entry); - kfree(range); + bind_range_destroy(range); } } @@ -175,8 +180,7 @@ static int kgsl_memdesc_add_range(struct kgsl_mem_entry *target, } } - kgsl_mem_entry_put(cur->entry); - kfree(cur); + bind_range_destroy(cur); continue; } @@ -253,8 +257,7 @@ static int kgsl_memdesc_add_range(struct kgsl_mem_entry *target, return ret; error: - kgsl_mem_entry_put(range->entry); - kfree(range); + bind_range_destroy(range); mutex_unlock(&memdesc->ranges_lock); return ret; } @@ -264,6 +267,7 @@ static 
void kgsl_sharedmem_vbo_put_gpuaddr(struct kgsl_memdesc *memdesc) struct interval_tree_node *node, *next; struct kgsl_memdesc_bind_range *range; int ret = 0; + bool unmap_fail; /* * If the VBO maps the zero range then we can unmap the entire @@ -273,6 +277,8 @@ static void kgsl_sharedmem_vbo_put_gpuaddr(struct kgsl_memdesc *memdesc) ret = kgsl_mmu_unmap_range(memdesc->pagetable, memdesc, 0, memdesc->size); + unmap_fail = ret; + /* * FIXME: do we have a use after free potential here? We might need to * lock this and set a "do not update" bit @@ -294,15 +300,16 @@ static void kgsl_sharedmem_vbo_put_gpuaddr(struct kgsl_memdesc *memdesc) range->range.start, range->range.last - range->range.start + 1); - /* If unmap failed, mark the child memdesc as still mapped */ - if (ret) - range->entry->memdesc.priv |= KGSL_MEMDESC_MAPPED; + /* Put the child's refcount if unmap succeeds */ + if (!ret) + bind_range_destroy(range); + else + kfree(range); - kgsl_mem_entry_put(range->entry); - kfree(range); + unmap_fail = unmap_fail || ret; } - if (ret) + if (unmap_fail) return; /* Put back the GPU address */ From 1cb0c3789d749dde487aca45abdc1b410c964d21 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Wed, 20 Dec 2023 13:52:55 -0800 Subject: [PATCH 0670/1016] msm: kgsl: Do not release dma and anon buffers if unmap fails If iommu unmap fails and leaves dma or anon buffers still mapped in the iommu, do not free them. 
Change-Id: Ice0e1a59c1ac0ee7a9d62d8899966b84fa63d5ca Signed-off-by: Lynus Vaz Signed-off-by: Vaishali Gupta --- kgsl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kgsl.c b/kgsl.c index c142097789..1431aee862 100644 --- a/kgsl.c +++ b/kgsl.c @@ -330,6 +330,9 @@ static void kgsl_destroy_ion(struct kgsl_memdesc *memdesc) struct kgsl_mem_entry, memdesc); struct kgsl_dma_buf_meta *metadata = entry->priv_data; + if (memdesc->priv & KGSL_MEMDESC_MAPPED) + return; + if (metadata != NULL) { remove_dmabuf_list(metadata); #if (KERNEL_VERSION(6, 2, 0) <= LINUX_VERSION_CODE) @@ -358,6 +361,9 @@ static void kgsl_destroy_anon(struct kgsl_memdesc *memdesc) struct scatterlist *sg; struct page *page; + if (memdesc->priv & KGSL_MEMDESC_MAPPED) + return; + for_each_sg(memdesc->sgt->sgl, sg, memdesc->sgt->nents, i) { page = sg_page(sg); for (j = 0; j < (sg->length >> PAGE_SHIFT); j++) { From 56b1ed9d2aacd0e1b7d32910b2becf9de63b2b1f Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Fri, 12 Jan 2024 11:16:42 -0800 Subject: [PATCH 0671/1016] msm: kgsl: Do not reclaim pages mapped in a VBO Pages mapped into a VBO can be accessible both through the memdesc as well as through the parent VBO. Reclaiming these pages would require a lot of overhead to update iommu mappings. Instead, do not reclaim the pages of a memdesc that is currently mapped in a VBO. Change-Id: Ic2787f09081c5dc3a66c3582b98266937c8ce1e5 Signed-off-by: Lynus Vaz Signed-off-by: Vaishali Gupta --- kgsl.c | 3 ++- kgsl.h | 4 +++- kgsl_reclaim.c | 9 ++++++++- kgsl_vbo.c | 15 +++++++++++++-- 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/kgsl.c b/kgsl.c index 1431aee862..f8bbc6b515 100644 --- a/kgsl.c +++ b/kgsl.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -259,6 +259,7 @@ static struct kgsl_mem_entry *kgsl_mem_entry_create(void) /* put this ref in userspace memory alloc and map ioctls */ kref_get(&entry->refcount); atomic_set(&entry->map_count, 0); + atomic_set(&entry->vbo_count, 0); } return entry; diff --git a/kgsl.h b/kgsl.h index ad10700b32..335f50e6d1 100644 --- a/kgsl.h +++ b/kgsl.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef __KGSL_H #define __KGSL_H @@ -362,6 +362,8 @@ struct kgsl_mem_entry { * debugfs accounting */ atomic_t map_count; + /** @vbo_count: Count how many VBO ranges this entry is mapped in */ + atomic_t vbo_count; }; struct kgsl_device_private; diff --git a/kgsl_reclaim.c b/kgsl_reclaim.c index 91713b7604..60823b95b2 100644 --- a/kgsl_reclaim.c +++ b/kgsl_reclaim.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -267,6 +267,13 @@ static u32 kgsl_reclaim_process(struct kgsl_process_private *process, continue; } + /* Do not reclaim pages mapped into a VBO */ + if (atomic_read(&valid_entry->vbo_count)) { + kgsl_mem_entry_put(entry); + next++; + continue; + } + if ((atomic_read(&process->unpinned_page_count) + memdesc->page_count) > kgsl_reclaim_max_page_limit) { kgsl_mem_entry_put(entry); diff --git a/kgsl_vbo.c b/kgsl_vbo.c index a19a01b9f3..bf72c139db 100644 --- a/kgsl_vbo.c +++ b/kgsl_vbo.c @@ -43,12 +43,16 @@ static struct kgsl_memdesc_bind_range *bind_range_create(u64 start, u64 last, return ERR_PTR(-EINVAL); } + atomic_inc(&entry->vbo_count); return range; } static void bind_range_destroy(struct kgsl_memdesc_bind_range *range) { - kgsl_mem_entry_put(range->entry); + struct kgsl_mem_entry *entry = range->entry; + + atomic_dec(&entry->vbo_count); + kgsl_mem_entry_put(entry); kfree(range); } @@ -361,8 +365,12 @@ static void kgsl_sharedmem_free_bind_op(struct kgsl_sharedmem_bind_op *op) if (IS_ERR_OR_NULL(op)) return; - for (i = 0; i < op->nr_ops; i++) + for (i = 0; i < op->nr_ops; i++) { + /* Decrement the vbo_count we added when creating the bind_op */ + if (op->ops[i].entry) + atomic_dec(&op->ops[i].entry->vbo_count); kgsl_mem_entry_put(op->ops[i].entry); + } kgsl_mem_entry_put(op->target); @@ -468,6 +476,9 @@ kgsl_sharedmem_create_bind_op(struct kgsl_process_private *private, goto err; } + /* Keep the child pinned in memory */ + atomic_inc(&entry->vbo_count); + /* Make sure the child is not a VBO */ if ((entry->memdesc.flags & KGSL_MEMFLAGS_VBO)) { ret = -EINVAL; From af451578ba1eac59e37def344678d5bd67e01e93 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Fri, 12 Jan 2024 12:03:58 -0700 Subject: [PATCH 0672/1016] kgsl: hwfence: Call fd_install after creating hw fence Once fd_install is done, userspace can predict the dma fence fd and call close(fd). And say the dma fence get signaled at the same time as well. 
Then both these operations will reduce the refcount of the dma fence to zero thereby freeing the kfence. This can cause use-after-free of the kfence in the hardware fence creation path. To avoid this, do fd_install after creating hw fence. Change-Id: Ib9c446562ff5199e469c7db9581518fb5a695e3f Signed-off-by: Harshdeep Dhatt Signed-off-by: Vaishali Gupta --- kgsl_sync.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kgsl_sync.c b/kgsl_sync.c index b4cff282d3..6810860618 100644 --- a/kgsl_sync.c +++ b/kgsl_sync.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2012-2019, 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -487,11 +487,12 @@ int kgsl_add_fence_event(struct kgsl_device *device, ret = -EFAULT; goto out; } - fd_install(priv.fence_fd, kfence->sync_file->file); if (!retired) device->ftbl->create_hw_fence(device, kfence); + fd_install(priv.fence_fd, kfence->sync_file->file); + out: kgsl_context_put(context); if (ret) { From 36af7cf615a62a9d48fcbed73a363ac75ff44876 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Tue, 16 Jan 2024 16:58:01 -0700 Subject: [PATCH 0673/1016] kgsl: hwfence: Take detached context refcount When we encounter a failure during context detach, we move the pending hardware fences from this context to a list. After reset is complete, we trigger these fences one by one. As part of freeing these hardware fences, we decrement the drawctxt's hw_fence count. This can cause a use-after-free of the drawctxt structure since this context is already detached and may have been freed. To fix this, take the context refcount for each of these hardware fences in the detach path, and put them back when each of these hardware fences are triggered after recovery. 
Change-Id: I3077be6b07cc55c77329c06330433d9ece9569c8 Signed-off-by: Harshdeep Dhatt Signed-off-by: Vaishali Gupta --- adreno_gen7_hwsched.c | 7 ++++++- adreno_gen7_hwsched_hfi.c | 4 +++- adreno_gen8_hwsched.c | 7 ++++++- adreno_gen8_hwsched_hfi.c | 4 +++- 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index da13a6078e..acf8cd29b3 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -1678,6 +1678,7 @@ static int process_detached_hw_fences_after_reset(struct adreno_device *adreno_d { struct adreno_hw_fence_entry *entry, *tmp; struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + struct kgsl_context *context = NULL; int ret = 0; list_for_each_entry_safe(entry, tmp, &hfi->detached_hw_fence_list, node) { @@ -1691,7 +1692,11 @@ static int process_detached_hw_fences_after_reset(struct adreno_device *adreno_d if (ret) return ret; + context = &entry->drawctxt->base; + gen7_remove_hw_fence_entry(adreno_dev, entry); + + kgsl_context_put(context); } return ret; diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 21a0d3d074..41f380b5b6 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -3254,6 +3254,7 @@ static void move_detached_context_hardware_fences(struct adreno_device *adreno_d struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; if ((timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0)) { + _kgsl_context_get(&drawctxt->base); list_move_tail(&entry->node, &hfi->detached_hw_fence_list); continue; } @@ -3263,6 +3264,7 @@ static void move_detached_context_hardware_fences(struct adreno_device *adreno_d /* Also grab all the hardware fences which were never sent to GMU */ list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) { + _kgsl_context_get(&drawctxt->base); list_move_tail(&entry->node, &hfi->detached_hw_fence_list); } } diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index c6f5b35646..795201c4a9 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -1643,6 +1643,7 @@ static int process_detached_hw_fences_after_reset(struct adreno_device *adreno_d { struct adreno_hw_fence_entry *entry, *tmp; struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); + struct kgsl_context *context = NULL; int ret = 0; list_for_each_entry_safe(entry, tmp, &hfi->detached_hw_fence_list, node) { @@ -1656,7 +1657,11 @@ static int process_detached_hw_fences_after_reset(struct adreno_device *adreno_d if (ret) return ret; + context = &entry->drawctxt->base; + gen8_remove_hw_fence_entry(adreno_dev, entry); + + kgsl_context_put(context); } return ret; diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 1af4ea8ce5..f9d5f12b5f 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -3257,6 +3257,7 @@ static void move_detached_context_hardware_fences(struct adreno_device *adreno_d struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; if ((timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0)) { + _kgsl_context_get(&drawctxt->base); list_move_tail(&entry->node, &hfi->detached_hw_fence_list); continue; } @@ -3266,6 +3267,7 @@ static void move_detached_context_hardware_fences(struct adreno_device *adreno_d /* Also grab all the hardware fences which were never sent to GMU */ list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) { + _kgsl_context_get(&drawctxt->base); list_move_tail(&entry->node, &hfi->detached_hw_fence_list); } } From 50d53cac916ed6ed1554c85defe9c1c5ba2a41b7 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Fri, 19 Jan 2024 08:38:17 -0800 Subject: [PATCH 0674/1016] msm: kgsl: Keep the timeline fence valid for logging The timeline fence needs to remain valid for logging purposes. Take an extra refcount on the timeline dma_fence to make sure it doesn't go away till we're done with it. Change-Id: I6670ef7add099a72684c1fe20ed009dff85d1f27 Signed-off-by: Lynus Vaz Signed-off-by: Vaishali Gupta --- kgsl_drawobj.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kgsl_drawobj.c b/kgsl_drawobj.c index 82233b30bf..ab98fa2170 100644 --- a/kgsl_drawobj.c +++ b/kgsl_drawobj.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2016-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ /* @@ -588,6 +588,8 @@ static int drawobj_add_sync_timeline(struct kgsl_device *device, /* Set pending flag before adding callback to avoid race */ set_bit(event->id, &syncobj->pending); + /* Get a dma_fence refcount to hand over to the callback */ + dma_fence_get(event->fence); ret = dma_fence_add_callback(event->fence, &event->cb, drawobj_sync_timeline_fence_callback); @@ -602,11 +604,16 @@ static int drawobj_add_sync_timeline(struct kgsl_device *device, ret = 0; } + /* Put the refcount from fence creation */ + dma_fence_put(event->fence); kgsl_drawobj_put(drawobj); return ret; } trace_syncpoint_timeline_fence(event->syncobj, event->fence, false); + + /* Put the refcount from fence creation */ + dma_fence_put(event->fence); return 0; } From e79a1524863aa51af201a64c4ea7ce2b4d0d2834 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Tue, 2 Jan 2024 19:39:16 +0530 Subject: [PATCH 0675/1016] kgsl: Prevent wrapped around VA range allocation Sanitize size param in gpumem_alloc_vbo_entry() to avoid integer overflow in _get_unmapped_area(). This overflow may end up in allocating a wrapped-around VA range, which can overlap with already in use VA. 
Change-Id: I5b4e74ce8c8e8e3323822efe40abf1f355fd3bc3 Signed-off-by: Pankaj Gupta Signed-off-by: Vaishali Gupta --- kgsl.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/kgsl.c b/kgsl.c index f8bbc6b515..787d3324b3 100644 --- a/kgsl.c +++ b/kgsl.c @@ -4104,6 +4104,14 @@ static u64 cap_alignment(struct kgsl_device *device, u64 flags) return flags | FIELD_PREP(KGSL_MEMALIGN_MASK, align); } +static u64 gpumem_max_va_size(struct kgsl_pagetable *pt, u64 flags) +{ + if (flags & KGSL_MEMFLAGS_FORCE_32BIT) + return pt->compat_va_end - pt->compat_va_start; + + return pt->va_end - pt->va_start; +} + static struct kgsl_mem_entry * gpumem_alloc_vbo_entry(struct kgsl_device_private *dev_priv, u64 size, u64 flags) @@ -4112,11 +4120,9 @@ gpumem_alloc_vbo_entry(struct kgsl_device_private *dev_priv, struct kgsl_device *device = dev_priv->device; struct kgsl_memdesc *memdesc; struct kgsl_mem_entry *entry; + struct kgsl_pagetable *pt; int ret; - if (!size) - return ERR_PTR(-EINVAL); - /* Disallow specific flags */ if (flags & (KGSL_MEMFLAGS_GPUREADONLY | KGSL_CACHEMODE_MASK)) return ERR_PTR(-EINVAL); @@ -4135,6 +4141,12 @@ gpumem_alloc_vbo_entry(struct kgsl_device_private *dev_priv, if ((flags & KGSL_MEMFLAGS_SECURE) && !check_and_warn_secured(device)) return ERR_PTR(-EOPNOTSUPP); + pt = (flags & KGSL_MEMFLAGS_SECURE) ? + device->mmu.securepagetable : private->pagetable; + + if (!size || (size > gpumem_max_va_size(pt, flags))) + return ERR_PTR(-EINVAL); + flags = cap_alignment(device, flags); entry = kgsl_mem_entry_create(); From d9eced9fb61711a5a00ed1ec533946546134c1bc Mon Sep 17 00:00:00 2001 From: Ashok Gandla Date: Fri, 19 Jan 2024 16:14:27 +0530 Subject: [PATCH 0676/1016] kgsl: build: Add changes to compile graphics-kernel for Bengal Add changes to compile graphics-kernel for Bengal. 
Change-Id: Ib3fb1fc98d4a2cb28c07bf80cb3c3bad7710e1a9 Signed-off-by: Ashok Gandla --- Kbuild | 3 +++ config/gki_bengal.conf | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 config/gki_bengal.conf diff --git a/Kbuild b/Kbuild index 6b8cee33d1..c9caf3da20 100644 --- a/Kbuild +++ b/Kbuild @@ -64,6 +64,9 @@ endif ifeq ($(CONFIG_ARCH_NIOBE), y) include $(KGSL_PATH)/config/gki_niobe.conf endif +ifeq ($(CONFIG_ARCH_BENGAL), y) + include $(KGSL_PATH)/config/gki_bengal.conf +endif ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERNEL_SRC)/drivers/devfreq diff --git a/config/gki_bengal.conf b/config/gki_bengal.conf new file mode 100644 index 0000000000..9692132550 --- /dev/null +++ b/config/gki_bengal.conf @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + +CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 +CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y +CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR = \"msm-adreno-tz\" +CONFIG_QCOM_KGSL_SORT_POOL = y + +ifneq ($(CONFIG_SHMEM),) + CONFIG_QCOM_KGSL_USE_SHMEM = y + CONFIG_QCOM_KGSL_PROCESS_RECLAIM = y +endif + +ccflags-y += -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ + -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ + -DCONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR=\"msm-adreno-tz\" \ + -DCONFIG_QCOM_KGSL_SORT_POOL=1 + +ifneq ($(CONFIG_SHMEM),) + ccflags-y += -DCONFIG_QCOM_KGSL_USE_SHMEM=1 \ + -DCONFIG_QCOM_KGSL_PROCESS_RECLAIM=1 +endif From a88f39365efb26477d34a167821f398d665f2c64 Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Fri, 16 Feb 2024 12:34:36 -0800 Subject: [PATCH 0677/1016] kgsl: gen8: Dump only 1 USPTP data in snapshot For gen8 targets every Usptp for any given SP will have the same data. So we do not need to dump all the usptp in the snapshot. This gives the added benefit of saving some space in snapshot. 
Change-Id: Iba0706208480d35c51692c2ab731a7ae429e3a32 Signed-off-by: Urvashi Agrawal --- adreno_gen8_0_0_snapshot.h | 190 ++++++++++++++++++------------------- 1 file changed, 95 insertions(+), 95 deletions(-) diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h index de1963dfa4..8b257b21fa 100644 --- a/adreno_gen8_0_0_snapshot.h +++ b/adreno_gen8_0_0_snapshot.h @@ -10,35 +10,35 @@ #include "adreno_gen8_snapshot.h" static struct gen8_shader_block gen8_0_0_shader_blocks[] = { - { TP0_TMO_DATA, 0x0200, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { TP0_SMO_DATA, 0x0080, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { TP0_MIPMAP_BASE_DATA, 0x0080, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_INST_DATA_3, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_INST_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_INST_DATA_1, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_0_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_1_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_2_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_3_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_4_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_5_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_6_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_7_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_CB_RAM, 0x0390, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_13_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_14_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_INST_TAG, 0x0100, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_INST_DATA_2, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_TMO_TAG, 0x0080, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_SMO_TAG, 0x0080, 2, 2, 
PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_STATE_DATA, 0x0040, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_HWAVE_RAM, 0x0100, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_L0_INST_BUF, 0x0080, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_8_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_9_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_10_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_11_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_12_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { TP0_TMO_DATA, 0x0200, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { TP0_SMO_DATA, 0x0080, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { TP0_MIPMAP_BASE_DATA, 0x0080, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_INST_DATA_3, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_INST_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_INST_DATA_1, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_0_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_1_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_2_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_3_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_4_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_5_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_6_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_7_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_CB_RAM, 0x0390, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_13_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_14_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_INST_TAG, 0x0100, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_INST_DATA_2, 0x0800, 2, 1, PIPE_BR, 
USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_TMO_TAG, 0x0080, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_SMO_TAG, 0x0080, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_STATE_DATA, 0x0040, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_HWAVE_RAM, 0x0100, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_L0_INST_BUF, 0x0080, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_8_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_9_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_10_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_11_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_LB_12_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, { HLSQ_DATAPATH_DSTR_META, 0x0170, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, { HLSQ_DATAPATH_DSTR_META, 0x0020, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, { HLSQ_DESC_REMAP_META, 0x0018, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, @@ -1651,138 +1651,138 @@ static struct gen8_cluster_registers gen8_0_0_mvc_clusters[] = { }; static struct gen8_sptp_cluster_registers gen8_0_0_sptp_clusters[] = { - { CLUSTER_NONE, UNSLICE, 2, 2, SP_NCTX_REG, PIPE_BR, 0, HLSQ_STATE, + { CLUSTER_NONE, UNSLICE, 2, 1, SP_NCTX_REG, PIPE_BR, 0, HLSQ_STATE, gen8_0_0_non_context_sp_pipe_br_hlsq_state_registers, 0xae00}, - { CLUSTER_NONE, UNSLICE, 2, 2, SP_NCTX_REG, PIPE_BR, 0, SP_TOP, + { CLUSTER_NONE, UNSLICE, 2, 1, SP_NCTX_REG, PIPE_BR, 0, SP_TOP, gen8_0_0_non_context_sp_pipe_br_sp_top_registers, 0xae00}, - { CLUSTER_NONE, UNSLICE, 2, 2, SP_NCTX_REG, PIPE_BR, 0, USPTP, + { CLUSTER_NONE, UNSLICE, 2, 1, SP_NCTX_REG, PIPE_BR, 0, USPTP, gen8_0_0_non_context_sp_pipe_br_usptp_registers, 0xae00}, - { CLUSTER_NONE, UNSLICE, 2, 2, TP0_NCTX_REG, PIPE_BR, 0, USPTP, + { CLUSTER_NONE, UNSLICE, 2, 1, TP0_NCTX_REG, PIPE_BR, 0, USPTP, gen8_0_0_non_context_tpl1_pipe_br_usptp_registers, 0xb600}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE, + { 
CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE, gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE, + { CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE, gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_cctx_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE, + { CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE, gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_shared_const_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, SP_TOP, + { CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, SP_TOP, gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, SP_TOP, + { CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, SP_TOP, gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_cctx_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP, + { CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP, gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_usptp_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP, + { CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP, gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_usptp_shared_const_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_STATE, + { CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_STATE, gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_STATE, + { CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_STATE, gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_cctx_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_STATE, + { 
CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_STATE, gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_shared_const_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, SP_TOP, + { CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, SP_TOP, gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, SP_TOP, + { CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, SP_TOP, gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_cctx_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP, + { CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP, gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_usptp_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP, + { CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP, gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_usptp_shared_const_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE, + { CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE, gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE, + { CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE, gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_cctx_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE, + { CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE, gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_shared_const_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, SP_TOP, + { CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, SP_TOP, gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, SP_TOP, + { 
CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, SP_TOP, gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_cctx_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP, + { CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP, gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_usptp_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_STATE, + { CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_STATE, gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_STATE, + { CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_STATE, gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_cctx_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_STATE, + { CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_STATE, gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_shared_const_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, SP_TOP, + { CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, SP_TOP, gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, SP_TOP, + { CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, SP_TOP, gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_cctx_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP, + { CLUSTER_SP_VS, SLICE, 2, 1, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP, gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_usptp_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE, gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE, + { CLUSTER_SP_PS, SLICE, 2, 
1, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE, gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_cctx_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE, gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_shared_const_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_DP, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_DP, gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, SP_TOP, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, SP_TOP, gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, SP_TOP, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, SP_TOP, gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_cctx_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP, gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP, gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_usptp_shared_const_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE, gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE, gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_cctx_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE, + { CLUSTER_SP_PS, SLICE, 2, 
1, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE, gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_shared_const_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_DP, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_DP, gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_dp_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, SP_TOP, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, SP_TOP, gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_sp_top_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, SP_TOP, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, SP_TOP, gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_sp_top_cctx_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP, gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_usptp_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP, gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_usptp_shared_const_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE, gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE, gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_cctx_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE, gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_shared_const_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_DP, + { 
CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_DP, gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, SP_TOP, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, SP_TOP, gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, SP_TOP, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, SP_TOP, gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_cctx_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP, gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, HLSQ_DP, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, HLSQ_DP, gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, SP_TOP, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, SP_TOP, gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_cctx_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, HLSQ_DP, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, HLSQ_DP, gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800}, - { CLUSTER_SP_PS, SLICE, 2, 2, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, SP_TOP, + { CLUSTER_SP_PS, SLICE, 2, 1, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, SP_TOP, gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_cctx_registers, 0xa800}, - { CLUSTER_SP_VS, SLICE, 2, 2, TP0_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP, + { CLUSTER_SP_VS, SLICE, 2, 1, TP0_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP, gen8_0_0_tpl1_slice_pipe_br_cluster_sp_vs_usptp_registers, 0xb000}, - { CLUSTER_SP_VS, SLICE, 2, 2, TP0_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP, + { CLUSTER_SP_VS, SLICE, 2, 1, TP0_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP, 
gen8_0_0_tpl1_slice_pipe_bv_cluster_sp_vs_usptp_registers, 0xb000}, - { CLUSTER_SP_VS, SLICE, 2, 2, TP0_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP, + { CLUSTER_SP_VS, SLICE, 2, 1, TP0_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP, gen8_0_0_tpl1_slice_pipe_br_cluster_sp_vs_usptp_registers, 0xb000}, - { CLUSTER_SP_VS, SLICE, 2, 2, TP0_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP, + { CLUSTER_SP_VS, SLICE, 2, 1, TP0_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP, gen8_0_0_tpl1_slice_pipe_bv_cluster_sp_vs_usptp_registers, 0xb000}, - { CLUSTER_SP_PS, SLICE, 2, 2, TP0_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP, + { CLUSTER_SP_PS, SLICE, 2, 1, TP0_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP, gen8_0_0_tpl1_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xb000}, - { CLUSTER_SP_PS, SLICE, 2, 2, TP0_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP, + { CLUSTER_SP_PS, SLICE, 2, 1, TP0_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP, gen8_0_0_tpl1_slice_pipe_lpac_cluster_sp_ps_usptp_registers, 0xb000}, - { CLUSTER_SP_PS, SLICE, 2, 2, TP0_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP, + { CLUSTER_SP_PS, SLICE, 2, 1, TP0_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP, gen8_0_0_tpl1_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xb000}, - { CLUSTER_SP_PS, SLICE, 2, 2, TP0_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP, + { CLUSTER_SP_PS, SLICE, 2, 1, TP0_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP, gen8_0_0_tpl1_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xb000}, - { CLUSTER_SP_PS, SLICE, 2, 2, TP0_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP, + { CLUSTER_SP_PS, SLICE, 2, 1, TP0_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP, gen8_0_0_tpl1_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xb000}, }; From 91e944fdfd4d896da1b61dc86e7e1eba90494184 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Sat, 10 Feb 2024 00:37:15 +0530 Subject: [PATCH 0678/1016] kgsl: Clear fault while booting GMU There are some instances where another fault is reported while initial fault was being handled. In such cases hwsched will trigger a new recovery as soon as the device_mutex gets released when the first recovery completes. 
This happens because fault bit remains set if we get another fault during the first recovery. Fix this problem by clearing hwsched fault while booting GMU. Also clear the fault (hwsched or dispatcher) before starting the GMU so any failure during GMU boot can be captured. Change-Id: I4b375217559d3fa640b83bc5b29042839f66eebe Signed-off-by: Pankaj Gupta --- adreno_a6xx_gmu.c | 11 +++++++---- adreno_a6xx_hwsched.c | 9 ++++++--- adreno_a6xx_rgmu.c | 8 ++++---- adreno_gen7_gmu.c | 9 ++++++--- adreno_gen7_hwsched.c | 9 ++++++--- adreno_gen8_gmu.c | 9 ++++++--- adreno_gen8_hwsched.c | 9 ++++++--- adreno_hwsched.c | 8 ++++++++ adreno_hwsched.h | 8 ++++++++ 9 files changed, 57 insertions(+), 23 deletions(-) diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index 5e58de3d2f..bb57c4d215 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -2401,6 +2401,9 @@ static int a6xx_gmu_first_boot(struct adreno_device *adreno_dev) level = pwr->pwrlevels[pwr->default_pwrlevel].bus_min; icc_set_bw(pwr->icc_path, 0, kBps_to_icc(pwr->ddr_table[level])); + /* Clear any GPU faults that might have been left over */ + adreno_clear_gpu_fault(adreno_dev); + ret = a6xx_gmu_device_start(adreno_dev); if (ret) goto err; @@ -2491,6 +2494,9 @@ static int a6xx_gmu_boot(struct adreno_device *adreno_dev) a6xx_gmu_irq_enable(adreno_dev); + /* Clear any GPU faults that might have been left over */ + adreno_clear_gpu_fault(adreno_dev); + ret = a6xx_gmu_device_start(adreno_dev); if (ret) goto err; @@ -3124,9 +3130,6 @@ static int a6xx_gpu_boot(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret; - /* Clear any GPU faults that might have been left over */ - adreno_clear_gpu_fault(adreno_dev); - adreno_set_active_ctxs_null(adreno_dev); ret = kgsl_mmu_start(device); diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index 4103eb5a1b..771ca83839 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -384,6 +384,9 @@ static int a6xx_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) icc_set_bw(pwr->icc_path, 0, kBps_to_icc(pwr->ddr_table[level])); + /* Clear any hwsched faults that might have been left over */ + adreno_hwsched_clear_fault(adreno_dev); + ret = a6xx_gmu_device_start(adreno_dev); if (ret) goto err; @@ -448,6 +451,9 @@ static int a6xx_hwsched_gmu_boot(struct adreno_device *adreno_dev) a6xx_gmu_irq_enable(adreno_dev); + /* Clear any hwsched faults that might have been left over */ + adreno_hwsched_clear_fault(adreno_dev); + ret = a6xx_gmu_device_start(adreno_dev); if (ret) goto err; @@ -584,9 +590,6 @@ static int a6xx_hwsched_gpu_boot(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret; - /* Clear any GPU faults that might have been left over */ - adreno_clear_gpu_fault(adreno_dev); - ret = 
kgsl_mmu_start(device); if (ret) goto err; diff --git a/adreno_a6xx_rgmu.c b/adreno_a6xx_rgmu.c index 2ab389baa1..469f92b35d 100644 --- a/adreno_a6xx_rgmu.c +++ b/adreno_a6xx_rgmu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -706,9 +706,6 @@ static int a6xx_gpu_boot(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret; - /* Clear any GPU faults that might have been left over */ - adreno_clear_gpu_fault(adreno_dev); - adreno_set_active_ctxs_null(adreno_dev); ret = kgsl_mmu_start(device); @@ -788,6 +785,9 @@ static int a6xx_rgmu_boot(struct adreno_device *adreno_dev) a6xx_rgmu_irq_enable(adreno_dev); + /* Clear any GPU faults that might have been left over */ + adreno_clear_gpu_fault(adreno_dev); + ret = a6xx_rgmu_fw_start(adreno_dev, GMU_COLD_BOOT); if (ret) goto err; diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 2f592d29bb..ea79c35dc1 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -1964,6 +1964,9 @@ static int gen7_gmu_first_boot(struct adreno_device *adreno_dev) level = pwr->pwrlevels[pwr->default_pwrlevel].bus_min; icc_set_bw(pwr->icc_path, 0, kBps_to_icc(pwr->ddr_table[level])); + /* Clear any GPU faults that might have been left over */ + adreno_clear_gpu_fault(adreno_dev); + ret = gen7_gmu_device_start(adreno_dev); if (ret) goto err; @@ -2047,6 +2050,9 @@ static int gen7_gmu_boot(struct adreno_device *adreno_dev) gen7_gmu_irq_enable(adreno_dev); + /* Clear any GPU faults that might have been left over */ + adreno_clear_gpu_fault(adreno_dev); + ret = gen7_gmu_device_start(adreno_dev); if (ret) goto err; @@ -2770,9 +2776,6 @@ static int gen7_gpu_boot(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); 
int ret; - /* Clear any GPU faults that might have been left over */ - adreno_clear_gpu_fault(adreno_dev); - adreno_set_active_ctxs_null(adreno_dev); ret = kgsl_mmu_start(device); diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 754bb94bbf..9b6de1fbc9 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -532,6 +532,9 @@ static int gen7_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) icc_set_bw(pwr->icc_path, 0, kBps_to_icc(pwr->ddr_table[level])); + /* Clear any hwsched faults that might have been left over */ + adreno_hwsched_clear_fault(adreno_dev); + ret = gen7_gmu_device_start(adreno_dev); if (ret) goto err; @@ -609,6 +612,9 @@ static int gen7_hwsched_gmu_boot(struct adreno_device *adreno_dev) gen7_gmu_irq_enable(adreno_dev); + /* Clear any hwsched faults that might have been left over */ + adreno_hwsched_clear_fault(adreno_dev); + ret = gen7_gmu_device_start(adreno_dev); if (ret) goto err; @@ -779,9 +785,6 @@ static int gen7_hwsched_gpu_boot(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret; - /* Clear any GPU faults that might have been left over */ - adreno_clear_gpu_fault(adreno_dev); - ret = kgsl_mmu_start(device); if (ret) goto err; diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index b57a745350..a7825f8d97 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -1829,6 +1829,9 @@ static int gen8_gmu_first_boot(struct adreno_device *adreno_dev) level = pwr->pwrlevels[pwr->default_pwrlevel].bus_min; icc_set_bw(pwr->icc_path, 0, kBps_to_icc(pwr->ddr_table[level])); + /* Clear any GPU faults that might have been left over */ + adreno_clear_gpu_fault(adreno_dev); + ret = gen8_gmu_device_start(adreno_dev); if (ret) goto err; @@ -1903,6 +1906,9 @@ static int gen8_gmu_boot(struct adreno_device *adreno_dev) gen8_gmu_irq_enable(adreno_dev); + /* Clear any GPU faults that might have been left over */ + adreno_clear_gpu_fault(adreno_dev); + ret = 
gen8_gmu_device_start(adreno_dev); if (ret) goto err; @@ -2617,9 +2623,6 @@ static int gen8_gpu_boot(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret; - /* Clear any GPU faults that might have been left over */ - adreno_clear_gpu_fault(adreno_dev); - adreno_set_active_ctxs_null(adreno_dev); ret = kgsl_mmu_start(device); diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index fe781d9ace..409edc234c 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -599,6 +599,9 @@ static int gen8_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) gen8_hwsched_soccp_vote(adreno_dev, true); + /* Clear any hwsched faults that might have been left over */ + adreno_hwsched_clear_fault(adreno_dev); + ret = gen8_gmu_device_start(adreno_dev); if (ret) goto err; @@ -679,6 +682,9 @@ static int gen8_hwsched_gmu_boot(struct adreno_device *adreno_dev) gen8_hwsched_soccp_vote(adreno_dev, true); + /* Clear any hwsched faults that might have been left over */ + adreno_hwsched_clear_fault(adreno_dev); + ret = gen8_gmu_device_start(adreno_dev); if (ret) goto err; @@ -850,9 +856,6 @@ static int gen8_hwsched_gpu_boot(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret; - /* Clear any GPU faults that might have been left over */ - adreno_clear_gpu_fault(adreno_dev); - ret = kgsl_mmu_start(device); if (ret) goto err; diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 0f4d3794eb..080ff4c3f0 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -1939,6 +1939,14 @@ void adreno_hwsched_fault(struct adreno_device *adreno_dev, adreno_hwsched_trigger(adreno_dev); } +void adreno_hwsched_clear_fault(struct adreno_device *adreno_dev) +{ + atomic_set(&adreno_dev->hwsched.fault, 0); + + /* make sure other CPUs see the update */ + smp_wmb(); +} + static void adreno_hwsched_create_hw_fence(struct adreno_device *adreno_dev, struct kgsl_sync_fence *kfence) { diff --git a/adreno_hwsched.h 
b/adreno_hwsched.h index 72acdd82b0..99f98fd854 100644 --- a/adreno_hwsched.h +++ b/adreno_hwsched.h @@ -154,6 +154,14 @@ int adreno_hwsched_init(struct adreno_device *adreno_dev, */ void adreno_hwsched_fault(struct adreno_device *adreno_dev, u32 fault); +/** + * adreno_hwsched_clear_fault() - Clear the hwsched fault + * @adreno_dev: A pointer to an adreno_device structure + * + * Clear the hwsched fault status for adreno device + */ +void adreno_hwsched_clear_fault(struct adreno_device *adreno_dev); + /** * adreno_hwsched_parse_fault_ib - Parse the faulty submission * @adreno_dev: pointer to the adreno device From cc2a4d0125d75ac855cfc12c17cd20d5cabda318 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Wed, 28 Feb 2024 11:12:42 +0530 Subject: [PATCH 0679/1016] kgsl: Clean up snapshot header file Clean up the snapshot header file by removing macros and structures that are no longer in use. Change-Id: Iaaf99428e465d0cd886f2ae002433b49a66184a2 Signed-off-by: Kamal Agrawal --- kgsl_snapshot.h | 106 ++---------------------------------------------- 1 file changed, 3 insertions(+), 103 deletions(-) diff --git a/kgsl_snapshot.h b/kgsl_snapshot.h index fa8471f5fe..52679de924 100644 --- a/kgsl_snapshot.h +++ b/kgsl_snapshot.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef _KGSL_SNAPSHOT_H_ @@ -39,18 +39,13 @@ struct kgsl_snapshot_section_header { #define KGSL_SNAPSHOT_SECTION_OS 0x0101 #define KGSL_SNAPSHOT_SECTION_REGS 0x0201 #define KGSL_SNAPSHOT_SECTION_REGS_V2 0x0202 -#define KGSL_SNAPSHOT_SECTION_RB 0x0301 #define KGSL_SNAPSHOT_SECTION_RB_V2 0x0302 -#define KGSL_SNAPSHOT_SECTION_IB 0x0401 #define KGSL_SNAPSHOT_SECTION_IB_V2 0x0402 #define KGSL_SNAPSHOT_SECTION_INDEXED_REGS 0x0501 #define KGSL_SNAPSHOT_SECTION_INDEXED_REGS_V2 0x0502 -#define KGSL_SNAPSHOT_SECTION_ISTORE 0x0801 #define KGSL_SNAPSHOT_SECTION_DEBUG 0x0901 #define KGSL_SNAPSHOT_SECTION_DEBUGBUS 0x0A01 -#define KGSL_SNAPSHOT_SECTION_GPU_OBJECT 0x0B01 #define KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2 0x0B02 -#define KGSL_SNAPSHOT_SECTION_MEMLIST 0x0E01 #define KGSL_SNAPSHOT_SECTION_MEMLIST_V2 0x0E02 #define KGSL_SNAPSHOT_SECTION_SHADER 0x1201 #define KGSL_SNAPSHOT_SECTION_SHADER_V2 0x1202 @@ -58,7 +53,6 @@ struct kgsl_snapshot_section_header { #define KGSL_SNAPSHOT_SECTION_MVC 0x1501 #define KGSL_SNAPSHOT_SECTION_MVC_V2 0x1502 #define KGSL_SNAPSHOT_SECTION_MVC_V3 0x1503 -#define KGSL_SNAPSHOT_SECTION_GMU 0x1601 #define KGSL_SNAPSHOT_SECTION_GMU_MEMORY 0x1701 #define KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS 0x1801 #define KGSL_SNAPSHOT_SECTION_TRACE_BUFFER 0x1901 @@ -67,46 +61,9 @@ struct kgsl_snapshot_section_header { #define KGSL_SNAPSHOT_SECTION_END 0xFFFF /* OS sub-section header */ -#define KGSL_SNAPSHOT_OS_LINUX 0x0001 -#define KGSL_SNAPSHOT_OS_LINUX_V3 0x00000202 #define KGSL_SNAPSHOT_OS_LINUX_V4 0x00000203 /* Linux OS specific information */ -struct kgsl_snapshot_linux { - int osid; /* subsection OS identifier */ - int state; /* 1 if the thread is running, 0 for hung */ - __u32 seconds; /* Unix timestamp for the snapshot */ - __u32 power_flags; /* Current power flags */ - __u32 power_level; /* Current power level */ - __u32 power_interval_timeout; /* Power interval timeout */ - __u32 grpclk; /* Current GP clock value */ - __u32 busclk; /* Current busclk value 
*/ - __u32 ptbase; /* Current ptbase */ - __u32 pid; /* PID of the process that owns the PT */ - __u32 current_context; /* ID of the current context */ - __u32 ctxtcount; /* Number of contexts appended to section */ - unsigned char release[32]; /* kernel release */ - unsigned char version[32]; /* kernel version */ - unsigned char comm[16]; /* Name of the process that owns the PT */ -} __packed; - -struct kgsl_snapshot_linux_v2 { - int osid; /* subsection OS identifier */ - __u32 seconds; /* Unix timestamp for the snapshot */ - __u32 power_flags; /* Current power flags */ - __u32 power_level; /* Current power level */ - __u32 power_interval_timeout; /* Power interval timeout */ - __u32 grpclk; /* Current GP clock value */ - __u32 busclk; /* Current busclk value */ - __u64 ptbase; /* Current ptbase */ - __u32 pid; /* PID of the process that owns the PT */ - __u32 current_context; /* ID of the current context */ - __u32 ctxtcount; /* Number of contexts appended to section */ - unsigned char release[32]; /* kernel release */ - unsigned char version[32]; /* kernel version */ - unsigned char comm[16]; /* Name of the process that owns the PT */ -} __packed; - struct kgsl_snapshot_linux_v4 { int osid; /* subsection OS identifier */ __u32 seconds; /* Unix timestamp for the snapshot */ @@ -133,31 +90,14 @@ struct kgsl_snapshot_linux_v4 { * These are appended one after another in the OS section below * the header above */ - -struct kgsl_snapshot_linux_context { - __u32 id; /* The context ID */ - __u32 timestamp_queued; /* The last queued timestamp */ - __u32 timestamp_retired; /* The last timestamp retired by HW */ -}; - struct kgsl_snapshot_linux_context_v2 { __u32 id; /* The context ID */ __u32 timestamp_queued; /* The last queued timestamp */ __u32 timestamp_consumed; /* The last timestamp consumed by HW */ __u32 timestamp_retired; /* The last timestamp retired by HW */ }; -/* Ringbuffer sub-section header */ -struct kgsl_snapshot_rb { - int start; /* dword at the start of 
the dump */ - int end; /* dword at the end of the dump */ - int rbsize; /* Size (in dwords) of the ringbuffer */ - int wptr; /* Current index of the CPU write pointer */ - int rptr; /* Current index of the GPU read pointer */ - int count; /* Number of dwords in the dump */ - __u32 timestamp_queued; /* The last queued timestamp */ - __u32 timestamp_retired; /* The last timestamp retired by HW */ -} __packed; +/* Ringbuffer sub-section header */ struct kgsl_snapshot_rb_v2 { int start; /* dword at the start of the dump */ int end; /* dword at the end of the dump */ @@ -171,18 +111,6 @@ struct kgsl_snapshot_rb_v2 { __u32 id; /* Ringbuffer identifier */ } __packed; - -/* Replay or Memory list section, both sections have same header */ -struct kgsl_snapshot_replay_mem_list { - /* - * Number of IBs to replay for replay section or - * number of memory list entries for mem list section - */ - int num_entries; - /* Pagetable base to which the replay IBs or memory entries belong */ - __u32 ptbase; -} __packed; - /* Replay or Memory list section, both sections have same header */ struct kgsl_snapshot_mem_list_v2 { /* @@ -194,14 +122,6 @@ struct kgsl_snapshot_mem_list_v2 { __u64 ptbase; } __packed; - -/* Indirect buffer sub-section header */ -struct kgsl_snapshot_ib { - __u32 gpuaddr; /* GPU address of the the IB */ - __u32 ptbase; /* Base for the pagetable the GPU address is valid in */ - int size; /* Size of the IB */ -} __packed; - /* Indirect buffer sub-section header (v2) */ struct kgsl_snapshot_ib_v2 { __u64 gpuaddr; /* GPU address of the the IB */ @@ -275,22 +195,9 @@ struct kgsl_snapshot_mvc_regs_v3 { u32 usptp_id; } __packed; -/* Istore sub-section header */ -struct kgsl_snapshot_istore { - int count; /* Number of instructions in the istore */ -} __packed; - /* Debug data sub-section header */ -/* A2XX debug sections */ -#define SNAPSHOT_DEBUG_SX 1 -#define SNAPSHOT_DEBUG_CP 2 -#define SNAPSHOT_DEBUG_SQ 3 -#define SNAPSHOT_DEBUG_SQTHREAD 4 -#define SNAPSHOT_DEBUG_MIU 5 
- -/* A3XX debug sections */ -#define SNAPSHOT_DEBUG_VPC_MEMORY 6 +/* A5XX debug sections */ #define SNAPSHOT_DEBUG_CP_MEQ 7 #define SNAPSHOT_DEBUG_CP_PM4_RAM 8 #define SNAPSHOT_DEBUG_CP_PFP_RAM 9 @@ -385,13 +292,6 @@ struct kgsl_snapshot_trace_buffer { #define SNAPSHOT_GPU_OBJECT_DRAW 4 #define SNAPSHOT_GPU_OBJECT_GLOBAL 5 -struct kgsl_snapshot_gpu_object { - int type; /* Type of GPU object */ - __u32 gpuaddr; /* GPU address of the the object */ - __u32 ptbase; /* Base for the pagetable the GPU address is valid in */ - int size; /* Size of the object (in dwords) */ -}; - struct kgsl_snapshot_gpu_object_v2 { int type; /* Type of GPU object */ __u64 gpuaddr; /* GPU address of the the object */ From cfbe7b4a6e4c7d65344e6472b53960874aa4ebed Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Wed, 28 Feb 2024 12:45:45 +0530 Subject: [PATCH 0680/1016] kgsl: Replace all instances of strlcpy() with strscpy() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, we use the strlcpy() API to copy strings from one source to another. An obvious issue with strlcpy() is that it uses strlen() to read the source string and if the source string is not null-terminated, strlen() reads the string beyond the buffer, which could lead to safety issues. To fix this, we can switch to the strscpy() API. This API ensures that it doesn’t read memory from the source beyond the specified number of bytes, thereby enhancing the robustness and safety of our string copying operations. 
Change-Id: Ia130ce7e7807bdc170a098465242c038f7eab5b2 Signed-off-by: Sanjay Yadav --- adreno.c | 4 ++-- adreno_profile.c | 4 ++-- adreno_snapshot.c | 10 +++++----- kgsl_eventlog.c | 6 +++--- kgsl_sharedmem.c | 4 ++-- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/adreno.c b/adreno.c index 6186f15f3e..73bf6ca2f2 100644 --- a/adreno.c +++ b/adreno.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #include #include @@ -2125,7 +2125,7 @@ static int adreno_prop_gpu_model(struct kgsl_device *device, { struct kgsl_gpu_model model = {0}; - strlcpy(model.gpu_model, adreno_get_gpu_model(device), + strscpy(model.gpu_model, adreno_get_gpu_model(device), sizeof(model.gpu_model)); return copy_prop(param, &model, sizeof(model)); diff --git a/adreno_profile.c b/adreno_profile.c index bf6a554625..2af913f656 100644 --- a/adreno_profile.c +++ b/adreno_profile.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -297,7 +297,7 @@ static bool _add_to_assignments_list(struct adreno_profile *profile, entry->offset = offset; entry->offset_hi = offset_hi; - strlcpy(entry->name, str, sizeof(entry->name)); + strscpy(entry->name, str, sizeof(entry->name)); profile->assignment_count++; diff --git a/adreno_snapshot.c b/adreno_snapshot.c index a17ac0380d..e37464e4d2 100644 --- a/adreno_snapshot.c +++ b/adreno_snapshot.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. 
- * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -983,8 +983,8 @@ static void adreno_snapshot_os(struct kgsl_device *device, header->osid = KGSL_SNAPSHOT_OS_LINUX_V4; - strlcpy(header->release, init_utsname()->release, sizeof(header->release)); - strlcpy(header->version, init_utsname()->version, sizeof(header->version)); + strscpy(header->release, init_utsname()->release, sizeof(header->release)); + strscpy(header->version, init_utsname()->version, sizeof(header->version)); header->seconds = ktime_get_real_seconds(); header->power_flags = device->pwrctrl.power_flags; @@ -1001,14 +1001,14 @@ static void adreno_snapshot_os(struct kgsl_device *device, if (guilty) { header->current_context = guilty->id; header->pid = guilty->tid; - strlcpy(header->comm, guilty->proc_priv->comm, + strscpy(header->comm, guilty->proc_priv->comm, sizeof(header->comm)); } if (guilty_lpac) { header->current_context_lpac = guilty_lpac->id; header->pid_lpac = guilty_lpac->tid; - strlcpy(header->comm_lpac, guilty_lpac->proc_priv->comm, + strscpy(header->comm_lpac, guilty_lpac->proc_priv->comm, sizeof(header->comm_lpac)); } diff --git a/kgsl_eventlog.c b/kgsl_eventlog.c index 908efdccb4..f6678d4638 100644 --- a/kgsl_eventlog.c +++ b/kgsl_eventlog.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -192,7 +192,7 @@ void log_kgsl_syncpoint_fence_event(u32 id, char *fence_name) entry->id = id; memset(entry->name, 0, sizeof(entry->name)); - strlcpy(entry->name, fence_name, sizeof(entry->name)); + strscpy(entry->name, fence_name, sizeof(entry->name)); } void log_kgsl_syncpoint_fence_expire_event(u32 id, char *fence_name) @@ -208,7 +208,7 @@ void log_kgsl_syncpoint_fence_expire_event(u32 id, char *fence_name) entry->id = id; memset(entry->name, 0, sizeof(entry->name)); - strlcpy(entry->name, fence_name, sizeof(entry->name)); + strscpy(entry->name, fence_name, sizeof(entry->name)); } void log_kgsl_timeline_fence_alloc_event(u32 id, u64 seqno) diff --git a/kgsl_sharedmem.c b/kgsl_sharedmem.c index 1db9c2a5c2..a953d58e7e 100644 --- a/kgsl_sharedmem.c +++ b/kgsl_sharedmem.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -964,7 +964,7 @@ void kgsl_get_memory_usage(char *name, size_t name_size, uint64_t memflags) for (i = 0; memtype_attrs[i]; i++) { memtype = container_of(memtype_attrs[i], struct kgsl_memtype, attr); if (memtype->type == type) { - strlcpy(name, memtype->attr.name, name_size); + strscpy(name, memtype->attr.name, name_size); return; } } From 55cbc2b271a23e1f505c1bf57cf140b96f3ef531 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Wed, 14 Feb 2024 17:12:57 +0530 Subject: [PATCH 0681/1016] kgsl: Make preemption context init function generation agnostic _preemption_context_init() logic is independent of gpu generation. Thus, move it to generation agnostic file to reduce code duplication. 
Change-Id: I0aec73d428c1f7ba06c56d798f4150e8e96ca387 Signed-off-by: Kamal Agrawal --- adreno.h | 4 +++- adreno_a6xx.c | 6 ------ adreno_a6xx.h | 6 +----- adreno_a6xx_preempt.c | 33 +------------------------------ adreno_drawctxt.c | 45 +++++++++++++++++++++++++++++++++++-------- adreno_gen7.c | 3 --- adreno_gen7.h | 4 ---- adreno_gen7_preempt.c | 33 +------------------------------ adreno_gen8.c | 2 -- adreno_gen8.h | 4 ---- adreno_gen8_preempt.c | 33 +------------------------------ 11 files changed, 44 insertions(+), 129 deletions(-) diff --git a/adreno.h b/adreno.h index fe1f24cc6e..503ed780f8 100644 --- a/adreno.h +++ b/adreno.h @@ -32,6 +32,9 @@ /* Index to preemption scratch buffer to store current QOS value */ #define QOS_VALUE_IDX KGSL_PRIORITY_MAX_RB_LEVELS +/* Size of the user context record block (in bytes) */ +#define ADRENO_CP_CTXRECORD_USER_RESTORE_SIZE (192 * SZ_1K) + /* ADRENO_DEVICE - Given a kgsl_device return the adreno device struct */ #define ADRENO_DEVICE(device) \ container_of(device, struct adreno_device, dev) @@ -910,7 +913,6 @@ struct adreno_gpudev { unsigned int prelevel, unsigned int postlevel, bool post); void (*preemption_schedule)(struct adreno_device *adreno_dev); - int (*preemption_context_init)(struct kgsl_context *context); void (*context_detach)(struct adreno_context *drawctxt); void (*pre_reset)(struct adreno_device *adreno_dev); void (*gpu_keepalive)(struct adreno_device *adreno_dev, diff --git a/adreno_a6xx.c b/adreno_a6xx.c index c45fac390a..2fbd77e0ee 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -2343,7 +2343,6 @@ const struct adreno_gpudev adreno_a6xx_gpudev = { .iommu_fault_block = a6xx_iommu_fault_block, .reset = a6xx_reset, .preemption_schedule = a6xx_preemption_schedule, - .preemption_context_init = a6xx_preemption_context_init, .read_alwayson = a6xx_read_alwayson, .power_ops = &adreno_power_operations, .clear_pending_transactions = a6xx_clear_pending_transactions, @@ -2364,7 +2363,6 @@ const struct a6xx_gpudev 
adreno_a6xx_hwsched_gpudev = { .snapshot = a6xx_hwsched_snapshot, .irq_handler = a6xx_hwsched_irq_handler, .iommu_fault_block = a6xx_iommu_fault_block, - .preemption_context_init = a6xx_preemption_context_init, .context_detach = a6xx_hwsched_context_detach, .read_alwayson = a6xx_read_alwayson, .reset = a6xx_hwsched_reset_replay, @@ -2396,7 +2394,6 @@ const struct a6xx_gpudev adreno_a6xx_gmu_gpudev = { .iommu_fault_block = a6xx_iommu_fault_block, .reset = a6xx_gmu_reset, .preemption_schedule = a6xx_preemption_schedule, - .preemption_context_init = a6xx_preemption_context_init, .read_alwayson = a6xx_read_alwayson, .power_ops = &a6xx_gmu_power_ops, .remove = a6xx_remove, @@ -2424,7 +2421,6 @@ const struct adreno_gpudev adreno_a6xx_rgmu_gpudev = { .iommu_fault_block = a6xx_iommu_fault_block, .reset = a6xx_rgmu_reset, .preemption_schedule = a6xx_preemption_schedule, - .preemption_context_init = a6xx_preemption_context_init, .read_alwayson = a6xx_read_alwayson, .power_ops = &a6xx_rgmu_power_ops, .remove = a6xx_remove, @@ -2451,7 +2447,6 @@ const struct adreno_gpudev adreno_a619_holi_gpudev = { .iommu_fault_block = a6xx_iommu_fault_block, .reset = a6xx_reset, .preemption_schedule = a6xx_preemption_schedule, - .preemption_context_init = a6xx_preemption_context_init, .read_alwayson = a6xx_read_alwayson, .power_ops = &adreno_power_operations, .clear_pending_transactions = a6xx_clear_pending_transactions, @@ -2479,7 +2474,6 @@ const struct a6xx_gpudev adreno_a630_gpudev = { .iommu_fault_block = a6xx_iommu_fault_block, .reset = a6xx_gmu_reset, .preemption_schedule = a6xx_preemption_schedule, - .preemption_context_init = a6xx_preemption_context_init, .read_alwayson = a6xx_read_alwayson, .power_ops = &a630_gmu_power_ops, .remove = a6xx_remove, diff --git a/adreno_a6xx.h b/adreno_a6xx.h index 35fec861a5..650611ccb5 100644 --- a/adreno_a6xx.h +++ b/adreno_a6xx.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2017-2021, The Linux Foundation. 
All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _ADRENO_A6XX_H_ @@ -156,8 +156,6 @@ struct a6xx_cp_smmu_info { #define A6XX_CP_CTXRECORD_MAGIC_REF 0xAE399D6EUL /* Size of each CP preemption record */ #define A6XX_CP_CTXRECORD_SIZE_IN_BYTES (2112 * 1024) -/* Size of the user context record block (in bytes) */ -#define A6XX_CP_CTXRECORD_USER_RESTORE_SIZE (192 * 1024) /* Size of the performance counter save/restore block (in bytes) */ #define A6XX_CP_PERFCOUNTER_SAVE_RESTORE_SIZE (4 * 1024) @@ -239,8 +237,6 @@ unsigned int a6xx_set_marker(unsigned int *cmds, void a6xx_preemption_callback(struct adreno_device *adreno_dev, int bit); -int a6xx_preemption_context_init(struct kgsl_context *context); - void a6xx_preemption_context_destroy(struct kgsl_context *context); void a6xx_snapshot(struct adreno_device *adreno_dev, diff --git a/adreno_a6xx_preempt.c b/adreno_a6xx_preempt.c index 82160e390f..771a0f4d67 100644 --- a/adreno_a6xx_preempt.c +++ b/adreno_a6xx_preempt.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2017-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include "adreno.h" @@ -760,34 +760,3 @@ int a6xx_preemption_init(struct adreno_device *adreno_dev) set_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv); return 0; } - -int a6xx_preemption_context_init(struct kgsl_context *context) -{ - struct kgsl_device *device = context->device; - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - uint64_t flags = 0; - - if (!adreno_preemption_feature_set(adreno_dev)) - return 0; - - if (context->flags & KGSL_CONTEXT_SECURE) - flags |= KGSL_MEMFLAGS_SECURE; - - if (is_compat_task()) - flags |= KGSL_MEMFLAGS_FORCE_32BIT; - - /* - * gpumem_alloc_entry takes an extra refcount. Put it only when - * destroying the context to keep the context record valid - */ - context->user_ctxt_record = gpumem_alloc_entry(context->dev_priv, - A6XX_CP_CTXRECORD_USER_RESTORE_SIZE, flags); - if (IS_ERR(context->user_ctxt_record)) { - int ret = PTR_ERR(context->user_ctxt_record); - - context->user_ctxt_record = NULL; - return ret; - } - - return 0; -} diff --git a/adreno_drawctxt.c b/adreno_drawctxt.c index 3402620305..705d0b7cef 100644 --- a/adreno_drawctxt.c +++ b/adreno_drawctxt.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -306,6 +306,38 @@ void adreno_drawctxt_set_guilty(struct kgsl_device *device, adreno_drawctxt_invalidate(device, context); } +static int drawctxt_preemption_init(struct kgsl_context *context) +{ + struct kgsl_device *device = context->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + u64 flags = 0; + + /* User context record is needed for a6x and beyond targets only */ + if (!adreno_preemption_feature_set(adreno_dev) || (ADRENO_GPUREV(adreno_dev) < 600)) + return 0; + + if (context->flags & KGSL_CONTEXT_SECURE) + flags |= KGSL_MEMFLAGS_SECURE; + + if (is_compat_task()) + flags |= KGSL_MEMFLAGS_FORCE_32BIT; + + /* + * gpumem_alloc_entry takes an extra refcount. Put it only when + * destroying the context to keep the context record valid + */ + context->user_ctxt_record = gpumem_alloc_entry(context->dev_priv, + ADRENO_CP_CTXRECORD_USER_RESTORE_SIZE, flags); + if (IS_ERR(context->user_ctxt_record)) { + int ret = PTR_ERR(context->user_ctxt_record); + + context->user_ctxt_record = NULL; + return ret; + } + + return 0; +} + #define KGSL_CONTEXT_PRIORITY_MED 0x8 /** @@ -322,7 +354,6 @@ adreno_drawctxt_create(struct kgsl_device_private *dev_priv, struct adreno_context *drawctxt; struct kgsl_device *device = dev_priv->device; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); int ret; unsigned int local; @@ -432,12 +463,10 @@ adreno_drawctxt_create(struct kgsl_device_private *dev_priv, if (adreno_dev->dispatch_ops && adreno_dev->dispatch_ops->setup_context) adreno_dev->dispatch_ops->setup_context(adreno_dev, drawctxt); - if (gpudev->preemption_context_init) { - ret = gpudev->preemption_context_init(&drawctxt->base); - if (ret != 0) { - kgsl_context_detach(&drawctxt->base); - return ERR_PTR(ret); - } + ret = drawctxt_preemption_init(&drawctxt->base); + if (ret) { + kgsl_context_detach(&drawctxt->base); + return ERR_PTR(ret); } /* copy back whatever flags we dediced 
were valid */ diff --git a/adreno_gen7.c b/adreno_gen7.c index 517ae4f643..63d0dabd84 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -2198,7 +2198,6 @@ const struct gen7_gpudev adreno_gen7_9_0_hwsched_gpudev = { .snapshot = gen7_hwsched_snapshot, .irq_handler = gen7_hwsched_irq_handler, .iommu_fault_block = gen7_iommu_fault_block, - .preemption_context_init = gen7_preemption_context_init, .context_detach = gen7_hwsched_context_detach, .read_alwayson = gen7_9_0_read_alwayson, .reset = gen7_hwsched_reset_replay, @@ -2228,7 +2227,6 @@ const struct gen7_gpudev adreno_gen7_hwsched_gpudev = { .snapshot = gen7_hwsched_snapshot, .irq_handler = gen7_hwsched_irq_handler, .iommu_fault_block = gen7_iommu_fault_block, - .preemption_context_init = gen7_preemption_context_init, .context_detach = gen7_hwsched_context_detach, .read_alwayson = gen7_read_alwayson, .reset = gen7_hwsched_reset_replay, @@ -2263,7 +2261,6 @@ const struct gen7_gpudev adreno_gen7_gmu_gpudev = { .iommu_fault_block = gen7_iommu_fault_block, .reset = gen7_gmu_reset, .preemption_schedule = gen7_preemption_schedule, - .preemption_context_init = gen7_preemption_context_init, .read_alwayson = gen7_read_alwayson, .power_ops = &gen7_gmu_power_ops, .remove = gen7_remove, diff --git a/adreno_gen7.h b/adreno_gen7.h index 11b701ad8c..ccfef0d777 100644 --- a/adreno_gen7.h +++ b/adreno_gen7.h @@ -187,8 +187,6 @@ struct gen7_cp_smmu_info { #define GEN7_CP_CTXRECORD_MAGIC_REF 0xae399d6eUL /* Size of each CP preemption record */ #define GEN7_CP_CTXRECORD_SIZE_IN_BYTES (4192 * 1024) -/* Size of the user context record block (in bytes) */ -#define GEN7_CP_CTXRECORD_USER_RESTORE_SIZE (192 * 1024) /* Size of the performance counter save/restore block (in bytes) */ #define GEN7_CP_PERFCOUNTER_SAVE_RESTORE_SIZE (4 * 1024) @@ -255,8 +253,6 @@ unsigned int gen7_set_marker(unsigned int *cmds, void gen7_preemption_callback(struct adreno_device *adreno_dev, int bit); -int gen7_preemption_context_init(struct kgsl_context *context); 
- void gen7_preemption_context_destroy(struct kgsl_context *context); void gen7_preemption_prepare_postamble(struct adreno_device *adreno_dev); diff --git a/adreno_gen7_preempt.c b/adreno_gen7_preempt.c index 41f60fb7ad..e52e7278a1 100644 --- a/adreno_gen7_preempt.c +++ b/adreno_gen7_preempt.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include "adreno.h" @@ -769,34 +769,3 @@ done: clear_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv); return ret; } - -int gen7_preemption_context_init(struct kgsl_context *context) -{ - struct kgsl_device *device = context->device; - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - u64 flags = 0; - - if (!adreno_preemption_feature_set(adreno_dev)) - return 0; - - if (context->flags & KGSL_CONTEXT_SECURE) - flags |= KGSL_MEMFLAGS_SECURE; - - if (is_compat_task()) - flags |= KGSL_MEMFLAGS_FORCE_32BIT; - - /* - * gpumem_alloc_entry takes an extra refcount. 
Put it only when - * destroying the context to keep the context record valid - */ - context->user_ctxt_record = gpumem_alloc_entry(context->dev_priv, - GEN7_CP_CTXRECORD_USER_RESTORE_SIZE, flags); - if (IS_ERR(context->user_ctxt_record)) { - int ret = PTR_ERR(context->user_ctxt_record); - - context->user_ctxt_record = NULL; - return ret; - } - - return 0; -} diff --git a/adreno_gen8.c b/adreno_gen8.c index 51f0b2d724..53fad796d9 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -2708,7 +2708,6 @@ const struct gen8_gpudev adreno_gen8_hwsched_gpudev = { .snapshot = gen8_hwsched_snapshot, .irq_handler = gen8_hwsched_irq_handler, .iommu_fault_block = gen8_iommu_fault_block, - .preemption_context_init = gen8_preemption_context_init, .context_detach = gen8_hwsched_context_detach, .read_alwayson = gen8_read_alwayson, .reset = gen8_hwsched_reset_replay, @@ -2744,7 +2743,6 @@ const struct gen8_gpudev adreno_gen8_gmu_gpudev = { .iommu_fault_block = gen8_iommu_fault_block, .reset = gen8_gmu_reset, .preemption_schedule = gen8_preemption_schedule, - .preemption_context_init = gen8_preemption_context_init, .read_alwayson = gen8_read_alwayson, .power_ops = &gen8_gmu_power_ops, .remove = gen8_remove, diff --git a/adreno_gen8.h b/adreno_gen8.h index eb1f07d977..69d2f65d69 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -229,8 +229,6 @@ struct gen8_cp_smmu_info { #define GEN8_CP_CTXRECORD_SIZE_IN_BYTES (13536 * SZ_1K) /* Size of preemption record to be dumped in snapshot */ #define GEN8_SNAPSHOT_CTXRECORD_SIZE_IN_BYTES (128 * 1024) -/* Size of the user context record block (in bytes) */ -#define GEN8_CP_CTXRECORD_USER_RESTORE_SIZE (192 * 1024) /* Size of the performance counter save/restore block (in bytes) */ #define GEN8_CP_PERFCOUNTER_SAVE_RESTORE_SIZE (4 * 1024) @@ -301,8 +299,6 @@ u32 gen8_set_marker(u32 *cmds, enum adreno_cp_marker_type type); void gen8_preemption_callback(struct adreno_device *adreno_dev, int bit); -int gen8_preemption_context_init(struct kgsl_context 
*context); - void gen8_preemption_context_destroy(struct kgsl_context *context); void gen8_preemption_prepare_postamble(struct adreno_device *adreno_dev); diff --git a/adreno_gen8_preempt.c b/adreno_gen8_preempt.c index cf8ecd8141..8d9432a7c2 100644 --- a/adreno_gen8_preempt.c +++ b/adreno_gen8_preempt.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include "adreno.h" @@ -774,34 +774,3 @@ done: clear_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv); return ret; } - -int gen8_preemption_context_init(struct kgsl_context *context) -{ - struct kgsl_device *device = context->device; - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - u64 flags = 0; - - if (!adreno_preemption_feature_set(adreno_dev)) - return 0; - - if (context->flags & KGSL_CONTEXT_SECURE) - flags |= KGSL_MEMFLAGS_SECURE; - - if (is_compat_task()) - flags |= KGSL_MEMFLAGS_FORCE_32BIT; - - /* - * gpumem_alloc_entry takes an extra refcount. Put it only when - * destroying the context to keep the context record valid - */ - context->user_ctxt_record = gpumem_alloc_entry(context->dev_priv, - GEN8_CP_CTXRECORD_USER_RESTORE_SIZE, flags); - if (IS_ERR(context->user_ctxt_record)) { - int ret = PTR_ERR(context->user_ctxt_record); - - context->user_ctxt_record = NULL; - return ret; - } - - return 0; -} From 05b37871cff2fe67c9f3d0e18b0197b4904b0c20 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Wed, 24 Jan 2024 11:54:52 -0700 Subject: [PATCH 0682/1016] kgsl: hwfence: Fix indexing issue when handling F2H_MSG_SYNCOBJ_QUERY As part of handling F2H_MSG_SYNCOBJ_QUERY packet in kgsl, we currently are not ignoring signaled sw-fences that might be part of the queried sync object. 
This can be fixed by skipping any software dma fences in the sync object as part of handling F2H_MSG_SYNCOBJ_QUERY packet. However, a better fix is to keep one-to-one mapping between each hardware fence and its index when composing the H2F_MSG_ISSUE_SYNCOBJ packet. We can easily achieve this by introducing an array of hardware fences for each sync object. This also simplifies how we walk a sync object to find all the hardware fences in it. Define new macros to derive the maximum number of hardware fences that can be embedded in a single H2F_MSG_ISSUE_SYNCOBJ packet. If a sync object contains more than the capped number of hardware fences then set KGSL_SYNCOBJ_SW flag for this sync object. Change-Id: I4ab03fab82475e5d35ebb785b41a19265661aa45 Signed-off-by: Harshdeep Dhatt --- adreno_gen7_hwsched.c | 4 ++ adreno_gen7_hwsched_hfi.c | 107 +++++++++++------------------------- adreno_gen8_hwsched.c | 4 ++ adreno_gen8_hwsched_hfi.c | 110 +++++++++++--------------------------- adreno_hfi.h | 8 +++ adreno_hwsched.c | 43 +++++---------- kgsl_device.h | 5 ++ kgsl_drawobj.c | 1 + kgsl_drawobj.h | 4 +- kgsl_sync.c | 55 ++++++++++++------- kgsl_sync.h | 7 +++ 11 files changed, 142 insertions(+), 206 deletions(-) diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 754bb94bbf..ca52a35768 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -1859,6 +1859,10 @@ int gen7_hwsched_probe(struct platform_device *pdev, kgsl_mmu_set_feature(device, KGSL_MMU_PAGEFAULT_TERMINATE); + if (ADRENO_FEATURE(adreno_dev, ADRENO_HW_FENCE)) + device->max_syncobj_hw_fence_count = min_t(u32, HFI_SYNCOBJ_LEGACY_HW_FENCE_MAX, + MAX_SYNCOBJ_QUERY_BITS); + ret = adreno_hwsched_init(adreno_dev, &gen7_hwsched_ops); if (ret) dev_err(&pdev->dev, "adreno hardware scheduler init failed ret %d\n", ret); diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 3c397d8f06..37828850a3 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -4,7 +4,6 @@ * 
Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ -#include #include #include #include @@ -791,13 +790,13 @@ static bool fence_is_queried(struct hfi_syncobj_query_cmd *cmd, u32 fence_index) } static void set_fence_signal_bit(struct adreno_device *adreno_dev, - struct hfi_syncobj_query_cmd *reply, struct dma_fence *fence, u32 fence_index, - char *name) + struct hfi_syncobj_query_cmd *reply, struct dma_fence *fence, u32 fence_index) { u32 index = GET_QUERIED_FENCE_INDEX(fence_index); u32 bit = GET_QUERIED_FENCE_BIT(fence_index); struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); u64 flags = ADRENO_HW_FENCE_SW_STATUS_PENDING; + char name[KGSL_FENCE_NAME_LEN]; char value[32] = "unknown"; if (fence->ops->timeline_value_str) @@ -810,6 +809,9 @@ static void set_fence_signal_bit(struct adreno_device *adreno_dev, reply->queries[index].query_bitmask |= BIT(bit); flags = ADRENO_HW_FENCE_SW_STATUS_SIGNALED; } + + kgsl_get_fence_name(fence, name, sizeof(name)); + trace_adreno_hw_fence_query(fence->context, fence->seqno, flags, name, value); } @@ -817,34 +819,17 @@ static void gen7_syncobj_query_reply(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj, struct hfi_syncobj_query_cmd *cmd) { struct hfi_syncobj_query_cmd reply = {0}; - int i, j, fence_index = 0; + int i; struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); - for (i = 0; i < syncobj->numsyncs; i++) { - struct kgsl_drawobj_sync_event *event = &syncobj->synclist[i]; - struct kgsl_sync_fence_cb *kcb = event->handle; - struct dma_fence **fences; - struct dma_fence_array *array; - struct event_fence_info *info = event->priv; - u32 num_fences; + for (i = 0; i < syncobj->num_hw_fence; i++) { + struct dma_fence *fence = syncobj->hw_fences[i]; - array = to_dma_fence_array(kcb->fence); - if (array != NULL) { - num_fences = array->num_fences; - fences = array->fences; - } else { - num_fences = 1; - fences = 
&kcb->fence; - } + if (!fence_is_queried(cmd, i)) + continue; - for (j = 0; j < num_fences; j++, fence_index++) { - if (!fence_is_queried(cmd, fence_index)) - continue; - - set_fence_signal_bit(adreno_dev, &reply, fences[j], fence_index, - info ? info->fences[j].name : "unknown"); - } + set_fence_signal_bit(adreno_dev, &reply, fence, i); } reply.hdr = CREATE_MSG_HDR(F2H_MSG_SYNCOBJ_QUERY, HFI_MSG_CMD); @@ -3055,7 +3040,7 @@ int gen7_gmu_context_queue_write(struct adreno_device *adreno_dev, struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); trace_adreno_syncobj_submitted(drawobj->context->id, drawobj->timestamp, - syncobj->numsyncs, gpudev->read_alwayson(adreno_dev)); + syncobj->num_hw_fence, gpudev->read_alwayson(adreno_dev)); goto done; } @@ -3119,7 +3104,7 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj, void *cmdbuf) { struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context); - int i, j; + int i; u32 cmd_sizebytes; struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); struct hfi_submit_syncobj *cmd; @@ -3140,61 +3125,31 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, cmd->num_syncobj = syncobj->num_hw_fence; obj = (struct hfi_syncobj_legacy *)&cmd[1]; - for (i = 0; i < syncobj->numsyncs; i++) { - struct kgsl_drawobj_sync_event *event = &syncobj->synclist[i]; - struct kgsl_sync_fence_cb *kcb = event->handle; - struct dma_fence **fences; - struct dma_fence_array *array; - u32 num_fences; + for (i = 0; i < syncobj->num_hw_fence; i++) { + struct dma_fence *fence = syncobj->hw_fences[i]; - if (!kcb) - return -EINVAL; - - array = to_dma_fence_array(kcb->fence); - if (array != NULL) { - num_fences = array->num_fences; - fences = array->fences; + if (is_kgsl_fence(fence)) { + populate_kgsl_fence(obj, fence); } else { - num_fences = 1; - fences = &kcb->fence; - } + int ret = kgsl_hw_fence_add_waiter(device, fence, NULL); - for (j = 0; j < num_fences; j++) { - - /* - * If this sync object has a 
software only fence, make sure that it is - * already signaled so that we can skip sending this fence to the GMU. - */ - if (!kgsl_is_hw_fence(fences[j])) { - if (WARN(!dma_fence_is_signaled(fences[j]), - "sync object has unsignaled software fence")) - return -EINVAL; - continue; + if (ret) { + syncobj->flags &= ~KGSL_SYNCOBJ_HW; + return ret; } - if (is_kgsl_fence(fences[j])) { - populate_kgsl_fence(obj, fences[j]); - } else { - int ret = kgsl_hw_fence_add_waiter(device, fences[j], NULL); + if (kgsl_hw_fence_signaled(fence) || + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + obj->flags |= BIT(GMU_SYNCOBJ_FLAG_SIGNALED_BIT); - if (ret) { - syncobj->flags &= ~KGSL_SYNCOBJ_HW; - return ret; - } - - if (kgsl_hw_fence_signaled(fences[j]) || - test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fences[j]->flags)) - obj->flags |= BIT(GMU_SYNCOBJ_FLAG_SIGNALED_BIT); - - obj->ctxt_id = fences[j]->context; - obj->seq_no = fences[j]->seqno; - } - trace_adreno_input_hw_fence(drawobj->context->id, obj->ctxt_id, - obj->seq_no, obj->flags, fences[j]->ops->get_timeline_name ? - fences[j]->ops->get_timeline_name(fences[j]) : "unknown"); - - obj++; + obj->ctxt_id = fence->context; + obj->seq_no = fence->seqno; } + trace_adreno_input_hw_fence(drawobj->context->id, obj->ctxt_id, + obj->seq_no, obj->flags, fence->ops->get_timeline_name ? 
+ fence->ops->get_timeline_name(fence) : "unknown"); + + obj++; } /* diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 5d901f93d4..0d5d021ca2 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -1900,6 +1900,10 @@ int gen8_hwsched_probe(struct platform_device *pdev, kgsl_mmu_set_feature(device, KGSL_MMU_PAGEFAULT_TERMINATE); + if (ADRENO_FEATURE(adreno_dev, ADRENO_HW_FENCE)) + device->max_syncobj_hw_fence_count = min_t(u32, HFI_SYNCOBJ_HW_FENCE_MAX, + MAX_SYNCOBJ_QUERY_BITS); + return adreno_hwsched_init(adreno_dev, &gen8_hwsched_ops); } diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 41757ffd9b..ab23788e9b 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -4,7 +4,6 @@ * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ -#include #include #include #include @@ -783,13 +782,13 @@ static bool fence_is_queried(struct hfi_syncobj_query_cmd *cmd, u32 fence_index) } static void set_fence_signal_bit(struct adreno_device *adreno_dev, - struct hfi_syncobj_query_cmd *reply, struct dma_fence *fence, u32 fence_index, - char *name) + struct hfi_syncobj_query_cmd *reply, struct dma_fence *fence, u32 fence_index) { u32 index = GET_QUERIED_FENCE_INDEX(fence_index); u32 bit = GET_QUERIED_FENCE_BIT(fence_index); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); u64 flags = ADRENO_HW_FENCE_SW_STATUS_PENDING; + char name[KGSL_FENCE_NAME_LEN]; char value[32] = "unknown"; if (fence->ops->timeline_value_str) @@ -802,6 +801,8 @@ static void set_fence_signal_bit(struct adreno_device *adreno_dev, reply->queries[index].query_bitmask |= BIT(bit); flags = ADRENO_HW_FENCE_SW_STATUS_SIGNALED; } + kgsl_get_fence_name(fence, name, sizeof(name)); + trace_adreno_hw_fence_query(fence->context, fence->seqno, flags, name, value); } @@ -809,34 +810,17 @@ static void gen8_syncobj_query_reply(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj, struct hfi_syncobj_query_cmd *cmd) { 
struct hfi_syncobj_query_cmd reply = {0}; - int i, j, fence_index = 0; + int i; struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); - for (i = 0; i < syncobj->numsyncs; i++) { - struct kgsl_drawobj_sync_event *event = &syncobj->synclist[i]; - struct kgsl_sync_fence_cb *kcb = event->handle; - struct dma_fence **fences; - struct dma_fence_array *array; - struct event_fence_info *info = event->priv; - u32 num_fences; + for (i = 0; i < syncobj->num_hw_fence; i++) { + struct dma_fence *fence = syncobj->hw_fences[i]; - array = to_dma_fence_array(kcb->fence); - if (array != NULL) { - num_fences = array->num_fences; - fences = array->fences; - } else { - num_fences = 1; - fences = &kcb->fence; - } + if (!fence_is_queried(cmd, i)) + continue; - for (j = 0; j < num_fences; j++, fence_index++) { - if (!fence_is_queried(cmd, fence_index)) - continue; - - set_fence_signal_bit(adreno_dev, &reply, fences[j], fence_index, - info ? info->fences[j].name : "unknown"); - } + set_fence_signal_bit(adreno_dev, &reply, fence, i); } reply.hdr = CREATE_MSG_HDR(F2H_MSG_SYNCOBJ_QUERY, HFI_MSG_CMD); @@ -2976,7 +2960,7 @@ int gen8_gmu_context_queue_write(struct adreno_device *adreno_dev, struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); trace_adreno_syncobj_submitted(drawobj->context->id, drawobj->timestamp, - syncobj->numsyncs, gpudev->read_alwayson(adreno_dev)); + syncobj->num_hw_fence, gpudev->read_alwayson(adreno_dev)); goto done; } @@ -3037,7 +3021,7 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj, void *cmdbuf) { struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context); - int i, j; + int i; u32 cmd_sizebytes, seqnum; struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); struct hfi_submit_syncobj *cmd; @@ -3057,63 +3041,33 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, cmd->num_syncobj = syncobj->num_hw_fence; obj = (struct hfi_syncobj 
*)&cmd[1]; - for (i = 0; i < syncobj->numsyncs; i++) { - struct kgsl_drawobj_sync_event *event = &syncobj->synclist[i]; - struct kgsl_sync_fence_cb *kcb = event->handle; - struct dma_fence **fences; - struct dma_fence_array *array; - u32 num_fences; + for (i = 0; i < syncobj->num_hw_fence; i++) { + struct dma_fence *fence = syncobj->hw_fences[i]; - if (!kcb) - return -EINVAL; - - array = to_dma_fence_array(kcb->fence); - if (array != NULL) { - num_fences = array->num_fences; - fences = array->fences; + if (is_kgsl_fence(fence)) { + populate_kgsl_fence(obj, fence); } else { - num_fences = 1; - fences = &kcb->fence; - } + int ret = kgsl_hw_fence_add_waiter(device, fence, + &obj->hash_index); - for (j = 0; j < num_fences; j++) { - - /* - * If this sync object has a software only fence, make sure that it is - * already signaled so that we can skip sending this fence to the GMU. - */ - if (!kgsl_is_hw_fence(fences[j])) { - if (WARN(!dma_fence_is_signaled(fences[j]), - "sync object has unsignaled software fence")) - return -EINVAL; - continue; + if (ret) { + syncobj->flags &= ~KGSL_SYNCOBJ_HW; + return ret; } - if (is_kgsl_fence(fences[j])) { - populate_kgsl_fence(obj, fences[j]); - } else { - int ret = kgsl_hw_fence_add_waiter(device, fences[j], - &obj->hash_index); + if (kgsl_hw_fence_signaled(fence) || + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + obj->flags |= BIT(GMU_SYNCOBJ_FLAG_SIGNALED_BIT); - if (ret) { - syncobj->flags &= ~KGSL_SYNCOBJ_HW; - return ret; - } - - if (kgsl_hw_fence_signaled(fences[j]) || - test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fences[j]->flags)) - obj->flags |= BIT(GMU_SYNCOBJ_FLAG_SIGNALED_BIT); - - obj->ctxt_id = fences[j]->context; - obj->seq_no = fences[j]->seqno; - } - trace_adreno_input_hw_fence(drawobj->context->id, obj->ctxt_id, - obj->seq_no, obj->flags, fences[j]->ops->get_timeline_name ? 
- fences[j]->ops->get_timeline_name(fences[j]) : "unknown"); - - obj->header = FIELD_PREP(GENMASK(15, 0), sizeof(*obj) >> 2); - obj++; + obj->ctxt_id = fence->context; + obj->seq_no = fence->seqno; } + trace_adreno_input_hw_fence(drawobj->context->id, obj->ctxt_id, + obj->seq_no, obj->flags, fence->ops->get_timeline_name ? + fence->ops->get_timeline_name(fence) : "unknown"); + + obj->header = FIELD_PREP(GENMASK(15, 0), sizeof(*obj) >> 2); + obj++; } /* diff --git a/adreno_hfi.h b/adreno_hfi.h index 75215e97f5..385366b005 100644 --- a/adreno_hfi.h +++ b/adreno_hfi.h @@ -992,6 +992,14 @@ struct hfi_submit_syncobj { u32 num_syncobj; } __packed; +#define HFI_SYNCOBJ_LEGACY_HW_FENCE_MAX \ + ((HFI_MAX_MSG_SIZE - sizeof(struct hfi_submit_syncobj)) \ + / sizeof(struct hfi_syncobj_legacy)) + +#define HFI_SYNCOBJ_HW_FENCE_MAX \ + ((HFI_MAX_MSG_SIZE - sizeof(struct hfi_submit_syncobj)) \ + / sizeof(struct hfi_syncobj)) + struct hfi_log_block { u32 hdr; u32 version; diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 0cad8a113e..816023652a 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -4,9 +4,6 @@ * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ -#include -#include - #include "adreno.h" #include "adreno_hfi.h" #include "adreno_snapshot.h" @@ -14,6 +11,7 @@ #include "adreno_trace.h" #include "kgsl_timeline.h" #include +#include /* * Number of commands that can be queued in a context before it sleeps @@ -1639,39 +1637,22 @@ static bool context_is_throttled(struct kgsl_device *device, static void _print_syncobj(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj) { - int i, j, fence_index = 0; + int i; struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - for (i = 0; i < syncobj->numsyncs; i++) { - struct kgsl_drawobj_sync_event *event = &syncobj->synclist[i]; - struct kgsl_sync_fence_cb *kcb = event->handle; - struct dma_fence **fences; - struct dma_fence_array *array; - u32 num_fences; + for (i = 0; i < syncobj->num_hw_fence; i++) { + struct dma_fence *fence = syncobj->hw_fences[i]; + bool kgsl = is_kgsl_fence(fence); + bool signaled = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags); + char value[32] = "unknown"; - array = to_dma_fence_array(kcb->fence); - if (array != NULL) { - num_fences = array->num_fences; - fences = array->fences; - } else { - num_fences = 1; - fences = &kcb->fence; - } + if (fence->ops->timeline_value_str) + fence->ops->timeline_value_str(fence, value, sizeof(value)); - for (j = 0; j < num_fences; j++, fence_index++) { - bool kgsl = is_kgsl_fence(fences[j]); - bool signaled = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fences[j]->flags); - char value[32] = "unknown"; - - if (fences[j]->ops->timeline_value_str) - fences[j]->ops->timeline_value_str(fences[j], value, sizeof(value)); - - dev_err(device->dev, - "dma fence[%d] signaled:%d kgsl:%d ctx:%llu seqno:%llu value:%s\n", - fence_index, signaled, kgsl, fences[j]->context, fences[j]->seqno, - value); - } + dev_err(device->dev, + "dma fence[%d] signaled:%d kgsl:%d ctx:%llu seqno:%llu value:%s\n", + i, signaled, kgsl, fence->context, fence->seqno, value); } } diff 
--git a/kgsl_device.h b/kgsl_device.h index dd567fba02..6b2d4300ea 100644 --- a/kgsl_device.h +++ b/kgsl_device.h @@ -344,6 +344,11 @@ struct kgsl_device { int freq_limiter_intr_num; /** @cx_host_irq_num: Interrupt number for cx_host_irq */ int cx_host_irq_num; + /** + * @max_syncobj_hw_fence_count: Maximum number of hardware fences that are allowed in a sync + * object + */ + u32 max_syncobj_hw_fence_count; }; #define KGSL_MMU_DEVICE(_mmu) \ diff --git a/kgsl_drawobj.c b/kgsl_drawobj.c index ab98fa2170..23f091997c 100644 --- a/kgsl_drawobj.c +++ b/kgsl_drawobj.c @@ -65,6 +65,7 @@ static void syncobj_destroy_object(struct kgsl_drawobj *drawobj) } } + kfree(syncobj->hw_fences); kfree(syncobj->synclist); kfree(syncobj); } diff --git a/kgsl_drawobj.h b/kgsl_drawobj.h index b32ba58873..eb3b40b1cb 100644 --- a/kgsl_drawobj.h +++ b/kgsl_drawobj.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2016-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __KGSL_DRAWOBJ_H @@ -116,6 +116,8 @@ struct kgsl_drawobj_sync { u32 flags; /** @num_hw_fence: number of hw fences in this syncobj */ u32 num_hw_fence; + /** @hw_fences: Array to hold pointers to hardware fences that are in this syncobj */ + struct dma_fence **hw_fences; }; #define KGSL_BINDOBJ_STATE_START 0 diff --git a/kgsl_sync.c b/kgsl_sync.c index 518089b389..70b13db1a1 100644 --- a/kgsl_sync.c +++ b/kgsl_sync.c @@ -704,22 +704,47 @@ bool is_kgsl_fence(struct dma_fence *f) static void kgsl_count_hw_fences(struct kgsl_drawobj_sync_event *event, struct dma_fence *fence) { - /* - * Even one unsignaled sw-only fence in this sync object means we can't send this sync - * object to the hardware - */ - if (event->syncobj->flags & KGSL_SYNCOBJ_SW) + struct kgsl_drawobj_sync *syncobj = event->syncobj; + u32 max_hw_fence = event->device->max_syncobj_hw_fence_count; + + if (syncobj->flags & KGSL_SYNCOBJ_SW) return; if (!kgsl_is_hw_fence(fence)) { - /* Ignore software fences that are already signaled */ + /* + * Ignore software fences that are already signaled. 
Even one unsignaled sw-only + * fence in this sync object means we can't send this sync object to the hardware + */ if (!dma_fence_is_signaled(fence)) - event->syncobj->flags |= KGSL_SYNCOBJ_SW; - } else { - event->syncobj->num_hw_fence++; + syncobj->flags |= KGSL_SYNCOBJ_SW; + return; } + + if (!syncobj->hw_fences) { + syncobj->hw_fences = kcalloc(max_hw_fence, sizeof(*syncobj->hw_fences), GFP_KERNEL); + if (!syncobj->hw_fences) { + syncobj->flags |= KGSL_SYNCOBJ_SW; + return; + } + } + + if (syncobj->num_hw_fence < max_hw_fence) + syncobj->hw_fences[syncobj->num_hw_fence++] = fence; + else + syncobj->flags |= KGSL_SYNCOBJ_SW; } +void kgsl_get_fence_name(struct dma_fence *f, char *name, u32 max_size) +{ + int len = scnprintf(name, max_size, "%s %s", f->ops->get_driver_name(f), + f->ops->get_timeline_name(f)); + + if (f->ops->fence_value_str) { + len += scnprintf(name + len, max_size - len, ": "); + f->ops->fence_value_str(f, name + len, max_size - len); + } + +} void kgsl_get_fence_info(struct kgsl_drawobj_sync_event *event) { unsigned int num_fences; @@ -752,18 +777,8 @@ void kgsl_get_fence_info(struct kgsl_drawobj_sync_event *event) for (i = 0; i < num_fences; i++) { struct dma_fence *f = fences[i]; struct fence_info *fi = &info_ptr->fences[i]; - int len; - len = scnprintf(fi->name, sizeof(fi->name), "%s %s", - f->ops->get_driver_name(f), - f->ops->get_timeline_name(f)); - - if (f->ops->fence_value_str) { - len += scnprintf(fi->name + len, sizeof(fi->name) - len, - ": "); - f->ops->fence_value_str(f, fi->name + len, - sizeof(fi->name) - len); - } + kgsl_get_fence_name(f, fi->name, sizeof(fi->name)); kgsl_count_hw_fences(event, f); } diff --git a/kgsl_sync.h b/kgsl_sync.h index 7be684006f..dc46c27bfe 100644 --- a/kgsl_sync.h +++ b/kgsl_sync.h @@ -133,6 +133,8 @@ bool kgsl_hw_fence_signaled(struct dma_fence *fence); bool kgsl_is_hw_fence(struct dma_fence *fence); +void kgsl_get_fence_name(struct dma_fence *f, char *name, u32 max_size); + #else static inline int 
kgsl_add_fence_event(struct kgsl_device *device, u32 context_id, u32 timestamp, void __user *data, int len, @@ -271,6 +273,11 @@ bool kgsl_is_hw_fence(struct dma_fence *fence) return false; } +void kgsl_get_fence_name(struct dma_fence *f, char *name, u32 max_size) +{ + +} + #endif /* CONFIG_SYNC_FILE */ #endif /* __KGSL_SYNC_H */ From fec653092616902f1e3a3555aea07add5c134b79 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Mon, 29 Jan 2024 13:11:41 -0700 Subject: [PATCH 0683/1016] kgsl: hwfence: Get kgsl fence context refcount Some sync objects can contain input fence which is a kgsl fence and belongs to a kgsl context. It is possible that this context gets detached and destroyed before GMU processes this sync object. This can create functional issues since the memstore eop/sop of this context will not be reliable anymore. Hence, take a refcount of this context before dispatching this sync object to GMU, and put it back only when we know that GMU has retired this sync object. If the refcount fails, then it means that the context is going away and we can deem this fence as signaled in that case. 
Change-Id: I70235873caa1c0f3f46df39b1aceee9c9e41c168 Signed-off-by: Harshdeep Dhatt --- adreno_gen7_hwsched_hfi.c | 27 ++++++++++++++------------- adreno_gen8_hwsched_hfi.c | 27 ++++++++++++++------------- adreno_hwsched.c | 14 +++++++++++++- adreno_hwsched.h | 7 +++++++ kgsl_drawobj.h | 15 +++++++++++++-- kgsl_sync.c | 2 +- 6 files changed, 62 insertions(+), 30 deletions(-) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 37828850a3..4db07ccb41 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -824,7 +824,7 @@ static void gen7_syncobj_query_reply(struct adreno_device *adreno_dev, const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); for (i = 0; i < syncobj->num_hw_fence; i++) { - struct dma_fence *fence = syncobj->hw_fences[i]; + struct dma_fence *fence = syncobj->hw_fences[i].fence; if (!fence_is_queried(cmd, i)) continue; @@ -3078,25 +3078,25 @@ static u32 get_irq_bit(struct adreno_device *adreno_dev, struct kgsl_drawobj *dr return 0; } -static void populate_kgsl_fence(struct hfi_syncobj_legacy *obj, - struct dma_fence *fence) +static void populate_kgsl_fence(struct kgsl_drawobj_sync_hw_fence *hw_fence, + struct hfi_syncobj_legacy *obj) { - struct kgsl_sync_fence *kfence = (struct kgsl_sync_fence *)fence; + struct kgsl_sync_fence *kfence = (struct kgsl_sync_fence *)hw_fence->fence; struct kgsl_sync_timeline *ktimeline = kfence->parent; unsigned long flags; obj->flags |= BIT(GMU_SYNCOBJ_FLAG_KGSL_FENCE_BIT); spin_lock_irqsave(&ktimeline->lock, flags); - /* If the context is going away or the dma fence is signaled, mark the fence as triggered */ - if (!ktimeline->context || dma_fence_is_signaled_locked(fence)) { - obj->flags |= BIT(GMU_SYNCOBJ_FLAG_SIGNALED_BIT); - spin_unlock_irqrestore(&ktimeline->lock, flags); - return; - } - obj->ctxt_id = ktimeline->context->id; + + if (dma_fence_is_signaled_locked(&kfence->fence) || !_kgsl_context_get(ktimeline->context)) + obj->flags |= 
BIT(GMU_SYNCOBJ_FLAG_KGSL_FENCE_BIT); + else + hw_fence->context = ktimeline->context; + spin_unlock_irqrestore(&ktimeline->lock, flags); + obj->ctxt_id = kfence->context_id; obj->seq_no = kfence->timestamp; } @@ -3126,14 +3126,15 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, obj = (struct hfi_syncobj_legacy *)&cmd[1]; for (i = 0; i < syncobj->num_hw_fence; i++) { - struct dma_fence *fence = syncobj->hw_fences[i]; + struct dma_fence *fence = syncobj->hw_fences[i].fence; if (is_kgsl_fence(fence)) { - populate_kgsl_fence(obj, fence); + populate_kgsl_fence(&syncobj->hw_fences[i], obj); } else { int ret = kgsl_hw_fence_add_waiter(device, fence, NULL); if (ret) { + adreno_hwsched_syncobj_kfence_put(syncobj); syncobj->flags &= ~KGSL_SYNCOBJ_HW; return ret; } diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index ab23788e9b..98fe62be96 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -815,7 +815,7 @@ static void gen8_syncobj_query_reply(struct adreno_device *adreno_dev, const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); for (i = 0; i < syncobj->num_hw_fence; i++) { - struct dma_fence *fence = syncobj->hw_fences[i]; + struct dma_fence *fence = syncobj->hw_fences[i].fence; if (!fence_is_queried(cmd, i)) continue; @@ -2995,25 +2995,25 @@ static u32 get_irq_bit(struct adreno_device *adreno_dev, struct kgsl_drawobj *dr return 0; } -static void populate_kgsl_fence(struct hfi_syncobj *obj, - struct dma_fence *fence) +static void populate_kgsl_fence(struct kgsl_drawobj_sync_hw_fence *hw_fence, + struct hfi_syncobj *obj) { - struct kgsl_sync_fence *kfence = (struct kgsl_sync_fence *)fence; + struct kgsl_sync_fence *kfence = (struct kgsl_sync_fence *)hw_fence->fence; struct kgsl_sync_timeline *ktimeline = kfence->parent; unsigned long flags; obj->flags |= BIT(GMU_SYNCOBJ_FLAG_KGSL_FENCE_BIT); spin_lock_irqsave(&ktimeline->lock, flags); - /* If the context is going away or the dma fence is signaled, mark 
the fence as triggered */ - if (!ktimeline->context || dma_fence_is_signaled_locked(fence)) { - obj->flags |= BIT(GMU_SYNCOBJ_FLAG_SIGNALED_BIT); - spin_unlock_irqrestore(&ktimeline->lock, flags); - return; - } - obj->ctxt_id = ktimeline->context->id; + + if (dma_fence_is_signaled_locked(&kfence->fence) || !_kgsl_context_get(ktimeline->context)) + obj->flags |= BIT(GMU_SYNCOBJ_FLAG_KGSL_FENCE_BIT); + else + hw_fence->context = ktimeline->context; + spin_unlock_irqrestore(&ktimeline->lock, flags); + obj->ctxt_id = kfence->context_id; obj->seq_no = kfence->timestamp; } @@ -3042,15 +3042,16 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, obj = (struct hfi_syncobj *)&cmd[1]; for (i = 0; i < syncobj->num_hw_fence; i++) { - struct dma_fence *fence = syncobj->hw_fences[i]; + struct dma_fence *fence = syncobj->hw_fences[i].fence; if (is_kgsl_fence(fence)) { - populate_kgsl_fence(obj, fence); + populate_kgsl_fence(&syncobj->hw_fences[i], obj); } else { int ret = kgsl_hw_fence_add_waiter(device, fence, &obj->hash_index); if (ret) { + adreno_hwsched_syncobj_kfence_put(syncobj); syncobj->flags &= ~KGSL_SYNCOBJ_HW; return ret; } diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 816023652a..9c03de6242 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -1151,6 +1151,16 @@ void adreno_hwsched_retire_cmdobj(struct adreno_hwsched *hwsched, kgsl_drawobj_destroy(drawobj); } +void adreno_hwsched_syncobj_kfence_put(struct kgsl_drawobj_sync *syncobj) +{ + int i; + + for (i = 0; i < syncobj->num_hw_fence; i++) { + kgsl_context_put(syncobj->hw_fences[i].context); + syncobj->hw_fences[i].context = NULL; + } +} + static bool drawobj_retired(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj) { @@ -1166,6 +1176,7 @@ static bool drawobj_retired(struct adreno_device *adreno_dev, if (timestamp_cmp(drawobj->timestamp, hdr->sync_obj_ts) > 0) return false; + adreno_hwsched_syncobj_kfence_put(SYNCOBJ(drawobj)); trace_adreno_syncobj_retired(drawobj->context->id, 
drawobj->timestamp); kgsl_drawobj_destroy(drawobj); return true; @@ -1359,6 +1370,7 @@ static bool drawobj_replay(struct adreno_device *adreno_dev, if (kgsl_drawobj_events_pending(SYNCOBJ(drawobj))) return true; + adreno_hwsched_syncobj_kfence_put(SYNCOBJ(drawobj)); trace_adreno_syncobj_retired(drawobj->context->id, drawobj->timestamp); kgsl_drawobj_destroy(drawobj); return false; @@ -1642,7 +1654,7 @@ static void _print_syncobj(struct adreno_device *adreno_dev, struct kgsl_drawobj struct kgsl_device *device = KGSL_DEVICE(adreno_dev); for (i = 0; i < syncobj->num_hw_fence; i++) { - struct dma_fence *fence = syncobj->hw_fences[i]; + struct dma_fence *fence = syncobj->hw_fences[i].fence; bool kgsl = is_kgsl_fence(fence); bool signaled = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags); char value[32] = "unknown"; diff --git a/adreno_hwsched.h b/adreno_hwsched.h index 72acdd82b0..e827ee0db9 100644 --- a/adreno_hwsched.h +++ b/adreno_hwsched.h @@ -243,4 +243,11 @@ u32 adreno_hwsched_gpu_fault(struct adreno_device *adreno_dev); */ void adreno_hwsched_log_pending_hw_fences(struct adreno_device *adreno_dev, struct device *dev); +/** + * adreno_hwsched_syncobj_kfence_put - Put back kfence context refcounts for this sync object + * @syncobj: Pointer to the sync object + * + */ +void adreno_hwsched_syncobj_kfence_put(struct kgsl_drawobj_sync *syncobj); + #endif diff --git a/kgsl_drawobj.h b/kgsl_drawobj.h index eb3b40b1cb..abc4dfb464 100644 --- a/kgsl_drawobj.h +++ b/kgsl_drawobj.h @@ -94,6 +94,15 @@ struct kgsl_drawobj_cmd { /* This sync object can be sent to hardware */ #define KGSL_SYNCOBJ_HW BIT(1) +struct kgsl_drawobj_sync_hw_fence { + /** @fence: Pointer to hardware fence */ + struct dma_fence *fence; + /** + * context: Pointer to kgsl context if this hardware fence is owned by a kgsl context + */ + struct kgsl_context *context; +}; + /** * struct kgsl_drawobj_sync - KGSL sync object * @base: Base kgsl_drawobj, this needs to be the first entry @@ -116,8 +125,10 @@ 
struct kgsl_drawobj_sync { u32 flags; /** @num_hw_fence: number of hw fences in this syncobj */ u32 num_hw_fence; - /** @hw_fences: Array to hold pointers to hardware fences that are in this syncobj */ - struct dma_fence **hw_fences; + /** + * @hw_fences: Array to hold information regarding hardware fences that are in this syncobj + */ + struct kgsl_drawobj_sync_hw_fence *hw_fences; }; #define KGSL_BINDOBJ_STATE_START 0 diff --git a/kgsl_sync.c b/kgsl_sync.c index 70b13db1a1..d9bbe61175 100644 --- a/kgsl_sync.c +++ b/kgsl_sync.c @@ -729,7 +729,7 @@ static void kgsl_count_hw_fences(struct kgsl_drawobj_sync_event *event, struct d } if (syncobj->num_hw_fence < max_hw_fence) - syncobj->hw_fences[syncobj->num_hw_fence++] = fence; + syncobj->hw_fences[syncobj->num_hw_fence++].fence = fence; else syncobj->flags |= KGSL_SYNCOBJ_SW; } From 18ac7b95195fb1b41182b93b49f8658ddf8fbf35 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Wed, 24 Jan 2024 17:10:57 -0700 Subject: [PATCH 0684/1016] kgsl: hwfence: Update trace_adreno_syncobj_retired A new GMU tracepoint is added to accurately know when GMU retired a sync object. 
Change-Id: I7e73bf9f5e1734055d3bc576a78123bfa22ceffd Signed-off-by: Harshdeep Dhatt --- adreno_hwsched.c | 2 -- adreno_trace.h | 8 +++++--- kgsl_gmu_core.c | 7 +++++++ kgsl_gmu_core.h | 6 ++++++ 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 9c03de6242..54a2c4d2b5 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -1177,7 +1177,6 @@ static bool drawobj_retired(struct adreno_device *adreno_dev, return false; adreno_hwsched_syncobj_kfence_put(SYNCOBJ(drawobj)); - trace_adreno_syncobj_retired(drawobj->context->id, drawobj->timestamp); kgsl_drawobj_destroy(drawobj); return true; } @@ -1371,7 +1370,6 @@ static bool drawobj_replay(struct adreno_device *adreno_dev, return true; adreno_hwsched_syncobj_kfence_put(SYNCOBJ(drawobj)); - trace_adreno_syncobj_retired(drawobj->context->id, drawobj->timestamp); kgsl_drawobj_destroy(drawobj); return false; } diff --git a/adreno_trace.h b/adreno_trace.h index b2d55224a5..86c4da7345 100644 --- a/adreno_trace.h +++ b/adreno_trace.h @@ -166,18 +166,20 @@ TRACE_EVENT(adreno_syncobj_submitted, ); TRACE_EVENT(adreno_syncobj_retired, - TP_PROTO(u32 id, u32 timestamp), - TP_ARGS(id, timestamp), + TP_PROTO(u32 id, u32 timestamp, u64 ticks), + TP_ARGS(id, timestamp, ticks), TP_STRUCT__entry( __field(u32, id) __field(u32, timestamp) + __field(u64, ticks) ), TP_fast_assign( __entry->id = id; __entry->timestamp = timestamp; + __entry->ticks = ticks; ), TP_printk( - "ctx=%u ts=%u", __entry->id, __entry->timestamp) + "ctx=%u ts=%u ticks=%llu", __entry->id, __entry->timestamp, __entry->ticks) ); TRACE_EVENT(adreno_cmdbatch_submitted, diff --git a/kgsl_gmu_core.c b/kgsl_gmu_core.c index 7d25684820..f61e9afc64 100644 --- a/kgsl_gmu_core.c +++ b/kgsl_gmu_core.c @@ -256,6 +256,13 @@ static void stream_trace_data(struct gmu_trace_packet *pkt) data->flags, pkt->ticks); break; } + case GMU_TRACE_SYNCOBJ_RETIRE: { + struct trace_syncobj_retire *data = + (struct trace_syncobj_retire 
*)pkt->payload; + + trace_adreno_syncobj_retired(data->gmu_ctxt_id, data->timestamp, pkt->ticks); + break; + } default: { char str[64]; diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h index fcfd4945a6..51f8576a4f 100644 --- a/kgsl_gmu_core.h +++ b/kgsl_gmu_core.h @@ -277,6 +277,7 @@ enum gmu_trace_id { GMU_TRACE_PREEMPT_TRIGGER = 1, GMU_TRACE_PREEMPT_DONE = 2, GMU_TRACE_EXTERNAL_HW_FENCE_SIGNAL = 3, + GMU_TRACE_SYNCOBJ_RETIRE = 4, GMU_TRACE_MAX, }; @@ -298,6 +299,11 @@ struct trace_ext_hw_fence_signal { u32 flags; } __packed; +struct trace_syncobj_retire { + u32 gmu_ctxt_id; + u32 timestamp; +} __packed; + /** * struct kgsl_gmu_trace - wrapper for gmu trace memory object */ From 3979e38fe1e3c71861edc507b2f489771edf0b30 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Tue, 30 Jan 2024 16:29:06 -0700 Subject: [PATCH 0685/1016] kgsl: hwfence: Fix NULL pointer access when accessing defer_drawctxt Say the deferred drawctxt (stored in hfi.hw_fence.defer_drawctxt) gets handled during recovery in gen7/8_hwsched_disable_hw_fence_throttle(), hfi.hw_fence.defer_drawctxt will be set to NULL. Say now gen7/8_defer_hw_fence_work() gets the dispatcher and device mutexes, it will end up de-referencing the NULL pointer. Fix this by adding a NULL check for drawctxt in gen7/8_defer_hw_fence_work(). 
Change-Id: If9218ab4b55998ac46790295736fb6ad8ea1b1dd Signed-off-by: Harshdeep Dhatt --- adreno_gen7_hwsched_hfi.c | 24 +++++++++++++----------- adreno_gen8_hwsched_hfi.c | 24 +++++++++++++----------- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 4db07ccb41..cb6b623c4c 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -1115,20 +1115,14 @@ static void gen7_defer_hw_fence_work(struct kthread_work *work) { struct gen7_hwsched_hfi *hfi = container_of(work, struct gen7_hwsched_hfi, defer_hw_fence_work); - struct adreno_context *drawctxt = NULL; - struct kgsl_device *device; - struct adreno_device *adreno_dev; + struct gen7_hwsched_device *gen7_hwsched = container_of(hfi, struct gen7_hwsched_device, + hwsched_hfi); + struct adreno_context *drawctxt; + struct adreno_device *adreno_dev = &gen7_hwsched->gen7_dev.adreno_dev; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 ts; int ret; - spin_lock(&hfi->hw_fence.lock); - drawctxt = hfi->hw_fence.defer_drawctxt; - ts = hfi->hw_fence.defer_ts; - spin_unlock(&hfi->hw_fence.lock); - - device = drawctxt->base.device; - adreno_dev = ADRENO_DEVICE(device); - /* * Grab the dispatcher and device mutex as we don't want to race with concurrent fault * recovery @@ -1136,6 +1130,14 @@ static void gen7_defer_hw_fence_work(struct kthread_work *work) mutex_lock(&adreno_dev->hwsched.mutex); mutex_lock(&device->mutex); + spin_lock(&hfi->hw_fence.lock); + drawctxt = hfi->hw_fence.defer_drawctxt; + ts = hfi->hw_fence.defer_ts; + spin_unlock(&hfi->hw_fence.lock); + + if (!drawctxt) + goto unlock; + ret = process_hw_fence_deferred_ctxt(adreno_dev, drawctxt, ts); if (ret) { /* the deferred drawctxt will be handled post fault recovery */ diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 98fe62be96..1fb7c5c1ae 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -1105,20 +1105,14 @@ static void 
gen8_defer_hw_fence_work(struct kthread_work *work) { struct gen8_hwsched_hfi *hfi = container_of(work, struct gen8_hwsched_hfi, defer_hw_fence_work); - struct adreno_context *drawctxt = NULL; - struct kgsl_device *device; - struct adreno_device *adreno_dev; + struct gen8_hwsched_device *gen8_hwsched = container_of(hfi, struct gen8_hwsched_device, + hwsched_hfi); + struct adreno_context *drawctxt; + struct adreno_device *adreno_dev = &gen8_hwsched->gen8_dev.adreno_dev; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 ts; int ret; - spin_lock(&hfi->hw_fence.lock); - drawctxt = hfi->hw_fence.defer_drawctxt; - ts = hfi->hw_fence.defer_ts; - spin_unlock(&hfi->hw_fence.lock); - - device = drawctxt->base.device; - adreno_dev = ADRENO_DEVICE(device); - /* * Grab the dispatcher and device mutex as we don't want to race with concurrent fault * recovery @@ -1126,6 +1120,14 @@ static void gen8_defer_hw_fence_work(struct kthread_work *work) mutex_lock(&adreno_dev->hwsched.mutex); mutex_lock(&device->mutex); + spin_lock(&hfi->hw_fence.lock); + drawctxt = hfi->hw_fence.defer_drawctxt; + ts = hfi->hw_fence.defer_ts; + spin_unlock(&hfi->hw_fence.lock); + + if (!drawctxt) + goto unlock; + ret = process_hw_fence_deferred_ctxt(adreno_dev, drawctxt, ts); if (ret) { /* the deferred drawctxt will be handled post fault recovery */ From 1b033acffe9beaa6df6a7240b2fe8c96b9533cdd Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Mon, 5 Feb 2024 10:41:07 -0700 Subject: [PATCH 0686/1016] kgsl: hwfence: Clear hardware fence flag after synx_release() Client driver needs to clear SYNX_HW_FENCE_FLAG_ENABLED_BIT after calling synx_release(). This is because synx_release() doesn't have a way to get to the corresponding dma fence structure. This fixes an issue where synx_release() complains that it is being invoked on a hardware fence which has already been released. 
Change-Id: Icd7d51cfdb9ba8d3ef1c6c43a720cab90cdfc2d2 Signed-off-by: Harshdeep Dhatt --- kgsl_sync.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kgsl_sync.c b/kgsl_sync.c index d9bbe61175..aeaa847745 100644 --- a/kgsl_sync.c +++ b/kgsl_sync.c @@ -133,6 +133,12 @@ bool kgsl_hw_fence_tx_slot_available(struct kgsl_device *device, const atomic_t void kgsl_hw_fence_destroy(struct kgsl_sync_fence *kfence) { synx_release(kgsl_synx.handle, kfence->hw_fence_index); + + /* + * synx_release() doesn't have a way to get to the dma fence. Hence, the client must clear + * this bit from the dma fence flags. + */ + clear_bit(SYNX_HW_FENCE_FLAG_ENABLED_BIT, &kfence->fence.flags); } void kgsl_hw_fence_trigger_cpu(struct kgsl_device *device, struct kgsl_sync_fence *kfence) From 228f5a8e2a7f061ab950638a514deab4c0b00dae Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Thu, 29 Feb 2024 16:50:31 -0700 Subject: [PATCH 0687/1016] kgsl: hwfence: Avoid scenarios that call kgsl_hw_fence_destroy() Currently, we create a hardware fence first and then destroy it in some scenarios, for example if the timestamp is already retired, or if there isn't enough space in context's hw fence buffer. This causes a hardware fence leak because a hardware fence which is never sent to tx queue is never deleted from the global table. To fix this, call kgsl_hw_fence_create() where we are sure that the hardware fence will be sent to GMU and eventually be sent to tx queue. 
Change-Id: I05677a3441e742f7b4d93a6c23a3800ecf07dd78 Signed-off-by: Harshdeep Dhatt --- adreno_gen7_hwsched_hfi.c | 77 ++++++++++++++++++-------------------- adreno_gen8_hwsched_hfi.c | 78 +++++++++++++++++++-------------------- 2 files changed, 73 insertions(+), 82 deletions(-) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 3c397d8f06..a5c82b44b8 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -3329,6 +3329,10 @@ static inline int setup_hw_fence_info_cmd(struct adreno_device *adreno_dev, if (ret) return ret; + ret = kgsl_hw_fence_create(KGSL_DEVICE(adreno_dev), kfence); + if (ret) + return ret; + entry->cmd.gmu_ctxt_id = entry->drawctxt->base.id; entry->cmd.ctxt_id = kfence->fence.context; entry->cmd.ts = kfence->fence.seqno; @@ -3382,25 +3386,6 @@ int gen7_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev, return ret; } -/** - * drawctxt_queue_hw_fence - Add a hardware fence to draw context's hardware fence list and make - * sure the list remains sorted (with the fence with the largest timestamp at the end) - */ -static void drawctxt_queue_hw_fence(struct adreno_context *drawctxt, - struct adreno_hw_fence_entry *new) -{ - struct adreno_hw_fence_entry *entry = NULL; - u32 ts = (u32)new->cmd.ts; - - /* Walk the list backwards to find the right spot for this fence */ - list_for_each_entry_reverse(entry, &drawctxt->hw_fence_list, node) { - if (timestamp_cmp(ts, (u32)entry->cmd.ts) > 0) - break; - } - - list_add(&new->node, &entry->node); -} - #define DRAWCTXT_SLOT_AVAILABLE(count) \ ((count + 1) < (HW_FENCE_QUEUE_SIZE / sizeof(struct hfi_hw_fence_info))) @@ -3439,6 +3424,29 @@ static struct adreno_hw_fence_entry *allocate_hw_fence_entry(struct adreno_devic return entry; } +/** + * drawctxt_queue_hw_fence - Add a hardware fence to draw context's hardware fence list and make + * sure the list remains sorted (with the fence with the largest timestamp at the end) + */ +static void 
drawctxt_queue_hw_fence(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, struct kgsl_sync_fence *kfence) +{ + struct adreno_hw_fence_entry *new = allocate_hw_fence_entry(adreno_dev, drawctxt, kfence); + struct adreno_hw_fence_entry *entry = NULL; + u32 ts = kfence->timestamp; + + if (!new) + return; + + /* Walk the list backwards to find the right spot for this fence */ + list_for_each_entry_reverse(entry, &drawctxt->hw_fence_list, node) { + if (timestamp_cmp(ts, (u32)entry->cmd.ts) > 0) + break; + } + + list_add(&new->node, &entry->node); +} + static bool _hw_fence_end_sleep(struct adreno_device *adreno_dev) { struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); @@ -3509,10 +3517,6 @@ void gen7_hwsched_create_hw_fence(struct adreno_device *adreno_dev, struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); u32 retired = 0; int ret = 0; - bool destroy_hw_fence = true; - - if (kgsl_hw_fence_create(device, kfence)) - return; spin_lock(&drawctxt->lock); spin_lock(&hw_hfi->hw_fence.lock); @@ -3524,15 +3528,9 @@ void gen7_hwsched_create_hw_fence(struct adreno_device *adreno_dev, if (kgsl_context_is_bad(context)) goto done; - entry = allocate_hw_fence_entry(adreno_dev, drawctxt, kfence); - if (!entry) - goto done; - /* If recovery is imminent, then do not create a hardware fence */ - if (test_bit(GEN7_HWSCHED_HW_FENCE_ABORT_BIT, &hw_hfi->hw_fence.flags)) { - destroy_hw_fence = true; + if (test_bit(GEN7_HWSCHED_HW_FENCE_ABORT_BIT, &hw_hfi->hw_fence.flags)) goto done; - } ret = _hw_fence_sleep(adreno_dev, drawctxt); if (ret) @@ -3543,8 +3541,7 @@ void gen7_hwsched_create_hw_fence(struct adreno_device *adreno_dev, * list and return. This fence will be sent to GMU when this ts is dispatched to GMU. 
*/ if (timestamp_cmp(kfence->timestamp, drawctxt->gmu_hw_fence_ready_ts) > 0) { - drawctxt_queue_hw_fence(drawctxt, entry); - destroy_hw_fence = false; + drawctxt_queue_hw_fence(adreno_dev, drawctxt, kfence); goto done; } @@ -3560,6 +3557,10 @@ void gen7_hwsched_create_hw_fence(struct adreno_device *adreno_dev, goto done; } + entry = allocate_hw_fence_entry(adreno_dev, drawctxt, kfence); + if (!entry) + goto done; + /* * If timestamp is not retired then GMU must already be powered up. This is because SLUMBER * thread has to wait for hardware fence spinlock to make sure the hardware fence unack @@ -3568,22 +3569,16 @@ void gen7_hwsched_create_hw_fence(struct adreno_device *adreno_dev, ret = _send_hw_fence_no_ack(adreno_dev, entry); if (ret) { if (__ratelimit(&_rs)) - dev_err(&gmu->pdev->dev, "Aborting hw fence for ctx:%d ts:%d ret:%d\n", + dev_err(&gmu->pdev->dev, "hw fence for ctx:%d ts:%d ret:%d may not be destroyed\n", kfence->context_id, kfence->timestamp, ret); + gen7_remove_hw_fence_entry(adreno_dev, entry); + kgsl_hw_fence_destroy(kfence); goto done; } list_add_tail(&entry->node, &drawctxt->hw_fence_inflight_list); - destroy_hw_fence = false; - done: - if (destroy_hw_fence) { - kgsl_hw_fence_destroy(kfence); - if (entry) - gen7_remove_hw_fence_entry(adreno_dev, entry); - } - spin_unlock(&hw_hfi->hw_fence.lock); spin_unlock(&drawctxt->lock); } diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 41757ffd9b..195fe21fa8 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -3248,6 +3248,10 @@ static inline int setup_hw_fence_info_cmd(struct adreno_device *adreno_dev, if (ret) return ret; + ret = kgsl_hw_fence_create(KGSL_DEVICE(adreno_dev), kfence); + if (ret) + return ret; + entry->cmd.gmu_ctxt_id = entry->drawctxt->base.id; entry->cmd.ctxt_id = kfence->fence.context; entry->cmd.ts = kfence->fence.seqno; @@ -3301,25 +3305,6 @@ int gen8_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev, return ret; } -/** - * 
drawctxt_queue_hw_fence - Add a hardware fence to draw context's hardware fence list and make - * sure the list remains sorted (with the fence with the largest timestamp at the end) - */ -static void drawctxt_queue_hw_fence(struct adreno_context *drawctxt, - struct adreno_hw_fence_entry *new) -{ - struct adreno_hw_fence_entry *entry = NULL; - u32 ts = (u32)new->cmd.ts; - - /* Walk the list backwards to find the right spot for this fence */ - list_for_each_entry_reverse(entry, &drawctxt->hw_fence_list, node) { - if (timestamp_cmp(ts, (u32)entry->cmd.ts) > 0) - break; - } - - list_add(&new->node, &entry->node); -} - #define DRAWCTXT_SLOT_AVAILABLE(count) \ ((count + 1) < (HW_FENCE_QUEUE_SIZE / sizeof(struct hfi_hw_fence_info))) @@ -3358,6 +3343,29 @@ static struct adreno_hw_fence_entry *allocate_hw_fence_entry(struct adreno_devic return entry; } +/** + * drawctxt_queue_hw_fence - Add a hardware fence to draw context's hardware fence list and make + * sure the list remains sorted (with the fence with the largest timestamp at the end) + */ +static void drawctxt_queue_hw_fence(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, struct kgsl_sync_fence *kfence) +{ + struct adreno_hw_fence_entry *entry = NULL; + struct adreno_hw_fence_entry *new = allocate_hw_fence_entry(adreno_dev, drawctxt, kfence); + u32 ts = kfence->timestamp; + + if (!new) + return; + + /* Walk the list backwards to find the right spot for this fence */ + list_for_each_entry_reverse(entry, &drawctxt->hw_fence_list, node) { + if (timestamp_cmp(ts, (u32)entry->cmd.ts) > 0) + break; + } + + list_add(&new->node, &entry->node); +} + static bool _hw_fence_end_sleep(struct adreno_device *adreno_dev) { struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); @@ -3428,10 +3436,6 @@ void gen8_hwsched_create_hw_fence(struct adreno_device *adreno_dev, struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); u32 retired = 0; int ret = 0; - bool destroy_hw_fence = true; - - if 
(kgsl_hw_fence_create(device, kfence)) - return; spin_lock(&drawctxt->lock); spin_lock(&hw_hfi->hw_fence.lock); @@ -3443,15 +3447,9 @@ void gen8_hwsched_create_hw_fence(struct adreno_device *adreno_dev, if (kgsl_context_is_bad(context)) goto done; - entry = allocate_hw_fence_entry(adreno_dev, drawctxt, kfence); - if (!entry) - goto done; - /* If recovery is imminent, then do not create a hardware fence */ - if (test_bit(GEN8_HWSCHED_HW_FENCE_ABORT_BIT, &hw_hfi->hw_fence.flags)) { - destroy_hw_fence = true; + if (test_bit(GEN8_HWSCHED_HW_FENCE_ABORT_BIT, &hw_hfi->hw_fence.flags)) goto done; - } ret = _hw_fence_sleep(adreno_dev, drawctxt); if (ret) @@ -3462,8 +3460,7 @@ void gen8_hwsched_create_hw_fence(struct adreno_device *adreno_dev, * list and return. This fence will be sent to GMU when this ts is dispatched to GMU. */ if (timestamp_cmp(kfence->timestamp, drawctxt->gmu_hw_fence_ready_ts) > 0) { - drawctxt_queue_hw_fence(drawctxt, entry); - destroy_hw_fence = false; + drawctxt_queue_hw_fence(adreno_dev, drawctxt, kfence); goto done; } @@ -3479,6 +3476,10 @@ void gen8_hwsched_create_hw_fence(struct adreno_device *adreno_dev, goto done; } + entry = allocate_hw_fence_entry(adreno_dev, drawctxt, kfence); + if (!entry) + goto done; + /* * If timestamp is not retired then GMU must already be powered up. 
This is because SLUMBER * thread has to wait for hardware fence spinlock to make sure the hardware fence unack @@ -3487,22 +3488,17 @@ void gen8_hwsched_create_hw_fence(struct adreno_device *adreno_dev, ret = _send_hw_fence_no_ack(adreno_dev, entry); if (ret) { if (__ratelimit(&_rs)) - dev_err(&gmu->pdev->dev, "Aborting hw fence for ctx:%d ts:%d ret:%d\n", + dev_err(&gmu->pdev->dev, + "hw fence for ctx:%d ts:%d ret:%d may not be destroyed\n", kfence->context_id, kfence->timestamp, ret); + kgsl_hw_fence_destroy(kfence); + gen8_remove_hw_fence_entry(adreno_dev, entry); goto done; } list_add_tail(&entry->node, &drawctxt->hw_fence_inflight_list); - destroy_hw_fence = false; - done: - if (destroy_hw_fence) { - kgsl_hw_fence_destroy(kfence); - if (entry) - gen8_remove_hw_fence_entry(adreno_dev, entry); - } - spin_unlock(&hw_hfi->hw_fence.lock); spin_unlock(&drawctxt->lock); } From 413bcf24f783d13af3aa330080aa647f45f82671 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Thu, 29 Feb 2024 17:21:46 -0700 Subject: [PATCH 0688/1016] kgsl: hwsched: Add missing rmb() when reading HFI_MSG_ID queue Currently, in peek_next_header(), queue[hdr->read_index] can get re-ordered and get executed before GMU has written the packets in the queue. This can return stale data from the queue. Add a rmb() to fix this. 
Change-Id: Iaa2cf1e43bc507751906f67685d197466674820b Signed-off-by: Harshdeep Dhatt --- adreno_gen7_hwsched_hfi.c | 48 +++++++++++++++++++++------------------ adreno_gen8_hwsched_hfi.c | 48 +++++++++++++++++++++------------------ 2 files changed, 52 insertions(+), 44 deletions(-) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index a5c82b44b8..99f967a8ab 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -749,17 +749,37 @@ static void log_gpu_fault(struct adreno_device *adreno_dev) } } -static u32 peek_next_header(struct gen7_gmu_device *gmu, uint32_t queue_idx) +static bool is_queue_empty(struct adreno_device *adreno_dev, u32 queue_idx) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem; + struct hfi_queue_table *tbl = mem_addr->hostptr; + struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx]; + + if (hdr->status == HFI_QUEUE_STATUS_DISABLED) + return true; + + if (hdr->read_index == hdr->write_index) + return true; + + /* + * This is to ensure that the queue is not read speculatively before the queue empty + * condition is evaluated + */ + rmb(); + + return false; +} + +static u32 peek_next_header(struct adreno_device *adreno_dev, struct gen7_gmu_device *gmu, + u32 queue_idx) { struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem; struct hfi_queue_table *tbl = mem_addr->hostptr; struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx]; u32 *queue; - if (hdr->status == HFI_QUEUE_STATUS_DISABLED) - return 0; - - if (hdr->read_index == hdr->write_index) + if (is_queue_empty(adreno_dev, queue_idx)) return 0; queue = HOST_QUEUE_START_ADDR(mem_addr, queue_idx); @@ -1229,7 +1249,7 @@ void gen7_hwsched_process_msgq(struct adreno_device *adreno_dev) mutex_lock(&hw_hfi->msgq_mutex); for (;;) { - next_hdr = peek_next_header(gmu, HFI_MSG_ID); + next_hdr = peek_next_header(adreno_dev, gmu, HFI_MSG_ID); if (!next_hdr) break; @@ -2608,22 +2628,6 @@ int gen7_hwsched_lpac_cp_init(struct 
adreno_device *adreno_dev) "LPAC CP initialization failed to idle\n"); } -static bool is_queue_empty(struct adreno_device *adreno_dev, u32 queue_idx) -{ - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem; - struct hfi_queue_table *tbl = mem_addr->hostptr; - struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx]; - - if (hdr->status == HFI_QUEUE_STATUS_DISABLED) - return true; - - if (hdr->read_index == hdr->write_index) - return true; - - return false; -} - static int hfi_f2h_main(void *arg) { struct adreno_device *adreno_dev = arg; diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 195fe21fa8..1d566b8b87 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -741,17 +741,37 @@ static void log_gpu_fault(struct adreno_device *adreno_dev) } } -static u32 peek_next_header(struct gen8_gmu_device *gmu, uint32_t queue_idx) +static bool is_queue_empty(struct adreno_device *adreno_dev, u32 queue_idx) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem; + struct hfi_queue_table *tbl = mem_addr->hostptr; + struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx]; + + if (hdr->status == HFI_QUEUE_STATUS_DISABLED) + return true; + + if (hdr->read_index == hdr->write_index) + return true; + + /* + * This is to ensure that the queue is not read speculatively before the queue empty + * condition is evaluated + */ + rmb(); + + return false; +} + +static u32 peek_next_header(struct adreno_device *adreno_dev, struct gen8_gmu_device *gmu, + u32 queue_idx) { struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem; struct hfi_queue_table *tbl = mem_addr->hostptr; struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx]; u32 *queue; - if (hdr->status == HFI_QUEUE_STATUS_DISABLED) - return 0; - - if (hdr->read_index == hdr->write_index) + if (is_queue_empty(adreno_dev, queue_idx)) return 0; queue = HOST_QUEUE_START_ADDR(mem_addr, queue_idx); @@ 
-1221,7 +1241,7 @@ void gen8_hwsched_process_msgq(struct adreno_device *adreno_dev) mutex_lock(&hw_hfi->msgq_mutex); for (;;) { - next_hdr = peek_next_header(gmu, HFI_MSG_ID); + next_hdr = peek_next_header(adreno_dev, gmu, HFI_MSG_ID); if (!next_hdr) break; @@ -2557,22 +2577,6 @@ int gen8_hwsched_lpac_cp_init(struct adreno_device *adreno_dev) "LPAC CP initialization failed to idle\n"); } -static bool is_queue_empty(struct adreno_device *adreno_dev, u32 queue_idx) -{ - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem; - struct hfi_queue_table *tbl = mem_addr->hostptr; - struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx]; - - if (hdr->status == HFI_QUEUE_STATUS_DISABLED) - return true; - - if (hdr->read_index == hdr->write_index) - return true; - - return false; -} - static int hfi_f2h_main(void *arg) { struct adreno_device *adreno_dev = arg; From cec475057b7d5fb86e7d76e3b61c07495b765039 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Thu, 29 Feb 2024 17:29:43 -0700 Subject: [PATCH 0689/1016] kgsl: hwfence: Fix gen8_send_hw_fence_hfi_wait_ack() Set the sequence number and size of the hardware fence packets that are sent from within gen8_send_hw_fence_hfi_wait_ack(). 
Change-Id: I3c0cb941043408b1bfa13faf02e1ecd0f5a159a8 Signed-off-by: Harshdeep Dhatt --- adreno_gen8_hwsched_hfi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 1d566b8b87..6736739a92 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -3289,6 +3289,7 @@ int gen8_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev, entry->cmd.flags |= flags; seqnum = atomic_inc_return(&hfi->hw_fence.seqnum); + entry->cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(entry->cmd.hdr, seqnum, sizeof(entry->cmd) >> 2); gen8_hw_fence_ack.sent_hdr = entry->cmd.hdr; From b00181d746c9d9c2208766ff273699f8463f9fa9 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Fri, 1 Dec 2023 16:18:31 -0700 Subject: [PATCH 0690/1016] kgsl: gen8: Enable hardware fences for gen8_0_0 This feature enables fences to be signaled via hardware interrupts. Change-Id: If0c9db704153a8a4a8b0d2d6d19cc13e8536260c Signed-off-by: Harshdeep Dhatt --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 842b7bc031..65a5965ad7 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2712,7 +2712,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_BCL | - ADRENO_IFPC, + ADRENO_IFPC | ADRENO_HW_FENCE, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_64M, From 15045dba390a8c36c4148f24b267cd7dc72db989 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Fri, 12 Jan 2024 12:19:59 -0800 Subject: [PATCH 0691/1016] kgsl: Fix atomic GPU snapshot page table base dump issue kgsl panic notifier will be called in atomic context to get GPU snapshot. In adreno_snapshot_os() section while getting current page table base sleep path is invoked in mmu clk enablement. 
Hence skip dumping page table base in atomic GPU snapshot. Change-Id: Id06ae3686128786916a9ec93184189325ba62daf Signed-off-by: Hareesh Gundu --- kgsl_iommu.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kgsl_iommu.c b/kgsl_iommu.c index e7d6a43387..b3a6ed25fc 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2011-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -1844,15 +1844,17 @@ kgsl_iommu_get_current_ttbr0(struct kgsl_mmu *mmu, struct kgsl_context *context) u64 val; struct kgsl_iommu *iommu = &mmu->iommu; struct kgsl_iommu_context *ctx = &iommu->user_context; + struct kgsl_device *device = KGSL_MMU_DEVICE(mmu); if (kgsl_context_is_lpac(context)) ctx = &iommu->lpac_context; /* - * We cannot enable or disable the clocks in interrupt context, this - * function is called from interrupt context if there is an axi error + * We cannot enable or disable the clocks in interrupt and atomic context, this + * function is called from interrupt context if there is an axi error and atomic + * context GPU snapshot for panic notifier callback. */ - if (in_interrupt()) + if (in_interrupt() || device->snapshot_atomic) return 0; if (ctx->cb_num < 0) From e2ef38debf5dcbf773cc2b2c99bb03c44dc47f57 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Fri, 9 Feb 2024 15:55:35 +0530 Subject: [PATCH 0692/1016] kgsl: Add support for AHB timeout detection Detect timeout at GPU AHB before NOC error gets triggered and release the port for further transactions. Also return an error to the master for further error handling at NOC level. Since AHB port is released on timeout detection, DCC will be able to dump GPU registers specified in DCC script. 
Change-Id: I06d70b3f152264c60e25baf4c9c61ed4d84b1514 Signed-off-by: Pankaj Gupta --- adreno.c | 42 ++++++++++++++++++++++++++++++++++++++++++ adreno.h | 23 +++++++++++++++++++++++ adreno_gen7.c | 18 ++++++++++++++++++ adreno_gen7.h | 10 ++++++++++ adreno_gen7_gmu.c | 14 ++++++++++++++ adreno_gen7_hwsched.c | 14 ++++++++++++++ adreno_gen8.c | 18 ++++++++++++++++++ adreno_gen8.h | 11 +++++++++++ adreno_gen8_gmu.c | 14 ++++++++++++++ adreno_gen8_hwsched.c | 14 ++++++++++++++ gen7_reg.h | 5 +++++ gen8_reg.h | 5 +++++ 12 files changed, 188 insertions(+) diff --git a/adreno.c b/adreno.c index 1ad5655a9d..d36f6de155 100644 --- a/adreno.c +++ b/adreno.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -1864,6 +1865,47 @@ void adreno_get_bus_counters(struct adreno_device *adreno_dev) "Unable to get perf counters for bus DCVS\n"); } +#define ADRENO_AHB_MIN_TIMEOUT_VAL_USEC 1000 + +u32 adreno_get_ahb_timeout_val(struct adreno_device *adreno_dev, u32 noc_timeout_us) +{ + u64 cycles, hub_clk_freq = adreno_dev->gmu_hub_clk_freq; + u32 timeout_val; + + if (!noc_timeout_us) + return 0; + + do_div(hub_clk_freq, HZ_PER_MHZ); + cycles = hub_clk_freq * noc_timeout_us; + + /* + * Get max possible AHB timeout value which is less than the GPU NOC timeout value. + * When cycles are exact power of two, the calculated AHB timeout value will be same + * as GPU config NOC timeout. Just reduce one cycle to make sure we do not program AHB + * timeout same as GPU config NOC timeout. + */ + if (is_power_of_2(cycles)) + cycles -= 1; + + timeout_val = ilog2(cycles); + + /* + * Make sure, AHB timeout value fits into bit fields and it is not too low + * which can cause false timeouts. + */ + if ((timeout_val > GENMASK(4, 0)) || + ((ADRENO_AHB_MIN_TIMEOUT_VAL_USEC * hub_clk_freq) > (1 << timeout_val))) { + dev_warn(adreno_dev->dev.dev, "Invalid AHB timeout_val %u\n", timeout_val); + return 0; + } + + /* + * Return (timeout_val - 1). 
Based on timeout_val programmed, a timeout will occur if + * an AHB transaction is not completed in 2 ^ (timeout_val + 1) cycles. + */ + return (timeout_val - 1); +} + /** * _adreno_start - Power up the GPU and prepare to accept commands * @adreno_dev: Pointer to an adreno_device structure diff --git a/adreno.h b/adreno.h index fe1f24cc6e..5503d1ad3a 100644 --- a/adreno.h +++ b/adreno.h @@ -254,6 +254,15 @@ enum adreno_gpurev { #define ADRENO_CTX_DETATCH_TIMEOUT_FAULT BIT(6) #define ADRENO_GMU_FAULT_SKIP_SNAPSHOT BIT(7) +/** + * Bit fields for GPU_CX_MISC_CX_AHB_*_CNTL registers + * AHB_TXFRTIMEOUTRELEASE [8:8] + * AHB_TXFRTIMEOUTENABLE [9:9] + * AHB_RESPONDERROR [11:11] + * AHB_ERRORSTATUSENABLE [12:12] + */ +#define ADRENO_AHB_CNTL_DEFAULT (BIT(12) | BIT(11) | BIT(9) | BIT(8)) + enum adreno_pipe_type { PIPE_NONE = 0, PIPE_BR = 1, @@ -732,6 +741,11 @@ struct adreno_device { * ADRENO_PERFCOUNTER_GROUP_RESTORE flag set */ u32 no_restore_count; + /* + * @ahb_timeout_val: AHB transaction timeout value. + * If set, a timeout will occur in 2 ^ (ahb_timeout_val + 1) cycles. + */ + u32 ahb_timeout_val; }; /** @@ -1979,4 +1993,13 @@ void adreno_mark_for_coldboot(struct adreno_device *adreno_dev); * Return - True if smmu is stalled or false otherwise */ bool adreno_smmu_is_stalled(struct adreno_device *adreno_dev); + +/** + * adreno_get_ahb_timeout_val() - Get the ahb_timeout value + * @adreno_dev: Adreno device handle + * @noc_timeout_us: GPU config NOC timeout value in usec + * + * Return - AHB timeout value to be programmed in AHB CNTL registers + */ +u32 adreno_get_ahb_timeout_val(struct adreno_device *adreno_dev, u32 noc_timeout_us); #endif /*__ADRENO_H */ diff --git a/adreno_gen7.c b/adreno_gen7.c index 517ae4f643..b8b4b379e4 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -327,6 +327,8 @@ int gen7_init(struct adreno_device *adreno_dev) adreno_dev->highest_bank_bit = gen7_core->highest_bank_bit; adreno_dev->gmu_hub_clk_freq = freq ? 
freq : 150000000; + adreno_dev->ahb_timeout_val = adreno_get_ahb_timeout_val(adreno_dev, + gen7_core->noc_timeout_us); adreno_dev->bcl_data = gen7_core->bcl_data; adreno_dev->cooperative_reset = ADRENO_FEATURE(adreno_dev, @@ -660,6 +662,22 @@ static u64 gen7_get_uche_trap_base(void) /* Add crashdumper permissions for the BR APRIV */ #define GEN7_BR_APRIV_DEFAULT (GEN7_APRIV_DEFAULT | BIT(6) | BIT(5)) +void gen7_enable_ahb_timeout_detection(struct adreno_device *adreno_dev) +{ + u32 val; + + if (!adreno_dev->ahb_timeout_val) + return; + + val = (ADRENO_AHB_CNTL_DEFAULT | FIELD_PREP(GENMASK(4, 0), + adreno_dev->ahb_timeout_val)); + adreno_cx_misc_regwrite(adreno_dev, GEN7_GPU_CX_MISC_CX_AHB_AON_CNTL, val); + adreno_cx_misc_regwrite(adreno_dev, GEN7_GPU_CX_MISC_CX_AHB_GMU_CNTL, val); + adreno_cx_misc_regwrite(adreno_dev, GEN7_GPU_CX_MISC_CX_AHB_CP_CNTL, val); + adreno_cx_misc_regwrite(adreno_dev, GEN7_GPU_CX_MISC_CX_AHB_VBIF_SMMU_CNTL, val); + adreno_cx_misc_regwrite(adreno_dev, GEN7_GPU_CX_MISC_CX_AHB_HOST_CNTL, val); +} + int gen7_start(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); diff --git a/adreno_gen7.h b/adreno_gen7.h index 11b701ad8c..e2f02a01eb 100644 --- a/adreno_gen7.h +++ b/adreno_gen7.h @@ -126,6 +126,8 @@ struct adreno_gen7_core { const u32 rt_bus_hint; /** @fast_bus_hint: Whether or not to increase IB vote on high ddr stall */ bool fast_bus_hint; + /** @noc_timeout_us: GPU config NOC port timeout in usec */ + u32 noc_timeout_us; }; /** @@ -473,6 +475,14 @@ to_gen7_gpudev(const struct adreno_gpudev *gpudev) */ void gen7_reset_preempt_records(struct adreno_device *adreno_dev); +/** + * gen7_enable_ahb_timeout_detection - Program AHB control registers + * @adreno_dev: An Adreno GPU handle + * + * Program AHB control registers to enable AHB timeout detection. 
+ */ +void gen7_enable_ahb_timeout_detection(struct adreno_device *adreno_dev); + /** * gen7_rdpm_mx_freq_update - Update the mx frequency * @gmu: An Adreno GMU handle diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 2f592d29bb..7548f838f9 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -1937,6 +1937,13 @@ static int gen7_gmu_first_boot(struct adreno_device *adreno_dev) if (ret) goto gdsc_off; + /* + * Enable AHB timeout detection to catch any register access taking longer + * time before NOC timeout gets detected. Enable this logic before any + * register access which happens to be just after enabling clocks. + */ + gen7_enable_ahb_timeout_detection(adreno_dev); + /* Initialize the CX timer */ gen7_cx_timer_init(adreno_dev); @@ -2035,6 +2042,13 @@ static int gen7_gmu_boot(struct adreno_device *adreno_dev) if (ret) goto gdsc_off; + /* + * Enable AHB timeout detection to catch any register access taking longer + * time before NOC timeout gets detected. Enable this logic before any + * register access which happens to be just after enabling clocks. + */ + gen7_enable_ahb_timeout_detection(adreno_dev); + ret = gen7_rscc_wakeup_sequence(adreno_dev); if (ret) goto clks_gdsc_off; diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 754bb94bbf..6785808462 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -492,6 +492,13 @@ static int gen7_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) if (ret) goto gdsc_off; + /* + * Enable AHB timeout detection to catch any register access taking longer + * time before NOC timeout gets detected. Enable this logic before any + * register access which happens to be just after enabling clocks. 
+ */ + gen7_enable_ahb_timeout_detection(adreno_dev); + /* Initialize the CX timer */ gen7_cx_timer_init(adreno_dev); @@ -597,6 +604,13 @@ static int gen7_hwsched_gmu_boot(struct adreno_device *adreno_dev) if (ret) goto gdsc_off; + /* + * Enable AHB timeout detection to catch any register access taking longer + * time before NOC timeout gets detected. Enable this logic before any + * register access which happens to be just after enabling clocks. + */ + gen7_enable_ahb_timeout_detection(adreno_dev); + ret = gen7_rscc_wakeup_sequence(adreno_dev); if (ret) goto clks_gdsc_off; diff --git a/adreno_gen8.c b/adreno_gen8.c index 51f0b2d724..4b8ae209b5 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -462,6 +462,8 @@ int gen8_init(struct adreno_device *adreno_dev) adreno_dev->highest_bank_bit = gen8_core->highest_bank_bit; adreno_dev->gmu_hub_clk_freq = freq ? freq : 150000000; + adreno_dev->ahb_timeout_val = adreno_get_ahb_timeout_val(adreno_dev, + gen8_core->noc_timeout_us); adreno_dev->bcl_data = gen8_core->bcl_data; adreno_dev->cooperative_reset = ADRENO_FEATURE(adreno_dev, @@ -999,6 +1001,22 @@ static const struct kgsl_regmap_list gen8_0_0_bicubic_regs[] = { { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_19, 0x3f7227f7 }, }; +void gen8_enable_ahb_timeout_detection(struct adreno_device *adreno_dev) +{ + u32 val; + + if (!adreno_dev->ahb_timeout_val) + return; + + val = (ADRENO_AHB_CNTL_DEFAULT | FIELD_PREP(GENMASK(4, 0), + adreno_dev->ahb_timeout_val)); + adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_CX_AHB_AON_CNTL, val); + adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_CX_AHB_GMU_CNTL, val); + adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_CX_AHB_CP_CNTL, val); + adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_CX_AHB_VBIF_SMMU_CNTL, val); + adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_CX_AHB_HOST_CNTL, val); +} + #define MIN_HBB 13 int gen8_start(struct adreno_device *adreno_dev) { diff --git a/adreno_gen8.h b/adreno_gen8.h index 
eb1f07d977..aaf749e402 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -166,6 +166,8 @@ struct adreno_gen8_core { const u32 rt_bus_hint; /** @fast_bus_hint: Whether or not to increase IB vote on high ddr stall */ bool fast_bus_hint; + /** @noc_timeout_us: GPU config NOC port timeout in usec */ + u32 noc_timeout_us; }; /** @@ -321,6 +323,15 @@ void gen8_crashdump_init(struct adreno_device *adreno_dev); void gen8_snapshot_external_core_regs(struct kgsl_device *device, struct kgsl_snapshot *snapshot); +/** + * gen8_enable_ahb_timeout_detection - Program AHB control registers + * @adreno_dev: An Adreno GPU handle + * + * Program AHB control registers to enable AHB timeout detection. + * + */ +void gen8_enable_ahb_timeout_detection(struct adreno_device *adreno_dev); + /** * gen8_start - Program gen8 registers * @adreno_dev: An Adreno GPU handle diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index 79426887ce..78ede44ce2 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -1819,6 +1819,13 @@ static int gen8_gmu_first_boot(struct adreno_device *adreno_dev) if (ret) goto gdsc_off; + /* + * Enable AHB timeout detection to catch any register access taking longer + * time before NOC timeout gets detected. Enable this logic before any + * register access which happens to be just after enabling clocks. + */ + gen8_enable_ahb_timeout_detection(adreno_dev); + /* Initialize the CX timer */ gen8_cx_timer_init(adreno_dev); @@ -1908,6 +1915,13 @@ static int gen8_gmu_boot(struct adreno_device *adreno_dev) if (ret) goto gdsc_off; + /* + * Enable AHB timeout detection to catch any register access taking longer + * time before NOC timeout gets detected. Enable this logic before any + * register access which happens to be just after enabling clocks. 
+ */ + gen8_enable_ahb_timeout_detection(adreno_dev); + ret = gen8_rscc_wakeup_sequence(adreno_dev); if (ret) goto clks_gdsc_off; diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 5d901f93d4..0a145615b9 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -550,6 +550,13 @@ static int gen8_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) if (ret) goto gdsc_off; + /* + * Enable AHB timeout detection to catch any register access taking longer + * time before NOC timeout gets detected. Enable this logic before any + * register access which happens to be just after enabling clocks. + */ + gen8_enable_ahb_timeout_detection(adreno_dev); + /* Initialize the CX timer */ gen8_cx_timer_init(adreno_dev); @@ -657,6 +664,13 @@ static int gen8_hwsched_gmu_boot(struct adreno_device *adreno_dev) if (ret) goto gdsc_off; + /* + * Enable AHB timeout detection to catch any register access taking longer + * time before NOC timeout gets detected. Enable this logic before any + * register access which happens to be just after enabling clocks. 
+ */ + gen8_enable_ahb_timeout_detection(adreno_dev); + ret = gen8_rscc_wakeup_sequence(adreno_dev); if (ret) goto clks_gdsc_off; diff --git a/gen7_reg.h b/gen7_reg.h index f2c905dfcf..33aeba131b 100644 --- a/gen7_reg.h +++ b/gen7_reg.h @@ -1301,6 +1301,11 @@ /* GPU CX_MISC registers */ #define GEN7_CX_MISC_BASE 0x27800 +#define GEN7_GPU_CX_MISC_CX_AHB_AON_CNTL 0x10 +#define GEN7_GPU_CX_MISC_CX_AHB_GMU_CNTL 0x11 +#define GEN7_GPU_CX_MISC_CX_AHB_CP_CNTL 0x12 +#define GEN7_GPU_CX_MISC_CX_AHB_VBIF_SMMU_CNTL 0x13 +#define GEN7_GPU_CX_MISC_CX_AHB_HOST_CNTL 0x14 #define GEN7_GPU_CX_MISC_TCM_RET_CNTL 0x39 #define GEN7_GPU_CX_MISC_AO_COUNTER_LO 0x80 #define GEN7_GPU_CX_MISC_AO_COUNTER_HI 0x81 diff --git a/gen8_reg.h b/gen8_reg.h index 8a3b52ada5..af0249d930 100644 --- a/gen8_reg.h +++ b/gen8_reg.h @@ -1599,6 +1599,11 @@ #define GEN8_SMMU_BASE 0x28000 /* GPU CX_MISC registers */ +#define GEN8_GPU_CX_MISC_CX_AHB_AON_CNTL 0x10 +#define GEN8_GPU_CX_MISC_CX_AHB_GMU_CNTL 0x11 +#define GEN8_GPU_CX_MISC_CX_AHB_CP_CNTL 0x12 +#define GEN8_GPU_CX_MISC_CX_AHB_VBIF_SMMU_CNTL 0x13 +#define GEN8_GPU_CX_MISC_CX_AHB_HOST_CNTL 0x14 #define GEN8_GPU_CX_MISC_INT_CLEAR_CMD 0x31 #define GEN8_GPU_CX_MISC_INT_0_MASK 0x33 #define GEN8_GPU_CX_MISC_INT_0_STATUS 0x34 From 3ebf50ecbb25cc80b9a0adda64447e2c646aedd3 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Fri, 2 Feb 2024 17:12:35 +0530 Subject: [PATCH 0693/1016] kgsl: Enable AHB timeout detection for gen_8_0_0 Define noc_timeout_us for gen_8_0_0 GPU to enable AHB timeout detection. 
Change-Id: I2e7ffd6f57ccebb6c42a523fe4b01e8b549a3e54 Signed-off-by: Pankaj Gupta --- adreno-gpulist.h | 1 + 1 file changed, 1 insertion(+) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index e349d78473..aa3825cdcb 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2737,6 +2737,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .fast_bus_hint = true, .bcl_data = 1, .acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4), + .noc_timeout_us = 3410, /* 3.41 msec */ }; /* GEN8_4_0 noncontext register list */ From 0d49ca8593e6d2817c25f06bfa55402cf5c4e698 Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Fri, 2 Feb 2024 12:37:24 -0800 Subject: [PATCH 0694/1016] kgsl: gen8: Fix GMU register capture in snapshot flow When capturing GMU GX registers we need to ensure GX is ON, while the rest of the GMU registers can be captured when GX is OFF. Fix the GMU register capture so that we do not touch the GX domain registers when GX might be OFF in snapshot flow. Change-Id: Id8f389b01e8bbc11c148c0fad703cbbfb774535d Signed-off-by: Urvashi Agrawal --- adreno_gen8_0_0_snapshot.h | 3 +-- adreno_gen8_gmu_snapshot.c | 36 ++++++++++++++++++++---------------- adreno_gen8_snapshot.c | 5 +++-- adreno_gen8_snapshot.h | 12 +++++++----- 4 files changed, 31 insertions(+), 25 deletions(-) diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h index 5dba870f95..c43907f96e 100644 --- a/adreno_gen8_0_0_snapshot.h +++ b/adreno_gen8_0_0_snapshot.h @@ -1883,8 +1883,7 @@ static struct gen8_reg_list gen8_0_0_ahb_registers[] = { { UNSLICE, gen8_0_0_ahb_secure_gpu_registers }, }; -static struct gen8_reg_list gen8_gmu_registers[] = { - { UNSLICE, gen8_0_0_gmu_registers }, +static struct gen8_reg_list gen8_gmu_gx_registers[] = { { UNSLICE, gen8_0_0_gmugx_registers }, { SLICE, gen8_0_0_gmugx_slice_registers }, }; diff --git a/adreno_gen8_gmu_snapshot.c b/adreno_gen8_gmu_snapshot.c index 4d6250efb5..ce247991ca 100644 --- a/adreno_gen8_gmu_snapshot.c +++ 
b/adreno_gen8_gmu_snapshot.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #include "adreno.h" @@ -247,7 +247,7 @@ static void gen8_gmu_device_snapshot(struct kgsl_device *device, const struct gen8_snapshot_block_list *gen8_snapshot_block_list = gpucore->gen8_snapshot_block_list; u32 i, slice, j; - struct gen8_reg_list_info info; + struct gen8_reg_list_info info = {0}; kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY, snapshot, gen8_gmu_snapshot_itcm, gmu); @@ -256,28 +256,32 @@ static void gen8_gmu_device_snapshot(struct kgsl_device *device, gen8_gmu_snapshot_memories(device, gmu, snapshot); - for (i = 0 ; i < gen8_snapshot_block_list->num_gmu_regs; i++) { - struct gen8_reg_list *regs = &gen8_snapshot_block_list->gmu_regs[i]; - - slice = regs->slice_region ? MAX_PHYSICAL_SLICES : 1; - for (j = 0 ; j < slice; j++) { - info.regs = regs; - info.slice_id = (slice > 1) ? 
j : UINT_MAX; - kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, - gen8_legacy_snapshot_registers, &info); - } - } - kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot, gen8_snapshot_rscc_registers, (void *) gen8_snapshot_block_list->rscc_regs); + /* Capture GMU registers which are on CX domain and unsliced */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot, + adreno_snapshot_registers_v2, + (void *) gen8_snapshot_block_list->gmu_cx_unsliced_regs); + if (!gen8_gmu_gx_is_on(adreno_dev)) goto dtcm; /* Set fence to ALLOW mode so registers can be read */ kgsl_regwrite(device, GEN8_GMUAO_AHB_FENCE_CTRL, 0); - /* Make sure the previous write posted before reading */ - wmb(); + + /* Capture GMU registers which are on GX domain */ + for (i = 0 ; i < gen8_snapshot_block_list->num_gmu_gx_regs; i++) { + struct gen8_reg_list *regs = &gen8_snapshot_block_list->gmu_gx_regs[i]; + + slice = regs->slice_region ? MAX_PHYSICAL_SLICES : 1; + for (j = 0 ; j < slice; j++) { + info.regs = regs; + info.slice_id = j; + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, + gen8_legacy_snapshot_registers, &info); + } + } dtcm: kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY, diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 5d44d74486..139fb30265 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -28,8 +28,9 @@ const struct gen8_snapshot_block_list gen8_0_0_snapshot_block_list = { .cx_debugbus_blocks_len = ARRAY_SIZE(gen8_cx_debugbus_blocks), .external_core_regs = gen8_0_0_external_core_regs, .num_external_core_regs = ARRAY_SIZE(gen8_0_0_external_core_regs), - .gmu_regs = gen8_gmu_registers, - .num_gmu_regs = ARRAY_SIZE(gen8_gmu_registers), + .gmu_cx_unsliced_regs = gen8_0_0_gmu_registers, + .gmu_gx_regs = gen8_gmu_gx_registers, + .num_gmu_gx_regs = ARRAY_SIZE(gen8_gmu_gx_registers), .rscc_regs = gen8_0_0_rscc_rsc_registers, .reg_list = 
gen8_0_0_reg_list, .cx_misc_regs = gen8_0_0_cx_misc_registers, diff --git a/adreno_gen8_snapshot.h b/adreno_gen8_snapshot.h index 83090b67de..2cb8c4e2ee 100644 --- a/adreno_gen8_snapshot.h +++ b/adreno_gen8_snapshot.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef __ADRENO_GEN8_SNAPSHOT_H #define __ADRENO_GEN8_SNAPSHOT_H @@ -610,10 +610,12 @@ struct gen8_snapshot_block_list { const u32 **external_core_regs; /* num_external_core_regs : length of external core registers list */ size_t num_external_core_regs; - /* gmu_registers : List of GMU registers */ - struct gen8_reg_list *gmu_regs; - /* num_gmu_regs : Length of GMU registers list */ - size_t num_gmu_regs; + /* gmu_cx_unsliced_regs : List of GMU CX unsliced registers */ + const u32 *gmu_cx_unsliced_regs; + /* gmu_gx_registers : List of GMU registers */ + struct gen8_reg_list *gmu_gx_regs; + /* num_gmu_gx_regs : Length of GMU registers list */ + size_t num_gmu_gx_regs; /* rscc_regs : List of RSCC registers */ const u32 *rscc_regs; /* reg_list : List of GPU internal registers */ From f67ef2fc32ccff4ae0781e39fb9b321cfe1bfaea Mon Sep 17 00:00:00 2001 From: Carter Cooper Date: Fri, 5 Jan 2024 16:09:53 -0700 Subject: [PATCH 0695/1016] kgsl: gen8: Move from mailbox to qmp_aoss domain for ACD Mailbox support is being dropped by AOSS starting with gen8 targets. Remove mailbox support and replace with qmp. 
Change-Id: I13b0e03853ab412c801c4141075333d1661c8d43 Signed-off-by: Carter Cooper Signed-off-by: Mohammed Mirza Mandayappurath Manzoor --- adreno_gen8_gmu.c | 38 ++++++++++++-------------------------- adreno_gen8_gmu.h | 10 +++++----- 2 files changed, 17 insertions(+), 31 deletions(-) diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index 79426887ce..586b8300ff 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -1744,26 +1743,20 @@ static irqreturn_t gen8_gmu_irq_handler(int irq, void *data) void gen8_gmu_aop_send_acd_state(struct gen8_gmu_device *gmu, bool flag) { - struct qmp_pkt msg; char msg_buf[36]; u32 size; int ret; - if (IS_ERR_OR_NULL(gmu->mailbox.channel)) + if (IS_ERR_OR_NULL(gmu->qmp)) return; size = scnprintf(msg_buf, sizeof(msg_buf), "{class: gpu, res: acd, val: %d}", flag); - /* mailbox controller expects 4-byte aligned buffer */ - msg.size = ALIGN((size + 1), SZ_4); - msg.data = msg_buf; - - ret = mbox_send_message(gmu->mailbox.channel, &msg); - + ret = qmp_send(gmu->qmp, msg_buf, ALIGN((size + 1), SZ_4)); if (ret < 0) dev_err(&gmu->pdev->dev, - "AOP mbox send message failed: %d\n", ret); + "AOP qmp send message failed: %d\n", ret); } int gen8_gmu_enable_clks(struct adreno_device *adreno_dev, u32 level) @@ -1970,7 +1963,7 @@ static int gen8_gmu_acd_set(struct kgsl_device *device, bool val) struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - if (IS_ERR_OR_NULL(gmu->mailbox.channel)) + if (IS_ERR_OR_NULL(gmu->qmp)) return -EINVAL; /* Don't do any unneeded work if ACD is already in the correct state */ @@ -2159,19 +2152,12 @@ static void gen8_free_gmu_globals(struct gen8_gmu_device *gmu) gmu->global_entries = 0; } -static int gen8_gmu_aop_mailbox_init(struct adreno_device *adreno_dev, +static int gen8_gmu_qmp_aoss_init(struct adreno_device *adreno_dev, struct gen8_gmu_device *gmu) { - struct 
kgsl_mailbox *mailbox = &gmu->mailbox; - - mailbox->client.dev = &gmu->pdev->dev; - mailbox->client.tx_block = true; - mailbox->client.tx_tout = 1000; - mailbox->client.knows_txdone = false; - - mailbox->channel = mbox_request_channel(&mailbox->client, 0); - if (IS_ERR(mailbox->channel)) - return PTR_ERR(mailbox->channel); + gmu->qmp = qmp_get(&gmu->pdev->dev); + if (IS_ERR(gmu->qmp)) + return PTR_ERR(gmu->qmp); adreno_dev->acd_enabled = true; return 0; @@ -2214,10 +2200,10 @@ static void gen8_gmu_acd_probe(struct kgsl_device *device, cmd->num_levels = cmd_idx; - ret = gen8_gmu_aop_mailbox_init(adreno_dev, gmu); + ret = gen8_gmu_qmp_aoss_init(adreno_dev, gmu); if (ret) dev_err(&gmu->pdev->dev, - "AOP mailbox init failed: %d\n", ret); + "AOP qmp init failed: %d\n", ret); } static int gen8_gmu_reg_probe(struct adreno_device *adreno_dev) @@ -2321,8 +2307,8 @@ void gen8_gmu_remove(struct kgsl_device *device) struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - if (!IS_ERR_OR_NULL(gmu->mailbox.channel)) - mbox_free_channel(gmu->mailbox.channel); + if (!IS_ERR_OR_NULL(gmu->qmp)) + qmp_put(gmu->qmp); adreno_dev->acd_enabled = false; diff --git a/adreno_gen8_gmu.h b/adreno_gen8_gmu.h index 1368e883c8..56407947a8 100644 --- a/adreno_gen8_gmu.h +++ b/adreno_gen8_gmu.h @@ -1,13 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __ADRENO_GEN8_GMU_H #define __ADRENO_GEN8_GMU_H -#include +#include #include "adreno_gen8_hfi.h" #include "kgsl_gmu_core.h" @@ -37,7 +37,6 @@ struct gen8_dcvs_table { * than default power level * @idle_level: Minimal GPU idle power level * @fault_count: GMU fault count - * @mailbox: Messages to AOP for ACD enable/disable go through this * @log_wptr_retention: Store the log wptr offset on slumber */ struct gen8_gmu_device { @@ -71,7 +70,8 @@ struct gen8_gmu_device { u32 freqs[GMU_MAX_PWRLEVELS]; /** @vlvls: Array of GMU voltage levels */ u32 vlvls[GMU_MAX_PWRLEVELS]; - struct kgsl_mailbox mailbox; + /** @qmp: aoss_qmp handle */ + struct qmp *qmp; /** @gmu_globals: Array to store gmu global buffers */ struct kgsl_memdesc gmu_globals[GMU_KERNEL_ENTRIES]; /** @global_entries: To keep track of number of gmu buffers */ @@ -299,7 +299,7 @@ int gen8_gmu_memory_init(struct adreno_device *adreno_dev); * @gmu: Pointer to the gen8 gmu device * @flag: Boolean to enable or disable acd in aop * - * This function enables or disables gpu acd feature using mailbox + * This function enables or disables gpu acd feature using qmp */ void gen8_gmu_aop_send_acd_state(struct gen8_gmu_device *gmu, bool flag); From 17e5f467890514e2463f866f3e7300f88ccbf5e7 Mon Sep 17 00:00:00 2001 From: Mohammed Mirza Mandayappurath Manzoor Date: Mon, 4 Mar 2024 13:53:21 -0800 Subject: [PATCH 0696/1016] kgsl: gen8: Support dedicated MxC for GPU Some targets support dedicated MxC rails for GPU. Use a different resource id to query command db to setup Gx votes for such targets. 
Change-Id: Idab2d7ab6193cb9cc8bff18ddd25e1d6e2eb3f57 Signed-off-by: Mohammed Mirza Mandayappurath Manzoor --- adreno_gen8_rpmh.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/adreno_gen8_rpmh.c b/adreno_gen8_rpmh.c index c3d59bec5b..361a436b04 100644 --- a/adreno_gen8_rpmh.c +++ b/adreno_gen8_rpmh.c @@ -404,6 +404,13 @@ static int build_dcvs_table(struct adreno_device *adreno_dev) if (ret) return ret; + /* If the target supports dedicated MxC rail, read the same */ + if (cmd_db_read_addr("gmxc.lvl")) { + ret = rpmh_arc_cmds(&mx_arc, "gmxc.lvl"); + if (ret) + return ret; + } + return setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, &cx_arc); } From 8f6a6249e66b2885e846089f092d45aa223f64db Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Mon, 12 Feb 2024 14:18:14 -0800 Subject: [PATCH 0697/1016] kgsl: gen8: Add memory barriers before reading aperture registers Add memory barriers after programming the aperture to ensure the write is posted before we read the registers behind the aperture. Change-Id: I019afc52ab02360b3d1d4be5e8bcf70b75385c9f Signed-off-by: Urvashi Agrawal --- adreno_gen7_perfcounter.c | 8 +++++++- adreno_gen8.c | 3 +++ adreno_gen8_perfcounter.c | 8 +++++++- adreno_gen8_snapshot.c | 28 +++++++++++++++++++--------- 4 files changed, 36 insertions(+), 11 deletions(-) diff --git a/adreno_gen7_perfcounter.c b/adreno_gen7_perfcounter.c index f3157375e9..76b5acd093 100644 --- a/adreno_gen7_perfcounter.c +++ b/adreno_gen7_perfcounter.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include "adreno.h" @@ -45,6 +45,9 @@ static int gen7_counter_br_enable(struct adreno_device *adreno_dev, FIELD_PREP(GENMASK(13, 12), PIPE_BR), group->flags); kgsl_regwrite(device, GEN7_CP_APERTURE_CNTL_HOST, val); + /* Ensure all writes are posted before accessing the piped register */ + mb(); + if (!ret) reg->value = 0; @@ -67,6 +70,9 @@ static int gen7_counter_bv_enable(struct adreno_device *adreno_dev, FIELD_PREP(GENMASK(13, 12), PIPE_BV), group->flags); kgsl_regwrite(device, GEN7_CP_APERTURE_CNTL_HOST, val); + /* Ensure all writes are posted before accessing the piped register */ + mb(); + if (!ret) reg->value = 0; diff --git a/adreno_gen8.c b/adreno_gen8.c index 51f0b2d724..cc8e181324 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -576,6 +576,9 @@ void gen8_host_aperture_set(struct adreno_device *adreno_dev, u32 pipe_id, kgsl_regwrite(KGSL_DEVICE(adreno_dev), GEN8_CP_APERTURE_CNTL_HOST, aperture_val); + /* Make sure the aperture write goes through before reading the registers */ + mb(); + gen8_dev->aperture = aperture_val; } diff --git a/adreno_gen8_perfcounter.c b/adreno_gen8_perfcounter.c index 41e9ba34d9..60e4b5918a 100644 --- a/adreno_gen8_perfcounter.c +++ b/adreno_gen8_perfcounter.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include "adreno.h" @@ -68,6 +68,9 @@ static int gen8_counter_br_enable(struct adreno_device *adreno_dev, kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, val); + /* Ensure all writes are posted before reading the piped register */ + mb(); + if (!ret) reg->value = 0; @@ -91,6 +94,9 @@ static int gen8_counter_bv_enable(struct adreno_device *adreno_dev, kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, val); + /* Ensure all writes are posted before reading the piped register */ + mb(); + if (!ret) reg->value = 0; diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 5d44d74486..ddbdfca406 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -189,6 +189,9 @@ size_t gen8_legacy_snapshot_registers(struct kgsl_device *device, kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL (info->slice_id, 0, 0, 0)); + /* Make sure the previous writes are posted before reading */ + mb(); + for (ptr = info->regs->regs; ptr[0] != UINT_MAX; ptr += 2) { count = REG_COUNT(ptr); @@ -675,6 +678,14 @@ done: kgsl_regrmw(device, GEN8_SP_DBG_CNTL, GENMASK(1, 0), 0x0); } +static void gen8_rmw_aperture(struct kgsl_device *device, + u32 offsetwords, u32 mask, u32 val, u32 pipe, u32 slice_id, u32 use_slice_id) +{ + gen8_host_aperture_set(ADRENO_DEVICE(device), pipe, slice_id, use_slice_id); + + kgsl_regmap_rmw(&device->regmap, offsetwords, mask, val); +} + static void gen8_snapshot_mempool(struct kgsl_device *device, struct kgsl_snapshot *snapshot) { @@ -688,21 +699,17 @@ static void gen8_snapshot_mempool(struct kgsl_device *device, for (j = 0; j < slice; j++) { - kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL - (j, cp_indexed_reg->pipe_id, 0, 0)); - /* set CP_CHICKEN_DBG[StabilizeMVC] to stabilize it while dumping */ - kgsl_regrmw(device, GEN8_CP_CHICKEN_DBG_PIPE, 0x4, 0x4); + gen8_rmw_aperture(device, GEN8_CP_CHICKEN_DBG_PIPE, 0x4, 0x4, + cp_indexed_reg->pipe_id, j, 1); kgsl_snapshot_indexed_registers_v2(device, 
snapshot, cp_indexed_reg->addr, cp_indexed_reg->data, 0, cp_indexed_reg->size, cp_indexed_reg->pipe_id, - ((slice > 1) ? j : UINT_MAX)); + ((cp_indexed_reg->slice_region == SLICE) ? j : UINT_MAX)); - kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL - (j, cp_indexed_reg->pipe_id, 0, 0)); - - kgsl_regrmw(device, GEN8_CP_CHICKEN_DBG_PIPE, 0x4, 0x0); + gen8_rmw_aperture(device, GEN8_CP_CHICKEN_DBG_PIPE, 0x4, 0x0, + cp_indexed_reg->pipe_id, j, 1); } } } @@ -934,6 +941,9 @@ static size_t gen8_legacy_snapshot_mvc(struct kgsl_device *device, u8 *buf, if (info->cluster->sel) kgsl_regwrite(device, info->cluster->sel->host_reg, info->cluster->sel->val); + /* Make sure the previous writes are posted before reading */ + mb(); + for (; ptr[0] != UINT_MAX; ptr += 2) { u32 count = REG_COUNT(ptr); From 202362501b8723eac4ae3a869279da978e89ce20 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Tue, 5 Mar 2024 16:02:51 -0800 Subject: [PATCH 0698/1016] kgsl: gen8: Set the SP_PERFCTR_SHADER_MASK on Gen8 devices The SP_PERFCTR_SHADER_MASK enables counting SP shader statistics on Gen8 devices. Set this register at GPU start time to allow perfcounters to collect these statistics. 
Change-Id: I17192af6aab7baa2330ebd957f26258380a2376e Signed-off-by: Lynus Vaz --- adreno-gpulist.h | 4 ++++ adreno_gen8.c | 1 + 2 files changed, 5 insertions(+) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index e349d78473..60107e5cdd 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2614,6 +2614,8 @@ static const struct gen8_nonctxt_regs gen8_0_0_nonctxt_regs[] = { { GEN8_RB_CMP_DBG_ECO_CNTL, 0x00004000, BIT(PIPE_BR) }, { GEN8_RBBM_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, { GEN8_RBBM_SLICE_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, + /* Enable contribution of all shader stages to SP perfcounters */ + { GEN8_SP_PERFCTR_SHADER_MASK, 0x0000003f, BIT(PIPE_NONE) }, /* * BIT(26): Limit the number of wave-slots for Eviction buffer to 1 per ALU GRP * BIT(30): Disable LPAC auto-promotion @@ -2769,6 +2771,8 @@ static const struct gen8_nonctxt_regs gen8_4_0_nonctxt_regs[] = { { GEN8_RB_CMP_DBG_ECO_CNTL, 0x00004000, BIT(PIPE_BR) }, { GEN8_RBBM_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, { GEN8_RBBM_SLICE_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, + /* Enable contribution of all shader stages to SP perfcounters */ + { GEN8_SP_PERFCTR_SHADER_MASK, 0x0000003f, BIT(PIPE_NONE) }, /* * BIT(26): Limit the number of wave-slots for Eviction buffer to 1 per ALU GRP * BIT(30): Disable LPAC auto-promotion diff --git a/adreno_gen8.c b/adreno_gen8.c index 51f0b2d724..5aa3257873 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -59,6 +59,7 @@ static const u32 gen8_ifpc_pwrup_reglist[] = { GEN8_SP_CHICKEN_BITS_1, GEN8_SP_CHICKEN_BITS_2, GEN8_SP_CHICKEN_BITS_3, + GEN8_SP_PERFCTR_SHADER_MASK, GEN8_TPL1_NC_MODE_CNTL, GEN8_TPL1_DBG_ECO_CNTL, GEN8_TPL1_DBG_ECO_CNTL1, From a4ef6269b536cfb6aa553f23a6dc8790549196d5 Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Wed, 14 Feb 2024 15:18:36 -0800 Subject: [PATCH 0699/1016] kgsl: gen8: Fix MVC stabilization sequence in snapshot For sliced GPU we need to set the MVC stabilize bit even for the CP_SLICE_CHICKEN_DBG register. 
Change-Id: I4ceb35f6a04203dd56a33e6d8df8e97abebeb504 Signed-off-by: Urvashi Agrawal --- adreno_gen8_snapshot.c | 7 +++++++ gen8_reg.h | 1 + 2 files changed, 8 insertions(+) diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index ddbdfca406..a0ec2b6c46 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -701,6 +701,9 @@ static void gen8_snapshot_mempool(struct kgsl_device *device, /* set CP_CHICKEN_DBG[StabilizeMVC] to stabilize it while dumping */ gen8_rmw_aperture(device, GEN8_CP_CHICKEN_DBG_PIPE, 0x4, 0x4, + cp_indexed_reg->pipe_id, 0, 0); + + gen8_rmw_aperture(device, GEN8_CP_SLICE_CHICKEN_DBG_PIPE, 0x4, 0x4, cp_indexed_reg->pipe_id, j, 1); kgsl_snapshot_indexed_registers_v2(device, snapshot, @@ -708,7 +711,11 @@ static void gen8_snapshot_mempool(struct kgsl_device *device, 0, cp_indexed_reg->size, cp_indexed_reg->pipe_id, ((cp_indexed_reg->slice_region == SLICE) ? j : UINT_MAX)); + /* Reset CP_CHICKEN_DBG[StabilizeMVC] once we are done */ gen8_rmw_aperture(device, GEN8_CP_CHICKEN_DBG_PIPE, 0x4, 0x0, + cp_indexed_reg->pipe_id, 0, 0); + + gen8_rmw_aperture(device, GEN8_CP_SLICE_CHICKEN_DBG_PIPE, 0x4, 0x0, cp_indexed_reg->pipe_id, j, 1); } } diff --git a/gen8_reg.h b/gen8_reg.h index 8a3b52ada5..eb4c177f51 100644 --- a/gen8_reg.h +++ b/gen8_reg.h @@ -881,6 +881,7 @@ #define GEN8_CP_ROQ_VSD_STATUS_PIPE 0x926 #define GEN8_CP_SLICE_MEM_POOL_DBG_ADDR_PIPE 0xb00 #define GEN8_CP_SLICE_MEM_POOL_DBG_DATA_PIPE 0xb01 +#define GEN8_CP_SLICE_CHICKEN_DBG_PIPE 0xb93 /* UCHE registers */ #define GEN8_UCHE_MODE_CNTL 0xe01 From 6c450fb28bf39e72b9ec9e3b550f052e99f46c29 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Tue, 13 Feb 2024 11:08:03 -0800 Subject: [PATCH 0700/1016] kgsl: pwrctrl: Track the time limited by thermal throttling Track the time spent by the GPU limited by the thermal level. Provide a sysfs node thermal_time for userspace to read this statistic. 
Change-Id: I0dd237bc4c464968777a74ef63ed7ef7cc71f279 Signed-off-by: Lynus Vaz --- kgsl_pwrctrl.c | 10 ++++++++++ kgsl_pwrctrl.h | 2 ++ kgsl_pwrscale.c | 4 +++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 3e94d95e31..41a081cc10 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -370,6 +370,14 @@ static ssize_t thermal_pwrlevel_show(struct device *dev, return scnprintf(buf, PAGE_SIZE, "%d\n", pwr->thermal_pwrlevel); } +static ssize_t thermal_time_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", device->pwrctrl.thermal_time); +} + static ssize_t max_pwrlevel_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -1093,6 +1101,7 @@ static DEVICE_ATTR_RO(gpu_clock_stats); static DEVICE_ATTR_RW(max_pwrlevel); static DEVICE_ATTR_RW(min_pwrlevel); static DEVICE_ATTR_RW(thermal_pwrlevel); +static DEVICE_ATTR_RO(thermal_time); static DEVICE_ATTR_RO(num_pwrlevels); static DEVICE_ATTR_RO(reset_count); static DEVICE_ATTR_RW(force_clk_on); @@ -1120,6 +1129,7 @@ static const struct attribute *pwrctrl_attr_list[] = { &dev_attr_max_pwrlevel.attr, &dev_attr_min_pwrlevel.attr, &dev_attr_thermal_pwrlevel.attr, + &dev_attr_thermal_time.attr, &dev_attr_num_pwrlevels.attr, &dev_attr_reset_count.attr, &dev_attr_force_clk_on.attr, diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index 05ccc3025f..50ce8597b5 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -146,6 +146,8 @@ struct kgsl_pwrctrl { unsigned int throttle_mask; u32 interval_timeout; u64 clock_times[KGSL_MAX_PWRLEVELS]; + /** @thermal_time: Time in usecs the GPU is limited by thermal constraints */ + u64 thermal_time; struct kgsl_clk_stats clk_stats; bool bus_control; int bus_mod; diff --git a/kgsl_pwrscale.c b/kgsl_pwrscale.c index cd0d70aa38..ec26386cc1 100644 --- a/kgsl_pwrscale.c +++ b/kgsl_pwrscale.c @@ -1,7 +1,7 
@@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2010-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -131,6 +131,8 @@ void kgsl_pwrscale_update_stats(struct kgsl_device *device) device->pwrscale.accum_stats.ram_wait += stats.ram_wait; pwrctrl->clock_times[pwrctrl->active_pwrlevel] += stats.busy_time; + if (pwrctrl->thermal_pwrlevel) + pwrctrl->thermal_time += stats.busy_time; pwrctrl->time_in_pwrlevel[pwrctrl->active_pwrlevel] += ktime_us_delta(cur_time, pwrctrl->last_stat_updated); pwrctrl->last_stat_updated = cur_time; From a2d5d09959f51032953babbcde4d02b916c1d836 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Fri, 8 Mar 2024 15:15:44 -0800 Subject: [PATCH 0701/1016] kgsl: hwsched: Print fault tolerance policy Print fault tolerance policy in fault log header for more information. Change-Id: I31c8eec8b00fc2ec30d03da807ddf9e5d01accbe Signed-off-by: Hareesh Gundu --- adreno_gen8.c | 10 ++++++---- adreno_hwsched.c | 12 ++++++++---- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index 51f0b2d724..71ee647d8b 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -2591,9 +2591,11 @@ static void gen8_lpac_fault_header(struct adreno_device *adreno_dev, drawobj->context->last_faulted_cmd_ts = drawobj->timestamp; drawobj->context->total_fault_count++; - pr_context(device, drawobj->context, "LPAC ctx %u ctx_type %s ts %u dispatch_queue=%d\n", + pr_context(device, drawobj->context, + "LPAC ctx %u ctx_type %s ts %u policy %lX dispatch_queue=%d\n", drawobj->context->id, kgsl_context_type(drawctxt->type), - drawobj->timestamp, drawobj->context->gmu_dispatch_queue); + drawobj->timestamp, CMDOBJ(drawobj)->fault_recovery, + drawobj->context->gmu_dispatch_queue); pr_context(device, drawobj->context, "lpac cmdline: %s\n", 
drawctxt->base.proc_priv->cmdline); @@ -2646,9 +2648,9 @@ static void gen8_fault_header(struct adreno_device *adreno_dev, ts = drawobj->timestamp; rb_id = adreno_get_level(drawobj->context); - pr_context(device, drawobj->context, "ctx %u ctx_type %s ts %u\n", + pr_context(device, drawobj->context, "ctx %u ctx_type %s ts %u policy %lX\n", drawobj->context->id, kgsl_context_type(drawctxt->type), - drawobj->timestamp); + drawobj->timestamp, CMDOBJ(drawobj)->fault_recovery); pr_context(device, drawobj->context, "cmdline: %s\n", drawctxt->base.proc_priv->cmdline); diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 0cad8a113e..d854ba6373 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -1472,9 +1472,10 @@ static void do_fault_header(struct adreno_device *adreno_dev, rb_id = adreno_get_level(drawobj->context); pr_context(device, drawobj->context, - "ctx %u ctx_type %s ts %u dispatch_queue=%d\n", + "ctx %u ctx_type %s ts %u policy %lX dispatch_queue=%d\n", drawobj->context->id, kgsl_context_type(drawctxt->type), - drawobj->timestamp, drawobj->context->gmu_dispatch_queue); + drawobj->timestamp, CMDOBJ(drawobj)->fault_recovery, + drawobj->context->gmu_dispatch_queue); pr_context(device, drawobj->context, "cmdline: %s\n", drawctxt->base.proc_priv->cmdline); @@ -1809,11 +1810,13 @@ static void adreno_hwsched_reset_and_snapshot(struct adreno_device *adreno_dev, if (!obj && (fault & ADRENO_IOMMU_PAGE_FAULT)) obj = get_active_cmdobj(adreno_dev); - if (obj) + if (obj) { drawobj = obj->drawobj; - else if (hwsched->recurring_cmdobj && + CMDOBJ(drawobj)->fault_recovery = cmd->gc.policy; + } else if (hwsched->recurring_cmdobj && hwsched->recurring_cmdobj->base.context->id == cmd->gc.ctxt_id) { drawobj = DRAWOBJ(hwsched->recurring_cmdobj); + CMDOBJ(drawobj)->fault_recovery = cmd->gc.policy; if (!kref_get_unless_zero(&drawobj->refcount)) drawobj = NULL; } @@ -1836,6 +1839,7 @@ static void adreno_hwsched_reset_and_snapshot(struct adreno_device *adreno_dev, if (obj_lpac) { 
drawobj_lpac = obj_lpac->drawobj; + CMDOBJ(drawobj_lpac)->fault_recovery = cmd->lpac.policy; context_lpac = drawobj_lpac->context; if (gpudev->lpac_fault_header) gpudev->lpac_fault_header(adreno_dev, drawobj_lpac); From b913ed2b5f95dfa575c875da0456c9928ead085c Mon Sep 17 00:00:00 2001 From: Akhil P Oommen Date: Fri, 1 Sep 2023 02:03:53 +0530 Subject: [PATCH 0702/1016] kgsl: Avoid blocking thermal mitigation During thermal mitigation, devfreq cooling driver triggers PMQOS update which in turn calls the PMQOS notifier callback registered by gpu devfreq. This devfreq CB tries to acquire the gpu devfreq lock which could be already held by kgsl dcvs thread. And the kgsl dcvs thread could be blocked waiting for scm-lock from keymaster driver. Keymaster's work in TZ is known to take more than 10 seconds in some cases. thermal driver -> devfreq cooling -> gpu devfreq cb (blocked on gpu devfreq lock) -> kgsl dcvs thread -> scm driver (blocked on scm-lock) -> Keymaster's scm call This blocks the thermal mitigation for gpu (potentially for other subsystems too) and results in thermal reset. To fix this, we should avoid devfreq-cooling driver and directly register as a thermal cooling device. Then we can immediately do thermal mitigation and then call PMQOS update from a workqueue to avoid block thermal driver's callback. 
Change-Id: I3b8289def994a211acab587bf5e452b339cb8ffb Signed-off-by: Akhil P Oommen Signed-off-by: Mohammed Mirza Mandayappurath Manzoor --- kgsl_pwrctrl.c | 210 ++++++++++++++++++++++++++++++++++++++++++++---- kgsl_pwrctrl.h | 8 ++ kgsl_pwrscale.c | 63 +-------------- kgsl_pwrscale.h | 2 - 4 files changed, 205 insertions(+), 78 deletions(-) diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 3e94d95e31..f55b9f1124 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "kgsl_device.h" @@ -75,17 +76,21 @@ static void _bimc_clk_prepare_enable(struct kgsl_device *device, static unsigned int _adjust_pwrlevel(struct kgsl_pwrctrl *pwr, int level, struct kgsl_pwr_constraint *pwrc) { - unsigned int max_pwrlevel = max_t(unsigned int, pwr->thermal_pwrlevel, + unsigned int thermal_pwrlevel = READ_ONCE(pwr->thermal_pwrlevel); + unsigned int max_pwrlevel = max_t(unsigned int, thermal_pwrlevel, pwr->max_pwrlevel); unsigned int min_pwrlevel = min_t(unsigned int, pwr->thermal_pwrlevel_floor, pwr->min_pwrlevel); + /* Ensure that max pwrlevel is within pmqos max limit */ + max_pwrlevel = max_t(unsigned int, max_pwrlevel, + READ_ONCE(pwr->pmqos_max_pwrlevel)); + /* Ensure that max/min pwrlevels are within thermal max/min limits */ max_pwrlevel = min_t(unsigned int, max_pwrlevel, pwr->thermal_pwrlevel_floor); - min_pwrlevel = max_t(unsigned int, min_pwrlevel, - pwr->thermal_pwrlevel); + min_pwrlevel = max_t(unsigned int, min_pwrlevel, thermal_pwrlevel); switch (pwrc->type) { case KGSL_CONSTRAINT_PWRLEVEL: { @@ -366,8 +371,10 @@ static ssize_t thermal_pwrlevel_show(struct device *dev, struct kgsl_device *device = dev_get_drvdata(dev); struct kgsl_pwrctrl *pwr = &device->pwrctrl; + u32 thermal_pwrlevel = max_t(u32, READ_ONCE(pwr->thermal_pwrlevel), + READ_ONCE(pwr->pmqos_max_pwrlevel)); - return scnprintf(buf, PAGE_SIZE, "%d\n", pwr->thermal_pwrlevel); + return scnprintf(buf, PAGE_SIZE, "%d\n", thermal_pwrlevel); } static 
ssize_t max_pwrlevel_store(struct device *dev, @@ -1733,6 +1740,149 @@ static int kgsl_pwrctrl_clk_set_rate(struct clk *grp_clk, unsigned int freq, return ret; } +/* + * pmqos_max_notifier_call - Callback function registered to receive qos max + * frequency events. + * @nb: The notifier block + * @val: Max frequency value in KHz for GPU + * + * The function subscribes to GPU max frequency change and updates thermal + * power level accordingly. + */ +static int pmqos_max_notifier_call(struct notifier_block *nb, unsigned long val, void *data) +{ + struct kgsl_pwrctrl *pwr = container_of(nb, struct kgsl_pwrctrl, nb_max); + struct kgsl_device *device = container_of(pwr, struct kgsl_device, pwrctrl); + u32 max_freq = val * 1000; + int level; + u32 thermal_pwrlevel; + + if (!device->pwrscale.devfreq_enabled) + return NOTIFY_DONE; + + for (level = pwr->num_pwrlevels - 1; level >= 0; level--) { + /* get nearest power level with a maximum delta of 5MHz */ + if (abs(pwr->pwrlevels[level].gpu_freq - max_freq) < 5000000) + break; + } + + if (level < 0) + return NOTIFY_DONE; + + if (level == pwr->pmqos_max_pwrlevel) + return NOTIFY_OK; + + pwr->pmqos_max_pwrlevel = level; + + /* + * Since thermal constraint is already applied prior to this, if qos constraint is same as + * thermal constraint, we can return early here. 
+ */ + thermal_pwrlevel = READ_ONCE(pwr->thermal_pwrlevel); + if (pwr->pmqos_max_pwrlevel == thermal_pwrlevel) + return NOTIFY_OK; + + trace_kgsl_thermal_constraint(max_freq); + + mutex_lock(&device->mutex); + + /* Update the current level using the new limit */ + kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel); + + mutex_unlock(&device->mutex); + return NOTIFY_OK; +} + +static int kgsl_cooling_get_max_state(struct thermal_cooling_device *cooling_dev, + unsigned long *state) +{ + struct kgsl_device *device = cooling_dev->devdata; + + *state = device->pwrctrl.num_pwrlevels - 1; + return 0; +} + +static int kgsl_cooling_get_cur_state(struct thermal_cooling_device *cooling_dev, + unsigned long *state) +{ + struct kgsl_device *device = cooling_dev->devdata; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + *state = READ_ONCE(pwr->thermal_pwrlevel); + return 0; +} + +static int kgsl_cooling_set_cur_state(struct thermal_cooling_device *cooling_dev, + unsigned long state) +{ + struct kgsl_device *device = cooling_dev->devdata; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + u32 freq; + + if (state > (pwr->num_pwrlevels - 1)) + return -EINVAL; + + if (state == READ_ONCE(pwr->thermal_pwrlevel)) + return 0; + + freq = pwr->pwrlevels[state].gpu_freq; + trace_kgsl_thermal_constraint(freq); + WRITE_ONCE(pwr->thermal_pwrlevel, state); + + mutex_lock(&device->mutex); + + /* Update the current level using the new limit */ + kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel); + + mutex_unlock(&device->mutex); + + queue_work(kgsl_driver.workqueue, &pwr->cooling_ws); + return 0; +} + +static const struct thermal_cooling_device_ops kgsl_cooling_ops = { + .get_max_state = kgsl_cooling_get_max_state, + .get_cur_state = kgsl_cooling_get_cur_state, + .set_cur_state = kgsl_cooling_set_cur_state, +}; + +static void do_pmqos_update(struct work_struct *work) +{ + struct kgsl_pwrctrl *pwr = container_of(work, struct kgsl_pwrctrl, cooling_ws); + u32 thermal_pwrlevel = 
READ_ONCE(pwr->thermal_pwrlevel); + u32 freq = pwr->pwrlevels[thermal_pwrlevel].gpu_freq; + + dev_pm_qos_update_request(&pwr->pmqos_max_freq, DIV_ROUND_UP(freq, HZ_PER_KHZ)); +} + +static int register_thermal_cooling_device(struct kgsl_device *device, struct device_node *np) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + const char *name = "kgsl"; + int ret; + + ret = dev_pm_qos_add_request(&device->pdev->dev, &pwr->pmqos_max_freq, + DEV_PM_QOS_MAX_FREQUENCY, PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); + if (ret) + goto err; + + INIT_WORK(&pwr->cooling_ws, do_pmqos_update); + + pwr->cooling_dev = thermal_of_cooling_device_register(np, name, device, + &kgsl_cooling_ops); + if (IS_ERR(pwr->cooling_dev)) { + dev_pm_qos_remove_request(&pwr->pmqos_max_freq); + ret = PTR_ERR(pwr->cooling_dev); + goto err; + } + + return 0; + +err: + dev_err(device->dev, "Unable to register thermal cooling device: %d\n", ret); + return ret; +} + int kgsl_pwrctrl_init(struct kgsl_device *device) { int i, result, freq; @@ -1761,16 +1911,6 @@ int kgsl_pwrctrl_init(struct kgsl_device *device) init_waitqueue_head(&device->active_cnt_wq); - /* Initialize the thermal clock constraints */ - pwr->thermal_pwrlevel = 0; - pwr->thermal_pwrlevel_floor = pwr->num_pwrlevels - 1; - - result = dev_pm_qos_add_request(&pdev->dev, &pwr->sysfs_thermal_req, - DEV_PM_QOS_MAX_FREQUENCY, - PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); - if (result < 0) - dev_err(device->dev, "PM QoS thermal request failed:%d\n", result); - for (i = 0; i < pwr->num_pwrlevels; i++) { freq = pwr->pwrlevels[i].gpu_freq; @@ -1823,7 +1963,34 @@ int kgsl_pwrctrl_init(struct kgsl_device *device) result = kgsl_register_gdsc_notifier(device); if (result) { dev_err(&pdev->dev, "Failed to register gdsc notifier: %d\n", result); - return result; + goto err; + } + + /* Initialize the thermal clock constraints */ + pwr->thermal_pwrlevel = 0; + pwr->thermal_pwrlevel_floor = pwr->num_pwrlevels - 1; + result = dev_pm_qos_add_request(&pdev->dev, 
&pwr->sysfs_thermal_req, + DEV_PM_QOS_MAX_FREQUENCY, + PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); + if (result < 0) + dev_err(device->dev, "PM QoS sysfs thermal request failed:%d\n", result); + + /* + * Due to the way it is implemented by the thermal driver, thermal skin mitigation event + * is triggered through PMQOS. Usually, this is supposed to be handled via devfreq. + * Because devfreq recommendations can be overridden by kgsl min_freq/pwrlevel sysfs nodes, + * kgsl should listen to PMQOS events and apply MAX FREQUENCY limit correctly. + */ + pwr->nb_max.notifier_call = pmqos_max_notifier_call; + result = dev_pm_qos_add_notifier(&pdev->dev, &pwr->nb_max, DEV_PM_QOS_MAX_FREQUENCY); + if (result) + dev_err(device->dev, "Unable to register notifier call for PMQOS updates: %d\n", + result); + + result = register_thermal_cooling_device(device, pdev->dev.of_node); + if (result) { + dev_pm_qos_remove_notifier(&pdev->dev, &pwr->nb_max, DEV_PM_QOS_MAX_FREQUENCY); + goto err; } pwr->power_flags = 0; @@ -1831,6 +1998,12 @@ int kgsl_pwrctrl_init(struct kgsl_device *device) pm_runtime_enable(&pdev->dev); return 0; + +err: + if (dev_pm_qos_request_active(&pwr->sysfs_thermal_req)) + dev_pm_qos_remove_request(&pwr->sysfs_thermal_req); + + return result; } void kgsl_pwrctrl_close(struct kgsl_device *device) @@ -1839,6 +2012,13 @@ void kgsl_pwrctrl_close(struct kgsl_device *device) pwr->power_flags = 0; + if (!IS_ERR(pwr->cooling_dev)) { + dev_pm_qos_remove_request(&pwr->pmqos_max_freq); + dev_pm_qos_remove_notifier(&device->pdev->dev, &pwr->nb_max, + DEV_PM_QOS_MAX_FREQUENCY); + thermal_cooling_device_unregister(pwr->cooling_dev); + } + if (dev_pm_qos_request_active(&pwr->sysfs_thermal_req)) dev_pm_qos_remove_request(&pwr->sysfs_thermal_req); diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index 05ccc3025f..64e09084eb 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -170,6 +170,14 @@ struct kgsl_pwrctrl { u32 cur_ab; /** @sysfs_thermal_req - PM QoS maximum frequency request from 
user (via sysfs) */ struct dev_pm_qos_request sysfs_thermal_req; + /* pmqos_max_pwrlevel: Max power level limit set from the PMQOS notifier */ + u32 pmqos_max_pwrlevel; + /* cooling_dev: Handle to thermal cooling dev */ + struct thermal_cooling_device *cooling_dev; + /* pmqos_max_freq: Handle to raise PMQOS MAX FREQUENCY request */ + struct dev_pm_qos_request pmqos_max_freq; + /* cooling_ws: Work which updates PMQOS during thermal event */ + struct work_struct cooling_ws; /** @time_in_pwrlevel: Each pwrlevel active duration in usec */ u64 time_in_pwrlevel[KGSL_MAX_PWRLEVELS]; /** @last_stat_updated: The last time stats were updated */ diff --git a/kgsl_pwrscale.c b/kgsl_pwrscale.c index cd0d70aa38..86ce54f96b 100644 --- a/kgsl_pwrscale.c +++ b/kgsl_pwrscale.c @@ -512,7 +512,8 @@ int kgsl_busmon_target(struct device *dev, unsigned long *freq, u32 flags) * Ignore this check when only single power level in use to * avoid setting default AB vote in normal situations too. */ - if (pwr->thermal_pwrlevel != pwr->num_pwrlevels - 1 || + if ((pwr->thermal_pwrlevel != pwr->num_pwrlevels - 1 && + pwr->pmqos_max_pwrlevel != pwr->num_pwrlevels - 1) || pwr->num_pwrlevels == 1) pwr->bus_ab_mbytes = ab_mbytes; else @@ -648,49 +649,6 @@ static void pwrscale_of_ca_aware(struct kgsl_device *device) of_node_put(node); } -/* - * thermal_max_notifier_call - Callback function registered to receive qos max - * frequency events. - * @nb: The notifier block - * @val: Max frequency value in KHz for GPU - * - * The function subscribes to GPU max frequency change and updates thermal - * power level accordingly. 
- */ -static int thermal_max_notifier_call(struct notifier_block *nb, unsigned long val, void *data) -{ - struct kgsl_pwrctrl *pwr = container_of(nb, struct kgsl_pwrctrl, nb_max); - struct kgsl_device *device = container_of(pwr, struct kgsl_device, pwrctrl); - u32 max_freq = val * 1000; - int level; - - if (!device->pwrscale.devfreq_enabled) - return NOTIFY_DONE; - - for (level = pwr->num_pwrlevels - 1; level >= 0; level--) { - /* get nearest power level with a maximum delta of 5MHz */ - if (abs(pwr->pwrlevels[level].gpu_freq - max_freq) < 5000000) - break; - } - - if (level < 0) - return NOTIFY_DONE; - - if (level == pwr->thermal_pwrlevel) - return NOTIFY_OK; - - trace_kgsl_thermal_constraint(max_freq); - pwr->thermal_pwrlevel = level; - - mutex_lock(&device->mutex); - - /* Update the current level using the new limit */ - kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel); - - mutex_unlock(&device->mutex); - return NOTIFY_OK; -} - int kgsl_pwrscale_init(struct kgsl_device *device, struct platform_device *pdev, const char *governor) { @@ -771,16 +729,6 @@ int kgsl_pwrscale_init(struct kgsl_device *device, struct platform_device *pdev, return ret; } - pwr->nb_max.notifier_call = thermal_max_notifier_call; - ret = dev_pm_qos_add_notifier(&pdev->dev, &pwr->nb_max, DEV_PM_QOS_MAX_FREQUENCY); - - if (ret) { - dev_err(device->dev, "Unable to register notifier call for thermal: %d\n", ret); - device->pwrscale.enabled = false; - msm_adreno_tz_exit(); - return ret; - } - devfreq = devfreq_add_device(&pdev->dev, &gpu_profile->profile, governor, &adreno_tz_data); if (IS_ERR_OR_NULL(devfreq)) { @@ -791,10 +739,6 @@ int kgsl_pwrscale_init(struct kgsl_device *device, struct platform_device *pdev, pwrscale->enabled = true; pwrscale->devfreqptr = devfreq; - pwrscale->cooling_dev = of_devfreq_cooling_register(pdev->dev.of_node, - devfreq); - if (IS_ERR(pwrscale->cooling_dev)) - pwrscale->cooling_dev = NULL; if (adreno_tz_data.bus.num) pwrscale_busmon_create(device, pdev, 
pwrscale->freq_table); @@ -836,8 +780,6 @@ void kgsl_pwrscale_close(struct kgsl_device *device) if (!pwrscale->devfreqptr) return; - if (pwrscale->cooling_dev) - devfreq_cooling_unregister(pwrscale->cooling_dev); if (pwrscale->devfreq_wq) { flush_workqueue(pwrscale->devfreq_wq); @@ -847,7 +789,6 @@ void kgsl_pwrscale_close(struct kgsl_device *device) devfreq_remove_device(device->pwrscale.devfreqptr); device->pwrscale.devfreqptr = NULL; - dev_pm_qos_remove_notifier(&device->pdev->dev, &pwr->nb_max, DEV_PM_QOS_MAX_FREQUENCY); msm_adreno_tz_exit(); } diff --git a/kgsl_pwrscale.h b/kgsl_pwrscale.h index 5b9c3d4d43..271511d6cc 100644 --- a/kgsl_pwrscale.h +++ b/kgsl_pwrscale.h @@ -40,7 +40,6 @@ struct kgsl_power_stats { * @devfreq_notify_ws - Notify devfreq to update sampling * @next_governor_call - Timestamp after which the governor may be notified of * a new sample - * @cooling_dev - Thermal cooling device handle * @ctxt_aware_enable - Whether or not ctxt aware DCVS feature is enabled * @ctxt_aware_busy_penalty - The time in microseconds required to trigger * ctxt aware power level jump @@ -61,7 +60,6 @@ struct kgsl_pwrscale { struct work_struct devfreq_resume_ws; struct work_struct devfreq_notify_ws; ktime_t next_governor_call; - struct thermal_cooling_device *cooling_dev; bool ctxt_aware_enable; unsigned int ctxt_aware_target_pwrlevel; unsigned int ctxt_aware_busy_penalty; From b049ac91739238c051243c013e7fea749b47bfd1 Mon Sep 17 00:00:00 2001 From: Carter Cooper Date: Mon, 29 Jan 2024 16:17:16 -0700 Subject: [PATCH 0703/1016] kgsl: gen8: Add additional non context overrides Add more non context overrides that are desired. Also make sure that the power up reglist is re-generated after every successful debugfs update request. 
Change-Id: Ife204947255971a80231d6dfdf9e4d6fc3e9bec0 Signed-off-by: Carter Cooper Signed-off-by: Hareesh Gundu --- adreno_gen8.c | 18 +++++++++++++++--- gen8_reg.h | 6 +++++- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index 51f0b2d724..cec274f142 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -165,6 +165,8 @@ struct gen8_nonctxt_overrides gen8_nc_overrides[] = { { GEN8_UCHE_DBG_ECO_CNTL_0, BIT(PIPE_NONE), 0, 0, 2, }, { GEN8_UCHE_HW_DBG_CNTL, BIT(PIPE_NONE), 0, 0, 2, }, { GEN8_UCHE_CCHE_HW_DBG_CNTL, BIT(PIPE_NONE), 0, 0, 2, }, + { GEN8_GRAS_NC_MODE_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, + { GEN8_GRAS_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, { GEN8_RB_DBG_ECO_CNTL, BIT(PIPE_BR), 0, 0, 3, }, { GEN8_RB_CCU_DBG_ECO_CNTL, BIT(PIPE_BR), 0, 0, 3, }, { GEN8_RB_CCU_CNTL, BIT(PIPE_BR), 0, 0, 0, }, @@ -179,10 +181,18 @@ struct gen8_nonctxt_overrides gen8_nc_overrides[] = { { GEN8_PC_CHICKEN_BITS_2, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, { GEN8_PC_CHICKEN_BITS_3, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, { GEN8_PC_CHICKEN_BITS_4, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, + { GEN8_PC_CHICKEN_BITS_5, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 2, }, { GEN8_PC_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 3, }, - { GEN8_VFD_CB_LP_REQ_CNT, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, }, - { GEN8_VFD_CB_BUSY_REQ_CNT, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, }, - { GEN8_VFD_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, }, + { GEN8_VFD_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, + { GEN8_VFD_CB_BV_THRESHOLD, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, + { GEN8_VFD_CB_BR_THRESHOLD, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, + { GEN8_VFD_CB_LP_REQ_CNT, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, + { GEN8_VFD_CB_BUSY_REQ_CNT, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, + { GEN8_VPC_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 2, }, + { GEN8_VPC_DBG_ECO_CNTL_1, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 2, }, + { GEN8_VPC_DBG_ECO_CNTL_2, 
BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 1, }, + { GEN8_VPC_DBG_ECO_CNTL_3, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 2, }, + { GEN8_VPC_FLATSHADE_MODE_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, { GEN8_SP_DBG_ECO_CNTL, BIT(PIPE_NONE), 0, 0, 1, }, { GEN8_SP_NC_MODE_CNTL, BIT(PIPE_NONE), 0, 0, 0, }, { GEN8_SP_CHICKEN_BITS, BIT(PIPE_NONE), 0, 0, 1, }, @@ -284,6 +294,8 @@ static void nc_override_cb(struct adreno_device *adreno_dev, void *priv) struct gen8_device *gen8_dev = container_of(adreno_dev, struct gen8_device, adreno_dev); gen8_dev->nc_overrides_enabled = true; + /* Force to update and make new patched reglist */ + adreno_dev->patch_reglist = false; } static ssize_t nc_override_set(struct file *filep, diff --git a/gen8_reg.h b/gen8_reg.h index 8a3b52ada5..f4e4d6d739 100644 --- a/gen8_reg.h +++ b/gen8_reg.h @@ -997,6 +997,7 @@ #define GEN8_RB_UFC_DBG_CNTL 0x8f29 /* VPC registers */ +#define GEN8_VPC_DBG_ECO_CNTL_2 0x9604 #define GEN8_VPC_PERFCTR_VPC_SEL_2_0 0x9670 #define GEN8_VPC_PERFCTR_VPC_SEL_2_1 0x9671 #define GEN8_VPC_PERFCTR_VPC_SEL_2_2 0x9672 @@ -1009,6 +1010,7 @@ #define GEN8_VPC_PERFCTR_VPC_SEL_2_9 0x9679 #define GEN8_VPC_PERFCTR_VPC_SEL_2_10 0x967a #define GEN8_VPC_PERFCTR_VPC_SEL_2_11 0x967b +#define GEN8_VPC_DBG_ECO_CNTL 0x9680 #define GEN8_VPC_PERFCTR_VPC_SEL_0 0x9690 /* Indexed Register */ #define GEN8_VPC_PERFCTR_VPC_SEL_1 0x9691 /* Indexed Register */ #define GEN8_VPC_PERFCTR_VPC_SEL_2 0x9692 /* Indexed Register */ @@ -1024,6 +1026,7 @@ #define GEN8_VPC_LB_MODE_CNTL 0x9740 #define GEN8_VPC_FLATSHADE_MODE_CNTL 0x9741 #define GEN8_VPC_DBG_ECO_CNTL_1 0x9742 +#define GEN8_VPC_DBG_ECO_CNTL_3 0x9745 #define GEN8_VPC_PERFCTR_VPC_SEL_1_0 0x9750 #define GEN8_VPC_PERFCTR_VPC_SEL_1_1 0x9751 #define GEN8_VPC_PERFCTR_VPC_SEL_1_2 0x9752 @@ -1040,7 +1043,6 @@ /* PC registers:*/ #define GEN8_PC_AUTO_VERTEX_STRIDE 0x9e0a #define GEN8_PC_VIS_STREAM_CNTL 0x9e0d -#define GEN8_PC_CHICKEN_BITS_2 0x9f20 #define GEN8_PC_CHICKEN_BITS_3 0x9e22 #define GEN8_PC_CHICKEN_BITS_4 
0x9e23 #define GEN8_PC_PERFCTR_PC_SEL_0 0x9e30 @@ -1061,6 +1063,8 @@ #define GEN8_PC_PERFCTR_PC_SEL_15 0x9e3f #define GEN8_PC_CHICKEN_BITS_1 0x9e50 #define GEN8_PC_DBG_ECO_CNTL 0x9e53 +#define GEN8_PC_CHICKEN_BITS_2 0x9f20 +#define GEN8_PC_CHICKEN_BITS_5 0x9f23 #define GEN8_PC_CONTEXT_SWITCH_STABILIZE_CNTL_1 0x9e64 #define GEN8_PC_SLICE_PERFCTR_PC_SEL_0 0x9f00 From 567328d4f34da7eef18fc1a2639730c99c6be092 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Tue, 12 Mar 2024 16:13:39 -0700 Subject: [PATCH 0704/1016] kgsl: gen8: Fix UCHE_CLIENT_PF programming For gen8 uche client id field expanded from 3 to 6. Thus, fix this programming issue to match with HW spec. Change-Id: I03089a029e8f101e4017306f9b8f8667ce9c0902 Signed-off-by: Hareesh Gundu --- adreno_gen8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index 51f0b2d724..d8851b0b62 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -1152,7 +1152,7 @@ int gen8_start(struct adreno_device *adreno_dev) kgsl_regwrite(device, GEN8_RBBM_SLICE_INTERFACE_HANG_INT_CNTL, BIT(30)); kgsl_regwrite(device, GEN8_UCHE_CLIENT_PF, BIT(7) | - FIELD_PREP(GENMASK(3, 0), adreno_dev->uche_client_pf)); + FIELD_PREP(GENMASK(6, 0), adreno_dev->uche_client_pf)); /* Enable the GMEM save/restore feature for preemption */ if (adreno_is_preemption_enabled(adreno_dev)) { From eb97bf9d43948cc19e59e56232b94c09059e7dfa Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Tue, 20 Feb 2024 15:23:11 -0800 Subject: [PATCH 0705/1016] kgsl: gmu: Introduce GMU fault panic setting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce GMU Panic setting to force crash on GMU fault errors which needs crash dumps rather than snapshot. GMU panic setting can be overridden based on the debugfs ‘gmu_fault_policy’ entry (OR) target specific probe function. By default GMU panic setting is set to GMU_FAULT_PANIC_NONE. 
Following are the various GMU fault panic settings allowed to configure dynamically and statically. 1) GMU_FAULT_DEVICE_START 2) GMU_FAULT_HFI_INIT 3) GMU_FAULT_OOB_SET 4) GMU_FAULT_HFI_RECIVE_ACK 5) GMU_FAULT_SEND_CMD_WAIT_INLINE 6) GMU_FAULT_HFI_SEND_GENERIC_REQ 7) GMU_FAULT_F2H_MSG_ERR 8) GMU_FAULT_H2F_MSG_START 9) GMU_FAULT_WAIT_ACK_COMPLETION 10) GMU_FAULT_HFI_ACK 11) GMU_FAULT_CTX_UNREGISTER 12) GMU_FAULT_WAIT_FOR_LOWEST_IDLE 13) GMU_FAULT_WAIT_FOR_IDLE 14) GMU_FAULT_HW_FENCE. Change-Id: Ib3350e8d893a383a57f839c0cb782e1036a58353 Signed-off-by: Hareesh Gundu --- adreno_a6xx_gmu.c | 26 ++++++++++++++++---------- adreno_a6xx_gmu.h | 6 ++++-- adreno_a6xx_hfi.c | 14 ++++++++++---- adreno_a6xx_hwsched.c | 4 ++-- adreno_a6xx_hwsched_hfi.c | 16 ++++++++++------ adreno_a6xx_preempt.c | 4 ++-- adreno_a6xx_rgmu.c | 15 ++++++++++----- adreno_a6xx_ringbuffer.c | 4 ++-- adreno_debugfs.c | 36 ++++++++++++++++++++++++++++++++++-- adreno_dispatch.c | 4 ++-- adreno_gen7_gmu.c | 23 ++++++++++++++--------- adreno_gen7_gmu.h | 6 ++++-- adreno_gen7_hfi.c | 16 +++++++++++----- adreno_gen7_hwsched.c | 8 ++++---- adreno_gen7_hwsched_hfi.c | 20 ++++++++++++-------- adreno_gen7_preempt.c | 4 ++-- adreno_gen7_ringbuffer.c | 4 ++-- adreno_gen8_gmu.c | 27 +++++++++++++++++---------- adreno_gen8_gmu.h | 6 ++++-- adreno_gen8_hfi.c | 17 +++++++++++------ adreno_gen8_hwsched.c | 8 ++++---- adreno_gen8_hwsched_hfi.c | 20 ++++++++++++-------- adreno_gen8_preempt.c | 4 ++-- adreno_gen8_ringbuffer.c | 4 ++-- adreno_hwsched.c | 10 +++++----- kgsl_gmu_core.c | 5 +++-- kgsl_gmu_core.h | 35 +++++++++++++++++++++++++++++++++-- 27 files changed, 234 insertions(+), 112 deletions(-) diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index 35681ad46f..6473ace5f9 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -637,7 +637,7 @@ int a6xx_gmu_device_start(struct adreno_device *adreno_dev) val, GMU_START_TIMEOUT, mask)) { dev_err(&gmu->pdev->dev, "GMU doesn't boot\n"); - 
gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_DEVICE_START); return -ETIMEDOUT; } @@ -661,7 +661,7 @@ int a6xx_gmu_hfi_start(struct adreno_device *adreno_dev) GMU_START_TIMEOUT, BIT(0))) { dev_err(&gmu->pdev->dev, "GMU HFI init failed\n"); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_HFI_INIT); return -ETIMEDOUT; } @@ -950,9 +950,9 @@ int a6xx_gmu_oob_set(struct kgsl_device *device, check, GPU_START_TIMEOUT, check)) { if (req == oob_perfcntr) gmu->num_oob_perfcntr--; - gmu_core_fault_snapshot(device); ret = -ETIMEDOUT; WARN(1, "OOB request %s timed out\n", oob_to_str(req)); + gmu_core_fault_snapshot(device, GMU_FAULT_OOB_SET); trigger_reset_recovery(adreno_dev, req); } @@ -1144,7 +1144,7 @@ int a6xx_gmu_sptprac_enable(struct adreno_device *adreno_dev) SPTPRAC_CTRL_TIMEOUT, SPTPRAC_POWERON_STATUS_MASK)) { dev_err(&gmu->pdev->dev, "power on SPTPRAC fail\n"); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); return -ETIMEDOUT; } @@ -1362,7 +1362,7 @@ int a6xx_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) } WARN_ON(1); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_WAIT_FOR_LOWEST_IDLE); return -ETIMEDOUT; } @@ -1384,7 +1384,7 @@ int a6xx_gmu_wait_for_idle(struct adreno_device *adreno_dev) "GMU not idling: status2=0x%x %llx %llx\n", status2, ts1, a6xx_read_alwayson(ADRENO_DEVICE(device))); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_WAIT_FOR_IDLE); return -ETIMEDOUT; } @@ -2093,9 +2093,12 @@ static unsigned int a6xx_gmu_ifpc_isenabled(struct kgsl_device *device) } /* Send an NMI to the GMU */ -void a6xx_gmu_send_nmi(struct kgsl_device *device, bool force) +void a6xx_gmu_send_nmi(struct kgsl_device *device, bool force, + enum gmu_fault_panic_policy gf_policy) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); 
+ u64 ticks = gpudev->read_alwayson(adreno_dev); struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); u32 val; @@ -2106,7 +2109,7 @@ void a6xx_gmu_send_nmi(struct kgsl_device *device, bool force) if (a6xx_gmu_gx_is_on(adreno_dev) && adreno_smmu_is_stalled(adreno_dev)) { dev_err(&gmu->pdev->dev, "Skipping NMI because SMMU is stalled\n"); - return; + goto done; } if (force) @@ -2151,6 +2154,9 @@ nmi: /* Wait for the NMI to be handled */ udelay(200); + +done: + KGSL_GMU_CORE_FORCE_PANIC(device->gmu_core.gf_panic, gmu->pdev, ticks, gf_policy); } static void a6xx_gmu_cooperative_reset(struct kgsl_device *device) @@ -2177,7 +2183,7 @@ static void a6xx_gmu_cooperative_reset(struct kgsl_device *device) * If we dont get a snapshot ready from GMU, trigger NMI * and if we still timeout then we just continue with reset. */ - a6xx_gmu_send_nmi(device, true); + a6xx_gmu_send_nmi(device, true, GMU_FAULT_PANIC_NONE); gmu_core_regread(device, A6XX_GMU_CM3_FW_INIT_RESULT, &result); if ((result & 0x800) != 0x800) @@ -2226,7 +2232,7 @@ void a6xx_gmu_handle_watchdog(struct adreno_device *adreno_dev) gmu_core_regwrite(device, A6XX_GMU_AO_HOST_INTERRUPT_MASK, (mask | GMU_INT_WDOG_BITE)); - a6xx_gmu_send_nmi(device, false); + a6xx_gmu_send_nmi(device, false, GMU_FAULT_PANIC_NONE); dev_err_ratelimited(&gmu->pdev->dev, "GMU watchdog expired interrupt received\n"); diff --git a/adreno_a6xx_gmu.h b/adreno_a6xx_gmu.h index 4ad298f6ca..1e24ec84ca 100644 --- a/adreno_a6xx_gmu.h +++ b/adreno_a6xx_gmu.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2017-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __ADRENO_A6XX_GMU_H #define __ADRENO_A6XX_GMU_H @@ -438,9 +438,11 @@ void a6xx_gmu_handle_watchdog(struct adreno_device *adreno_dev); /** * a6xx_gmu_send_nmi - Send NMI to GMU * @device: Pointer to the kgsl device + * @gf_policy: GMU fault panic setting policy * @force: Boolean to forcefully send NMI irrespective of GMU state */ -void a6xx_gmu_send_nmi(struct kgsl_device *device, bool force); +void a6xx_gmu_send_nmi(struct kgsl_device *device, bool force, + enum gmu_fault_panic_policy gf_policy); /** * a6xx_gmu_add_to_minidump - Register a6xx_device with va minidump diff --git a/adreno_a6xx_hfi.c b/adreno_a6xx_hfi.c index 7bde0a2afe..908d25de59 100644 --- a/adreno_a6xx_hfi.c +++ b/adreno_a6xx_hfi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -252,7 +252,7 @@ int a6xx_receive_ack_cmd(struct a6xx_gmu_device *gmu, void *rcvd, "HFI ACK: Cannot find sender for 0x%8.8x Waiter: 0x%8.8x\n", req_hdr, ret_cmd->sent_hdr); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_HFI_RECIVE_ACK); return -ENODEV; } @@ -331,10 +331,10 @@ static int a6xx_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev, HFI_IRQ_MSGQ_MASK, HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT); if (rc) { - gmu_core_fault_snapshot(device); dev_err(&gmu->pdev->dev, "Timed out waiting on ack for 0x%8.8x (id %d, sequence %d)\n", cmd[0], MSG_HDR_GET_ID(*cmd), MSG_HDR_GET_SEQNUM(*cmd)); + gmu_core_fault_snapshot(device, GMU_FAULT_SEND_CMD_WAIT_INLINE); return rc; } @@ -362,11 +362,11 @@ int a6xx_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd, u32 s struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - gmu_core_fault_snapshot(device); dev_err(&gmu->pdev->dev, "HFI ACK failure: Req=0x%8.8X, Result=0x%8.8X\n", ret_cmd.results[1], ret_cmd.results[2]); + gmu_core_fault_snapshot(device, GMU_FAULT_HFI_SEND_GENERIC_REQ); return -EINVAL; } @@ -536,12 +536,18 @@ static int a6xx_hfi_send_test(struct adreno_device *adreno_dev) void adreno_a6xx_receive_err_req(struct a6xx_gmu_device *gmu, void *rcvd) { + struct kgsl_device *device = KGSL_DEVICE(a6xx_gmu_to_adreno(gmu)); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(ADRENO_DEVICE(device)); + u64 ticks = gpudev->read_alwayson(ADRENO_DEVICE(device)); struct hfi_err_cmd *cmd = rcvd; dev_err(&gmu->pdev->dev, "HFI Error Received: %d %d %.16s\n", ((cmd->error_code >> 16) & 0xFFFF), (cmd->error_code & 0xFFFF), (char *) cmd->data); + + KGSL_GMU_CORE_FORCE_PANIC(device->gmu_core.gf_panic, + gmu->pdev, ticks, GMU_FAULT_F2H_MSG_ERR); } void adreno_a6xx_receive_debug_req(struct a6xx_gmu_device *gmu, void *rcvd) diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index 771ca83839..75893892ee 100644 
--- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -938,7 +938,7 @@ static void hwsched_idle_check(struct work_struct *work) if (!a6xx_hw_isidle(adreno_dev)) { dev_err(device->dev, "GPU isn't idle before SLUMBER\n"); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); } a6xx_hwsched_power_off(adreno_dev); @@ -1181,7 +1181,7 @@ void a6xx_hwsched_handle_watchdog(struct adreno_device *adreno_dev) gmu_core_regwrite(device, A6XX_GMU_AO_HOST_INTERRUPT_MASK, (mask | GMU_INT_WDOG_BITE)); - a6xx_gmu_send_nmi(device, false); + a6xx_gmu_send_nmi(device, false, GMU_FAULT_PANIC_NONE); dev_err_ratelimited(&gmu->pdev->dev, "GMU watchdog expired interrupt received\n"); diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index 42889498f6..563abcab43 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -611,6 +611,8 @@ static int check_ack_failure(struct adreno_device *adreno_dev, struct pending_cmd *ack) { struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + u64 ticks = gpudev->read_alwayson(adreno_dev); if (ack->results[2] != 0xffffffff) return 0; @@ -620,6 +622,8 @@ static int check_ack_failure(struct adreno_device *adreno_dev, MSG_HDR_GET_ID(ack->sent_hdr), MSG_HDR_GET_SEQNUM(ack->sent_hdr)); + KGSL_GMU_CORE_FORCE_PANIC(KGSL_DEVICE(adreno_dev)->gmu_core.gf_panic, + gmu->pdev, ticks, GMU_FAULT_HFI_ACK); return -EINVAL; } @@ -1008,8 +1012,7 @@ poll: dev_err(&gmu->pdev->dev, "Timed out processing MSG_START seqnum: %d\n", seqnum); - gmu_core_fault_snapshot(device); - return rc; + goto done; } /* Clear the interrupt */ @@ -1018,8 +1021,8 @@ poll: if (a6xx_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)) <= 0) { dev_err(&gmu->pdev->dev, "MSG_START: no payload\n"); - gmu_core_fault_snapshot(device); - return -EINVAL; + rc = -EINVAL; + goto done; } if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) { @@ -1050,7 +1053,8 @@ poll: 
MSG_HDR_GET_ID(rcvd[0]), MSG_HDR_GET_TYPE(rcvd[0])); - gmu_core_fault_snapshot(device); +done: + gmu_core_fault_snapshot(device, GMU_FAULT_H2F_MSG_START); return rc; } @@ -2065,7 +2069,7 @@ static int send_context_unregister_hfi(struct adreno_device *adreno_dev, mutex_lock(&device->mutex); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_CTX_UNREGISTER); /* * Trigger dispatcher based reset and recovery. Invalidate the diff --git a/adreno_a6xx_preempt.c b/adreno_a6xx_preempt.c index 771a0f4d67..949301961b 100644 --- a/adreno_a6xx_preempt.c +++ b/adreno_a6xx_preempt.c @@ -58,7 +58,7 @@ static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer, if (!atomic) { /* If WPTR update fails, set the fault and trigger recovery */ if (ret) { - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } @@ -402,7 +402,7 @@ void a6xx_preemption_trigger(struct adreno_device *adreno_dev, bool atomic) err: /* If fenced write fails, take inline snapshot and trigger recovery */ if (!atomic) { - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } else { diff --git a/adreno_a6xx_rgmu.c b/adreno_a6xx_rgmu.c index 469f92b35d..4742cb5da7 100644 --- a/adreno_a6xx_rgmu.c +++ b/adreno_a6xx_rgmu.c @@ -169,7 +169,7 @@ static int a6xx_rgmu_oob_set(struct kgsl_device *device, dev_err(&rgmu->pdev->dev, "Timed out while setting OOB req:%s status:0x%x\n", oob_to_str(req), status); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); return ret; } @@ -371,7 +371,7 @@ static int a6xx_rgmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) reg[7], reg[8], reg[9]); WARN_ON(1); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); return -ETIMEDOUT; } @@ -459,7 +459,7 @@ 
static int a6xx_rgmu_fw_start(struct adreno_device *adreno_dev, gmu_core_regread(device, A6XX_RGMU_CX_PCC_DEBUG, &status); dev_err(&rgmu->pdev->dev, "rgmu boot Failed. status:%08x\n", status); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); return -ETIMEDOUT; } @@ -601,9 +601,13 @@ static int a6xx_rgmu_load_firmware(struct adreno_device *adreno_dev) } /* Halt RGMU execution */ -static void a6xx_rgmu_halt_execution(struct kgsl_device *device, bool force) +static void a6xx_rgmu_halt_execution(struct kgsl_device *device, bool force, + enum gmu_fault_panic_policy gf_policy) { - struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(ADRENO_DEVICE(device)); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + u64 ticks = gpudev->read_alwayson(adreno_dev); unsigned int index, status, fence; if (!device->gmu_fault) @@ -635,6 +639,7 @@ static void a6xx_rgmu_halt_execution(struct kgsl_device *device, bool force) */ gmu_core_regwrite(device, A6XX_GMU_AO_AHB_FENCE_CTRL, 0); + KGSL_GMU_CORE_FORCE_PANIC(device->gmu_core.gf_panic, rgmu->pdev, ticks, gf_policy); } static void halt_gbif_arb(struct adreno_device *adreno_dev) diff --git a/adreno_a6xx_ringbuffer.c b/adreno_a6xx_ringbuffer.c index a5356faa8a..47db9cadcd 100644 --- a/adreno_a6xx_ringbuffer.c +++ b/adreno_a6xx_ringbuffer.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include "adreno.h" @@ -167,7 +167,7 @@ int a6xx_ringbuffer_submit(struct adreno_ringbuffer *rb, * If WPTR update fails, take inline snapshot and trigger * recovery. 
*/ - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } diff --git a/adreno_debugfs.c b/adreno_debugfs.c index 16f88b4870..f7cd9ffef4 100644 --- a/adreno_debugfs.c +++ b/adreno_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2002,2008-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -615,6 +615,34 @@ static int _ifpc_hyst_show(void *data, u64 *val) DEFINE_DEBUGFS_ATTRIBUTE(ifpc_hyst_fops, _ifpc_hyst_show, _ifpc_hyst_store, "%llu\n"); +static int _gmu_fp_store(void *data, u64 val) +{ + struct adreno_device *adreno_dev = data; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* Max allowed GMU fault settings are 9 bits */ + val = FIELD_GET(GENMASK(GMU_FAULT_MAX, 0), val); + + if (val == device->gmu_core.gf_panic) + return 0; + + mutex_lock(&device->mutex); + device->gmu_core.gf_panic = val; + mutex_unlock(&device->mutex); + + return 0; +} + +static int _gmu_fp_show(void *data, u64 *val) +{ + struct adreno_device *adreno_dev = data; + + *val = (u64) KGSL_DEVICE(adreno_dev)->gmu_core.gf_panic; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(gmu_fp_fops, _gmu_fp_show, _gmu_fp_store, "%llu\n"); + void adreno_debugfs_init(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -644,10 +672,14 @@ void adreno_debugfs_init(struct adreno_device *adreno_dev) debugfs_create_file("isdb", 0644, device->d_debugfs, device, &_isdb_fops); - if (gmu_core_isenabled(device)) + if (gmu_core_isenabled(device)) { debugfs_create_file("ifpc_hyst", 0644, device->d_debugfs, device, &ifpc_hyst_fops); + debugfs_create_file("gmu_fault_policy", 0644, device->d_debugfs, + device, &gmu_fp_fops); + } + if (ADRENO_FEATURE(adreno_dev, 
ADRENO_GMU_WARMBOOT)) debugfs_create_file("warmboot", 0644, device->d_debugfs, device, &warmboot_fops); diff --git a/adreno_dispatch.c b/adreno_dispatch.c index ed621b8b30..1e4bc9231c 100644 --- a/adreno_dispatch.c +++ b/adreno_dispatch.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -1855,7 +1855,7 @@ static void do_header_and_snapshot(struct kgsl_device *device, int fault, /* GMU snapshot will also pull a full device snapshot */ if (fault & ADRENO_GMU_FAULT) - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); else kgsl_device_snapshot(device, NULL, NULL, false); return; diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 14bfccd691..bd07aa8f89 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -371,7 +371,7 @@ int gen7_gmu_device_start(struct adreno_device *adreno_dev) if (gmu_core_timed_poll_check(device, GEN7_GMU_CM3_FW_INIT_RESULT, BIT(8), 100, GENMASK(8, 0))) { dev_err(&gmu->pdev->dev, "GMU failed to come out of reset\n"); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_DEVICE_START); return -ETIMEDOUT; } @@ -392,7 +392,7 @@ int gen7_gmu_hfi_start(struct adreno_device *adreno_dev) if (gmu_core_timed_poll_check(device, GEN7_GMU_HFI_CTRL_STATUS, BIT(0), 100, BIT(0))) { dev_err(&gmu->pdev->dev, "GMU HFI init failed\n"); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_HFI_INIT); return -ETIMEDOUT; } @@ -631,9 +631,9 @@ int gen7_gmu_oob_set(struct kgsl_device *device, 100, check)) { if (req == oob_perfcntr) gmu->num_oob_perfcntr--; - gmu_core_fault_snapshot(device); ret = -ETIMEDOUT; WARN(1, "OOB request %s timed out\n", oob_to_str(req)); + gmu_core_fault_snapshot(device, GMU_FAULT_OOB_SET); 
trigger_reset_recovery(adreno_dev, req); } @@ -864,7 +864,7 @@ int gen7_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) } WARN_ON(1); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_WAIT_FOR_LOWEST_IDLE); return -ETIMEDOUT; } @@ -887,7 +887,7 @@ int gen7_gmu_wait_for_idle(struct adreno_device *adreno_dev) "GMU not idling: status2=0x%x %llx %llx\n", status2, ts1, gpudev->read_alwayson(adreno_dev)); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_WAIT_FOR_IDLE); return -ETIMEDOUT; } @@ -1672,9 +1672,12 @@ static unsigned int gen7_gmu_ifpc_isenabled(struct kgsl_device *device) } /* Send an NMI to the GMU */ -void gen7_gmu_send_nmi(struct kgsl_device *device, bool force) +void gen7_gmu_send_nmi(struct kgsl_device *device, bool force, + enum gmu_fault_panic_policy gf_policy) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + u64 ticks = gpudev->read_alwayson(adreno_dev); struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); u32 result; @@ -1685,7 +1688,7 @@ void gen7_gmu_send_nmi(struct kgsl_device *device, bool force) if (gen7_gmu_gx_is_on(adreno_dev) && adreno_smmu_is_stalled(adreno_dev)) { dev_err(&gmu->pdev->dev, "Skipping NMI because SMMU is stalled\n"); - return; + goto done; } if (force) @@ -1725,6 +1728,8 @@ nmi: /* Wait for the NMI to be handled */ udelay(200); +done: + KGSL_GMU_CORE_FORCE_PANIC(device->gmu_core.gf_panic, gmu->pdev, ticks, gf_policy); } static void gen7_gmu_cooperative_reset(struct kgsl_device *device) @@ -1751,7 +1756,7 @@ static void gen7_gmu_cooperative_reset(struct kgsl_device *device) * If we dont get a snapshot ready from GMU, trigger NMI * and if we still timeout then we just continue with reset. 
*/ - gen7_gmu_send_nmi(device, true); + gen7_gmu_send_nmi(device, true, GMU_FAULT_PANIC_NONE); gmu_core_regread(device, GEN7_GMU_CM3_FW_INIT_RESULT, &result); if ((result & 0x800) != 0x800) @@ -1793,7 +1798,7 @@ void gen7_gmu_handle_watchdog(struct adreno_device *adreno_dev) gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_MASK, (mask | GMU_INT_WDOG_BITE)); - gen7_gmu_send_nmi(device, false); + gen7_gmu_send_nmi(device, false, GMU_FAULT_PANIC_NONE); dev_err_ratelimited(&gmu->pdev->dev, "GMU watchdog expired interrupt received\n"); diff --git a/adreno_gen7_gmu.h b/adreno_gen7_gmu.h index 5eaa36e766..d1a8231e72 100644 --- a/adreno_gen7_gmu.h +++ b/adreno_gen7_gmu.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef __ADRENO_GEN7_GMU_H #define __ADRENO_GEN7_GMU_H @@ -478,9 +478,11 @@ void gen7_gmu_handle_watchdog(struct adreno_device *adreno_dev); /** * gen7_gmu_send_nmi - Send NMI to GMU * @device: Pointer to the kgsl device + * @gf_policy: GMU fault panic setting policy * @force: Boolean to forcefully send NMI irrespective of GMU state */ -void gen7_gmu_send_nmi(struct kgsl_device *device, bool force); +void gen7_gmu_send_nmi(struct kgsl_device *device, bool force, + enum gmu_fault_panic_policy gf_policy); /** * gen7_gmu_add_to_minidump - Register gen7_device with va minidump diff --git a/adreno_gen7_hfi.c b/adreno_gen7_hfi.c index 6df5562f33..10cdedd144 100644 --- a/adreno_gen7_hfi.c +++ b/adreno_gen7_hfi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -255,7 +255,7 @@ int gen7_receive_ack_cmd(struct gen7_gmu_device *gmu, void *rcvd, "HFI ACK: Cannot find sender for 0x%8.8x Waiter: 0x%8.8x\n", req_hdr, ret_cmd->sent_hdr); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_HFI_RECIVE_ACK); return -ENODEV; } @@ -330,10 +330,10 @@ static int gen7_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev, HFI_IRQ_MSGQ_MASK, HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT); if (rc) { - gmu_core_fault_snapshot(device); dev_err(&gmu->pdev->dev, "Timed out waiting on ack for 0x%8.8x (id %d, sequence %d)\n", cmd[0], MSG_HDR_GET_ID(*cmd), MSG_HDR_GET_SEQNUM(*cmd)); + gmu_core_fault_snapshot(device, GMU_FAULT_SEND_CMD_WAIT_INLINE); return rc; } @@ -361,11 +361,11 @@ int gen7_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd, u32 s struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - gmu_core_fault_snapshot(device); dev_err(&gmu->pdev->dev, "HFI ACK failure: Req=0x%8.8X, Result=0x%8.8X\n", ret_cmd.results[1], ret_cmd.results[2]); + gmu_core_fault_snapshot(device, GMU_FAULT_HFI_SEND_GENERIC_REQ); return -EINVAL; } @@ -422,11 +422,11 @@ int gen7_hfi_send_generic_req_v5(struct adreno_device *adreno_dev, void *cmd, rc = -EINVAL; break; default: - gmu_core_fault_snapshot(KGSL_DEVICE(adreno_dev)); dev_err(&gmu->pdev->dev, "HFI ACK: Req=0x%8.8X, Result=0x%8.8X Error:0x%8.8X\n", ret_cmd->results[1], ret_cmd->results[2], ret_cmd->results[3]); rc = -EINVAL; + gmu_core_fault_snapshot(KGSL_DEVICE(adreno_dev), GMU_FAULT_HFI_SEND_GENERIC_REQ); break; } @@ -508,12 +508,18 @@ int gen7_hfi_send_set_value(struct adreno_device *adreno_dev, void adreno_gen7_receive_err_req(struct gen7_gmu_device *gmu, void *rcvd) { + struct kgsl_device *device = KGSL_DEVICE(gen7_gmu_to_adreno(gmu)); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(ADRENO_DEVICE(device)); + u64 ticks = gpudev->read_alwayson(ADRENO_DEVICE(device)); struct hfi_err_cmd 
*cmd = rcvd; dev_err(&gmu->pdev->dev, "HFI Error Received: %d %d %.16s\n", ((cmd->error_code >> 16) & 0xffff), (cmd->error_code & 0xffff), (char *) cmd->data); + + KGSL_GMU_CORE_FORCE_PANIC(device->gmu_core.gf_panic, + gmu->pdev, ticks, GMU_FAULT_F2H_MSG_ERR); } void adreno_gen7_receive_debug_req(struct gen7_gmu_device *gmu, void *rcvd) diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 7f087c1690..05e791fa6c 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -1191,7 +1191,7 @@ static int check_inflight_hw_fences(struct adreno_device *adreno_dev) read_unlock(&device->context_lock); if (ret) - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_HW_FENCE); return ret; } @@ -1278,7 +1278,7 @@ static void check_hw_fence_unack_count(struct adreno_device *adreno_dev) dev_err(&gmu->pdev->dev, "hardware fence unack_count(%d) isn't zero before SLUMBER\n", unack_count); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_HW_FENCE); } static void hwsched_idle_check(struct work_struct *work) @@ -1311,7 +1311,7 @@ static void hwsched_idle_check(struct work_struct *work) if (!gen7_hw_isidle(adreno_dev)) { dev_err(device->dev, "GPU isn't idle before SLUMBER\n"); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); } check_hw_fence_unack_count(adreno_dev); @@ -1584,7 +1584,7 @@ void gen7_hwsched_handle_watchdog(struct adreno_device *adreno_dev) gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_MASK, (mask | GMU_INT_WDOG_BITE)); - gen7_gmu_send_nmi(device, false); + gen7_gmu_send_nmi(device, false, GMU_FAULT_PANIC_NONE); dev_err_ratelimited(&gmu->pdev->dev, "GMU watchdog expired interrupt received\n"); diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 6506eae74b..84cb2a08e5 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -913,7 +913,7 @@ static void gen7_process_syncobj_query_work(struct kthread_work *work) if 
(timestamp_cmp(cmd->sync_obj_ts, hdr->sync_obj_ts) > 0) { dev_err(&gmu->pdev->dev, "Missing sync object ctx:%d ts:%d retired:%d\n", context->id, cmd->sync_obj_ts, hdr->sync_obj_ts); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_HW_FENCE); gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); } } @@ -1394,6 +1394,8 @@ static int check_ack_failure(struct adreno_device *adreno_dev, struct pending_cmd *ack) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + u64 ticks = gpudev->read_alwayson(adreno_dev); if (ack->results[2] != 0xffffffff) return 0; @@ -1403,6 +1405,8 @@ static int check_ack_failure(struct adreno_device *adreno_dev, MSG_HDR_GET_ID(ack->sent_hdr), MSG_HDR_GET_SEQNUM(ack->sent_hdr)); + KGSL_GMU_CORE_FORCE_PANIC(KGSL_DEVICE(adreno_dev)->gmu_core.gf_panic, + gmu->pdev, ticks, GMU_FAULT_HFI_ACK); return -EINVAL; } @@ -1812,8 +1816,7 @@ poll: dev_err(&gmu->pdev->dev, "Timed out processing MSG_START seqnum: %d\n", seqnum); - gmu_core_fault_snapshot(device); - return rc; + goto done; } /* Clear the interrupt */ @@ -1822,8 +1825,8 @@ poll: if (gen7_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)) <= 0) { dev_err(&gmu->pdev->dev, "MSG_START: no payload\n"); - gmu_core_fault_snapshot(device); - return -EINVAL; + rc = -EINVAL; + goto done; } if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) { @@ -1854,7 +1857,8 @@ poll: MSG_HDR_GET_ID(rcvd[0]), MSG_HDR_GET_TYPE(rcvd[0])); - gmu_core_fault_snapshot(device); +done: + gmu_core_fault_snapshot(device, GMU_FAULT_H2F_MSG_START); return rc; } @@ -3275,7 +3279,7 @@ static int check_detached_context_hardware_fences(struct adreno_device *adreno_d fault: move_detached_context_hardware_fences(adreno_dev, drawctxt); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_HW_FENCE); gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); return ret; @@ -3974,7 +3978,7 @@ static int 
send_context_unregister_hfi(struct adreno_device *adreno_dev, mutex_lock(&device->mutex); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_CTX_UNREGISTER); /* * Make sure we send all fences from this context to the TxQueue after recovery diff --git a/adreno_gen7_preempt.c b/adreno_gen7_preempt.c index e52e7278a1..a2fdcf0c8f 100644 --- a/adreno_gen7_preempt.c +++ b/adreno_gen7_preempt.c @@ -58,7 +58,7 @@ static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer, if (!atomic) { /* If WPTR update fails, set the fault and trigger recovery */ if (ret) { - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } @@ -378,7 +378,7 @@ void gen7_preemption_trigger(struct adreno_device *adreno_dev, bool atomic) err: /* If fenced write fails, take inline snapshot and trigger recovery */ if (!in_interrupt()) { - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } else { diff --git a/adreno_gen7_ringbuffer.c b/adreno_gen7_ringbuffer.c index ec1a447fa3..951d2abdd6 100644 --- a/adreno_gen7_ringbuffer.c +++ b/adreno_gen7_ringbuffer.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include "adreno.h" @@ -194,7 +194,7 @@ int gen7_ringbuffer_submit(struct adreno_ringbuffer *rb, * If WPTR update fails, take inline snapshot and trigger * recovery. 
*/ - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index fba3e66bfa..11ceb6c425 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -292,7 +292,7 @@ int gen8_gmu_device_start(struct adreno_device *adreno_dev) if (gmu_core_timed_poll_check(device, GEN8_GMUCX_CM3_FW_INIT_RESULT, BIT(8), 100, GENMASK(8, 0))) { dev_err(&gmu->pdev->dev, "GMU failed to come out of reset\n"); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_DEVICE_START); return -ETIMEDOUT; } @@ -313,7 +313,7 @@ int gen8_gmu_hfi_start(struct adreno_device *adreno_dev) if (gmu_core_timed_poll_check(device, GEN8_GMUCX_HFI_CTRL_STATUS, BIT(0), 100, BIT(0))) { dev_err(&gmu->pdev->dev, "GMU HFI init failed\n"); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_HFI_INIT); return -ETIMEDOUT; } @@ -549,9 +549,9 @@ int gen8_gmu_oob_set(struct kgsl_device *device, 100, check)) { if (req == oob_perfcntr) gmu->num_oob_perfcntr--; - gmu_core_fault_snapshot(device); - ret = -ETIMEDOUT; WARN(1, "OOB request %s timed out\n", oob_to_str(req)); + ret = -ETIMEDOUT; + gmu_core_fault_snapshot(device, GMU_FAULT_OOB_SET); trigger_reset_recovery(adreno_dev, req); } @@ -778,7 +778,7 @@ int gen8_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) } WARN_ON(1); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_WAIT_FOR_LOWEST_IDLE); return -ETIMEDOUT; } @@ -801,7 +801,7 @@ int gen8_gmu_wait_for_idle(struct adreno_device *adreno_dev) "GMU not idling: status2=0x%x %llx %llx\n", status2, ts1, gpudev->read_alwayson(adreno_dev)); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_WAIT_FOR_IDLE); return -ETIMEDOUT; } @@ -1582,9 +1582,12 @@ static u32 gen8_gmu_ifpc_isenabled(struct kgsl_device *device) } /* Send an NMI to the GMU */ -void 
gen8_gmu_send_nmi(struct kgsl_device *device, bool force) +void gen8_gmu_send_nmi(struct kgsl_device *device, bool force, + enum gmu_fault_panic_policy gf_policy) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + u64 ticks = gpudev->read_alwayson(adreno_dev); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); u32 result; @@ -1595,7 +1598,7 @@ void gen8_gmu_send_nmi(struct kgsl_device *device, bool force) if (gen8_gmu_gx_is_on(adreno_dev) && adreno_smmu_is_stalled(adreno_dev)) { dev_err(&gmu->pdev->dev, "Skipping NMI because SMMU is stalled\n"); - return; + goto done; } if (force) @@ -1635,6 +1638,9 @@ nmi: /* Wait for the NMI to be handled */ udelay(200); + +done: + KGSL_GMU_CORE_FORCE_PANIC(device->gmu_core.gf_panic, gmu->pdev, ticks, gf_policy); } static void gen8_gmu_cooperative_reset(struct kgsl_device *device) @@ -1661,7 +1667,7 @@ static void gen8_gmu_cooperative_reset(struct kgsl_device *device) * If we dont get a snapshot ready from GMU, trigger NMI * and if we still timeout then we just continue with reset. 
*/ - gen8_gmu_send_nmi(device, true); + gen8_gmu_send_nmi(device, true, GMU_FAULT_PANIC_NONE); gmu_core_regread(device, GEN8_GMUCX_CM3_FW_INIT_RESULT, &result); if ((result & 0x800) != 0x800) @@ -1703,7 +1709,7 @@ void gen8_gmu_handle_watchdog(struct adreno_device *adreno_dev) gmu_core_regwrite(device, GEN8_GMUAO_AO_HOST_INTERRUPT_MASK, (mask | GMU_INT_WDOG_BITE)); - gen8_gmu_send_nmi(device, false); + gen8_gmu_send_nmi(device, false, GMU_FAULT_PANIC_NONE); dev_err_ratelimited(&gmu->pdev->dev, "GMU watchdog expired interrupt received\n"); @@ -2494,6 +2500,7 @@ int gen8_gmu_probe(struct kgsl_device *device, set_bit(GMU_ENABLED, &device->gmu_core.flags); device->gmu_core.dev_ops = &gen8_gmudev; + device->gmu_core.gf_panic = GMU_FAULT_PANIC_NONE; /* Set default GMU attributes */ gmu->log_stream_enable = false; diff --git a/adreno_gen8_gmu.h b/adreno_gen8_gmu.h index 1368e883c8..12abcab923 100644 --- a/adreno_gen8_gmu.h +++ b/adreno_gen8_gmu.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __ADRENO_GEN8_GMU_H @@ -464,9 +464,11 @@ void gen8_gmu_handle_watchdog(struct adreno_device *adreno_dev); /** * gen8_gmu_send_nmi - Send NMI to GMU * @device: Pointer to the kgsl device + * @gf_policy: GMU fault panic setting policy * @force: Boolean to forcefully send NMI irrespective of GMU state */ -void gen8_gmu_send_nmi(struct kgsl_device *device, bool force); +void gen8_gmu_send_nmi(struct kgsl_device *device, bool force, + enum gmu_fault_panic_policy gf_policy); /** * gen8_gmu_add_to_minidump - Register gen8_device with va minidump diff --git a/adreno_gen8_hfi.c b/adreno_gen8_hfi.c index 977e5382b7..14ca397360 100644 --- a/adreno_gen8_hfi.c +++ b/adreno_gen8_hfi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -254,8 +254,7 @@ int gen8_receive_ack_cmd(struct gen8_gmu_device *gmu, void *rcvd, dev_err_ratelimited(&gmu->pdev->dev, "HFI ACK: Cannot find sender for 0x%8.8x Waiter: 0x%8.8x\n", req_hdr, ret_cmd->sent_hdr); - - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_HFI_RECIVE_ACK); return -ENODEV; } @@ -329,10 +328,10 @@ static int gen8_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev, HFI_IRQ_MSGQ_MASK, HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT); if (rc) { - gmu_core_fault_snapshot(device); dev_err(&gmu->pdev->dev, "Timed out waiting on ack for 0x%8.8x (id %d, sequence %d)\n", cmd[0], MSG_HDR_GET_ID(*cmd), MSG_HDR_GET_SEQNUM(*cmd)); + gmu_core_fault_snapshot(device, GMU_FAULT_SEND_CMD_WAIT_INLINE); return rc; } @@ -360,11 +359,11 @@ int gen8_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd, u32 s struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - gmu_core_fault_snapshot(device); 
dev_err(&gmu->pdev->dev, "HFI ACK failure: Req=0x%8.8X, Result=0x%8.8X\n", ret_cmd.results[1], ret_cmd.results[2]); + gmu_core_fault_snapshot(device, GMU_FAULT_HFI_SEND_GENERIC_REQ); return -EINVAL; } @@ -421,11 +420,11 @@ int gen8_hfi_send_generic_req_v5(struct adreno_device *adreno_dev, void *cmd, rc = -EINVAL; break; default: - gmu_core_fault_snapshot(KGSL_DEVICE(adreno_dev)); dev_err(&gmu->pdev->dev, "HFI ACK: Req=0x%8.8X, Result=0x%8.8X Error:0x%8.8X\n", ret_cmd->results[1], ret_cmd->results[2], ret_cmd->results[3]); rc = -EINVAL; + gmu_core_fault_snapshot(KGSL_DEVICE(adreno_dev), GMU_FAULT_HFI_SEND_GENERIC_REQ); break; } @@ -507,12 +506,18 @@ int gen8_hfi_send_set_value(struct adreno_device *adreno_dev, void adreno_gen8_receive_err_req(struct gen8_gmu_device *gmu, void *rcvd) { + struct kgsl_device *device = KGSL_DEVICE(gen8_gmu_to_adreno(gmu)); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(ADRENO_DEVICE(device)); + u64 ticks = gpudev->read_alwayson(ADRENO_DEVICE(device)); struct hfi_err_cmd *cmd = rcvd; dev_err(&gmu->pdev->dev, "HFI Error Received: %d %d %.16s\n", ((cmd->error_code >> 16) & 0xffff), (cmd->error_code & 0xffff), (char *) cmd->data); + + KGSL_GMU_CORE_FORCE_PANIC(device->gmu_core.gf_panic, + gmu->pdev, ticks, GMU_FAULT_F2H_MSG_ERR); } void adreno_gen8_receive_debug_req(struct gen8_gmu_device *gmu, void *rcvd) diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 94e50a30bb..35d819b255 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -1234,7 +1234,7 @@ static int check_inflight_hw_fences(struct adreno_device *adreno_dev) read_unlock(&device->context_lock); if (ret) - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_HW_FENCE); return ret; } @@ -1323,7 +1323,7 @@ static void check_hw_fence_unack_count(struct adreno_device *adreno_dev) dev_err(&gmu->pdev->dev, "hardware fence unack_count(%d) isn't zero before SLUMBER\n", unack_count); - gmu_core_fault_snapshot(device); + 
gmu_core_fault_snapshot(device, GMU_FAULT_HW_FENCE); } static void hwsched_idle_check(struct work_struct *work) @@ -1356,7 +1356,7 @@ static void hwsched_idle_check(struct work_struct *work) if (!gen8_hw_isidle(adreno_dev)) { dev_err(device->dev, "GPU isn't idle before SLUMBER\n"); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); } check_hw_fence_unack_count(adreno_dev); @@ -1625,7 +1625,7 @@ void gen8_hwsched_handle_watchdog(struct adreno_device *adreno_dev) gmu_core_regwrite(device, GEN8_GMUAO_AO_HOST_INTERRUPT_MASK, (mask | GMU_INT_WDOG_BITE)); - gen8_gmu_send_nmi(device, false); + gen8_gmu_send_nmi(device, false, GMU_FAULT_PANIC_NONE); dev_err_ratelimited(&gmu->pdev->dev, "GMU watchdog expired interrupt received\n"); diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 19ff0aa1b3..daffd1e098 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -904,7 +904,7 @@ static void gen8_process_syncobj_query_work(struct kthread_work *work) if (timestamp_cmp(cmd->sync_obj_ts, hdr->sync_obj_ts) > 0) { dev_err(&gmu->pdev->dev, "Missing sync object ctx:%d ts:%d retired:%d\n", context->id, cmd->sync_obj_ts, hdr->sync_obj_ts); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_HW_FENCE); gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); } } @@ -1385,6 +1385,8 @@ static int check_ack_failure(struct adreno_device *adreno_dev, struct pending_cmd *ack) { struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + u64 ticks = gpudev->read_alwayson(adreno_dev); if (ack->results[2] != 0xffffffff) return 0; @@ -1394,6 +1396,8 @@ static int check_ack_failure(struct adreno_device *adreno_dev, MSG_HDR_GET_ID(ack->sent_hdr), MSG_HDR_GET_SEQNUM(ack->sent_hdr)); + KGSL_GMU_CORE_FORCE_PANIC(KGSL_DEVICE(adreno_dev)->gmu_core.gf_panic, + gmu->pdev, ticks, GMU_FAULT_HFI_ACK); return -EINVAL; } @@ -1783,8 +1787,7 @@ 
poll: dev_err(&gmu->pdev->dev, "Timed out processing MSG_START seqnum: %d\n", seqnum); - gmu_core_fault_snapshot(device); - return rc; + goto done; } /* Clear the interrupt */ @@ -1793,8 +1796,8 @@ poll: if (gen8_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)) <= 0) { dev_err(&gmu->pdev->dev, "MSG_START: no payload\n"); - gmu_core_fault_snapshot(device); - return -EINVAL; + rc = -EINVAL; + goto done; } if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) { @@ -1825,7 +1828,8 @@ poll: MSG_HDR_GET_ID(rcvd[0]), MSG_HDR_GET_TYPE(rcvd[0])); - gmu_core_fault_snapshot(device); +done: + gmu_core_fault_snapshot(device, GMU_FAULT_H2F_MSG_START); return rc; } @@ -3193,7 +3197,7 @@ static int check_detached_context_hardware_fences(struct adreno_device *adreno_d fault: move_detached_context_hardware_fences(adreno_dev, drawctxt); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_HW_FENCE); gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); return ret; @@ -3827,7 +3831,7 @@ static int send_context_unregister_hfi(struct adreno_device *adreno_dev, mutex_lock(&device->mutex); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_CTX_UNREGISTER); /* * Make sure we send all fences from this context to the TxQueue after recovery diff --git a/adreno_gen8_preempt.c b/adreno_gen8_preempt.c index 8d9432a7c2..949547a187 100644 --- a/adreno_gen8_preempt.c +++ b/adreno_gen8_preempt.c @@ -58,7 +58,7 @@ static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer, if (!atomic) { /* If WPTR update fails, set the fault and trigger recovery */ if (ret) { - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } @@ -378,7 +378,7 @@ void gen8_preemption_trigger(struct adreno_device *adreno_dev, bool atomic) err: /* If fenced write fails, take inline snapshot and trigger recovery */ if (!in_interrupt()) { - 
gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } else { diff --git a/adreno_gen8_ringbuffer.c b/adreno_gen8_ringbuffer.c index 31adaec1df..c229719d75 100644 --- a/adreno_gen8_ringbuffer.c +++ b/adreno_gen8_ringbuffer.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include "adreno.h" @@ -196,7 +196,7 @@ int gen8_ringbuffer_submit(struct adreno_ringbuffer *rb, * If WPTR update fails, take inline snapshot and trigger * recovery. */ - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 8d1a91ee4c..4b1e765365 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -1702,7 +1702,7 @@ static void adreno_hwsched_reset_and_snapshot_legacy(struct adreno_device *adren if (cmd->error == GMU_SYNCOBJ_TIMEOUT_ERROR) { print_fault_syncobj(adreno_dev, cmd->ctxt_id, cmd->ts); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); goto done; } @@ -1730,7 +1730,7 @@ static void adreno_hwsched_reset_and_snapshot_legacy(struct adreno_device *adren if (!drawobj) { if (fault & ADRENO_GMU_FAULT) - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); else kgsl_device_snapshot(device, NULL, NULL, false); goto done; @@ -1783,7 +1783,7 @@ static void adreno_hwsched_reset_and_snapshot(struct adreno_device *adreno_dev, if (cmd->error == GMU_SYNCOBJ_TIMEOUT_ERROR) { print_fault_syncobj(adreno_dev, cmd->gc.ctxt_id, cmd->gc.ts); - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, 
GMU_FAULT_PANIC_NONE); goto done; } @@ -1816,7 +1816,7 @@ static void adreno_hwsched_reset_and_snapshot(struct adreno_device *adreno_dev, if (!obj && !obj_lpac) { if (fault & ADRENO_GMU_FAULT) - gmu_core_fault_snapshot(device); + gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); else kgsl_device_snapshot(device, NULL, NULL, false); goto done; @@ -2322,7 +2322,7 @@ int adreno_hwsched_wait_ack_completion(struct adreno_device *adreno_dev, dev_err(dev, "Ack timeout for id:%d sequence=%d ticks=%llu/%llu\n", MSG_HDR_GET_ID(ack->sent_hdr), MSG_HDR_GET_SEQNUM(ack->sent_hdr), start, end); - gmu_core_fault_snapshot(KGSL_DEVICE(adreno_dev)); + gmu_core_fault_snapshot(KGSL_DEVICE(adreno_dev), GMU_FAULT_WAIT_ACK_COMPLETION); return -ETIMEDOUT; } diff --git a/kgsl_gmu_core.c b/kgsl_gmu_core.c index f61e9afc64..e98659ae03 100644 --- a/kgsl_gmu_core.c +++ b/kgsl_gmu_core.c @@ -165,13 +165,14 @@ int gmu_core_dev_wait_for_active_transition(struct kgsl_device *device) return 0; } -void gmu_core_fault_snapshot(struct kgsl_device *device) +void gmu_core_fault_snapshot(struct kgsl_device *device, + enum gmu_fault_panic_policy gf_policy) { const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device); /* Send NMI first to halt GMU and capture the state close to the point of failure */ if (ops && ops->send_nmi) - ops->send_nmi(device, false); + ops->send_nmi(device, false, gf_policy); kgsl_device_snapshot(device, NULL, NULL, true); } diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h index 51f8576a4f..e9ff849573 100644 --- a/kgsl_gmu_core.h +++ b/kgsl_gmu_core.h @@ -390,6 +390,32 @@ struct device_node; struct kgsl_device; struct kgsl_snapshot; +#define GMU_FAULT_PANIC_NONE 0 +enum gmu_fault_panic_policy { + GMU_FAULT_DEVICE_START = 1, + GMU_FAULT_HFI_INIT, + GMU_FAULT_OOB_SET, + GMU_FAULT_HFI_RECIVE_ACK, + GMU_FAULT_SEND_CMD_WAIT_INLINE, + GMU_FAULT_HFI_SEND_GENERIC_REQ, + GMU_FAULT_F2H_MSG_ERR, + GMU_FAULT_H2F_MSG_START, + GMU_FAULT_WAIT_ACK_COMPLETION, + GMU_FAULT_HFI_ACK, + 
GMU_FAULT_CTX_UNREGISTER, + GMU_FAULT_WAIT_FOR_LOWEST_IDLE, + GMU_FAULT_WAIT_FOR_IDLE, + GMU_FAULT_HW_FENCE, + GMU_FAULT_MAX, +}; + +#define KGSL_GMU_CORE_FORCE_PANIC(gf_panic, pdev, ticks, policy) do { \ + if (gf_panic & BIT(policy)) { \ + dev_err(&pdev->dev, "GMU always on ticks: %llx\n", ticks);\ + BUG();\ + } \ + } while (0) + struct gmu_dev_ops { int (*oob_set)(struct kgsl_device *device, enum oob_request req); void (*oob_clear)(struct kgsl_device *device, enum oob_request req); @@ -402,7 +428,8 @@ struct gmu_dev_ops { int (*bcl_sid_set)(struct kgsl_device *device, u32 sid_id, u64 sid_val); u64 (*bcl_sid_get)(struct kgsl_device *device, u32 sid_id); void (*force_first_boot)(struct kgsl_device *device); - void (*send_nmi)(struct kgsl_device *device, bool force); + void (*send_nmi)(struct kgsl_device *device, bool force, + enum gmu_fault_panic_policy gf_policy); }; /** @@ -415,6 +442,8 @@ struct gmu_core_device { void *ptr; const struct gmu_dev_ops *dev_ops; unsigned long flags; + /** @gf_panic: GMU fault panic policy */ + enum gmu_fault_panic_policy gf_panic; }; extern struct platform_driver a6xx_gmu_driver; @@ -462,10 +491,12 @@ void gmu_core_dev_cooperative_reset(struct kgsl_device *device); /** * gmu_core_fault_snapshot - Set gmu fault and trigger snapshot * @device: Pointer to the kgsl device + * @gf_policy: GMU fault panic setting policy * * Set the gmu fault and take snapshot when we hit a gmu fault */ -void gmu_core_fault_snapshot(struct kgsl_device *device); +void gmu_core_fault_snapshot(struct kgsl_device *device, + enum gmu_fault_panic_policy gf_policy); /** * gmu_core_timed_poll_check() - polling *gmu* register at given offset until From 841dba2fb9dfd817ee025ff684133c2fba41f554 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 22 Feb 2024 09:20:43 +0530 Subject: [PATCH 0706/1016] kgsl: coresight: Fix adreno coresight related compilation errors There were few coresight related changes in kernel version 6.2 and 6.4 that break adreno coresight 
compilation. Update code with appropriate kernel version checks to fix the compilation errors. Change-Id: I461391c9afcaf3fe979701359c9e3fbaae690569 Signed-off-by: Kamal Agrawal --- adreno_coresight.c | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/adreno_coresight.c b/adreno_coresight.c index 4d8e3cca97..2fd9db3d2c 100644 --- a/adreno_coresight.c +++ b/adreno_coresight.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013-2020, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -144,7 +144,7 @@ static void _adreno_coresight_set(struct adreno_device *adreno_dev, } /* Generic function to enable coresight debug bus on adreno devices */ -static int adreno_coresight_enable(struct coresight_device *csdev, +static int _adreno_coresight_enable(struct coresight_device *csdev, struct perf_event *event, u32 mode) { struct adreno_coresight_device *adreno_csdev = dev_get_drvdata(&csdev->dev); @@ -175,6 +175,20 @@ static int adreno_coresight_enable(struct coresight_device *csdev, return ret; } +#if (KERNEL_VERSION(6, 4, 0) >= LINUX_VERSION_CODE) +static int adreno_coresight_enable(struct coresight_device *csdev, + struct perf_event *event, u32 mode) +{ + return _adreno_coresight_enable(csdev, event, mode); +} +#else +static int adreno_coresight_enable(struct coresight_device *csdev, + struct perf_event *event, enum cs_mode mode) +{ + return _adreno_coresight_enable(csdev, event, mode); +} +#endif + void adreno_coresight_stop(struct adreno_device *adreno_dev) { _adreno_coresight_get_and_clear(adreno_dev, &adreno_dev->gx_coresight); @@ -187,7 +201,7 @@ void adreno_coresight_start(struct adreno_device *adreno_dev) _adreno_coresight_set(adreno_dev, &adreno_dev->cx_coresight); } -#if (KERNEL_VERSION(6, 3, 0) > 
LINUX_VERSION_CODE) +#if (KERNEL_VERSION(6, 2, 0) >= LINUX_VERSION_CODE) static int adreno_coresight_trace_id(struct coresight_device *csdev) { struct adreno_coresight_device *adreno_csdev = dev_get_drvdata(&csdev->dev); @@ -197,7 +211,7 @@ static int adreno_coresight_trace_id(struct coresight_device *csdev) #endif static const struct coresight_ops_source adreno_coresight_source_ops = { -#if (KERNEL_VERSION(6, 3, 0) > LINUX_VERSION_CODE) +#if (KERNEL_VERSION(6, 2, 0) >= LINUX_VERSION_CODE) .trace_id = adreno_coresight_trace_id, #endif .enable = adreno_coresight_enable, @@ -217,9 +231,13 @@ void adreno_coresight_remove(struct adreno_device *adreno_dev) coresight_unregister(adreno_dev->cx_coresight.dev); } -#if (KERNEL_VERSION(6, 1, 0) >= LINUX_VERSION_CODE) +#if (KERNEL_VERSION(6, 4, 0) >= LINUX_VERSION_CODE) static int funnel_gfx_enable(struct coresight_device *csdev, int inport, int outport) +#else +static int funnel_gfx_enable(struct coresight_device *csdev, struct coresight_connection *inport, + struct coresight_connection *outport) +#endif { struct kgsl_device *device = kgsl_get_device(0); struct adreno_device *adreno_dev = ADRENO_DEVICE(device); @@ -243,8 +261,13 @@ err: return ret; } +#if (KERNEL_VERSION(6, 4, 0) >= LINUX_VERSION_CODE) static void funnel_gfx_disable(struct coresight_device *csdev, int inport, int outport) +#else +static void funnel_gfx_disable(struct coresight_device *csdev, struct coresight_connection *inport, + struct coresight_connection *outport) +#endif { struct kgsl_device *device = kgsl_get_device(0); struct adreno_device *adreno_dev = ADRENO_DEVICE(device); @@ -267,13 +290,10 @@ err: mutex_unlock(&device->mutex); return; } -#endif struct coresight_ops_link funnel_link_gfx_ops = { -#if (KERNEL_VERSION(6, 1, 0) >= LINUX_VERSION_CODE) .enable = funnel_gfx_enable, .disable = funnel_gfx_disable, -#endif }; struct coresight_ops funnel_gfx_ops = { @@ -287,13 +307,15 @@ static void adreno_coresight_dev_probe(struct kgsl_device *device, { 
struct platform_device *pdev = of_find_device_by_node(node); struct coresight_desc desc; - u32 atid; + u32 atid = 0; if (!pdev) return; +#if (KERNEL_VERSION(6, 2, 0) >= LINUX_VERSION_CODE) if (of_property_read_u32(node, "coresight-atid", &atid)) return; +#endif if (of_property_read_string(node, "coresight-name", &desc.name)) return; From 09f731c83f12973875af9f41a523d2e53bc99fdb Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Fri, 5 Jan 2024 14:19:32 +0530 Subject: [PATCH 0707/1016] kgsl: Add cx_misc and isense_cntl to regmap region Currently, cx_misc register read/write/modify operations are performed through custom functions. Due to this, snapshot path requires CX misc base offset to be programmed for each generation of GPUs. As it is not programmed for gen8 GPUs, cx misc registers are not dumped properly through legacy path. Use regmap region for mapping cx_misc registers to avoid cx misc base offset programming. This also allows us to remove custom functions for cx_misc. Isense control register operations are also performed through custom APIs. Thus, map isense control block as well to regmap region. 
Change-Id: I0cf0396d259331a61bb2c5f86296f0f79751aae8 Signed-off-by: Kamal Agrawal --- a6xx_reg.h | 4 +- adreno.c | 112 +---------------------------------------- adreno.h | 21 -------- adreno_a6xx.c | 14 ++---- adreno_a6xx_snapshot.c | 52 ++----------------- adreno_gen7.c | 31 +++++------- adreno_gen7_snapshot.c | 10 +++- adreno_gen8.c | 35 ++++++------- adreno_gen8_gmu.c | 8 +-- adreno_gen8_snapshot.c | 10 +++- adreno_snapshot.c | 34 ------------- gen7_reg.h | 18 +++---- gen8_reg.h | 22 ++++---- kgsl_regmap.h | 4 +- 14 files changed, 83 insertions(+), 292 deletions(-) diff --git a/a6xx_reg.h b/a6xx_reg.h index 0fabecbf8a..5592128f50 100644 --- a/a6xx_reg.h +++ b/a6xx_reg.h @@ -1219,8 +1219,8 @@ #define A6XX_RGMU_CX_PCC_DEBUG 0x1F83D /* GPU CX_MISC registers */ -#define A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_0 0x1 -#define A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1 0x2 +#define A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_0 0x27801 +#define A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1 0x27802 #define A6XX_LLC_NUM_GPU_SCIDS 5 #define A6XX_GPU_LLC_SCID_NUM_BITS 5 #define A6XX_GPU_LLC_SCID_MASK \ diff --git a/adreno.c b/adreno.c index 042c5a45fa..a269ed9a93 100644 --- a/adreno.c +++ b/adreno.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -907,40 +906,6 @@ static int adreno_of_get_power(struct adreno_device *adreno_dev, return 0; } -static void adreno_cx_misc_probe(struct kgsl_device *device) -{ - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct resource *res; - - res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, - "cx_misc"); - - if (res == NULL) - return; - - adreno_dev->cx_misc_len = resource_size(res); - adreno_dev->cx_misc_virt = devm_ioremap(&device->pdev->dev, - res->start, adreno_dev->cx_misc_len); -} - -static void adreno_isense_probe(struct kgsl_device *device) -{ - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct resource *res; - - res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, - 
"isense_cntl"); - if (res == NULL) - return; - - adreno_dev->isense_base = res->start - device->regmap.base->start; - adreno_dev->isense_len = resource_size(res); - adreno_dev->isense_virt = devm_ioremap(&device->pdev->dev, res->start, - adreno_dev->isense_len); - if (adreno_dev->isense_virt == NULL) - dev_warn(device->dev, "isense ioremap failed\n"); -} - /* Read the fuse through the new and fancy nvmem method */ static int adreno_read_speed_bin(struct platform_device *pdev) { @@ -1324,9 +1289,9 @@ int adreno_device_probe(struct platform_device *pdev, kgsl_regmap_add_region(&device->regmap, pdev, "cx_dbgc", NULL, NULL); /* Probe for the optional CX_MISC block */ - adreno_cx_misc_probe(device); + kgsl_regmap_add_region(&device->regmap, pdev, "cx_misc", NULL, NULL); - adreno_isense_probe(device); + kgsl_regmap_add_region(&device->regmap, pdev, "isense_cntl", NULL, NULL); /* Allocate the memstore for storing timestamps and other useful info */ @@ -2708,47 +2673,6 @@ int adreno_suspend_context(struct kgsl_device *device) return adreno_idle(device); } -void adreno_cx_misc_regread(struct adreno_device *adreno_dev, - unsigned int offsetwords, unsigned int *value) -{ - unsigned int cx_misc_offset; - - WARN_ONCE(!adreno_dev->cx_misc_virt, - "cx_misc region is not defined in device tree"); - - cx_misc_offset = (offsetwords << 2); - if (!adreno_dev->cx_misc_virt || - (cx_misc_offset >= adreno_dev->cx_misc_len)) - return; - - *value = __raw_readl(adreno_dev->cx_misc_virt + cx_misc_offset); - - /* - * ensure this read finishes before the next one. - * i.e. 
act like normal readl() - */ - rmb(); -} - -void adreno_isense_regread(struct adreno_device *adreno_dev, - unsigned int offsetwords, unsigned int *value) -{ - unsigned int isense_offset; - - isense_offset = (offsetwords << 2); - if (!adreno_dev->isense_virt || - (isense_offset >= adreno_dev->isense_len)) - return; - - *value = __raw_readl(adreno_dev->isense_virt + isense_offset); - - /* - * ensure this read finishes before the next one. - * i.e. act like normal readl() - */ - rmb(); -} - bool adreno_gx_is_on(struct adreno_device *adreno_dev) { const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); @@ -2756,38 +2680,6 @@ bool adreno_gx_is_on(struct adreno_device *adreno_dev) return gpudev->gx_is_on(adreno_dev); } -void adreno_cx_misc_regwrite(struct adreno_device *adreno_dev, - unsigned int offsetwords, unsigned int value) -{ - unsigned int cx_misc_offset; - - WARN_ONCE(!adreno_dev->cx_misc_virt, - "cx_misc region is not defined in device tree"); - - cx_misc_offset = (offsetwords << 2); - if (!adreno_dev->cx_misc_virt || - (cx_misc_offset >= adreno_dev->cx_misc_len)) - return; - - /* - * ensure previous writes post before this one, - * i.e. 
act like normal writel() - */ - wmb(); - __raw_writel(value, adreno_dev->cx_misc_virt + cx_misc_offset); -} - -void adreno_cx_misc_regrmw(struct adreno_device *adreno_dev, - unsigned int offsetwords, - unsigned int mask, unsigned int bits) -{ - unsigned int val = 0; - - adreno_cx_misc_regread(adreno_dev, offsetwords, &val); - val &= ~mask; - adreno_cx_misc_regwrite(adreno_dev, offsetwords, val | bits); -} - void adreno_profile_submit_time(struct adreno_submit_time *time) { struct kgsl_drawobj *drawobj; diff --git a/adreno.h b/adreno.h index e697fa2743..bb3a7008e3 100644 --- a/adreno.h +++ b/adreno.h @@ -518,11 +518,6 @@ struct adreno_dispatch_ops { * @dev: Reference to struct kgsl_device * @priv: Holds the private flags specific to the adreno_device * @chipid: Chip ID specific to the GPU - * @cx_misc_len: Length of the CX MISC register block - * @cx_misc_virt: Pointer where the CX MISC block is mapped - * @isense_base: Base physical address of isense block - * @isense_len: Length of the isense register block - * @isense_virt: Pointer where isense block is mapped * @gpucore: Pointer to the adreno_gpu_core structure * @pfp_fw: Buffer which holds the pfp ucode * @pfp_fw_size: Size of pfp ucode buffer @@ -592,11 +587,6 @@ struct adreno_device { unsigned int chipid; /** @uche_gmem_base: Base address of GMEM for UCHE access */ u64 uche_gmem_base; - unsigned int cx_misc_len; - void __iomem *cx_misc_virt; - unsigned long isense_base; - unsigned int isense_len; - void __iomem *isense_virt; const struct adreno_gpu_core *gpucore; struct adreno_firmware fw[2]; size_t gpmu_cmds_size; @@ -737,8 +727,6 @@ struct adreno_device { u32 ifpc_hyst; /** @ifpc_hyst_floor: IFPC long hysteresis floor value */ u32 ifpc_hyst_floor; - /** @cx_misc_base: CX MISC register block base offset */ - u32 cx_misc_base; /* * @no_restore_count: Keep track of perfcounter requests that don't have * ADRENO_PERFCOUNTER_GROUP_RESTORE flag set @@ -1086,15 +1074,6 @@ long adreno_ioctl_perfcounter_get(struct 
kgsl_device_private *dev_priv, long adreno_ioctl_perfcounter_put(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data); -void adreno_cx_misc_regread(struct adreno_device *adreno_dev, - unsigned int offsetwords, unsigned int *value); -void adreno_cx_misc_regwrite(struct adreno_device *adreno_dev, - unsigned int offsetwords, unsigned int value); -void adreno_cx_misc_regrmw(struct adreno_device *adreno_dev, - unsigned int offsetwords, - unsigned int mask, unsigned int bits); -void adreno_isense_regread(struct adreno_device *adreno_dev, - unsigned int offsetwords, unsigned int *value); bool adreno_gx_is_on(struct adreno_device *adreno_dev); u64 adreno_read_cx_timer(struct adreno_device *adreno_dev); diff --git a/adreno_a6xx.c b/adreno_a6xx.c index 6e118c15b3..8103354c6e 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -1572,9 +1572,8 @@ static void a6xx_llc_configure_gpu_scid(struct adreno_device *adreno_dev) kgsl_regrmw(device, A6XX_GBIF_SCACHE_CNTL1, A6XX_GPU_LLC_SCID_MASK, gpu_cntl1_val); else - adreno_cx_misc_regrmw(adreno_dev, - A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1, - A6XX_GPU_LLC_SCID_MASK, gpu_cntl1_val); + kgsl_regrmw(device, A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1, + A6XX_GPU_LLC_SCID_MASK, gpu_cntl1_val); /* * On A660, the SCID programming for UCHE traffic is done in @@ -1614,10 +1613,8 @@ static void a6xx_llc_configure_gpuhtw_scid(struct adreno_device *adreno_dev) gpuhtw_scid = llcc_get_slice_id(adreno_dev->gpuhtw_llc_slice); - adreno_cx_misc_regrmw(adreno_dev, - A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1, - A6XX_GPUHTW_LLC_SCID_MASK, - gpuhtw_scid << A6XX_GPUHTW_LLC_SCID_SHIFT); + kgsl_regrmw(device, A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1, + A6XX_GPUHTW_LLC_SCID_MASK, gpuhtw_scid << A6XX_GPUHTW_LLC_SCID_SHIFT); } /* @@ -1642,8 +1639,7 @@ static void a6xx_llc_enable_overrides(struct adreno_device *adreno_dev) * writenoallocoverrideen=1 * write-no-alloc=1 - Do not allocates lines on write miss */ - adreno_cx_misc_regwrite(adreno_dev, - 
A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_0, 0x3); + kgsl_regwrite(device, A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_0, 0x3); } static const char *uche_client[7][3] = { diff --git a/adreno_a6xx_snapshot.c b/adreno_a6xx_snapshot.c index b5ca76686a..1a8e2d0bb1 100644 --- a/adreno_a6xx_snapshot.c +++ b/adreno_a6xx_snapshot.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2017-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include "adreno.h" @@ -1638,49 +1638,6 @@ static void _a6xx_do_crashdump(struct kgsl_device *device) crash_dump_valid = true; } -static size_t a6xx_snapshot_isense_registers(struct kgsl_device *device, - u8 *buf, size_t remain, void *priv) -{ - struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf; - struct kgsl_snapshot_registers *regs = priv; - unsigned int *data = (unsigned int *)(buf + sizeof(*header)); - int count = 0, j, k; - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - - /* Figure out how many registers we are going to dump */ - - for (j = 0; j < regs->count; j++) { - int start = regs->regs[j * 2]; - int end = regs->regs[j * 2 + 1]; - - count += (end - start + 1); - } - - if (remain < (count * 8) + sizeof(*header)) { - SNAPSHOT_ERR_NOMEM(device, "ISENSE REGISTERS"); - return 0; - } - - for (j = 0; j < regs->count; j++) { - unsigned int start = regs->regs[j * 2]; - unsigned int end = regs->regs[j * 2 + 1]; - - for (k = start; k <= end; k++) { - unsigned int val; - - adreno_isense_regread(adreno_dev, - k - (adreno_dev->isense_base >> 2), &val); - *data++ = k; - *data++ = val; - } - } - - header->count = count; - - /* Return the size of the section */ - return (count * 8) + sizeof(*header); -} - /* Snapshot the preemption related buffers */ static size_t snapshot_preemption_record(struct kgsl_device *device, u8 *buf, size_t remain, 
void *priv) @@ -1775,15 +1732,16 @@ void a6xx_snapshot(struct adreno_device *adreno_dev, */ a6xx_snapshot_debugbus(adreno_dev, snapshot); - /* RSCC registers are on cx */ - if (adreno_is_a650_family(adreno_dev)) { + /* Isense registers are on cx */ + if (adreno_is_a650_family(adreno_dev) && + kgsl_regmap_valid_offset(&device->regmap, a650_isense_registers[0])) { struct kgsl_snapshot_registers r; r.regs = a650_isense_registers; r.count = ARRAY_SIZE(a650_isense_registers) / 2; kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, - snapshot, a6xx_snapshot_isense_registers, &r); + snapshot, kgsl_snapshot_dump_registers, &r); } if (!gmu_core_isenabled(device)) { diff --git a/adreno_gen7.c b/adreno_gen7.c index 920569c636..80c3737dc6 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -348,6 +348,7 @@ int gen7_init(struct adreno_device *adreno_dev) #define CX_TIMER_INIT_SAMPLES 16 void gen7_cx_timer_init(struct adreno_device *adreno_dev) { + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u64 seed_val, tmr, skew = 0; int i; unsigned long flags; @@ -369,12 +370,8 @@ void gen7_cx_timer_init(struct adreno_device *adreno_dev) tmr2 = arch_timer_read_counter(); /* Write to the register and time it */ - adreno_cx_misc_regwrite(adreno_dev, - GEN7_GPU_CX_MISC_AO_COUNTER_LO, - lower_32_bits(tmr2)); - adreno_cx_misc_regwrite(adreno_dev, - GEN7_GPU_CX_MISC_AO_COUNTER_HI, - upper_32_bits(tmr2)); + kgsl_regwrite(device, GEN7_GPU_CX_MISC_AO_COUNTER_LO, lower_32_bits(tmr2)); + kgsl_regwrite(device, GEN7_GPU_CX_MISC_AO_COUNTER_HI, upper_32_bits(tmr2)); /* Barrier to make sure the write completes before timing it */ mb(); @@ -395,10 +392,8 @@ void gen7_cx_timer_init(struct adreno_device *adreno_dev) seed_val = tmr + skew; /* Seed the GPU CX counter with the adjusted timer */ - adreno_cx_misc_regwrite(adreno_dev, - GEN7_GPU_CX_MISC_AO_COUNTER_LO, lower_32_bits(seed_val)); - adreno_cx_misc_regwrite(adreno_dev, - GEN7_GPU_CX_MISC_AO_COUNTER_HI, upper_32_bits(seed_val)); + 
kgsl_regwrite(device, GEN7_GPU_CX_MISC_AO_COUNTER_LO, lower_32_bits(seed_val)); + kgsl_regwrite(device, GEN7_GPU_CX_MISC_AO_COUNTER_HI, upper_32_bits(seed_val)); local_irq_restore(flags); @@ -407,6 +402,7 @@ void gen7_cx_timer_init(struct adreno_device *adreno_dev) void gen7_get_gpu_feature_info(struct adreno_device *adreno_dev) { + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 feature_fuse = 0; /* Only Gen7_9_x has the HW feature information */ @@ -414,8 +410,7 @@ void gen7_get_gpu_feature_info(struct adreno_device *adreno_dev) return; /* Get HW feature soft fuse value */ - adreno_cx_misc_regread(adreno_dev, GEN7_GPU_CX_MISC_SW_FUSE_VALUE, - &feature_fuse); + kgsl_regread(device, GEN7_GPU_CX_MISC_SW_FUSE_VALUE, &feature_fuse); adreno_dev->fastblend_enabled = feature_fuse & BIT(GEN7_FASTBLEND_SW_FUSE); adreno_dev->raytracing_enabled = feature_fuse & BIT(GEN7_RAYTRACING_SW_FUSE); @@ -664,6 +659,7 @@ static u64 gen7_get_uche_trap_base(void) void gen7_enable_ahb_timeout_detection(struct adreno_device *adreno_dev) { + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 val; if (!adreno_dev->ahb_timeout_val) @@ -671,11 +667,11 @@ void gen7_enable_ahb_timeout_detection(struct adreno_device *adreno_dev) val = (ADRENO_AHB_CNTL_DEFAULT | FIELD_PREP(GENMASK(4, 0), adreno_dev->ahb_timeout_val)); - adreno_cx_misc_regwrite(adreno_dev, GEN7_GPU_CX_MISC_CX_AHB_AON_CNTL, val); - adreno_cx_misc_regwrite(adreno_dev, GEN7_GPU_CX_MISC_CX_AHB_GMU_CNTL, val); - adreno_cx_misc_regwrite(adreno_dev, GEN7_GPU_CX_MISC_CX_AHB_CP_CNTL, val); - adreno_cx_misc_regwrite(adreno_dev, GEN7_GPU_CX_MISC_CX_AHB_VBIF_SMMU_CNTL, val); - adreno_cx_misc_regwrite(adreno_dev, GEN7_GPU_CX_MISC_CX_AHB_HOST_CNTL, val); + kgsl_regwrite(device, GEN7_GPU_CX_MISC_CX_AHB_AON_CNTL, val); + kgsl_regwrite(device, GEN7_GPU_CX_MISC_CX_AHB_GMU_CNTL, val); + kgsl_regwrite(device, GEN7_GPU_CX_MISC_CX_AHB_CP_CNTL, val); + kgsl_regwrite(device, GEN7_GPU_CX_MISC_CX_AHB_VBIF_SMMU_CNTL, val); + 
kgsl_regwrite(device, GEN7_GPU_CX_MISC_CX_AHB_HOST_CNTL, val); } int gen7_start(struct adreno_device *adreno_dev) @@ -1658,7 +1654,6 @@ int gen7_probe_common(struct platform_device *pdev, adreno_dev->gpucore = gpucore; adreno_dev->chipid = chipid; - adreno_dev->cx_misc_base = GEN7_CX_MISC_BASE; adreno_reg_offset_init(gpudev->reg_offsets); diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c index 40f3eb8ea9..56b5690570 100644 --- a/adreno_gen7_snapshot.c +++ b/adreno_gen7_snapshot.c @@ -1562,9 +1562,15 @@ static void gen7_cx_misc_regs_snapshot(struct kgsl_device *device, } done: + regs_ptr = (const u32 *)gen7_snapshot_block_list->cx_misc_regs; + + if (!kgsl_regmap_valid_offset(&device->regmap, regs_ptr[0])) { + WARN_ONCE(1, "cx_misc registers are not defined in device tree"); + return; + } + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, - snapshot, adreno_snapshot_cx_misc_registers, - (void *)gen7_snapshot_block_list->cx_misc_regs); + snapshot, adreno_snapshot_registers_v2, (void *)regs_ptr); } static void gen7_snapshot_br_roq(struct kgsl_device *device, diff --git a/adreno_gen8.c b/adreno_gen8.c index f95bbf8590..97d5324fb5 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -489,6 +489,7 @@ int gen8_init(struct adreno_device *adreno_dev) #define CX_TIMER_INIT_SAMPLES 16 void gen8_cx_timer_init(struct adreno_device *adreno_dev) { + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u64 seed_val, tmr, skew = 0; int i; unsigned long flags; @@ -509,12 +510,8 @@ void gen8_cx_timer_init(struct adreno_device *adreno_dev) tmr2 = arch_timer_read_counter(); /* Write to the register and time it */ - adreno_cx_misc_regwrite(adreno_dev, - GEN8_GPU_CX_MISC_AO_COUNTER_LO, - lower_32_bits(tmr2)); - adreno_cx_misc_regwrite(adreno_dev, - GEN8_GPU_CX_MISC_AO_COUNTER_HI, - upper_32_bits(tmr2)); + kgsl_regwrite(device, GEN8_GPU_CX_MISC_AO_COUNTER_LO, lower_32_bits(tmr2)); + kgsl_regwrite(device, GEN8_GPU_CX_MISC_AO_COUNTER_HI, upper_32_bits(tmr2)); /* 
Barrier to make sure the write completes before timing it */ mb(); @@ -535,10 +532,8 @@ void gen8_cx_timer_init(struct adreno_device *adreno_dev) seed_val = tmr + skew; /* Seed the GPU CX counter with the adjusted timer */ - adreno_cx_misc_regwrite(adreno_dev, - GEN8_GPU_CX_MISC_AO_COUNTER_LO, lower_32_bits(seed_val)); - adreno_cx_misc_regwrite(adreno_dev, - GEN8_GPU_CX_MISC_AO_COUNTER_HI, upper_32_bits(seed_val)); + kgsl_regwrite(device, GEN8_GPU_CX_MISC_AO_COUNTER_LO, lower_32_bits(seed_val)); + kgsl_regwrite(device, GEN8_GPU_CX_MISC_AO_COUNTER_HI, upper_32_bits(seed_val)); local_irq_restore(flags); @@ -547,11 +542,11 @@ void gen8_cx_timer_init(struct adreno_device *adreno_dev) void gen8_get_gpu_feature_info(struct adreno_device *adreno_dev) { + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 feature_fuse = 0; /* Get HW feature soft fuse value */ - adreno_cx_misc_regread(adreno_dev, GEN8_GPU_CX_MISC_SW_FUSE_VALUE, - &feature_fuse); + kgsl_regread(device, GEN8_GPU_CX_MISC_SW_FUSE_VALUE, &feature_fuse); adreno_dev->fastblend_enabled = feature_fuse & BIT(GEN8_FASTBLEND_SW_FUSE); adreno_dev->raytracing_enabled = feature_fuse & BIT(GEN8_RAYTRACING_SW_FUSE); @@ -1003,6 +998,7 @@ static const struct kgsl_regmap_list gen8_0_0_bicubic_regs[] = { void gen8_enable_ahb_timeout_detection(struct adreno_device *adreno_dev) { + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 val; if (!adreno_dev->ahb_timeout_val) @@ -1010,11 +1006,11 @@ void gen8_enable_ahb_timeout_detection(struct adreno_device *adreno_dev) val = (ADRENO_AHB_CNTL_DEFAULT | FIELD_PREP(GENMASK(4, 0), adreno_dev->ahb_timeout_val)); - adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_CX_AHB_AON_CNTL, val); - adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_CX_AHB_GMU_CNTL, val); - adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_CX_AHB_CP_CNTL, val); - adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_CX_AHB_VBIF_SMMU_CNTL, val); - adreno_cx_misc_regwrite(adreno_dev, 
GEN8_GPU_CX_MISC_CX_AHB_HOST_CNTL, val); + kgsl_regwrite(device, GEN8_GPU_CX_MISC_CX_AHB_AON_CNTL, val); + kgsl_regwrite(device, GEN8_GPU_CX_MISC_CX_AHB_GMU_CNTL, val); + kgsl_regwrite(device, GEN8_GPU_CX_MISC_CX_AHB_CP_CNTL, val); + kgsl_regwrite(device, GEN8_GPU_CX_MISC_CX_AHB_VBIF_SMMU_CNTL, val); + kgsl_regwrite(device, GEN8_GPU_CX_MISC_CX_AHB_HOST_CNTL, val); } #define MIN_HBB 13 @@ -2083,11 +2079,10 @@ done: static irqreturn_t gen8_cx_host_irq_handler(int irq, void *data) { struct kgsl_device *device = data; - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); u32 status; - adreno_cx_misc_regread(adreno_dev, GEN8_GPU_CX_MISC_INT_0_STATUS, &status); - adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_INT_CLEAR_CMD, status); + kgsl_regread(device, GEN8_GPU_CX_MISC_INT_0_STATUS, &status); + kgsl_regwrite(device, GEN8_GPU_CX_MISC_INT_CLEAR_CMD, status); if (status & BIT(GEN8_CX_MISC_GPU_CC_IRQ)) KGSL_PWRCTRL_LOG_FREQLIM(device); diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index 11ceb6c425..f271731dbe 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -604,8 +604,8 @@ void gen8_gmu_irq_enable(struct adreno_device *adreno_dev) return; /* Clear pending IRQs, unmask needed interrupts and enable CX host IRQ */ - adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_INT_CLEAR_CMD, UINT_MAX); - adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_INT_0_MASK, GEN8_CX_MISC_INT_MASK); + kgsl_regwrite(device, GEN8_GPU_CX_MISC_INT_CLEAR_CMD, UINT_MAX); + kgsl_regwrite(device, GEN8_GPU_CX_MISC_INT_0_MASK, GEN8_CX_MISC_INT_MASK); enable_irq(device->cx_host_irq_num); } @@ -631,8 +631,8 @@ void gen8_gmu_irq_disable(struct adreno_device *adreno_dev) /* Disable CX host IRQ, mask all interrupts and clear pending IRQs */ disable_irq(device->cx_host_irq_num); - adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_INT_0_MASK, UINT_MAX); - adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_INT_CLEAR_CMD, UINT_MAX); + kgsl_regwrite(device, 
GEN8_GPU_CX_MISC_INT_0_MASK, UINT_MAX); + kgsl_regwrite(device, GEN8_GPU_CX_MISC_INT_CLEAR_CMD, UINT_MAX); } static int gen8_gmu_hfi_start_msg(struct adreno_device *adreno_dev) diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 5d44d74486..e21f8a47bc 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -1587,9 +1587,15 @@ static void gen8_cx_misc_regs_snapshot(struct kgsl_device *device, } legacy_snapshot: + regs_ptr = (const u32 *)gen8_snapshot_block_list->cx_misc_regs; + + if (!kgsl_regmap_valid_offset(&device->regmap, regs_ptr[0])) { + WARN_ONCE(1, "cx_misc registers are not defined in device tree"); + return; + } + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, - snapshot, adreno_snapshot_cx_misc_registers, - (void *)gen8_snapshot_block_list->cx_misc_regs); + snapshot, adreno_snapshot_registers_v2, (void *)regs_ptr); } void gen8_snapshot_external_core_regs(struct kgsl_device *device, diff --git a/adreno_snapshot.c b/adreno_snapshot.c index e37464e4d2..e6e3f40507 100644 --- a/adreno_snapshot.c +++ b/adreno_snapshot.c @@ -1299,37 +1299,3 @@ size_t adreno_snapshot_registers_v2(struct kgsl_device *device, u8 *buf, return (count * 4); } -size_t adreno_snapshot_cx_misc_registers(struct kgsl_device *device, - u8 *buf, size_t remain, void *priv) -{ - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - const u32 *ptr = (const u32 *)priv; - u32 *data = (unsigned int *)buf; - int count = 0, k; - - /* Figure out how many registers we are going to dump */ - count = adreno_snapshot_regs_count(ptr); - - if (remain < (count * sizeof(u32))) { - SNAPSHOT_ERR_NOMEM(device, "CX_MISC REGISTERS"); - return 0; - } - - for (; ptr[0] != UINT_MAX; ptr += 2) { - int cnt = REG_COUNT(ptr); - - if (cnt == 1) - *data++ = BIT(31) | ptr[0]; - else { - *data++ = ptr[0]; - *data++ = cnt; - } - - for (k = ptr[0]; k <= ptr[1]; k++) - adreno_cx_misc_regread(adreno_dev, - k - adreno_dev->cx_misc_base, data++); - } - - /* Return the size of the 
section */ - return (count * sizeof(u32)); -} diff --git a/gen7_reg.h b/gen7_reg.h index c77d96baf3..deaa2e2ec9 100644 --- a/gen7_reg.h +++ b/gen7_reg.h @@ -1300,16 +1300,14 @@ #define GEN7_SMMU_BASE 0x28000 /* GPU CX_MISC registers */ -#define GEN7_CX_MISC_BASE 0x27800 -#define GEN7_GPU_CX_MISC_CX_AHB_AON_CNTL 0x10 -#define GEN7_GPU_CX_MISC_CX_AHB_GMU_CNTL 0x11 -#define GEN7_GPU_CX_MISC_CX_AHB_CP_CNTL 0x12 -#define GEN7_GPU_CX_MISC_CX_AHB_VBIF_SMMU_CNTL 0x13 -#define GEN7_GPU_CX_MISC_CX_AHB_HOST_CNTL 0x14 -#define GEN7_GPU_CX_MISC_TCM_RET_CNTL 0x39 -#define GEN7_GPU_CX_MISC_AO_COUNTER_LO 0x80 -#define GEN7_GPU_CX_MISC_AO_COUNTER_HI 0x81 -#define GEN7_GPU_CX_MISC_SW_FUSE_VALUE 0x400 +#define GEN7_GPU_CX_MISC_CX_AHB_AON_CNTL 0x27810 +#define GEN7_GPU_CX_MISC_CX_AHB_GMU_CNTL 0x27811 +#define GEN7_GPU_CX_MISC_CX_AHB_CP_CNTL 0x27812 +#define GEN7_GPU_CX_MISC_CX_AHB_VBIF_SMMU_CNTL 0x27813 +#define GEN7_GPU_CX_MISC_CX_AHB_HOST_CNTL 0x27814 +#define GEN7_GPU_CX_MISC_AO_COUNTER_LO 0x27880 +#define GEN7_GPU_CX_MISC_AO_COUNTER_HI 0x27881 +#define GEN7_GPU_CX_MISC_SW_FUSE_VALUE 0x27C00 /* GPU SW Fuse Feature bit fields */ #define GEN7_FASTBLEND_SW_FUSE 0 diff --git a/gen8_reg.h b/gen8_reg.h index a41d9ebd64..fd4b45fbd1 100644 --- a/gen8_reg.h +++ b/gen8_reg.h @@ -1599,17 +1599,17 @@ #define GEN8_SMMU_BASE 0x28000 /* GPU CX_MISC registers */ -#define GEN8_GPU_CX_MISC_CX_AHB_AON_CNTL 0x10 -#define GEN8_GPU_CX_MISC_CX_AHB_GMU_CNTL 0x11 -#define GEN8_GPU_CX_MISC_CX_AHB_CP_CNTL 0x12 -#define GEN8_GPU_CX_MISC_CX_AHB_VBIF_SMMU_CNTL 0x13 -#define GEN8_GPU_CX_MISC_CX_AHB_HOST_CNTL 0x14 -#define GEN8_GPU_CX_MISC_INT_CLEAR_CMD 0x31 -#define GEN8_GPU_CX_MISC_INT_0_MASK 0x33 -#define GEN8_GPU_CX_MISC_INT_0_STATUS 0x34 -#define GEN8_GPU_CX_MISC_AO_COUNTER_LO 0x80 -#define GEN8_GPU_CX_MISC_AO_COUNTER_HI 0x81 -#define GEN8_GPU_CX_MISC_SW_FUSE_VALUE 0x400 +#define GEN8_GPU_CX_MISC_CX_AHB_AON_CNTL 0x27810 +#define GEN8_GPU_CX_MISC_CX_AHB_GMU_CNTL 0x27811 +#define 
GEN8_GPU_CX_MISC_CX_AHB_CP_CNTL 0x27812 +#define GEN8_GPU_CX_MISC_CX_AHB_VBIF_SMMU_CNTL 0x27813 +#define GEN8_GPU_CX_MISC_CX_AHB_HOST_CNTL 0x27814 +#define GEN8_GPU_CX_MISC_INT_CLEAR_CMD 0x27831 +#define GEN8_GPU_CX_MISC_INT_0_MASK 0x27833 +#define GEN8_GPU_CX_MISC_INT_0_STATUS 0x27834 +#define GEN8_GPU_CX_MISC_AO_COUNTER_LO 0x27880 +#define GEN8_GPU_CX_MISC_AO_COUNTER_HI 0x27881 +#define GEN8_GPU_CX_MISC_SW_FUSE_VALUE 0x27c00 /* GPU SW Fuse Feature bit fields */ #define GEN8_FASTBLEND_SW_FUSE 0 diff --git a/kgsl_regmap.h b/kgsl_regmap.h index 7a7b2d4d1f..efb6716a23 100644 --- a/kgsl_regmap.h +++ b/kgsl_regmap.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef KGSL_REGMAP_H @@ -49,7 +49,7 @@ struct kgsl_regmap { */ struct resource *base; /** @region: Array of regions for this regmap */ - struct kgsl_regmap_region region[4]; + struct kgsl_regmap_region region[5]; /** @count: Number of active regions in @region */ int count; }; From 8d89a74a49a8074c876e7ef6abf44ed7c91ce352 Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Sun, 11 Feb 2024 17:54:57 +0530 Subject: [PATCH 0708/1016] kgsl: Fix multiple warnings and errors To improve code readability address multiple warnings and errors. Fixed coding style issues such as indentation, spacing, semicolon, etc. 
Change-Id: Id0f87f055ae69b92902d181ed61e0cf2fd77e6e2 Signed-off-by: Sanjay Yadav --- adreno.c | 8 +++--- adreno.h | 37 +++++++++++----------------- adreno_a5xx.h | 30 +++++++++++------------ adreno_a6xx.c | 4 +-- adreno_a6xx_coresight.c | 6 ++--- adreno_a6xx_hwsched.c | 1 - adreno_coresight.c | 5 ++-- adreno_debugfs.c | 2 +- adreno_dispatch.c | 2 +- adreno_drawctxt.h | 4 +-- adreno_gen7.c | 2 +- adreno_gen7_gmu.c | 46 +++++++++++++++++------------------ adreno_gen7_hwsched.c | 5 ++-- adreno_gen7_hwsched_hfi.c | 2 +- adreno_gen7_snapshot.c | 12 ++++----- adreno_hwsched.h | 4 +-- adreno_profile.h | 5 ++-- adreno_ringbuffer.c | 4 +-- adreno_snapshot.c | 22 ++++++++--------- governor_msm_adreno_tz.c | 4 +-- include/uapi/linux/msm_kgsl.h | 4 +-- kgsl.h | 2 +- kgsl_events.c | 4 +-- kgsl_pwrctrl.h | 2 +- kgsl_snapshot.c | 9 ++++--- kgsl_snapshot.h | 4 +-- kgsl_util.h | 6 ++--- 27 files changed, 112 insertions(+), 124 deletions(-) diff --git a/adreno.c b/adreno.c index a269ed9a93..dfe8d4d220 100644 --- a/adreno.c +++ b/adreno.c @@ -52,7 +52,7 @@ static bool adreno_preemption_enable; /* Nice level for the higher priority GPU start thread */ int adreno_wake_nice = -7; -/* Number of milliseconds to stay active active after a wake on touch */ +/* Number of milliseconds to stay active after a wake on touch */ unsigned int adreno_wake_timeout = 100; static u32 get_ucode_version(const u32 *data) @@ -135,7 +135,7 @@ static void adreno_zap_shader_unload(struct adreno_device *adreno_dev) /** * adreno_readreg64() - Read a 64bit register by getting its offset from the * offset array defined in gpudev node - * @adreno_dev: Pointer to the the adreno device + * @adreno_dev: Pointer to the adreno device * @lo: lower 32bit register enum that is to be read * @hi: higher 32bit register enum that is to be read * @val: 64 bit Register value read is placed here @@ -3141,8 +3141,6 @@ static void adreno_set_isdb_breakpoint_registers(struct kgsl_device *device) if 
(gpudev->set_isdb_breakpoint_registers) gpudev->set_isdb_breakpoint_registers(adreno_dev); - - return; } static void adreno_drawctxt_sched(struct kgsl_device *device, @@ -3692,6 +3690,6 @@ module_exit(kgsl_3d_exit); MODULE_DESCRIPTION("3D Graphics driver"); MODULE_LICENSE("GPL v2"); MODULE_SOFTDEP("pre: arm_smmu nvmem_qfprom socinfo governor_msm_adreno_tz governor_gpubw_mon"); -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 18, 0)) +#if (KERNEL_VERSION(5, 18, 0) <= LINUX_VERSION_CODE) MODULE_IMPORT_NS(DMA_BUF); #endif diff --git a/adreno.h b/adreno.h index bb3a7008e3..8ca4428329 100644 --- a/adreno.h +++ b/adreno.h @@ -519,14 +519,6 @@ struct adreno_dispatch_ops { * @priv: Holds the private flags specific to the adreno_device * @chipid: Chip ID specific to the GPU * @gpucore: Pointer to the adreno_gpu_core structure - * @pfp_fw: Buffer which holds the pfp ucode - * @pfp_fw_size: Size of pfp ucode buffer - * @pfp_fw_version: Version of pfp ucode - * @pfp: Memory descriptor which holds pfp ucode buffer info - * @pm4_fw: Buffer which holds the pm4 ucode - * @pm4_fw_size: Size of pm4 ucode buffer - * @pm4_fw_version: Version of pm4 ucode - * @pm4: Memory descriptor which holds pm4 ucode buffer info * @gpmu_cmds_size: Length of gpmu cmd stream * @gpmu_cmds: gpmu cmd stream * @ringbuffers: Array of pointers to adreno_ringbuffers @@ -570,8 +562,7 @@ struct adreno_dispatch_ops { * @lm_threshold_cross: number of current peaks exceeding threshold * @ifpc_count: Number of times the GPU went into IFPC * @highest_bank_bit: Value of the highest bank bit - * @csdev: Pointer to a coresight device (if applicable) - * @gpmu_throttle_counters - counteers for number of throttled clocks + * @gpmu_throttle_counters - counters for number of throttled clocks * @irq_storm_work: Worker to handle possible interrupt storms * @active_list: List to track active contexts * @active_list_lock: Lock to protect active_list @@ -1278,8 +1269,8 @@ static inline int adreno_is_gen7_2_x_family(struct 
adreno_device *adreno_dev) /* * adreno_checkreg_off() - Checks the validity of a register enum - * @adreno_dev: Pointer to adreno device - * @offset_name: The register enum that is checked + * @adreno_dev: Pointer to adreno device + * @offset_name: The register enum that is checked */ static inline bool adreno_checkreg_off(struct adreno_device *adreno_dev, enum adreno_regs offset_name) @@ -1307,9 +1298,9 @@ static inline bool adreno_checkreg_off(struct adreno_device *adreno_dev, /* * adreno_readreg() - Read a register by getting its offset from the * offset array defined in gpudev node - * @adreno_dev: Pointer to the the adreno device - * @offset_name: The register enum that is to be read - * @val: Register value read is placed here + * @adreno_dev: Pointer to the adreno device + * @offset_name: The register enum that is to be read + * @val: Register value read is placed here */ static inline void adreno_readreg(struct adreno_device *adreno_dev, enum adreno_regs offset_name, unsigned int *val) @@ -1326,9 +1317,9 @@ static inline void adreno_readreg(struct adreno_device *adreno_dev, /* * adreno_writereg() - Write a register by getting its offset from the * offset array defined in gpudev node - * @adreno_dev: Pointer to the the adreno device - * @offset_name: The register enum that is to be written - * @val: Value to write + * @adreno_dev: Pointer to the adreno device + * @offset_name: The register enum that is to be written + * @val: Value to write */ static inline void adreno_writereg(struct adreno_device *adreno_dev, enum adreno_regs offset_name, unsigned int val) @@ -1343,8 +1334,8 @@ static inline void adreno_writereg(struct adreno_device *adreno_dev, /* * adreno_getreg() - Returns the offset value of a register from the * register offset array in the gpudev node - * @adreno_dev: Pointer to the the adreno device - * @offset_name: The register enum whore offset is returned + * @adreno_dev: Pointer to the adreno device + * @offset_name: The register enum whore 
offset is returned */ static inline unsigned int adreno_getreg(struct adreno_device *adreno_dev, enum adreno_regs offset_name) @@ -1359,9 +1350,9 @@ static inline unsigned int adreno_getreg(struct adreno_device *adreno_dev, /* * adreno_write_gmureg() - Write a GMU register by getting its offset from the * offset array defined in gpudev node - * @adreno_dev: Pointer to the the adreno device - * @offset_name: The register enum that is to be written - * @val: Value to write + * @adreno_dev: Pointer to the adreno device + * @offset_name: The register enum that is to be written + * @val: Value to write */ static inline void adreno_write_gmureg(struct adreno_device *adreno_dev, enum adreno_regs offset_name, unsigned int val) diff --git a/adreno_a5xx.h b/adreno_a5xx.h index 61b46c7bee..dd8f701f0a 100644 --- a/adreno_a5xx.h +++ b/adreno_a5xx.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2015-2017,2019-2020 The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022,2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _ADRENO_A5XX_H_ @@ -265,20 +265,20 @@ extern const struct adreno_perfcounters adreno_a5xx_perfcounters; int a5xx_ringbuffer_init(struct adreno_device *adreno_dev); /** -* a5xx_ringbuffer_addcmds - Submit a command to the ringbuffer -* @adreno_dev: An Adreno GPU handle -* @rb: Pointer to the ringbuffer to submit on -* @drawctxt: Pointer to the draw context for the submission, or NULL for -* internal submissions -* @flags: Flags for the submission -* @in: Commands to write to the ringbuffer -* @dwords: Size of @in (in dwords) -* @timestamp: Timestamp for the submission -* @time: Optional pointer to a submit time structure -* -* Submit a command to the ringbuffer. 
-* Return: 0 on success or negative on failure -*/ + * a5xx_ringbuffer_addcmds - Submit a command to the ringbuffer + * @adreno_dev: An Adreno GPU handle + * @rb: Pointer to the ringbuffer to submit on + * @drawctxt: Pointer to the draw context for the submission, or NULL for + * internal submissions + * @flags: Flags for the submission + * @in: Commands to write to the ringbuffer + * @dwords: Size of @in (in dwords) + * @timestamp: Timestamp for the submission + * @time: Optional pointer to a submit time structure + * + * Submit a command to the ringbuffer. + * Return: 0 on success or negative on failure + */ int a5xx_ringbuffer_addcmds(struct adreno_device *adreno_dev, struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, u32 flags, u32 *in, u32 dwords, u32 timestamp, diff --git a/adreno_a6xx.c b/adreno_a6xx.c index 8103354c6e..3c744931c1 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -415,7 +415,7 @@ struct a6xx_reglist_list { }; #define REGLIST(_a) \ - (struct a6xx_reglist_list) { .regs = _a, .count = ARRAY_SIZE(_a), } + ((struct a6xx_reglist_list) { .regs = _a, .count = ARRAY_SIZE(_a), }) static void a6xx_patch_pwrup_reglist(struct adreno_device *adreno_dev) { @@ -2068,7 +2068,7 @@ int a6xx_perfcounter_update(struct adreno_device *adreno_dev, */ data[offset] = reg->select; data[offset + 1] = reg->countable; - data[offset + 2] = A6XX_RBBM_PERFCTR_CNTL, + data[offset + 2] = A6XX_RBBM_PERFCTR_CNTL; data[offset + 3] = 1; lock->list_length += 2; diff --git a/adreno_a6xx_coresight.c b/adreno_a6xx_coresight.c index 9c8e48f698..0d4de695cc 100644 --- a/adreno_a6xx_coresight.c +++ b/adreno_a6xx_coresight.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only /* -* Copyright (c) 2017-2021, The Linux Foundation. All rights reserved. -* Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. -*/ + * Copyright (c) 2017-2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2022, 2024 Qualcomm Innovation Center, Inc. 
All rights reserved. + */ #include "adreno.h" #include "adreno_a6xx.h" diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index 75893892ee..dfe6bb3c6e 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -201,7 +201,6 @@ static void adreno_hwsched_snapshot_rb_payload(struct adreno_device *adreno_dev, err: snprintf(str, sizeof(str), "RB addr:0x%llx", gpuaddr); SNAPSHOT_ERR_NOMEM(device, str); - return; } static bool parse_payload_rb_legacy(struct adreno_device *adreno_dev, diff --git a/adreno_coresight.c b/adreno_coresight.c index 2fd9db3d2c..6740b44107 100644 --- a/adreno_coresight.c +++ b/adreno_coresight.c @@ -288,7 +288,6 @@ static void funnel_gfx_disable(struct coresight_device *csdev, struct coresight_ adreno_active_count_put(adreno_dev); err: mutex_unlock(&device->mutex); - return; } struct coresight_ops_link funnel_link_gfx_ops = { @@ -354,8 +353,8 @@ void adreno_coresight_add_device(struct adreno_device *adreno_dev, const char *n return; /* Set the funnel ops as graphics ops to bring GPU up before enabling funnel */ - if (funnel_gfx !=NULL && funnel_gfx->funnel_csdev != NULL - && funnel_gfx->funnel_csdev->ops == NULL) + if ((funnel_gfx != NULL) && (funnel_gfx->funnel_csdev != NULL) + && (funnel_gfx->funnel_csdev->ops == NULL)) funnel_gfx->funnel_csdev->ops = &funnel_gfx_ops; adreno_coresight_dev_probe(device, coresight, adreno_csdev, node); diff --git a/adreno_debugfs.c b/adreno_debugfs.c index f7cd9ffef4..1046fe9296 100644 --- a/adreno_debugfs.c +++ b/adreno_debugfs.c @@ -290,7 +290,7 @@ static void drawobj_print(struct seq_file *s, cmdobj_print(s, CMDOBJ(drawobj)); seq_puts(s, " flags: "); - print_flags(s, drawobj->flags, KGSL_DRAWOBJ_FLAGS), + print_flags(s, drawobj->flags, KGSL_DRAWOBJ_FLAGS); kgsl_drawobj_put(drawobj); seq_puts(s, "\n"); } diff --git a/adreno_dispatch.c b/adreno_dispatch.c index 1e4bc9231c..887d29e927 100644 --- a/adreno_dispatch.c +++ b/adreno_dispatch.c @@ -337,7 +337,7 @@ static struct kgsl_drawobj 
*_process_drawqueue_get_next_drawobj( * @cmdobj: Pointer to the KGSL command object to requeue * * Failure to submit a command to the ringbuffer isn't the fault of the command - * being submitted so if a failure happens, push it back on the head of the the + * being submitted so if a failure happens, push it back on the head of the * context queue to be reconsidered again unless the context got detached. */ static inline int adreno_dispatcher_requeue_cmdobj( diff --git a/adreno_drawctxt.h b/adreno_drawctxt.h index 3ff571b806..3dcbb68fd9 100644 --- a/adreno_drawctxt.h +++ b/adreno_drawctxt.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2002,2007-2020, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef __ADRENO_DRAWCTXT_H #define __ADRENO_DRAWCTXT_H @@ -191,7 +191,7 @@ adreno_drawctxt_get_pagetable(struct adreno_context *drawctxt) /** * adreno_drawctxt_set_guilty - Mark a context as guilty and invalidate it * @device: Pointer to a GPU device handle - * @context: Poniter to the context to invalidate + * @context: Pointer to the context to invalidate * * Mark the specified context as guilty and invalidate it */ diff --git a/adreno_gen7.c b/adreno_gen7.c index 80c3737dc6..68df5f2b15 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -1781,7 +1781,7 @@ int gen7_perfcounter_remove(struct adreno_device *adreno_dev, /* Look for the perfcounter to remove in the list */ for (i = 0; i < lock->dynamic_list_len - 1; i++) { - if ((data[offset + 1] == reg->select) && (data[offset] == pipe) ) { + if ((data[offset + 1] == reg->select) && (data[offset] == pipe)) { remove_counter = true; break; } diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index bd07aa8f89..cea158c30b 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -1294,7 +1294,7 @@ void gen7_free_gmu_block(struct 
gen7_gmu_device *gmu, struct kgsl_memdesc *md) * Do not remove the vma node if we failed to unmap the entire buffer. This is because the * iommu driver considers remapping an already mapped iova as fatal. */ - if (md->size != iommu_unmap(gmu->domain, md->gmuaddr, md->size)) + if (md->size != iommu_unmap(gmu->domain, md->gmuaddr, md->size)) goto free; spin_lock(&vma->lock); @@ -2151,20 +2151,20 @@ static int gen7_bcl_sid_set(struct kgsl_device *device, u32 sid_id, u64 sid_val) return -EINVAL; switch (sid_id) { - case 0: - adreno_dev->bcl_data &= ~BCL_SID0_MASK; - bcl_data = adreno_dev->bcl_data | FIELD_PREP(BCL_SID0_MASK, val); - break; - case 1: - adreno_dev->bcl_data &= ~BCL_SID1_MASK; - bcl_data = adreno_dev->bcl_data | FIELD_PREP(BCL_SID1_MASK, val); - break; - case 2: - adreno_dev->bcl_data &= ~BCL_SID2_MASK; - bcl_data = adreno_dev->bcl_data | FIELD_PREP(BCL_SID2_MASK, val); - break; - default: - return -EINVAL; + case 0: + adreno_dev->bcl_data &= ~BCL_SID0_MASK; + bcl_data = adreno_dev->bcl_data | FIELD_PREP(BCL_SID0_MASK, val); + break; + case 1: + adreno_dev->bcl_data &= ~BCL_SID1_MASK; + bcl_data = adreno_dev->bcl_data | FIELD_PREP(BCL_SID1_MASK, val); + break; + case 2: + adreno_dev->bcl_data &= ~BCL_SID2_MASK; + bcl_data = adreno_dev->bcl_data | FIELD_PREP(BCL_SID2_MASK, val); + break; + default: + return -EINVAL; } return adreno_power_cycle_u32(adreno_dev, &adreno_dev->bcl_data, bcl_data); @@ -2179,14 +2179,14 @@ static u64 gen7_bcl_sid_get(struct kgsl_device *device, u32 sid_id) return 0; switch (sid_id) { - case 0: - return ((u64) FIELD_GET(BCL_SID0_MASK, adreno_dev->bcl_data)); - case 1: - return ((u64) FIELD_GET(BCL_SID1_MASK, adreno_dev->bcl_data)); - case 2: - return ((u64) FIELD_GET(BCL_SID2_MASK, adreno_dev->bcl_data)); - default: - return 0; + case 0: + return ((u64) FIELD_GET(BCL_SID0_MASK, adreno_dev->bcl_data)); + case 1: + return ((u64) FIELD_GET(BCL_SID1_MASK, adreno_dev->bcl_data)); + case 2: + return ((u64) FIELD_GET(BCL_SID2_MASK, 
adreno_dev->bcl_data)); + default: + return 0; } } diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 05e791fa6c..d588095319 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -246,7 +246,6 @@ static void adreno_hwsched_snapshot_rb_payload(struct adreno_device *adreno_dev, err: snprintf(str, sizeof(str), "RB addr:0x%llx", gpuaddr); SNAPSHOT_ERR_NOMEM(device, str); - return; } static bool parse_payload_rb_legacy(struct adreno_device *adreno_dev, @@ -1527,7 +1526,7 @@ static int gen7_hwsched_pm_suspend(struct adreno_device *adreno_dev) /* Halt any new submissions */ reinit_completion(&device->halt_gate); - /** + /* * Wait for the dispatcher to retire everything by waiting * for the active count to go to zero. */ @@ -1655,7 +1654,7 @@ static int process_inflight_hw_fences_after_reset(struct adreno_device *adreno_d struct list_head hw_fence_list; struct adreno_hw_fence_entry *entry, *tmp; - /** + /* * Since we need to wait for ack from GMU when sending each inflight fence back to GMU, we * cannot send them from within atomic context. 
Hence, walk list of such hardware fences * for each context and add it to this local list and then walk this list to send all these diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 84cb2a08e5..7f3bbfae7e 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -1524,7 +1524,7 @@ int gen7_hwsched_hfi_init(struct adreno_device *adreno_dev) if (IS_ERR_OR_NULL(hw_hfi->f2h_task)) { hw_hfi->f2h_task = kthread_run(hfi_f2h_main, adreno_dev, "gmu_f2h"); - if(!IS_ERR(hw_hfi->f2h_task)) + if (!IS_ERR(hw_hfi->f2h_task)) sched_set_fifo(hw_hfi->f2h_task); } diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c index 56b5690570..27a21aa5dd 100644 --- a/adreno_gen7_snapshot.c +++ b/adreno_gen7_snapshot.c @@ -413,7 +413,7 @@ static size_t gen7_snapshot_trace_buffer_gfx_trace(struct kgsl_device *device, struct kgsl_snapshot_trace_buffer *header = (struct kgsl_snapshot_trace_buffer *) buf; u32 *data = (u32 *)(buf + sizeof(*header)); - struct gen7_trace_buffer_info* info = + struct gen7_trace_buffer_info *info = (struct gen7_trace_buffer_info *) priv; if (remain < SZ_2K + sizeof(*header)) { @@ -445,8 +445,8 @@ static size_t gen7_snapshot_trace_buffer_gfx_trace(struct kgsl_device *device, } /* Number of times the circular buffer has wrapped around */ - wrap_count = FIELD_GET(GENMASK(31,12), status); - write_ptr = FIELD_GET(GENMASK(8,0), status); + wrap_count = FIELD_GET(GENMASK(31, 12), status); + write_ptr = FIELD_GET(GENMASK(8, 0), status); /* Read partial buffer starting from 0 */ if (!wrap_count) { @@ -474,7 +474,7 @@ static size_t gen7_snapshot_trace_buffer_etb(struct kgsl_device *device, u32 read_ptr, count, write_ptr, val, idx = 0; struct kgsl_snapshot_trace_buffer *header = (struct kgsl_snapshot_trace_buffer *) buf; u32 *data = (u32 *)(buf + sizeof(*header)); - struct gen7_trace_buffer_info* info = (struct gen7_trace_buffer_info *) priv; + struct gen7_trace_buffer_info *info = (struct gen7_trace_buffer_info *) priv; /* Unlock ETB 
buffer */ qdss_regwrite(tmc_virt, QDSS_AOSS_APB_TMC_LAR, 0xC5ACCE55); @@ -607,8 +607,8 @@ static void gen7_snapshot_trace_buffer(struct kgsl_device *device, kgsl_regread(device, GEN7_CX_DBGC_CFG_DBGBUS_CNTLT, &val); } - info.granularity = FIELD_GET(GENMASK(14,12), val); - info.segment = FIELD_GET(GENMASK(31,28), val); + info.granularity = FIELD_GET(GENMASK(14, 12), val); + info.segment = FIELD_GET(GENMASK(31, 28), val); val_tmc_ctrl = qdss_regread(tmc_virt, QDSS_AOSS_APB_TMC_CTRL); diff --git a/adreno_hwsched.h b/adreno_hwsched.h index a63e563549..b061df1321 100644 --- a/adreno_hwsched.h +++ b/adreno_hwsched.h @@ -137,11 +137,11 @@ void adreno_hwsched_trigger(struct adreno_device *adreno_dev); */ void adreno_hwsched_start(struct adreno_device *adreno_dev); /** - * adreno_hwsched_dispatcher_init() - Initialize the hwsched dispatcher + * adreno_hwsched_init() - Initialize the hwsched * @adreno_dev: pointer to the adreno device * @hwsched_ops: Pointer to target specific hwsched ops * - * Set up the dispatcher resources. + * Set up the hwsched resources. * Return: 0 on success or negative on failure. */ int adreno_hwsched_init(struct adreno_device *adreno_dev, diff --git a/adreno_profile.h b/adreno_profile.h index 1408d91a9b..1fc7d5ff44 100644 --- a/adreno_profile.h +++ b/adreno_profile.h @@ -1,14 +1,15 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2013-2014,2019-2021 The Linux Foundation. All rights reserved. + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __ADRENO_PROFILE_H #define __ADRENO_PROFILE_H /** * struct adreno_profile_assigns_list: linked list for assigned perf counters - * @list: linkage for nodes in list - * @name: group name or GPU name name + * @list: linkage for nodes in list + * @name: group name or GPU name * @groupid: group id * @countable: countable assigned to perfcounter * @offset: perfcounter register address offset diff --git a/adreno_ringbuffer.c b/adreno_ringbuffer.c index a502393d0e..f9ac132202 100644 --- a/adreno_ringbuffer.c +++ b/adreno_ringbuffer.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -351,7 +351,7 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, /** * adreno_ringbuffer_wait_callback() - Callback function for event registered * on a ringbuffer timestamp - * @device: Device for which the the callback is valid + * @device: Device for which the callback is valid * @context: The context of the event * @priv: The private parameter of the event * @result: Result of the event trigger diff --git a/adreno_snapshot.c b/adreno_snapshot.c index e6e3f40507..16bae92bb2 100644 --- a/adreno_snapshot.c +++ b/adreno_snapshot.c @@ -1056,17 +1056,17 @@ static void adreno_static_ib_dump(struct kgsl_device *device, return; /* Make sure that the last IB1 that was being executed is dumped. - * Since this was the last IB1 that was processed, we should have - * already added it to the list during the ringbuffer parse but we - * want to be double plus sure. - * The problem is that IB size from the register is the unprocessed size - * of the buffer not the original size, so if we didn't catch this - * buffer being directly used in the RB, then we might not be able to - * dump the whole thing. 
Try to dump the maximum possible size from the - * IB1 base address till the end of memdesc size so that we dont miss - * what we are interested in. Print a warning message so we can try to - * figure how often this really happens. - */ + * Since this was the last IB1 that was processed, we should have + * already added it to the list during the ringbuffer parse but we + * want to be double plus sure. + * The problem is that IB size from the register is the unprocessed size + * of the buffer not the original size, so if we didn't catch this + * buffer being directly used in the RB, then we might not be able to + * dump the whole thing. Try to dump the maximum possible size from the + * IB1 base address till the end of memdesc size so that we dont miss + * what we are interested in. Print a warning message so we can try to + * figure how often this really happens. + */ if (ib1base && (-ENOENT == find_object(ib1base, process))) { struct kgsl_mem_entry *entry; diff --git a/governor_msm_adreno_tz.c b/governor_msm_adreno_tz.c index 2c4f9f07df..4f52ae0ecd 100644 --- a/governor_msm_adreno_tz.c +++ b/governor_msm_adreno_tz.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2010-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include #include @@ -89,7 +89,7 @@ static ssize_t gpu_load_show(struct device *dev, /* * Average out the samples taken since last read * This will keep the average value in sync with - * with the client sampling duration. + * the client sampling duration. 
*/ spin_lock(&sample_lock); if (acc_total) diff --git a/include/uapi/linux/msm_kgsl.h b/include/uapi/linux/msm_kgsl.h index 66251cfb0b..4d93db001b 100644 --- a/include/uapi/linux/msm_kgsl.h +++ b/include/uapi/linux/msm_kgsl.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _UAPI_MSM_KGSL_H @@ -1393,7 +1393,7 @@ struct kgsl_gpu_event_timestamp { }; /** - * struct kgsl_gpu_event_fence - Specifies a fence ID to to free a GPU object on + * struct kgsl_gpu_event_fence - Specifies a fence ID to free a GPU object on * @fd: File descriptor for the fence */ struct kgsl_gpu_event_fence { diff --git a/kgsl.h b/kgsl.h index 335f50e6d1..adf762c5ad 100644 --- a/kgsl.h +++ b/kgsl.h @@ -377,7 +377,7 @@ typedef void (*kgsl_event_func)(struct kgsl_device *, struct kgsl_event_group *, * @device: Pointer to the KGSL device that owns the event * @context: Pointer to the context that owns the event * @timestamp: Timestamp for the event to expire - * @func: Callback function for for the event when it expires + * @func: Callback function for the event when it expires * @priv: Private data passed to the callback function * @node: List node for the kgsl_event_group list * @created: Jiffies when the event was created diff --git a/kgsl_events.c b/kgsl_events.c index c536ee0496..2d505a83b4 100644 --- a/kgsl_events.c +++ b/kgsl_events.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2011-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -80,7 +80,7 @@ static void _process_event_group(struct kgsl_device *device, context = group->context; /* - * Sanity check to be sure that we we aren't racing with the context + * Sanity check to be sure that we aren't racing with the context * getting destroyed */ if (WARN_ON(context != NULL && !_kgsl_context_get(context))) diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index 29d7b346b8..aa112ff599 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -56,7 +56,7 @@ struct kgsl_pwr_constraint { }; /** - * struct kgsl_pwrlevel - Struct holding different pwrlevel info obtained from + * struct kgsl_pwrlevel - Struct holding different pwrlevel info obtained * from dtsi file * @gpu_freq: GPU frequency vote in Hz * @bus_freq: Bus bandwidth vote index diff --git a/kgsl_snapshot.c b/kgsl_snapshot.c index dd04963d3f..7c426543c4 100644 --- a/kgsl_snapshot.c +++ b/kgsl_snapshot.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -101,7 +101,7 @@ static void kgsl_snapshot_put_object(struct kgsl_snapshot_object *obj) /** * kgsl_snapshot_have_object() - return 1 if the object has been processed * @snapshot: the snapshot data - * @process: The process that owns the the object to freeze + * @process: The process that owns the object to freeze * @gpuaddr: The gpu address of the object to freeze * @size: the size of the object (may not always be the size of the region) * @@ -617,16 +617,17 @@ static void kgsl_device_snapshot_atomic(struct kgsl_device *device) } /** - * kgsl_snapshot() - construct a device snapshot + * kgsl_device_snapshot() - construct a device snapshot * @device: device to snapshot * @context: the context that is hung, might be NULL if unknown. 
+ * @context_lpac: the lpac context that is hung, might be NULL if unknown. * @gmu_fault: whether this snapshot is triggered by a GMU fault. * * Given a device, construct a binary snapshot dump of the current device state * and store it in the device snapshot memory. */ void kgsl_device_snapshot(struct kgsl_device *device, - struct kgsl_context *context, struct kgsl_context *context_lpac, + struct kgsl_context *context, struct kgsl_context *context_lpac, bool gmu_fault) { struct kgsl_snapshot *snapshot; diff --git a/kgsl_snapshot.h b/kgsl_snapshot.h index 52679de924..52807bfc57 100644 --- a/kgsl_snapshot.h +++ b/kgsl_snapshot.h @@ -124,7 +124,7 @@ struct kgsl_snapshot_mem_list_v2 { /* Indirect buffer sub-section header (v2) */ struct kgsl_snapshot_ib_v2 { - __u64 gpuaddr; /* GPU address of the the IB */ + __u64 gpuaddr; /* GPU address of the IB */ __u64 ptbase; /* Base for the pagetable the GPU address is valid in */ __u64 size; /* Size of the IB */ } __packed; @@ -294,7 +294,7 @@ struct kgsl_snapshot_trace_buffer { struct kgsl_snapshot_gpu_object_v2 { int type; /* Type of GPU object */ - __u64 gpuaddr; /* GPU address of the the object */ + __u64 gpuaddr; /* GPU address of the object */ __u64 ptbase; /* Base for the pagetable the GPU address is valid in */ __u64 size; /* Size of the object (in dwords) */ } __packed; diff --git a/kgsl_util.h b/kgsl_util.h index 74b622e27e..66f73eb577 100644 --- a/kgsl_util.h +++ b/kgsl_util.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef _KGSL_UTIL_H_ @@ -236,9 +236,9 @@ static inline void kgsl_remove_from_minidump(char *name, u64 virt_addr, u64 phy_ } static inline int kgsl_add_va_to_minidump(struct device *dev, const char *name, void *ptr, - size_t size) + size_t size) { - return 0; + return 0; } static inline void kgsl_qcom_va_md_register(struct kgsl_device *device) From 9b2814d9e51be628c2b5ec5c9e9665ce16eee0a3 Mon Sep 17 00:00:00 2001 From: Deepak Kumar Date: Wed, 21 Feb 2024 23:12:15 +0530 Subject: [PATCH 0709/1016] kgsl: Deactivate GPU and GPUHTW llcc slices during recovery Currently, GPU and GPUHTW llcc slices are not deactivated during power off in recovery sequence but get activated in power on sequence. This results in extra enable refcount on these slices and they don't get deactivated ever. Deactivate these slices during power off in recovery sequence to ensure refcount is balanced and these slices get deactivated when gpu is in power off state. Change-Id: I29c661a629d3251af6b3496eb9f8ee92d2060466 Signed-off-by: Deepak Kumar Signed-off-by: Sanjay Yadav --- adreno.c | 7 +------ adreno.h | 14 ++++++++++++++ adreno_a6xx_gmu.c | 9 +++------ adreno_a6xx_hwsched.c | 9 +++------ adreno_a6xx_rgmu.c | 9 +++------ adreno_gen7_gmu.c | 9 +++------ adreno_gen7_hwsched.c | 9 +++------ adreno_gen8_gmu.c | 9 +++------ adreno_gen8_hwsched.c | 9 +++------ 9 files changed, 36 insertions(+), 48 deletions(-) diff --git a/adreno.c b/adreno.c index 042c5a45fa..58fcb188fa 100644 --- a/adreno.c +++ b/adreno.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -2056,11 +2055,7 @@ static int adreno_stop(struct kgsl_device *device) adreno_ringbuffer_stop(adreno_dev); - if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice)) - llcc_slice_deactivate(adreno_dev->gpu_llc_slice); - - if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) - llcc_slice_deactivate(adreno_dev->gpuhtw_llc_slice); + adreno_llcc_slice_deactivate(adreno_dev); adreno_set_active_ctxs_null(adreno_dev); diff --git 
a/adreno.h b/adreno.h index e697fa2743..f9c383f444 100644 --- a/adreno.h +++ b/adreno.h @@ -8,6 +8,7 @@ #include #include +#include #include "adreno_coresight.h" #include "adreno_dispatch.h" #include "adreno_drawctxt.h" @@ -2004,4 +2005,17 @@ bool adreno_smmu_is_stalled(struct adreno_device *adreno_dev); * Return - AHB timeout value to be programmed in AHB CNTL registers */ u32 adreno_get_ahb_timeout_val(struct adreno_device *adreno_dev, u32 noc_timeout_us); + +/** + * adreno_llcc_slice_deactivate - Deactivate GPU and GPUHTW llcc slices + * @adreno_dev: Adreno device handle + */ +static inline void adreno_llcc_slice_deactivate(struct adreno_device *adreno_dev) +{ + if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice)) + llcc_slice_deactivate(adreno_dev->gpu_llc_slice); + + if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) + llcc_slice_deactivate(adreno_dev->gpuhtw_llc_slice); +} #endif /*__ADRENO_H */ diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index 35681ad46f..9d0fabd0c7 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -3410,11 +3409,7 @@ no_gx_power: adreno_ringbuffer_stop(adreno_dev); - if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice)) - llcc_slice_deactivate(adreno_dev->gpu_llc_slice); - - if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) - llcc_slice_deactivate(adreno_dev->gpuhtw_llc_slice); + adreno_llcc_slice_deactivate(adreno_dev); clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); @@ -3708,6 +3703,8 @@ int a6xx_gmu_reset(struct adreno_device *adreno_dev) a6xx_reset_preempt_records(adreno_dev); + adreno_llcc_slice_deactivate(adreno_dev); + clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); /* Attempt to reboot the gmu and gpu */ diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index 771ca83839..1f719ef322 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -7,7 +7,6 @@ #include #include #include -#include #include "adreno.h" #include "adreno_a6xx.h" @@ 
-889,11 +888,7 @@ no_gx_power: adreno_hwsched_unregister_contexts(adreno_dev); - if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice)) - llcc_slice_deactivate(adreno_dev->gpu_llc_slice); - - if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) - llcc_slice_deactivate(adreno_dev->gpuhtw_llc_slice); + adreno_llcc_slice_deactivate(adreno_dev); clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); @@ -1245,6 +1240,8 @@ int a6xx_hwsched_reset_replay(struct adreno_device *adreno_dev) a6xx_gmu_suspend(adreno_dev); + adreno_llcc_slice_deactivate(adreno_dev); + clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); ret = a6xx_hwsched_boot(adreno_dev); diff --git a/adreno_a6xx_rgmu.c b/adreno_a6xx_rgmu.c index 469f92b35d..636ac9b575 100644 --- a/adreno_a6xx_rgmu.c +++ b/adreno_a6xx_rgmu.c @@ -12,7 +12,6 @@ #include #include #include -#include #include "adreno.h" #include "adreno_a6xx.h" @@ -1085,11 +1084,7 @@ no_gx_power: adreno_ringbuffer_stop(adreno_dev); - if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice)) - llcc_slice_deactivate(adreno_dev->gpu_llc_slice); - - if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) - llcc_slice_deactivate(adreno_dev->gpuhtw_llc_slice); + adreno_llcc_slice_deactivate(adreno_dev); clear_bit(RGMU_PRIV_GPU_STARTED, &rgmu->flags); @@ -1113,6 +1108,8 @@ int a6xx_rgmu_reset(struct adreno_device *adreno_dev) a6xx_reset_preempt_records(adreno_dev); + adreno_llcc_slice_deactivate(adreno_dev); + clear_bit(RGMU_PRIV_GPU_STARTED, &rgmu->flags); /* Attempt rebooting the rgmu and gpu */ diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 14bfccd691..ab6a9eef0a 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -3059,11 +3058,7 @@ no_gx_power: adreno_ringbuffer_stop(adreno_dev); - if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice)) - llcc_slice_deactivate(adreno_dev->gpu_llc_slice); - - if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) - llcc_slice_deactivate(adreno_dev->gpuhtw_llc_slice); + 
adreno_llcc_slice_deactivate(adreno_dev); clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); @@ -3359,6 +3354,8 @@ int gen7_gmu_reset(struct adreno_device *adreno_dev) gen7_reset_preempt_records(adreno_dev); + adreno_llcc_slice_deactivate(adreno_dev); + clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); /* Attempt to reboot the gmu and gpu */ diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 7f087c1690..b5be6e0fa9 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -7,7 +7,6 @@ #include #include #include -#include #include "adreno.h" #include "adreno_gen7.h" @@ -1238,11 +1237,7 @@ no_gx_power: adreno_hwsched_unregister_contexts(adreno_dev); - if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice)) - llcc_slice_deactivate(adreno_dev->gpu_llc_slice); - - if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) - llcc_slice_deactivate(adreno_dev->gpuhtw_llc_slice); + adreno_llcc_slice_deactivate(adreno_dev); clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); @@ -1791,6 +1786,8 @@ int gen7_hwsched_reset_replay(struct adreno_device *adreno_dev) adreno_hwsched_unregister_contexts(adreno_dev); + adreno_llcc_slice_deactivate(adreno_dev); + clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); spin_lock(&hfi->hw_fence.lock); diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index fba3e66bfa..96a680370f 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include @@ -2903,11 +2902,7 @@ no_gx_power: adreno_ringbuffer_stop(adreno_dev); - if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice)) - llcc_slice_deactivate(adreno_dev->gpu_llc_slice); - - if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) - llcc_slice_deactivate(adreno_dev->gpuhtw_llc_slice); + adreno_llcc_slice_deactivate(adreno_dev); clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); @@ -3201,6 +3196,8 @@ int gen8_gmu_reset(struct adreno_device *adreno_dev) gen8_reset_preempt_records(adreno_dev); + adreno_llcc_slice_deactivate(adreno_dev); + 
clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); /* Attempt to reboot the gmu and gpu */ diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 94e50a30bb..babfac5ecc 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -7,7 +7,6 @@ #include #include #include -#include #include "adreno.h" #include "adreno_gen8.h" @@ -1281,11 +1280,7 @@ no_gx_power: adreno_hwsched_unregister_contexts(adreno_dev); - if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice)) - llcc_slice_deactivate(adreno_dev->gpu_llc_slice); - - if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) - llcc_slice_deactivate(adreno_dev->gpuhtw_llc_slice); + adreno_llcc_slice_deactivate(adreno_dev); clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); @@ -1832,6 +1827,8 @@ int gen8_hwsched_reset_replay(struct adreno_device *adreno_dev) adreno_hwsched_unregister_contexts(adreno_dev); + adreno_llcc_slice_deactivate(adreno_dev); + clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); spin_lock(&hfi->hw_fence.lock); From 79df41556aba2f47bcd4a31937d06ad3b7d8f0b5 Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Thu, 1 Feb 2024 16:19:13 -0800 Subject: [PATCH 0710/1016] kgsl: gen8: Enable L0 preemption for gen8_0_0 Enable L0 preemption for gen8_0_0 GPU. 
Change-Id: Ie1b628b46f7db6d0b8a6b62376263d0d7201a88e Signed-off-by: Urvashi Agrawal --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index b0d6e03ea5..ca3d569db7 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2712,7 +2712,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_BCL | - ADRENO_IFPC | ADRENO_HW_FENCE, + ADRENO_IFPC | ADRENO_HW_FENCE | ADRENO_PREEMPTION, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_64M, From 13f9763e562f329743f6ab639cbb0f4986993091 Mon Sep 17 00:00:00 2001 From: Carter Cooper Date: Mon, 18 Dec 2023 16:28:01 -0700 Subject: [PATCH 0711/1016] kgsl: gen8: Enable ACD on gen8_0_0 GPU Adaptive Clock Distribution feature helps mitigate peak current and voltage droops. 
Change-Id: I0209f542e1cdb10c2e398c50f9eef0e07211233a Signed-off-by: Carter Cooper --- adreno-gpulist.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index ca3d569db7..09a5c99de1 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2712,7 +2712,8 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_BCL | - ADRENO_IFPC | ADRENO_HW_FENCE | ADRENO_PREEMPTION, + ADRENO_IFPC | ADRENO_HW_FENCE | ADRENO_PREEMPTION | + ADRENO_ACD, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_64M, From e13360da9dd7189485b3fafd51e5f14c9e4d04bd Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 14 Mar 2024 19:50:23 +0530 Subject: [PATCH 0712/1016] kgsl: Ensure GPU operates within correct frequency range This reverts commit ec6148c417f6 ("kgsl: Ensure GPU operates within correct frequency range"). Some targets have frequency corner which is meant to be used for thermal requests only and should not be used during normal rendering. Currently, userspace can modify sysfs nodes which can violate this policy. Don't allow userspace to set frequency below minimum supported rendering level. 
Change-Id: Ic7cbcf10153aaf9c52dfb17ea98a346bfa1b95b1 Signed-off-by: Kamal Agrawal --- adreno.c | 1 + kgsl_pwrctrl.c | 5 +++-- kgsl_pwrctrl.h | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/adreno.c b/adreno.c index 042c5a45fa..e94eea76c6 100644 --- a/adreno.c +++ b/adreno.c @@ -719,6 +719,7 @@ static void adreno_of_get_initial_pwrlevels(struct kgsl_pwrctrl *pwr, if (level < 0 || level >= pwr->num_pwrlevels || level < pwr->default_pwrlevel) level = pwr->num_pwrlevels - 1; + pwr->min_render_pwrlevel = level; pwr->min_pwrlevel = level; } diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 58de4cbb65..ad29f442d4 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -414,8 +414,9 @@ static void kgsl_pwrctrl_min_pwrlevel_set(struct kgsl_device *device, struct kgsl_pwrctrl *pwr = &device->pwrctrl; mutex_lock(&device->mutex); - if (level >= pwr->num_pwrlevels) - level = pwr->num_pwrlevels - 1; + + if (level > pwr->min_render_pwrlevel) + level = pwr->min_render_pwrlevel; /* You can't set a minimum power level lower than the maximum */ if (level < pwr->max_pwrlevel) diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index 29d7b346b8..43fbb55e24 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -88,6 +88,7 @@ struct kgsl_pwrlevel { * @default_pwrlevel - device wake up power level * @max_pwrlevel - maximum allowable powerlevel per the user * @min_pwrlevel - minimum allowable powerlevel per the user + * @min_render_pwrlevel - minimum allowable powerlevel for rendering * @num_pwrlevels - number of available power levels * @throttle_mask - LM throttle mask * @interval_timeout - timeout to be idle before a power event @@ -142,6 +143,7 @@ struct kgsl_pwrctrl { unsigned int default_pwrlevel; unsigned int max_pwrlevel; unsigned int min_pwrlevel; + unsigned int min_render_pwrlevel; unsigned int num_pwrlevels; unsigned int throttle_mask; u32 interval_timeout; From 443c29342c79cf7a57cc878475c3e8f1dfcd6110 Mon Sep 17 00:00:00 2001 From: Archana Sriram Date: Mon, 11 Mar 2024 
17:50:00 +0530 Subject: [PATCH 0713/1016] kgsl: a6x: Update RSC and PDC sequence for GPU with GMU On A6x GPU with GMU, load PDC and RSC ucode before turning on the GFX rail. This avoids OOB timeout for oob_boot_slumber request on these targets at GMU first boot. Change-Id: I0939210b1c84948f44e55f7de4fdfc8b1957ba24 Signed-off-by: Archana Sriram --- adreno_a6xx_gmu.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index 5e58de3d2f..ca88aae0a8 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -2405,6 +2405,21 @@ static int a6xx_gmu_first_boot(struct adreno_device *adreno_dev) if (ret) goto err; + /* + * Load PDC and RSC ucode before turning on + * GFX rail. 
This avoids oob set timeout + * for oob_boot_slumber request during + * GMU first boot + */ + if (!test_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags)) { + ret = a6xx_load_pdc_ucode(adreno_dev); + if (ret) + goto err; + + a6xx_load_rsc_ucode(adreno_dev); + set_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags); + } + if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_HFI_USE_REG)) { ret = a6xx_gmu_gfx_rail_on(adreno_dev); if (ret) { @@ -2419,15 +2434,6 @@ static int a6xx_gmu_first_boot(struct adreno_device *adreno_dev) goto err; } - if (!test_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags)) { - ret = a6xx_load_pdc_ucode(adreno_dev); - if (ret) - goto err; - - a6xx_load_rsc_ucode(adreno_dev); - set_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags); - } - ret = a6xx_gmu_hfi_start(adreno_dev); if (ret) goto err; From f917894b5319038a096bb6ab4936899ead6be87c Mon Sep 17 00:00:00 2001 From: Archana Sriram Date: Tue, 12 Mar 2024 16:16:13 +0530 Subject: [PATCH 0714/1016] kgsl: a6x: Update poll function for GMU register read For A6x GPU with GMU, use gmu_core_timed_poll_check() instead of poll_gmu_reg() for GMU_GMU2HOST_INTR_INFO register. This avoids timeout while waiting on ACK for command sequence. Change-Id: I6b5c94d0026a9b6d3efc64d8e80188b9dc0014a6 Signed-off-by: Archana Sriram --- adreno_a6xx_hfi.c | 56 +++-------------------------------------------- 1 file changed, 3 insertions(+), 53 deletions(-) diff --git a/adreno_a6xx_hfi.c b/adreno_a6xx_hfi.c index 7bde0a2afe..60fc30ad05 100644 --- a/adreno_a6xx_hfi.c +++ b/adreno_a6xx_hfi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -257,56 +257,6 @@ int a6xx_receive_ack_cmd(struct a6xx_gmu_device *gmu, void *rcvd, return -ENODEV; } -static int poll_gmu_reg(struct adreno_device *adreno_dev, - u32 offsetdwords, unsigned int expected_val, - unsigned int mask, unsigned int timeout_ms) -{ - unsigned int val; - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms); - u64 ao_pre_poll, ao_post_poll; - bool nmi = false; - - ao_pre_poll = a6xx_read_alwayson(adreno_dev); - - /* FIXME: readl_poll_timeout? */ - while (time_is_after_jiffies(timeout)) { - gmu_core_regread(device, offsetdwords, &val); - if ((val & mask) == expected_val) - return 0; - - /* - * If GMU firmware fails any assertion, error message is sent - * to KMD and NMI is triggered. So check if GMU is in NMI and - * timeout early. Bits [11:9] of A6XX_GMU_CM3_FW_INIT_RESULT - * contain GMU reset status. Non zero value here indicates that - * GMU reset is active, NMI handler would eventually complete - * and GMU would wait for recovery. - */ - gmu_core_regread(device, A6XX_GMU_CM3_FW_INIT_RESULT, &val); - if (val & 0xE00) { - nmi = true; - break; - } - - usleep_range(10, 100); - } - - ao_post_poll = a6xx_read_alwayson(adreno_dev); - - /* Check one last time */ - gmu_core_regread(device, offsetdwords, &val); - if ((val & mask) == expected_val) - return 0; - - dev_err(&gmu->pdev->dev, "kgsl hfi poll %s: always on: %lld ms\n", - nmi ? 
"abort" : "timeout", - div_u64((ao_post_poll - ao_pre_poll) * 52, USEC_PER_SEC)); - - return -ETIMEDOUT; -} - static int a6xx_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev, void *data, u32 size_bytes, struct pending_cmd *ret_cmd) { @@ -327,8 +277,8 @@ static int a6xx_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev, if (rc) return rc; - rc = poll_gmu_reg(adreno_dev, A6XX_GMU_GMU2HOST_INTR_INFO, - HFI_IRQ_MSGQ_MASK, HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT); + rc = gmu_core_timed_poll_check(device, A6XX_GMU_GMU2HOST_INTR_INFO, + HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT, HFI_IRQ_MSGQ_MASK); if (rc) { gmu_core_fault_snapshot(device); From 0b383fbbdee1a851edecd221669c0dedff0af408 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sat, 16 Mar 2024 09:59:44 +0530 Subject: [PATCH 0715/1016] kgsl: gen8: Remove DBGC register programming during GPU boot During GPU boot-up, there is no need to write DBGC registers. Thus, remove them from non-context register list. Change-Id: I5a576b7b012bf471e859b268eab50edeb8224553 Signed-off-by: Kamal Agrawal --- adreno-gpulist.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index b0d6e03ea5..2ec23920e8 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2587,12 +2587,6 @@ static const struct kgsl_regmap_list gen8_0_0_gbif_cx_regs[] = { /* GEN8_0_0 noncontext register list */ static const struct gen8_nonctxt_regs gen8_0_0_nonctxt_regs[] = { { GEN8_CP_SMMU_STREAM_ID_LPAC, 0x00000101, BIT(PIPE_NONE) }, - { GEN8_DBGC_CFG_DBGBUS_CNTLT, 0xf0004000, BIT(PIPE_NONE) }, - { GEN8_DBGC_CFG_DBGBUS_MASKL_0, 0x00000003, BIT(PIPE_NONE) }, - { GEN8_DBGC_CFG_DBGBUS_MASKL_2, 0xffffffff, BIT(PIPE_NONE) }, - { GEN8_DBGC_CFG_DBGBUS_MASKL_3, 0xffffffff, BIT(PIPE_NONE) }, - { GEN8_DBGC_CFG_DBGBUS_BYTEL_0, 0x00000008, BIT(PIPE_NONE) }, - { GEN8_DBGC_CFG_DBGBUS_BYTEL_1, 0x76543210, BIT(PIPE_NONE) }, { GEN8_GRAS_DBG_ECO_CNTL, 0x00000800, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_PC_AUTO_VERTEX_STRIDE, 
0x00000001, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_PC_VIS_STREAM_CNTL, 0x10010000, BIT(PIPE_BR) | BIT(PIPE_BV) }, @@ -2743,12 +2737,6 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { /* GEN8_4_0 noncontext register list */ static const struct gen8_nonctxt_regs gen8_4_0_nonctxt_regs[] = { { GEN8_CP_SMMU_STREAM_ID_LPAC, 0x00000101, BIT(PIPE_NONE) }, - { GEN8_DBGC_CFG_DBGBUS_CNTLT, 0xf0004000, BIT(PIPE_NONE) }, - { GEN8_DBGC_CFG_DBGBUS_MASKL_0, 0x00000003, BIT(PIPE_NONE) }, - { GEN8_DBGC_CFG_DBGBUS_MASKL_2, 0xffffffff, BIT(PIPE_NONE) }, - { GEN8_DBGC_CFG_DBGBUS_MASKL_3, 0xffffffff, BIT(PIPE_NONE) }, - { GEN8_DBGC_CFG_DBGBUS_BYTEL_0, 0x00000008, BIT(PIPE_NONE) }, - { GEN8_DBGC_CFG_DBGBUS_BYTEL_1, 0x76543210, BIT(PIPE_NONE) }, { GEN8_GRAS_DBG_ECO_CNTL, 0x00000800, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_PC_AUTO_VERTEX_STRIDE, 0x00000001, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_PC_VIS_STREAM_CNTL, 0x10010000, BIT(PIPE_BR) | BIT(PIPE_BV) }, From 63f6bbb61603d9238ee499440d7af2f53b0c8ac4 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sat, 16 Mar 2024 20:19:34 +0530 Subject: [PATCH 0716/1016] kgsl: gen8: Add memory barrier before reading SPTP registers Add a memory barrier before reading SPTP cluster registers to ensure that reads do not happen before writing the SP_READ_SEL register. Change-Id: If957e2cedb41a32de1e62b18021fd68dacddae13 Signed-off-by: Kamal Agrawal --- adreno_gen8_snapshot.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 5d44d74486..8c2372c9b7 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -746,6 +746,12 @@ static size_t gen8_legacy_snapshot_cluster_dbgahb(struct kgsl_device *device, kgsl_regwrite(device, GEN8_SP_READ_SEL, read_sel); + /* + * An explicit barrier is needed so that reads do not happen before + * the register write. 
+ */ + mb(); + for (; ptr[0] != UINT_MAX; ptr += 2) { u32 count = REG_COUNT(ptr); From 5d043ac30c06eabd1ea97570f73137af91b1172d Mon Sep 17 00:00:00 2001 From: Vaishali Gupta Date: Sun, 17 Mar 2024 23:26:04 -0700 Subject: [PATCH 0717/1016] Revert "kgsl: Prevent wrapped around VA range allocation" This reverts commit e79a1524863aa51af201a64c4ea7ce2b4d0d2834. Signed-off-by: Vaishali Gupta Change-Id: Ibec8162636ad46b09bb0d46c9277984d5c77df7e --- kgsl.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/kgsl.c b/kgsl.c index 787d3324b3..f8bbc6b515 100644 --- a/kgsl.c +++ b/kgsl.c @@ -4104,14 +4104,6 @@ static u64 cap_alignment(struct kgsl_device *device, u64 flags) return flags | FIELD_PREP(KGSL_MEMALIGN_MASK, align); } -static u64 gpumem_max_va_size(struct kgsl_pagetable *pt, u64 flags) -{ - if (flags & KGSL_MEMFLAGS_FORCE_32BIT) - return pt->compat_va_end - pt->compat_va_start; - - return pt->va_end - pt->va_start; -} - static struct kgsl_mem_entry * gpumem_alloc_vbo_entry(struct kgsl_device_private *dev_priv, u64 size, u64 flags) @@ -4120,9 +4112,11 @@ gpumem_alloc_vbo_entry(struct kgsl_device_private *dev_priv, struct kgsl_device *device = dev_priv->device; struct kgsl_memdesc *memdesc; struct kgsl_mem_entry *entry; - struct kgsl_pagetable *pt; int ret; + if (!size) + return ERR_PTR(-EINVAL); + /* Disallow specific flags */ if (flags & (KGSL_MEMFLAGS_GPUREADONLY | KGSL_CACHEMODE_MASK)) return ERR_PTR(-EINVAL); @@ -4141,12 +4135,6 @@ gpumem_alloc_vbo_entry(struct kgsl_device_private *dev_priv, if ((flags & KGSL_MEMFLAGS_SECURE) && !check_and_warn_secured(device)) return ERR_PTR(-EOPNOTSUPP); - pt = (flags & KGSL_MEMFLAGS_SECURE) ? 
- device->mmu.securepagetable : private->pagetable; - - if (!size || (size > gpumem_max_va_size(pt, flags))) - return ERR_PTR(-EINVAL); - flags = cap_alignment(device, flags); entry = kgsl_mem_entry_create(); From 2d1e726963918732656aaf5d92c5f31c26a574eb Mon Sep 17 00:00:00 2001 From: Vaishali Gupta Date: Sun, 17 Mar 2024 23:26:08 -0700 Subject: [PATCH 0718/1016] Revert "msm: kgsl: Keep the timeline fence valid for logging" This reverts commit 50d53cac916ed6ed1554c85defe9c1c5ba2a41b7. Signed-off-by: Vaishali Gupta Change-Id: I576f45fceab24f873e00fb08d9cc4beffc0d6157 --- kgsl_drawobj.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/kgsl_drawobj.c b/kgsl_drawobj.c index ab98fa2170..82233b30bf 100644 --- a/kgsl_drawobj.c +++ b/kgsl_drawobj.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2016-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ /* @@ -588,8 +588,6 @@ static int drawobj_add_sync_timeline(struct kgsl_device *device, /* Set pending flag before adding callback to avoid race */ set_bit(event->id, &syncobj->pending); - /* Get a dma_fence refcount to hand over to the callback */ - dma_fence_get(event->fence); ret = dma_fence_add_callback(event->fence, &event->cb, drawobj_sync_timeline_fence_callback); @@ -604,16 +602,11 @@ static int drawobj_add_sync_timeline(struct kgsl_device *device, ret = 0; } - /* Put the refcount from fence creation */ - dma_fence_put(event->fence); kgsl_drawobj_put(drawobj); return ret; } trace_syncpoint_timeline_fence(event->syncobj, event->fence, false); - - /* Put the refcount from fence creation */ - dma_fence_put(event->fence); return 0; } From 2e8fbcd64e311ae1933dee8c23419056604d829b Mon Sep 17 00:00:00 2001 From: Vaishali Gupta Date: Sun, 17 Mar 2024 23:26:13 -0700 Subject: [PATCH 0719/1016] Revert "kgsl: hwfence: Take detached context refcount" This reverts commit 36af7cf615a62a9d48fcbed73a363ac75ff44876. Signed-off-by: Vaishali Gupta Change-Id: Ia847f77444819815fb69e621e9b405e6e110cd20 --- adreno_gen7_hwsched.c | 7 +------ adreno_gen7_hwsched_hfi.c | 4 +--- adreno_gen8_hwsched.c | 7 +------ adreno_gen8_hwsched_hfi.c | 4 +--- 4 files changed, 4 insertions(+), 18 deletions(-) diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index acf8cd29b3..da13a6078e 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -1678,7 +1678,6 @@ static int process_detached_hw_fences_after_reset(struct adreno_device *adreno_d { struct adreno_hw_fence_entry *entry, *tmp; struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); - struct kgsl_context *context = NULL; int ret = 0; list_for_each_entry_safe(entry, tmp, &hfi->detached_hw_fence_list, node) { @@ -1692,11 +1691,7 @@ static int process_detached_hw_fences_after_reset(struct adreno_device *adreno_d if (ret) return ret; - context = &entry->drawctxt->base; - gen7_remove_hw_fence_entry(adreno_dev, entry); - - kgsl_context_put(context); } return ret; diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 41f380b5b6..21a0d3d074 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -3254,7 +3254,6 @@ static void move_detached_context_hardware_fences(struct adreno_device *adreno_d struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; if ((timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0)) { - _kgsl_context_get(&drawctxt->base); list_move_tail(&entry->node, &hfi->detached_hw_fence_list); continue; } @@ -3264,7 +3263,6 @@ static void move_detached_context_hardware_fences(struct adreno_device *adreno_d /* Also grab all the hardware fences which were never sent to GMU */ list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) { - _kgsl_context_get(&drawctxt->base); list_move_tail(&entry->node, &hfi->detached_hw_fence_list); } } diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 795201c4a9..c6f5b35646 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -1643,7 +1643,6 @@ static int process_detached_hw_fences_after_reset(struct adreno_device *adreno_d { struct adreno_hw_fence_entry *entry, *tmp; struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); - struct kgsl_context *context = NULL; int ret = 0; list_for_each_entry_safe(entry, tmp, &hfi->detached_hw_fence_list, node) { @@ -1657,11 +1656,7 @@ static int process_detached_hw_fences_after_reset(struct adreno_device *adreno_d if (ret) return ret; - context = &entry->drawctxt->base; - gen8_remove_hw_fence_entry(adreno_dev, entry); - - kgsl_context_put(context); } return ret; diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index f9d5f12b5f..1af4ea8ce5 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -3257,7 +3257,6 @@ static void move_detached_context_hardware_fences(struct adreno_device *adreno_d struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; if ((timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0)) { - _kgsl_context_get(&drawctxt->base); list_move_tail(&entry->node, &hfi->detached_hw_fence_list); continue; } @@ -3267,7 +3266,6 @@ static void move_detached_context_hardware_fences(struct adreno_device *adreno_d /* Also grab all the hardware fences which were never sent to GMU */ list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) { - _kgsl_context_get(&drawctxt->base); list_move_tail(&entry->node, &hfi->detached_hw_fence_list); } } From 084710fa3afe82118e44c290278fa35222a345c3 Mon Sep 17 00:00:00 2001 From: Vaishali Gupta Date: Sun, 17 Mar 2024 23:26:17 -0700 Subject: [PATCH 0720/1016] Revert "kgsl: hwfence: Call fd_install after creating hw fence" This reverts commit af451578ba1eac59e37def344678d5bd67e01e93. Signed-off-by: Vaishali Gupta Change-Id: I69b2ba1975014974a3dd29da8d07ee3851baf431 --- kgsl_sync.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kgsl_sync.c b/kgsl_sync.c index 6810860618..b4cff282d3 100644 --- a/kgsl_sync.c +++ b/kgsl_sync.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2012-2019, 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -487,12 +487,11 @@ int kgsl_add_fence_event(struct kgsl_device *device, ret = -EFAULT; goto out; } + fd_install(priv.fence_fd, kfence->sync_file->file); if (!retired) device->ftbl->create_hw_fence(device, kfence); - fd_install(priv.fence_fd, kfence->sync_file->file); - out: kgsl_context_put(context); if (ret) { From c2a247287a24bc499408f85f38166241ef2a92c5 Mon Sep 17 00:00:00 2001 From: Vaishali Gupta Date: Sun, 17 Mar 2024 23:26:22 -0700 Subject: [PATCH 0721/1016] Revert "msm: kgsl: Do not reclaim pages mapped in a VBO" This reverts commit 56b1ed9d2aacd0e1b7d32910b2becf9de63b2b1f. Signed-off-by: Vaishali Gupta Change-Id: I2bda4686730afe4826ba7172ae7d5e1b95d911fd --- kgsl.c | 3 +-- kgsl.h | 4 +--- kgsl_reclaim.c | 9 +-------- kgsl_vbo.c | 15 ++------------- 4 files changed, 5 insertions(+), 26 deletions(-) diff --git a/kgsl.c b/kgsl.c index f8bbc6b515..1431aee862 100644 --- a/kgsl.c +++ b/kgsl.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -259,7 +259,6 @@ static struct kgsl_mem_entry *kgsl_mem_entry_create(void) /* put this ref in userspace memory alloc and map ioctls */ kref_get(&entry->refcount); atomic_set(&entry->map_count, 0); - atomic_set(&entry->vbo_count, 0); } return entry; diff --git a/kgsl.h b/kgsl.h index 335f50e6d1..ad10700b32 100644 --- a/kgsl.h +++ b/kgsl.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __KGSL_H #define __KGSL_H @@ -362,8 +362,6 @@ struct kgsl_mem_entry { * debugfs accounting */ atomic_t map_count; - /** @vbo_count: Count how many VBO ranges this entry is mapped in */ - atomic_t vbo_count; }; struct kgsl_device_private; diff --git a/kgsl_reclaim.c b/kgsl_reclaim.c index 60823b95b2..91713b7604 100644 --- a/kgsl_reclaim.c +++ b/kgsl_reclaim.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -267,13 +267,6 @@ static u32 kgsl_reclaim_process(struct kgsl_process_private *process, continue; } - /* Do not reclaim pages mapped into a VBO */ - if (atomic_read(&valid_entry->vbo_count)) { - kgsl_mem_entry_put(entry); - next++; - continue; - } - if ((atomic_read(&process->unpinned_page_count) + memdesc->page_count) > kgsl_reclaim_max_page_limit) { kgsl_mem_entry_put(entry); diff --git a/kgsl_vbo.c b/kgsl_vbo.c index bf72c139db..a19a01b9f3 100644 --- a/kgsl_vbo.c +++ b/kgsl_vbo.c @@ -43,16 +43,12 @@ static struct kgsl_memdesc_bind_range *bind_range_create(u64 start, u64 last, return ERR_PTR(-EINVAL); } - atomic_inc(&entry->vbo_count); return range; } static void bind_range_destroy(struct kgsl_memdesc_bind_range *range) { - struct kgsl_mem_entry *entry = range->entry; - - atomic_dec(&entry->vbo_count); - kgsl_mem_entry_put(entry); + kgsl_mem_entry_put(range->entry); kfree(range); } @@ -365,12 +361,8 @@ static void kgsl_sharedmem_free_bind_op(struct kgsl_sharedmem_bind_op *op) if (IS_ERR_OR_NULL(op)) return; - for (i = 0; i < op->nr_ops; i++) { - /* Decrement the vbo_count we added when creating the bind_op */ - if (op->ops[i].entry) - atomic_dec(&op->ops[i].entry->vbo_count); + for (i = 0; i < op->nr_ops; i++) kgsl_mem_entry_put(op->ops[i].entry); - } kgsl_mem_entry_put(op->target); @@ -476,9 +468,6 
@@ kgsl_sharedmem_create_bind_op(struct kgsl_process_private *private, goto err; } - /* Keep the child pinned in memory */ - atomic_inc(&entry->vbo_count); - /* Make sure the child is not a VBO */ if ((entry->memdesc.flags & KGSL_MEMFLAGS_VBO)) { ret = -EINVAL; From 3c7ff48eaf5c0138bcd41ec1b280d06ed8f08cf4 Mon Sep 17 00:00:00 2001 From: Vaishali Gupta Date: Sun, 17 Mar 2024 23:26:27 -0700 Subject: [PATCH 0722/1016] Revert "msm: kgsl: Do not release dma and anon buffers if unmap fails" This reverts commit 1cb0c3789d749dde487aca45abdc1b410c964d21. Signed-off-by: Vaishali Gupta Change-Id: I95e69a812479970817d0b87f36c4cfd04b0918f8 --- kgsl.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/kgsl.c b/kgsl.c index 1431aee862..c142097789 100644 --- a/kgsl.c +++ b/kgsl.c @@ -330,9 +330,6 @@ static void kgsl_destroy_ion(struct kgsl_memdesc *memdesc) struct kgsl_mem_entry, memdesc); struct kgsl_dma_buf_meta *metadata = entry->priv_data; - if (memdesc->priv & KGSL_MEMDESC_MAPPED) - return; - if (metadata != NULL) { remove_dmabuf_list(metadata); #if (KERNEL_VERSION(6, 2, 0) <= LINUX_VERSION_CODE) @@ -361,9 +358,6 @@ static void kgsl_destroy_anon(struct kgsl_memdesc *memdesc) struct scatterlist *sg; struct page *page; - if (memdesc->priv & KGSL_MEMDESC_MAPPED) - return; - for_each_sg(memdesc->sgt->sgl, sg, memdesc->sgt->nents, i) { page = sg_page(sg); for (j = 0; j < (sg->length >> PAGE_SHIFT); j++) { From 39fa07d4e9d3a60ee750d57baf56ab63f2ca949d Mon Sep 17 00:00:00 2001 From: Vaishali Gupta Date: Sun, 17 Mar 2024 23:26:32 -0700 Subject: [PATCH 0723/1016] Revert "msm: kgsl: Put VBO child refcount if unmap succeeds" This reverts commit e859160503d9cfaddcf10cac6b6fcbc336c97bb9. 
Signed-off-by: Vaishali Gupta Change-Id: Ifebb4775edbd6eda9fb2eff11a2ee3063101f661 --- kgsl_vbo.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/kgsl_vbo.c b/kgsl_vbo.c index a19a01b9f3..92d3d84c94 100644 --- a/kgsl_vbo.c +++ b/kgsl_vbo.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -46,12 +46,6 @@ static struct kgsl_memdesc_bind_range *bind_range_create(u64 start, u64 last, return range; } -static void bind_range_destroy(struct kgsl_memdesc_bind_range *range) -{ - kgsl_mem_entry_put(range->entry); - kfree(range); -} - static u64 bind_range_len(struct kgsl_memdesc_bind_range *range) { return (range->range.last - range->range.start) + 1; @@ -120,7 +114,8 @@ static void kgsl_memdesc_remove_range(struct kgsl_mem_entry *target, kgsl_mmu_map_zero_page_to_range(memdesc->pagetable, memdesc, range->range.start, bind_range_len(range)); - bind_range_destroy(range); + kgsl_mem_entry_put(range->entry); + kfree(range); } } @@ -180,7 +175,8 @@ static int kgsl_memdesc_add_range(struct kgsl_mem_entry *target, } } - bind_range_destroy(cur); + kgsl_mem_entry_put(cur->entry); + kfree(cur); continue; } @@ -257,7 +253,8 @@ static int kgsl_memdesc_add_range(struct kgsl_mem_entry *target, return ret; error: - bind_range_destroy(range); + kgsl_mem_entry_put(range->entry); + kfree(range); mutex_unlock(&memdesc->ranges_lock); return ret; } @@ -267,7 +264,6 @@ static void kgsl_sharedmem_vbo_put_gpuaddr(struct kgsl_memdesc *memdesc) struct interval_tree_node *node, *next; struct kgsl_memdesc_bind_range *range; int ret = 0; - bool unmap_fail; /* * If the VBO maps the zero range then we can unmap the entire @@ -277,8 +273,6 @@ static void kgsl_sharedmem_vbo_put_gpuaddr(struct 
kgsl_memdesc *memdesc) ret = kgsl_mmu_unmap_range(memdesc->pagetable, memdesc, 0, memdesc->size); - unmap_fail = ret; - /* * FIXME: do we have a use after free potential here? We might need to * lock this and set a "do not update" bit @@ -300,16 +294,15 @@ static void kgsl_sharedmem_vbo_put_gpuaddr(struct kgsl_memdesc *memdesc) range->range.start, range->range.last - range->range.start + 1); - /* Put the child's refcount if unmap succeeds */ - if (!ret) - bind_range_destroy(range); - else - kfree(range); + /* If unmap failed, mark the child memdesc as still mapped */ + if (ret) + range->entry->memdesc.priv |= KGSL_MEMDESC_MAPPED; - unmap_fail = unmap_fail || ret; + kgsl_mem_entry_put(range->entry); + kfree(range); } - if (unmap_fail) + if (ret) return; /* Put back the GPU address */ From 3d76ab0022c606e8ed71dfa6c03d67812bd85e47 Mon Sep 17 00:00:00 2001 From: Vaishali Gupta Date: Sun, 17 Mar 2024 23:26:37 -0700 Subject: [PATCH 0724/1016] Revert "msm: kgsl: DEBUG: Add debug logs for NoC error" This reverts commit b11e1ca3c2996eb812256433a8ff8f5b1da48bbe. 
Signed-off-by: Vaishali Gupta Change-Id: I1c5c87278be00549b52eb6ba41e23dd9835500d7 --- adreno_gen8.c | 3 --- adreno_gen8_gmu_snapshot.c | 1 - 2 files changed, 4 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index 1e4e319974..70cc3244de 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -412,9 +412,6 @@ static void gen8_host_aperture_set(struct adreno_device *adreno_dev, u32 pipe_id kgsl_regwrite(KGSL_DEVICE(adreno_dev), GEN8_CP_APERTURE_CNTL_HOST, aperture_val); - /* Add a barrier for memory writes to complete */ - mb(); - gen8_dev->aperture = aperture_val; } diff --git a/adreno_gen8_gmu_snapshot.c b/adreno_gen8_gmu_snapshot.c index cf4eb5e7ad..4d6250efb5 100644 --- a/adreno_gen8_gmu_snapshot.c +++ b/adreno_gen8_gmu_snapshot.c @@ -294,7 +294,6 @@ void gen8_gmu_snapshot(struct adreno_device *adreno_dev, * register in snapshot to analyze the system state even in partial * snapshot dump */ - dev_err(device->dev, "Snapshot capture started \n"); gen8_snapshot_external_core_regs(device, snapshot); gen8_gmu_device_snapshot(device, snapshot); From 6f3afc1a3bd0792bdf7e23bc1cdc652418a879c3 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 12 Mar 2024 20:47:15 +0530 Subject: [PATCH 0725/1016] kgsl: hwsched: Avoid unclocked access in interrupt handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KGSL handles a few non-fatal interrupts, such as AHB error and TSB write error. Consider the following sequence of execution: 1. Non fatal interrupt is generated by GPU 2. GPU Workload finishes 3. GMU removes its vote on GX 4. Interrupt handling in CPU - Access GPU register in interrupt handler (gen*_err_callback). This can result in a device crash due to unclocked access. Fix this by reverting commit e8c1784fd392 ("kgsl: hwsched: Don’t put keepalive vote in irq_handler").
Change-Id: Id4694d13b663a1d617bd1d28f53a3e2252c38523 Signed-off-by: Kamal Agrawal --- adreno_a6xx.c | 24 +----------------------- adreno_gen7.c | 26 ++------------------------ adreno_gen8.c | 24 +----------------------- 3 files changed, 4 insertions(+), 70 deletions(-) diff --git a/adreno_a6xx.c b/adreno_a6xx.c index 6e118c15b3..2c8c66a37d 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -1853,28 +1853,6 @@ static int a6xx_irq_poll_fence(struct adreno_device *adreno_dev) return 0; } -static irqreturn_t a6xx_hwsched_irq_handler(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - irqreturn_t ret = IRQ_NONE; - u32 status; - - if (a6xx_irq_poll_fence(adreno_dev)) { - adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); - return ret; - } - - kgsl_regread(device, A6XX_RBBM_INT_0_STATUS, &status); - - kgsl_regwrite(device, A6XX_RBBM_INT_CLEAR_CMD, status); - - ret = adreno_irq_callbacks(adreno_dev, a6xx_irq_funcs, status); - - trace_kgsl_a5xx_irq_status(adreno_dev, status); - - return ret; -} - static irqreturn_t a6xx_irq_handler(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -2361,7 +2339,7 @@ const struct a6xx_gpudev adreno_a6xx_hwsched_gpudev = { .reg_offsets = a6xx_register_offsets, .probe = a6xx_hwsched_probe, .snapshot = a6xx_hwsched_snapshot, - .irq_handler = a6xx_hwsched_irq_handler, + .irq_handler = a6xx_irq_handler, .iommu_fault_block = a6xx_iommu_fault_block, .context_detach = a6xx_hwsched_context_detach, .read_alwayson = a6xx_read_alwayson, diff --git a/adreno_gen7.c b/adreno_gen7.c index 920569c636..f76cd488b7 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -1590,28 +1590,6 @@ static int gen7_irq_poll_fence(struct adreno_device *adreno_dev) return 0; } -static irqreturn_t gen7_hwsched_irq_handler(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - irqreturn_t ret = IRQ_NONE; - u32 status; - - if 
(gen7_irq_poll_fence(adreno_dev)) { - adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); - return ret; - } - - kgsl_regread(device, GEN7_RBBM_INT_0_STATUS, &status); - - kgsl_regwrite(device, GEN7_RBBM_INT_CLEAR_CMD, status); - - ret = adreno_irq_callbacks(adreno_dev, gen7_irq_funcs, status); - - trace_kgsl_gen7_irq_status(adreno_dev, status); - - return ret; -} - static irqreturn_t gen7_irq_handler(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -2214,7 +2192,7 @@ const struct gen7_gpudev adreno_gen7_9_0_hwsched_gpudev = { .reg_offsets = gen7_register_offsets, .probe = gen7_hwsched_probe, .snapshot = gen7_hwsched_snapshot, - .irq_handler = gen7_hwsched_irq_handler, + .irq_handler = gen7_irq_handler, .iommu_fault_block = gen7_iommu_fault_block, .context_detach = gen7_hwsched_context_detach, .read_alwayson = gen7_9_0_read_alwayson, @@ -2243,7 +2221,7 @@ const struct gen7_gpudev adreno_gen7_hwsched_gpudev = { .reg_offsets = gen7_register_offsets, .probe = gen7_hwsched_probe, .snapshot = gen7_hwsched_snapshot, - .irq_handler = gen7_hwsched_irq_handler, + .irq_handler = gen7_irq_handler, .iommu_fault_block = gen7_iommu_fault_block, .context_detach = gen7_hwsched_context_detach, .read_alwayson = gen7_read_alwayson, diff --git a/adreno_gen8.c b/adreno_gen8.c index f95bbf8590..54e07cbc85 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -2024,28 +2024,6 @@ static int gen8_irq_poll_fence(struct adreno_device *adreno_dev) return 0; } -static irqreturn_t gen8_hwsched_irq_handler(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - irqreturn_t ret = IRQ_NONE; - u32 status; - - if (gen8_irq_poll_fence(adreno_dev)) { - adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); - return ret; - } - - kgsl_regread(device, GEN8_RBBM_INT_0_STATUS, &status); - - kgsl_regwrite(device, GEN8_RBBM_INT_CLEAR_CMD, status); - - ret = adreno_irq_callbacks(adreno_dev, gen8_irq_funcs, status); - - 
trace_kgsl_gen8_irq_status(adreno_dev, status); - - return ret; -} - static irqreturn_t gen8_irq_handler(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -2724,7 +2702,7 @@ const struct gen8_gpudev adreno_gen8_hwsched_gpudev = { .reg_offsets = gen8_register_offsets, .probe = gen8_hwsched_probe, .snapshot = gen8_hwsched_snapshot, - .irq_handler = gen8_hwsched_irq_handler, + .irq_handler = gen8_irq_handler, .iommu_fault_block = gen8_iommu_fault_block, .context_detach = gen8_hwsched_context_detach, .read_alwayson = gen8_read_alwayson, From b193fd2e82073ac119c9e5a456b549d0a338e2be Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Wed, 6 Mar 2024 15:16:54 +0530 Subject: [PATCH 0726/1016] kgsl: Make GPU and GPUHTW llcc sysfs nodes take immediate effect Currently, GPU and GPUHTW sysfs nodes simply store value in the gpu_llc_slice_enable flag. With this change, it ensures that whenever llcc nodes change it triggers a power cycle followed by an update to the gpu_llc_slice_enable flag. Additionally, it handles cases where the gpu_llc_slice_enable flag is already set or not. In such cases, it returns early. 
Change-Id: I677d2a06a5b04c1f576cc44a5d0a4494bd4d9ccb Signed-off-by: Sanjay Yadav --- adreno.h | 4 ++-- adreno_sysfs.c | 18 +++++++++++------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/adreno.h b/adreno.h index f9c383f444..d86226dc11 100644 --- a/adreno.h +++ b/adreno.h @@ -2012,10 +2012,10 @@ u32 adreno_get_ahb_timeout_val(struct adreno_device *adreno_dev, u32 noc_timeout */ static inline void adreno_llcc_slice_deactivate(struct adreno_device *adreno_dev) { - if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice)) + if (adreno_dev->gpu_llc_slice_enable && !IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice)) llcc_slice_deactivate(adreno_dev->gpu_llc_slice); - if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) + if (adreno_dev->gpuhtw_llc_slice_enable && !IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) llcc_slice_deactivate(adreno_dev->gpuhtw_llc_slice); } #endif /*__ADRENO_H */ diff --git a/adreno_sysfs.c b/adreno_sysfs.c index 00d90845b0..d13ebb9e03 100644 --- a/adreno_sysfs.c +++ b/adreno_sysfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2014-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -102,9 +102,11 @@ static u32 _rt_bus_hint_show(struct adreno_device *adreno_dev) static int _gpu_llc_slice_enable_store(struct adreno_device *adreno_dev, bool val) { - if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice)) - adreno_dev->gpu_llc_slice_enable = val; - return 0; + if (IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice) || + (adreno_dev->gpu_llc_slice_enable == val)) + return 0; + + return adreno_power_cycle_bool(adreno_dev, &adreno_dev->gpu_llc_slice_enable, val); } static bool _gpu_llc_slice_enable_show(struct adreno_device *adreno_dev) @@ -115,9 +117,11 @@ static bool _gpu_llc_slice_enable_show(struct adreno_device *adreno_dev) static int _gpuhtw_llc_slice_enable_store(struct adreno_device *adreno_dev, bool val) { - if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) - adreno_dev->gpuhtw_llc_slice_enable = val; - return 0; + if (IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice) || + (adreno_dev->gpuhtw_llc_slice_enable == val)) + return 0; + + return adreno_power_cycle_bool(adreno_dev, &adreno_dev->gpuhtw_llc_slice_enable, val); } static bool _gpuhtw_llc_slice_enable_show(struct adreno_device *adreno_dev) From f34250a3a9aa73426dd002670ec64543e078c9c3 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sun, 17 Mar 2024 16:45:35 +0530 Subject: [PATCH 0727/1016] kgsl: gen8: Adjust flag cache line invalidation transactions For gen8, update the number of transactions to 64 for flag cache line invalidation. 
Change-Id: Ic4167b4a8c2d466d4643b7bb67a4ceee2ed40c42 Signed-off-by: Kamal Agrawal --- adreno_gen8.c | 1 + 1 file changed, 1 insertion(+) diff --git a/adreno_gen8.c b/adreno_gen8.c index f95bbf8590..4ae6595017 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -1149,6 +1149,7 @@ int gen8_start(struct adreno_device *adreno_dev) kgsl_regwrite(device, GEN8_SP_NC_MODE_CNTL, FIELD_PREP(GENMASK(11, 10), hbb_hi) | + FIELD_PREP(GENMASK(5, 4), 2) | FIELD_PREP(GENMASK(3, 3), mal) | FIELD_PREP(GENMASK(2, 1), hbb_lo)); From 4485497a428a86916746b7a19bb70d3b77192597 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Wed, 20 Mar 2024 00:16:31 +0530 Subject: [PATCH 0728/1016] msm: kgsl: Hold fault lock while accessing context faults Currently, there is no lock held while accessing context faults during the fault report IOCTL. This could introduce a race which can lead to use after free. Thus, hold fault lock while accessing context faults. Change-Id: I09cbda15de3f3fc1074d5ec3e4b7b5daf211fe4a Signed-off-by: Kamal Agrawal --- kgsl.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kgsl.c b/kgsl.c index 787d3324b3..2e6cf06436 100644 --- a/kgsl.c +++ b/kgsl.c @@ -3905,6 +3905,8 @@ static int kgsl_update_fault_details(struct kgsl_context *context, memcpy(&faults[fault.type], &fault, sizeof(fault)); } + mutex_lock(&context->fault_lock); + list_for_each_entry(fault_node, &context->faults, node) { u32 fault_type = fault_node->type; @@ -3922,12 +3924,15 @@ static int kgsl_update_fault_details(struct kgsl_context *context, cur_idx[fault_type] * faults[fault_type].size), fault_node->priv, size)) { ret = -EFAULT; - goto err; + goto release_lock; } cur_idx[fault_type] += 1; } +release_lock: + mutex_unlock(&context->fault_lock); + err: kfree(faults); return ret; @@ -3941,8 +3946,10 @@ static int kgsl_update_fault_count(struct kgsl_context *context, struct kgsl_fault_node *fault_node; int i, j; + mutex_lock(&context->fault_lock); list_for_each_entry(fault_node, 
&context->faults, node) faultcount[fault_node->type]++; + mutex_unlock(&context->fault_lock); /* KGSL_FAULT_TYPE_NO_FAULT (i.e. 0) is not an actual fault type */ for (i = 0, j = 1; i < faultnents && j < KGSL_FAULT_TYPE_MAX; j++) { From 7367d03b37e8f84a73ed141adf1fd6428816c30b Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Thu, 22 Feb 2024 19:06:46 -0800 Subject: [PATCH 0729/1016] kgsl: Add support for KGSL_MEMTYPE_SURFACE From Android 15.0 onwards, the Vulkan driver uses ioctl_kgsl_gpuobj_import() with KGSL_MEMTYPE_SURFACE. Hence, add the required support to account for it in meminfo stats. Change-Id: I37639d0bd29e5c3861894dd4be9a0ab80ccbf57b Signed-off-by: Hareesh Gundu --- kgsl.c | 1 + kgsl_sharedmem.c | 4 ++-- kgsl_snapshot.c | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/kgsl.c b/kgsl.c index 787d3324b3..32bdeb16e3 100644 --- a/kgsl.c +++ b/kgsl.c @@ -3464,6 +3464,7 @@ void kgsl_get_egl_counts(struct kgsl_mem_entry *entry, switch (kgsl_memdesc_get_memtype(&scan_mem_entry->memdesc)) { case KGSL_MEMTYPE_EGL_SURFACE: + case KGSL_MEMTYPE_SURFACE: (*egl_surface_count)++; break; case KGSL_MEMTYPE_EGL_IMAGE: diff --git a/kgsl_sharedmem.c b/kgsl_sharedmem.c index a953d58e7e..3a6c013c77 100644 --- a/kgsl_sharedmem.c +++ b/kgsl_sharedmem.c @@ -259,8 +259,8 @@ imported_mem_show(struct kgsl_process_private *priv, kgsl_get_egl_counts(entry, &egl_surface_count, &egl_image_count); - if (kgsl_memdesc_get_memtype(m) == - KGSL_MEMTYPE_EGL_SURFACE) + if ((kgsl_memdesc_get_memtype(m) == KGSL_MEMTYPE_EGL_SURFACE) || + (kgsl_memdesc_get_memtype(m) == KGSL_MEMTYPE_SURFACE)) imported_mem += m->size; else if (egl_surface_count == 0) { uint64_t size = m->size; diff --git a/kgsl_snapshot.c b/kgsl_snapshot.c index dd04963d3f..5ab6e2ccd9 100644 --- a/kgsl_snapshot.c +++ b/kgsl_snapshot.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. 
- * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -190,6 +190,7 @@ int kgsl_snapshot_get_object(struct kgsl_snapshot *snapshot, mem_type = kgsl_memdesc_get_memtype(&entry->memdesc); if (mem_type == KGSL_MEMTYPE_TEXTURE || mem_type == KGSL_MEMTYPE_EGL_SURFACE || + mem_type == KGSL_MEMTYPE_SURFACE || mem_type == KGSL_MEMTYPE_EGL_IMAGE) { ret = 0; goto err_put; From 4db2ecc19d34c89becd6ab158bb5d5c80ea77d7c Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 12 Sep 2023 12:23:19 +0530 Subject: [PATCH 0730/1016] kgsl: Skip user context record allocation for RB0 contexts User context record is used to save/restore VPC data across preemption. Highest priority ringbuffer i.e. RB0 always runs to completion without any preemption. Thus, user context records are not needed for RB0. Change-Id: Ib12b6d4a2f02302a74768f8eb1e14f7e43283034 Signed-off-by: Kamal Agrawal --- adreno_a6xx_preempt.c | 39 ++++++++++++------------------- adreno_drawctxt.c | 50 ++++++++++++++++++++++++++++++++++++++-- adreno_drawctxt.h | 23 +++++++++++++++++- adreno_gen7_preempt.c | 22 +++--------------- adreno_gen7_ringbuffer.c | 2 +- adreno_gen8_preempt.c | 20 ++-------------- adreno_gen8_ringbuffer.c | 2 +- 7 files changed, 92 insertions(+), 66 deletions(-) diff --git a/adreno_a6xx_preempt.c b/adreno_a6xx_preempt.c index 949301961b..73217508e6 100644 --- a/adreno_a6xx_preempt.c +++ b/adreno_a6xx_preempt.c @@ -493,18 +493,19 @@ u32 a6xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, u32 *cmds) { - unsigned int *cmds_orig = cmds; - uint64_t gpuaddr = 0; + u32 *cmds_orig = cmds; + u64 gpuaddr = 0; if (!adreno_is_preemption_enabled(adreno_dev)) return 0; - if (drawctxt) { + if (drawctxt && drawctxt->base.user_ctxt_record) gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; + + if (gpuaddr) 
*cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 15); - } else { + else *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12); - } /* NULL SMMU_INFO buffer - we track in KMD */ *cmds++ = SET_PSEUDO_SMMU_INFO; @@ -517,7 +518,7 @@ u32 a6xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, cmds += cp_gpuaddr(adreno_dev, cmds, rb->secure_preemption_desc->gpuaddr); - if (drawctxt) { + if (gpuaddr) { *cmds++ = SET_PSEUDO_NON_PRIV_SAVE_ADDR; cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr); } @@ -533,27 +534,17 @@ u32 a6xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, cmds += cp_gpuaddr(adreno_dev, cmds, rb->perfcounter_save_restore_desc->gpuaddr); - if (drawctxt) { - struct adreno_ringbuffer *rb = drawctxt->rb; - uint64_t dest = PREEMPT_SCRATCH_ADDR(adreno_dev, rb->id); + if (!drawctxt) + goto done; - *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 2); - cmds += cp_gpuaddr(adreno_dev, cmds, dest); - *cmds++ = lower_32_bits(gpuaddr); - *cmds++ = upper_32_bits(gpuaddr); + cmds += adreno_prepare_preib_preempt_scratch(adreno_dev, drawctxt, cmds); - /* Add a KMD post amble to clear the perf counters during preemption */ - if (!adreno_dev->perfcounter) { - u64 kmd_postamble_addr = SCRATCH_POSTAMBLE_ADDR(KGSL_DEVICE(adreno_dev)); - *cmds++ = cp_type7_packet(CP_SET_AMBLE, 3); - *cmds++ = lower_32_bits(kmd_postamble_addr); - *cmds++ = upper_32_bits(kmd_postamble_addr); - *cmds++ = FIELD_PREP(GENMASK(22, 20), CP_KMD_AMBLE_TYPE) - | (FIELD_PREP(GENMASK(19, 0), adreno_dev->preempt.postamble_len)); - } - } + /* Add a KMD post amble to clear the perf counters during preemption */ + if (!adreno_dev->perfcounter) + cmds += adreno_prepare_preib_postamble_scratch(adreno_dev, cmds); - return (unsigned int) (cmds - cmds_orig); +done: + return (u32) (cmds - cmds_orig); } u32 a6xx_preemption_post_ibsubmit(struct adreno_device *adreno_dev, diff --git a/adreno_drawctxt.c b/adreno_drawctxt.c index 705d0b7cef..14d8208c9b 100644 --- a/adreno_drawctxt.c +++ 
b/adreno_drawctxt.c @@ -7,6 +7,7 @@ #include #include "adreno.h" +#include "adreno_pm4types.h" #include "adreno_trace.h" static void wait_callback(struct kgsl_device *device, @@ -306,14 +307,59 @@ void adreno_drawctxt_set_guilty(struct kgsl_device *device, adreno_drawctxt_invalidate(device, context); } +u32 adreno_prepare_preib_preempt_scratch(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, u32 *cmds) +{ + struct adreno_ringbuffer *rb = drawctxt->rb; + u32 *cmds_orig = cmds; + u64 gpuaddr, dest; + + if (!drawctxt->base.user_ctxt_record) + return 0; + + dest = PREEMPT_SCRATCH_ADDR(adreno_dev, rb->id); + gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; + + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 2); + cmds += cp_gpuaddr(adreno_dev, cmds, dest); + *cmds++ = lower_32_bits(gpuaddr); + *cmds++ = upper_32_bits(gpuaddr); + + return (u32) (cmds - cmds_orig); +} + +u32 adreno_prepare_preib_postamble_scratch(struct adreno_device *adreno_dev, u32 *cmds) +{ + u32 *cmds_orig = cmds; + u64 kmd_postamble_addr; + + if (!adreno_dev->preempt.postamble_len) + return 0; + + kmd_postamble_addr = SCRATCH_POSTAMBLE_ADDR(KGSL_DEVICE(adreno_dev)); + + *cmds++ = cp_type7_packet(CP_SET_AMBLE, 3); + *cmds++ = lower_32_bits(kmd_postamble_addr); + *cmds++ = upper_32_bits(kmd_postamble_addr); + *cmds++ = FIELD_PREP(GENMASK(22, 20), CP_KMD_AMBLE_TYPE) + | (FIELD_PREP(GENMASK(19, 0), adreno_dev->preempt.postamble_len)); + + return (u32) (cmds - cmds_orig); +} + static int drawctxt_preemption_init(struct kgsl_context *context) { struct kgsl_device *device = context->device; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); u64 flags = 0; - /* User context record is needed for a6x and beyond targets only */ - if (!adreno_preemption_feature_set(adreno_dev) || (ADRENO_GPUREV(adreno_dev) < 600)) + /* + * User context record is needed for a6x and beyond targets only. Also, + * highest priority ringbuffer i.e. 
RB0 always runs to completion without + * preemption. Thus, user context records are not needed for RB0. + */ + if (!adreno_preemption_feature_set(adreno_dev) || (ADRENO_GPUREV(adreno_dev) < 600) || + (adreno_get_level(context) == 0)) return 0; if (context->flags & KGSL_CONTEXT_SECURE) diff --git a/adreno_drawctxt.h b/adreno_drawctxt.h index 3ff571b806..17e7164d3a 100644 --- a/adreno_drawctxt.h +++ b/adreno_drawctxt.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2002,2007-2020, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef __ADRENO_DRAWCTXT_H #define __ADRENO_DRAWCTXT_H @@ -198,6 +198,27 @@ adreno_drawctxt_get_pagetable(struct adreno_context *drawctxt) void adreno_drawctxt_set_guilty(struct kgsl_device *device, struct kgsl_context *context); +/** + * adreno_prepare_preib_preempt_scratch - Update drawctxt pointer in preemption + * scratch buffer before IB commands + * @adreno_dev: Pointer to the adreno device + * @drawctxt: Pointer to the adreno draw context + * @cmds: Pointer to the ringbuffer to insert opcodes + * + * Return: The number of dwords written to @cmds + */ +u32 adreno_prepare_preib_preempt_scratch(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, u32 *cmds); + +/** + * adreno_prepare_preib_postamble_scratch - Insert postamble packets before IB commands + * @adreno_dev: Pointer to the adreno device + * @cmds: Pointer to the ringbuffer to insert opcodes + * + * Return: The number of dwords written to @cmds + */ +u32 adreno_prepare_preib_postamble_scratch(struct adreno_device *adreno_dev, u32 *cmds); + /** * adreno_track_context - Add a context to active list and keep track of active contexts * @adreno_dev: Pointer to adreno device diff --git a/adreno_gen7_preempt.c b/adreno_gen7_preempt.c index a2fdcf0c8f..35c0348cf9 100644 --- 
a/adreno_gen7_preempt.c +++ b/adreno_gen7_preempt.c @@ -559,27 +559,11 @@ u32 gen7_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, done: if (drawctxt) { - struct adreno_ringbuffer *rb = drawctxt->rb; - u64 dest = PREEMPT_SCRATCH_ADDR(adreno_dev, rb->id); - u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; - - *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 2); - cmds += cp_gpuaddr(adreno_dev, cmds, dest); - *cmds++ = lower_32_bits(gpuaddr); - *cmds++ = upper_32_bits(gpuaddr); - - if (adreno_dev->preempt.postamble_len) { - u64 kmd_postamble_addr = SCRATCH_POSTAMBLE_ADDR(KGSL_DEVICE(adreno_dev)); - - *cmds++ = cp_type7_packet(CP_SET_AMBLE, 3); - *cmds++ = lower_32_bits(kmd_postamble_addr); - *cmds++ = upper_32_bits(kmd_postamble_addr); - *cmds++ = FIELD_PREP(GENMASK(22, 20), CP_KMD_AMBLE_TYPE) - | (FIELD_PREP(GENMASK(19, 0), adreno_dev->preempt.postamble_len)); - } + cmds += adreno_prepare_preib_preempt_scratch(adreno_dev, drawctxt, cmds); + cmds += adreno_prepare_preib_postamble_scratch(adreno_dev, cmds); } - return (unsigned int) (cmds - cmds_orig); + return (u32) (cmds - cmds_orig); } u32 gen7_preemption_post_ibsubmit(struct adreno_device *adreno_dev, diff --git a/adreno_gen7_ringbuffer.c b/adreno_gen7_ringbuffer.c index 951d2abdd6..11d9036594 100644 --- a/adreno_gen7_ringbuffer.c +++ b/adreno_gen7_ringbuffer.c @@ -138,7 +138,7 @@ static int gen7_rb_context_switch(struct adreno_device *adreno_dev, cmds[count++] = cp_type7_packet(CP_EVENT_WRITE, 1); cmds[count++] = 0x31; - if (adreno_is_preemption_enabled(adreno_dev)) { + if (adreno_is_preemption_enabled(adreno_dev) && drawctxt->base.user_ctxt_record) { u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; cmds[count++] = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 3); diff --git a/adreno_gen8_preempt.c b/adreno_gen8_preempt.c index 949547a187..96146bddf3 100644 --- a/adreno_gen8_preempt.c +++ b/adreno_gen8_preempt.c @@ -564,24 +564,8 @@ u32 gen8_preemption_pre_ibsubmit(struct 
adreno_device *adreno_dev, done: if (drawctxt) { - struct adreno_ringbuffer *rb = drawctxt->rb; - u64 dest = PREEMPT_SCRATCH_ADDR(adreno_dev, rb->id); - u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; - - *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 2); - cmds += cp_gpuaddr(adreno_dev, cmds, dest); - *cmds++ = lower_32_bits(gpuaddr); - *cmds++ = upper_32_bits(gpuaddr); - - if (adreno_dev->preempt.postamble_len) { - u64 kmd_postamble_addr = SCRATCH_POSTAMBLE_ADDR(KGSL_DEVICE(adreno_dev)); - - *cmds++ = cp_type7_packet(CP_SET_AMBLE, 3); - *cmds++ = lower_32_bits(kmd_postamble_addr); - *cmds++ = upper_32_bits(kmd_postamble_addr); - *cmds++ = FIELD_PREP(GENMASK(22, 20), CP_KMD_AMBLE_TYPE) - | (FIELD_PREP(GENMASK(19, 0), adreno_dev->preempt.postamble_len)); - } + cmds += adreno_prepare_preib_preempt_scratch(adreno_dev, drawctxt, cmds); + cmds += adreno_prepare_preib_postamble_scratch(adreno_dev, cmds); } return (u32) (cmds - cmds_orig); diff --git a/adreno_gen8_ringbuffer.c b/adreno_gen8_ringbuffer.c index c229719d75..a959e89450 100644 --- a/adreno_gen8_ringbuffer.c +++ b/adreno_gen8_ringbuffer.c @@ -140,7 +140,7 @@ static int gen8_rb_context_switch(struct adreno_device *adreno_dev, cmds[count++] = cp_type7_packet(CP_EVENT_WRITE, 1); cmds[count++] = 0x31; - if (adreno_is_preemption_enabled(adreno_dev)) { + if (adreno_is_preemption_enabled(adreno_dev) && drawctxt->base.user_ctxt_record) { u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; cmds[count++] = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 3); From 41afbf935ef7d94e4a4147f00abfb016d5db6d30 Mon Sep 17 00:00:00 2001 From: Xhoendi Collaku Date: Fri, 2 Feb 2024 12:37:24 -0800 Subject: [PATCH 0731/1016] kgsl: gen8: Fix GMU register capture in snapshot flow When capturing GMU GX registers we need to ensure GX is ON, while the rest of the GMU registers can be captured when GX is OFF. 
Fix the GMU register capture so that we do not touch the GX domain registers when GX might be OFF in snapshot flow. Change-Id: Id8f389b01e8bbc11c148c0fad703cbbfb774535d Signed-off-by: Urvashi Agrawal Signed-off-by: Xhoendi Collaku --- adreno_gen8_0_0_snapshot.h | 3 +-- adreno_gen8_gmu_snapshot.c | 36 ++++++++++++++++++++---------------- adreno_gen8_snapshot.c | 5 +++-- adreno_gen8_snapshot.h | 12 +++++++----- 4 files changed, 31 insertions(+), 25 deletions(-) diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h index 5dba870f95..c43907f96e 100644 --- a/adreno_gen8_0_0_snapshot.h +++ b/adreno_gen8_0_0_snapshot.h @@ -1883,8 +1883,7 @@ static struct gen8_reg_list gen8_0_0_ahb_registers[] = { { UNSLICE, gen8_0_0_ahb_secure_gpu_registers }, }; -static struct gen8_reg_list gen8_gmu_registers[] = { - { UNSLICE, gen8_0_0_gmu_registers }, +static struct gen8_reg_list gen8_gmu_gx_registers[] = { { UNSLICE, gen8_0_0_gmugx_registers }, { SLICE, gen8_0_0_gmugx_slice_registers }, }; diff --git a/adreno_gen8_gmu_snapshot.c b/adreno_gen8_gmu_snapshot.c index 4d6250efb5..ce247991ca 100644 --- a/adreno_gen8_gmu_snapshot.c +++ b/adreno_gen8_gmu_snapshot.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include "adreno.h" @@ -247,7 +247,7 @@ static void gen8_gmu_device_snapshot(struct kgsl_device *device, const struct gen8_snapshot_block_list *gen8_snapshot_block_list = gpucore->gen8_snapshot_block_list; u32 i, slice, j; - struct gen8_reg_list_info info; + struct gen8_reg_list_info info = {0}; kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY, snapshot, gen8_gmu_snapshot_itcm, gmu); @@ -256,28 +256,32 @@ static void gen8_gmu_device_snapshot(struct kgsl_device *device, gen8_gmu_snapshot_memories(device, gmu, snapshot); - for (i = 0 ; i < gen8_snapshot_block_list->num_gmu_regs; i++) { - struct gen8_reg_list *regs = &gen8_snapshot_block_list->gmu_regs[i]; - - slice = regs->slice_region ? MAX_PHYSICAL_SLICES : 1; - for (j = 0 ; j < slice; j++) { - info.regs = regs; - info.slice_id = (slice > 1) ? j : UINT_MAX; - kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, - gen8_legacy_snapshot_registers, &info); - } - } - kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot, gen8_snapshot_rscc_registers, (void *) gen8_snapshot_block_list->rscc_regs); + /* Capture GMU registers which are on CX domain and unsliced */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot, + adreno_snapshot_registers_v2, + (void *) gen8_snapshot_block_list->gmu_cx_unsliced_regs); + if (!gen8_gmu_gx_is_on(adreno_dev)) goto dtcm; /* Set fence to ALLOW mode so registers can be read */ kgsl_regwrite(device, GEN8_GMUAO_AHB_FENCE_CTRL, 0); - /* Make sure the previous write posted before reading */ - wmb(); + + /* Capture GMU registers which are on GX domain */ + for (i = 0 ; i < gen8_snapshot_block_list->num_gmu_gx_regs; i++) { + struct gen8_reg_list *regs = &gen8_snapshot_block_list->gmu_gx_regs[i]; + + slice = regs->slice_region ? 
MAX_PHYSICAL_SLICES : 1; + for (j = 0 ; j < slice; j++) { + info.regs = regs; + info.slice_id = j; + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, + gen8_legacy_snapshot_registers, &info); + } + } dtcm: kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY, diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 5d44d74486..139fb30265 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -28,8 +28,9 @@ const struct gen8_snapshot_block_list gen8_0_0_snapshot_block_list = { .cx_debugbus_blocks_len = ARRAY_SIZE(gen8_cx_debugbus_blocks), .external_core_regs = gen8_0_0_external_core_regs, .num_external_core_regs = ARRAY_SIZE(gen8_0_0_external_core_regs), - .gmu_regs = gen8_gmu_registers, - .num_gmu_regs = ARRAY_SIZE(gen8_gmu_registers), + .gmu_cx_unsliced_regs = gen8_0_0_gmu_registers, + .gmu_gx_regs = gen8_gmu_gx_registers, + .num_gmu_gx_regs = ARRAY_SIZE(gen8_gmu_gx_registers), .rscc_regs = gen8_0_0_rscc_rsc_registers, .reg_list = gen8_0_0_reg_list, .cx_misc_regs = gen8_0_0_cx_misc_registers, diff --git a/adreno_gen8_snapshot.h b/adreno_gen8_snapshot.h index 83090b67de..2cb8c4e2ee 100644 --- a/adreno_gen8_snapshot.h +++ b/adreno_gen8_snapshot.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __ADRENO_GEN8_SNAPSHOT_H #define __ADRENO_GEN8_SNAPSHOT_H @@ -610,10 +610,12 @@ struct gen8_snapshot_block_list { const u32 **external_core_regs; /* num_external_core_regs : length of external core registers list */ size_t num_external_core_regs; - /* gmu_registers : List of GMU registers */ - struct gen8_reg_list *gmu_regs; - /* num_gmu_regs : Length of GMU registers list */ - size_t num_gmu_regs; + /* gmu_cx_unsliced_regs : List of GMU CX unsliced registers */ + const u32 *gmu_cx_unsliced_regs; + /* gmu_gx_registers : List of GMU registers */ + struct gen8_reg_list *gmu_gx_regs; + /* num_gmu_gx_regs : Length of GMU registers list */ + size_t num_gmu_gx_regs; /* rscc_regs : List of RSCC registers */ const u32 *rscc_regs; /* reg_list : List of GPU internal registers */ From 4a72ae56b8a5f73253e04a58f3c84d04e06e700e Mon Sep 17 00:00:00 2001 From: Xhoendi Collaku Date: Mon, 12 Feb 2024 14:18:14 -0800 Subject: [PATCH 0732/1016] kgsl: gen8: Add memory barriers before reading aperture registers Add memory barriers after programming the aperture to ensure the write is posted before we read the registers behind the aperture. Change-Id: I019afc52ab02360b3d1d4be5e8bcf70b75385c9f Signed-off-by: Urvashi Agrawal Signed-off-by: Xhoendi Collaku --- adreno_gen7_perfcounter.c | 8 +++++++- adreno_gen8.c | 3 +++ adreno_gen8_perfcounter.c | 8 +++++++- adreno_gen8_snapshot.c | 28 +++++++++++++++++++--------- 4 files changed, 36 insertions(+), 11 deletions(-) diff --git a/adreno_gen7_perfcounter.c b/adreno_gen7_perfcounter.c index f3157375e9..76b5acd093 100644 --- a/adreno_gen7_perfcounter.c +++ b/adreno_gen7_perfcounter.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include "adreno.h" @@ -45,6 +45,9 @@ static int gen7_counter_br_enable(struct adreno_device *adreno_dev, FIELD_PREP(GENMASK(13, 12), PIPE_BR), group->flags); kgsl_regwrite(device, GEN7_CP_APERTURE_CNTL_HOST, val); + /* Ensure all writes are posted before accessing the piped register */ + mb(); + if (!ret) reg->value = 0; @@ -67,6 +70,9 @@ static int gen7_counter_bv_enable(struct adreno_device *adreno_dev, FIELD_PREP(GENMASK(13, 12), PIPE_BV), group->flags); kgsl_regwrite(device, GEN7_CP_APERTURE_CNTL_HOST, val); + /* Ensure all writes are posted before accessing the piped register */ + mb(); + if (!ret) reg->value = 0; diff --git a/adreno_gen8.c b/adreno_gen8.c index f95bbf8590..268c758426 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -578,6 +578,9 @@ void gen8_host_aperture_set(struct adreno_device *adreno_dev, u32 pipe_id, kgsl_regwrite(KGSL_DEVICE(adreno_dev), GEN8_CP_APERTURE_CNTL_HOST, aperture_val); + /* Make sure the aperture write goes through before reading the registers */ + mb(); + gen8_dev->aperture = aperture_val; } diff --git a/adreno_gen8_perfcounter.c b/adreno_gen8_perfcounter.c index 41e9ba34d9..60e4b5918a 100644 --- a/adreno_gen8_perfcounter.c +++ b/adreno_gen8_perfcounter.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include "adreno.h" @@ -68,6 +68,9 @@ static int gen8_counter_br_enable(struct adreno_device *adreno_dev, kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, val); + /* Ensure all writes are posted before reading the piped register */ + mb(); + if (!ret) reg->value = 0; @@ -91,6 +94,9 @@ static int gen8_counter_bv_enable(struct adreno_device *adreno_dev, kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, val); + /* Ensure all writes are posted before reading the piped register */ + mb(); + if (!ret) reg->value = 0; diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 139fb30265..226abd7af6 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -190,6 +190,9 @@ size_t gen8_legacy_snapshot_registers(struct kgsl_device *device, kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL (info->slice_id, 0, 0, 0)); + /* Make sure the previous writes are posted before reading */ + mb(); + for (ptr = info->regs->regs; ptr[0] != UINT_MAX; ptr += 2) { count = REG_COUNT(ptr); @@ -676,6 +679,14 @@ done: kgsl_regrmw(device, GEN8_SP_DBG_CNTL, GENMASK(1, 0), 0x0); } +static void gen8_rmw_aperture(struct kgsl_device *device, + u32 offsetwords, u32 mask, u32 val, u32 pipe, u32 slice_id, u32 use_slice_id) +{ + gen8_host_aperture_set(ADRENO_DEVICE(device), pipe, slice_id, use_slice_id); + + kgsl_regmap_rmw(&device->regmap, offsetwords, mask, val); +} + static void gen8_snapshot_mempool(struct kgsl_device *device, struct kgsl_snapshot *snapshot) { @@ -689,21 +700,17 @@ static void gen8_snapshot_mempool(struct kgsl_device *device, for (j = 0; j < slice; j++) { - kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL - (j, cp_indexed_reg->pipe_id, 0, 0)); - /* set CP_CHICKEN_DBG[StabilizeMVC] to stabilize it while dumping */ - kgsl_regrmw(device, GEN8_CP_CHICKEN_DBG_PIPE, 0x4, 0x4); + gen8_rmw_aperture(device, GEN8_CP_CHICKEN_DBG_PIPE, 0x4, 0x4, + cp_indexed_reg->pipe_id, j, 1); kgsl_snapshot_indexed_registers_v2(device, 
snapshot, cp_indexed_reg->addr, cp_indexed_reg->data, 0, cp_indexed_reg->size, cp_indexed_reg->pipe_id, - ((slice > 1) ? j : UINT_MAX)); + ((cp_indexed_reg->slice_region == SLICE) ? j : UINT_MAX)); - kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL - (j, cp_indexed_reg->pipe_id, 0, 0)); - - kgsl_regrmw(device, GEN8_CP_CHICKEN_DBG_PIPE, 0x4, 0x0); + gen8_rmw_aperture(device, GEN8_CP_CHICKEN_DBG_PIPE, 0x4, 0x0, + cp_indexed_reg->pipe_id, j, 1); } } } @@ -935,6 +942,9 @@ static size_t gen8_legacy_snapshot_mvc(struct kgsl_device *device, u8 *buf, if (info->cluster->sel) kgsl_regwrite(device, info->cluster->sel->host_reg, info->cluster->sel->val); + /* Make sure the previous writes are posted before reading */ + mb(); + for (; ptr[0] != UINT_MAX; ptr += 2) { u32 count = REG_COUNT(ptr); From 1fc185e330a65ef7d95c5bbb9169cc546ef8cba1 Mon Sep 17 00:00:00 2001 From: NISARG SHETH Date: Thu, 14 Mar 2024 11:14:29 +0530 Subject: [PATCH 0733/1016] kgsl: gen8: Enable L3 vote support for gen8_4_0 GPU Gen8_4_0 supports L3 voting to be able to successfully pin L3 frequency to a certain value during L3 characterization, debug scenarios and perf hints. 
Change-Id: I735953ae2a563026bcefb4b832e0bfd30995eeff Signed-off-by: NISARG SHETH --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 5162a46dbc..1b770ac56c 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2797,7 +2797,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_4_0 = { .compatible = "qcom,adreno-gpu-gen8-4-0", .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | - ADRENO_GMU_WARMBOOT, + ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_64M, From 950a1c3369843f1b097e955a3189d875f4a24d83 Mon Sep 17 00:00:00 2001 From: NISARG SHETH Date: Thu, 14 Mar 2024 11:38:40 +0530 Subject: [PATCH 0734/1016] kgsl: gen8: Enable IFPC on gen8_4_0 GPU Inter-Frame Power Collapse is a power saving feature for GPU. Change-Id: I50bbf6c64f3df785363d353375b86cb85ef9bf91 Signed-off-by: NISARG SHETH --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 1b770ac56c..0d790c126c 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2797,7 +2797,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_4_0 = { .compatible = "qcom,adreno-gpu-gen8-4-0", .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | - ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE, + ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_IFPC, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_64M, From 31fd9ebabffcab05c06013ff480aa16864f212b6 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sun, 17 Mar 2024 18:30:22 +0530 Subject: [PATCH 0735/1016] kgsl: gen8: Update comment for RB_CMP_DBG_ECO_CNTL register Merged UFC request feature is enabled on gen8. 
Hence, update the comment for RB_CMP_DBG_ECO_CNTL register. Change-Id: I526a1e7e0ac48665e4804648ef80faf9f2a176cb Signed-off-by: Kamal Agrawal --- adreno-gpulist.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index b0d6e03ea5..df3b3c4f34 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2610,7 +2610,7 @@ static const struct gen8_nonctxt_regs gen8_0_0_nonctxt_regs[] = { { GEN8_RB_GC_GMEM_PROTECT, 0x0c000000, BIT(PIPE_BR) }, /* Configure number of outstanding transactions to 32 */ { GEN8_RB_RESOLVE_PREFETCH_CNTL, 0x00000007, BIT(PIPE_BR) }, - /* Disable ubwc merged UFC request feature */ + /* Configure UCHE request time out to 16 cycles for CCU/UCHE arbitration */ { GEN8_RB_CMP_DBG_ECO_CNTL, 0x00004000, BIT(PIPE_BR) }, { GEN8_RBBM_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, { GEN8_RBBM_SLICE_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, @@ -2766,7 +2766,7 @@ static const struct gen8_nonctxt_regs gen8_4_0_nonctxt_regs[] = { { GEN8_RB_GC_GMEM_PROTECT, 0x0c000000, BIT(PIPE_BR) }, /* Configure number of outstanding transactions to 32 */ { GEN8_RB_RESOLVE_PREFETCH_CNTL, 0x00000007, BIT(PIPE_BR) }, - /* Disable ubwc merged UFC request feature */ + /* Configure UCHE request time out to 16 cycles for CCU/UCHE arbitration */ { GEN8_RB_CMP_DBG_ECO_CNTL, 0x00004000, BIT(PIPE_BR) }, { GEN8_RBBM_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, { GEN8_RBBM_SLICE_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, From 3b3cf58d713d971ed822cec5c51551b498190762 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sat, 9 Dec 2023 01:44:42 +0530 Subject: [PATCH 0736/1016] kgsl: gen8: Add coresight support Add coresight support for gen8. 
Change-Id: Ief494fae522c312507fd70b0d8b7b4eb3ceca082 Signed-off-by: Kamal Agrawal --- Kbuild | 1 + adreno_gen8.c | 2 + adreno_gen8.h | 7 + adreno_gen8_coresight.c | 419 ++++++++++++++++++++++++++++++++++++++++ build/kgsl_defs.bzl | 3 +- 5 files changed, 431 insertions(+), 1 deletion(-) create mode 100644 adreno_gen8_coresight.c diff --git a/Kbuild b/Kbuild index c9caf3da20..3e0afc05f3 100644 --- a/Kbuild +++ b/Kbuild @@ -160,4 +160,5 @@ msm_kgsl-$(CONFIG_QCOM_KGSL_CORESIGHT) += adreno_coresight.o msm_kgsl-$(CONFIG_QCOM_KGSL_CORESIGHT) += adreno_a5xx_coresight.o msm_kgsl-$(CONFIG_QCOM_KGSL_CORESIGHT) += adreno_a6xx_coresight.o msm_kgsl-$(CONFIG_QCOM_KGSL_CORESIGHT) += adreno_gen7_coresight.o +msm_kgsl-$(CONFIG_QCOM_KGSL_CORESIGHT) += adreno_gen8_coresight.o msm_kgsl-$(CONFIG_DEBUG_FS) += adreno_debugfs.o adreno_profile.o diff --git a/adreno_gen8.c b/adreno_gen8.c index f95bbf8590..730293fadd 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -2139,6 +2139,8 @@ int gen8_probe_common(struct platform_device *pdev, /* debugfs node for ACD calibration */ debugfs_create_file("acd_calibrate", 0644, device->d_debugfs, device, &acd_cal_fops); + gen8_coresight_init(adreno_dev); + /* Dump additional AQE 16KB data on top of default 128KB(64(BR)+64(BV)) */ device->snapshot_ctxt_record_size = ADRENO_FEATURE(adreno_dev, ADRENO_AQE) ? 
(GEN8_SNAPSHOT_CTXRECORD_SIZE_IN_BYTES + SZ_16K) : diff --git a/adreno_gen8.h b/adreno_gen8.h index 03eb772446..14efc7a678 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -608,4 +608,11 @@ void gen8_regread_aperture(struct kgsl_device *device, */ void gen8_host_aperture_set(struct adreno_device *adreno_dev, u32 pipe_id, u32 slice_id, u32 use_slice_id); + +#if IS_ENABLED(CONFIG_QCOM_KGSL_CORESIGHT) +void gen8_coresight_init(struct adreno_device *device); +#else +static inline void gen8_coresight_init(struct adreno_device *device) { } +#endif + #endif diff --git a/adreno_gen8_coresight.c b/adreno_gen8_coresight.c new file mode 100644 index 0000000000..2c03db9981 --- /dev/null +++ b/adreno_gen8_coresight.c @@ -0,0 +1,419 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2022, 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#include + +#include "adreno.h" +#include "adreno_coresight.h" +#include "adreno_gen8.h" + +static struct adreno_coresight_register gen8_coresight_regs[] = { + { GEN8_DBGC_CFG_DBGBUS_SEL_A }, + { GEN8_DBGC_CFG_DBGBUS_SEL_B }, + { GEN8_DBGC_CFG_DBGBUS_SEL_C }, + { GEN8_DBGC_CFG_DBGBUS_SEL_D }, + { GEN8_DBGC_CFG_DBGBUS_CNTLT }, + { GEN8_DBGC_CFG_DBGBUS_CNTLM }, + { GEN8_DBGC_CFG_DBGBUS_OPL }, + { GEN8_DBGC_CFG_DBGBUS_OPE }, + { GEN8_DBGC_CFG_DBGBUS_IVTL_0 }, + { GEN8_DBGC_CFG_DBGBUS_IVTL_1 }, + { GEN8_DBGC_CFG_DBGBUS_IVTL_2 }, + { GEN8_DBGC_CFG_DBGBUS_IVTL_3 }, + { GEN8_DBGC_CFG_DBGBUS_MASKL_0 }, + { GEN8_DBGC_CFG_DBGBUS_MASKL_1 }, + { GEN8_DBGC_CFG_DBGBUS_MASKL_2 }, + { GEN8_DBGC_CFG_DBGBUS_MASKL_3 }, + { GEN8_DBGC_CFG_DBGBUS_BYTEL_0 }, + { GEN8_DBGC_CFG_DBGBUS_BYTEL_1 }, + { GEN8_DBGC_CFG_DBGBUS_IVTE_0 }, + { GEN8_DBGC_CFG_DBGBUS_IVTE_1 }, + { GEN8_DBGC_CFG_DBGBUS_IVTE_2 }, + { GEN8_DBGC_CFG_DBGBUS_IVTE_3 }, + { GEN8_DBGC_CFG_DBGBUS_MASKE_0 }, + { GEN8_DBGC_CFG_DBGBUS_MASKE_1 }, + { GEN8_DBGC_CFG_DBGBUS_MASKE_2 }, + { 
GEN8_DBGC_CFG_DBGBUS_MASKE_3 }, + { GEN8_DBGC_CFG_DBGBUS_NIBBLEE }, + { GEN8_DBGC_CFG_DBGBUS_PTRC0 }, + { GEN8_DBGC_CFG_DBGBUS_PTRC1 }, + { GEN8_DBGC_CFG_DBGBUS_LOADREG }, + { GEN8_DBGC_CFG_DBGBUS_IDX }, + { GEN8_DBGC_CFG_DBGBUS_CLRC }, + { GEN8_DBGC_CFG_DBGBUS_LOADIVT }, + { GEN8_DBGC_VBIF_DBG_CNTL }, + { GEN8_DBGC_DBG_LO_HI_GPIO }, + { GEN8_DBGC_EXT_TRACE_BUS_CNTL }, + { GEN8_DBGC_READ_AHB_THROUGH_DBG }, + { GEN8_DBGC_CFG_DBGBUS_TRACE_BUF1 }, + { GEN8_DBGC_CFG_DBGBUS_TRACE_BUF2 }, + { GEN8_DBGC_EVT_CFG }, + { GEN8_DBGC_EVT_INTF_SEL_0 }, + { GEN8_DBGC_EVT_INTF_SEL_1 }, + { GEN8_DBGC_ECO_CNTL }, + { GEN8_DBGC_AHB_DBG_CNTL }, +}; + +static struct adreno_coresight_register gen8_coresight_regs_cx[] = { + { GEN8_CX_DBGC_CFG_DBGBUS_SEL_A }, + { GEN8_CX_DBGC_CFG_DBGBUS_SEL_B }, + { GEN8_CX_DBGC_CFG_DBGBUS_SEL_C }, + { GEN8_CX_DBGC_CFG_DBGBUS_SEL_D }, + { GEN8_CX_DBGC_CFG_DBGBUS_CNTLT }, + { GEN8_CX_DBGC_CFG_DBGBUS_CNTLM }, + { GEN8_CX_DBGC_CFG_DBGBUS_OPL }, + { GEN8_CX_DBGC_CFG_DBGBUS_OPE }, + { GEN8_CX_DBGC_CFG_DBGBUS_IVTL_0 }, + { GEN8_CX_DBGC_CFG_DBGBUS_IVTL_1 }, + { GEN8_CX_DBGC_CFG_DBGBUS_IVTL_2 }, + { GEN8_CX_DBGC_CFG_DBGBUS_IVTL_3 }, + { GEN8_CX_DBGC_CFG_DBGBUS_MASKL_0 }, + { GEN8_CX_DBGC_CFG_DBGBUS_MASKL_1 }, + { GEN8_CX_DBGC_CFG_DBGBUS_MASKL_2 }, + { GEN8_CX_DBGC_CFG_DBGBUS_MASKL_3 }, + { GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_0 }, + { GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_1 }, + { GEN8_CX_DBGC_CFG_DBGBUS_IVTE_0 }, + { GEN8_CX_DBGC_CFG_DBGBUS_IVTE_1 }, + { GEN8_CX_DBGC_CFG_DBGBUS_IVTE_2 }, + { GEN8_CX_DBGC_CFG_DBGBUS_IVTE_3 }, + { GEN8_CX_DBGC_CFG_DBGBUS_MASKE_0 }, + { GEN8_CX_DBGC_CFG_DBGBUS_MASKE_1 }, + { GEN8_CX_DBGC_CFG_DBGBUS_MASKE_2 }, + { GEN8_CX_DBGC_CFG_DBGBUS_MASKE_3 }, + { GEN8_CX_DBGC_CFG_DBGBUS_NIBBLEE }, + { GEN8_CX_DBGC_CFG_DBGBUS_PTRC0 }, + { GEN8_CX_DBGC_CFG_DBGBUS_PTRC1 }, + { GEN8_CX_DBGC_CFG_DBGBUS_LOADREG }, + { GEN8_CX_DBGC_CFG_DBGBUS_IDX }, + { GEN8_CX_DBGC_CFG_DBGBUS_CLRC }, + { GEN8_CX_DBGC_CFG_DBGBUS_LOADIVT }, + { GEN8_CX_DBGC_VBIF_DBG_CNTL }, + { 
GEN8_CX_DBGC_DBG_LO_HI_GPIO }, + { GEN8_CX_DBGC_EXT_TRACE_BUS_CNTL }, + { GEN8_CX_DBGC_READ_AHB_THROUGH_DBG }, + { GEN8_CX_DBGC_CFG_DBGBUS_TRACE_BUF1 }, + { GEN8_CX_DBGC_CFG_DBGBUS_TRACE_BUF2 }, + { GEN8_CX_DBGC_EVT_CFG }, + { GEN8_CX_DBGC_EVT_INTF_SEL_0 }, + { GEN8_CX_DBGC_EVT_INTF_SEL_1 }, + { GEN8_CX_DBGC_PERF_ATB_CFG }, + { GEN8_CX_DBGC_ECO_CNTL }, + { GEN8_CX_DBGC_AHB_DBG_CNTL }, +}; + +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_a, &gen8_coresight_regs[0]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_b, &gen8_coresight_regs[1]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_c, &gen8_coresight_regs[2]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_d, &gen8_coresight_regs[3]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlt, &gen8_coresight_regs[4]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlm, &gen8_coresight_regs[5]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_opl, &gen8_coresight_regs[6]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ope, &gen8_coresight_regs[7]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_0, &gen8_coresight_regs[8]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_1, &gen8_coresight_regs[9]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_2, &gen8_coresight_regs[10]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_3, &gen8_coresight_regs[11]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_0, &gen8_coresight_regs[12]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_1, &gen8_coresight_regs[13]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_2, &gen8_coresight_regs[14]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_3, &gen8_coresight_regs[15]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_0, &gen8_coresight_regs[16]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_1, &gen8_coresight_regs[17]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_0, &gen8_coresight_regs[18]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_1, &gen8_coresight_regs[19]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_2, &gen8_coresight_regs[20]); +static 
ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_3, &gen8_coresight_regs[21]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_0, &gen8_coresight_regs[22]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_1, &gen8_coresight_regs[23]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_2, &gen8_coresight_regs[24]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_3, &gen8_coresight_regs[25]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_nibblee, &gen8_coresight_regs[26]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc0, &gen8_coresight_regs[27]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc1, &gen8_coresight_regs[28]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadreg, &gen8_coresight_regs[29]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_idx, &gen8_coresight_regs[30]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_clrc, &gen8_coresight_regs[31]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadivt, &gen8_coresight_regs[32]); +static ADRENO_CORESIGHT_ATTR(vbif_dbg_cntl, &gen8_coresight_regs[33]); +static ADRENO_CORESIGHT_ATTR(dbg_lo_hi_gpio, &gen8_coresight_regs[34]); +static ADRENO_CORESIGHT_ATTR(ext_trace_bus_cntl, &gen8_coresight_regs[35]); +static ADRENO_CORESIGHT_ATTR(read_ahb_through_dbg, &gen8_coresight_regs[36]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf1, &gen8_coresight_regs[37]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf2, &gen8_coresight_regs[38]); +static ADRENO_CORESIGHT_ATTR(evt_cfg, &gen8_coresight_regs[39]); +static ADRENO_CORESIGHT_ATTR(evt_intf_sel_0, &gen8_coresight_regs[40]); +static ADRENO_CORESIGHT_ATTR(evt_intf_sel_1, &gen8_coresight_regs[41]); +static ADRENO_CORESIGHT_ATTR(eco_cntl, &gen8_coresight_regs[42]); +static ADRENO_CORESIGHT_ATTR(ahb_dbg_cntl, &gen8_coresight_regs[43]); + +/*CX debug registers*/ +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_a, + &gen8_coresight_regs_cx[0]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_b, + &gen8_coresight_regs_cx[1]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_c, + &gen8_coresight_regs_cx[2]); +static 
ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_d, + &gen8_coresight_regs_cx[3]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_cntlt, + &gen8_coresight_regs_cx[4]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_cntlm, + &gen8_coresight_regs_cx[5]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_opl, + &gen8_coresight_regs_cx[6]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ope, + &gen8_coresight_regs_cx[7]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_0, + &gen8_coresight_regs_cx[8]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_1, + &gen8_coresight_regs_cx[9]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_2, + &gen8_coresight_regs_cx[10]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_3, + &gen8_coresight_regs_cx[11]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_0, + &gen8_coresight_regs_cx[12]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_1, + &gen8_coresight_regs_cx[13]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_2, + &gen8_coresight_regs_cx[14]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_3, + &gen8_coresight_regs_cx[15]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_bytel_0, + &gen8_coresight_regs_cx[16]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_bytel_1, + &gen8_coresight_regs_cx[17]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_0, + &gen8_coresight_regs_cx[18]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_1, + &gen8_coresight_regs_cx[19]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_2, + &gen8_coresight_regs_cx[20]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_3, + &gen8_coresight_regs_cx[21]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_0, + &gen8_coresight_regs_cx[22]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_1, + &gen8_coresight_regs_cx[23]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_2, + &gen8_coresight_regs_cx[24]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_3, + &gen8_coresight_regs_cx[25]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_nibblee, + 
&gen8_coresight_regs_cx[26]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ptrc0, + &gen8_coresight_regs_cx[27]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ptrc1, + &gen8_coresight_regs_cx[28]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_loadreg, + &gen8_coresight_regs_cx[29]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_idx, + &gen8_coresight_regs_cx[30]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_clrc, + &gen8_coresight_regs_cx[31]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_loadivt, + &gen8_coresight_regs_cx[32]); +static ADRENO_CORESIGHT_ATTR(cx_vbif_dbg_cntl, + &gen8_coresight_regs_cx[33]); +static ADRENO_CORESIGHT_ATTR(cx_dbg_lo_hi_gpio, + &gen8_coresight_regs_cx[34]); +static ADRENO_CORESIGHT_ATTR(cx_ext_trace_bus_cntl, + &gen8_coresight_regs_cx[35]); +static ADRENO_CORESIGHT_ATTR(cx_read_ahb_through_dbg, + &gen8_coresight_regs_cx[36]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_trace_buf1, + &gen8_coresight_regs_cx[37]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_trace_buf2, + &gen8_coresight_regs_cx[38]); +static ADRENO_CORESIGHT_ATTR(cx_evt_cfg, + &gen8_coresight_regs_cx[39]); +static ADRENO_CORESIGHT_ATTR(cx_evt_intf_sel_0, + &gen8_coresight_regs_cx[40]); +static ADRENO_CORESIGHT_ATTR(cx_evt_intf_sel_1, + &gen8_coresight_regs_cx[41]); +static ADRENO_CORESIGHT_ATTR(cx_perf_atb_cfg, + &gen8_coresight_regs_cx[42]); +static ADRENO_CORESIGHT_ATTR(cx_eco_cntl, + &gen8_coresight_regs_cx[43]); +static ADRENO_CORESIGHT_ATTR(cx_ahb_dbg_cntl, + &gen8_coresight_regs_cx[44]); + +static struct attribute *gen8_coresight_attrs[] = { + &coresight_attr_cfg_dbgbus_sel_a.attr.attr, + &coresight_attr_cfg_dbgbus_sel_b.attr.attr, + &coresight_attr_cfg_dbgbus_sel_c.attr.attr, + &coresight_attr_cfg_dbgbus_sel_d.attr.attr, + &coresight_attr_cfg_dbgbus_cntlt.attr.attr, + &coresight_attr_cfg_dbgbus_cntlm.attr.attr, + &coresight_attr_cfg_dbgbus_opl.attr.attr, + &coresight_attr_cfg_dbgbus_ope.attr.attr, + &coresight_attr_cfg_dbgbus_ivtl_0.attr.attr, + 
&coresight_attr_cfg_dbgbus_ivtl_1.attr.attr, + &coresight_attr_cfg_dbgbus_ivtl_2.attr.attr, + &coresight_attr_cfg_dbgbus_ivtl_3.attr.attr, + &coresight_attr_cfg_dbgbus_maskl_0.attr.attr, + &coresight_attr_cfg_dbgbus_maskl_1.attr.attr, + &coresight_attr_cfg_dbgbus_maskl_2.attr.attr, + &coresight_attr_cfg_dbgbus_maskl_3.attr.attr, + &coresight_attr_cfg_dbgbus_bytel_0.attr.attr, + &coresight_attr_cfg_dbgbus_bytel_1.attr.attr, + &coresight_attr_cfg_dbgbus_ivte_0.attr.attr, + &coresight_attr_cfg_dbgbus_ivte_1.attr.attr, + &coresight_attr_cfg_dbgbus_ivte_2.attr.attr, + &coresight_attr_cfg_dbgbus_ivte_3.attr.attr, + &coresight_attr_cfg_dbgbus_maske_0.attr.attr, + &coresight_attr_cfg_dbgbus_maske_1.attr.attr, + &coresight_attr_cfg_dbgbus_maske_2.attr.attr, + &coresight_attr_cfg_dbgbus_maske_3.attr.attr, + &coresight_attr_cfg_dbgbus_nibblee.attr.attr, + &coresight_attr_cfg_dbgbus_ptrc0.attr.attr, + &coresight_attr_cfg_dbgbus_ptrc1.attr.attr, + &coresight_attr_cfg_dbgbus_loadreg.attr.attr, + &coresight_attr_cfg_dbgbus_idx.attr.attr, + &coresight_attr_cfg_dbgbus_clrc.attr.attr, + &coresight_attr_cfg_dbgbus_loadivt.attr.attr, + &coresight_attr_vbif_dbg_cntl.attr.attr, + &coresight_attr_dbg_lo_hi_gpio.attr.attr, + &coresight_attr_ext_trace_bus_cntl.attr.attr, + &coresight_attr_read_ahb_through_dbg.attr.attr, + &coresight_attr_cfg_dbgbus_trace_buf1.attr.attr, + &coresight_attr_cfg_dbgbus_trace_buf2.attr.attr, + &coresight_attr_evt_cfg.attr.attr, + &coresight_attr_evt_intf_sel_0.attr.attr, + &coresight_attr_evt_intf_sel_1.attr.attr, + &coresight_attr_eco_cntl.attr.attr, + &coresight_attr_ahb_dbg_cntl.attr.attr, + NULL, +}; + +/*cx*/ +static struct attribute *gen8_coresight_attrs_cx[] = { + &coresight_attr_cx_cfg_dbgbus_sel_a.attr.attr, + &coresight_attr_cx_cfg_dbgbus_sel_b.attr.attr, + &coresight_attr_cx_cfg_dbgbus_sel_c.attr.attr, + &coresight_attr_cx_cfg_dbgbus_sel_d.attr.attr, + &coresight_attr_cx_cfg_dbgbus_cntlt.attr.attr, + &coresight_attr_cx_cfg_dbgbus_cntlm.attr.attr, + 
&coresight_attr_cx_cfg_dbgbus_opl.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ope.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ivtl_0.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ivtl_1.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ivtl_2.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ivtl_3.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maskl_0.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maskl_1.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maskl_2.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maskl_3.attr.attr, + &coresight_attr_cx_cfg_dbgbus_bytel_0.attr.attr, + &coresight_attr_cx_cfg_dbgbus_bytel_1.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ivte_0.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ivte_1.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ivte_2.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ivte_3.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maske_0.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maske_1.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maske_2.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maske_3.attr.attr, + &coresight_attr_cx_cfg_dbgbus_nibblee.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ptrc0.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ptrc1.attr.attr, + &coresight_attr_cx_cfg_dbgbus_loadreg.attr.attr, + &coresight_attr_cx_cfg_dbgbus_idx.attr.attr, + &coresight_attr_cx_cfg_dbgbus_clrc.attr.attr, + &coresight_attr_cx_cfg_dbgbus_loadivt.attr.attr, + &coresight_attr_cx_vbif_dbg_cntl.attr.attr, + &coresight_attr_cx_dbg_lo_hi_gpio.attr.attr, + &coresight_attr_cx_ext_trace_bus_cntl.attr.attr, + &coresight_attr_cx_read_ahb_through_dbg.attr.attr, + &coresight_attr_cx_cfg_dbgbus_trace_buf1.attr.attr, + &coresight_attr_cx_cfg_dbgbus_trace_buf2.attr.attr, + &coresight_attr_cx_evt_cfg.attr.attr, + &coresight_attr_cx_evt_intf_sel_0.attr.attr, + &coresight_attr_cx_evt_intf_sel_1.attr.attr, + &coresight_attr_cx_perf_atb_cfg.attr.attr, + &coresight_attr_cx_eco_cntl.attr.attr, + &coresight_attr_cx_ahb_dbg_cntl.attr.attr, + NULL, +}; + +static const struct attribute_group gen8_coresight_group = { + 
.attrs = gen8_coresight_attrs, +}; + +static const struct attribute_group *gen8_coresight_groups[] = { + &gen8_coresight_group, + NULL, +}; + +static const struct attribute_group gen8_coresight_group_cx = { + .attrs = gen8_coresight_attrs_cx, +}; + +static const struct attribute_group *gen8_coresight_groups_cx[] = { + &gen8_coresight_group_cx, + NULL, +}; + +static const struct adreno_coresight gen8_coresight = { + .registers = gen8_coresight_regs, + .count = ARRAY_SIZE(gen8_coresight_regs), + .groups = gen8_coresight_groups, +}; + +static const struct adreno_coresight gen8_coresight_cx = { + .registers = gen8_coresight_regs_cx, + .count = ARRAY_SIZE(gen8_coresight_regs_cx), + .groups = gen8_coresight_groups_cx, +}; + +static int name_match(struct device *dev, void *data) +{ + char *child_name = data; + + return strcmp(child_name, dev_name(dev)) == 0; +} + +void gen8_coresight_init(struct adreno_device *adreno_dev) +{ + struct adreno_funnel_device *funnel_gfx = &adreno_dev->funnel_gfx; + struct device *amba_dev; + + /* Find the amba funnel device associated with gfx coresight funnel */ + amba_dev = bus_find_device_by_name(&amba_bustype, NULL, "10963000.funnel"); + if (!amba_dev) + return; + + funnel_gfx->funnel_dev = device_find_child(amba_dev, "coresight-funnel-gfx", name_match); + if (funnel_gfx->funnel_dev == NULL) + return; + + funnel_gfx->funnel_csdev = to_coresight_device(funnel_gfx->funnel_dev); + if (funnel_gfx->funnel_csdev == NULL) + return; + + /* + * Since coresight_funnel_gfx component is in graphics block, GPU has to be powered up + * before enabling the funnel. Currently the generic coresight driver doesn't handle that. + * Override the funnel ops set by coresight driver with graphics funnel ops, so that the + * GPU can be brought up before enabling the funnel. 
+ */ + funnel_gfx->funnel_ops = funnel_gfx->funnel_csdev->ops; + funnel_gfx->funnel_csdev->ops = NULL; + + /* + * The read-only sysfs node (funnel_ctrl) associated with gfx funnel reads the control + * register and could cause a NOC error when gpu is in slumber. Since we do not require + * such node, remove the attribute groups for the funnel. + */ + sysfs_remove_groups(&funnel_gfx->funnel_dev->kobj, funnel_gfx->funnel_csdev->dev.groups); + + adreno_coresight_add_device(adreno_dev, "qcom,gpu-coresight-gx", + &gen8_coresight, &adreno_dev->gx_coresight); + + adreno_coresight_add_device(adreno_dev, "qcom,gpu-coresight-cx", + &gen8_coresight_cx, &adreno_dev->cx_coresight); +} diff --git a/build/kgsl_defs.bzl b/build/kgsl_defs.bzl index f70d05fe23..bce5876928 100644 --- a/build/kgsl_defs.bzl +++ b/build/kgsl_defs.bzl @@ -134,7 +134,8 @@ def define_target_variant_module(target, variant): "adreno_coresight.c", "adreno_a5xx_coresight.c", "adreno_a6xx_coresight.c", - "adreno_gen7_coresight.c"] }, + "adreno_gen7_coresight.c", + "adreno_gen8_coresight.c"] }, "CONFIG_QCOM_KGSL_PROCESS_RECLAIM": { True: [ "kgsl_reclaim.c" ] }, "CONFIG_QCOM_KGSL_USE_SHMEM": { False: [ "kgsl_pool.c" ] }, "CONFIG_SYNC_FILE": { True: [ "kgsl_sync.c" ] }, From 04ad61113b5075b099df55cf029cd74d393583c6 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sun, 17 Mar 2024 19:31:50 +0530 Subject: [PATCH 0737/1016] kgsl: gen8: Fix condition to determine slice ID The existing logic considers the slice ID as valid only when the number of slices exceeds 1. However, few GPUs have single slice. Thus, update the logic to determine slice ID based on the slice region. 
Change-Id: Icf2e3450ec493aee5477af0cbbded4dec4368cf8 Signed-off-by: Kamal Agrawal --- adreno_gen8_gmu_snapshot.c | 2 +- adreno_gen8_snapshot.c | 19 ++++++++----------- adreno_gen8_snapshot.h | 3 +++ 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/adreno_gen8_gmu_snapshot.c b/adreno_gen8_gmu_snapshot.c index ce247991ca..17fe920b44 100644 --- a/adreno_gen8_gmu_snapshot.c +++ b/adreno_gen8_gmu_snapshot.c @@ -277,7 +277,7 @@ static void gen8_gmu_device_snapshot(struct kgsl_device *device, slice = regs->slice_region ? MAX_PHYSICAL_SLICES : 1; for (j = 0 ; j < slice; j++) { info.regs = regs; - info.slice_id = j; + info.slice_id = SLICE_ID(regs->slice_region, j); kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, gen8_legacy_snapshot_registers, &info); } diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index bbc3dc099d..8375dd53fa 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -68,9 +68,6 @@ const struct gen8_snapshot_block_list gen8_0_0_snapshot_block_list = { #define CD_REG_END 0xaaaaaaaa -#define NUMBER_OF_SLICES(is_sliced) (is_sliced ? MAX_PHYSICAL_SLICES : 1) -#define SLICE_ID(slices, j) ((slices > 1) ? j : UINT_MAX) - static u32 CD_WRITE(u64 *ptr, u32 offset, u64 val) { ptr[0] = val; @@ -710,7 +707,7 @@ static void gen8_snapshot_mempool(struct kgsl_device *device, kgsl_snapshot_indexed_registers_v2(device, snapshot, cp_indexed_reg->addr, cp_indexed_reg->data, 0, cp_indexed_reg->size, cp_indexed_reg->pipe_id, - ((cp_indexed_reg->slice_region == SLICE) ? 
j : UINT_MAX)); + SLICE_ID(cp_indexed_reg->slice_region, j)); /* Reset CP_CHICKEN_DBG[StabilizeMVC] once we are done */ gen8_rmw_aperture(device, GEN8_CP_CHICKEN_DBG_PIPE, 0x4, 0x0, @@ -845,7 +842,7 @@ static void gen8_snapshot_dbgahb_regs(struct kgsl_device *device, info.pipe_id = cluster->pipe_id; info.usptp_id = usptp; info.sp_id = sp; - info.slice_id = SLICE_ID(slice, j); + info.slice_id = SLICE_ID(cluster->slice_region, j); info.cluster_id = cluster->cluster_id; info.context_id = cluster->context_id; kgsl_snapshot_add_section(device, @@ -878,7 +875,7 @@ static void gen8_snapshot_dbgahb_regs(struct kgsl_device *device, info.pipe_id = cluster->pipe_id; info.usptp_id = usptp; info.sp_id = sp; - info.slice_id = SLICE_ID(slice, j); + info.slice_id = SLICE_ID(cluster->slice_region, j); info.statetype_id = cluster->statetype; info.cluster_id = cluster->cluster_id; info.context_id = cluster->context_id; @@ -1035,7 +1032,7 @@ static void gen8_snapshot_mvc_regs(struct kgsl_device *device, info.pipe_id = cluster->pipe_id; info.cluster_id = cluster->cluster_id; info.context_id = cluster->context_id; - info.slice_id = SLICE_ID(slice, j); + info.slice_id = SLICE_ID(cluster->slice_region, j); kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, func, &info); } @@ -1059,7 +1056,7 @@ static void gen8_snapshot_mvc_regs(struct kgsl_device *device, info.pipe_id = cluster->pipe_id; info.cluster_id = cluster->cluster_id; info.context_id = cluster->context_id; - info.slice_id = SLICE_ID(slice, j); + info.slice_id = SLICE_ID(cluster->slice_region, j); info.offset = offset; ptr += CD_WRITE(ptr, GEN8_CP_APERTURE_CNTL_CD, GEN8_CP_APERTURE_REG_VAL @@ -1492,7 +1489,7 @@ static void gen8_reglist_snapshot(struct kgsl_device *device, slices = NUMBER_OF_SLICES(regs->slice_region); for (j = 0; j < slices; j++) { info.regs = regs; - info.slice_id = SLICE_ID(slices, j); + info.slice_id = SLICE_ID(regs->slice_region, j); kgsl_snapshot_add_section(device, 
KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, func, &info); } @@ -1518,7 +1515,7 @@ static void gen8_reglist_snapshot(struct kgsl_device *device, if (regs->sel) ptr += CD_WRITE(ptr, regs->sel->cd_reg, regs->sel->val); info.regs = regs; - info.slice_id = SLICE_ID(slices, j); + info.slice_id = SLICE_ID(regs->slice_region, j); info.offset = offset; for (; regs_ptr[0] != UINT_MAX; regs_ptr += 2) { @@ -1714,7 +1711,7 @@ void gen8_snapshot(struct adreno_device *adreno_dev, for (j = 0; j < slices; j++) { info.regs = regs; - info.slice_id = SLICE_ID(slices, j); + info.slice_id = SLICE_ID(regs->slice_region, j); kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, gen8_legacy_snapshot_registers, &info); } diff --git a/adreno_gen8_snapshot.h b/adreno_gen8_snapshot.h index 2cb8c4e2ee..b4d6c7867f 100644 --- a/adreno_gen8_snapshot.h +++ b/adreno_gen8_snapshot.h @@ -42,6 +42,9 @@ enum location_id { #define MAX_PHYSICAL_SLICES 3 +#define NUMBER_OF_SLICES(region) ((region == SLICE) ? MAX_PHYSICAL_SLICES : 1) +#define SLICE_ID(region, j) ((region == SLICE) ? j : UINT_MAX) + #define GEN8_DEBUGBUS_BLOCK_SIZE 0x100 /* Number of dword to dump in snapshot for CP SQE */ From 11db04b22c093d29ea7a09074cf42d15804c8486 Mon Sep 17 00:00:00 2001 From: NISARG SHETH Date: Fri, 27 Oct 2023 16:32:16 +0530 Subject: [PATCH 0738/1016] kgsl: gen7: Add support for gen7_6_0 GPU Add an entry in the adreno gpulist to support gen7_6_0 GPU. 
Change-Id: I4732f5a9ec1ffbba0a17c7b2b8cf7122ac8e2a64 Signed-off-by: NISARG SHETH --- adreno-gpulist.h | 42 ++++++++++++++++++++++++++++++++++++++++++ adreno.h | 6 ++++-- adreno_gen7.c | 5 +++-- 3 files changed, 49 insertions(+), 4 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index b0d6e03ea5..2ad16e5743 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2058,6 +2058,10 @@ static const struct kgsl_regmap_list gen7_2_0_gbif_regs[] = { { GEN7_GMU_CX_MRC_GBIF_QOS_CTRL, 0x33 }, }; +static const u32 gen7_6_0_gbif_client_qos_values[KGSL_PRIORITY_MAX_RB_LEVELS] = { + 0x03230323, +}; + static const struct kgsl_regmap_list gen7_2_0_hwcg_regs[] = { { GEN7_RBBM_CLOCK_CNTL_SP0, 0x02222222 }, { GEN7_RBBM_CLOCK_CNTL2_SP0, 0x22022222 }, @@ -2231,6 +2235,43 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_4_0 = { .fast_bus_hint = true, }; +static const struct adreno_gen7_core adreno_gpu_core_gen7_6_0 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_GEN7_6_0, + UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), + .compatible = "qcom,adreno-gpu-gen7-6-0", + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | + ADRENO_CONTENT_PROTECTION | ADRENO_L3_VOTE | + ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_BCL | + ADRENO_ACD | ADRENO_LPAC | ADRENO_DMS, + .gpudev = &adreno_gen7_hwsched_gpudev.base, + .perfcounters = &adreno_gen7_hwsched_perfcounters, + .uche_gmem_alignment = SZ_16M, + .gmem_size = 3 * SZ_1M, + .bus_width = 32, + .snapshot_size = SZ_4M, + }, + .gmu_fw_version = GMU_VERSION(4, 1, 9), + .sqefw_name = "a740v3_sqe.fw", + .gmufw_name = "gmu_gen70200.bin", + .zap_name = "a740v3_zap", + .hwcg = gen7_2_0_hwcg_regs, + .hwcg_count = ARRAY_SIZE(gen7_2_0_hwcg_regs), + .ao_hwcg = gen7_2_0_ao_hwcg_regs, + .ao_hwcg_count = ARRAY_SIZE(gen7_2_0_ao_hwcg_regs), + .gbif = gen7_2_0_gbif_regs, + .gbif_count = ARRAY_SIZE(gen7_2_0_gbif_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = gen7_0_0_protected_regs, + .highest_bank_bit = 16, + .gmu_hub_clk_freq = 200000000, + 
.bcl_data = 1, + .gen7_snapshot_block_list = &gen7_2_0_snapshot_block_list, + .qos_value = gen7_6_0_gbif_client_qos_values, + .preempt_level = 1, + .ctxt_record_size = (4192 * SZ_1K), +}; + extern const struct gen7_snapshot_block_list gen7_9_0_snapshot_block_list; /* GEN7_9_0 protected register list */ @@ -2883,6 +2924,7 @@ static const struct adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_gen7_2_0.base, &adreno_gpu_core_gen7_2_1.base, &adreno_gpu_core_gen7_4_0.base, + &adreno_gpu_core_gen7_6_0.base, &adreno_gpu_core_gen7_9_0.base, &adreno_gpu_core_gen7_9_1.base, &adreno_gpu_core_gen7_14_0.base, diff --git a/adreno.h b/adreno.h index e697fa2743..c0c4143efb 100644 --- a/adreno.h +++ b/adreno.h @@ -240,6 +240,7 @@ enum adreno_gpurev { ADRENO_REV_GEN7_2_0 = ADRENO_GPUREV_VALUE(7, 2, 0), ADRENO_REV_GEN7_2_1 = ADRENO_GPUREV_VALUE(7, 2, 1), ADRENO_REV_GEN7_4_0 = ADRENO_GPUREV_VALUE(7, 4, 0), + ADRENO_REV_GEN7_6_0 = ADRENO_GPUREV_VALUE(7, 6, 0), ADRENO_REV_GEN7_9_0 = ADRENO_GPUREV_VALUE(7, 9, 0), ADRENO_REV_GEN7_9_1 = ADRENO_GPUREV_VALUE(7, 9, 1), ADRENO_REV_GEN7_14_0 = ADRENO_GPUREV_VALUE(7, 14, 0), @@ -1274,6 +1275,7 @@ ADRENO_TARGET(gen7_0_1, ADRENO_REV_GEN7_0_1) ADRENO_TARGET(gen7_2_0, ADRENO_REV_GEN7_2_0) ADRENO_TARGET(gen7_2_1, ADRENO_REV_GEN7_2_1) ADRENO_TARGET(gen7_4_0, ADRENO_REV_GEN7_4_0) +ADRENO_TARGET(gen7_6_0, ADRENO_REV_GEN7_6_0) ADRENO_TARGET(gen7_9_0, ADRENO_REV_GEN7_9_0) ADRENO_TARGET(gen7_9_1, ADRENO_REV_GEN7_9_1) ADRENO_TARGET(gen7_14_0, ADRENO_REV_GEN7_14_0) @@ -1293,8 +1295,8 @@ static inline int adreno_is_gen7_0_x_family(struct adreno_device *adreno_dev) static inline int adreno_is_gen7_2_x_family(struct adreno_device *adreno_dev) { return adreno_is_gen7_2_0(adreno_dev) || adreno_is_gen7_2_1(adreno_dev) || - adreno_is_gen7_9_x(adreno_dev) || adreno_is_gen7_14_0(adreno_dev) || - adreno_is_gen7_11_0(adreno_dev); + adreno_is_gen7_6_0(adreno_dev) || adreno_is_gen7_9_x(adreno_dev) || + adreno_is_gen7_14_0(adreno_dev) || 
adreno_is_gen7_11_0(adreno_dev); } /* diff --git a/adreno_gen7.c b/adreno_gen7.c index 920569c636..e46924c7bd 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -864,8 +864,9 @@ int gen7_start(struct adreno_device *adreno_dev) * the prefetch granularity size. */ if (adreno_is_gen7_0_0(adreno_dev) || adreno_is_gen7_0_1(adreno_dev) || - adreno_is_gen7_4_0(adreno_dev) || adreno_is_gen7_2_0(adreno_dev) - || adreno_is_gen7_2_1(adreno_dev) || adreno_is_gen7_11_0(adreno_dev)) { + adreno_is_gen7_4_0(adreno_dev) || adreno_is_gen7_2_0(adreno_dev) || + adreno_is_gen7_2_1(adreno_dev) || adreno_is_gen7_6_0(adreno_dev) || + adreno_is_gen7_11_0(adreno_dev)) { kgsl_regwrite(device, GEN7_CP_CHICKEN_DBG, 0x1); kgsl_regwrite(device, GEN7_CP_BV_CHICKEN_DBG, 0x1); /* Avoid configuring LPAC pipe on targets which do not have LPAC. */ From eff85cd41a2987bfed9fd00f360f74a1e0dd47bf Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 28 Mar 2024 23:16:57 +0530 Subject: [PATCH 0739/1016] kgsl: gen8: Fix few snapshot issues In gen8_snapshot_registers() function, the header is not considered as part of the buffer size. Similarly, in gen8_legacy_snapshot_registers(), the empty space check is performed without accounting for the header. Fix these by taking header size into account properly. 
Change-Id: I3439bb63c8e76f4d88eeda752ff4a7791aebd746 Signed-off-by: Kamal Agrawal --- adreno_gen8_snapshot.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index bbc3dc099d..e257bc082a 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -168,7 +168,7 @@ size_t gen8_legacy_snapshot_registers(struct kgsl_device *device, struct kgsl_snapshot_mvc_regs_v3 *header = (struct kgsl_snapshot_mvc_regs_v3 *)buf; u32 *data = (u32 *)(buf + sizeof(*header)); - u32 size = adreno_snapshot_regs_count(ptr) * sizeof(*data); + u32 size = (adreno_snapshot_regs_count(ptr) * sizeof(*data)) + sizeof(*header); u32 count, k; if (remain < size) { @@ -206,7 +206,7 @@ size_t gen8_legacy_snapshot_registers(struct kgsl_device *device, kgsl_regread(device, k, data++); } - return (size + sizeof(*header)); + return size; } static size_t gen8_snapshot_registers(struct kgsl_device *device, u8 *buf, @@ -219,7 +219,7 @@ static size_t gen8_snapshot_registers(struct kgsl_device *device, u8 *buf, u32 *data = (u32 *)(buf + sizeof(*header)); u32 *src; u32 cnt; - u32 size = adreno_snapshot_regs_count(ptr) * sizeof(*data); + u32 size = (adreno_snapshot_regs_count(ptr) * sizeof(*data)) + sizeof(*header); if (remain < size) { SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); From 0fe69b8f93a1f0df5de485c3250c320e4cbc176c Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Wed, 7 Feb 2024 10:06:50 -0800 Subject: [PATCH 0740/1016] kgsl: Provide statistics on GPU faults Track the GPU faults of each type and the proceeses that generate them. Provide sysfs nodes to read out the number of various GPU fault types that occurred since device reboot and a list of processes ranked according to the number of faults they generated. 
Change-Id: I910b766b6d6e631e20857b6d25663ba3ef87d09b Signed-off-by: Lynus Vaz --- adreno.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++ adreno.h | 34 ++++++++++++++++++ adreno_dispatch.c | 2 ++ adreno_hwsched.c | 5 +++ adreno_sysfs.c | 48 +++++++++++++++++++++++++ kgsl_device.h | 2 ++ 6 files changed, 180 insertions(+) diff --git a/adreno.c b/adreno.c index 7e0a3a299b..bd7e519e3a 100644 --- a/adreno.c +++ b/adreno.c @@ -1138,6 +1138,7 @@ static void adreno_setup_device(struct adreno_device *adreno_dev) INIT_LIST_HEAD(&adreno_dev->active_list); spin_lock_init(&adreno_dev->active_list_lock); + rwlock_init(&adreno_dev->fault_stats_lock); for (i = 0; i < ARRAY_SIZE(adreno_dev->ringbuffers); i++) { struct adreno_ringbuffer *rb = &adreno_dev->ringbuffers[i]; @@ -3320,6 +3321,94 @@ u64 adreno_read_cx_timer(struct adreno_device *adreno_dev) return arch_timer_read_counter(); } +static void add_proc_fault_list(struct adreno_device *adreno_dev, + struct kgsl_process_private *proc_priv) +{ + int cur_idx = -1; + int new_idx = -1; + int i; + + /* + * Add the process to array of faulting processes. This array is sorted (processes with + * more faults are higher ranked). + */ + write_lock(&adreno_dev->fault_stats_lock); + proc_priv->fault_count++; + + for (i = ARRAY_SIZE(adreno_dev->fault_procs) - 1; i >= 0; i--) { + struct adreno_fault_proc *proc = &adreno_dev->fault_procs[i]; + + /* Check whether this process is already tracked in the array */ + if (!strcmp(proc->comm, proc_priv->comm)) { + proc_priv->fault_count = ++proc->fault_count; + cur_idx = i; + break; + } + + /* Find left most entry with lower fault count than current process */ + if (proc->fault_count < proc_priv->fault_count) + new_idx = i; + } + + /* + * If the process is not currently tracked, we can place it at the left most index that has + * a fault count less than its fault count. If such an index does not exist, the array is + * already full. 
+ */ + if (cur_idx < 0) { + if (new_idx >= 0) + goto write_new_idx; + goto unlock; + } + + /* Find the left most entry (if any) with a lower fault_count */ + for (new_idx = 0; new_idx < cur_idx; new_idx++) { + if (adreno_dev->fault_procs[new_idx].fault_count < proc_priv->fault_count) + break; + } + + /* Check if the current process is already at its correct position */ + if (new_idx == cur_idx) + goto unlock; + + /* Swap the two entries */ + adreno_dev->fault_procs[cur_idx].fault_count = + adreno_dev->fault_procs[new_idx].fault_count; + strscpy(adreno_dev->fault_procs[cur_idx].comm, adreno_dev->fault_procs[new_idx].comm, + sizeof(adreno_dev->fault_procs[cur_idx].comm)); + +write_new_idx: + adreno_dev->fault_procs[new_idx].fault_count = proc_priv->fault_count; + strscpy(adreno_dev->fault_procs[new_idx].comm, proc_priv->comm, + sizeof(adreno_dev->fault_procs[new_idx].comm)); + +unlock: + write_unlock(&adreno_dev->fault_stats_lock); +} + +void adreno_gpufault_stats(struct adreno_device *adreno_dev, + struct kgsl_drawobj *drawobj, struct kgsl_drawobj *drawobj_lpac, int fault) +{ + int i; + struct kgsl_process_private *proc_priv = NULL, *proc_priv_lpac; + + write_lock(&adreno_dev->fault_stats_lock); + for (i = 0; i < ARRAY_SIZE(adreno_dev->fault_counts); i++) + if (fault & BIT(i)) + adreno_dev->fault_counts[i]++; + write_unlock(&adreno_dev->fault_stats_lock); + + if (drawobj) { + proc_priv = drawobj->context->proc_priv; + add_proc_fault_list(adreno_dev, proc_priv); + } + if (drawobj_lpac) { + proc_priv_lpac = drawobj_lpac->context->proc_priv; + if (proc_priv_lpac != proc_priv) + add_proc_fault_list(adreno_dev, proc_priv_lpac); + } +} + static const struct kgsl_functable adreno_functable = { /* Mandatory functions */ .suspend_context = adreno_suspend_context, diff --git a/adreno.h b/adreno.h index 0ef1f89c91..70351591a2 100644 --- a/adreno.h +++ b/adreno.h @@ -257,6 +257,7 @@ enum adreno_gpurev { #define ADRENO_GMU_FAULT BIT(5) #define ADRENO_CTX_DETATCH_TIMEOUT_FAULT 
BIT(6) #define ADRENO_GMU_FAULT_SKIP_SNAPSHOT BIT(7) +#define ADRENO_FAULT_TYPES 8 /** * Bit fields for GPU_CX_MISC_CX_AHB_*_CNTL registers @@ -514,6 +515,18 @@ struct adreno_dispatch_ops { u32 (*get_fault)(struct adreno_device *adreno_dev); }; +/** + * struct adreno_fault_proc - Structure to hold data on faulting processes + */ +struct adreno_fault_proc { + /** @comm: Task name of the faulting process */ + char comm[TASK_COMM_LEN]; + /** @fault_count: Count of the GPU faults from this process */ + u32 fault_count; +}; + +#define ADRENO_MAX_FAULTING_PROCS 10 + /** * struct adreno_device - The mothership structure for all adreno related info * @dev: Reference to struct kgsl_device @@ -729,6 +742,15 @@ struct adreno_device { * If set, a timeout will occur in 2 ^ (ahb_timeout_val + 1) cycles. */ u32 ahb_timeout_val; + /** @fault_counts: Keep track of GPU faults */ + u32 fault_counts[ADRENO_FAULT_TYPES]; + /** + * @fault_procs: Array to keep track of per process GPU fault count sorted by the number + * of GPU faults + */ + struct adreno_fault_proc fault_procs[ADRENO_MAX_FAULTING_PROCS]; + /** @fault_stats_lock: A R/W lock to protect GPU fault statistics */ + rwlock_t fault_stats_lock; }; /** @@ -1988,4 +2010,16 @@ static inline void adreno_llcc_slice_deactivate(struct adreno_device *adreno_dev if (adreno_dev->gpuhtw_llc_slice_enable && !IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) llcc_slice_deactivate(adreno_dev->gpuhtw_llc_slice); } + +/** + * adreno_gpufault_stats - Update GPU fault statistics + * @adreno_dev: Adreno device handle + * @drawobj: GC drawobj that caused the fault + * @drawobj_lpac: LPAC drawobj that caused the fault + * @fault: Fault type + * + * Update statistics about GPU faults + */ +void adreno_gpufault_stats(struct adreno_device *adreno_dev, + struct kgsl_drawobj *drawobj, struct kgsl_drawobj *drawobj_lpac, int fault); #endif /*__ADRENO_H */ diff --git a/adreno_dispatch.c b/adreno_dispatch.c index 887d29e927..1cceaec899 100644 --- 
a/adreno_dispatch.c +++ b/adreno_dispatch.c @@ -1991,6 +1991,8 @@ static int dispatcher_do_fault(struct adreno_device *adreno_dev) if (!(fault & ADRENO_GMU_FAULT_SKIP_SNAPSHOT)) do_header_and_snapshot(device, fault, hung_rb, cmdobj); + adreno_gpufault_stats(adreno_dev, cmdobj ? DRAWOBJ(cmdobj) : NULL, NULL, fault); + /* Turn off the KEEPALIVE vote from the ISR for hard fault */ if (gpudev->gpu_keepalive && fault & ADRENO_HARD_FAULT) gpudev->gpu_keepalive(adreno_dev, false); diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 367d7e58a5..d48dee8bab 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -1729,6 +1729,8 @@ static void adreno_hwsched_reset_and_snapshot_legacy(struct adreno_device *adren drawobj = NULL; } + adreno_gpufault_stats(adreno_dev, drawobj, NULL, fault); + if (!drawobj) { if (fault & ADRENO_GMU_FAULT) gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); @@ -1822,6 +1824,8 @@ static void adreno_hwsched_reset_and_snapshot(struct adreno_device *adreno_dev, gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); else kgsl_device_snapshot(device, NULL, NULL, false); + + adreno_gpufault_stats(adreno_dev, NULL, NULL, fault); goto done; } @@ -1837,6 +1841,7 @@ static void adreno_hwsched_reset_and_snapshot(struct adreno_device *adreno_dev, } kgsl_device_snapshot(device, context, context_lpac, false); + adreno_gpufault_stats(adreno_dev, drawobj, drawobj_lpac, fault); if (drawobj) { force_retire_timestamp(device, drawobj); diff --git a/adreno_sysfs.c b/adreno_sysfs.c index d13ebb9e03..9fab257c9a 100644 --- a/adreno_sysfs.c +++ b/adreno_sysfs.c @@ -316,6 +316,50 @@ static int _lpac_store(struct adreno_device *adreno_dev, bool val) return -EINVAL; } +static ssize_t gpufaults_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + size_t count = 0; + int i; + + read_lock(&adreno_dev->fault_stats_lock); + for (i = 0; i < 
ARRAY_SIZE(adreno_dev->fault_counts); i++) + count += scnprintf(buf + count, PAGE_SIZE - 2 - count, "%u ", + adreno_dev->fault_counts[i]); + read_unlock(&adreno_dev->fault_stats_lock); + + buf[count++] = '\n'; + return count; +} + +static ssize_t gpufault_procs_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + size_t count = 0; + int i; + + read_lock(&adreno_dev->fault_stats_lock); + for (i = 0; i < ARRAY_SIZE(adreno_dev->fault_procs); i++) { + struct adreno_fault_proc *proc = &adreno_dev->fault_procs[i]; + + if (!proc->fault_count) + break; + + count += scnprintf(buf + count, PAGE_SIZE - 1 - count, "%s %u\n", + proc->comm, proc->fault_count); + + if (count >= PAGE_SIZE - 1) + break; + } + read_unlock(&adreno_dev->fault_stats_lock); + + return count; +} + ssize_t adreno_sysfs_store_u32(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -403,6 +447,8 @@ static ADRENO_SYSFS_BOOL(touch_wake); static ADRENO_SYSFS_BOOL(gmu_ab); static DEVICE_ATTR_RO(gpu_model); +static DEVICE_ATTR_RO(gpufaults); +static DEVICE_ATTR_RO(gpufault_procs); static const struct attribute *_attr_list[] = { &adreno_attr_ft_policy.attr.attr, @@ -429,6 +475,8 @@ static const struct attribute *_attr_list[] = { &adreno_attr_touch_wake.attr.attr, &adreno_attr_gmu_ab.attr.attr, &adreno_attr_clx.attr.attr, + &dev_attr_gpufaults.attr, + &dev_attr_gpufault_procs.attr, NULL, }; diff --git a/kgsl_device.h b/kgsl_device.h index 6b2d4300ea..9953dbcaaf 100644 --- a/kgsl_device.h +++ b/kgsl_device.h @@ -545,6 +545,8 @@ struct kgsl_process_private { * @cmdline: Cmdline string of the process */ char *cmdline; + /** @fault_count: Count of GPU faults from this process */ + u32 fault_count; }; struct kgsl_device_private { From 061b4899f2608c8d2eb734110a3a3576c690db2b Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Fri, 2 Feb 2024 14:58:35 -0800 
Subject: [PATCH 0741/1016] kgsl: mmu: Add sysfs nodes for pagefault statistics Track the number of pagefaults since device boot and the processes that generate these pagefaults. Add sysfs nodes to allow userspace to read the number of pagefaults of each type and the top pagefaulting processes. Change-Id: I2da03f8027fb3a152b1fd7cb7a68d052d88febd2 Signed-off-by: Lynus Vaz --- kgsl.c | 1 + kgsl_device.h | 4 + kgsl_iommu.c | 208 ++++++++++++++++++++++++++++++++++++++++++++++++++ kgsl_iommu.h | 26 +++++++ kgsl_mmu.h | 13 +++- 5 files changed, 251 insertions(+), 1 deletion(-) diff --git a/kgsl.c b/kgsl.c index 32bdeb16e3..3c45100254 100644 --- a/kgsl.c +++ b/kgsl.c @@ -5220,6 +5220,7 @@ int kgsl_device_platform_probe(struct kgsl_device *device) /* Initialize common sysfs entries */ kgsl_pwrctrl_init_sysfs(device); + kgsl_mmu_sysfs_init(&device->mmu); timer_setup(&device->work_period_timer, kgsl_work_period_timer, 0); spin_lock_init(&device->work_period_lock); diff --git a/kgsl_device.h b/kgsl_device.h index 9953dbcaaf..56c547ad21 100644 --- a/kgsl_device.h +++ b/kgsl_device.h @@ -547,6 +547,10 @@ struct kgsl_process_private { char *cmdline; /** @fault_count: Count of GPU faults from this process */ u32 fault_count; + /** @pf_count: Total count of pagefaults from this process */ + u32 pf_count; + /** @pf_type_counts: Count of pagefaults of each type from this process */ + u32 pf_type_counts[KGSL_IOMMU_PAGEFAULT_TYPES]; }; struct kgsl_device_private { diff --git a/kgsl_iommu.c b/kgsl_iommu.c index b3a6ed25fc..aff5cb0c31 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -976,6 +976,92 @@ static void kgsl_iommu_add_fault_info(struct kgsl_context *context, kfree(report); } +static void _increment_pf_counts(struct kgsl_iommu *iommu, + struct kgsl_process_private *proc_priv, int flags) +{ + u32 type; + int cur_idx = -1; + int new_idx = -1; + int i; + + write_lock(&iommu->pf_stats_lock); + iommu->pf_type_counts[!!(flags & IOMMU_FAULT_WRITE)]++; + + type = ffs(flags & 
~IOMMU_FAULT_WRITE) - 1; + if (type < ARRAY_SIZE(iommu->pf_type_counts)) + iommu->pf_type_counts[type]++; + + if (!proc_priv) + goto unlock; + + /* + * Add the process to array of pagefaulting processes. This array is sorted (processes with + * more pagefaults are higher ranked). + */ + proc_priv->pf_count++; + proc_priv->pf_type_counts[!!(flags & IOMMU_FAULT_WRITE)]++; + if (type < ARRAY_SIZE(proc_priv->pf_type_counts)) + proc_priv->pf_type_counts[type]++; + + for (i = ARRAY_SIZE(iommu->pf_procs) - 1; i >= 0; i--) { + struct kgsl_iommu_pf_proc *proc = &iommu->pf_procs[i]; + + /* Check whether this process is already tracked in the array */ + if (!strcmp(proc->comm, proc_priv->comm)) { + proc_priv->pf_count = ++proc->pf_count; + proc_priv->pf_type_counts[!!(flags & IOMMU_FAULT_WRITE)] = + ++proc->pf_type_counts[!!(flags & IOMMU_FAULT_WRITE)]; + if (type < ARRAY_SIZE(proc_priv->pf_type_counts)) + proc_priv->pf_type_counts[type] = ++proc->pf_type_counts[type]; + + cur_idx = i; + break; + } + + /* Find left most entry with lower fault count than current process */ + if (proc->pf_count < proc_priv->pf_count) + new_idx = i; + } + + /* + * If the process is not currently tracked, we can place it at the left most index that has + * a fault count less than its fault count. If such an index does not exist, the array is + * already full. 
+ */ + if (cur_idx < 0) { + if (new_idx >= 0) + goto write_new_idx; + goto unlock; + } + + /* Find the left most entry (if any) with a lower pf_count */ + for (new_idx = 0; new_idx < cur_idx; new_idx++) { + if (iommu->pf_procs[new_idx].pf_count < proc_priv->pf_count) + break; + } + + /* Check if the current process is already at its correct position */ + if (new_idx == cur_idx) + goto unlock; + + /* Swap the two entries */ + iommu->pf_procs[cur_idx].pf_count = iommu->pf_procs[new_idx].pf_count; + memcpy(iommu->pf_procs[cur_idx].pf_type_counts, iommu->pf_procs[new_idx].pf_type_counts, + sizeof(iommu->pf_procs[cur_idx].pf_type_counts)); + strscpy(iommu->pf_procs[cur_idx].comm, iommu->pf_procs[new_idx].comm, + sizeof(iommu->pf_procs[cur_idx].comm)); + +write_new_idx: + iommu->pf_procs[new_idx].pf_count = proc_priv->pf_count; + memcpy(iommu->pf_procs[new_idx].pf_type_counts, proc_priv->pf_type_counts, + sizeof(iommu->pf_procs[new_idx].pf_type_counts)); + strscpy(iommu->pf_procs[new_idx].comm, proc_priv->comm, + sizeof(iommu->pf_procs[new_idx].comm)); + +unlock: + write_unlock(&iommu->pf_stats_lock); +} + static void kgsl_iommu_print_fault(struct kgsl_mmu *mmu, struct kgsl_iommu_context *ctxt, unsigned long addr, u64 ptbase, u32 contextid, @@ -1011,6 +1097,7 @@ static void kgsl_iommu_print_fault(struct kgsl_mmu *mmu, else fault_type = "unknown"; + _increment_pf_counts(&mmu->iommu, private, flags); /* FIXME: This seems buggy */ if (test_bit(KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE, &mmu->pfpolicy)) @@ -2721,6 +2808,8 @@ int kgsl_iommu_bind(struct kgsl_device *device, struct platform_device *pdev) if (!kgsl_vbo_zero_page) clear_bit(KGSL_MMU_SUPPORT_VBO, &mmu->features); + rwlock_init(&iommu->pf_stats_lock); + return 0; err: @@ -2733,6 +2822,124 @@ err: return ret; } +/* Update this array if KGSL_IOMMU_PAGEFAULT_TYPES increases */ +static const char *pf_types[KGSL_IOMMU_PAGEFAULT_TYPES] = { + [0] = "READ", /* Read fault */ + [1] = "WRITE", /* Write fault */ + 
[ilog2(IOMMU_FAULT_TRANSLATION)] = "TRANS", + [ilog2(IOMMU_FAULT_PERMISSION)] = "PERM", + [ilog2(IOMMU_FAULT_EXTERNAL)] = "EXT", + [ilog2(IOMMU_FAULT_TRANSACTION_STALLED)] = "STALL", +}; + +static ssize_t pagefaults_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + int i; + size_t num_chars = 0; + + /* Print out a header */ + num_chars += scnprintf(buf + num_chars, + PAGE_SIZE - num_chars - 2, + "%10s", "Total"); + + for (i = 0; i < ARRAY_SIZE(pf_types); i++) { + num_chars += scnprintf(buf + num_chars, + PAGE_SIZE - num_chars - 2, + "|%10s", pf_types[i]); + } + buf[num_chars++] = '\n'; + + read_lock(&iommu->pf_stats_lock); + + /* Sum of R and W pagefaults is the total */ + num_chars += scnprintf(buf + num_chars, + PAGE_SIZE - num_chars - 2, "%10u", + iommu->pf_type_counts[0] + iommu->pf_type_counts[1]); + + /* Print out count of each pagefault type */ + for (i = 0; i < ARRAY_SIZE(iommu->pf_type_counts); i++) { + num_chars += scnprintf(buf + num_chars, + PAGE_SIZE - num_chars - 2, + " %10u", iommu->pf_type_counts[i]); + + if (num_chars >= PAGE_SIZE - 2) + break; + } + + read_unlock(&iommu->pf_stats_lock); + + buf[num_chars++] = '\n'; + return num_chars; +} + +static ssize_t pagefault_procs_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + size_t num_chars = 0; + int i; + + /* Print out a header */ + num_chars += scnprintf(buf + num_chars, + PAGE_SIZE - num_chars - 2, + "%16s|%10s", "Process", "Total"); + + for (i = 0; i < ARRAY_SIZE(pf_types); i++) { + num_chars += scnprintf(buf + num_chars, + PAGE_SIZE - num_chars - 2, + "|%10s", pf_types[i]); + } + buf[num_chars++] = '\n'; + + read_lock(&iommu->pf_stats_lock); + for (i = 0; i < ARRAY_SIZE(iommu->pf_procs); i++) { + struct kgsl_iommu_pf_proc *proc = 
&iommu->pf_procs[i]; + int j; + + /* Array is sorted, so break on first 0 */ + if (!proc->pf_count) + break; + + num_chars += scnprintf(buf + num_chars, + PAGE_SIZE - num_chars - 1, "%16s %10u", + proc->comm, proc->pf_count); + + for (j = 0; j < ARRAY_SIZE(proc->pf_type_counts); j++) + num_chars += scnprintf(buf + num_chars, + PAGE_SIZE - num_chars - 1, " %10u", + proc->pf_type_counts[j]); + + if (num_chars < PAGE_SIZE - 1) + buf[num_chars++] = '\n'; + + if (num_chars >= PAGE_SIZE - 1) + break; + } + read_unlock(&iommu->pf_stats_lock); + + return num_chars; +} + +static DEVICE_ATTR_RO(pagefaults); +static DEVICE_ATTR_RO(pagefault_procs); + +static const struct attribute *iommu_sysfs_attr_list[] = { + &dev_attr_pagefaults.attr, + &dev_attr_pagefault_procs.attr, + NULL +}; + +static void kgsl_iommu_sysfs_init(struct kgsl_mmu *mmu) +{ + struct kgsl_device *device = KGSL_MMU_DEVICE(mmu); + + WARN_ON(sysfs_create_files(&device->dev->kobj, iommu_sysfs_attr_list)); +} + static const struct kgsl_mmu_ops kgsl_iommu_ops = { .mmu_close = kgsl_iommu_close, .mmu_start = kgsl_iommu_start, @@ -2745,6 +2952,7 @@ static const struct kgsl_mmu_ops kgsl_iommu_ops = { .mmu_getpagetable = kgsl_iommu_getpagetable, .mmu_map_global = kgsl_iommu_map_global, .mmu_send_tlb_hint = kgsl_iommu_send_tlb_hint, + .mmu_sysfs_init = kgsl_iommu_sysfs_init, }; static const struct kgsl_mmu_pt_ops iopgtbl_pt_ops = { diff --git a/kgsl_iommu.h b/kgsl_iommu.h index 4f70ef5c34..cdc7e528a9 100644 --- a/kgsl_iommu.h +++ b/kgsl_iommu.h @@ -8,6 +8,7 @@ #include #include +#include /* * These defines control the address range for allocations that * are mapped into all pagetables. 
@@ -114,6 +115,8 @@ /* offset at which a nop command is placed in setstate */ #define KGSL_IOMMU_SETSTATE_NOP_OFFSET 1024 +#define KGSL_IOMMU_PAGEFAULT_TYPES (ilog2(IOMMU_FAULT_TRANSACTION_STALLED) + 1) + /* * struct kgsl_iommu_context - Structure holding data about an iommu context * bank @@ -138,6 +141,20 @@ struct kgsl_iommu_context { struct adreno_smmu_priv adreno_smmu; }; +/* + * struct kgsl_iommu_pf_proc - Structure to hold data on pagefaulting processes + */ +struct kgsl_iommu_pf_proc { + /** @comm: Task name of the pagefaulting process */ + char comm[TASK_COMM_LEN]; + /** @pf_count: Total count of pagefaults from this process */ + u32 pf_count; + /** @pf_type_counts: Count of pagefaults of each type from this process */ + u32 pf_type_counts[KGSL_IOMMU_PAGEFAULT_TYPES]; +}; + +#define KGSL_IOMMU_MAX_PF_PROCS 10 + /* * struct kgsl_iommu - Structure holding iommu data for kgsl driver * @regbase: Virtual address of the IOMMU register base @@ -175,6 +192,15 @@ struct kgsl_iommu { u32 pagesize; /** @cx_regulator: CX regulator handle in case the IOMMU needs it */ struct regulator *cx_regulator; + /** @pf_type_counts: Keep track of pagefaults */ + u32 pf_type_counts[KGSL_IOMMU_PAGEFAULT_TYPES]; + /** + * @pf_procs: Array to keep track of per process pagefault count sorted by the number + * of pagefaults + */ + struct kgsl_iommu_pf_proc pf_procs[KGSL_IOMMU_MAX_PF_PROCS]; + /** @pf_stats_lock: A R/W lock to protect pagefault statistics */ + rwlock_t pf_stats_lock; }; /* diff --git a/kgsl_mmu.h b/kgsl_mmu.h index a5c8be7385..cbdd102134 100644 --- a/kgsl_mmu.h +++ b/kgsl_mmu.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __KGSL_MMU_H #define __KGSL_MMU_H @@ -116,6 +116,7 @@ struct kgsl_mmu_ops { void (*mmu_map_global)(struct kgsl_mmu *mmu, struct kgsl_memdesc *memdesc, u32 padding); void (*mmu_send_tlb_hint)(struct kgsl_mmu *mmu, bool hint); + void (*mmu_sysfs_init)(struct kgsl_mmu *mmu); }; struct kgsl_mmu_pt_ops { @@ -432,4 +433,14 @@ static inline int kgsl_iommu_bind(struct kgsl_device *device, struct platform_de ssize_t kgsl_mmu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, unsigned int nents, int prot); + +/** + * kgsl_mmu_sysfs_init - Initialize sysfs nodes for the MMU + * @mmu: A KGSL MMU handle + */ +static inline void kgsl_mmu_sysfs_init(struct kgsl_mmu *mmu) +{ + if (MMU_OP_VALID(mmu, mmu_sysfs_init)) + mmu->mmu_ops->mmu_sysfs_init(mmu); +} #endif /* __KGSL_MMU_H */ From 828c2ab27c397744095049abfa717a6e7a50fc9d Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sun, 31 Mar 2024 12:02:33 +0530 Subject: [PATCH 0742/1016] kgsl: gen8: Log current IB and AB in kgsl_buslevel trace This change ports commit 425c645fd3e8 ("kgsl: Log current IB and AB in kgsl_buslevel trace") to gen8. 
Change-Id: I0b40ce521b90f7cdf9fd4ec52b3e01ef81663b0e Signed-off-by: Kamal Agrawal --- adreno_gen8_gmu.c | 2 +- adreno_gen8_hwsched.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index a6130ec696..729785a472 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -2100,7 +2100,7 @@ static int gen8_gmu_bus_set(struct adreno_device *adreno_dev, int buslevel, pwr->cur_ab = ab; } - trace_kgsl_buslevel(device, pwr->active_pwrlevel, buslevel, ab); + trace_kgsl_buslevel(device, pwr->active_pwrlevel, pwr->cur_buslevel, pwr->cur_ab); return ret; } diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 6a48d61981..f2ee6bbe22 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -1545,7 +1545,7 @@ static int gen8_hwsched_bus_set(struct adreno_device *adreno_dev, int buslevel, pwr->cur_ab = ab; } - trace_kgsl_buslevel(device, pwr->active_pwrlevel, buslevel, ab); + trace_kgsl_buslevel(device, pwr->active_pwrlevel, pwr->cur_buslevel, pwr->cur_ab); return ret; } From 8a07c62a2f4d066c64f178d5c827f202ed7e2c58 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Tue, 12 Mar 2024 10:34:55 -0700 Subject: [PATCH 0743/1016] kgsl: hwsched: Add support for nonfatal F2H_MSG_CONTEXT_BAD packet There are few GPU RBBM non fatal error interrupts handled at GMU where it sends the F2H_MSG_CONTEXT_BAD packet to just log the error message instead of reset and recovery. Hence add required support for gen7 and gen8 targets to handle non fatal F2H_MSG_CONTEXT_BAD packet to log error message. 
Change-Id: I2c73b7ef1c524f1ec159dafb36aa476fed23283e Signed-off-by: Hareesh Gundu --- adreno.c | 14 ++++++- adreno.h | 16 ++++++++ adreno_a5xx.c | 6 +-- adreno_a6xx.c | 4 +- adreno_a6xx_gmu.c | 4 +- adreno_a6xx_hwsched.c | 4 +- adreno_a6xx_rgmu.c | 4 +- adreno_gen7_gmu.c | 4 +- adreno_gen7_hwsched.c | 8 +++- adreno_gen7_hwsched_hfi.c | 22 +++++++++-- adreno_gen8_gmu.c | 4 +- adreno_gen8_hwsched.c | 8 +++- adreno_gen8_hwsched_hfi.c | 22 +++++++++-- adreno_hfi.h | 10 +++++ adreno_hwsched.c | 79 +++++++++++++++++++++++++++++++++++++++ adreno_hwsched.h | 12 ++++++ kgsl_pwrctrl.c | 10 +++-- 17 files changed, 200 insertions(+), 31 deletions(-) diff --git a/adreno.c b/adreno.c index e5e180721f..2da2434980 100644 --- a/adreno.c +++ b/adreno.c @@ -357,6 +357,9 @@ void adreno_irqctrl(struct adreno_device *adreno_dev, int state) { const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + if (!adreno_dev->irq_mask) + return; + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_INT_0_MASK, state ? 
adreno_dev->irq_mask : 0); @@ -1169,6 +1172,15 @@ static const struct of_device_id adreno_component_match[] = { {}, }; +static int adreno_irq_setup(struct platform_device *pdev, + struct adreno_device *adreno_dev) +{ + if (!adreno_dev->irq_mask) + return 0; + + return kgsl_request_irq(pdev, "kgsl_3d0_irq", adreno_irq_handler, KGSL_DEVICE(adreno_dev)); +} + int adreno_device_probe(struct platform_device *pdev, struct adreno_device *adreno_dev) { @@ -1268,7 +1280,7 @@ int adreno_device_probe(struct platform_device *pdev, if (status) goto err_remove_llcc; - status = kgsl_request_irq(pdev, "kgsl_3d0_irq", adreno_irq_handler, device); + status = adreno_irq_setup(pdev, adreno_dev); if (status < 0) goto err_unbind; diff --git a/adreno.h b/adreno.h index 70351591a2..a4bf3e9321 100644 --- a/adreno.h +++ b/adreno.h @@ -2022,4 +2022,20 @@ static inline void adreno_llcc_slice_deactivate(struct adreno_device *adreno_dev */ void adreno_gpufault_stats(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj, struct kgsl_drawobj *drawobj_lpac, int fault); + +/** + * adreno_irq_free - Free an interrupt allocated for GPU + * @adreno_dev: Adreno device handle + */ +static inline void adreno_irq_free(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (!(adreno_dev->irq_mask || device->pwrctrl.interrupt_num)) + return; + + devm_free_irq(&device->pdev->dev, device->pwrctrl.interrupt_num, device); + adreno_dev->irq_mask = 0; + device->pwrctrl.interrupt_num = 0; +} #endif /*__ADRENO_H */ diff --git a/adreno_a5xx.c b/adreno_a5xx.c index e6d0e577a7..e48c93a2f2 100644 --- a/adreno_a5xx.c +++ b/adreno_a5xx.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2014-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -103,6 +103,8 @@ static int a5xx_probe(struct platform_device *pdev, INIT_WORK(&device->idle_check_ws, kgsl_idle_check); + adreno_dev->irq_mask = A5XX_INT_MASK; + ret = adreno_device_probe(pdev, adreno_dev); if (ret) return ret; @@ -1278,8 +1280,6 @@ static int a5xx_start(struct adreno_device *adreno_dev) adreno_get_bus_counters(adreno_dev); adreno_perfcounter_restore(adreno_dev); - adreno_dev->irq_mask = A5XX_INT_MASK; - if (adreno_is_a530(adreno_dev) && ADRENO_FEATURE(adreno_dev, ADRENO_LM)) adreno_perfcounter_kernel_get(adreno_dev, diff --git a/adreno_a6xx.c b/adreno_a6xx.c index 7f26022d8d..e9e84067bb 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -1929,6 +1929,8 @@ static int a6xx_probe(struct platform_device *pdev, memset(adreno_dev, 0, sizeof(*adreno_dev)); + adreno_dev->irq_mask = A6XX_INT_MASK; + ret = a6xx_probe_common(pdev, adreno_dev, chipid, gpucore); if (ret) return ret; @@ -1943,8 +1945,6 @@ static int a6xx_probe(struct platform_device *pdev, INIT_WORK(&device->idle_check_ws, kgsl_idle_check); - adreno_dev->irq_mask = A6XX_INT_MASK; - return 0; } diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index d3148a2efc..8903c326f9 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -3681,6 +3681,8 @@ int a6xx_gmu_device_probe(struct platform_device *pdev, adreno_dev = &a6xx_dev->adreno_dev; + adreno_dev->irq_mask = A6XX_INT_MASK; + ret = a6xx_probe_common(pdev, adreno_dev, chipid, gpucore); if (ret) return ret; @@ -3695,8 +3697,6 @@ int a6xx_gmu_device_probe(struct platform_device *pdev, timer_setup(&device->idle_timer, gmu_idle_timer, 0); - adreno_dev->irq_mask = A6XX_INT_MASK; - return 0; } diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index fec902f7a4..5b354fdb2d 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -1287,6 +1287,8 @@ int a6xx_hwsched_probe(struct platform_device *pdev, adreno_dev->hwsched_enabled = true; + adreno_dev->irq_mask = A6XX_HWSCHED_INT_MASK; + ret = a6xx_probe_common(pdev, 
adreno_dev, chipid, gpucore); if (ret) return ret; @@ -1297,8 +1299,6 @@ int a6xx_hwsched_probe(struct platform_device *pdev, timer_setup(&device->idle_timer, hwsched_idle_timer, 0); - adreno_dev->irq_mask = A6XX_HWSCHED_INT_MASK; - return adreno_hwsched_init(adreno_dev, &a6xx_hwsched_ops); } diff --git a/adreno_a6xx_rgmu.c b/adreno_a6xx_rgmu.c index ac8d05cc3f..de71a099a8 100644 --- a/adreno_a6xx_rgmu.c +++ b/adreno_a6xx_rgmu.c @@ -1295,6 +1295,8 @@ int a6xx_rgmu_device_probe(struct platform_device *pdev, adreno_dev = &a6xx_dev->adreno_dev; + adreno_dev->irq_mask = A6XX_INT_MASK; + ret = a6xx_probe_common(pdev, adreno_dev, chipid, gpucore); if (ret) return ret; @@ -1309,8 +1311,6 @@ int a6xx_rgmu_device_probe(struct platform_device *pdev, timer_setup(&device->idle_timer, rgmu_idle_timer, 0); - adreno_dev->irq_mask = A6XX_INT_MASK; - return 0; } diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 5070728ff8..a607b42be5 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -3318,6 +3318,8 @@ int gen7_gmu_device_probe(struct platform_device *pdev, adreno_dev = &gen7_dev->adreno_dev; + adreno_dev->irq_mask = GEN7_INT_MASK; + ret = gen7_probe_common(pdev, adreno_dev, chipid, gpucore); if (ret) return ret; @@ -3339,8 +3341,6 @@ int gen7_gmu_device_probe(struct platform_device *pdev, adreno_dev->dms_enabled = true; } - adreno_dev->irq_mask = GEN7_INT_MASK; - return 0; } diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 2c10eae200..702ac42797 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -535,6 +535,10 @@ static int gen7_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) /* Vote for minimal DDR BW for GMU to init */ level = pwr->pwrlevels[pwr->default_pwrlevel].bus_min; + /* From this GMU FW all RBBM interrupts are handled at GMU */ + if (gmu->ver.core >= GMU_VERSION(5, 01, 06)) + adreno_irq_free(adreno_dev); + icc_set_bw(pwr->icc_path, 0, kBps_to_icc(pwr->ddr_table[level])); /* Clear any hwsched faults that might have 
been left over */ @@ -1850,6 +1854,8 @@ int gen7_hwsched_probe(struct platform_device *pdev, adreno_dev->hwsched_enabled = true; + adreno_dev->irq_mask = GEN7_HWSCHED_INT_MASK; + ret = gen7_probe_common(pdev, adreno_dev, chipid, gpucore); if (ret) return ret; @@ -1860,8 +1866,6 @@ int gen7_hwsched_probe(struct platform_device *pdev, timer_setup(&device->idle_timer, hwsched_idle_timer, 0); - adreno_dev->irq_mask = GEN7_HWSCHED_INT_MASK; - if (ADRENO_FEATURE(adreno_dev, ADRENO_LPAC)) adreno_dev->lpac_enabled = true; diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 7f3bbfae7e..16fa234125 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -574,12 +574,16 @@ static u32 get_payload_rb_key(struct adreno_device *adreno_dev, return 0; } -static void log_gpu_fault(struct adreno_device *adreno_dev) +static bool log_gpu_fault(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct device *dev = &gmu->pdev->dev; struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; + /* Return false for non fatal errors */ + if (adreno_hwsched_log_nonfatal_gpu_fault(adreno_dev, dev, cmd->error)) + return false; + switch (cmd->error) { case GMU_GPU_HW_HANG: dev_crit_ratelimited(dev, "MISC: GPU hang detected\n"); @@ -746,6 +750,9 @@ static void log_gpu_fault(struct adreno_device *adreno_dev) cmd->error); break; } + + /* Return true for fatal errors to perform recovery sequence */ + return true; } static bool is_queue_empty(struct adreno_device *adreno_dev, u32 queue_idx) @@ -790,11 +797,18 @@ static void process_ctx_bad(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - if (GMU_VER_MINOR(gmu->ver.hfi) < 2) + if (GMU_VER_MINOR(gmu->ver.hfi) < 2) { log_gpu_fault_legacy(adreno_dev); - else - log_gpu_fault(adreno_dev); + goto done; + } + /* Non fatal RBBM error interrupts don't go through reset and recovery */ + if (!log_gpu_fault(adreno_dev)) { + 
memset(adreno_dev->hwsched.ctxt_bad, 0x0, HFI_MAX_MSG_SIZE); + return; + } + +done: gen7_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); } diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index a6130ec696..f9d25b41b4 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -3150,6 +3150,8 @@ int gen8_gmu_device_probe(struct platform_device *pdev, adreno_dev = &gen8_dev->adreno_dev; + adreno_dev->irq_mask = GEN8_INT_MASK; + ret = gen8_probe_common(pdev, adreno_dev, chipid, gpucore); if (ret) return ret; @@ -3169,8 +3171,6 @@ int gen8_gmu_device_probe(struct platform_device *pdev, adreno_dev->dms_enabled = true; } - adreno_dev->irq_mask = GEN8_INT_MASK; - return 0; } diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 6a48d61981..33f1e9644e 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -593,6 +593,10 @@ static int gen8_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) if (GMU_VER_MINOR(gmu->ver.hfi) >= 7) adreno_hwsched_register_hw_fence(adreno_dev); + /* From this GMU FW all RBBM interrupts are handled at GMU */ + if (gmu->ver.core >= GMU_VERSION(5, 01, 06)) + adreno_irq_free(adreno_dev); + gen8_hwsched_soccp_vote_init(adreno_dev); gen8_hwsched_soccp_vote(adreno_dev, true); @@ -1892,6 +1896,8 @@ int gen8_hwsched_probe(struct platform_device *pdev, adreno_dev->hwsched_enabled = true; + adreno_dev->irq_mask = GEN8_HWSCHED_INT_MASK; + ret = gen8_probe_common(pdev, adreno_dev, chipid, gpucore); if (ret) return ret; @@ -1902,8 +1908,6 @@ int gen8_hwsched_probe(struct platform_device *pdev, timer_setup(&device->idle_timer, hwsched_idle_timer, 0); - adreno_dev->irq_mask = GEN8_HWSCHED_INT_MASK; - if (ADRENO_FEATURE(adreno_dev, ADRENO_LPAC)) adreno_dev->lpac_enabled = true; diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index daffd1e098..7af4e2c275 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -566,12 +566,16 @@ static u32 get_payload_rb_key(struct adreno_device *adreno_dev, return 0; } 
-static void log_gpu_fault(struct adreno_device *adreno_dev) +static bool log_gpu_fault(struct adreno_device *adreno_dev) { struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct device *dev = &gmu->pdev->dev; struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; + /* Return false for non fatal errors */ + if (adreno_hwsched_log_nonfatal_gpu_fault(adreno_dev, dev, cmd->error)) + return false; + switch (cmd->error) { case GMU_GPU_HW_HANG: dev_crit_ratelimited(dev, "MISC: GPU hang detected\n"); @@ -738,6 +742,9 @@ static void log_gpu_fault(struct adreno_device *adreno_dev) cmd->error); break; } + + /* Return true for fatal errors to perform recovery sequence */ + return true; } static bool is_queue_empty(struct adreno_device *adreno_dev, u32 queue_idx) @@ -782,11 +789,18 @@ static void process_ctx_bad(struct adreno_device *adreno_dev) { struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - if (GMU_VER_MINOR(gmu->ver.hfi) < 2) + if (GMU_VER_MINOR(gmu->ver.hfi) < 2) { log_gpu_fault_legacy(adreno_dev); - else - log_gpu_fault(adreno_dev); + goto done; + } + /* Non fatal RBBM error interrupts don't go through reset and recovery */ + if (!log_gpu_fault(adreno_dev)) { + memset(adreno_dev->hwsched.ctxt_bad, 0x0, HFI_MAX_MSG_SIZE); + return; + } + +done: gen8_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); } diff --git a/adreno_hfi.h b/adreno_hfi.h index 385366b005..e2220cb0b8 100644 --- a/adreno_hfi.h +++ b/adreno_hfi.h @@ -1157,6 +1157,8 @@ struct payload_section { #define KEY_AQE0_HW_FAULT 12 #define KEY_AQE1_OPCODE_ERROR 13 #define KEY_AQE1_HW_FAULT 14 +#define KEY_CP_AHB_ERROR 30 +#define KEY_TSB_WRITE_ERROR 31 /* Keys for PAYLOAD_RB type payload */ #define KEY_RB_ID 1 @@ -1229,6 +1231,14 @@ struct payload_section { #define GMU_GPU_AQE1_ILLEGAL_INST_ERROR 629 /* GMU encountered a sync object which is signaled via software but not via hardware */ #define GMU_SYNCOBJ_TIMEOUT_ERROR 630 +/* Non fatal GPU error codes */ +#define GMU_CP_AHB_ERROR 650 +#define 
GMU_ATB_ASYNC_FIFO_OVERFLOW 651 +#define GMU_RBBM_ATB_BUF_OVERFLOW 652 +#define GMU_UCHE_OOB_ACCESS 653 +#define GMU_UCHE_TRAP_INTR 654 +#define GMU_TSB_WRITE_ERROR 655 + /* GPU encountered an unknown CP error */ #define GMU_CP_UNKNOWN_ERROR 700 diff --git a/adreno_hwsched.c b/adreno_hwsched.c index d48dee8bab..d3d9a997df 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -2383,3 +2383,82 @@ void adreno_hwsched_log_pending_hw_fences(struct adreno_device *adreno_dev, stru dev_err(dev, "%d: ctx=%llu seqno=%llu\n", i, entries[i].cmd.ctxt_id, entries[i].cmd.ts); } + +static void adreno_hwsched_lookup_key_value(struct adreno_device *adreno_dev, + u32 type, u32 key, u32 *ptr, u32 num_values) +{ + struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; + u32 i = 0, payload_bytes; + void *start; + + if (!cmd->hdr) + return; + + payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - + offsetof(struct hfi_context_bad_cmd, payload); + + start = &cmd->payload[0]; + + while (i < payload_bytes) { + struct payload_section *payload = start + i; + + /* key-value pair is 'num_values + 1' dwords */ + if ((payload->type == type) && (payload->data[i] == key)) { + u32 j = 1; + + do { + ptr[j - 1] = payload->data[i + j]; + j++; + } while (num_values--); + break; + } + + i += struct_size(payload, data, payload->dwords); + } +} + +bool adreno_hwsched_log_nonfatal_gpu_fault(struct adreno_device *adreno_dev, + struct device *dev, u32 error) +{ + bool non_fatal = true; + + switch (error) { + case GMU_CP_AHB_ERROR: { + u32 err_details[2]; + + adreno_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_AHB_ERROR, err_details, 2); + dev_crit_ratelimited(dev, + "CP: AHB bus error, CP_RL_ERROR_DETAILS_0:0x%x CP_RL_ERROR_DETAILS_1:0x%x\n", + err_details[0], err_details[1]); + break; + } + case GMU_ATB_ASYNC_FIFO_OVERFLOW: + dev_crit_ratelimited(dev, "RBBM: ATB ASYNC overflow\n"); + break; + case GMU_RBBM_ATB_BUF_OVERFLOW: + dev_crit_ratelimited(dev, "RBBM: ATB bus 
overflow\n"); + break; + case GMU_UCHE_OOB_ACCESS: + dev_crit_ratelimited(dev, "UCHE: Out of bounds access\n"); + break; + case GMU_UCHE_TRAP_INTR: + dev_crit_ratelimited(dev, "UCHE: Trap interrupt\n"); + break; + case GMU_TSB_WRITE_ERROR: { + u32 addr[2]; + + adreno_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_TSB_WRITE_ERROR, addr, 2); + dev_crit_ratelimited(dev, "TSB: Write error interrupt: Address: 0x%lx MID: %lu\n", + FIELD_GET(GENMASK(16, 0), addr[1]) << 32 | addr[0], + FIELD_GET(GENMASK(31, 23), addr[1])); + break; + } + default: + non_fatal = false; + break; + } + + return non_fatal; +} diff --git a/adreno_hwsched.h b/adreno_hwsched.h index b061df1321..dae80c785d 100644 --- a/adreno_hwsched.h +++ b/adreno_hwsched.h @@ -258,4 +258,16 @@ void adreno_hwsched_log_pending_hw_fences(struct adreno_device *adreno_dev, stru */ void adreno_hwsched_syncobj_kfence_put(struct kgsl_drawobj_sync *syncobj); +/** + * adreno_hwsched_log_nonfatal_gpu_fault - Logs non fatal GPU error from context bad hfi packet + * @adreno_dev: pointer to the adreno device + * @dev: Pointer to the struct device for the GMU platform device + * @error: Types of error that triggered from context bad HFI + * + * This function parses context bad hfi packet and logs error information. + * + * Return: True for non fatal error code else false. 
+ */ +bool adreno_hwsched_log_nonfatal_gpu_fault(struct adreno_device *adreno_dev, + struct device *dev, u32 error); #endif diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index d55f1a3664..adb40e31cd 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -1629,11 +1629,15 @@ void kgsl_pwrctrl_irq(struct kgsl_device *device, bool state) { struct kgsl_pwrctrl *pwr = &device->pwrctrl; + if (!(device->freq_limiter_intr_num || pwr->interrupt_num)) + return; + if (state) { if (!test_and_set_bit(KGSL_PWRFLAGS_IRQ_ON, &pwr->power_flags)) { trace_kgsl_irq(device, state); - enable_irq(pwr->interrupt_num); + if (pwr->interrupt_num > 0) + enable_irq(pwr->interrupt_num); if (device->freq_limiter_intr_num > 0) enable_irq(device->freq_limiter_intr_num); } @@ -1643,9 +1647,9 @@ void kgsl_pwrctrl_irq(struct kgsl_device *device, bool state) trace_kgsl_irq(device, state); if (device->freq_limiter_intr_num > 0) disable_irq(device->freq_limiter_intr_num); - if (in_interrupt()) + if (in_interrupt() && (pwr->interrupt_num > 0)) disable_irq_nosync(pwr->interrupt_num); - else + else if (pwr->interrupt_num > 0) disable_irq(pwr->interrupt_num); } } From 368d4151762a2967155f91fc7785b954859d2b0f Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Sat, 16 Mar 2024 07:21:36 -0700 Subject: [PATCH 0744/1016] kgsl: gen8: Remove legacy F2H_MSG_CONTEXT_BAD support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All gen8 GPUs doesn’t have support for legacy F2H_MSG_CONTEXT_BAD packet in GMU FW. Hence remove related support from kgsl driver. 
Change-Id: I1f7d0c9dfe99a087e1d3a6b95b871c6694d28fda Signed-off-by: Hareesh Gundu --- adreno_gen8_hwsched.c | 41 +------- adreno_gen8_hwsched_hfi.c | 209 -------------------------------------- 2 files changed, 1 insertion(+), 249 deletions(-) diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 33f1e9644e..9186b020fd 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -240,38 +240,6 @@ err: SNAPSHOT_ERR_NOMEM(device, str); } -static bool parse_payload_rb_legacy(struct adreno_device *adreno_dev, - struct kgsl_snapshot *snapshot) -{ - struct hfi_context_bad_cmd_legacy *cmd = adreno_dev->hwsched.ctxt_bad; - u32 i = 0, payload_bytes; - void *start; - bool ret = false; - - /* Skip if we didn't receive a context bad HFI */ - if (!cmd->hdr) - return false; - - payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - - offsetof(struct hfi_context_bad_cmd_legacy, payload); - - start = &cmd->payload[0]; - - while (i < payload_bytes) { - struct payload_section *payload = start + i; - - if (payload->type == PAYLOAD_RB) { - adreno_hwsched_snapshot_rb_payload(adreno_dev, - snapshot, payload); - ret = true; - } - - i += sizeof(*payload) + (payload->dwords << 2); - } - - return ret; -} - static bool parse_payload_rb(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) { @@ -355,7 +323,6 @@ static size_t snapshot_aqe_buffer(struct kgsl_device *device, u8 *buf, void gen8_hwsched_snapshot(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) { - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev); bool skip_memkind_rb = false; @@ -372,10 +339,7 @@ void gen8_hwsched_snapshot(struct adreno_device *adreno_dev, * payloads are not present, fall back to dumping ringbuffers * based on MEMKIND_RB */ - if (GMU_VER_MINOR(gmu->ver.hfi) < 2) - parse_payload = parse_payload_rb_legacy(adreno_dev, snapshot); - else - parse_payload = 
parse_payload_rb(adreno_dev, snapshot); + parse_payload = parse_payload_rb(adreno_dev, snapshot); if (parse_payload) skip_memkind_rb = true; @@ -579,9 +543,6 @@ static int gen8_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) if (ret) goto clks_gdsc_off; - if (GMU_VER_MINOR(gmu->ver.hfi) < 2) - set_bit(ADRENO_HWSCHED_CTX_BAD_LEGACY, &adreno_dev->hwsched.flags); - gen8_gmu_irq_enable(adreno_dev); /* Vote for minimal DDR BW for GMU to init */ diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 7af4e2c275..77461d104f 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -219,65 +219,6 @@ u32 gen8_hwsched_parse_payload(struct payload_section *payload, u32 key) return 0; } -/* Look up a particular key's value for a given type of payload */ -static u32 gen8_hwsched_lookup_key_value_legacy(struct adreno_device *adreno_dev, - u32 type, u32 key) -{ - struct hfi_context_bad_cmd_legacy *cmd = adreno_dev->hwsched.ctxt_bad; - u32 i = 0, payload_bytes; - void *start; - - if (!cmd->hdr) - return 0; - - payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - - offsetof(struct hfi_context_bad_cmd_legacy, payload); - - start = &cmd->payload[0]; - - while (i < payload_bytes) { - struct payload_section *payload = start + i; - - if (payload->type == type) - return gen8_hwsched_parse_payload(payload, key); - - i += struct_size(payload, data, payload->dwords); - } - - return 0; -} - -static u32 get_payload_rb_key_legacy(struct adreno_device *adreno_dev, - u32 rb_id, u32 key) -{ - struct hfi_context_bad_cmd_legacy *cmd = adreno_dev->hwsched.ctxt_bad; - u32 i = 0, payload_bytes; - void *start; - - if (!cmd->hdr) - return 0; - - payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - - offsetof(struct hfi_context_bad_cmd_legacy, payload); - - start = &cmd->payload[0]; - - while (i < payload_bytes) { - struct payload_section *payload = start + i; - - if (payload->type == PAYLOAD_RB) { - u32 id = gen8_hwsched_parse_payload(payload, KEY_RB_ID); - - if 
(id == rb_id) - return gen8_hwsched_parse_payload(payload, key); - } - - i += struct_size(payload, data, payload->dwords); - } - - return 0; -} - struct syncobj_flags { unsigned long mask; const char *name; @@ -365,148 +306,6 @@ static void find_timeout_syncobj(struct adreno_device *adreno_dev, u32 ctxt_id, kgsl_context_put(context); } -static void log_gpu_fault_legacy(struct adreno_device *adreno_dev) -{ - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - struct device *dev = &gmu->pdev->dev; - struct hfi_context_bad_cmd_legacy *cmd = adreno_dev->hwsched.ctxt_bad; - - switch (cmd->error) { - case GMU_GPU_HW_HANG: - dev_crit_ratelimited(dev, "MISC: GPU hang detected\n"); - break; - case GMU_GPU_SW_HANG: - dev_crit_ratelimited(dev, "gpu timeout ctx %d ts %u\n", - cmd->ctxt_id, cmd->ts); - break; - case GMU_CP_OPCODE_ERROR: - dev_crit_ratelimited(dev, - "CP opcode error interrupt | opcode=0x%8.8x\n", - gen8_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, - KEY_CP_OPCODE_ERROR)); - break; - case GMU_CP_PROTECTED_ERROR: { - u32 status = gen8_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, - KEY_CP_PROTECTED_ERROR); - - dev_crit_ratelimited(dev, - "CP | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", - status & (1 << 20) ? 
"READ" : "WRITE", - status & 0x3FFFF, status); - } - break; - case GMU_CP_ILLEGAL_INST_ERROR: - dev_crit_ratelimited(dev, "CP Illegal instruction error\n"); - break; - case GMU_CP_UCODE_ERROR: - dev_crit_ratelimited(dev, "CP ucode error interrupt\n"); - break; - case GMU_CP_HW_FAULT_ERROR: - dev_crit_ratelimited(dev, - "CP | Ringbuffer HW fault | status=0x%8.8x\n", - gen8_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, - KEY_CP_HW_FAULT)); - break; - case GMU_GPU_PREEMPT_TIMEOUT: { - u32 cur, next, cur_rptr, cur_wptr, next_rptr, next_wptr; - - cur = gen8_hwsched_lookup_key_value_legacy(adreno_dev, - PAYLOAD_PREEMPT_TIMEOUT, KEY_PREEMPT_TIMEOUT_CUR_RB_ID); - next = gen8_hwsched_lookup_key_value_legacy(adreno_dev, - PAYLOAD_PREEMPT_TIMEOUT, - KEY_PREEMPT_TIMEOUT_NEXT_RB_ID); - cur_rptr = get_payload_rb_key_legacy(adreno_dev, cur, KEY_RB_RPTR); - cur_wptr = get_payload_rb_key_legacy(adreno_dev, cur, KEY_RB_WPTR); - next_rptr = get_payload_rb_key_legacy(adreno_dev, next, KEY_RB_RPTR); - next_wptr = get_payload_rb_key_legacy(adreno_dev, next, KEY_RB_WPTR); - - dev_crit_ratelimited(dev, - "Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n", - cur, cur_rptr, cur_wptr, next, next_rptr, next_wptr); - } - break; - case GMU_CP_GPC_ERROR: - dev_crit_ratelimited(dev, "RBBM: GPC error\n"); - break; - case GMU_CP_BV_OPCODE_ERROR: - dev_crit_ratelimited(dev, - "CP BV opcode error | opcode=0x%8.8x\n", - gen8_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, - KEY_CP_BV_OPCODE_ERROR)); - break; - case GMU_CP_BV_PROTECTED_ERROR: { - u32 status = gen8_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, - KEY_CP_BV_PROTECTED_ERROR); - - dev_crit_ratelimited(dev, - "CP BV | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", - status & (1 << 20) ? 
"READ" : "WRITE", - status & 0x3FFFF, status); - } - break; - case GMU_CP_BV_HW_FAULT_ERROR: - dev_crit_ratelimited(dev, - "CP BV | Ringbuffer HW fault | status=0x%8.8x\n", - gen8_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, - KEY_CP_HW_FAULT)); - break; - case GMU_CP_BV_ILLEGAL_INST_ERROR: - dev_crit_ratelimited(dev, "CP BV Illegal instruction error\n"); - break; - case GMU_CP_BV_UCODE_ERROR: - dev_crit_ratelimited(dev, "CP BV ucode error interrupt\n"); - break; - case GMU_GPU_SW_FUSE_VIOLATION: - dev_crit_ratelimited(dev, "RBBM: SW Feature Fuse violation status=0x%8.8x\n", - gen8_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, - KEY_SWFUSE_VIOLATION_FAULT)); - break; - case GMU_GPU_AQE0_OPCODE_ERRROR: - dev_crit_ratelimited(dev, "AQE0 opcode error | opcode=0x%8.8x\n", - gen8_hwsched_lookup_key_value_legacy(adreno_dev, - PAYLOAD_FAULT_REGS, KEY_AQE0_OPCODE_ERROR)); - break; - case GMU_GPU_AQE0_UCODE_ERROR: - dev_crit_ratelimited(dev, "AQE0 ucode error interrupt\n"); - break; - case GMU_GPU_AQE0_HW_FAULT_ERROR: - dev_crit_ratelimited(dev, "AQE0 HW fault | status=0x%8.8x\n", - gen8_hwsched_lookup_key_value_legacy(adreno_dev, - PAYLOAD_FAULT_REGS, KEY_AQE0_HW_FAULT)); - break; - case GMU_GPU_AQE0_ILLEGAL_INST_ERROR: - dev_crit_ratelimited(dev, "AQE0 Illegal instruction error\n"); - break; - case GMU_GPU_AQE1_OPCODE_ERRROR: - dev_crit_ratelimited(dev, "AQE1 opcode error | opcode=0x%8.8x\n", - gen8_hwsched_lookup_key_value_legacy(adreno_dev, - PAYLOAD_FAULT_REGS, KEY_AQE1_OPCODE_ERROR)); - break; - case GMU_GPU_AQE1_UCODE_ERROR: - dev_crit_ratelimited(dev, "AQE1 ucode error interrupt\n"); - break; - case GMU_GPU_AQE1_HW_FAULT_ERROR: - dev_crit_ratelimited(dev, "AQE1 HW fault | status=0x%8.8x\n", - gen8_hwsched_lookup_key_value_legacy(adreno_dev, - PAYLOAD_FAULT_REGS, KEY_AQE1_HW_FAULT)); - break; - case GMU_GPU_AQE1_ILLEGAL_INST_ERROR: - dev_crit_ratelimited(dev, "AQE1 Illegal instruction error\n"); - break; - case 
GMU_SYNCOBJ_TIMEOUT_ERROR: - dev_crit_ratelimited(dev, "syncobj timeout ctx %d ts %u\n", - cmd->ctxt_id, cmd->ts); - find_timeout_syncobj(adreno_dev, cmd->ctxt_id, cmd->ts); - break; - case GMU_CP_UNKNOWN_ERROR: - fallthrough; - default: - dev_crit_ratelimited(dev, "Unknown GPU fault: %u\n", - cmd->error); - break; - } -} - /* Look up a particular key's value for a given type of payload */ static u32 gen8_hwsched_lookup_key_value(struct adreno_device *adreno_dev, u32 type, u32 key) @@ -787,20 +586,12 @@ static u32 peek_next_header(struct adreno_device *adreno_dev, struct gen8_gmu_de static void process_ctx_bad(struct adreno_device *adreno_dev) { - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - - if (GMU_VER_MINOR(gmu->ver.hfi) < 2) { - log_gpu_fault_legacy(adreno_dev); - goto done; - } - /* Non fatal RBBM error interrupts don't go through reset and recovery */ if (!log_gpu_fault(adreno_dev)) { memset(adreno_dev->hwsched.ctxt_bad, 0x0, HFI_MAX_MSG_SIZE); return; } -done: gen8_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); } From e732e7ea8657e7de4dfa92854010cc4b04cc60ba Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Wed, 3 Apr 2024 00:20:22 +0530 Subject: [PATCH 0745/1016] kgsl: gen8: Fix IFPC power up programming for pipe registers Pipe registers should be part of external power up register list. Thus, move pipe registers in IFPC power up register list to external power up register list. 
Change-Id: Ifbbdab397f6a13eaef40d9f5792dc7f67118d837 Signed-off-by: Kamal Agrawal --- adreno_gen8.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index e00f4fdeaa..e2255c84e2 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -81,7 +81,6 @@ static const u32 gen8_ifpc_pwrup_reglist[] = { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_16, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_17, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_18, - GEN8_CP_PROTECT_CNTL_PIPE, GEN8_CP_PROTECT_REG_GLOBAL, GEN8_CP_PROTECT_REG_GLOBAL + 1, GEN8_CP_PROTECT_REG_GLOBAL + 2, @@ -129,10 +128,11 @@ static const u32 gen8_ifpc_pwrup_reglist[] = { GEN8_CP_PROTECT_REG_GLOBAL + 44, GEN8_CP_PROTECT_REG_GLOBAL + 45, GEN8_CP_PROTECT_REG_GLOBAL + 63, - GEN8_CP_PROTECT_REG_PIPE + 15, }; static const struct gen8_pwrup_extlist gen8_0_0_pwrup_extlist[] = { + { GEN8_CP_PROTECT_CNTL_PIPE, BIT(PIPE_BR) | BIT(PIPE_BV) | BIT(PIPE_LPAC)}, + { GEN8_CP_PROTECT_REG_PIPE + 15, BIT(PIPE_BR) | BIT(PIPE_BV) | BIT(PIPE_LPAC)}, { GEN8_GRAS_TSEFE_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR)}, { GEN8_GRAS_NC_MODE_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR)}, { GEN8_GRAS_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR)}, @@ -874,7 +874,7 @@ static void gen8_patch_pwrup_reglist(struct adreno_device *adreno_dev) * Write external pipe specific regs (
- triplets) * offset and the current value into GPU buffer */ - for (pipe_id = PIPE_BR; pipe_id <= PIPE_BV; pipe_id++) { + for (pipe_id = PIPE_BR; pipe_id <= PIPE_LPAC; pipe_id++) { for (i = 0; i < ARRAY_SIZE(gen8_0_0_pwrup_extlist); i++) { unsigned long pipe = (unsigned long)gen8_0_0_pwrup_extlist[i].pipelines; From 18d358452b17e83d8ebecfba470b8635f7c1c9e0 Mon Sep 17 00:00:00 2001 From: NISARG SHETH Date: Fri, 2 Feb 2024 18:00:04 +0530 Subject: [PATCH 0746/1016] kgsl: Use legacy path after first crashdumper timeout Once the crashdumper fails, there is a high likelihood that it will fail in subsequent invocations as well. Hence, avoid crashdumper path and use legacy snapshot path for further invocations post crashdumper script has timed out. Change-Id: Ia20daf843f9b868e3f40d34717cc8e285d3ecd89 Signed-off-by: NISARG SHETH --- adreno_gen7_snapshot.c | 3 +++ adreno_gen8_snapshot.c | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c index 40f3eb8ea9..0691b8b94d 100644 --- a/adreno_gen7_snapshot.c +++ b/adreno_gen7_snapshot.c @@ -207,6 +207,9 @@ static bool _gen7_do_crashdump(struct kgsl_device *device) unsigned int reg = 0; ktime_t timeout; + if (CD_SCRIPT_CHECK(device)) + return false; + kgsl_regwrite(device, GEN7_CP_CRASH_SCRIPT_BASE_LO, lower_32_bits(gen7_capturescript->gpuaddr)); kgsl_regwrite(device, GEN7_CP_CRASH_SCRIPT_BASE_HI, diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 5d44d74486..de410d5046 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -110,6 +110,9 @@ static bool _gen8_do_crashdump(struct kgsl_device *device) u32 reg = 0; ktime_t timeout; + if (CD_SCRIPT_CHECK(device)) + return false; + kgsl_regwrite(device, GEN8_CP_CRASH_DUMP_SCRIPT_BASE_LO, lower_32_bits(gen8_capturescript->gpuaddr)); kgsl_regwrite(device, GEN8_CP_CRASH_DUMP_SCRIPT_BASE_HI, @@ -886,6 +889,7 @@ static void gen8_snapshot_dbgahb_regs(struct kgsl_device *device, /* Marker for end of script */ 
CD_FINISH(ptr, offset); + func = gen8_legacy_snapshot_cluster_dbgahb; /* Try to run the crash dumper */ if (_gen8_do_crashdump(device)) func = gen8_snapshot_cluster_dbgahb; @@ -1062,6 +1066,7 @@ static void gen8_snapshot_mvc_regs(struct kgsl_device *device, /* Marker for end of script */ CD_FINISH(ptr, offset); + func = gen8_legacy_snapshot_mvc; /* Try to run the crash dumper */ if (_gen8_do_crashdump(device)) func = gen8_snapshot_mvc; @@ -1513,6 +1518,7 @@ static void gen8_reglist_snapshot(struct kgsl_device *device, /* Marker for end of script */ CD_FINISH(ptr, offset); + func = gen8_legacy_snapshot_registers; /* Try to run the crash dumper */ if (_gen8_do_crashdump(device)) func = gen8_snapshot_registers; From 3b700a7a9eff8286ffb2429449020f45559f09e5 Mon Sep 17 00:00:00 2001 From: NISARG SHETH Date: Fri, 8 Mar 2024 17:53:27 +0530 Subject: [PATCH 0747/1016] kgsl: hwsched: Don't allow GPU warmboot when GMU warmboot fails Currently, GPU warmboot can happen even if there was a failure during GMU warmboot sequence. Force coldboot for both GPU and GMU whenever there is an error during warmboot sequence. 
Change-Id: I60f512f34d94b8b1bb6e9934dd6f7e45a979f404 Signed-off-by: NISARG SHETH --- adreno_gen7_hwsched_hfi.c | 5 ++--- adreno_gen8_hwsched_hfi.c | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 3c397d8f06..049ae39e23 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -2237,9 +2237,6 @@ static int gen7_hwsched_coldboot_gpu(struct adreno_device *adreno_dev) struct pending_cmd ack = {0}; int ret = 0; - /* Clear the bit so we can set it when GPU bootup message recording is successful */ - clear_bit(GMU_PRIV_WARMBOOT_GPU_BOOT_DONE, &gmu->flags); - ret = gen7_hwsched_hfi_send_warmboot_cmd(adreno_dev, gmu->gpu_boot_scratch, HFI_WARMBOOT_SET_SCRATCH, true, &ack); if (ret) @@ -2315,6 +2312,7 @@ err: if (ret) { /* Clear the bit in case of an error so next boot will be coldboot */ clear_bit(GMU_PRIV_WARMBOOT_GMU_INIT_DONE, &gmu->flags); + clear_bit(GMU_PRIV_WARMBOOT_GPU_BOOT_DONE, &gmu->flags); gen7_hwsched_hfi_stop(adreno_dev); } @@ -2363,6 +2361,7 @@ int gen7_hwsched_hfi_start(struct adreno_device *adreno_dev) /* Reset the variable here and set it when we successfully record the scratch */ clear_bit(GMU_PRIV_WARMBOOT_GMU_INIT_DONE, &gmu->flags); + clear_bit(GMU_PRIV_WARMBOOT_GPU_BOOT_DONE, &gmu->flags); ret = gen7_hwsched_hfi_send_warmboot_cmd(adreno_dev, gmu->gmu_init_scratch, HFI_WARMBOOT_SET_SCRATCH, false, &ack); diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 41757ffd9b..40ac65c0a2 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -2193,9 +2193,6 @@ static int gen8_hwsched_coldboot_gpu(struct adreno_device *adreno_dev) struct pending_cmd ack = {0}; int ret = 0; - /* Clear the bit so we can set it when GPU bootup message recording is successful */ - clear_bit(GMU_PRIV_WARMBOOT_GPU_BOOT_DONE, &gmu->flags); - ret = gen8_hwsched_hfi_send_warmboot_cmd(adreno_dev, gmu->gpu_boot_scratch, HFI_WARMBOOT_SET_SCRATCH, true, 
&ack); if (ret) @@ -2271,6 +2268,7 @@ err: if (ret) { /* Clear the bit in case of an error so next boot will be coldboot */ clear_bit(GMU_PRIV_WARMBOOT_GMU_INIT_DONE, &gmu->flags); + clear_bit(GMU_PRIV_WARMBOOT_GPU_BOOT_DONE, &gmu->flags); gen8_hwsched_hfi_stop(adreno_dev); } @@ -2319,6 +2317,7 @@ int gen8_hwsched_hfi_start(struct adreno_device *adreno_dev) /* Reset the variable here and set it when we successfully record the scratch */ clear_bit(GMU_PRIV_WARMBOOT_GMU_INIT_DONE, &gmu->flags); + clear_bit(GMU_PRIV_WARMBOOT_GPU_BOOT_DONE, &gmu->flags); ret = gen8_hwsched_hfi_send_warmboot_cmd(adreno_dev, gmu->gmu_init_scratch, HFI_WARMBOOT_SET_SCRATCH, false, &ack); From 0359b43bfd5f9cdd01cf031163ab1dcdfc3fe3bf Mon Sep 17 00:00:00 2001 From: NISARG SHETH Date: Tue, 2 Apr 2024 22:44:05 +0530 Subject: [PATCH 0748/1016] kgsl: build: Add support to compile graphics driver for anorak Add support to compile graphics driver for anorak. Change-Id: Ifd1295242088db30758dd226270f4e342b72e462 Signed-off-by: NISARG SHETH --- config/anorak_consolidate_gpuconf | 10 ++++++++++ config/anorak_gki_gpuconf | 9 +++++++++ 2 files changed, 19 insertions(+) create mode 100644 config/anorak_consolidate_gpuconf create mode 100644 config/anorak_gki_gpuconf diff --git a/config/anorak_consolidate_gpuconf b/config/anorak_consolidate_gpuconf new file mode 100644 index 0000000000..853d8f55e0 --- /dev/null +++ b/config/anorak_consolidate_gpuconf @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+ +CONFIG_QCOM_KGSL=m +CONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 +CONFIG_QCOM_KGSL_SORT_POOL=y +CONFIG_QCOM_KGSL_FENCE_TRACE=y +CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y +CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=y +CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR="msm-adreno-tz" diff --git a/config/anorak_gki_gpuconf b/config/anorak_gki_gpuconf new file mode 100644 index 0000000000..a26c4bcab3 --- /dev/null +++ b/config/anorak_gki_gpuconf @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + +CONFIG_QCOM_KGSL=m +CONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 +CONFIG_QCOM_KGSL_SORT_POOL=y +CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y +CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=y +CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR="msm-adreno-tz" From 3f71a91eb3cc2e665853ffc9c9d4e19a1001e674 Mon Sep 17 00:00:00 2001 From: Mohammed Mirza Mandayappurath Manzoor Date: Thu, 7 Dec 2023 13:09:24 -0800 Subject: [PATCH 0749/1016] msm: kgsl: Poll on write index to process start message HFI While handling start message HFI, GMU requests for perfcounter allocations or memory allocations by multiple tasks could cause misaligned interrupts which could cause KGSL to incorrectly timeout while having messages in the queue. Instead of polling for the interrupt status, poll the write index to see if it is different from read index and read the queue accordingly. And clear the outstanding interrupts after everything is processed. 
Change-Id: I5c7c08bdeab47c2a8338210a90520b3b9bc2a053 Signed-off-by: Mohammed Mirza Mandayappurath Manzoor --- adreno_a6xx_hwsched_hfi.c | 72 +++++++++++++++++++++------------------ adreno_gen7_hwsched_hfi.c | 72 +++++++++++++++++++++------------------ adreno_gen8_hwsched_hfi.c | 72 +++++++++++++++++++++------------------ adreno_hwsched.c | 31 +++++++++++++++++ adreno_hwsched.h | 9 +++++ 5 files changed, 154 insertions(+), 102 deletions(-) diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index 563abcab43..cf1a87399f 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -1005,57 +1005,61 @@ static int send_start_msg(struct adreno_device *adreno_dev) return rc; poll: - rc = gmu_core_timed_poll_check(device, A6XX_GMU_GMU2HOST_INTR_INFO, - HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT, HFI_IRQ_MSGQ_MASK); + rc = adreno_hwsched_poll_msg_queue_write_index(gmu->hfi.hfi_mem); if (rc) { dev_err(&gmu->pdev->dev, "Timed out processing MSG_START seqnum: %d\n", seqnum); + gmu_core_fault_snapshot(device, GMU_FAULT_H2F_MSG_START); goto done; } - /* Clear the interrupt */ - gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_CLR, - HFI_IRQ_MSGQ_MASK); - - if (a6xx_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)) <= 0) { - dev_err(&gmu->pdev->dev, "MSG_START: no payload\n"); - rc = -EINVAL; + rc = a6xx_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)); + if (rc <= 0) { + dev_err(&gmu->pdev->dev, + "MSG_START: payload error: %d\n", + rc); + gmu_core_fault_snapshot(device, GMU_FAULT_H2F_MSG_START); goto done; } - if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) { - rc = a6xx_receive_ack_cmd(gmu, rcvd, &pending_ack); - if (rc) - return rc; - - return check_ack_failure(adreno_dev, &pending_ack); - } - - if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_MEM_ALLOC) { + switch (MSG_HDR_GET_ID(rcvd[0])) { + case F2H_MSG_MEM_ALLOC: rc = mem_alloc_reply(adreno_dev, rcvd); - if (rc) - return rc; - - goto poll; - } - - if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_GMU_CNTR_REGISTER) { + 
break; + case F2H_MSG_GMU_CNTR_REGISTER: rc = gmu_cntr_register_reply(adreno_dev, rcvd); - if (rc) - return rc; - goto poll; + break; + default: + if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) { + rc = a6xx_receive_ack_cmd(gmu, rcvd, &pending_ack); + /* Check ack failure if we received an expected ack */ + if (!rc) + rc = check_ack_failure(adreno_dev, &pending_ack); + goto done; + } else { + dev_err(&gmu->pdev->dev, + "MSG_START: unexpected response id:%d, type:%d\n", + MSG_HDR_GET_ID(rcvd[0]), + MSG_HDR_GET_TYPE(rcvd[0])); + gmu_core_fault_snapshot(device, GMU_FAULT_H2F_MSG_START); + rc = -EINVAL; + goto done; + } } - dev_err(&gmu->pdev->dev, - "MSG_START: unexpected response id:%d, type:%d\n", - MSG_HDR_GET_ID(rcvd[0]), - MSG_HDR_GET_TYPE(rcvd[0])); + if (!rc) + goto poll; done: - gmu_core_fault_snapshot(device, GMU_FAULT_H2F_MSG_START); - + /* Clear the interrupt */ + gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_CLR, HFI_IRQ_MSGQ_MASK); + /* + * Add a write barrier to post the interrupt clear so that we dont have a + * pending interrupt. 
+ */ + wmb(); return rc; } diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 16fa234125..49ef08943f 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -1823,57 +1823,61 @@ static int send_start_msg(struct adreno_device *adreno_dev) return rc; poll: - rc = gmu_core_timed_poll_check(device, GEN7_GMU_GMU2HOST_INTR_INFO, - HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT, HFI_IRQ_MSGQ_MASK); + rc = adreno_hwsched_poll_msg_queue_write_index(gmu->hfi.hfi_mem); if (rc) { dev_err(&gmu->pdev->dev, "Timed out processing MSG_START seqnum: %d\n", seqnum); + gmu_core_fault_snapshot(device, GMU_FAULT_H2F_MSG_START); goto done; } - /* Clear the interrupt */ - gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, - HFI_IRQ_MSGQ_MASK); - - if (gen7_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)) <= 0) { - dev_err(&gmu->pdev->dev, "MSG_START: no payload\n"); - rc = -EINVAL; + rc = gen7_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)); + if (rc <= 0) { + dev_err(&gmu->pdev->dev, + "MSG_START: payload error: %d\n", + rc); + gmu_core_fault_snapshot(device, GMU_FAULT_H2F_MSG_START); goto done; } - if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) { - rc = gen7_receive_ack_cmd(gmu, rcvd, &pending_ack); - if (rc) - return rc; - - return check_ack_failure(adreno_dev, &pending_ack); - } - - if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_MEM_ALLOC) { + switch (MSG_HDR_GET_ID(rcvd[0])) { + case F2H_MSG_MEM_ALLOC: rc = mem_alloc_reply(adreno_dev, rcvd); - if (rc) - return rc; - - goto poll; - } - - if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_GMU_CNTR_REGISTER) { + break; + case F2H_MSG_GMU_CNTR_REGISTER: rc = gmu_cntr_register_reply(adreno_dev, rcvd); - if (rc) - return rc; - goto poll; + break; + default: + if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) { + rc = gen7_receive_ack_cmd(gmu, rcvd, &pending_ack); + /* Check ack failure if we received an expected ack */ + if (!rc) + rc = check_ack_failure(adreno_dev, &pending_ack); + goto done; + } else { + 
dev_err(&gmu->pdev->dev, + "MSG_START: unexpected response id:%d, type:%d\n", + MSG_HDR_GET_ID(rcvd[0]), + MSG_HDR_GET_TYPE(rcvd[0])); + gmu_core_fault_snapshot(device, GMU_FAULT_H2F_MSG_START); + rc = -EINVAL; + goto done; + } } - dev_err(&gmu->pdev->dev, - "MSG_START: unexpected response id:%d, type:%d\n", - MSG_HDR_GET_ID(rcvd[0]), - MSG_HDR_GET_TYPE(rcvd[0])); + if (!rc) + goto poll; done: - gmu_core_fault_snapshot(device, GMU_FAULT_H2F_MSG_START); - + /* Clear the interrupt */ + gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, HFI_IRQ_MSGQ_MASK); + /* + * Add a write barrier to post the interrupt clear so that we dont have a + * pending interrupt. + */ + wmb(); return rc; } diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 7af4e2c275..270f5d5a25 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -1794,57 +1794,61 @@ static int send_start_msg(struct adreno_device *adreno_dev) return rc; poll: - rc = gmu_core_timed_poll_check(device, GEN8_GMUCX_GMU2HOST_INTR_INFO, - HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT, HFI_IRQ_MSGQ_MASK); + rc = adreno_hwsched_poll_msg_queue_write_index(gmu->hfi.hfi_mem); if (rc) { dev_err(&gmu->pdev->dev, "Timed out processing MSG_START seqnum: %d\n", seqnum); + gmu_core_fault_snapshot(device, GMU_FAULT_H2F_MSG_START); goto done; } - /* Clear the interrupt */ - gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR, - HFI_IRQ_MSGQ_MASK); - - if (gen8_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)) <= 0) { - dev_err(&gmu->pdev->dev, "MSG_START: no payload\n"); - rc = -EINVAL; + rc = gen8_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)); + if (rc <= 0) { + dev_err(&gmu->pdev->dev, + "MSG_START: payload error: %d\n", + rc); + gmu_core_fault_snapshot(device, GMU_FAULT_H2F_MSG_START); goto done; } - if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) { - rc = gen8_receive_ack_cmd(gmu, rcvd, &pending_ack); - if (rc) - return rc; - - return check_ack_failure(adreno_dev, &pending_ack); - } - - if 
(MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_MEM_ALLOC) { + switch (MSG_HDR_GET_ID(rcvd[0])) { + case F2H_MSG_MEM_ALLOC: rc = mem_alloc_reply(adreno_dev, rcvd); - if (rc) - return rc; - - goto poll; - } - - if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_GMU_CNTR_REGISTER) { + break; + case F2H_MSG_GMU_CNTR_REGISTER: rc = gmu_cntr_register_reply(adreno_dev, rcvd); - if (rc) - return rc; - goto poll; + break; + default: + if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) { + rc = gen8_receive_ack_cmd(gmu, rcvd, &pending_ack); + /* Check ack failure if we received an expected ack */ + if (!rc) + rc = check_ack_failure(adreno_dev, &pending_ack); + goto done; + } else { + dev_err(&gmu->pdev->dev, + "MSG_START: unexpected response id:%d, type:%d\n", + MSG_HDR_GET_ID(rcvd[0]), + MSG_HDR_GET_TYPE(rcvd[0])); + gmu_core_fault_snapshot(device, GMU_FAULT_H2F_MSG_START); + rc = -EINVAL; + goto done; + } } - dev_err(&gmu->pdev->dev, - "MSG_START: unexpected response id:%d, type:%d\n", - MSG_HDR_GET_ID(rcvd[0]), - MSG_HDR_GET_TYPE(rcvd[0])); + if (!rc) + goto poll; done: - gmu_core_fault_snapshot(device, GMU_FAULT_H2F_MSG_START); - + /* Clear the interrupt */ + gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR, HFI_IRQ_MSGQ_MASK); + /* + * Add a write barrier to post the interrupt clear so that we dont have a + * pending interrupt. 
+ */ + wmb(); return rc; } diff --git a/adreno_hwsched.c b/adreno_hwsched.c index d3d9a997df..5c11b5a1e3 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -13,6 +13,8 @@ #include #include +#define POLL_SLEEP_US 100 + /* * Number of commands that can be queued in a context before it sleeps * @@ -2462,3 +2464,32 @@ bool adreno_hwsched_log_nonfatal_gpu_fault(struct adreno_device *adreno_dev, return non_fatal; } + +int adreno_hwsched_poll_msg_queue_write_index(struct kgsl_memdesc *hfi_mem) +{ + struct hfi_queue_table *tbl = hfi_mem->hostptr; + struct hfi_queue_header *hdr = &tbl->qhdr[HFI_MSG_ID]; + unsigned long timeout = jiffies + msecs_to_jiffies(HFI_RSP_TIMEOUT); + + while (time_before(jiffies, timeout)) { + if (hdr->write_index != hdr->read_index) + goto done; + + /* Wait for upto 100 us before trying again */ + usleep_range((POLL_SLEEP_US >> 2) + 1, POLL_SLEEP_US); + cpu_relax(); + } + + /* Check if the write index has advanced */ + if (hdr->write_index == hdr->read_index) + return -ETIMEDOUT; + +done: + /* + * This is to ensure that the queue is not read speculatively before the + * polling condition is evaluated. 
+ */ + rmb(); + + return 0; +} diff --git a/adreno_hwsched.h b/adreno_hwsched.h index dae80c785d..729615a425 100644 --- a/adreno_hwsched.h +++ b/adreno_hwsched.h @@ -270,4 +270,13 @@ void adreno_hwsched_syncobj_kfence_put(struct kgsl_drawobj_sync *syncobj); */ bool adreno_hwsched_log_nonfatal_gpu_fault(struct adreno_device *adreno_dev, struct device *dev, u32 error); + +/** + * adreno_hwsched_poll_msg_queue_write_index - Poll on write index of HFI message queue + * @hfi_mem: Memory descriptor for HFI queue table + * + * Returns zero if write index advances or ETIMEDOUT if timed out polling + */ +int adreno_hwsched_poll_msg_queue_write_index(struct kgsl_memdesc *hfi_mem); + #endif From 3fad13d4a71f7bea8bd2fa297567c357d8f10aa0 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Wed, 10 Apr 2024 11:58:53 +0530 Subject: [PATCH 0750/1016] kgsl: gen8: Access LPAC registers when feature flag is enabled Some GPUs do not have LPAC pipe. Hence, snapshot LPAC registers only when LPAC feature flag is enabled. 
Change-Id: I01e414ebef5b8d0c26b74d7a6280f7782004baea Signed-off-by: Kamal Agrawal --- adreno_gen8_snapshot.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index d586a5c003..1c230d94ad 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -1696,16 +1696,18 @@ void gen8_snapshot(struct adreno_device *adreno_dev, gen8_regread_aperture(device, GEN8_CP_IB2_REM_SIZE_PIPE, &snapshot->ib2size, PIPE_BR, 0, 0); - gen8_regread64_aperture(device, GEN8_CP_IB1_BASE_LO_PIPE, - GEN8_CP_IB1_BASE_HI_PIPE, &snapshot->ib1base_lpac, PIPE_LPAC, 0, 0); + if (ADRENO_FEATURE(adreno_dev, ADRENO_LPAC)) { + gen8_regread64_aperture(device, GEN8_CP_IB1_BASE_LO_PIPE, + GEN8_CP_IB1_BASE_HI_PIPE, &snapshot->ib1base_lpac, PIPE_LPAC, 0, 0); - gen8_regread64_aperture(device, GEN8_CP_IB2_BASE_LO_PIPE, - GEN8_CP_IB2_BASE_HI_PIPE, &snapshot->ib2base_lpac, PIPE_LPAC, 0, 0); + gen8_regread64_aperture(device, GEN8_CP_IB2_BASE_LO_PIPE, + GEN8_CP_IB2_BASE_HI_PIPE, &snapshot->ib2base_lpac, PIPE_LPAC, 0, 0); - gen8_regread_aperture(device, GEN8_CP_IB1_REM_SIZE_PIPE, + gen8_regread_aperture(device, GEN8_CP_IB1_REM_SIZE_PIPE, &snapshot->ib1size_lpac, PIPE_LPAC, 0, 0); - gen8_regread_aperture(device, GEN8_CP_IB2_REM_SIZE_PIPE, + gen8_regread_aperture(device, GEN8_CP_IB2_REM_SIZE_PIPE, &snapshot->ib2size_lpac, PIPE_LPAC, 0, 0); + } /* Assert the isStatic bit before triggering snapshot */ kgsl_regwrite(device, GEN8_RBBM_SNAPSHOT_STATUS, 0x1); From efab24d828950bc53f4389dd25f7313eb9fb4590 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Wed, 10 Apr 2024 12:32:31 -0700 Subject: [PATCH 0751/1016] kgsl: gen8: Add support for Gen8_0_1 Gen8_0_1 has a different compatible string. Add a new core structure to support Gen8_0_1. 
Change-Id: Ic95897e903b30cc5ed9c5cd311a964d699e4aca2 Signed-off-by: Hareesh Gundu --- adreno-gpulist.h | 38 ++++++++++++++++++++++++++++++++++++++ adreno.h | 1 + 2 files changed, 39 insertions(+) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 9f963e2902..961d4b2367 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2743,6 +2743,43 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .noc_timeout_us = 3410, /* 3.41 msec */ }; +static const struct adreno_gen8_core adreno_gpu_core_gen8_0_1 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_GEN8_0_1, + UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), + .compatible = "qcom,adreno-gpu-gen8-0-1", + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | + ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | + ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_BCL | + ADRENO_IFPC | ADRENO_HW_FENCE | ADRENO_PREEMPTION | + ADRENO_ACD, + .gpudev = &adreno_gen8_hwsched_gpudev.base, + .perfcounters = &adreno_gen8_perfcounters, + .uche_gmem_alignment = SZ_64M, + .gmem_size = 12 * SZ_1M, + .bus_width = 32, + .snapshot_size = SZ_8M, + }, + .aqefw_name = "gen80000_aqe.fw", + .sqefw_name = "gen80000_sqe.fw", + .gmufw_name = "gen80000_gmu.bin", + .zap_name = "gen80000_zap.mbn", + .ao_hwcg = gen8_ao_hwcg_regs, + .ao_hwcg_count = ARRAY_SIZE(gen8_ao_hwcg_regs), + .gbif = gen8_0_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen8_0_0_gbif_cx_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = gen8_0_0_protected_regs, + .nonctxt_regs = gen8_0_0_nonctxt_regs, + .highest_bank_bit = 16, + .gmu_hub_clk_freq = 200000000, + .gen8_snapshot_block_list = &gen8_0_0_snapshot_block_list, + .fast_bus_hint = true, + .bcl_data = 1, + .acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4), + .noc_timeout_us = 3410, /* 3.41 msec */ +}; + /* GEN8_4_0 noncontext register list */ static const struct gen8_nonctxt_regs gen8_4_0_nonctxt_regs[] = { { GEN8_CP_SMMU_STREAM_ID_LPAC, 0x00000101, BIT(PIPE_NONE) }, @@ -2894,5 +2931,6 @@ static const struct 
adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_gen7_14_0.base, &adreno_gpu_core_gen7_11_0.base, &adreno_gpu_core_gen8_0_0.base, + &adreno_gpu_core_gen8_0_1.base, &adreno_gpu_core_gen8_4_0.base, }; diff --git a/adreno.h b/adreno.h index 70351591a2..d80abfa959 100644 --- a/adreno.h +++ b/adreno.h @@ -246,6 +246,7 @@ enum adreno_gpurev { ADRENO_REV_GEN7_14_0 = ADRENO_GPUREV_VALUE(7, 14, 0), ADRENO_REV_GEN7_11_0 = ADRENO_GPUREV_VALUE(7, 11, 0), ADRENO_REV_GEN8_0_0 = ADRENO_GPUREV_VALUE(8, 0, 0), + ADRENO_REV_GEN8_0_1 = ADRENO_GPUREV_VALUE(8, 0, 1), ADRENO_REV_GEN8_4_0 = ADRENO_GPUREV_VALUE(8, 4, 0), }; From afe82deda62521932129182854749e42efca1726 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Wed, 10 Apr 2024 10:05:45 -0700 Subject: [PATCH 0752/1016] kgsl: gen8: Disable PS kill wave early retire Gen8 dynamic wave pairing is leading to shader processor hang in certain scenarios. Hence disable PS kill wave early retire. Change-Id: I411b9cb1610b30b789819fd74b30e35a86726544 Signed-off-by: Hareesh Gundu --- adreno-gpulist.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 9f963e2902..573f236277 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2621,7 +2621,11 @@ static const struct gen8_nonctxt_regs gen8_0_0_nonctxt_regs[] = { * BIT(30): Disable LPAC auto-promotion */ { GEN8_SP_CHICKEN_BITS_1, BIT(26) | BIT(30), BIT(PIPE_NONE) }, - { GEN8_SP_CHICKEN_BITS_2, 0x00800000, BIT(PIPE_NONE) }, + /* + * BIT(22): Disable PS out of order retire + * BIT(23): Enable half wave mode and MM instruction src&dst is half precision + */ + { GEN8_SP_CHICKEN_BITS_2, BIT(22) | BIT(23), BIT(PIPE_NONE) }, { GEN8_SP_CHICKEN_BITS_3, 0x00300000, BIT(PIPE_NONE) }, { GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, 0x00000080, BIT(PIPE_NONE) }, { GEN8_SP_READ_SEL, 0x0001ff00, BIT(PIPE_NONE) }, @@ -2780,7 +2784,11 @@ static const struct gen8_nonctxt_regs gen8_4_0_nonctxt_regs[] = { * BIT(30): Disable LPAC auto-promotion */ { 
GEN8_SP_CHICKEN_BITS_1, BIT(26) | BIT(30), BIT(PIPE_NONE) }, - { GEN8_SP_CHICKEN_BITS_2, 0x00800000, BIT(PIPE_NONE) }, + /* + * BIT(22): Disable PS out of order retire + * BIT(23): Enable half wave mode and MM instruction src&dst is half precision + */ + { GEN8_SP_CHICKEN_BITS_2, BIT(22) | BIT(23), BIT(PIPE_NONE) }, { GEN8_SP_CHICKEN_BITS_3, 0x00300000, BIT(PIPE_NONE) }, { GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, 0x00000080, BIT(PIPE_NONE) }, { GEN8_SP_READ_SEL, 0x0001ff00, BIT(PIPE_NONE) }, From d822e23db82ed41a5508f190249728b8479dbb04 Mon Sep 17 00:00:00 2001 From: Sravanthi Ubbara Date: Wed, 24 Jan 2024 15:16:16 +0530 Subject: [PATCH 0753/1016] kgsl: Fix null pointer dereference in kgsl_lock_sgt() failure case Update memdesc->sgt pointer before calling kgsl_lock_sgt(). This helps to fix null pointer deference when kgsl_lock_sgt() fails. Also, add a check for null pointer in kgsl_free_pages_from_sgt(). Change-Id: I1a72f26fcfbb99858f9a9365244995da36e69a67 Signed-off-by: Sravanthi Ubbara --- kgsl_sharedmem.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kgsl_sharedmem.c b/kgsl_sharedmem.c index 1db9c2a5c2..6121674496 100644 --- a/kgsl_sharedmem.c +++ b/kgsl_sharedmem.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -1430,6 +1430,9 @@ static void kgsl_free_pages_from_sgt(struct kgsl_memdesc *memdesc) int i; struct scatterlist *sg; + if (WARN_ON(!memdesc->sgt)) + return; + for_each_sg(memdesc->sgt->sgl, sg, memdesc->sgt->nents, i) { /* * sg_alloc_table_from_pages() will collapse any physically @@ -1689,17 +1692,18 @@ static int kgsl_alloc_secure_pages(struct kgsl_device *device, /* Now that we've moved to a sg table don't need the pages anymore */ kvfree(pages); + memdesc->sgt = sgt; + ret = kgsl_lock_sgt(sgt, size); if (ret) { if (ret != -EADDRNOTAVAIL) kgsl_free_pages_from_sgt(memdesc); sg_free_table(sgt); kfree(sgt); + memdesc->sgt = NULL; return ret; } - memdesc->sgt = sgt; - KGSL_STATS_ADD(size, &kgsl_driver.stats.secure, &kgsl_driver.stats.secure_max); From d33d10ca686da80db2558bcb865b9d0679b9c391 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Thu, 21 Mar 2024 02:27:57 +0530 Subject: [PATCH 0754/1016] kgsl: gen7: Update GPU preemption context record size Update preemption context record size for gen7_9_0 and gen7_9_1 GPUs as per the recommendation. 
Change-Id: Icd4b1f784d5939cda5660d90f72bc6899ddebb6e Signed-off-by: Pankaj Gupta --- adreno-gpulist.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index b0d6e03ea5..76fb19fb1c 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2319,7 +2319,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_9_0 = { .bcl_data = 1, .acv_perfmode_vote = BIT(2), .acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4), - .ctxt_record_size = (3572 * SZ_1K), + .ctxt_record_size = (4208 * SZ_1K), .preempt_level = 1, .fast_bus_hint = true, }; @@ -2357,7 +2357,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_9_1 = { .bcl_data = 1, .acv_perfmode_vote = BIT(2), .acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4), - .ctxt_record_size = (3572 * SZ_1K), + .ctxt_record_size = (4208 * SZ_1K), .preempt_level = 1, .fast_bus_hint = true, }; From 814620137d1ded797c4f47f8e000467363606332 Mon Sep 17 00:00:00 2001 From: Bruce Levy Date: Mon, 15 Apr 2024 17:06:18 -0700 Subject: [PATCH 0755/1016] Revert "kgsl: gen8: Add memory barriers before reading aperture registers" This reverts commit 4a72ae56b8a5f73253e04a58f3c84d04e06e700e. Signed-off-by: Bruce Levy Change-Id: I9c5244701d64f12e5c14a66c245037b147f8e71e --- adreno_gen7_perfcounter.c | 8 +------- adreno_gen8.c | 3 --- adreno_gen8_perfcounter.c | 8 +------- adreno_gen8_snapshot.c | 28 +++++++++------------------- 4 files changed, 11 insertions(+), 36 deletions(-) diff --git a/adreno_gen7_perfcounter.c b/adreno_gen7_perfcounter.c index 76b5acd093..f3157375e9 100644 --- a/adreno_gen7_perfcounter.c +++ b/adreno_gen7_perfcounter.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include "adreno.h" @@ -45,9 +45,6 @@ static int gen7_counter_br_enable(struct adreno_device *adreno_dev, FIELD_PREP(GENMASK(13, 12), PIPE_BR), group->flags); kgsl_regwrite(device, GEN7_CP_APERTURE_CNTL_HOST, val); - /* Ensure all writes are posted before accessing the piped register */ - mb(); - if (!ret) reg->value = 0; @@ -70,9 +67,6 @@ static int gen7_counter_bv_enable(struct adreno_device *adreno_dev, FIELD_PREP(GENMASK(13, 12), PIPE_BV), group->flags); kgsl_regwrite(device, GEN7_CP_APERTURE_CNTL_HOST, val); - /* Ensure all writes are posted before accessing the piped register */ - mb(); - if (!ret) reg->value = 0; diff --git a/adreno_gen8.c b/adreno_gen8.c index 268c758426..f95bbf8590 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -578,9 +578,6 @@ void gen8_host_aperture_set(struct adreno_device *adreno_dev, u32 pipe_id, kgsl_regwrite(KGSL_DEVICE(adreno_dev), GEN8_CP_APERTURE_CNTL_HOST, aperture_val); - /* Make sure the aperture write goes through before reading the registers */ - mb(); - gen8_dev->aperture = aperture_val; } diff --git a/adreno_gen8_perfcounter.c b/adreno_gen8_perfcounter.c index 60e4b5918a..41e9ba34d9 100644 --- a/adreno_gen8_perfcounter.c +++ b/adreno_gen8_perfcounter.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include "adreno.h" @@ -68,9 +68,6 @@ static int gen8_counter_br_enable(struct adreno_device *adreno_dev, kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, val); - /* Ensure all writes are posted before reading the piped register */ - mb(); - if (!ret) reg->value = 0; @@ -94,9 +91,6 @@ static int gen8_counter_bv_enable(struct adreno_device *adreno_dev, kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, val); - /* Ensure all writes are posted before reading the piped register */ - mb(); - if (!ret) reg->value = 0; diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 226abd7af6..139fb30265 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -190,9 +190,6 @@ size_t gen8_legacy_snapshot_registers(struct kgsl_device *device, kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL (info->slice_id, 0, 0, 0)); - /* Make sure the previous writes are posted before reading */ - mb(); - for (ptr = info->regs->regs; ptr[0] != UINT_MAX; ptr += 2) { count = REG_COUNT(ptr); @@ -679,14 +676,6 @@ done: kgsl_regrmw(device, GEN8_SP_DBG_CNTL, GENMASK(1, 0), 0x0); } -static void gen8_rmw_aperture(struct kgsl_device *device, - u32 offsetwords, u32 mask, u32 val, u32 pipe, u32 slice_id, u32 use_slice_id) -{ - gen8_host_aperture_set(ADRENO_DEVICE(device), pipe, slice_id, use_slice_id); - - kgsl_regmap_rmw(&device->regmap, offsetwords, mask, val); -} - static void gen8_snapshot_mempool(struct kgsl_device *device, struct kgsl_snapshot *snapshot) { @@ -700,17 +689,21 @@ static void gen8_snapshot_mempool(struct kgsl_device *device, for (j = 0; j < slice; j++) { + kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL + (j, cp_indexed_reg->pipe_id, 0, 0)); + /* set CP_CHICKEN_DBG[StabilizeMVC] to stabilize it while dumping */ - gen8_rmw_aperture(device, GEN8_CP_CHICKEN_DBG_PIPE, 0x4, 0x4, - cp_indexed_reg->pipe_id, j, 1); + kgsl_regrmw(device, GEN8_CP_CHICKEN_DBG_PIPE, 0x4, 0x4); kgsl_snapshot_indexed_registers_v2(device, 
snapshot, cp_indexed_reg->addr, cp_indexed_reg->data, 0, cp_indexed_reg->size, cp_indexed_reg->pipe_id, - ((cp_indexed_reg->slice_region == SLICE) ? j : UINT_MAX)); + ((slice > 1) ? j : UINT_MAX)); - gen8_rmw_aperture(device, GEN8_CP_CHICKEN_DBG_PIPE, 0x4, 0x0, - cp_indexed_reg->pipe_id, j, 1); + kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL + (j, cp_indexed_reg->pipe_id, 0, 0)); + + kgsl_regrmw(device, GEN8_CP_CHICKEN_DBG_PIPE, 0x4, 0x0); } } } @@ -942,9 +935,6 @@ static size_t gen8_legacy_snapshot_mvc(struct kgsl_device *device, u8 *buf, if (info->cluster->sel) kgsl_regwrite(device, info->cluster->sel->host_reg, info->cluster->sel->val); - /* Make sure the previous writes are posted before reading */ - mb(); - for (; ptr[0] != UINT_MAX; ptr += 2) { u32 count = REG_COUNT(ptr); From a5e92bcbcdd53d6390859fd50d44f6c0a4a7ab54 Mon Sep 17 00:00:00 2001 From: Bruce Levy Date: Mon, 15 Apr 2024 17:06:22 -0700 Subject: [PATCH 0756/1016] Revert "kgsl: gen8: Fix GMU register capture in snapshot flow" This reverts commit 41afbf935ef7d94e4a4147f00abfb016d5db6d30. 
Signed-off-by: Bruce Levy Change-Id: I16c168186b86b00d476abe2891c535267b3274a2 --- adreno_gen8_0_0_snapshot.h | 3 ++- adreno_gen8_gmu_snapshot.c | 36 ++++++++++++++++-------------------- adreno_gen8_snapshot.c | 5 ++--- adreno_gen8_snapshot.h | 12 +++++------- 4 files changed, 25 insertions(+), 31 deletions(-) diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h index c43907f96e..5dba870f95 100644 --- a/adreno_gen8_0_0_snapshot.h +++ b/adreno_gen8_0_0_snapshot.h @@ -1883,7 +1883,8 @@ static struct gen8_reg_list gen8_0_0_ahb_registers[] = { { UNSLICE, gen8_0_0_ahb_secure_gpu_registers }, }; -static struct gen8_reg_list gen8_gmu_gx_registers[] = { +static struct gen8_reg_list gen8_gmu_registers[] = { + { UNSLICE, gen8_0_0_gmu_registers }, { UNSLICE, gen8_0_0_gmugx_registers }, { SLICE, gen8_0_0_gmugx_slice_registers }, }; diff --git a/adreno_gen8_gmu_snapshot.c b/adreno_gen8_gmu_snapshot.c index ce247991ca..4d6250efb5 100644 --- a/adreno_gen8_gmu_snapshot.c +++ b/adreno_gen8_gmu_snapshot.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include "adreno.h" @@ -247,7 +247,7 @@ static void gen8_gmu_device_snapshot(struct kgsl_device *device, const struct gen8_snapshot_block_list *gen8_snapshot_block_list = gpucore->gen8_snapshot_block_list; u32 i, slice, j; - struct gen8_reg_list_info info = {0}; + struct gen8_reg_list_info info; kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY, snapshot, gen8_gmu_snapshot_itcm, gmu); @@ -256,32 +256,28 @@ static void gen8_gmu_device_snapshot(struct kgsl_device *device, gen8_gmu_snapshot_memories(device, gmu, snapshot); + for (i = 0 ; i < gen8_snapshot_block_list->num_gmu_regs; i++) { + struct gen8_reg_list *regs = &gen8_snapshot_block_list->gmu_regs[i]; + + slice = regs->slice_region ? MAX_PHYSICAL_SLICES : 1; + for (j = 0 ; j < slice; j++) { + info.regs = regs; + info.slice_id = (slice > 1) ? j : UINT_MAX; + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, + gen8_legacy_snapshot_registers, &info); + } + } + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot, gen8_snapshot_rscc_registers, (void *) gen8_snapshot_block_list->rscc_regs); - /* Capture GMU registers which are on CX domain and unsliced */ - kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot, - adreno_snapshot_registers_v2, - (void *) gen8_snapshot_block_list->gmu_cx_unsliced_regs); - if (!gen8_gmu_gx_is_on(adreno_dev)) goto dtcm; /* Set fence to ALLOW mode so registers can be read */ kgsl_regwrite(device, GEN8_GMUAO_AHB_FENCE_CTRL, 0); - - /* Capture GMU registers which are on GX domain */ - for (i = 0 ; i < gen8_snapshot_block_list->num_gmu_gx_regs; i++) { - struct gen8_reg_list *regs = &gen8_snapshot_block_list->gmu_gx_regs[i]; - - slice = regs->slice_region ? 
MAX_PHYSICAL_SLICES : 1; - for (j = 0 ; j < slice; j++) { - info.regs = regs; - info.slice_id = j; - kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, - gen8_legacy_snapshot_registers, &info); - } - } + /* Make sure the previous write posted before reading */ + wmb(); dtcm: kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY, diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 139fb30265..5d44d74486 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -28,9 +28,8 @@ const struct gen8_snapshot_block_list gen8_0_0_snapshot_block_list = { .cx_debugbus_blocks_len = ARRAY_SIZE(gen8_cx_debugbus_blocks), .external_core_regs = gen8_0_0_external_core_regs, .num_external_core_regs = ARRAY_SIZE(gen8_0_0_external_core_regs), - .gmu_cx_unsliced_regs = gen8_0_0_gmu_registers, - .gmu_gx_regs = gen8_gmu_gx_registers, - .num_gmu_gx_regs = ARRAY_SIZE(gen8_gmu_gx_registers), + .gmu_regs = gen8_gmu_registers, + .num_gmu_regs = ARRAY_SIZE(gen8_gmu_registers), .rscc_regs = gen8_0_0_rscc_rsc_registers, .reg_list = gen8_0_0_reg_list, .cx_misc_regs = gen8_0_0_cx_misc_registers, diff --git a/adreno_gen8_snapshot.h b/adreno_gen8_snapshot.h index 2cb8c4e2ee..83090b67de 100644 --- a/adreno_gen8_snapshot.h +++ b/adreno_gen8_snapshot.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __ADRENO_GEN8_SNAPSHOT_H #define __ADRENO_GEN8_SNAPSHOT_H @@ -610,12 +610,10 @@ struct gen8_snapshot_block_list { const u32 **external_core_regs; /* num_external_core_regs : length of external core registers list */ size_t num_external_core_regs; - /* gmu_cx_unsliced_regs : List of GMU CX unsliced registers */ - const u32 *gmu_cx_unsliced_regs; - /* gmu_gx_registers : List of GMU registers */ - struct gen8_reg_list *gmu_gx_regs; - /* num_gmu_gx_regs : Length of GMU registers list */ - size_t num_gmu_gx_regs; + /* gmu_registers : List of GMU registers */ + struct gen8_reg_list *gmu_regs; + /* num_gmu_regs : Length of GMU registers list */ + size_t num_gmu_regs; /* rscc_regs : List of RSCC registers */ const u32 *rscc_regs; /* reg_list : List of GPU internal registers */ From 77acf0cdd54394d5420b3a13709c1742c2b2982f Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sun, 14 Apr 2024 16:06:51 +0530 Subject: [PATCH 0757/1016] kgsl: Remove LM feature support from gen7 and gen8 Limits Management (LM) feature is not applicable to the gen7 and gen8 platforms. Consequently, remove the support from driver. While at it, perform some code clean up as well. 
Change-Id: I93296697a0fca2bc6b1bed8391d403a462341abf Signed-off-by: Kamal Agrawal --- adreno_gen7_gmu.c | 32 ++++++++++---------------------- adreno_gen7_hfi.c | 2 -- adreno_gen8_gmu.c | 20 +++++--------------- adreno_gen8_hfi.c | 2 -- gen7_reg.h | 1 - gen8_reg.h | 1 - 6 files changed, 15 insertions(+), 43 deletions(-) diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 5070728ff8..d868eb9190 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -464,9 +464,6 @@ int gen7_rscc_sleep_sequence(struct adreno_device *adreno_dev) gmu_core_regwrite(device, GEN7_GMU_RSCC_CONTROL_REQ, 0); - if (adreno_dev->lm_enabled) - gmu_core_regwrite(device, GEN7_GMU_AO_SPARE_CNTL, 0); - set_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags); return 0; @@ -790,7 +787,7 @@ int gen7_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); - unsigned int reg, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8; + u32 reg, reg1, reg2, reg3, reg4; unsigned long t; u64 ts1, ts2; @@ -832,34 +829,25 @@ int gen7_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) gmu_core_regread(device, GEN7_GPU_GMU_AO_GPU_CX_BUSY_STATUS, ®2); gmu_core_regread(device, GEN7_GMU_RBBM_INT_UNMASKED_STATUS, ®3); gmu_core_regread(device, GEN7_GMU_GMU_PWR_COL_KEEPALIVE, ®4); - gmu_core_regread(device, GEN7_GMU_AO_SPARE_CNTL, ®5); dev_err(&gmu->pdev->dev, "----------------------[ GMU error ]----------------------\n"); - dev_err(&gmu->pdev->dev, - "Timeout waiting for lowest idle level %s\n", + dev_err(&gmu->pdev->dev, "Timeout waiting for lowest idle level %s\n", idle_level_name(gmu->idle_level)); dev_err(&gmu->pdev->dev, "Start: %llx (absolute ticks)\n", ts1); - dev_err(&gmu->pdev->dev, "Poll: %llx (ticks relative to start)\n", - ts2-ts1); - dev_err(&gmu->pdev->dev, - "RPMH_POWER_STATE=%x GFX_PWR_CLK_STATUS=%x\n", reg, reg1); + 
dev_err(&gmu->pdev->dev, "Poll: %llx (ticks relative to start)\n", ts2-ts1); + dev_err(&gmu->pdev->dev, "RPMH_POWER_STATE=%x GFX_PWR_CLK_STATUS=%x\n", reg, reg1); dev_err(&gmu->pdev->dev, "CX_BUSY_STATUS=%x\n", reg2); - dev_err(&gmu->pdev->dev, - "RBBM_INT_UNMASKED_STATUS=%x PWR_COL_KEEPALIVE=%x\n", - reg3, reg4); - dev_err(&gmu->pdev->dev, "GEN7_GMU_AO_SPARE_CNTL=%x\n", reg5); + dev_err(&gmu->pdev->dev, "RBBM_INT_UNMASKED_STATUS=%x PWR_COL_KEEPALIVE=%x\n", reg3, reg4); /* Access GX registers only when GX is ON */ if (is_on(reg1)) { - kgsl_regread(device, GEN7_CP_STATUS_1, ®6); - kgsl_regread(device, GEN7_CP_CP2GMU_STATUS, ®7); - kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_CNTL, ®8); + kgsl_regread(device, GEN7_CP_STATUS_1, ®2); + kgsl_regread(device, GEN7_CP_CP2GMU_STATUS, ®3); + kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_CNTL, ®4); - dev_err(&gmu->pdev->dev, "GEN7_CP_STATUS_1=%x\n", reg6); - dev_err(&gmu->pdev->dev, - "CP2GMU_STATUS=%x CONTEXT_SWITCH_CNTL=%x\n", - reg7, reg8); + dev_err(&gmu->pdev->dev, "GEN7_CP_STATUS_1=%x\n", reg2); + dev_err(&gmu->pdev->dev, "CP2GMU_STATUS=%x CONTEXT_SWITCH_CNTL=%x\n", reg3, reg4); } WARN_ON(1); diff --git a/adreno_gen7_hfi.c b/adreno_gen7_hfi.c index 10cdedd144..2abe6e75d3 100644 --- a/adreno_gen7_hfi.c +++ b/adreno_gen7_hfi.c @@ -390,8 +390,6 @@ static const char *feature_to_string(u32 feature) { if (feature == HFI_FEATURE_ACD) return "ACD"; - else if (feature == HFI_FEATURE_LM) - return "LM"; return "unknown"; } diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index a6130ec696..a11e30cdf8 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -384,9 +384,6 @@ int gen8_rscc_sleep_sequence(struct adreno_device *adreno_dev) gmu_core_regwrite(device, GEN8_GMUAO_RSCC_CONTROL_REQ, 0); - if (adreno_dev->lm_enabled) - gmu_core_regwrite(device, GEN8_GMUAO_AO_SPARE_CNTL, 0); - set_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags); return 0; @@ -702,7 +699,7 @@ int gen8_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) struct 
kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); - u32 reg, reg1, reg2, reg3, reg4, reg5; + u32 reg, reg1, reg2, reg3, reg4; unsigned long t; u64 ts1, ts2; @@ -744,23 +741,16 @@ int gen8_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) gmu_core_regread(device, GEN8_GMUAO_GPU_CX_BUSY_STATUS, ®2); gmu_core_regread(device, GEN8_GMUAO_RBBM_INT_UNMASKED_STATUS_SHADOW, ®3); gmu_core_regread(device, GEN8_GMUCX_PWR_COL_KEEPALIVE, ®4); - gmu_core_regread(device, GEN8_GMUAO_AO_SPARE_CNTL, ®5); dev_err(&gmu->pdev->dev, "----------------------[ GMU error ]----------------------\n"); - dev_err(&gmu->pdev->dev, - "Timeout waiting for lowest idle level %s\n", + dev_err(&gmu->pdev->dev, "Timeout waiting for lowest idle level %s\n", idle_level_name(gmu->idle_level)); dev_err(&gmu->pdev->dev, "Start: %llx (absolute ticks)\n", ts1); - dev_err(&gmu->pdev->dev, "Poll: %llx (ticks relative to start)\n", - ts2-ts1); - dev_err(&gmu->pdev->dev, - "RPMH_POWER_STATE=%x GFX_PWR_CLK_STATUS=%x\n", reg, reg1); + dev_err(&gmu->pdev->dev, "Poll: %llx (ticks relative to start)\n", ts2-ts1); + dev_err(&gmu->pdev->dev, "RPMH_POWER_STATE=%x GFX_PWR_CLK_STATUS=%x\n", reg, reg1); dev_err(&gmu->pdev->dev, "CX_BUSY_STATUS=%x\n", reg2); - dev_err(&gmu->pdev->dev, - "RBBM_INT_UNMASKED_STATUS=%x PWR_COL_KEEPALIVE=%x\n", - reg3, reg4); - dev_err(&gmu->pdev->dev, "GMUAO_AO_SPARE_CNTL=%x\n", reg5); + dev_err(&gmu->pdev->dev, "RBBM_INT_UNMASKED_STATUS=%x PWR_COL_KEEPALIVE=%x\n", reg3, reg4); /* Access GX registers only when GX is ON */ if (is_on(reg1)) { diff --git a/adreno_gen8_hfi.c b/adreno_gen8_hfi.c index 14ca397360..d8532ca4ef 100644 --- a/adreno_gen8_hfi.c +++ b/adreno_gen8_hfi.c @@ -388,8 +388,6 @@ static const char *feature_to_string(u32 feature) { if (feature == HFI_FEATURE_ACD) return "ACD"; - else if (feature == HFI_FEATURE_LM) - return "LM"; return "unknown"; } diff 
--git a/gen7_reg.h b/gen7_reg.h index deaa2e2ec9..dc74e33c0d 100644 --- a/gen7_reg.h +++ b/gen7_reg.h @@ -1236,7 +1236,6 @@ #define GEN7_GMU_AHB_FENCE_STATUS 0x23b13 #define GEN7_GMU_AHB_FENCE_STATUS_CLR 0x23b14 #define GEN7_GMU_RBBM_INT_UNMASKED_STATUS 0x23b15 -#define GEN7_GMU_AO_SPARE_CNTL 0x23b16 #define GEN7_GPU_GMU_AO_GPU_LPAC_BUSY_STATUS 0x23b30 /* GMU RSC control registers */ diff --git a/gen8_reg.h b/gen8_reg.h index 4a16a5bf16..21f1b491b8 100644 --- a/gen8_reg.h +++ b/gen8_reg.h @@ -1577,7 +1577,6 @@ #define GEN8_GMUAO_AHB_FENCE_STATUS 0x23b13 #define GEN8_GMUAO_AHB_FENCE_STATUS_CLR 0x23b14 #define GEN8_GMUAO_RBBM_INT_UNMASKED_STATUS_SHADOW 0x23b15 -#define GEN8_GMUAO_AO_SPARE_CNTL 0x23b16 #define GEN8_GMUAO_LPAC_BUSY_STATUS 0x23b30 /* GMU countables */ From fdc0be68b8214a94e4f5490714f6723775292c5a Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 18 Apr 2024 00:43:21 +0530 Subject: [PATCH 0758/1016] kgsl: Simplify RBBM_PERFCTR_SRAM_INIT_CMD programming Currently, read-modify-write (RMW) PM4 packet is used to program RBBM_PERFCTR_SRAM_INIT_CMD register. However, the 'and' operation is always performed with 0, which has no effect. Thus, this can be simplified to a direct register write. Change-Id: Ia516510e6392a91466f0025741617d16c014186f Signed-off-by: Kamal Agrawal --- adreno_a6xx_preempt.c | 9 +++------ adreno_gen7_preempt.c | 6 ++---- adreno_gen8_preempt.c | 11 +++-------- 3 files changed, 8 insertions(+), 18 deletions(-) diff --git a/adreno_a6xx_preempt.c b/adreno_a6xx_preempt.c index 73217508e6..4a129967cc 100644 --- a/adreno_a6xx_preempt.c +++ b/adreno_a6xx_preempt.c @@ -724,17 +724,14 @@ int a6xx_preemption_init(struct adreno_device *adreno_dev) /* * First 28 dwords of the device scratch buffer are used to store shadow rb data. - * Reserve 11 dwords in the device scratch buffer from SCRATCH_POSTAMBLE_OFFSET for - * KMD postamble pm4 packets. This should be in *device->scratch* so that userspace - * cannot access it. 
+ * Insert PM4 packets in device scratch buffer to clear perfcounters. This should + * be in *device->scratch* so that userspace cannot access it. */ if (!adreno_dev->perfcounter) { u32 *postamble = device->scratch->hostptr + SCRATCH_POSTAMBLE_OFFSET; u32 count = 0; - postamble[count++] = cp_type7_packet(CP_REG_RMW, 3); - postamble[count++] = A6XX_RBBM_PERFCTR_SRAM_INIT_CMD; - postamble[count++] = 0x0; + postamble[count++] = cp_type4_packet(A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1); postamble[count++] = 0x1; postamble[count++] = cp_type7_packet(CP_WAIT_REG_MEM, 6); diff --git a/adreno_gen7_preempt.c b/adreno_gen7_preempt.c index 35c0348cf9..38cfbb5d27 100644 --- a/adreno_gen7_preempt.c +++ b/adreno_gen7_preempt.c @@ -482,11 +482,9 @@ void gen7_preemption_prepare_postamble(struct adreno_device *adreno_dev) */ preempt->postamble_bootup_len = count; - /* Reserve 11 dwords in the device scratch buffer to clear perfcounters */ + /* Insert PM4 packets in device scratch buffer to clear perfcounters */ if (!adreno_dev->perfcounter) { - postamble[count++] = cp_type7_packet(CP_REG_RMW, 3); - postamble[count++] = GEN7_RBBM_PERFCTR_SRAM_INIT_CMD; - postamble[count++] = 0x0; + postamble[count++] = cp_type4_packet(GEN7_RBBM_PERFCTR_SRAM_INIT_CMD, 1); postamble[count++] = 0x1; postamble[count++] = cp_type7_packet(CP_WAIT_REG_MEM, 6); diff --git a/adreno_gen8_preempt.c b/adreno_gen8_preempt.c index 96146bddf3..66a85da528 100644 --- a/adreno_gen8_preempt.c +++ b/adreno_gen8_preempt.c @@ -482,18 +482,13 @@ void gen8_preemption_prepare_postamble(struct adreno_device *adreno_dev) */ preempt->postamble_bootup_len = count; - /* Reserve 15 dwords in the device scratch buffer to clear perfcounters */ + /* Insert PM4 packets in device scratch buffer to clear perfcounters */ if (!adreno_dev->perfcounter) { - postamble[count++] = cp_type7_packet(CP_REG_RMW, 3); - postamble[count++] = GEN8_RBBM_PERFCTR_SRAM_INIT_CMD; - postamble[count++] = 0x0; + postamble[count++] = 
cp_type4_packet(GEN8_RBBM_PERFCTR_SRAM_INIT_CMD, 1); postamble[count++] = 0x1; - postamble[count++] = cp_type7_packet(CP_REG_RMW, 3); - postamble[count++] = GEN8_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD; - postamble[count++] = 0x0; + postamble[count++] = cp_type4_packet(GEN8_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD, 1); postamble[count++] = 0x1; - postamble[count++] = cp_type7_packet(CP_WAIT_REG_MEM, 6); postamble[count++] = 0x3; postamble[count++] = GEN8_RBBM_PERFCTR_SRAM_INIT_STATUS; From a72cec65354b632446d0b00fe54dcfdb754f1490 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Wed, 13 Mar 2024 15:25:47 +0530 Subject: [PATCH 0759/1016] kgsl: hwsched: Avoid AHB fence polling Since AHB fence is always in allow mode for HWSCHED, there is no need to poll for it. Change-Id: I9fa385e9a13e066a69b5d8a614bc36acf96609d5 Signed-off-by: Kamal Agrawal --- adreno_a6xx.c | 30 +++++++++++++++++++++++++++++- adreno_gen7.c | 32 ++++++++++++++++++++++++++++++-- adreno_gen8.c | 30 +++++++++++++++++++++++++++++- 3 files changed, 88 insertions(+), 4 deletions(-) diff --git a/adreno_a6xx.c b/adreno_a6xx.c index 7f26022d8d..e426972184 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -1849,6 +1849,34 @@ static int a6xx_irq_poll_fence(struct adreno_device *adreno_dev) return 0; } +static irqreturn_t a6xx_hwsched_irq_handler(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + irqreturn_t ret = IRQ_NONE; + u32 status; + + /* + * On A6xx, the GPU can power down once the INT_0_STATUS is read + * below. But there still might be some register reads required + * so force the GMU/GPU into KEEPALIVE mode until done with the ISR. 
+ */ + a6xx_gpu_keepalive(adreno_dev, true); + + kgsl_regread(device, A6XX_RBBM_INT_0_STATUS, &status); + + kgsl_regwrite(device, A6XX_RBBM_INT_CLEAR_CMD, status); + + ret = adreno_irq_callbacks(adreno_dev, a6xx_irq_funcs, status); + + trace_kgsl_a5xx_irq_status(adreno_dev, status); + + /* If hard fault, then let snapshot turn off the keepalive */ + if (!(adreno_gpu_fault(adreno_dev) & ADRENO_HARD_FAULT)) + a6xx_gpu_keepalive(adreno_dev, false); + + return ret; +} + static irqreturn_t a6xx_irq_handler(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -2335,7 +2363,7 @@ const struct a6xx_gpudev adreno_a6xx_hwsched_gpudev = { .reg_offsets = a6xx_register_offsets, .probe = a6xx_hwsched_probe, .snapshot = a6xx_hwsched_snapshot, - .irq_handler = a6xx_irq_handler, + .irq_handler = a6xx_hwsched_irq_handler, .iommu_fault_block = a6xx_iommu_fault_block, .context_detach = a6xx_hwsched_context_detach, .read_alwayson = a6xx_read_alwayson, diff --git a/adreno_gen7.c b/adreno_gen7.c index cef5d9c614..c1209416e3 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -1587,6 +1587,34 @@ static int gen7_irq_poll_fence(struct adreno_device *adreno_dev) return 0; } +static irqreturn_t gen7_hwsched_irq_handler(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + irqreturn_t ret = IRQ_NONE; + u32 status; + + /* + * GPU can power down once the INT_0_STATUS is read below. + * But there still might be some register reads required so + * force the GMU/GPU into KEEPALIVE mode until done with the ISR. 
+ */ + gen7_gpu_keepalive(adreno_dev, true); + + kgsl_regread(device, GEN7_RBBM_INT_0_STATUS, &status); + + kgsl_regwrite(device, GEN7_RBBM_INT_CLEAR_CMD, status); + + ret = adreno_irq_callbacks(adreno_dev, gen7_irq_funcs, status); + + trace_kgsl_gen7_irq_status(adreno_dev, status); + + /* If hard fault, then let snapshot turn off the keepalive */ + if (!(adreno_gpu_fault(adreno_dev) & ADRENO_HARD_FAULT)) + gen7_gpu_keepalive(adreno_dev, false); + + return ret; +} + static irqreturn_t gen7_irq_handler(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -2188,7 +2216,7 @@ const struct gen7_gpudev adreno_gen7_9_0_hwsched_gpudev = { .reg_offsets = gen7_register_offsets, .probe = gen7_hwsched_probe, .snapshot = gen7_hwsched_snapshot, - .irq_handler = gen7_irq_handler, + .irq_handler = gen7_hwsched_irq_handler, .iommu_fault_block = gen7_iommu_fault_block, .context_detach = gen7_hwsched_context_detach, .read_alwayson = gen7_9_0_read_alwayson, @@ -2217,7 +2245,7 @@ const struct gen7_gpudev adreno_gen7_hwsched_gpudev = { .reg_offsets = gen7_register_offsets, .probe = gen7_hwsched_probe, .snapshot = gen7_hwsched_snapshot, - .irq_handler = gen7_irq_handler, + .irq_handler = gen7_hwsched_irq_handler, .iommu_fault_block = gen7_iommu_fault_block, .context_detach = gen7_hwsched_context_detach, .read_alwayson = gen7_read_alwayson, diff --git a/adreno_gen8.c b/adreno_gen8.c index 1aea41bb18..34000b4863 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -2036,6 +2036,34 @@ static int gen8_irq_poll_fence(struct adreno_device *adreno_dev) return 0; } +static irqreturn_t gen8_hwsched_irq_handler(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + irqreturn_t ret = IRQ_NONE; + u32 status; + + /* + * GPU can power down once the INT_0_STATUS is read below. + * But there still might be some register reads required so + * force the GMU/GPU into KEEPALIVE mode until done with the ISR. 
+ */ + gen8_gpu_keepalive(adreno_dev, true); + + kgsl_regread(device, GEN8_RBBM_INT_0_STATUS, &status); + + kgsl_regwrite(device, GEN8_RBBM_INT_CLEAR_CMD, status); + + ret = adreno_irq_callbacks(adreno_dev, gen8_irq_funcs, status); + + trace_kgsl_gen8_irq_status(adreno_dev, status); + + /* If hard fault, then let snapshot turn off the keepalive */ + if (!(adreno_gpu_fault(adreno_dev) & ADRENO_HARD_FAULT)) + gen8_gpu_keepalive(adreno_dev, false); + + return ret; +} + static irqreturn_t gen8_irq_handler(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -2717,7 +2745,7 @@ const struct gen8_gpudev adreno_gen8_hwsched_gpudev = { .reg_offsets = gen8_register_offsets, .probe = gen8_hwsched_probe, .snapshot = gen8_hwsched_snapshot, - .irq_handler = gen8_irq_handler, + .irq_handler = gen8_hwsched_irq_handler, .iommu_fault_block = gen8_iommu_fault_block, .context_detach = gen8_hwsched_context_detach, .read_alwayson = gen8_read_alwayson, From e9adea4b70cac90502e32e8743af4378ec7c8efe Mon Sep 17 00:00:00 2001 From: NISARG SHETH Date: Thu, 18 Apr 2024 11:18:33 +0530 Subject: [PATCH 0760/1016] kgsl: hwsched: Process message queue only when mask is enabled Currently message queue will be processed even if the message queue mask is disabled and there is some data present in debug queue. Process message queue only when the message queue is enabled for async communication. 
Change-Id: If1d09682c9f5dfaab47a1d6acdb26355e7cbf6b3 Signed-off-by: NISARG SHETH --- adreno_a6xx_hwsched_hfi.c | 3 +++ adreno_gen7_hwsched_hfi.c | 3 +++ adreno_gen8_hwsched_hfi.c | 3 +++ 3 files changed, 9 insertions(+) diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index 563abcab43..b04c3d2985 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -466,6 +466,9 @@ static void a6xx_hwsched_process_msgq(struct adreno_device *adreno_dev) struct a6xx_hwsched_hfi *hw_hfi = to_a6xx_hwsched_hfi(adreno_dev); u32 rcvd[MAX_RCVD_SIZE], next_hdr; + if (!(hw_hfi->irq_mask & HFI_IRQ_MSGQ_MASK)) + return; + mutex_lock(&hw_hfi->msgq_mutex); for (;;) { diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 7f3bbfae7e..b8603eb29e 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -1233,6 +1233,9 @@ void gen7_hwsched_process_msgq(struct adreno_device *adreno_dev) struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev); u32 rcvd[MAX_RCVD_SIZE], next_hdr, type; + if (!(hw_hfi->irq_mask & HFI_IRQ_MSGQ_MASK)) + return; + mutex_lock(&hw_hfi->msgq_mutex); for (;;) { diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index daffd1e098..ceb0837109 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -1224,6 +1224,9 @@ void gen8_hwsched_process_msgq(struct adreno_device *adreno_dev) struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev); u32 rcvd[MAX_RCVD_SIZE], next_hdr, type; + if (!(hw_hfi->irq_mask & HFI_IRQ_MSGQ_MASK)) + return; + mutex_lock(&hw_hfi->msgq_mutex); for (;;) { From 6ea6b0c76a70de18646a46b9177ba2df25389f32 Mon Sep 17 00:00:00 2001 From: Piyush Mehta Date: Fri, 19 Apr 2024 17:18:40 +0530 Subject: [PATCH 0761/1016] kgsl: gen7: Add zap file name extension for gen7_6_0 GPU Add zap file name extension 'mbn' for gen7_6_0 GPU. 
Change-Id: I1f1e50fc19de5e03fce283a139b587f7430fd5d5 Signed-off-by: Piyush Mehta --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 5e2efbddfd..6c8cb4f67e 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2254,7 +2254,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_6_0 = { .gmu_fw_version = GMU_VERSION(4, 1, 9), .sqefw_name = "a740v3_sqe.fw", .gmufw_name = "gmu_gen70200.bin", - .zap_name = "a740v3_zap", + .zap_name = "a740v3_zap.mbn", .hwcg = gen7_2_0_hwcg_regs, .hwcg_count = ARRAY_SIZE(gen7_2_0_hwcg_regs), .ao_hwcg = gen7_2_0_ao_hwcg_regs, From 0f5818232f1ed62afb8600f59f9bd3b224c4f330 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sun, 14 Apr 2024 18:54:17 +0530 Subject: [PATCH 0762/1016] kgsl: gen8: Add TPL1_BICUBIC_WEIGHTS_TABLE_19 to powerup reglist TPL1_BICUBIC_WEIGHTS_TABLE_19 register is not retained across IFPC. Add this register to static IFPC power-up register list so that it is restored across IFPC boundary. Change-Id: Icaa13cfd2368b82e1bac29dd89da090341c893e0 Signed-off-by: Kamal Agrawal --- adreno_gen8.c | 1 + 1 file changed, 1 insertion(+) diff --git a/adreno_gen8.c b/adreno_gen8.c index e2255c84e2..7b0f1a6371 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -81,6 +81,7 @@ static const u32 gen8_ifpc_pwrup_reglist[] = { GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_16, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_17, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_18, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_19, GEN8_CP_PROTECT_REG_GLOBAL, GEN8_CP_PROTECT_REG_GLOBAL + 1, GEN8_CP_PROTECT_REG_GLOBAL + 2, From 468da5c03fee61ec6ad919d7834a6d56d238953d Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Wed, 10 Apr 2024 12:32:31 -0700 Subject: [PATCH 0763/1016] kgsl: gen8: Add support for Gen8_0_1 Gen8_0_1 has a different compatible string. Add a new core structure to support Gen8_0_1. 
Change-Id: Ic95897e903b30cc5ed9c5cd311a964d699e4aca2 Signed-off-by: Hareesh Gundu Signed-off-by: Bruce Levy --- adreno-gpulist.h | 38 ++++++++++++++++++++++++++++++++++++++ adreno.h | 1 + 2 files changed, 39 insertions(+) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 9f963e2902..961d4b2367 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2743,6 +2743,43 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .noc_timeout_us = 3410, /* 3.41 msec */ }; +static const struct adreno_gen8_core adreno_gpu_core_gen8_0_1 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_GEN8_0_1, + UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), + .compatible = "qcom,adreno-gpu-gen8-0-1", + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | + ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | + ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_BCL | + ADRENO_IFPC | ADRENO_HW_FENCE | ADRENO_PREEMPTION | + ADRENO_ACD, + .gpudev = &adreno_gen8_hwsched_gpudev.base, + .perfcounters = &adreno_gen8_perfcounters, + .uche_gmem_alignment = SZ_64M, + .gmem_size = 12 * SZ_1M, + .bus_width = 32, + .snapshot_size = SZ_8M, + }, + .aqefw_name = "gen80000_aqe.fw", + .sqefw_name = "gen80000_sqe.fw", + .gmufw_name = "gen80000_gmu.bin", + .zap_name = "gen80000_zap.mbn", + .ao_hwcg = gen8_ao_hwcg_regs, + .ao_hwcg_count = ARRAY_SIZE(gen8_ao_hwcg_regs), + .gbif = gen8_0_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen8_0_0_gbif_cx_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = gen8_0_0_protected_regs, + .nonctxt_regs = gen8_0_0_nonctxt_regs, + .highest_bank_bit = 16, + .gmu_hub_clk_freq = 200000000, + .gen8_snapshot_block_list = &gen8_0_0_snapshot_block_list, + .fast_bus_hint = true, + .bcl_data = 1, + .acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4), + .noc_timeout_us = 3410, /* 3.41 msec */ +}; + /* GEN8_4_0 noncontext register list */ static const struct gen8_nonctxt_regs gen8_4_0_nonctxt_regs[] = { { GEN8_CP_SMMU_STREAM_ID_LPAC, 0x00000101, BIT(PIPE_NONE) }, @@ -2894,5 +2931,6 @@ static 
const struct adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_gen7_14_0.base, &adreno_gpu_core_gen7_11_0.base, &adreno_gpu_core_gen8_0_0.base, + &adreno_gpu_core_gen8_0_1.base, &adreno_gpu_core_gen8_4_0.base, }; diff --git a/adreno.h b/adreno.h index 70351591a2..d80abfa959 100644 --- a/adreno.h +++ b/adreno.h @@ -246,6 +246,7 @@ enum adreno_gpurev { ADRENO_REV_GEN7_14_0 = ADRENO_GPUREV_VALUE(7, 14, 0), ADRENO_REV_GEN7_11_0 = ADRENO_GPUREV_VALUE(7, 11, 0), ADRENO_REV_GEN8_0_0 = ADRENO_GPUREV_VALUE(8, 0, 0), + ADRENO_REV_GEN8_0_1 = ADRENO_GPUREV_VALUE(8, 0, 1), ADRENO_REV_GEN8_4_0 = ADRENO_GPUREV_VALUE(8, 4, 0), }; From 7eef2594d57d1ef03e06bfaba9da6c48e583040b Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sun, 3 Mar 2024 21:01:34 +0530 Subject: [PATCH 0764/1016] kgsl: gen8: Remove DMS feature support Dynamic Mode Switch (DMS) feature is not applicable to the gen8 platforms. Consequently, remove the support from driver. Change-Id: Iffb878f68480bd4337098e05f5921613d72f4825 Signed-off-by: Kamal Agrawal --- adreno_gen8.c | 4 ---- adreno_gen8_gmu.c | 5 ----- adreno_gen8_hwsched.c | 5 ----- adreno_gen8_hwsched_hfi.c | 23 ----------------------- 4 files changed, 37 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index 6650587cca..da51d31c78 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -388,10 +388,6 @@ void gen8_cp_init_cmds(struct adreno_device *adreno_dev, u32 *cmds) /* Enable the register init list with the spinlock */ mask |= BIT(8); - /* By default DMS is enabled from CP side, disable it if not supported */ - if (!adreno_dev->dms_enabled) - mask |= BIT(11); - cmds[i++] = cp_type7_packet(CP_ME_INIT, 7); /* Enabled ordinal mask */ diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index a45d1f9619..4b8fe92599 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -3166,11 +3166,6 @@ int gen8_gmu_device_probe(struct platform_device *pdev, timer_setup(&device->idle_timer, gmu_idle_timer, 0); - if (ADRENO_FEATURE(adreno_dev, ADRENO_DMS)) { - 
set_bit(ADRENO_DEVICE_DMS, &adreno_dev->priv); - adreno_dev->dms_enabled = true; - } - return 0; } diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 5f29f24873..a96b161f7a 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -1911,11 +1911,6 @@ int gen8_hwsched_probe(struct platform_device *pdev, if (ADRENO_FEATURE(adreno_dev, ADRENO_LPAC)) adreno_dev->lpac_enabled = true; - if (ADRENO_FEATURE(adreno_dev, ADRENO_DMS)) { - set_bit(ADRENO_DEVICE_DMS, &adreno_dev->priv); - adreno_dev->dms_enabled = true; - } - kgsl_mmu_set_feature(device, KGSL_MMU_PAGEFAULT_TERMINATE); if (ADRENO_FEATURE(adreno_dev, ADRENO_HW_FENCE)) diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 270f5d5a25..9cadd6efa5 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -2065,25 +2065,6 @@ static int gen8_hfi_send_hw_fence_feature_ctrl(struct adreno_device *adreno_dev) return ret; } -static int gen8_hfi_send_dms_feature_ctrl(struct adreno_device *adreno_dev) -{ - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - int ret; - - if (!test_bit(ADRENO_DEVICE_DMS, &adreno_dev->priv)) - return 0; - - ret = gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_DMS, 1, 0); - if (ret == -ENOENT) { - dev_err(&gmu->pdev->dev, "GMU doesn't support DMS feature\n"); - clear_bit(ADRENO_DEVICE_DMS, &adreno_dev->priv); - adreno_dev->dms_enabled = false; - return 0; - } - - return ret; -} - static void gen8_spin_idle_debug_lpac(struct adreno_device *adreno_dev, const char *str) { @@ -2396,10 +2377,6 @@ int gen8_hwsched_hfi_start(struct adreno_device *adreno_dev) if (ret) goto err; - ret = gen8_hfi_send_dms_feature_ctrl(adreno_dev); - if (ret) - goto err; - /* Enable the long ib timeout detection */ if (adreno_long_ib_detect(adreno_dev)) { ret = gen8_hfi_send_feature_ctrl(adreno_dev, From 7fd7d6f5d33cde540c369fbdcc5392ca8e2004bb Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Mon, 25 Mar 2024 11:57:38 -0700 Subject: [PATCH 0765/1016] kgsl: 
gen8: Make gen8_start() register access sequential Linux kernel IO accessor relaxed memory access routines mandates memory barriers to enforce ordering which is not a performance efficient in critical paths such as GPU bootup. On ARM architecture readl/writel() translate into a full read/write synchronization barrier which stop execution until all previous writes/reads are done. So, use readl/writel() variants in gen8_start() register access to remove additional overhead with the memory barriers for each register access. Change-Id: Ie26d232ec8b4588b96cbc27712b24d6231171e88 Signed-off-by: Hareesh Gundu --- adreno_gen8.c | 17 +++++++++++++++++ kgsl_regmap.c | 38 +++++++++++++++++++++++++------------- kgsl_regmap.h | 2 ++ 3 files changed, 44 insertions(+), 13 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index da51d31c78..705032129d 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -1043,10 +1043,20 @@ int gen8_start(struct adreno_device *adreno_dev) struct cpu_gpu_lock *pwrup_lock = adreno_dev->pwrup_reglist->hostptr; u64 uche_trap_base = gen8_get_uche_trap_base(); u32 rgba8888_lossless = 0; + int is_current_rt = rt_task(current); /* Reset aperture fields to go through first aperture write check */ gen8_dev->aperture = UINT_MAX; + /* + * Elevating the thread’s priority to FIFO to ensure sequential register access + * on the same CPU, avoiding context switches to a different CPU or thread. 
+ */ + if (!is_current_rt) + sched_set_fifo(current); + + device->regmap.use_relaxed = false; + /* Make all blocks contribute to the GPU BUSY perf counter */ kgsl_regwrite(device, GEN8_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff); @@ -1239,6 +1249,13 @@ int gen8_start(struct adreno_device *adreno_dev) adreno_dev->patch_reglist = true; } + /* Ensure very last register write is finished before we return from this function */ + mb(); + device->regmap.use_relaxed = true; + + if (!is_current_rt) + sched_set_normal(current, 0); + return 0; } diff --git a/kgsl_regmap.c b/kgsl_regmap.c index 6fcf305b95..c488d9eb5d 100644 --- a/kgsl_regmap.c +++ b/kgsl_regmap.c @@ -121,10 +121,13 @@ u32 kgsl_regmap_read(struct kgsl_regmap *regmap, u32 offset) if (region->ops && region->ops->preaccess) region->ops->preaccess(region); - val = readl_relaxed(region_addr(region, offset)); - /* Allow previous read to post before returning the value */ - rmb(); - + if (regmap->use_relaxed) { + val = readl_relaxed(region_addr(region, offset)); + /* Allow previous read to post before returning the value */ + rmb(); + } else { + val = readl(region_addr(region, offset)); + } return val; } @@ -138,10 +141,13 @@ void kgsl_regmap_write(struct kgsl_regmap *regmap, u32 value, u32 offset) if (region->ops && region->ops->preaccess) region->ops->preaccess(region); - /* Make sure all pending writes have posted first */ - wmb(); - writel_relaxed(value, region_addr(region, offset)); - + if (regmap->use_relaxed) { + /* Make sure all pending writes have posted first */ + wmb(); + writel_relaxed(value, region_addr(region, offset)); + } else { + writel(value, region_addr(region, offset)); + } trace_kgsl_regwrite(offset, value); } @@ -197,11 +203,17 @@ void kgsl_regmap_rmw(struct kgsl_regmap *regmap, u32 offset, u32 mask, if (region->ops && region->ops->preaccess) region->ops->preaccess(region); - val = readl_relaxed(region_addr(region, offset)); - /* Make sure the read posted and all pending writes are done */ - mb(); 
- writel_relaxed((val & ~mask) | or, region_addr(region, offset)); - + if (regmap->use_relaxed) { + val = readl_relaxed(region_addr(region, offset)); + /* Make sure the read posted and all pending writes are done */ + mb(); + writel_relaxed((val & ~mask) | or, region_addr(region, offset)); + } else { + val = readl(region_addr(region, offset)); + /* Make sure the read posted and all pending writes are done */ + mb(); + writel((val & ~mask) | or, region_addr(region, offset)); + } trace_kgsl_regwrite(offset, (val & ~mask) | or); } diff --git a/kgsl_regmap.h b/kgsl_regmap.h index efb6716a23..7121c976d2 100644 --- a/kgsl_regmap.h +++ b/kgsl_regmap.h @@ -52,6 +52,8 @@ struct kgsl_regmap { struct kgsl_regmap_region region[5]; /** @count: Number of active regions in @region */ int count; + /** @use_relaxed: To use relaxed variant of IO access API */ + bool use_relaxed; }; /** From c71440612a3977aa68375f68b29aede11516ee65 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Wed, 27 Mar 2024 15:07:35 -0700 Subject: [PATCH 0766/1016] kgsl: gen8: Update GPU hard reset sequence Update GPU hard reset sequence to improve robustness. Following changes are made in reset sequence. 1) Removes GPU GX register access including RBBM_SW_RESET. 2) Triggers GPU GX, MX shutdown from RSCC. 
Change-Id: Ib81188d336c3613b9891cd8b1c1e2724d7203f38 Signed-off-by: Hareesh Gundu --- adreno_gen8_gmu.c | 38 ++++++++++---------------------------- 1 file changed, 10 insertions(+), 28 deletions(-) diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index 4b8fe92599..463b36a997 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -1391,42 +1391,24 @@ static void gen8_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) /* Make sure above writes are committed before we proceed to recovery */ wmb(); - gmu_core_regwrite(device, GEN8_GMUCX_CM3_SYSRESET, 1); - - /* Halt GX traffic */ - if (gen8_gmu_gx_is_on(adreno_dev)) - _do_gbif_halt(device, GEN8_RBBM_GBIF_HALT, - GEN8_RBBM_GBIF_HALT_ACK, - GEN8_GBIF_GX_HALT_MASK, - "GX"); - /* Halt CX traffic */ _do_gbif_halt(device, GEN8_GBIF_HALT, GEN8_GBIF_HALT_ACK, GEN8_GBIF_ARB_HALT_MASK, "CX"); - if (gen8_gmu_gx_is_on(adreno_dev)) - kgsl_regwrite(device, GEN8_RBBM_SW_RESET_CMD, 0x1); - - /* Make sure above writes are posted before turning off power resources */ - wmb(); - - /* Allow the software reset to complete */ - udelay(100); - - /* - * This is based on the assumption that GMU is the only one controlling - * the GX HS. This code path is the only client voting for GX from linux - * kernel. 
- */ - if (!gen8_gmu_gx_is_on(adreno_dev)) - return; - /* * Switch gx gdsc control from GMU to CPU force non-zero reference * count in clk driver so next disable call will turn off the GDSC */ - kgsl_pwrctrl_enable_gx_gdsc(device); - kgsl_pwrctrl_disable_gx_gdsc(device); + if (gen8_gmu_gx_is_on(adreno_dev)) { + kgsl_pwrctrl_enable_gx_gdsc(device); + kgsl_pwrctrl_disable_gx_gdsc(device); + } + + /* + * Trigger RSC slumber sequence to turn off GMU controlled domains + * (GX, MXC) and remove GPU bus votes + */ + gen8_rscc_sleep_sequence(adreno_dev); if (gen8_gmu_gx_is_on(adreno_dev)) dev_err(&gmu->pdev->dev, "gx is stuck on\n"); From d25a909a9e7135faf40740b1e61150ef3baf09c3 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Fri, 26 Apr 2024 12:07:33 +0530 Subject: [PATCH 0767/1016] kgsl: gen8: Remove SP_DBG_CNTL register programming Remove SP_DBG_CNTL register programming from gen8 as per latest recommendation. Change-Id: I8a0892589608d570d6757146abc7fdbf3b68a929 Signed-off-by: Kamal Agrawal --- adreno_gen8_snapshot.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index a96d7bb67e..a7fd755c44 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -598,8 +598,6 @@ static void gen8_snapshot_shader(struct kgsl_device *device, size_t (*func)(struct kgsl_device *device, u8 *buf, size_t remain, void *priv) = gen8_legacy_snapshot_shader; - kgsl_regrmw(device, GEN8_SP_DBG_CNTL, GENMASK(1, 0), 3); - if (CD_SCRIPT_CHECK(device)) { for (i = 0; i < num_shader_blocks; i++) { struct gen8_shader_block *block = &shader_blocks[i]; @@ -623,7 +621,7 @@ static void gen8_snapshot_shader(struct kgsl_device *device, } } - goto done; + return; } for (i = 0; i < num_shader_blocks; i++) { @@ -674,9 +672,6 @@ static void gen8_snapshot_shader(struct kgsl_device *device, } } } - -done: - kgsl_regrmw(device, GEN8_SP_DBG_CNTL, GENMASK(1, 0), 0x0); } static void gen8_rmw_aperture(struct kgsl_device *device, From 
ca4e909a0f45694f6d08ac19beacf4702b7bf12f Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Wed, 24 Apr 2024 19:05:00 -0700 Subject: [PATCH 0768/1016] kgsl: snapshot: Use kstrtobool() instead of strtobool() The strtobool() kernel API was removed in kernel commit 9bf2850c9170 ("kstrtox: remove strtobool()"). Remove all uses of this API and instead use the replacement kstrtobool(). Change-Id: I181416507b40e28ade08b43da6e5d7871e490bb6 Signed-off-by: Lynus Vaz --- kgsl_snapshot.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/kgsl_snapshot.c b/kgsl_snapshot.c index 6f60535bf9..1fe9bcb033 100644 --- a/kgsl_snapshot.c +++ b/kgsl_snapshot.c @@ -922,9 +922,10 @@ static ssize_t force_panic_show(struct kgsl_device *device, char *buf) static ssize_t force_panic_store(struct kgsl_device *device, const char *buf, size_t count) { - if (strtobool(buf, &device->force_panic)) - return -EINVAL; - return count; + int ret; + + ret = kstrtobool(buf, &device->force_panic); + return ret ? ret : count; } /* Show the break_ib request status */ @@ -955,10 +956,10 @@ static ssize_t prioritize_unrecoverable_show( static ssize_t prioritize_unrecoverable_store( struct kgsl_device *device, const char *buf, size_t count) { - if (strtobool(buf, &device->prioritize_unrecoverable)) - return -EINVAL; + int ret; - return count; + ret = kstrtobool(buf, &device->prioritize_unrecoverable); + return ret ? ret : count; } /* Show the snapshot_crashdumper request status */ @@ -972,9 +973,10 @@ static ssize_t snapshot_crashdumper_show(struct kgsl_device *device, char *buf) static ssize_t snapshot_crashdumper_store(struct kgsl_device *device, const char *buf, size_t count) { - if (strtobool(buf, &device->snapshot_crashdumper)) - return -EINVAL; - return count; + int ret; + + ret = kstrtobool(buf, &device->snapshot_crashdumper); + return ret ? 
ret : count; } /* Show the timestamp of the last collected snapshot */ @@ -996,10 +998,10 @@ static ssize_t snapshot_legacy_show(struct kgsl_device *device, char *buf) static ssize_t snapshot_legacy_store(struct kgsl_device *device, const char *buf, size_t count) { - if (strtobool(buf, &device->snapshot_legacy)) - return -EINVAL; + int ret; - return count; + ret = kstrtobool(buf, &device->snapshot_legacy); + return ret ? ret : count; } static struct bin_attribute snapshot_attr = { From cfe6a30b8aaa9d2d32829d40d7d8c2decd0a00e1 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Wed, 24 Apr 2024 19:07:17 -0700 Subject: [PATCH 0769/1016] kgsl: Update the use of the shrinker API The shrinker API was modified in kernel commit c42d50aefd17 ("mm: shrinker: add infrastructure for dynamically allocating shrinker"). Update the pool and reclaim code to use the new shrinker API where available. Change-Id: Ica48d856e440d4a773da14cd7fa1b1b3a6b722f4 Signed-off-by: Lynus Vaz --- kgsl.h | 4 +++ kgsl_pool.c | 66 ++++++++++++++++++++++++++++++++++----------- kgsl_reclaim.c | 72 +++++++++++++++++++++++++++++++++++++------------- 3 files changed, 109 insertions(+), 33 deletions(-) diff --git a/kgsl.h b/kgsl.h index adf762c5ad..be8fe948ab 100644 --- a/kgsl.h +++ b/kgsl.h @@ -204,6 +204,10 @@ struct kgsl_driver { struct workqueue_struct *workqueue; /* @lockless_workqueue: Pointer to a workqueue handler which doesn't hold device mutex */ struct workqueue_struct *lockless_workqueue; + /** @pool_shrinker: Pointer to a shrinker that resizes the kgsl page pools */ + struct shrinker *pool_shrinker; + /** @reclaim_shrinker: Pointer to a shrinker that reclaims kgsl memory */ + struct shrinker *reclaim_shrinker; }; extern struct kgsl_driver kgsl_driver; diff --git a/kgsl_pool.c b/kgsl_pool.c index 68ce783d41..c17bf2251b 100644 --- a/kgsl_pool.c +++ b/kgsl_pool.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2016-2021, The Linux Foundation. All rights reserved. 
- * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -621,14 +621,6 @@ kgsl_pool_shrink_count_objects(struct shrinker *shrinker, return kgsl_pool_size_nonreserved(); } -/* Shrinker callback data*/ -static struct shrinker kgsl_pool_shrinker = { - .count_objects = kgsl_pool_shrink_count_objects, - .scan_objects = kgsl_pool_shrink_scan_objects, - .seeks = DEFAULT_SEEKS, - .batch = 0, -}; - int kgsl_pool_reserved_get(void *data, u64 *val) { struct kgsl_page_pool *pool = data; @@ -710,6 +702,54 @@ static int kgsl_of_parse_mempool(struct kgsl_page_pool *pool, return 0; } +#if (KERNEL_VERSION(6, 7, 0) <= LINUX_VERSION_CODE) +static void kgsl_pool_shrinker_init(void) +{ + kgsl_driver.pool_shrinker = shrinker_alloc(0, "kgsl_pool_shrinker"); + + if (!kgsl_driver.pool_shrinker) + return; + + kgsl_driver.pool_shrinker->count_objects = kgsl_pool_shrink_count_objects; + kgsl_driver.pool_shrinker->scan_objects = kgsl_pool_shrink_scan_objects; + kgsl_driver.pool_shrinker->seeks = DEFAULT_SEEKS; + kgsl_driver.pool_shrinker->batch = 0; + + shrinker_register(kgsl_driver.pool_shrinker); +} + +static void kgsl_pool_shrinker_close(void) +{ + if (kgsl_driver.pool_shrinker) + shrinker_free(kgsl_driver.pool_shrinker); + + kgsl_driver.pool_shrinker = NULL; +} +#else +/* Shrinker callback data*/ +static struct shrinker kgsl_pool_shrinker = { + .count_objects = kgsl_pool_shrink_count_objects, + .scan_objects = kgsl_pool_shrink_scan_objects, + .seeks = DEFAULT_SEEKS, + .batch = 0, +}; + +static void kgsl_pool_shrinker_init(void) +{ + kgsl_driver.pool_shrinker = &kgsl_pool_shrinker; +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + register_shrinker(kgsl_driver.pool_shrinker, "kgsl_pool_shrinker"); +#else + register_shrinker(kgsl_driver.pool_shrinker); +#endif +} + +static void kgsl_pool_shrinker_close(void) +{ + unregister_shrinker(kgsl_driver.pool_shrinker); +} 
+#endif + void kgsl_probe_page_pools(void) { struct device_node *node, *child; @@ -739,11 +779,7 @@ void kgsl_probe_page_pools(void) of_node_put(node); /* Initialize shrinker */ -#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) - register_shrinker(&kgsl_pool_shrinker, "kgsl_pool_shrinker"); -#else - register_shrinker(&kgsl_pool_shrinker); -#endif + kgsl_pool_shrinker_init(); } void kgsl_exit_page_pools(void) @@ -754,7 +790,7 @@ void kgsl_exit_page_pools(void) kgsl_pool_reduce(INT_MAX, true); /* Unregister shrinker */ - unregister_shrinker(&kgsl_pool_shrinker); + kgsl_pool_shrinker_close(); /* Destroy helper structures */ for (i = 0; i < kgsl_num_pools; i++) diff --git a/kgsl_reclaim.c b/kgsl_reclaim.c index 6aca51e1e0..7ced752868 100644 --- a/kgsl_reclaim.c +++ b/kgsl_reclaim.c @@ -468,14 +468,6 @@ kgsl_reclaim_shrink_count_objects(struct shrinker *shrinker, return count_reclaimable; } -/* Shrinker callback data*/ -static struct shrinker kgsl_reclaim_shrinker = { - .count_objects = kgsl_reclaim_shrink_count_objects, - .scan_objects = kgsl_reclaim_shrink_scan_objects, - .seeks = DEFAULT_SEEKS, - .batch = 0, -}; - void kgsl_reclaim_proc_private_init(struct kgsl_process_private *process) { mutex_init(&process->reclaim_lock); @@ -485,28 +477,74 @@ void kgsl_reclaim_proc_private_init(struct kgsl_process_private *process) atomic_set(&process->unpinned_page_count, 0); } -int kgsl_reclaim_start(void) +#if (KERNEL_VERSION(6, 7, 0) <= LINUX_VERSION_CODE) +static int kgsl_reclaim_shrinker_init(void) +{ + kgsl_driver.reclaim_shrinker = shrinker_alloc(0, "kgsl_reclaim_shrinker"); + + if (!kgsl_driver.reclaim_shrinker) + return -ENOMEM; + + /* Initialize shrinker */ + kgsl_driver.reclaim_shrinker->count_objects = kgsl_reclaim_shrink_count_objects; + kgsl_driver.reclaim_shrinker->scan_objects = kgsl_reclaim_shrink_scan_objects; + kgsl_driver.reclaim_shrinker->seeks = DEFAULT_SEEKS; + kgsl_driver.reclaim_shrinker->batch = 0; + + shrinker_register(kgsl_driver.reclaim_shrinker); 
+ return 0; +} + +static void kgsl_reclaim_shrinker_close(void) +{ + if (kgsl_driver.reclaim_shrinker) + shrinker_free(kgsl_driver.reclaim_shrinker); + + kgsl_driver.reclaim_shrinker = NULL; +} +#else +/* Shrinker callback data*/ +static struct shrinker kgsl_reclaim_shrinker = { + .count_objects = kgsl_reclaim_shrink_count_objects, + .scan_objects = kgsl_reclaim_shrink_scan_objects, + .seeks = DEFAULT_SEEKS, + .batch = 0, +}; + +static int kgsl_reclaim_shrinker_init(void) { int ret; + kgsl_driver.reclaim_shrinker = &kgsl_reclaim_shrinker; + /* Initialize shrinker */ #if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) - ret = register_shrinker(&kgsl_reclaim_shrinker, "kgsl_reclaim_shrinker"); + ret = register_shrinker(kgsl_driver.reclaim_shrinker, "kgsl_reclaim_shrinker"); #else - ret = register_shrinker(&kgsl_reclaim_shrinker); + ret = register_shrinker(kgsl_driver.reclaim_shrinker); #endif - if (ret) - pr_err("kgsl: reclaim: Failed to register shrinker\n"); - return ret; } +static void kgsl_reclaim_shrinker_close(void) +{ + unregister_shrinker(kgsl_driver.reclaim_shrinker); +} +#endif + +int kgsl_reclaim_start(void) +{ + return kgsl_reclaim_shrinker_init(); +} + int kgsl_reclaim_init(void) { int ret = kgsl_reclaim_start(); - if (ret) + if (ret) { + pr_err("kgsl: reclaim: Failed to register shrinker\n"); return ret; + } INIT_WORK(&reclaim_work, kgsl_reclaim_background_work); @@ -515,8 +553,6 @@ int kgsl_reclaim_init(void) void kgsl_reclaim_close(void) { - /* Unregister shrinker */ - unregister_shrinker(&kgsl_reclaim_shrinker); - + kgsl_reclaim_shrinker_close(); cancel_work_sync(&reclaim_work); } From 2ff9c0a855eef670f2360b693816f379d1c4c118 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Thu, 18 Apr 2024 16:24:04 +0530 Subject: [PATCH 0770/1016] kgsl: hwsched: Inspect MSGQ upon HFI ack timeout for context unregister It is possible that due to HLOS latency issues or the hfi interrupt handler didn't run or the f2h daemon didn't run in expected time. 
Hence, before timing out, inspect MSGQ one last time to see if the ack came or not. If the ack is present, log an error and move on. Change-Id: I6163681d1bc9d8cf5ea1ff0317fc4a95fcac4908 Signed-off-by: Pankaj Gupta --- adreno_a6xx_hwsched_hfi.c | 82 +++++++++++++++++++-------------------- adreno_gen7_hwsched_hfi.c | 71 +++++++++++++++------------------ adreno_gen8_hwsched_hfi.c | 71 +++++++++++++++------------------ adreno_hfi.h | 21 ++++++++++ adreno_hwsched.c | 43 ++++++++++++++++++++ 5 files changed, 168 insertions(+), 120 deletions(-) diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index 563abcab43..cd7ad6b3ee 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -2016,82 +2016,80 @@ int a6xx_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, return ret; } +static void trigger_context_unregister_fault(struct adreno_device *adreno_dev, + struct kgsl_context *context) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + gmu_core_fault_snapshot(device, GMU_FAULT_CTX_UNREGISTER); + + /* + * Trigger dispatcher based reset and recovery. Invalidate the + * context so that any un-finished inflight submissions are not + * replayed after recovery. 
+ */ + adreno_drawctxt_set_guilty(device, context); + adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); +} + static int send_context_unregister_hfi(struct adreno_device *adreno_dev, struct kgsl_context *context, u32 ts) { - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev); struct pending_cmd pending_ack; struct hfi_unregister_ctxt_cmd cmd; u32 seqnum; - int rc; + int ret; /* Only send HFI if device is not in SLUMBER */ if (!context->gmu_registered || !test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) return 0; - rc = CMD_MSG_HDR(cmd, H2F_MSG_UNREGISTER_CONTEXT); - if (rc) - return rc; + ret = CMD_MSG_HDR(cmd, H2F_MSG_UNREGISTER_CONTEXT); + if (ret) + return ret; cmd.ctxt_id = context->id, cmd.ts = ts, - seqnum = atomic_inc_return(&gmu->hfi.seqnum); - cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); - - add_waiter(hfi, cmd.hdr, &pending_ack); - /* * Although we know device is powered on, we can still enter SLUMBER * because the wait for ack below is done without holding the mutex. So * take an active count before releasing the mutex so as to avoid a * concurrent SLUMBER sequence while GMU is un-registering this context. 
*/ - a6xx_hwsched_active_count_get(adreno_dev); + ret = a6xx_hwsched_active_count_get(adreno_dev); + if (ret) { + trigger_context_unregister_fault(adreno_dev, context); + return ret; + } - rc = a6xx_hfi_cmdq_write(adreno_dev, (u32 *)&cmd, sizeof(cmd)); - if (rc) - goto done; - - mutex_unlock(&device->mutex); - - rc = wait_for_completion_timeout(&pending_ack.complete, - msecs_to_jiffies(30 * 1000)); - if (!rc) { - dev_err(&gmu->pdev->dev, - "Ack timeout for context unregister seq: %d ctx: %u ts: %u\n", - MSG_HDR_GET_SEQNUM(pending_ack.sent_hdr), - context->id, ts); - rc = -ETIMEDOUT; - - mutex_lock(&device->mutex); - - gmu_core_fault_snapshot(device, GMU_FAULT_CTX_UNREGISTER); - - /* - * Trigger dispatcher based reset and recovery. Invalidate the - * context so that any un-finished inflight submissions are not - * replayed after recovery. - */ - adreno_drawctxt_set_guilty(device, context); - - adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); + add_waiter(hfi, cmd.hdr, &pending_ack); + ret = a6xx_hfi_cmdq_write(adreno_dev, (u32 *)&cmd, sizeof(cmd)); + if (ret) { + trigger_context_unregister_fault(adreno_dev, context); goto done; } - mutex_lock(&device->mutex); + ret = adreno_hwsched_ctxt_unregister_wait_completion(adreno_dev, + &gmu->pdev->dev, &pending_ack, a6xx_hwsched_process_msgq, &cmd); + if (ret) { + trigger_context_unregister_fault(adreno_dev, context); + goto done; + } + + ret = check_ack_failure(adreno_dev, &pending_ack); - rc = check_ack_failure(adreno_dev, &pending_ack); done: a6xx_hwsched_active_count_put(adreno_dev); - del_waiter(hfi, &pending_ack); - return rc; + return ret; } void a6xx_hwsched_context_detach(struct adreno_context *drawctxt) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 7f3bbfae7e..91eab0120a 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -3922,17 +3922,26 @@ int 
gen7_hwsched_drain_context_hw_fences(struct adreno_device *adreno_dev, return ret; } +static void trigger_context_unregister_fault(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + gmu_core_fault_snapshot(KGSL_DEVICE(adreno_dev), GMU_FAULT_CTX_UNREGISTER); + + /* Make sure we send all fences from this context to the TxQueue after recovery */ + move_detached_context_hardware_fences(adreno_dev, drawctxt); + gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); +} + static int send_context_unregister_hfi(struct adreno_device *adreno_dev, struct kgsl_context *context, u32 ts) { - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); struct adreno_context *drawctxt = ADRENO_CONTEXT(context); struct pending_cmd pending_ack; struct hfi_unregister_ctxt_cmd cmd; u32 seqnum; - int rc, ret; + int ret; /* Only send HFI if device is not in SLUMBER */ if (!context->gmu_registered || @@ -3948,60 +3957,44 @@ static int send_context_unregister_hfi(struct adreno_device *adreno_dev, cmd.ctxt_id = context->id, cmd.ts = ts, - seqnum = atomic_inc_return(&gmu->hfi.seqnum); - cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); - - add_waiter(hfi, cmd.hdr, &pending_ack); - /* * Although we know device is powered on, we can still enter SLUMBER * because the wait for ack below is done without holding the mutex. So * take an active count before releasing the mutex so as to avoid a * concurrent SLUMBER sequence while GMU is un-registering this context. 
*/ - gen7_hwsched_active_count_get(adreno_dev); + ret = gen7_hwsched_active_count_get(adreno_dev); + if (ret) { + trigger_context_unregister_fault(adreno_dev, drawctxt); + return ret; + } - rc = gen7_hfi_cmdq_write(adreno_dev, (u32 *)&cmd, sizeof(cmd)); - if (rc) - goto done; - - mutex_unlock(&device->mutex); - - rc = wait_for_completion_timeout(&pending_ack.complete, - msecs_to_jiffies(30 * 1000)); - if (!rc) { - dev_err(&gmu->pdev->dev, - "Ack timeout for context unregister seq: %d ctx: %u ts: %u\n", - MSG_HDR_GET_SEQNUM(pending_ack.sent_hdr), - context->id, ts); - rc = -ETIMEDOUT; - - mutex_lock(&device->mutex); - - gmu_core_fault_snapshot(device, GMU_FAULT_CTX_UNREGISTER); - - /* - * Make sure we send all fences from this context to the TxQueue after recovery - */ - move_detached_context_hardware_fences(adreno_dev, drawctxt); - gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); + add_waiter(hfi, cmd.hdr, &pending_ack); + ret = gen7_hfi_cmdq_write(adreno_dev, (u32 *)&cmd, sizeof(cmd)); + if (ret) { + trigger_context_unregister_fault(adreno_dev, drawctxt); goto done; } - mutex_lock(&device->mutex); - - rc = check_detached_context_hardware_fences(adreno_dev, drawctxt); - if (rc) + ret = adreno_hwsched_ctxt_unregister_wait_completion(adreno_dev, + &gmu->pdev->dev, &pending_ack, gen7_hwsched_process_msgq, &cmd); + if (ret) { + trigger_context_unregister_fault(adreno_dev, drawctxt); goto done; + } + + ret = check_detached_context_hardware_fences(adreno_dev, drawctxt); + if (!ret) + ret = check_ack_failure(adreno_dev, &pending_ack); - rc = check_ack_failure(adreno_dev, &pending_ack); done: gen7_hwsched_active_count_put(adreno_dev); - del_waiter(hfi, &pending_ack); - return rc; + return ret; } void gen7_hwsched_context_detach(struct adreno_context *drawctxt) diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index daffd1e098..883c7760a2 
100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -3775,17 +3775,26 @@ int gen8_hwsched_drain_context_hw_fences(struct adreno_device *adreno_dev, return ret; } +static void trigger_context_unregister_fault(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + gmu_core_fault_snapshot(KGSL_DEVICE(adreno_dev), GMU_FAULT_CTX_UNREGISTER); + + /* Make sure we send all fences from this context to the TxQueue after recovery */ + move_detached_context_hardware_fences(adreno_dev, drawctxt); + gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); +} + static int send_context_unregister_hfi(struct adreno_device *adreno_dev, struct kgsl_context *context, u32 ts) { - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); struct adreno_context *drawctxt = ADRENO_CONTEXT(context); struct pending_cmd pending_ack; struct hfi_unregister_ctxt_cmd cmd; u32 seqnum; - int rc, ret; + int ret; /* Only send HFI if device is not in SLUMBER */ if (!context->gmu_registered || @@ -3801,60 +3810,44 @@ static int send_context_unregister_hfi(struct adreno_device *adreno_dev, cmd.ctxt_id = context->id, cmd.ts = ts, - seqnum = atomic_inc_return(&gmu->hfi.seqnum); - cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); - - add_waiter(hfi, cmd.hdr, &pending_ack); - /* * Although we know device is powered on, we can still enter SLUMBER * because the wait for ack below is done without holding the mutex. So * take an active count before releasing the mutex so as to avoid a * concurrent SLUMBER sequence while GMU is un-registering this context. 
*/ - gen8_hwsched_active_count_get(adreno_dev); + ret = gen8_hwsched_active_count_get(adreno_dev); + if (ret) { + trigger_context_unregister_fault(adreno_dev, drawctxt); + return ret; + } - rc = gen8_hfi_cmdq_write(adreno_dev, (u32 *)&cmd, sizeof(cmd)); - if (rc) - goto done; - - mutex_unlock(&device->mutex); - - rc = wait_for_completion_timeout(&pending_ack.complete, - msecs_to_jiffies(30 * 1000)); - if (!rc) { - dev_err(&gmu->pdev->dev, - "Ack timeout for context unregister seq: %d ctx: %u ts: %u\n", - MSG_HDR_GET_SEQNUM(pending_ack.sent_hdr), - context->id, ts); - rc = -ETIMEDOUT; - - mutex_lock(&device->mutex); - - gmu_core_fault_snapshot(device, GMU_FAULT_CTX_UNREGISTER); - - /* - * Make sure we send all fences from this context to the TxQueue after recovery - */ - move_detached_context_hardware_fences(adreno_dev, drawctxt); - gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); + add_waiter(hfi, cmd.hdr, &pending_ack); + ret = gen8_hfi_cmdq_write(adreno_dev, (u32 *)&cmd, sizeof(cmd)); + if (ret) { + trigger_context_unregister_fault(adreno_dev, drawctxt); goto done; } - mutex_lock(&device->mutex); - - rc = check_detached_context_hardware_fences(adreno_dev, drawctxt); - if (rc) + ret = adreno_hwsched_ctxt_unregister_wait_completion(adreno_dev, + &gmu->pdev->dev, &pending_ack, gen8_hwsched_process_msgq, &cmd); + if (ret) { + trigger_context_unregister_fault(adreno_dev, drawctxt); goto done; + } + + ret = check_detached_context_hardware_fences(adreno_dev, drawctxt); + if (!ret) + ret = check_ack_failure(adreno_dev, &pending_ack); - rc = check_ack_failure(adreno_dev, &pending_ack); done: gen8_hwsched_active_count_put(adreno_dev); - del_waiter(hfi, &pending_ack); - return rc; + return ret; } void gen8_hwsched_context_detach(struct adreno_context *drawctxt) diff --git a/adreno_hfi.h b/adreno_hfi.h index 385366b005..1e6a8942c1 100644 --- a/adreno_hfi.h 
+++ b/adreno_hfi.h @@ -1349,6 +1349,27 @@ int adreno_hwsched_wait_ack_completion(struct adreno_device *adreno_dev, struct device *dev, struct pending_cmd *ack, void (*process_msgq)(struct adreno_device *adreno_dev)); +/** + * adreno_hwsched_ctxt_unregister_wait_completion - Wait for HFI ack for context unregister + * adreno_dev: Pointer to the adreno device + * dev: Pointer to the device structure + * ack: Pointer to the pending ack + * process_msgq: Function pointer to the msgq processing function + * cmd: Pointer to the hfi packet header and data + * + * This function waits for the completion structure for context unregister hfi ack, + * which gets signaled asynchronously. In case there is a timeout, process the msgq + * one last time. If the ack is present, log an error and move on. If the ack isn't + * present, log an error and return -ETIMEDOUT. + * + * Return: 0 on success and -ETIMEDOUT on failure + */ +int adreno_hwsched_ctxt_unregister_wait_completion( + struct adreno_device *adreno_dev, + struct device *dev, struct pending_cmd *ack, + void (*process_msgq)(struct adreno_device *adreno_dev), + struct hfi_unregister_ctxt_cmd *cmd); + /** * hfi_get_minidump_string - Get the va-minidump string from entry * mem_kind: mem_kind type diff --git a/adreno_hwsched.c b/adreno_hwsched.c index d48dee8bab..144040db43 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -2335,6 +2335,49 @@ int adreno_hwsched_wait_ack_completion(struct adreno_device *adreno_dev, return -ETIMEDOUT; } +int adreno_hwsched_ctxt_unregister_wait_completion( + struct adreno_device *adreno_dev, + struct device *dev, struct pending_cmd *ack, + void (*process_msgq)(struct adreno_device *adreno_dev), + struct hfi_unregister_ctxt_cmd *cmd) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int ret; + u64 start, end; + + start = gpudev->read_alwayson(adreno_dev); + mutex_unlock(&device->mutex); + + ret = 
wait_for_completion_timeout(&ack->complete, + msecs_to_jiffies(30 * 1000)); + + mutex_lock(&device->mutex); + if (ret) + return 0; + + /* + * It is possible the ack came, but due to HLOS latencies in processing hfi interrupt + * and/or the f2h daemon, the ack isn't processed yet. Hence, process the msgq one last + * time. + */ + process_msgq(adreno_dev); + end = gpudev->read_alwayson(adreno_dev); + + if (completion_done(&ack->complete)) { + dev_err_ratelimited(dev, + "Ack unprocessed for context unregister seq: %d ctx: %u ts: %u ticks=%llu/%llu\n", + MSG_HDR_GET_SEQNUM(ack->sent_hdr), cmd->ctxt_id, + cmd->ts, start, end); + return 0; + } + + dev_err_ratelimited(dev, + "Ack timeout for context unregister seq: %d ctx: %u ts: %u ticks=%llu/%llu\n", + MSG_HDR_GET_SEQNUM(ack->sent_hdr), cmd->ctxt_id, cmd->ts, start, end); + return -ETIMEDOUT; +} + u32 adreno_hwsched_parse_payload(struct payload_section *payload, u32 key) { u32 i; From 68840a0eac149823fb96df42c13610d8e897f1c2 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Mon, 22 Apr 2024 16:56:36 +0530 Subject: [PATCH 0771/1016] kgsl: Assert Qactive during slumber and recovery GEMNOC can enter power collapse state during GPU power down sequence. This could abort CX GDSC collapse. Recommendation is to assert Qactive before turning off CX GDSC. 
Change-Id: Id9a726dff3577db31fb6aced12b2030c8905de4a Signed-off-by: Kamal Agrawal --- adreno_a6xx_gmu.c | 17 +++++++++++++++++ adreno_a6xx_hwsched.c | 11 ++++++++++- adreno_gen7_gmu.c | 13 +++++++++++++ adreno_gen7_hwsched.c | 9 ++++++++- adreno_gen8_gmu.c | 13 +++++++++++++ adreno_gen8_hwsched.c | 9 ++++++++- 6 files changed, 69 insertions(+), 3 deletions(-) diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index 8903c326f9..257d9b28c9 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -1846,6 +1846,14 @@ static void a6xx_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) /* Disconnect GPU from BUS is not needed if CX GDSC goes off later */ + /* + * GEMNOC can enter power collapse state during GPU power down sequence. + * This could abort CX GDSC collapse. Assert Qactive to avoid this. + */ + if ((adreno_is_a662(adreno_dev) || adreno_is_a621(adreno_dev) || + adreno_is_a635(adreno_dev))) + gmu_core_regwrite(device, A6XX_GPU_GMU_CX_GMU_CX_FALNEXT_INTF, 0x1); + /* Check no outstanding RPMh voting */ a6xx_complete_rpmh_votes(adreno_dev, GPU_RESET_TIMEOUT); @@ -1971,6 +1979,15 @@ static int a6xx_gmu_notify_slumber(struct adreno_device *adreno_dev) out: /* Make sure the fence is in ALLOW mode */ gmu_core_regwrite(device, A6XX_GMU_AO_AHB_FENCE_CTRL, 0); + + /* + * GEMNOC can enter power collapse state during GPU power down sequence. + * This could abort CX GDSC collapse. Assert Qactive to avoid this. 
+ */ + if ((adreno_is_a662(adreno_dev) || adreno_is_a621(adreno_dev) || + adreno_is_a635(adreno_dev))) + gmu_core_regwrite(device, A6XX_GPU_GMU_CX_GMU_CX_FALNEXT_INTF, 0x1); + return ret; } diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index 5b354fdb2d..0b6e025e26 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -527,8 +527,17 @@ static int a6xx_hwsched_notify_slumber(struct adreno_device *adreno_dev) /* Disable the power counter so that the GMU is not busy */ gmu_core_regwrite(device, A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 0); - return a6xx_hfi_send_cmd_async(adreno_dev, &req, sizeof(req)); + ret = a6xx_hfi_send_cmd_async(adreno_dev, &req, sizeof(req)); + /* + * GEMNOC can enter power collapse state during GPU power down sequence. + * This could abort CX GDSC collapse. Assert Qactive to avoid this. + */ + if ((adreno_is_a662(adreno_dev) || adreno_is_a621(adreno_dev) || + adreno_is_a635(adreno_dev))) + gmu_core_regwrite(device, A6XX_GPU_GMU_CX_GMU_CX_FALNEXT_INTF, 0x1); + + return ret; } static int a6xx_hwsched_gmu_power_off(struct adreno_device *adreno_dev) { diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index a607b42be5..1a3f0272db 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -1472,6 +1472,12 @@ static void gen7_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) /* Disconnect GPU from BUS is not needed if CX GDSC goes off later */ + /* + * GEMNOC can enter power collapse state during GPU power down sequence. + * This could abort CX GDSC collapse. Assert Qactive to avoid this. + */ + gmu_core_regwrite(device, GEN7_GPU_GMU_CX_GMU_CX_FALNEXT_INTF, 0x1); + /* Check no outstanding RPMh voting */ gen7_complete_rpmh_votes(gmu, 1); @@ -1554,6 +1560,13 @@ static int gen7_gmu_notify_slumber(struct adreno_device *adreno_dev) /* Make sure the fence is in ALLOW mode */ gmu_core_regwrite(device, GEN7_GMU_AO_AHB_FENCE_CTRL, 0); + + /* + * GEMNOC can enter power collapse state during GPU power down sequence. 
+ * This could abort CX GDSC collapse. Assert Qactive to avoid this. + */ + gmu_core_regwrite(device, GEN7_GPU_GMU_CX_GMU_CX_FALNEXT_INTF, 0x1); + return ret; } diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 702ac42797..9d6743dd0f 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -712,8 +712,15 @@ static int gen7_hwsched_notify_slumber(struct adreno_device *adreno_dev) /* Disable the power counter so that the GMU is not busy */ gmu_core_regwrite(device, GEN7_GMU_CX_GMU_POWER_COUNTER_ENABLE, 0); - return gen7_hfi_send_cmd_async(adreno_dev, &req, sizeof(req)); + ret = gen7_hfi_send_cmd_async(adreno_dev, &req, sizeof(req)); + /* + * GEMNOC can enter power collapse state during GPU power down sequence. + * This could abort CX GDSC collapse. Assert Qactive to avoid this. + */ + gmu_core_regwrite(device, GEN7_GPU_GMU_CX_GMU_CX_FALNEXT_INTF, 0x1); + + return ret; } static int gen7_hwsched_gmu_power_off(struct adreno_device *adreno_dev) { diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index 4b8fe92599..737531b919 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -1381,6 +1381,12 @@ static void gen8_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) /* Disconnect GPU from BUS is not needed if CX GDSC goes off later */ + /* + * GEMNOC can enter power collapse state during GPU power down sequence. + * This could abort CX GDSC collapse. Assert Qactive to avoid this. + */ + gmu_core_regwrite(device, GEN8_GMUCX_CX_FALNEXT_INTF, 0x1); + /* Check no outstanding RPMh voting */ gen8_complete_rpmh_votes(gmu, 1); @@ -1463,6 +1469,13 @@ static int gen8_gmu_notify_slumber(struct adreno_device *adreno_dev) /* Make sure the fence is in ALLOW mode */ gmu_core_regwrite(device, GEN8_GMUAO_AHB_FENCE_CTRL, 0); + + /* + * GEMNOC can enter power collapse state during GPU power down sequence. + * This could abort CX GDSC collapse. Assert Qactive to avoid this. 
+ */ + gmu_core_regwrite(device, GEN8_GMUCX_CX_FALNEXT_INTF, 0x1); + return ret; } diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index a96b161f7a..e4cb4995cd 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -776,8 +776,15 @@ static int gen8_hwsched_notify_slumber(struct adreno_device *adreno_dev) /* Disable the power counter so that the GMU is not busy */ gmu_core_regwrite(device, GEN8_GMUCX_POWER_COUNTER_ENABLE, 0); - return gen8_hfi_send_cmd_async(adreno_dev, &req, sizeof(req)); + ret = gen8_hfi_send_cmd_async(adreno_dev, &req, sizeof(req)); + /* + * GEMNOC can enter power collapse state during GPU power down sequence. + * This could abort CX GDSC collapse. Assert Qactive to avoid this. + */ + gmu_core_regwrite(device, GEN8_GMUCX_CX_FALNEXT_INTF, 0x1); + + return ret; } static int gen8_hwsched_gmu_power_off(struct adreno_device *adreno_dev) { From e13fdc59ad845db24954db4e8787c3436d64f4ec Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 16 Apr 2024 02:23:53 +0530 Subject: [PATCH 0772/1016] kgsl: Update the list of HFI feature strings Currently, the HFI feature string list includes only ACD and LM. Add other HFI features as well to this list. While at it, make feature_to_string() generation agnostic to avoid code duplication. 
Change-Id: Ia4f48cec5aed8dfa0f7524148f09cfa6404c38bb Signed-off-by: Kamal Agrawal --- adreno.c | 39 +++++++++++++++++++++++++++++++++++++++ adreno_a6xx_hfi.c | 17 ++--------------- adreno_gen7_hfi.c | 15 ++------------- adreno_gen8_hfi.c | 15 ++------------- adreno_hfi.h | 10 ++++++++++ 5 files changed, 55 insertions(+), 41 deletions(-) diff --git a/adreno.c b/adreno.c index 2da2434980..ec7e053397 100644 --- a/adreno.c +++ b/adreno.c @@ -1097,6 +1097,45 @@ static int adreno_probe_llcc(struct adreno_device *adreno_dev, } #endif +const char *hfi_feature_to_string(u32 feature) +{ + switch (feature) { + case HFI_FEATURE_HWSCHED: + return "HWSCHED"; + case HFI_FEATURE_PREEMPTION: + return "PREEMPTION"; + case HFI_FEATURE_LM: + return "LM"; + case HFI_FEATURE_IFPC: + return "IFPC"; + case HFI_FEATURE_BCL: + return "BCL"; + case HFI_FEATURE_ACD: + return "ACD"; + case HFI_FEATURE_KPROF: + return "KPROF"; + case HFI_FEATURE_BAIL_OUT_TIMER: + return "BAIL_OUT_TIMER"; + case HFI_FEATURE_GMU_STATS: + return "GMU_STATS"; + case HFI_FEATURE_CLX: + return "CLX"; + case HFI_FEATURE_LSR: + return "LSR"; + case HFI_FEATURE_LPAC: + return "LPAC"; + case HFI_FEATURE_HW_FENCE: + return "HW_FENCE"; + case HFI_FEATURE_PERF_NORETAIN: + return "PERF_NORETAIN"; + case HFI_FEATURE_DMS: + return "DMS"; + case HFI_FEATURE_AQE: + return "AQE"; + } + return "unknown"; +} + static void adreno_regmap_op_preaccess(struct kgsl_regmap_region *region) { struct kgsl_device *device = region->priv; diff --git a/adreno_a6xx_hfi.c b/adreno_a6xx_hfi.c index c9d9604730..37367a305f 100644 --- a/adreno_a6xx_hfi.c +++ b/adreno_a6xx_hfi.c @@ -385,16 +385,6 @@ int a6xx_hfi_send_core_fw_start(struct adreno_device *adreno_dev) return a6xx_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd)); } -static const char *feature_to_string(uint32_t feature) -{ - if (feature == HFI_FEATURE_ACD) - return "ACD"; - else if (feature == HFI_FEATURE_LM) - return "LM"; - - return "unknown"; -} - int 
a6xx_hfi_send_feature_ctrl(struct adreno_device *adreno_dev, uint32_t feature, uint32_t enable, uint32_t data) { @@ -412,11 +402,8 @@ int a6xx_hfi_send_feature_ctrl(struct adreno_device *adreno_dev, ret = a6xx_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd)); if (ret) - dev_err(&gmu->pdev->dev, - "Unable to %s feature %s (%d)\n", - enable ? "enable" : "disable", - feature_to_string(feature), - feature); + dev_err(&gmu->pdev->dev, "Unable to %s feature %s (%d)\n", + enable ? "enable" : "disable", hfi_feature_to_string(feature), feature); return ret; } diff --git a/adreno_gen7_hfi.c b/adreno_gen7_hfi.c index 2abe6e75d3..39adc35b65 100644 --- a/adreno_gen7_hfi.c +++ b/adreno_gen7_hfi.c @@ -386,14 +386,6 @@ int gen7_hfi_send_core_fw_start(struct adreno_device *adreno_dev) return gen7_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd)); } -static const char *feature_to_string(u32 feature) -{ - if (feature == HFI_FEATURE_ACD) - return "ACD"; - - return "unknown"; -} - /* For sending hfi message inline to handle GMU return type error */ int gen7_hfi_send_generic_req_v5(struct adreno_device *adreno_dev, void *cmd, struct pending_cmd *ret_cmd, u32 size_bytes) @@ -449,11 +441,8 @@ int gen7_hfi_send_feature_ctrl(struct adreno_device *adreno_dev, ret = gen7_hfi_send_generic_req_v5(adreno_dev, &cmd, &ret_cmd, sizeof(cmd)); if (ret < 0) - dev_err(&gmu->pdev->dev, - "Unable to %s feature %s (%d)\n", - enable ? "enable" : "disable", - feature_to_string(feature), - feature); + dev_err(&gmu->pdev->dev, "Unable to %s feature %s (%d)\n", + enable ? 
"enable" : "disable", hfi_feature_to_string(feature), feature); return ret; } diff --git a/adreno_gen8_hfi.c b/adreno_gen8_hfi.c index d8532ca4ef..1f53eae337 100644 --- a/adreno_gen8_hfi.c +++ b/adreno_gen8_hfi.c @@ -384,14 +384,6 @@ int gen8_hfi_send_core_fw_start(struct adreno_device *adreno_dev) return gen8_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd)); } -static const char *feature_to_string(u32 feature) -{ - if (feature == HFI_FEATURE_ACD) - return "ACD"; - - return "unknown"; -} - /* For sending hfi message inline to handle GMU return type error */ int gen8_hfi_send_generic_req_v5(struct adreno_device *adreno_dev, void *cmd, struct pending_cmd *ret_cmd, u32 size_bytes) @@ -447,11 +439,8 @@ int gen8_hfi_send_feature_ctrl(struct adreno_device *adreno_dev, ret = gen8_hfi_send_generic_req_v5(adreno_dev, &cmd, &ret_cmd, sizeof(cmd)); if (ret < 0) - dev_err(&gmu->pdev->dev, - "Unable to %s feature %s (%d)\n", - enable ? "enable" : "disable", - feature_to_string(feature), - feature); + dev_err(&gmu->pdev->dev, "Unable to %s feature %s (%d)\n", + enable ? "enable" : "disable", hfi_feature_to_string(feature), feature); return ret; } diff --git a/adreno_hfi.h b/adreno_hfi.h index e2220cb0b8..d1395da65c 100644 --- a/adreno_hfi.h +++ b/adreno_hfi.h @@ -1395,4 +1395,14 @@ static inline int hfi_get_minidump_string(u32 mem_kind, char *hfi_minidump_str, return 0; } + +/** + * hfi_feature_to_string - Convert an HFI feature value to its + * string representation + * @feature: HFI feature value to convert + * + * Return: Pointer to a string representing the given feature. + * If the feature is unknown, the function returns "unknown". 
+ */ +const char *hfi_feature_to_string(u32 feature); #endif From 218c0841f0c36cfb6cd3dfbdd9788999e6ab1fb2 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 2 May 2024 12:13:20 +0530 Subject: [PATCH 0773/1016] kgsl: gen8: Enable BCL feature for gen8_4_0 GPU Battery Current Limiter prevents under voltage and provides overcurrent protection. Change-Id: I9a40e1c0b8ddcc53dde53c990ea117c6ead1f82c Signed-off-by: Kamal Agrawal --- adreno-gpulist.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 656cece236..93f5ef0ca1 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2892,7 +2892,8 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_4_0 = { .compatible = "qcom,adreno-gpu-gen8-4-0", .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | - ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_IFPC, + ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_IFPC | + ADRENO_BCL, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_64M, @@ -2916,6 +2917,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_4_0 = { .gen8_snapshot_block_list = &gen8_0_0_snapshot_block_list, .qos_value = gen8_4_0_gbif_client_qos_values, .acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4), + .bcl_data = 1, }; static const struct adreno_gpu_core *adreno_gpulist[] = { From ed2e70c83794b118031ea37a877d05dcbb0f8a33 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sun, 7 Apr 2024 22:16:40 +0530 Subject: [PATCH 0774/1016] kgsl: gen8: Program CP interrupt status mask registers Program CP interrupt status mask registers to forward the errors to global interrupt status register. 
Change-Id: I972d580ecd63fc50d5df945e4297ff76e29f74fe Signed-off-by: Kamal Agrawal --- adreno_gen8.c | 192 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 128 insertions(+), 64 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index dfa54f7b2d..cdc5954764 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -22,6 +22,106 @@ #include "kgsl_trace.h" #include "kgsl_util.h" +/* CP Interrupt bits */ +#define GEN8_CP_GLOBAL_INT_HWFAULTBR 0 +#define GEN8_CP_GLOBAL_INT_HWFAULTBV 1 +#define GEN8_CP_GLOBAL_INT_HWFAULTLPAC 2 +#define GEN8_CP_GLOBAL_INT_HWFAULTAQE0 3 +#define GEN8_CP_GLOBAL_INT_HWFAULTAQE1 4 +#define GEN8_CP_GLOBAL_INT_HWFAULTDDEBR 5 +#define GEN8_CP_GLOBAL_INT_HWFAULTDDEBV 6 +#define GEN8_CP_GLOBAL_INT_SWFAULTBR 16 +#define GEN8_CP_GLOBAL_INT_SWFAULTBV 17 +#define GEN8_CP_GLOBAL_INT_SWFAULTLPAC 18 +#define GEN8_CP_GLOBAL_INT_SWFAULTAQE0 19 +#define GEN8_CP_GLOBAL_INT_SWFAULTAQE1 20 +#define GEN8_CP_GLOBAL_INT_SWFAULTDDEBR 21 +#define GEN8_CP_GLOBAL_INT_SWFAULTDDEBV 22 + +#define CP_INTERRUPT_STATUS_MASK_GLOBAL \ + (BIT(GEN8_CP_GLOBAL_INT_HWFAULTBR) | \ + BIT(GEN8_CP_GLOBAL_INT_HWFAULTBV) | \ + BIT(GEN8_CP_GLOBAL_INT_HWFAULTLPAC) | \ + BIT(GEN8_CP_GLOBAL_INT_HWFAULTAQE0) | \ + BIT(GEN8_CP_GLOBAL_INT_HWFAULTAQE1) | \ + BIT(GEN8_CP_GLOBAL_INT_HWFAULTDDEBR) | \ + BIT(GEN8_CP_GLOBAL_INT_HWFAULTDDEBV) | \ + BIT(GEN8_CP_GLOBAL_INT_SWFAULTBR) | \ + BIT(GEN8_CP_GLOBAL_INT_SWFAULTBV) | \ + BIT(GEN8_CP_GLOBAL_INT_SWFAULTLPAC) | \ + BIT(GEN8_CP_GLOBAL_INT_SWFAULTAQE0) | \ + BIT(GEN8_CP_GLOBAL_INT_SWFAULTAQE1) | \ + BIT(GEN8_CP_GLOBAL_INT_SWFAULTDDEBR) | \ + BIT(GEN8_CP_GLOBAL_INT_SWFAULTDDEBV)) + +/* CP HW Fault status bits */ +#define CP_HW_RBFAULT 0 +#define CP_HW_IB1FAULT 1 +#define CP_HW_IB2FAULT 2 +#define CP_HW_IB3FAULT 3 +#define CP_HW_SDSFAULT 4 +#define CP_HW_MRBFAULT 5 +#define CP_HW_VSDFAULT 6 +#define CP_HW_SQEREADBRUSTOVF 8 +#define CP_HW_EVENTENGINEOVF 9 +#define CP_HW_UCODEERROR 10 + +#define CP_HW_FAULT_STATUS_MASK_PIPE \ + 
(BIT(CP_HW_RBFAULT) | \ + BIT(CP_HW_IB1FAULT) | \ + BIT(CP_HW_IB2FAULT) | \ + BIT(CP_HW_IB3FAULT) | \ + BIT(CP_HW_SDSFAULT) | \ + BIT(CP_HW_MRBFAULT) | \ + BIT(CP_HW_VSDFAULT) | \ + BIT(CP_HW_SQEREADBRUSTOVF) | \ + BIT(CP_HW_EVENTENGINEOVF) | \ + BIT(CP_HW_UCODEERROR)) + +/* CP SW Fault status bits */ +#define CP_SW_CSFRBWRAP 0 +#define CP_SW_CSFIB1WRAP 1 +#define CP_SW_CSFIB2WRAP 2 +#define CP_SW_CSFIB3WRAP 3 +#define CP_SW_SDSWRAP 4 +#define CP_SW_MRBWRAP 5 +#define CP_SW_VSDWRAP 6 +#define CP_SW_OPCODEERROR 8 +#define CP_SW_VSDPARITYERROR 9 +#define CP_SW_REGISTERPROTECTIONERROR 10 +#define CP_SW_ILLEGALINSTRUCTION 11 +#define CP_SW_SMMUFAULT 12 +#define CP_SW_VBIFRESPCLIENT 13 +#define CP_SW_VBIFRESPTYPE 19 +#define CP_SW_VBIFRESPREAD 21 +#define CP_SW_VBIFRESP 22 +#define CP_SW_RTWROVF 23 +#define CP_SW_LRZRTWROVF 24 +#define CP_SW_LRZRTREFCNTOVF 25 +#define CP_SW_LRZRTCLRRESMISS 26 + +#define CP_SW_FAULT_STATUS_MASK_PIPE \ + (BIT(CP_SW_CSFRBWRAP) | \ + BIT(CP_SW_CSFIB1WRAP) | \ + BIT(CP_SW_CSFIB2WRAP) | \ + BIT(CP_SW_CSFIB3WRAP) | \ + BIT(CP_SW_SDSWRAP) | \ + BIT(CP_SW_MRBWRAP) | \ + BIT(CP_SW_VSDWRAP) | \ + BIT(CP_SW_OPCODEERROR) | \ + BIT(CP_SW_VSDPARITYERROR) | \ + BIT(CP_SW_REGISTERPROTECTIONERROR) | \ + BIT(CP_SW_ILLEGALINSTRUCTION) | \ + BIT(CP_SW_SMMUFAULT) | \ + BIT(CP_SW_VBIFRESPCLIENT) | \ + BIT(CP_SW_VBIFRESPTYPE) | \ + BIT(CP_SW_VBIFRESPREAD) | \ + BIT(CP_SW_VBIFRESP) | \ + BIT(CP_SW_RTWROVF) | \ + BIT(CP_SW_LRZRTWROVF) | \ + BIT(CP_SW_LRZRTREFCNTOVF) | \ + BIT(CP_SW_LRZRTCLRRESMISS)) + /* IFPC & Preemption static powerup restore list */ static const u32 gen8_pwrup_reglist[] = { GEN8_UCHE_MODE_CNTL, @@ -129,9 +229,14 @@ static const u32 gen8_ifpc_pwrup_reglist[] = { GEN8_CP_PROTECT_REG_GLOBAL + 44, GEN8_CP_PROTECT_REG_GLOBAL + 45, GEN8_CP_PROTECT_REG_GLOBAL + 63, + GEN8_CP_INTERRUPT_STATUS_MASK_GLOBAL, }; static const struct gen8_pwrup_extlist gen8_0_0_pwrup_extlist[] = { + { GEN8_CP_HW_FAULT_STATUS_MASK_PIPE, BIT(PIPE_BR) | BIT(PIPE_BV) | 
BIT(PIPE_LPAC) + | BIT(PIPE_AQE0) | BIT(PIPE_AQE1) | BIT(PIPE_DDE_BR) | BIT(PIPE_DDE_BV) }, + { GEN8_CP_INTERRUPT_STATUS_MASK_PIPE, BIT(PIPE_BR) | BIT(PIPE_BV) | BIT(PIPE_LPAC) + | BIT(PIPE_AQE0) | BIT(PIPE_AQE1) | BIT(PIPE_DDE_BR) | BIT(PIPE_DDE_BV) }, { GEN8_CP_PROTECT_CNTL_PIPE, BIT(PIPE_BR) | BIT(PIPE_BV) | BIT(PIPE_LPAC)}, { GEN8_CP_PROTECT_REG_PIPE + 15, BIT(PIPE_BR) | BIT(PIPE_BV) | BIT(PIPE_LPAC)}, { GEN8_GRAS_TSEFE_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR)}, @@ -871,12 +976,17 @@ static void gen8_patch_pwrup_reglist(struct adreno_device *adreno_dev) * Write external pipe specific regs (
- triplets) * offset and the current value into GPU buffer */ - for (pipe_id = PIPE_BR; pipe_id <= PIPE_LPAC; pipe_id++) { + for (pipe_id = PIPE_BR; pipe_id <= PIPE_DDE_BV; pipe_id++) { for (i = 0; i < ARRAY_SIZE(gen8_0_0_pwrup_extlist); i++) { unsigned long pipe = (unsigned long)gen8_0_0_pwrup_extlist[i].pipelines; if (!test_bit(pipe_id, &pipe)) continue; + if ((pipe_id == PIPE_LPAC) && !ADRENO_FEATURE(adreno_dev, ADRENO_LPAC)) + continue; + if (((pipe_id == PIPE_AQE0) || (pipe_id == PIPE_AQE1)) && + !ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) + continue; *dest++ = FIELD_PREP(GENMASK(15, 12), pipe_id); *dest++ = gen8_0_0_pwrup_extlist[i].offset; @@ -1031,7 +1141,7 @@ int gen8_start(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); - u32 mal, mode = 0, mode2 = 0, rgb565_predicator = 0, amsbc = 0; + u32 mal, pipe_id, mode = 0, mode2 = 0, rgb565_predicator = 0, amsbc = 0; struct gen8_device *gen8_dev = container_of(adreno_dev, struct gen8_device, adreno_dev); /* @@ -1202,25 +1312,29 @@ int gen8_start(struct adreno_device *adreno_dev) _llc_configure_gpu_scid(adreno_dev); _llc_gpuhtw_slice_activate(adreno_dev); - gen8_regwrite_aperture(device, GEN8_CP_APRIV_CNTL_PIPE, - GEN8_BR_APRIV_DEFAULT, PIPE_BR, 0, 0); - gen8_regwrite_aperture(device, GEN8_CP_APRIV_CNTL_PIPE, - GEN8_APRIV_DEFAULT, PIPE_BV, 0, 0); + for (pipe_id = PIPE_BR; pipe_id <= PIPE_DDE_BV; pipe_id++) { + if ((pipe_id == PIPE_LPAC) && !ADRENO_FEATURE(adreno_dev, ADRENO_LPAC)) + continue; + if (((pipe_id == PIPE_AQE0) || (pipe_id == PIPE_AQE1)) && + !ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) + continue; - if (adreno_dev->lpac_enabled) gen8_regwrite_aperture(device, GEN8_CP_APRIV_CNTL_PIPE, - GEN8_APRIV_DEFAULT, PIPE_LPAC, 0, 0); - - if (ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) { - gen8_regwrite_aperture(device, GEN8_CP_APRIV_CNTL_PIPE, - GEN8_APRIV_DEFAULT, PIPE_AQE0, 0, 0); - gen8_regwrite_aperture(device, 
GEN8_CP_APRIV_CNTL_PIPE, - GEN8_APRIV_DEFAULT, PIPE_AQE1, 0, 0); + (pipe_id == PIPE_BR ? GEN8_BR_APRIV_DEFAULT : GEN8_APRIV_DEFAULT), + pipe_id, 0, 0); + gen8_regwrite_aperture(device, GEN8_CP_INTERRUPT_STATUS_MASK_PIPE, + CP_SW_FAULT_STATUS_MASK_PIPE, pipe_id, 0, 0); + gen8_regwrite_aperture(device, GEN8_CP_HW_FAULT_STATUS_MASK_PIPE, + CP_HW_FAULT_STATUS_MASK_PIPE, pipe_id, 0, 0); } /* Clear aperture register */ gen8_host_aperture_set(adreno_dev, 0, 0, 0); + /* Program CP interrupt status mask to enable HW and SW error interrupts */ + kgsl_regwrite(device, GEN8_CP_INTERRUPT_STATUS_MASK_GLOBAL, + CP_INTERRUPT_STATUS_MASK_GLOBAL); + _set_secvid(device); /* @@ -1539,56 +1653,6 @@ int gen8_microcode_read(struct adreno_device *adreno_dev) return adreno_get_firmware(adreno_dev, gen8_core->sqefw_name, sqe_fw); } -/* CP Interrupt bits */ -#define GEN8_CP_GLOBAL_INT_HWFAULTBR 0 -#define GEN8_CP_GLOBAL_INT_HWFAULTBV 1 -#define GEN8_CP_GLOBAL_INT_HWFAULTLPAC 2 -#define GEN8_CP_GLOBAL_INT_HWFAULTAQE0 3 -#define GEN8_CP_GLOBAL_INT_HWFAULTAQE1 4 -#define GEN8_CP_GLOBAL_INT_HWFAULTDDEBR 5 -#define GEN8_CP_GLOBAL_INT_HWFAULTDDEBV 6 -#define GEN8_CP_GLOBAL_INT_SWFAULTBR 16 -#define GEN8_CP_GLOBAL_INT_SWFAULTBV 17 -#define GEN8_CP_GLOBAL_INT_SWFAULTLPAC 18 -#define GEN8_CP_GLOBAL_INT_SWFAULTAQE0 19 -#define GEN8_CP_GLOBAL_INT_SWFAULTAQE1 20 -#define GEN8_CP_GLOBAL_INT_SWFAULTDDEBR 21 -#define GEN8_CP_GLOBAL_INT_SWFAULTDDEBV 22 - -/* CP HW Fault status bits */ -#define CP_HW_RBFAULT 0 -#define CP_HW_IB1FAULT 1 -#define CP_HW_IB2FAULT 2 -#define CP_HW_IB3FAULT 3 -#define CP_HW_SDSFAULT 4 -#define CP_HW_MRBFAULT 5 -#define CP_HW_VSDFAULT 6 -#define CP_HW_SQEREADBRUSTOVF 8 -#define CP_HW_EVENTENGINEOVF 9 -#define CP_HW_UCODEERROR 10 - -/* CP SW Fault status bits */ -#define CP_SW_CSFRBWRAP 0 -#define CP_SW_CSFIB1WRAP 1 -#define CP_SW_CSFIB2WRAP 2 -#define CP_SW_CSFIB3WRAP 3 -#define CP_SW_SDSWRAP 4 -#define CP_SW_MRBWRAP 5 -#define CP_SW_VSDWRAP 6 -#define CP_SW_OPCODEERROR 8 -#define 
CP_SW_VSDPARITYERROR 9 -#define CP_SW_REGISTERPROTECTIONERROR 10 -#define CP_SW_ILLEGALINSTRUCTION 11 -#define CP_SW_SMMUFAULT 12 -#define CP_SW_VBIFRESPCLIENT 13 -#define CP_SW_VBIFRESPTYPE 19 -#define CP_SW_VBIFRESPREAD 21 -#define CP_SW_VBIFRESP 22 -#define CP_SW_RTWROVF 23 -#define CP_SW_LRZRTWROVF 24 -#define CP_SW_LRZRTREFCNTOVF 25 -#define CP_SW_LRZRTCLRRESMISS 26 - static void gen8_get_cp_hwfault_status(struct adreno_device *adreno_dev, u32 status) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); From d3decd4171c0725336ab02699f58048f264c2b1d Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Tue, 30 Apr 2024 19:42:49 +0530 Subject: [PATCH 0775/1016] kgsl: Do not call of_platform_depopulate when user context probe fails kgsl_iommu_bind is called with component_mutex held. As of_platform_depopulate will call kgsl_mmu_dev_remove, which also needs component_mutex will cause a deadlock. Fix this by removing of_platform_depopulate from kgsl_iommu_bind. Also handle kgsl_set_smmu_aperture() error for user context probe. 
Change-Id: I50b5af92ad6668ca66cca2a3dae0dfcdfd4dfe4c Signed-off-by: Pankaj Gupta --- kgsl_iommu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kgsl_iommu.c b/kgsl_iommu.c index aff5cb0c31..f378e77be5 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -2544,7 +2544,9 @@ static int iommu_probe_user_context(struct kgsl_device *device, /* Enable TTBR0 on the default and LPAC contexts */ kgsl_iommu_set_ttbr0(&iommu->user_context, mmu, &pt->info.cfg); - kgsl_set_smmu_aperture(device, &iommu->user_context); + ret = kgsl_set_smmu_aperture(device, &iommu->user_context); + if (ret) + goto err; kgsl_iommu_set_ttbr0(&iommu->lpac_context, mmu, &pt->info.cfg); @@ -2760,10 +2762,8 @@ int kgsl_iommu_bind(struct kgsl_device *device, struct platform_device *pdev) /* Probe the default pagetable */ ret = iommu_probe_user_context(device, node); - if (ret) { - of_platform_depopulate(&pdev->dev); + if (ret) goto err; - } /* Probe the secure pagetable (this is optional) */ iommu_probe_secure_context(device, node); From c56ec808a73dc1d39a55c687b57e439d4767830c Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Tue, 19 Mar 2024 14:40:12 -0700 Subject: [PATCH 0776/1016] kgsl: gen8: Update some shader snapshot sizes Some shader sizes needed to be updated for gen8 snapshot. 
Change-Id: I31b0a9a948e23e15324210a6e9286957f896a224 Signed-off-by: Urvashi Agrawal --- adreno_gen8_0_0_snapshot.h | 42 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h index 83f9fcc7d4..674dc520f6 100644 --- a/adreno_gen8_0_0_snapshot.h +++ b/adreno_gen8_0_0_snapshot.h @@ -32,16 +32,16 @@ static struct gen8_shader_block gen8_0_0_shader_blocks[] = { { SP_TMO_TAG, 0x0080, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, { SP_SMO_TAG, 0x0080, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, { SP_STATE_DATA, 0x0040, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_HWAVE_RAM, 0x0100, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { SP_HWAVE_RAM, 0x0200, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, { SP_L0_INST_BUF, 0x0080, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, { SP_LB_8_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, { SP_LB_9_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, { SP_LB_10_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, { SP_LB_11_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, { SP_LB_12_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { HLSQ_DATAPATH_DSTR_META, 0x0170, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_DATAPATH_DSTR_META, 0x0600, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, { HLSQ_DATAPATH_DSTR_META, 0x0020, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_DESC_REMAP_META, 0x0018, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_DESC_REMAP_META, 0x00A0, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, { HLSQ_DESC_REMAP_META, 0x000C, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, { HLSQ_DESC_REMAP_META, 0x0008, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, { HLSQ_SLICE_TOP_META, 0x0048, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, @@ -49,12 +49,12 @@ static struct gen8_shader_block gen8_0_0_shader_blocks[] = { { HLSQ_SLICE_TOP_META, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, { HLSQ_L2STC_TAG_RAM, 0x0200, 1, 1, PIPE_BR, HLSQ_STATE, 
1, 1}, { HLSQ_L2STC_INFO_CMD, 0x0474, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0080, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_CPS_BE_CTXT_BUF_RAM_TAG, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0100, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0100, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_CPS_BE_CTXT_BUF_RAM_TAG, 0x0100, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, { HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, { HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CPS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CPS_BE_CTXT_BUF_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, { HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, { HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, { HLSQ_CHUNK_CPS_RAM, 0x0300, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, @@ -69,7 +69,7 @@ static struct gen8_shader_block gen8_0_0_shader_blocks[] = { { HLSQ_ICB_CPS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, { HLSQ_CVS_MISC_RAM, 0x0540, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, { HLSQ_CVS_MISC_RAM, 0x0540, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_CPS_MISC_RAM, 0x0640, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CPS_MISC_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, { HLSQ_CPS_MISC_RAM, 0x00B0, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, { HLSQ_CPS_MISC_RAM_1, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, { HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, @@ -79,26 +79,26 @@ static struct gen8_shader_block gen8_0_0_shader_blocks[] = { { HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, { HLSQ_GFX_CPS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, { HLSQ_GFX_CPS_CONST_RAM, 0x0800, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_CVS_MISC_RAM_TAG, 0x0050, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CVS_MISC_RAM_TAG, 
0x0050, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_CPS_MISC_RAM_TAG, 0x0050, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CPS_MISC_RAM_TAG, 0x0008, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_INST_RAM_TAG, 0x0014, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CVS_MISC_RAM_TAG, 0x0060, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CVS_MISC_RAM_TAG, 0x0060, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_CPS_MISC_RAM_TAG, 0x0600, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CPS_MISC_RAM_TAG, 0x0012, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_INST_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, { HLSQ_INST_RAM_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, { HLSQ_INST_RAM_TAG, 0x0004, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, { HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, { HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0060, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, { HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, { HLSQ_GFX_LOCAL_MISC_RAM, 0x03C0, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, { HLSQ_GFX_LOCAL_MISC_RAM, 0x0280, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, { HLSQ_GFX_LOCAL_MISC_RAM, 0x0050, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_LOCAL_MISC_RAM_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_LOCAL_MISC_RAM_TAG, 0x0008, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM_TAG, 0x0014, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM_TAG, 0x000C, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, { HLSQ_INST_RAM_1, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_STPROC_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_SLICE_BACKEND_META, 0x00C0, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_SLICE_BACKEND_META, 0x00C0, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_STPROC_META, 0x0400, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_SLICE_BACKEND_META, 0x0188, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { 
HLSQ_SLICE_BACKEND_META, 0x0188, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, { HLSQ_SLICE_BACKEND_META, 0x00C0, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, { HLSQ_INST_RAM_2, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, { HLSQ_DATAPATH_META, 0x0020, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, @@ -106,8 +106,8 @@ static struct gen8_shader_block gen8_0_0_shader_blocks[] = { { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, { HLSQ_INDIRECT_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_BACKEND_META, 0x0020, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_BACKEND_META, 0x0020, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_BACKEND_META, 0x0034, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_BACKEND_META, 0x0034, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, { HLSQ_BACKEND_META, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, }; From 1b90ba74220c37ea34d49a059e86cf2fcdc3fa54 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sun, 17 Mar 2024 22:50:49 +0530 Subject: [PATCH 0777/1016] kgsl: gen8: Make number of physical slices target specific Currently, the number of physical slices is hard-coded. However, it can vary from GPU to GPU. Thus, introduce a new function to fetch the number of slices for each target. 
Change-Id: Iec2bed221109080098cac159d07095d2dae346a2 Signed-off-by: Kamal Agrawal --- adreno_gen8.h | 12 ++++++++ adreno_gen8_0_0_snapshot.h | 58 +++++++++++++++++++------------------- adreno_gen8_gmu_snapshot.c | 4 +-- adreno_gen8_snapshot.c | 16 +++++------ adreno_gen8_snapshot.h | 5 ++-- 5 files changed, 53 insertions(+), 42 deletions(-) diff --git a/adreno_gen8.h b/adreno_gen8.h index 14efc7a678..bd9fb82ed5 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -12,6 +12,8 @@ #include "adreno_gen8_gmu.h" #include "gen8_reg.h" +#define GEN8_0_0_NUM_PHYSICAL_SLICES 3 + /* Forward struct declaration */ struct gen8_snapshot_block_list; @@ -615,4 +617,14 @@ void gen8_coresight_init(struct adreno_device *device); static inline void gen8_coresight_init(struct adreno_device *device) { } #endif +/** + * gen8_get_num_slices - Get the number of physical slices for Gen8 GPUs + * @adreno_dev: Handle to the adreno device + * + * Return: Number of physical slices available on Gen8 GPUs + */ +static inline u32 gen8_get_num_slices(struct adreno_device *adreno_dev) +{ + return GEN8_0_0_NUM_PHYSICAL_SLICES; +} #endif diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h index 674dc520f6..6259281f80 100644 --- a/adreno_gen8_0_0_snapshot.h +++ b/adreno_gen8_0_0_snapshot.h @@ -10,35 +10,35 @@ #include "adreno_gen8_snapshot.h" static struct gen8_shader_block gen8_0_0_shader_blocks[] = { - { TP0_TMO_DATA, 0x0200, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { TP0_SMO_DATA, 0x0080, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { TP0_MIPMAP_BASE_DATA, 0x0080, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_INST_DATA_3, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_INST_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_INST_DATA_1, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_0_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_1_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - 
{ SP_LB_2_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_3_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_4_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_5_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_6_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_7_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_CB_RAM, 0x0390, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_13_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_14_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_INST_TAG, 0x0100, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_INST_DATA_2, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_TMO_TAG, 0x0080, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_SMO_TAG, 0x0080, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_STATE_DATA, 0x0040, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_HWAVE_RAM, 0x0200, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_L0_INST_BUF, 0x0080, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_8_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_9_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_10_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_11_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, - { SP_LB_12_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1}, + { TP0_TMO_DATA, 0x0200, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { TP0_SMO_DATA, 0x0080, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { TP0_MIPMAP_BASE_DATA, 0x0080, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_INST_DATA_3, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_INST_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_INST_DATA_1, 0x0800, 2, 1, PIPE_BR, 
USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_0_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_1_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_2_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_3_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_4_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_5_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_6_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_7_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_CB_RAM, 0x0390, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_13_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_14_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_INST_TAG, 0x0100, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_INST_DATA_2, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_TMO_TAG, 0x0080, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_SMO_TAG, 0x0080, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_STATE_DATA, 0x0040, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_HWAVE_RAM, 0x0200, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_L0_INST_BUF, 0x0080, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_8_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_9_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_10_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_11_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_12_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, { HLSQ_DATAPATH_DSTR_META, 0x0600, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, { 
HLSQ_DATAPATH_DSTR_META, 0x0020, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, { HLSQ_DESC_REMAP_META, 0x00A0, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, diff --git a/adreno_gen8_gmu_snapshot.c b/adreno_gen8_gmu_snapshot.c index 17fe920b44..53eeb5f857 100644 --- a/adreno_gen8_gmu_snapshot.c +++ b/adreno_gen8_gmu_snapshot.c @@ -243,7 +243,7 @@ static void gen8_gmu_device_snapshot(struct kgsl_device *device, { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - const struct adreno_gen8_core *gpucore = to_gen8_core(ADRENO_DEVICE(device)); + const struct adreno_gen8_core *gpucore = to_gen8_core(adreno_dev); const struct gen8_snapshot_block_list *gen8_snapshot_block_list = gpucore->gen8_snapshot_block_list; u32 i, slice, j; @@ -274,7 +274,7 @@ static void gen8_gmu_device_snapshot(struct kgsl_device *device, for (i = 0 ; i < gen8_snapshot_block_list->num_gmu_gx_regs; i++) { struct gen8_reg_list *regs = &gen8_snapshot_block_list->gmu_gx_regs[i]; - slice = regs->slice_region ? 
MAX_PHYSICAL_SLICES : 1; + slice = NUMBER_OF_SLICES(regs->slice_region, adreno_dev); for (j = 0 ; j < slice; j++) { info.regs = regs; info.slice_id = SLICE_ID(regs->slice_region, j); diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index a96d7bb67e..c1679a59cb 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -696,7 +696,7 @@ static void gen8_snapshot_mempool(struct kgsl_device *device, for (i = 0; i < mempool_index_registers_len; i++) { cp_indexed_reg = &gen8_snapshot_block_list->mempool_index_registers[i]; - slice = NUMBER_OF_SLICES(cp_indexed_reg->slice_region); + slice = NUMBER_OF_SLICES(cp_indexed_reg->slice_region, ADRENO_DEVICE(device)); for (j = 0; j < slice; j++) { @@ -842,7 +842,7 @@ static void gen8_snapshot_dbgahb_regs(struct kgsl_device *device, for (i = 0; i < num_sptp_clusters; i++) { struct gen8_sptp_cluster_registers *cluster = &sptp_clusters[i]; - slice = NUMBER_OF_SLICES(cluster->slice_region); + slice = NUMBER_OF_SLICES(cluster->slice_region, ADRENO_DEVICE(device)); for (sp = 0; sp < cluster->num_sps; sp++) { for (usptp = 0; usptp < cluster->num_usptps; usptp++) { for (j = 0; j < slice; j++) { @@ -870,7 +870,7 @@ static void gen8_snapshot_dbgahb_regs(struct kgsl_device *device, for (i = 0; i < num_sptp_clusters; i++) { struct gen8_sptp_cluster_registers *cluster = &sptp_clusters[i]; - slice = NUMBER_OF_SLICES(cluster->slice_region); + slice = NUMBER_OF_SLICES(cluster->slice_region, ADRENO_DEVICE(device)); cluster->offset = offset; @@ -1036,7 +1036,7 @@ static void gen8_snapshot_mvc_regs(struct kgsl_device *device, for (i = 0; i < num_cluster; i++) { struct gen8_cluster_registers *cluster = &clusters[i]; - slice = NUMBER_OF_SLICES(cluster->slice_region); + slice = NUMBER_OF_SLICES(cluster->slice_region, ADRENO_DEVICE(device)); for (j = 0; j < slice; j++) { info.cluster = cluster; info.pipe_id = cluster->pipe_id; @@ -1056,7 +1056,7 @@ static void gen8_snapshot_mvc_regs(struct kgsl_device *device, for (i = 0; i < 
num_cluster; i++) { struct gen8_cluster_registers *cluster = &clusters[i]; - slice = NUMBER_OF_SLICES(cluster->slice_region); + slice = NUMBER_OF_SLICES(cluster->slice_region, ADRENO_DEVICE(device)); cluster->offset = offset; for (j = 0; j < slice; j++) { @@ -1497,7 +1497,7 @@ static void gen8_reglist_snapshot(struct kgsl_device *device, for (i = 0; reg_list[i].regs; i++) { struct gen8_reg_list *regs = ®_list[i]; - slices = NUMBER_OF_SLICES(regs->slice_region); + slices = NUMBER_OF_SLICES(regs->slice_region, ADRENO_DEVICE(device)); for (j = 0; j < slices; j++) { info.regs = regs; info.slice_id = SLICE_ID(regs->slice_region, j); @@ -1514,7 +1514,7 @@ static void gen8_reglist_snapshot(struct kgsl_device *device, for (i = 0; reg_list[i].regs; i++) { struct gen8_reg_list *regs = ®_list[i]; - slices = NUMBER_OF_SLICES(regs->slice_region); + slices = NUMBER_OF_SLICES(regs->slice_region, ADRENO_DEVICE(device)); regs->offset = offset; for (j = 0; j < slices; j++) { @@ -1721,7 +1721,7 @@ void gen8_snapshot(struct adreno_device *adreno_dev, struct gen8_reg_list_info info = {0}; u32 j, slices; - slices = NUMBER_OF_SLICES(regs->slice_region); + slices = NUMBER_OF_SLICES(regs->slice_region, adreno_dev); for (j = 0; j < slices; j++) { info.regs = regs; diff --git a/adreno_gen8_snapshot.h b/adreno_gen8_snapshot.h index b4d6c7867f..bacbb4ad0a 100644 --- a/adreno_gen8_snapshot.h +++ b/adreno_gen8_snapshot.h @@ -40,9 +40,8 @@ enum location_id { #define UNSLICE 0 #define SLICE 1 -#define MAX_PHYSICAL_SLICES 3 - -#define NUMBER_OF_SLICES(region) ((region == SLICE) ? MAX_PHYSICAL_SLICES : 1) +#define NUMBER_OF_SLICES(region, adreno_dev) \ + ((region == SLICE) ? gen8_get_num_slices(adreno_dev) : 1) #define SLICE_ID(region, j) ((region == SLICE) ? 
j : UINT_MAX) #define GEN8_DEBUGBUS_BLOCK_SIZE 0x100 From da2a70662393180bfdf9703f14322f57af4ed82f Mon Sep 17 00:00:00 2001 From: Mohammed Mirza Mandayappurath Manzoor Date: Mon, 18 Mar 2024 16:39:23 -0700 Subject: [PATCH 0778/1016] kgsl: Properly track previously scaled GMU frequency If default bus vote when GPU wakes up end up greater than perf_ddr_bw, GMU would scale to the higher frequency. This then gets overridden by the gmu frequency set in bootup path. However, we consider the previous frequency to be the higher frequency which would prevent any scaling up until a scale down occurs. Prevent this by properly tracking scaled frequencies. Change-Id: Ic61c3c8114463a66dd4f15ec7bd552c786750332 Signed-off-by: Mohammed Mirza Mandayappurath Manzoor --- adreno_a6xx_gmu.c | 32 +++++++++++++++++++++-------- adreno_a6xx_gmu.h | 11 ++++++++++ adreno_a6xx_hwsched.c | 35 +++++++++++++++++-------------- adreno_a6xx_hwsched_hfi.c | 9 -------- adreno_gen7_gmu.c | 32 +++++++++++++++++++++-------- adreno_gen7_gmu.h | 10 +++++++++ adreno_gen7_hwsched.c | 43 +++++++-------------------------------- adreno_gen8_gmu.c | 32 +++++++++++++++++++++-------- adreno_gen8_gmu.h | 11 ++++++++++ adreno_gen8_hwsched.c | 43 +++++++-------------------------------- 10 files changed, 138 insertions(+), 120 deletions(-) diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index 8903c326f9..1671089a3f 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -2312,21 +2312,37 @@ void a6xx_gmu_aop_send_acd_state(struct a6xx_gmu_device *gmu, bool flag) "AOP mbox send message failed: %d\n", ret); } +int a6xx_gmu_clock_set_rate(struct adreno_device *adreno_dev, u32 req_freq) +{ + struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); + int ret = 0; + + a6xx_rdpm_cx_freq_update(gmu, req_freq / 1000); + + ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", + req_freq); + if (ret) { + dev_err(&gmu->pdev->dev, "GMU clock:%d set failed:%d\n", + req_freq, ret); + return ret; + } + + 
trace_kgsl_gmu_pwrlevel(req_freq, gmu->cur_freq); + + gmu->cur_freq = req_freq; + + return ret; +} + int a6xx_gmu_enable_clks(struct adreno_device *adreno_dev, u32 level) { struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret; - a6xx_rdpm_cx_freq_update(gmu, gmu->freqs[level] / 1000); - - ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", - gmu->freqs[level]); - if (ret) { - dev_err(&gmu->pdev->dev, "GMU clock:%d set failed:%d\n", - gmu->freqs[level], ret); + ret = a6xx_gmu_clock_set_rate(adreno_dev, gmu->freqs[level]); + if (ret) return ret; - } ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "hub_clk", adreno_dev->gmu_hub_clk_freq); diff --git a/adreno_a6xx_gmu.h b/adreno_a6xx_gmu.h index 1e24ec84ca..9bafc4334d 100644 --- a/adreno_a6xx_gmu.h +++ b/adreno_a6xx_gmu.h @@ -107,6 +107,8 @@ struct a6xx_gmu_device { u32 stats_interval; /** @stats_kobj: kernel object for GMU stats directory in sysfs */ struct kobject stats_kobj; + /** @cur_freq: Tracks current frequency for GMU */ + u32 cur_freq; }; /* Helper function to get to a6xx gmu device from adreno device */ @@ -450,4 +452,13 @@ void a6xx_gmu_send_nmi(struct kgsl_device *device, bool force, */ int a6xx_gmu_add_to_minidump(struct adreno_device *adreno_dev); +/** + * a6xx_gmu_clock_set_rate - Set the gmu clock rate + * @adreno_dev: Handle to the adreno device + * @req_freq: Requested freq to set gmu to + * + * Returns 0 on success or error on clock set rate failure + */ +int a6xx_gmu_clock_set_rate(struct adreno_device *adreno_dev, u32 req_freq); + #endif diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index 5b354fdb2d..adf3651dee 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -393,6 +393,13 @@ static int a6xx_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) if (ret) goto err; + /* Switch to min GMU clock */ + ret = a6xx_gmu_clock_set_rate(adreno_dev, gmu->freqs[0]); + if (ret) { + 
a6xx_hwsched_hfi_stop(adreno_dev); + goto err; + } + icc_set_bw(pwr->icc_path, 0, 0); device->gmu_fault = false; @@ -460,6 +467,13 @@ static int a6xx_hwsched_gmu_boot(struct adreno_device *adreno_dev) if (ret) goto err; + /* Switch to min GMU clock */ + ret = a6xx_gmu_clock_set_rate(adreno_dev, gmu->freqs[0]); + if (ret) { + a6xx_hwsched_hfi_stop(adreno_dev); + goto err; + } + device->gmu_fault = false; kgsl_pwrctrl_set_state(device, KGSL_STATE_AWARE); @@ -1061,8 +1075,8 @@ static void scale_gmu_frequency(struct adreno_device *adreno_dev, int buslevel) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct kgsl_pwrctrl *pwr = &device->pwrctrl; struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); - static unsigned long prev_freq; - unsigned long freq = gmu->freqs[0]; + u32 cur_freq = gmu->cur_freq; + u32 req_freq = gmu->freqs[0]; if (!gmu->perf_ddr_bw) return; @@ -1072,22 +1086,13 @@ static void scale_gmu_frequency(struct adreno_device *adreno_dev, int buslevel) * a higher frequency */ if (pwr->ddr_table[buslevel] >= gmu->perf_ddr_bw) - freq = gmu->freqs[GMU_MAX_PWRLEVELS - 1]; + req_freq = gmu->freqs[GMU_MAX_PWRLEVELS - 1]; - if (prev_freq == freq) + if (cur_freq == req_freq) return; - if (kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", freq)) { - dev_err(&gmu->pdev->dev, "Unable to set the GMU clock to %ld\n", - freq); - return; - } - - a6xx_rdpm_cx_freq_update(gmu, freq / 1000); - - trace_kgsl_gmu_pwrlevel(freq, prev_freq); - - prev_freq = freq; + a6xx_gmu_clock_set_rate(adreno_dev, req_freq); + return; } static int a6xx_hwsched_bus_set(struct adreno_device *adreno_dev, int buslevel, diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index cf1a87399f..1c4cb8c826 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -1294,15 +1294,6 @@ int a6xx_hwsched_hfi_start(struct adreno_device *adreno_dev) if (ret) goto err; - /* Switch to min GMU clock */ - a6xx_rdpm_cx_freq_update(gmu, gmu->freqs[0] / 1000); - - ret = 
kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", - gmu->freqs[0]); - if (ret) - dev_err(&gmu->pdev->dev, "GMU clock:%d set failed:%d\n", - gmu->freqs[0], ret); - err: if (ret) a6xx_hwsched_hfi_stop(adreno_dev); diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index ce106dab6b..4d70033de0 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -1851,21 +1851,37 @@ void gen7_gmu_aop_send_acd_state(struct gen7_gmu_device *gmu, bool flag) "AOP mbox send message failed: %d\n", ret); } +int gen7_gmu_clock_set_rate(struct adreno_device *adreno_dev, u32 req_freq) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret = 0; + + gen7_rdpm_cx_freq_update(gmu, req_freq / 1000); + + ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", + req_freq); + if (ret) { + dev_err(&gmu->pdev->dev, "GMU clock:%d set failed:%d\n", + req_freq, ret); + return ret; + } + + trace_kgsl_gmu_pwrlevel(req_freq, gmu->cur_freq); + + gmu->cur_freq = req_freq; + + return ret; +} + int gen7_gmu_enable_clks(struct adreno_device *adreno_dev, u32 level) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret; - gen7_rdpm_cx_freq_update(gmu, gmu->freqs[level] / 1000); - - ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", - gmu->freqs[level]); - if (ret) { - dev_err(&gmu->pdev->dev, "GMU clock:%d set failed:%d\n", - gmu->freqs[level], ret); + ret = gen7_gmu_clock_set_rate(adreno_dev, gmu->freqs[level]); + if (ret) return ret; - } ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "hub_clk", adreno_dev->gmu_hub_clk_freq); diff --git a/adreno_gen7_gmu.h b/adreno_gen7_gmu.h index d1a8231e72..ee6548a8f8 100644 --- a/adreno_gen7_gmu.h +++ b/adreno_gen7_gmu.h @@ -123,6 +123,8 @@ struct gen7_gmu_device { u32 switch_to_unsec_hdr; /** @dcvs_table: Table for gpu dcvs levels */ struct gen7_dcvs_table dcvs_table; + /** @cur_freq: Tracks scaled frequency for GMU */ + u32 cur_freq; }; /* Helper function to get to gen7 
gmu device from adreno device */ @@ -511,4 +513,12 @@ size_t gen7_snapshot_gmu_mem(struct kgsl_device *device, */ u32 gen7_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab); +/** + * gen7_gmu_clock_set_rate - Set the gmu clock rate + * @adreno_dev: Handle to the adreno device + * @req_freq: Requested freq to set gmu to + * + * Returns 0 on success or error on clock set rate failure + */ +int gen7_gmu_clock_set_rate(struct adreno_device *adreno_dev, u32 req_freq); #endif diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 702ac42797..4e704d7df2 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -452,25 +452,6 @@ void gen7_hwsched_snapshot(struct adreno_device *adreno_dev, read_unlock(&device->context_lock); } -static int gmu_clock_set_rate(struct adreno_device *adreno_dev) -{ - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - int ret = 0; - - /* Switch to min GMU clock */ - gen7_rdpm_cx_freq_update(gmu, gmu->freqs[0] / 1000); - - ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", - gmu->freqs[0]); - if (ret) - dev_err(&gmu->pdev->dev, "GMU clock:%d set failed:%d\n", - gmu->freqs[0], ret); - - trace_kgsl_gmu_pwrlevel(gmu->freqs[0], gmu->freqs[GMU_MAX_PWRLEVELS - 1]); - - return ret; -} - static int gen7_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -554,7 +535,7 @@ static int gen7_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) if (ret) goto err; - ret = gmu_clock_set_rate(adreno_dev); + ret = gen7_gmu_clock_set_rate(adreno_dev, gmu->freqs[0]); if (ret) { gen7_hwsched_hfi_stop(adreno_dev); goto err; @@ -639,7 +620,7 @@ static int gen7_hwsched_gmu_boot(struct adreno_device *adreno_dev) if (ret) goto err; - ret = gmu_clock_set_rate(adreno_dev); + ret = gen7_gmu_clock_set_rate(adreno_dev, gmu->freqs[0]); if (ret) { gen7_hwsched_hfi_stop(adreno_dev); goto err; @@ -1441,8 +1422,8 @@ static void scale_gmu_frequency(struct adreno_device 
*adreno_dev, int buslevel) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct kgsl_pwrctrl *pwr = &device->pwrctrl; struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - static unsigned long prev_freq; - unsigned long freq = gmu->freqs[0]; + u32 cur_freq = gmu->cur_freq; + u32 req_freq = gmu->freqs[0]; if (!gmu->perf_ddr_bw) return; @@ -1452,22 +1433,12 @@ static void scale_gmu_frequency(struct adreno_device *adreno_dev, int buslevel) * a higher frequency */ if (pwr->ddr_table[buslevel] >= gmu->perf_ddr_bw) - freq = gmu->freqs[GMU_MAX_PWRLEVELS - 1]; + req_freq = gmu->freqs[GMU_MAX_PWRLEVELS - 1]; - if (prev_freq == freq) + if (cur_freq == req_freq) return; - if (kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", freq)) { - dev_err(&gmu->pdev->dev, "Unable to set the GMU clock to %ld\n", - freq); - return; - } - - gen7_rdpm_cx_freq_update(gmu, freq / 1000); - - trace_kgsl_gmu_pwrlevel(freq, prev_freq); - - prev_freq = freq; + gen7_gmu_clock_set_rate(adreno_dev, req_freq); } static int gen7_hwsched_bus_set(struct adreno_device *adreno_dev, int buslevel, diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index 2197cce544..77ef243883 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -1757,21 +1757,37 @@ void gen8_gmu_aop_send_acd_state(struct gen8_gmu_device *gmu, bool flag) "AOP qmp send message failed: %d\n", ret); } +int gen8_gmu_clock_set_rate(struct adreno_device *adreno_dev, u32 req_freq) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret = 0; + + gen8_rdpm_cx_freq_update(gmu, req_freq / 1000); + + ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", + req_freq); + if (ret) { + dev_err(&gmu->pdev->dev, "GMU clock:%d set failed:%d\n", + req_freq, ret); + return ret; + } + + trace_kgsl_gmu_pwrlevel(req_freq, gmu->cur_freq); + + gmu->cur_freq = req_freq; + + return ret; +} + int gen8_gmu_enable_clks(struct adreno_device *adreno_dev, u32 level) { struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct 
kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret; - gen8_rdpm_cx_freq_update(gmu, gmu->freqs[level] / 1000); - - ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", - gmu->freqs[level]); - if (ret) { - dev_err(&gmu->pdev->dev, "GMU clock:%d set failed:%d\n", - gmu->freqs[level], ret); + ret = gen8_gmu_clock_set_rate(adreno_dev, gmu->freqs[level]); + if (ret) return ret; - } ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "hub_clk", adreno_dev->gmu_hub_clk_freq); diff --git a/adreno_gen8_gmu.h b/adreno_gen8_gmu.h index b468bed19f..81241f8240 100644 --- a/adreno_gen8_gmu.h +++ b/adreno_gen8_gmu.h @@ -123,6 +123,8 @@ struct gen8_gmu_device { u32 switch_to_unsec_hdr; /** @dcvs_table: Table for gpu dcvs levels */ struct gen8_dcvs_table dcvs_table; + /** @cur_freq: Tracks scaled frequency for GMU */ + u32 cur_freq; }; /* Helper function to get to gen8 gmu device from adreno device */ @@ -497,4 +499,13 @@ size_t gen8_snapshot_gmu_mem(struct kgsl_device *device, */ u32 gen8_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab); +/** + * gen8_gmu_clock_set_rate - Set the gmu clock rate + * @adreno_dev: Handle to the adreno device + * @req_freq: Requested freq to set gmu to + * + * Returns 0 on success or error on clock set rate failure + */ +int gen8_gmu_clock_set_rate(struct adreno_device *adreno_dev, u32 req_freq); + #endif diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 9be6a1eff9..029ac57744 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -404,25 +404,6 @@ void gen8_hwsched_snapshot(struct adreno_device *adreno_dev, read_unlock(&device->context_lock); } -static int gmu_clock_set_rate(struct adreno_device *adreno_dev) -{ - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - int ret = 0; - - /* Switch to min GMU clock */ - gen8_rdpm_cx_freq_update(gmu, gmu->freqs[0] / 1000); - - ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", - gmu->freqs[0]); - if (ret) - dev_err(&gmu->pdev->dev, "GMU clock:%d set 
failed:%d\n", - gmu->freqs[0], ret); - - trace_kgsl_gmu_pwrlevel(gmu->freqs[0], gmu->freqs[GMU_MAX_PWRLEVELS - 1]); - - return ret; -} - static void _get_hw_fence_entries(struct adreno_device *adreno_dev) { struct device_node *node = NULL; @@ -575,7 +556,7 @@ static int gen8_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) if (ret) goto err; - ret = gmu_clock_set_rate(adreno_dev); + ret = gen8_gmu_clock_set_rate(adreno_dev, gmu->freqs[0]); if (ret) { gen8_hwsched_hfi_stop(adreno_dev); goto err; @@ -663,7 +644,7 @@ static int gen8_hwsched_gmu_boot(struct adreno_device *adreno_dev) if (ret) goto err; - ret = gmu_clock_set_rate(adreno_dev); + ret = gen8_gmu_clock_set_rate(adreno_dev, gmu->freqs[0]); if (ret) { gen8_hwsched_hfi_stop(adreno_dev); goto err; @@ -1448,8 +1429,8 @@ static void scale_gmu_frequency(struct adreno_device *adreno_dev, int buslevel) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct kgsl_pwrctrl *pwr = &device->pwrctrl; struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - static unsigned long prev_freq; - unsigned long freq = gmu->freqs[0]; + u32 cur_freq = gmu->cur_freq; + u32 req_freq = gmu->freqs[0]; if (!gmu->perf_ddr_bw) return; @@ -1459,22 +1440,12 @@ static void scale_gmu_frequency(struct adreno_device *adreno_dev, int buslevel) * a higher frequency */ if (pwr->ddr_table[buslevel] >= gmu->perf_ddr_bw) - freq = gmu->freqs[GMU_MAX_PWRLEVELS - 1]; + req_freq = gmu->freqs[GMU_MAX_PWRLEVELS - 1]; - if (prev_freq == freq) + if (cur_freq == req_freq) return; - if (kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", freq)) { - dev_err(&gmu->pdev->dev, "Unable to set the GMU clock to %ld\n", - freq); - return; - } - - gen8_rdpm_cx_freq_update(gmu, freq / 1000); - - trace_kgsl_gmu_pwrlevel(freq, prev_freq); - - prev_freq = freq; + gen8_gmu_clock_set_rate(adreno_dev, req_freq); } static int gen8_hwsched_bus_set(struct adreno_device *adreno_dev, int buslevel, From 15e2d10efdc02d370a18f3380f9e87476eca1014 Mon Sep 17 00:00:00 
2001 From: Sanjay Yadav Date: Tue, 16 Apr 2024 21:02:30 +0530 Subject: [PATCH 0779/1016] kgsl: Fix couple of static checker warnings Initialize all members of hfi_warmboot_scratch_cmd to 0 in gen8_hwsched_hfi_send_warmboot_cmd() and set count to 0 in gen8_protect_init(). Change-Id: I7a6f45e71039bee0b649ca5335b50ae74ae4b3fd Signed-off-by: Sanjay Yadav --- adreno_gen8.c | 2 +- adreno_gen8_hwsched_hfi.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index dfa54f7b2d..31e3229a03 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -625,7 +625,7 @@ static void gen8_protect_init(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); const struct gen8_protected_regs *regs = gen8_core->protected_regs; - u32 count; + u32 count = 0; int i; /* diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 2a2153ef28..a62622beb8 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -1895,7 +1895,7 @@ static bool gen8_hwsched_warmboot_possible(struct adreno_device *adreno_dev) static int gen8_hwsched_hfi_send_warmboot_cmd(struct adreno_device *adreno_dev, struct kgsl_memdesc *desc, u32 flag, bool async, struct pending_cmd *ack) { - struct hfi_warmboot_scratch_cmd cmd; + struct hfi_warmboot_scratch_cmd cmd = {0}; int ret; if (!adreno_dev->warmboot_enabled) From 73fb70f1cff730f5b8274eac6ee59e2864b2983f Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Mon, 22 Apr 2024 21:59:33 +0530 Subject: [PATCH 0780/1016] kgsl: Set aperture for LPAC context during resume Set the LPAC SMMU aperture while resuming from hibernation or from deep sleep state. 
Change-Id: I38ece5d09b10ea45b1461163a8603be7a182d807 Signed-off-by: Pankaj Gupta --- adreno.c | 8 ++++++++ kgsl_iommu.c | 16 ++++++++-------- kgsl_iommu.h | 10 ++++++++++ 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/adreno.c b/adreno.c index 2da2434980..05096dd36c 100644 --- a/adreno.c +++ b/adreno.c @@ -1520,6 +1520,10 @@ static int adreno_pm_resume(struct device *dev) if (status) return status; + + status = kgsl_set_smmu_lpac_aperture(device, &iommu->lpac_context); + if (status < 0) + return status; } #endif @@ -3710,6 +3714,10 @@ static int adreno_hibernation_resume(struct device *dev) if (ret) goto err; + ret = kgsl_set_smmu_lpac_aperture(device, &iommu->lpac_context); + if (ret < 0) + goto err; + gmu_core_dev_force_first_boot(device); msm_adreno_tz_reinit(pwrscale->devfreqptr); diff --git a/kgsl_iommu.c b/kgsl_iommu.c index aff5cb0c31..4d0848a9bf 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -1450,12 +1450,14 @@ int kgsl_set_smmu_aperture(struct kgsl_device *device, return ret; } -static int set_smmu_lpac_aperture(struct kgsl_device *device, +int kgsl_set_smmu_lpac_aperture(struct kgsl_device *device, struct kgsl_iommu_context *context) { int ret; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - if (!test_bit(KGSL_MMU_SMMU_APERTURE, &device->mmu.features)) + if (!test_bit(KGSL_MMU_SMMU_APERTURE, &device->mmu.features) || + !ADRENO_FEATURE(adreno_dev, ADRENO_LPAC)) return 0; ret = qcom_scm_kgsl_set_smmu_lpac_aperture(context->cb_num); @@ -2548,12 +2550,10 @@ static int iommu_probe_user_context(struct kgsl_device *device, kgsl_iommu_set_ttbr0(&iommu->lpac_context, mmu, &pt->info.cfg); - if (ADRENO_FEATURE(adreno_dev, ADRENO_LPAC)) { - ret = set_smmu_lpac_aperture(device, &iommu->lpac_context); - if (ret < 0) { - kgsl_iommu_detach_context(&iommu->lpac_context); - goto err; - } + ret = kgsl_set_smmu_lpac_aperture(device, &iommu->lpac_context); + if (ret < 0) { + kgsl_iommu_detach_context(&iommu->lpac_context); + goto err; } return 
0; diff --git a/kgsl_iommu.h b/kgsl_iommu.h index cdc7e528a9..4db2a55751 100644 --- a/kgsl_iommu.h +++ b/kgsl_iommu.h @@ -227,4 +227,14 @@ struct kgsl_iommu_pt { int kgsl_set_smmu_aperture(struct kgsl_device *device, struct kgsl_iommu_context *context); +/** + * kgsl_set_smmu_lpac_aperture - set SMMU Aperture for LPAC context + * @device: Pointer to the KGSL device + * @context: Pointer to the LPAC iommu context + * + * Return: 0 on success or negative on failure. + */ +int kgsl_set_smmu_lpac_aperture(struct kgsl_device *device, + struct kgsl_iommu_context *context); + #endif From 4a6e72481781469cc5ca604d162973b36cc97ad4 Mon Sep 17 00:00:00 2001 From: Kaushal Sanadhya Date: Wed, 24 Apr 2024 18:10:33 +0530 Subject: [PATCH 0781/1016] kgsl: gen8: Add support for Gen8_3_0 GPU Add support for Gen8_3_0 gpu. Change-Id: I7220ea0036443353cabe1ee7b7a45aeb1db4583d Signed-off-by: Kaushal Sanadhya --- adreno-gpulist.h | 91 ++++ adreno.h | 11 + adreno_gen8.c | 162 +++++++- adreno_gen8.h | 3 + adreno_gen8_3_0_snapshot.h | 823 +++++++++++++++++++++++++++++++++++++ adreno_gen8_snapshot.c | 32 ++ gen8_reg.h | 3 + 7 files changed, 1117 insertions(+), 8 deletions(-) create mode 100644 adreno_gen8_3_0_snapshot.h diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 656cece236..e54067f53c 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2918,6 +2918,96 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_4_0 = { .acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4), }; +extern const struct gen8_snapshot_block_list gen8_3_0_snapshot_block_list; + +static const struct kgsl_regmap_list gen8_3_0_gbif_cx_regs[] = { + { GEN8_GBIF_QSB_SIDE0, 0x00071e20 }, + { GEN8_GBIF_QSB_SIDE1, 0x00071e20 }, + { GEN8_GBIF_QSB_SIDE2, 0x00071e20 }, + { GEN8_GBIF_QSB_SIDE3, 0x00071e20 }, + { GEN8_GBIF_CX_CONFIG, 0x20023000 }, +}; + +/* GEN8_3_0 noncontext register list */ +static const struct gen8_nonctxt_regs gen8_3_0_nonctxt_regs[] = { + { GEN8_CP_SMMU_STREAM_ID_LPAC, 0x00000101, BIT(PIPE_NONE) }, + 
{ GEN8_DBGC_CFG_DBGBUS_CNTLT, 0xf0004000, BIT(PIPE_NONE) }, + { GEN8_DBGC_CFG_DBGBUS_MASKL_0, 0x00000003, BIT(PIPE_NONE) }, + { GEN8_DBGC_CFG_DBGBUS_MASKL_2, 0xffffffff, BIT(PIPE_NONE) }, + { GEN8_DBGC_CFG_DBGBUS_MASKL_3, 0xffffffff, BIT(PIPE_NONE) }, + { GEN8_DBGC_CFG_DBGBUS_BYTEL_0, 0x00000008, BIT(PIPE_NONE) }, + { GEN8_DBGC_CFG_DBGBUS_BYTEL_1, 0x76543210, BIT(PIPE_NONE) }, + { GEN8_GRAS_DBG_ECO_CNTL, 0x00f80800, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { GEN8_PC_AUTO_VERTEX_STRIDE, 0x00000001, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { GEN8_PC_VIS_STREAM_CNTL, 0x10010000, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { GEN8_PC_CONTEXT_SWITCH_STABILIZE_CNTL_1, 0x00000002, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { GEN8_PC_CHICKEN_BITS_1, 0x00000003, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { GEN8_PC_CHICKEN_BITS_2, 0x00000200, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { GEN8_PC_CHICKEN_BITS_3, 0x00500000, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { GEN8_PC_CHICKEN_BITS_4, 0x00500050, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { GEN8_RB_CCU_CNTL, 0x00000068, BIT(PIPE_BR) }, + { GEN8_RB_RESOLVE_PREFETCH_CNTL, 0x00000007, BIT(PIPE_BR) }, + { GEN8_RB_CMP_DBG_ECO_CNTL, 0x00004000, BIT(PIPE_BR) }, + { GEN8_RBBM_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, + { GEN8_RBBM_SLICE_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, + { GEN8_RBBM_WAIT_IDLE_CLOCKS_CNTL, 0x00000030, BIT(PIPE_NONE) }, + { GEN8_RBBM_WAIT_IDLE_CLOCKS_CNTL2, 0x00000030, BIT(PIPE_NONE) }, + { GEN8_UCHE_GBIF_GX_CONFIG, 0x010240e0, BIT(PIPE_NONE) }, + { GEN8_RBBM_GBIF_CLIENT_QOS_CNTL, 0x22122212, BIT(PIPE_NONE) }, + { GEN8_RBBM_CGC_P2S_CNTL, 0x00000040, BIT(PIPE_NONE) }, + { GEN8_SP_CHICKEN_BITS_2, BIT(22) | BIT(23), BIT(PIPE_NONE) }, + { GEN8_SP_CHICKEN_BITS_3, 0x00300000, BIT(PIPE_NONE) }, + { GEN8_SP_PERFCTR_SHADER_MASK, 0x0000003f, BIT(PIPE_NONE) }, + { GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, 0x00000080, BIT(PIPE_NONE) }, + { GEN8_SP_READ_SEL, 0x0001ff00, BIT(PIPE_NONE) }, + { GEN8_TPL1_DBG_ECO_CNTL, 0x10000000, BIT(PIPE_NONE) }, + { GEN8_TPL1_DBG_ECO_CNTL1, 0x00000724, 
BIT(PIPE_NONE) }, + { GEN8_UCHE_MODE_CNTL, 0x00020000, BIT(PIPE_NONE) }, + { GEN8_UCHE_CCHE_MODE_CNTL, 0x00001000, BIT(PIPE_NONE) }, + { GEN8_UCHE_CCHE_CACHE_WAYS, 0x00000800, BIT(PIPE_NONE) }, + { GEN8_UCHE_CACHE_WAYS, 0x00080000, BIT(PIPE_NONE) }, + { GEN8_UCHE_VARB_IDLE_TIMEOUT, 0x00000020, BIT(PIPE_NONE) }, + { GEN8_VFD_DBG_ECO_CNTL, 0x00008000, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { GEN8_VFD_CB_BV_THRESHOLD, 0x00500050, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { GEN8_VFD_CB_BR_THRESHOLD, 0x00600060, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { GEN8_VFD_CB_BUSY_REQ_CNT, 0x00200020, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { GEN8_VFD_CB_LP_REQ_CNT, 0x00100020, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { GEN8_VPC_FLATSHADE_MODE_CNTL, 0x00000001, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { GEN8_VSC_BIN_SIZE, 0x00010001, BIT(PIPE_NONE) }, + { GEN8_RB_GC_GMEM_PROTECT, 0x00900000, BIT(PIPE_BR) }, + { 0 }, +}; + +static const struct adreno_gen8_core adreno_gpu_core_gen8_3_0 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_GEN8_3_0, + UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), + .compatible = "qcom,adreno-gpu-gen8-3-0", + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | + ADRENO_CONTENT_PROTECTION | ADRENO_IFPC, + .gpudev = &adreno_gen8_hwsched_gpudev.base, + .perfcounters = &adreno_gen8_perfcounters, + .uche_gmem_alignment = SZ_64M, + .gmem_size = (SZ_512K + SZ_64K), + .bus_width = 32, + .snapshot_size = SZ_8M, + }, + .sqefw_name = "gen80300_sqe.fw", + .gmufw_name = "gen80300_gmu.bin", + .zap_name = "gen80300_zap.mbn", + .ao_hwcg = gen8_ao_hwcg_regs, + .ao_hwcg_count = ARRAY_SIZE(gen8_ao_hwcg_regs), + .gbif = gen8_3_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen8_3_0_gbif_cx_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = gen8_0_0_protected_regs, /* Reuse the protected regs list from Gen8_0_0 */ + .nonctxt_regs = gen8_3_0_nonctxt_regs, + .highest_bank_bit = 15, + .gmu_hub_clk_freq = 200000000, + .gen8_snapshot_block_list = &gen8_3_0_snapshot_block_list, + .ctxt_record_size = (4558 * SZ_1K), +}; + static const 
struct adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_a306, /* Deprecated */ &adreno_gpu_core_a306a, /* Deprecated */ @@ -2970,5 +3060,6 @@ static const struct adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_gen7_11_0.base, &adreno_gpu_core_gen8_0_0.base, &adreno_gpu_core_gen8_0_1.base, + &adreno_gpu_core_gen8_3_0.base, &adreno_gpu_core_gen8_4_0.base, }; diff --git a/adreno.h b/adreno.h index 1fd8f47b66..2b5eeca983 100644 --- a/adreno.h +++ b/adreno.h @@ -248,6 +248,7 @@ enum adreno_gpurev { ADRENO_REV_GEN7_11_0 = ADRENO_GPUREV_VALUE(7, 11, 0), ADRENO_REV_GEN8_0_0 = ADRENO_GPUREV_VALUE(8, 0, 0), ADRENO_REV_GEN8_0_1 = ADRENO_GPUREV_VALUE(8, 0, 1), + ADRENO_REV_GEN8_3_0 = ADRENO_GPUREV_VALUE(8, 3, 0), ADRENO_REV_GEN8_4_0 = ADRENO_GPUREV_VALUE(8, 4, 0), }; @@ -1274,6 +1275,10 @@ ADRENO_TARGET(gen7_9_0, ADRENO_REV_GEN7_9_0) ADRENO_TARGET(gen7_9_1, ADRENO_REV_GEN7_9_1) ADRENO_TARGET(gen7_14_0, ADRENO_REV_GEN7_14_0) ADRENO_TARGET(gen7_11_0, ADRENO_REV_GEN7_11_0) +ADRENO_TARGET(gen8_0_0, ADRENO_REV_GEN8_0_0) +ADRENO_TARGET(gen8_0_1, ADRENO_REV_GEN8_0_1) +ADRENO_TARGET(gen8_3_0, ADRENO_REV_GEN8_3_0) +ADRENO_TARGET(gen8_4_0, ADRENO_REV_GEN8_4_0) static inline int adreno_is_gen7_9_x(struct adreno_device *adreno_dev) { @@ -1293,6 +1298,12 @@ static inline int adreno_is_gen7_2_x_family(struct adreno_device *adreno_dev) adreno_is_gen7_14_0(adreno_dev) || adreno_is_gen7_11_0(adreno_dev); } +static inline int adreno_is_gen8_0_x_family(struct adreno_device *adreno_dev) +{ + return adreno_is_gen8_0_0(adreno_dev) || adreno_is_gen8_0_1(adreno_dev) || + adreno_is_gen8_4_0(adreno_dev); +} + /* * adreno_checkreg_off() - Checks the validity of a register enum * @adreno_dev: Pointer to adreno device diff --git a/adreno_gen8.c b/adreno_gen8.c index dfa54f7b2d..dd51d82b22 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -48,6 +48,29 @@ static const u32 gen8_pwrup_reglist[] = { GEN8_SP_READ_SEL, }; +/* IFPC & Preemption static powerup restore list for gen8_3_0 */ +static 
const u32 gen8_3_0_pwrup_reglist[] = { + GEN8_UCHE_MODE_CNTL, + GEN8_UCHE_VARB_IDLE_TIMEOUT, + GEN8_UCHE_GBIF_GX_CONFIG, + GEN8_UCHE_CACHE_WAYS, + GEN8_UCHE_CCHE_MODE_CNTL, + GEN8_UCHE_CCHE_CACHE_WAYS, + GEN8_UCHE_CCHE_GC_GMEM_RANGE_MIN_LO, + GEN8_UCHE_CCHE_GC_GMEM_RANGE_MIN_HI, + GEN8_UCHE_WRITE_THRU_BASE_LO, + GEN8_UCHE_WRITE_THRU_BASE_HI, + GEN8_UCHE_TRAP_BASE_LO, + GEN8_UCHE_TRAP_BASE_HI, + GEN8_UCHE_CLIENT_PF, + GEN8_VSC_BIN_SIZE, + GEN8_RB_CMP_NC_MODE_CNTL, + GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, + GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_LO, + GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_HI, + GEN8_SP_READ_SEL, +}; + /* IFPC only static powerup restore list */ static const u32 gen8_ifpc_pwrup_reglist[] = { GEN8_RBBM_NC_MODE_CNTL, @@ -131,6 +154,86 @@ static const u32 gen8_ifpc_pwrup_reglist[] = { GEN8_CP_PROTECT_REG_GLOBAL + 63, }; +/* IFPC only static powerup restore list for gen8_3_0*/ +static const u32 gen8_3_0_ifpc_pwrup_reglist[] = { + GEN8_RBBM_NC_MODE_CNTL, + GEN8_RBBM_SLICE_INTERFACE_HANG_INT_CNTL, + GEN8_RBBM_SLICE_NC_MODE_CNTL, + GEN8_SP_NC_MODE_CNTL, + GEN8_SP_CHICKEN_BITS_2, + GEN8_SP_CHICKEN_BITS_3, + GEN8_SP_PERFCTR_SHADER_MASK, + GEN8_TPL1_NC_MODE_CNTL, + GEN8_TPL1_DBG_ECO_CNTL, + GEN8_TPL1_DBG_ECO_CNTL1, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_1, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_2, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_3, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_4, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_5, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_6, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_7, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_8, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_9, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_10, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_11, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_12, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_13, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_14, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_15, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_16, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_17, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_18, + GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_19, + GEN8_CP_PROTECT_REG_GLOBAL, + GEN8_CP_PROTECT_REG_GLOBAL + 1, + 
GEN8_CP_PROTECT_REG_GLOBAL + 2, + GEN8_CP_PROTECT_REG_GLOBAL + 3, + GEN8_CP_PROTECT_REG_GLOBAL + 4, + GEN8_CP_PROTECT_REG_GLOBAL + 5, + GEN8_CP_PROTECT_REG_GLOBAL + 6, + GEN8_CP_PROTECT_REG_GLOBAL + 7, + GEN8_CP_PROTECT_REG_GLOBAL + 8, + GEN8_CP_PROTECT_REG_GLOBAL + 9, + GEN8_CP_PROTECT_REG_GLOBAL + 10, + GEN8_CP_PROTECT_REG_GLOBAL + 11, + GEN8_CP_PROTECT_REG_GLOBAL + 12, + GEN8_CP_PROTECT_REG_GLOBAL + 13, + GEN8_CP_PROTECT_REG_GLOBAL + 14, + GEN8_CP_PROTECT_REG_GLOBAL + 15, + GEN8_CP_PROTECT_REG_GLOBAL + 16, + GEN8_CP_PROTECT_REG_GLOBAL + 17, + GEN8_CP_PROTECT_REG_GLOBAL + 18, + GEN8_CP_PROTECT_REG_GLOBAL + 19, + GEN8_CP_PROTECT_REG_GLOBAL + 20, + GEN8_CP_PROTECT_REG_GLOBAL + 21, + GEN8_CP_PROTECT_REG_GLOBAL + 22, + GEN8_CP_PROTECT_REG_GLOBAL + 23, + GEN8_CP_PROTECT_REG_GLOBAL + 24, + GEN8_CP_PROTECT_REG_GLOBAL + 25, + GEN8_CP_PROTECT_REG_GLOBAL + 26, + GEN8_CP_PROTECT_REG_GLOBAL + 27, + GEN8_CP_PROTECT_REG_GLOBAL + 28, + GEN8_CP_PROTECT_REG_GLOBAL + 29, + GEN8_CP_PROTECT_REG_GLOBAL + 30, + GEN8_CP_PROTECT_REG_GLOBAL + 31, + GEN8_CP_PROTECT_REG_GLOBAL + 32, + GEN8_CP_PROTECT_REG_GLOBAL + 33, + GEN8_CP_PROTECT_REG_GLOBAL + 34, + GEN8_CP_PROTECT_REG_GLOBAL + 35, + GEN8_CP_PROTECT_REG_GLOBAL + 36, + GEN8_CP_PROTECT_REG_GLOBAL + 37, + GEN8_CP_PROTECT_REG_GLOBAL + 38, + GEN8_CP_PROTECT_REG_GLOBAL + 39, + GEN8_CP_PROTECT_REG_GLOBAL + 40, + GEN8_CP_PROTECT_REG_GLOBAL + 41, + GEN8_CP_PROTECT_REG_GLOBAL + 42, + GEN8_CP_PROTECT_REG_GLOBAL + 43, + GEN8_CP_PROTECT_REG_GLOBAL + 44, + GEN8_CP_PROTECT_REG_GLOBAL + 45, + GEN8_CP_PROTECT_REG_GLOBAL + 63, +}; + static const struct gen8_pwrup_extlist gen8_0_0_pwrup_extlist[] = { { GEN8_CP_PROTECT_CNTL_PIPE, BIT(PIPE_BR) | BIT(PIPE_BV) | BIT(PIPE_LPAC)}, { GEN8_CP_PROTECT_REG_PIPE + 15, BIT(PIPE_BR) | BIT(PIPE_BV) | BIT(PIPE_LPAC)}, @@ -160,6 +263,34 @@ static const struct gen8_pwrup_extlist gen8_0_0_pwrup_extlist[] = { { GEN8_VFD_DBG_ECO_CNTL, BIT(PIPE_BR) | BIT(PIPE_BV)}, }; +static const struct gen8_pwrup_extlist 
gen8_3_0_pwrup_extlist[] = { + { GEN8_CP_PROTECT_CNTL_PIPE, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_CP_PROTECT_REG_PIPE + 15, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_GRAS_TSEFE_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_GRAS_NC_MODE_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_GRAS_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_RB_CCU_CNTL, BIT(PIPE_BR)}, + { GEN8_RB_CCU_NC_MODE_CNTL, BIT(PIPE_BR)}, + { GEN8_RB_CMP_NC_MODE_CNTL, BIT(PIPE_BR)}, + { GEN8_RB_RESOLVE_PREFETCH_CNTL, BIT(PIPE_BR)}, + { GEN8_RB_CMP_DBG_ECO_CNTL, BIT(PIPE_BR)}, + { GEN8_RB_GC_GMEM_PROTECT, BIT(PIPE_BR)}, + { GEN8_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE, BIT(PIPE_BR)}, + { GEN8_VPC_FLATSHADE_MODE_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_PC_CHICKEN_BITS_1, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_PC_CHICKEN_BITS_2, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_PC_CHICKEN_BITS_3, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_PC_CHICKEN_BITS_4, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_PC_AUTO_VERTEX_STRIDE, BIT(PIPE_BR) | BIT(PIPE_BV)}, + { GEN8_PC_VIS_STREAM_CNTL, BIT(PIPE_BR) | BIT(PIPE_BV)}, + { GEN8_PC_CONTEXT_SWITCH_STABILIZE_CNTL_1, BIT(PIPE_BR) | BIT(PIPE_BV)}, + { GEN8_VFD_CB_BV_THRESHOLD, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_VFD_CB_BR_THRESHOLD, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_VFD_CB_BUSY_REQ_CNT, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_VFD_CB_LP_REQ_CNT, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN8_VFD_DBG_ECO_CNTL, BIT(PIPE_BR) | BIT(PIPE_BV)}, +}; + struct gen8_nonctxt_overrides gen8_nc_overrides[] = { { GEN8_UCHE_MODE_CNTL, BIT(PIPE_NONE), 0, 0, 0, }, { GEN8_UCHE_CACHE_WAYS, BIT(PIPE_NONE), 0, 0, 0, }, @@ -750,8 +881,11 @@ static void gen8_hwcg_set(struct adreno_device *adreno_dev, bool on) u32 value; int i; - /* Increase clock keep-on hysteresis from 5 cycles to 8 cycles */ - if (on) + /* + * Increase clock keep-on hysteresis from 5 cycles to 8 cycles + * for adreno_is_gen8_0_x_family. 
+ */ + if ((adreno_is_gen8_0_x_family(adreno_dev)) && on) kgsl_regwrite(device, GEN8_RBBM_CGC_0_PC, 0x00000702); if (!adreno_dev->hwcg_enabled) @@ -797,14 +931,24 @@ static void gen8_patch_pwrup_reglist(struct adreno_device *adreno_dev) struct gen8_nonctxt_overrides *nc_overrides = gen8_dev->nc_overrides; /* Static IFPC restore only registers */ - reglist[items].regs = gen8_ifpc_pwrup_reglist; - reglist[items].count = ARRAY_SIZE(gen8_ifpc_pwrup_reglist); + if (adreno_is_gen8_3_0(adreno_dev)) { + reglist[items].regs = gen8_3_0_ifpc_pwrup_reglist; + reglist[items].count = ARRAY_SIZE(gen8_3_0_ifpc_pwrup_reglist); + } else { + reglist[items].regs = gen8_ifpc_pwrup_reglist; + reglist[items].count = ARRAY_SIZE(gen8_ifpc_pwrup_reglist); + } lock->ifpc_list_len = reglist[items].count; items++; /* Static IFPC + preemption registers */ - reglist[items].regs = gen8_pwrup_reglist; - reglist[items].count = ARRAY_SIZE(gen8_pwrup_reglist); + if (adreno_is_gen8_3_0(adreno_dev)) { + reglist[items].regs = gen8_3_0_pwrup_reglist; + reglist[items].count = ARRAY_SIZE(gen8_3_0_pwrup_reglist); + } else { + reglist[items].regs = gen8_pwrup_reglist; + reglist[items].count = ARRAY_SIZE(gen8_pwrup_reglist); + } lock->preemption_list_len = reglist[items].count; items++; @@ -828,12 +972,14 @@ static void gen8_patch_pwrup_reglist(struct adreno_device *adreno_dev) nc_overrides[j].list_type)) continue; - if ((reglist[i].regs == gen8_ifpc_pwrup_reglist) && + if ((reglist[i].regs == gen8_ifpc_pwrup_reglist || + reglist[i].regs == gen8_3_0_ifpc_pwrup_reglist) && (nc_overrides[j].list_type == 1)) { *dest++ = nc_overrides[j].offset; kgsl_regread(device, nc_overrides[j].offset, dest++); lock->ifpc_list_len++; - } else if ((reglist[i].regs == gen8_pwrup_reglist) && + } else if ((reglist[i].regs == gen8_pwrup_reglist || + reglist[i].regs == gen8_3_0_pwrup_reglist) && (nc_overrides[j].list_type == 2)) { *dest++ = nc_overrides[j].offset; kgsl_regread(device, nc_overrides[j].offset, dest++); diff --git 
a/adreno_gen8.h b/adreno_gen8.h index bd9fb82ed5..706e9df884 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -13,6 +13,7 @@ #include "gen8_reg.h" #define GEN8_0_0_NUM_PHYSICAL_SLICES 3 +#define GEN8_3_0_NUM_PHYSICAL_SLICES 1 /* Forward struct declaration */ struct gen8_snapshot_block_list; @@ -625,6 +626,8 @@ static inline void gen8_coresight_init(struct adreno_device *device) { } */ static inline u32 gen8_get_num_slices(struct adreno_device *adreno_dev) { + if (adreno_is_gen8_3_0(adreno_dev)) + return GEN8_3_0_NUM_PHYSICAL_SLICES; return GEN8_0_0_NUM_PHYSICAL_SLICES; } #endif diff --git a/adreno_gen8_3_0_snapshot.h b/adreno_gen8_3_0_snapshot.h new file mode 100644 index 0000000000..71a29e2349 --- /dev/null +++ b/adreno_gen8_3_0_snapshot.h @@ -0,0 +1,823 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + */ +#ifndef __ADRENO_GEN8_3_0_SNAPSHOT_H +#define __ADRENO_GEN8_3_0_SNAPSHOT_H + +#include "adreno_gen8_snapshot.h" +#include "adreno_gen8_0_0_snapshot.h" + +static const u32 gen8_3_0_debugbus_blocks[] = { + DEBUGBUS_GMU_GX_GC_US_I_0, + DEBUGBUS_DBGC_GC_US_I_0, + DEBUGBUS_RBBM_GC_US_I_0, + DEBUGBUS_LARC_GC_US_I_0, + DEBUGBUS_COM_GC_US_I_0, + DEBUGBUS_HLSQ_GC_US_I_0, + DEBUGBUS_CGC_GC_US_I_0, + DEBUGBUS_VSC_GC_US_I_0_0, + DEBUGBUS_VSC_GC_US_I_0_1, + DEBUGBUS_UFC_GC_US_I_0, + DEBUGBUS_UFC_GC_US_I_1, + DEBUGBUS_CP_GC_US_I_0_0, + DEBUGBUS_CP_GC_US_I_0_1, + DEBUGBUS_CP_GC_US_I_0_2, + DEBUGBUS_PC_BR_US_I_0, + DEBUGBUS_PC_BV_US_I_0, + DEBUGBUS_GPC_BR_US_I_0, + DEBUGBUS_GPC_BV_US_I_0, + DEBUGBUS_VPC_BR_US_I_0, + DEBUGBUS_VPC_BV_US_I_0, + DEBUGBUS_UCHE_WRAPPER_GC_US_I_0, + DEBUGBUS_UCHE_GC_US_I_0, + DEBUGBUS_CP_GC_S_0_I_0, + DEBUGBUS_PC_BR_S_0_I_0, + DEBUGBUS_PC_BV_S_0_I_0, + DEBUGBUS_TESS_GC_S_0_I_0, + DEBUGBUS_TSEBE_GC_S_0_I_0, + DEBUGBUS_RAS_GC_S_0_I_0, + DEBUGBUS_LRZ_BR_S_0_I_0, + DEBUGBUS_LRZ_BV_S_0_I_0, + DEBUGBUS_VFDP_GC_S_0_I_0, + DEBUGBUS_GPC_BR_S_0_I_0, + DEBUGBUS_GPC_BV_S_0_I_0, + 
DEBUGBUS_VPCFE_BR_S_0_I_0, + DEBUGBUS_VPCFE_BV_S_0_I_0, + DEBUGBUS_VPCBE_BR_S_0_I_0, + DEBUGBUS_VPCBE_BV_S_0_I_0, + DEBUGBUS_DBGC_GC_S_0_I_0, + DEBUGBUS_LARC_GC_S_0_I_0, + DEBUGBUS_RBBM_GC_S_0_I_0, + DEBUGBUS_CCRE_GC_S_0_I_0, + DEBUGBUS_CGC_GC_S_0_I_0, + DEBUGBUS_GMU_GC_S_0_I_0, + DEBUGBUS_SLICE_GC_S_0_I_0, + DEBUGBUS_HLSQ_SPTP_STAR_GC_S_0_I_0, + DEBUGBUS_USP_GC_S_0_I_0, + DEBUGBUS_USPTP_GC_S_0_I_0, + DEBUGBUS_USPTP_GC_S_0_I_1, + DEBUGBUS_TP_GC_S_0_I_0, + DEBUGBUS_TP_GC_S_0_I_1, + DEBUGBUS_RB_GC_S_0_I_0, + DEBUGBUS_CCU_GC_S_0_I_0, + DEBUGBUS_HLSQ_GC_S_0_I_0, + DEBUGBUS_VFD_GC_S_0_I_0, +}; +static struct gen8_shader_block gen8_3_0_shader_blocks[] = { + { TP0_TMO_DATA, 0x0200, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { TP0_SMO_DATA, 0x0080, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { TP0_MIPMAP_BASE_DATA, 0x0080, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_INST_DATA_3, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_INST_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_INST_DATA_1, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_0_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_1_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_2_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_3_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_4_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_5_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_6_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_7_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_CB_RAM, 0x0390, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_13_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_14_DATA, 
0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_INST_TAG, 0x0100, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_INST_DATA_2, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_TMO_TAG, 0x0080, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_SMO_TAG, 0x0080, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_STATE_DATA, 0x0040, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_HWAVE_RAM, 0x0100, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_L0_INST_BUF, 0x0080, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_8_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_9_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_10_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_11_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { SP_LB_12_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, + { HLSQ_DATAPATH_DSTR_META, 0x0170, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_DATAPATH_DSTR_META, 0x0020, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_DESC_REMAP_META, 0x0018, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_DESC_REMAP_META, 0x000C, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_DESC_REMAP_META, 0x0008, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_SLICE_TOP_META, 0x0048, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_SLICE_TOP_META, 0x0048, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_SLICE_TOP_META, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_L2STC_TAG_RAM, 0x0200, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_L2STC_INFO_CMD, 0x0474, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0080, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_CPS_BE_CTXT_BUF_RAM_TAG, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 
0x0400, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CPS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_CHUNK_CPS_RAM, 0x0300, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CHUNK_CVS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CHUNK_CVS_RAM_TAG, 0x0040, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_CHUNK_CPS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_ICB_CVS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_ICB_CVS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_ICB_CPS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CVS_MISC_RAM, 0x0540, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CVS_MISC_RAM, 0x0540, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_CPS_MISC_RAM, 0x0640, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CPS_MISC_RAM, 0x00B0, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_CPS_MISC_RAM_1, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_INST_RAM, 0x0200, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CPS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CPS_CONST_RAM, 0x0800, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_CVS_MISC_RAM_TAG, 0x0050, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CVS_MISC_RAM_TAG, 0x0050, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_CPS_MISC_RAM_TAG, 0x0050, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_CPS_MISC_RAM_TAG, 0x0008, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_INST_RAM_TAG, 0x0014, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_INST_RAM_TAG, 0x0010, 1, 1, 
PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_INST_RAM_TAG, 0x0004, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM, 0x03C0, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM, 0x0280, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM, 0x0050, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM_TAG, 0x0008, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_INST_RAM_1, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_STPROC_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_SLICE_BACKEND_META, 0x00C0, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_SLICE_BACKEND_META, 0x00C0, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_SLICE_BACKEND_META, 0x00C0, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_INST_RAM_2, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_DATAPATH_META, 0x0020, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { HLSQ_INDIRECT_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_BACKEND_META, 0x0020, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, + { HLSQ_BACKEND_META, 0x0020, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, + { HLSQ_BACKEND_META, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, +}; + +/* + * Block : ['GMUAO', 'GMUCX', 'GMUCX_RAM'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 160 (Regs:616) + */ +static const u32 gen8_3_0_gmu_registers[] = { + 0x10001, 0x10001, 0x10003, 0x10003, 0x10401, 0x10401, 0x10403, 0x10403, + 0x10801, 0x10801, 0x10803, 0x10803, 0x10c01, 0x10c01, 
0x10c03, 0x10c03, + 0x11001, 0x11001, 0x11003, 0x11003, 0x11401, 0x11401, 0x11403, 0x11403, + 0x11801, 0x11801, 0x11803, 0x11803, 0x11c01, 0x11c01, 0x11c03, 0x11c03, + 0x1f400, 0x1f40b, 0x1f40f, 0x1f411, 0x1f500, 0x1f500, 0x1f507, 0x1f507, + 0x1f509, 0x1f50b, 0x1f700, 0x1f701, 0x1f704, 0x1f706, 0x1f708, 0x1f709, + 0x1f70c, 0x1f70d, 0x1f710, 0x1f711, 0x1f713, 0x1f716, 0x1f718, 0x1f71d, + 0x1f720, 0x1f725, 0x1f729, 0x1f729, 0x1f730, 0x1f747, 0x1f750, 0x1f75a, + 0x1f75c, 0x1f75c, 0x1f780, 0x1f781, 0x1f784, 0x1f78b, 0x1f790, 0x1f797, + 0x1f7a0, 0x1f7a7, 0x1f7b0, 0x1f7b7, 0x1f7e0, 0x1f7e1, 0x1f7e4, 0x1f7e5, + 0x1f7e8, 0x1f7e9, 0x1f7ec, 0x1f7ed, 0x1f800, 0x1f804, 0x1f807, 0x1f808, + 0x1f80b, 0x1f80c, 0x1f80f, 0x1f80f, 0x1f811, 0x1f811, 0x1f813, 0x1f817, + 0x1f819, 0x1f81c, 0x1f824, 0x1f830, 0x1f840, 0x1f842, 0x1f848, 0x1f848, + 0x1f84c, 0x1f84c, 0x1f850, 0x1f850, 0x1f858, 0x1f859, 0x1f868, 0x1f869, + 0x1f878, 0x1f883, 0x1f930, 0x1f931, 0x1f934, 0x1f935, 0x1f938, 0x1f939, + 0x1f93c, 0x1f93d, 0x1f940, 0x1f941, 0x1f943, 0x1f943, 0x1f948, 0x1f94a, + 0x1f94f, 0x1f951, 0x1f954, 0x1f955, 0x1f95d, 0x1f95d, 0x1f962, 0x1f96b, + 0x1f970, 0x1f970, 0x1f97c, 0x1f97e, 0x1f980, 0x1f981, 0x1f984, 0x1f986, + 0x1f992, 0x1f993, 0x1f996, 0x1f99e, 0x1f9c0, 0x1f9cf, 0x1f9f0, 0x1f9f1, + 0x1f9f8, 0x1f9fa, 0x1f9fc, 0x1f9fc, 0x1fa00, 0x1fa03, 0x1fc00, 0x1fc01, + 0x1fc04, 0x1fc07, 0x1fc10, 0x1fc10, 0x1fc14, 0x1fc14, 0x1fc18, 0x1fc19, + 0x1fc20, 0x1fc20, 0x1fc24, 0x1fc26, 0x1fc30, 0x1fc33, 0x1fc38, 0x1fc3b, + 0x1fc40, 0x1fc49, 0x1fc50, 0x1fc59, 0x1fc60, 0x1fc7f, 0x1fca0, 0x1fcef, + 0x20000, 0x20007, 0x20010, 0x20015, 0x20018, 0x2001a, 0x2001c, 0x2001d, + 0x20020, 0x20021, 0x20024, 0x20025, 0x2002a, 0x2002c, 0x20030, 0x20031, + 0x20034, 0x20036, 0x20080, 0x20087, 0x20300, 0x20301, 0x20304, 0x20305, + 0x20308, 0x2030c, 0x20310, 0x20314, 0x20318, 0x2031a, 0x20320, 0x20322, + 0x20324, 0x20326, 0x20328, 0x2032a, 0x20330, 0x20333, 0x20338, 0x20338, + 0x20340, 0x20350, 0x20354, 0x2035b, 0x20360, 0x20367, 
0x20370, 0x20377, + 0x23801, 0x23801, 0x23803, 0x23803, 0x23805, 0x23805, 0x23807, 0x23807, + 0x23809, 0x23809, 0x2380b, 0x2380b, 0x2380d, 0x2380d, 0x2380f, 0x2380f, + 0x23811, 0x23811, 0x23813, 0x23813, 0x23815, 0x23815, 0x23817, 0x23817, + 0x23819, 0x23819, 0x2381b, 0x2381b, 0x2381d, 0x2381d, 0x2381f, 0x23820, + 0x23822, 0x23822, 0x23824, 0x23824, 0x23826, 0x23826, 0x23828, 0x23828, + 0x2382a, 0x2382a, 0x2382c, 0x2382c, 0x2382e, 0x2382e, 0x23830, 0x23830, + 0x23832, 0x23832, 0x23834, 0x23834, 0x23836, 0x23836, 0x23838, 0x23838, + 0x2383a, 0x2383a, 0x2383c, 0x2383c, 0x2383e, 0x2383e, 0x23840, 0x23847, + 0x23b00, 0x23b01, 0x23b03, 0x23b03, 0x23b05, 0x23b0e, 0x23b10, 0x23b13, + 0x23b15, 0x23b16, 0x23b28, 0x23b28, 0x23b30, 0x23b30, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_3_0_gmu_registers), 8)); + +/* + * Block : ['GMUGX'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 17 (Regs:170) + */ +static const u32 gen8_3_0_gmugx_registers[] = { + 0x0dc00, 0x0dc0d, 0x0dc10, 0x0dc11, 0x0dc13, 0x0dc15, 0x0dc18, 0x0dc1a, + 0x0dc1c, 0x0dc23, 0x0dc26, 0x0dc2b, 0x0dc2e, 0x0dc2f, 0x0dc40, 0x0dc42, + 0x0dc60, 0x0dc7f, 0x0dc88, 0x0dc90, 0x0dc98, 0x0dc99, 0x0dca0, 0x0dcbf, + 0x0dcc8, 0x0dcd0, 0x0dcd8, 0x0dcd9, 0x0dce0, 0x0dcff, 0x0dd08, 0x0dd10, + 0x0dd18, 0x0dd19, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_3_0_gmugx_registers), 8)); + +/* + * Block : ['GMUGX'] + * REGION : SLICE + * Pipeline: PIPE_NONE + * pairs : 19 (Regs:224) + */ +static const u32 gen8_3_0_gmugx_slice_registers[] = { + 0x0e400, 0x0e401, 0x0e404, 0x0e404, 0x0e406, 0x0e408, 0x0e40a, 0x0e40a, + 0x0e40e, 0x0e42f, 0x0e438, 0x0e440, 0x0e448, 0x0e449, 0x0e490, 0x0e4af, + 0x0e4b8, 0x0e4c0, 0x0e4c8, 0x0e4c9, 0x0e4d0, 0x0e4ef, 0x0e4f8, 0x0e500, + 0x0e508, 0x0e509, 0x0e510, 0x0e52f, 0x0e538, 0x0e540, 0x0e548, 0x0e549, + 0x0e590, 0x0e5af, 0x0e5b8, 0x0e5c0, 0x0e5c8, 0x0e5c9, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_3_0_gmugx_slice_registers), 8)); + 
+/* + * Block : ['CP'] + * REGION : UNSLICE + * Pipeline: CP_PIPE_NONE + * Cluster : CLUSTER_NONE + * pairs : 16 (Regs:300) + */ +static const u32 gen8_3_0_cp_cp_pipe_none_registers[] = { + 0x00800, 0x0080a, 0x00813, 0x0081e, 0x00820, 0x0082d, 0x00838, 0x0083e, + 0x00840, 0x00847, 0x0084b, 0x0084c, 0x00850, 0x0088f, 0x008b5, 0x008b6, + 0x008c0, 0x008cb, 0x008d0, 0x008e4, 0x008e7, 0x008ee, 0x008fa, 0x008fb, + 0x00928, 0x00929, 0x00958, 0x0095b, 0x00980, 0x009ff, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_3_0_cp_cp_pipe_none_registers), 8)); + +/* + * Block : ['CP'] + * REGION : UNSLICE + * Pipeline: CP_PIPE_LPAC + * Cluster : CLUSTER_NONE + * pairs : 3 (Regs:13) + */ +static const u32 gen8_3_0_cp_cp_pipe_lpac_registers[] = { + 0x00830, 0x00837, 0x008b3, 0x008b4, 0x008b7, 0x008b9, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_3_0_cp_cp_pipe_lpac_registers), 8)); + +/* + * Block : ['CP'] + * REGION : UNSLICE + * Pipeline: CP_PIPE_AQE0 + * Cluster : CLUSTER_NONE + * pairs : 2 (Regs:5) + */ +static const u32 gen8_3_0_cp_cp_pipe_aqe0_registers[] = { + 0x008b3, 0x008b4, 0x008b7, 0x008b9, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_3_0_cp_cp_pipe_aqe0_registers), 8)); + +/* + * Block : ['CP'] + * REGION : UNSLICE + * Pipeline: CP_PIPE_AQE1 + * Cluster : CLUSTER_NONE + * pairs : 2 (Regs:5) + */ +static const u32 gen8_3_0_cp_cp_pipe_aqe1_registers[] = { + 0x008b3, 0x008b4, 0x008b7, 0x008b9, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_3_0_cp_cp_pipe_aqe1_registers), 8)); + +/* + * Block : ['CP'] + * REGION : UNSLICE + * Pipeline: CP_PIPE_DDE_BR + * Cluster : CLUSTER_NONE + * pairs : 3 (Regs:7) + */ +static const u32 gen8_3_0_cp_cp_pipe_dde_br_registers[] = { + 0x008b3, 0x008b4, 0x008b7, 0x008b9, 0x008fe, 0x008ff, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_3_0_cp_cp_pipe_dde_br_registers), 8)); + +/* + * Block : ['CP'] + * REGION : UNSLICE + * Pipeline: CP_PIPE_DDE_BV + * 
Cluster : CLUSTER_NONE + * pairs : 3 (Regs:7) + */ +static const u32 gen8_3_0_cp_cp_pipe_dde_bv_registers[] = { + 0x008b3, 0x008b4, 0x008b7, 0x008b9, 0x008fe, 0x008ff, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_3_0_cp_cp_pipe_dde_bv_registers), 8)); + +/* + * Block : ['SP'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * Cluster : CLUSTER_NONE + * Location: HLSQ_DP_STR + * pairs : 5 (Regs:18) + */ +static const u32 gen8_3_0_non_context_sp_pipe_none_hlsq_dp_str_registers[] = { + 0x0ae05, 0x0ae05, 0x0ae60, 0x0ae65, 0x0ae6b, 0x0ae6c, 0x0ae73, 0x0ae75, + 0x0aec0, 0x0aec5, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_3_0_non_context_sp_pipe_none_hlsq_dp_str_registers), 8)); + +/* + * Block : ['GRAS'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_VPC_VS + * pairs : 9 (Regs:238) + */ +static const u32 gen8_3_0_gras_slice_pipe_br_cluster_vpc_vs_registers[] = { + 0x08200, 0x08213, 0x08220, 0x08225, 0x08228, 0x0822d, 0x08230, 0x0823b, + 0x08240, 0x0825f, 0x08270, 0x0828f, 0x0829f, 0x082b7, 0x082d0, 0x0832f, + 0x08500, 0x08508, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_3_0_gras_slice_pipe_br_cluster_vpc_vs_registers), 8)); + +/* + * Block : ['GRAS'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_GRAS + * pairs : 14 (Regs:332) + */ +static const u32 gen8_3_0_gras_slice_pipe_br_cluster_gras_registers[] = { + 0x08080, 0x08080, 0x08086, 0x08092, 0x080c0, 0x080df, 0x08101, 0x08110, + 0x08130, 0x0814f, 0x08200, 0x08213, 0x08220, 0x08225, 0x08228, 0x0822d, + 0x08230, 0x0823b, 0x08240, 0x0825f, 0x08270, 0x0828f, 0x0829f, 0x082b7, + 0x082d0, 0x0832f, 0x08500, 0x08508, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_3_0_gras_slice_pipe_br_cluster_gras_registers), 8)); + +/* + * Block : ['GRAS'] + * REGION : SLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_VPC_VS + * pairs : 9 (Regs:238) + */ +static const u32 gen8_3_0_gras_slice_pipe_bv_cluster_vpc_vs_registers[] = { + 
0x08200, 0x08213, 0x08220, 0x08225, 0x08228, 0x0822d, 0x08230, 0x0823b, + 0x08240, 0x0825f, 0x08270, 0x0828f, 0x0829f, 0x082b7, 0x082d0, 0x0832f, + 0x08500, 0x08508, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_3_0_gras_slice_pipe_bv_cluster_vpc_vs_registers), 8)); + +/* + * Block : ['GRAS'] + * REGION : SLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_GRAS + * pairs : 14 (Regs:332) + */ +static const u32 gen8_3_0_gras_slice_pipe_bv_cluster_gras_registers[] = { + 0x08080, 0x08080, 0x08086, 0x08092, 0x080c0, 0x080df, 0x08101, 0x08110, + 0x08130, 0x0814f, 0x08200, 0x08213, 0x08220, 0x08225, 0x08228, 0x0822d, + 0x08230, 0x0823b, 0x08240, 0x0825f, 0x08270, 0x0828f, 0x0829f, 0x082b7, + 0x082d0, 0x0832f, 0x08500, 0x08508, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_3_0_gras_slice_pipe_bv_cluster_gras_registers), 8)); + +static struct gen8_cluster_registers gen8_3_0_cp_clusters[] = { + { CLUSTER_NONE, UNSLICE, PIPE_NONE, STATE_NON_CONTEXT, + gen8_3_0_cp_cp_pipe_none_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_BR, STATE_NON_CONTEXT, + gen8_0_0_cp_cp_pipe_br_registers, }, + { CLUSTER_NONE, SLICE, PIPE_BR, STATE_NON_CONTEXT, + gen8_0_0_cp_slice_cp_pipe_br_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_BV, STATE_NON_CONTEXT, + gen8_0_0_cp_cp_pipe_bv_registers, }, + { CLUSTER_NONE, SLICE, PIPE_BV, STATE_NON_CONTEXT, + gen8_0_0_cp_slice_cp_pipe_bv_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_LPAC, STATE_NON_CONTEXT, + gen8_3_0_cp_cp_pipe_lpac_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_AQE0, STATE_NON_CONTEXT, + gen8_3_0_cp_cp_pipe_aqe0_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_AQE1, STATE_NON_CONTEXT, + gen8_3_0_cp_cp_pipe_aqe1_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_DDE_BR, STATE_NON_CONTEXT, + gen8_3_0_cp_cp_pipe_dde_br_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_DDE_BV, STATE_NON_CONTEXT, + gen8_3_0_cp_cp_pipe_dde_bv_registers, }, +}; +static struct gen8_cluster_registers gen8_3_0_mvc_clusters[] = { + { CLUSTER_NONE, 
UNSLICE, PIPE_BR, STATE_NON_CONTEXT, + gen8_0_0_non_context_pipe_br_registers, }, + { CLUSTER_NONE, SLICE, PIPE_BR, STATE_NON_CONTEXT, + gen8_0_0_non_context_slice_pipe_br_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_BV, STATE_NON_CONTEXT, + gen8_0_0_non_context_pipe_bv_registers, }, + { CLUSTER_NONE, SLICE, PIPE_BV, STATE_NON_CONTEXT, + gen8_0_0_non_context_slice_pipe_bv_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_LPAC, STATE_NON_CONTEXT, + gen8_0_0_non_context_pipe_lpac_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_BR, STATE_NON_CONTEXT, + gen8_0_0_non_context_rb_pipe_br_rbp_registers, &gen8_0_0_rb_rbp_sel, }, + { CLUSTER_NONE, SLICE, PIPE_BR, STATE_NON_CONTEXT, + gen8_0_0_non_context_rb_slice_pipe_br_rac_registers, &gen8_0_0_rb_rac_sel, }, + { CLUSTER_NONE, SLICE, PIPE_BR, STATE_NON_CONTEXT, + gen8_0_0_non_context_rb_slice_pipe_br_rbp_registers, &gen8_0_0_rb_rbp_sel, }, + { CLUSTER_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_rb_slice_pipe_br_cluster_ps_rac_registers, &gen8_0_0_rb_rac_sel, }, + { CLUSTER_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_rb_slice_pipe_br_cluster_ps_rac_registers, &gen8_0_0_rb_rac_sel, }, + { CLUSTER_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_rb_slice_pipe_br_cluster_ps_rbp_registers, &gen8_0_0_rb_rbp_sel, }, + { CLUSTER_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_rb_slice_pipe_br_cluster_ps_rbp_registers, &gen8_0_0_rb_rbp_sel, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_3_0_gras_slice_pipe_br_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_3_0_gras_slice_pipe_br_cluster_vpc_vs_registers, }, + { CLUSTER_GRAS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_3_0_gras_slice_pipe_br_cluster_gras_registers, }, + { CLUSTER_GRAS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_3_0_gras_slice_pipe_br_cluster_gras_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_3_0_gras_slice_pipe_bv_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_VS, 
SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_3_0_gras_slice_pipe_bv_cluster_vpc_vs_registers, }, + { CLUSTER_GRAS, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_3_0_gras_slice_pipe_bv_cluster_gras_registers, }, + { CLUSTER_GRAS, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_3_0_gras_slice_pipe_bv_cluster_gras_registers, }, + { CLUSTER_FE_US, UNSLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_pc_pipe_br_cluster_fe_us_registers, }, + { CLUSTER_FE_US, UNSLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_pc_pipe_br_cluster_fe_us_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_pc_slice_pipe_br_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_pc_slice_pipe_br_cluster_fe_s_registers, }, + { CLUSTER_FE_US, UNSLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_pc_pipe_bv_cluster_fe_us_registers, }, + { CLUSTER_FE_US, UNSLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_pc_pipe_bv_cluster_fe_us_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_pc_slice_pipe_bv_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_pc_slice_pipe_bv_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_vfd_slice_pipe_br_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_vfd_slice_pipe_br_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_vfd_slice_pipe_bv_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_vfd_slice_pipe_bv_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_br_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_br_cluster_fe_s_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_br_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BR, 
STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_br_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_US, UNSLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_pipe_br_cluster_vpc_us_registers, }, + { CLUSTER_VPC_US, UNSLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_pipe_br_cluster_vpc_us_registers, }, + { CLUSTER_VPC_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_br_cluster_vpc_ps_registers, }, + { CLUSTER_VPC_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_br_cluster_vpc_ps_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_bv_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_bv_cluster_fe_s_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_bv_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_bv_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_US, UNSLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_pipe_bv_cluster_vpc_us_registers, }, + { CLUSTER_VPC_US, UNSLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_pipe_bv_cluster_vpc_us_registers, }, + { CLUSTER_VPC_PS, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_bv_cluster_vpc_ps_registers, }, + { CLUSTER_VPC_PS, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_bv_cluster_vpc_ps_registers, }, +}; +static struct gen8_sptp_cluster_registers gen8_3_0_sptp_clusters[] = { + { CLUSTER_NONE, UNSLICE, 1, 2, SP_NCTX_REG, PIPE_BR, 0, HLSQ_STATE, + gen8_0_0_non_context_sp_pipe_br_hlsq_state_registers, 0xae00}, + { CLUSTER_NONE, UNSLICE, 1, 2, SP_NCTX_REG, PIPE_BR, 0, SP_TOP, + gen8_0_0_non_context_sp_pipe_br_sp_top_registers, 0xae00}, + { CLUSTER_NONE, UNSLICE, 1, 2, SP_NCTX_REG, PIPE_BR, 0, USPTP, + gen8_0_0_non_context_sp_pipe_br_sp_top_registers, 0xae00}, + { CLUSTER_NONE, UNSLICE, 1, 2, SP_NCTX_REG, PIPE_BR, 0, HLSQ_DP_STR, + 
gen8_3_0_non_context_sp_pipe_none_hlsq_dp_str_registers, 0xae00}, + { CLUSTER_NONE, UNSLICE, 1, 2, TP0_NCTX_REG, PIPE_BR, 0, USPTP, + gen8_0_0_non_context_tpl1_pipe_br_usptp_registers, 0xb600}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_cctx_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_shared_const_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, SP_TOP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, SP_TOP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_cctx_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_usptp_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_usptp_shared_const_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_cctx_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_shared_const_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, SP_TOP, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, SP_TOP, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_cctx_registers, 0xa800}, + { 
CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_usptp_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_usptp_shared_const_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_cctx_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_shared_const_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, SP_TOP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, SP_TOP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_cctx_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_vs_usptp_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_cctx_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_shared_const_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, SP_TOP, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, SP_TOP, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_cctx_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, 
USPTP, + gen8_0_0_sp_slice_pipe_bv_cluster_sp_vs_usptp_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_cctx_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_shared_const_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_DP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, SP_TOP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, SP_TOP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_cctx_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_usptp_shared_const_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_cctx_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_shared_const_registers, + 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_DP, + gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_dp_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, SP_TOP, + 
gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_sp_top_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, SP_TOP, + gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_sp_top_cctx_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP, + gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_usptp_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP, + gen8_0_0_sp_slice_pipe_lpac_cluster_sp_ps_usptp_shared_const_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_cctx_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_shared_const_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_DP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, SP_TOP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, SP_TOP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_cctx_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, HLSQ_DP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, SP_TOP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_cctx_registers, 0xa800}, + { CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, HLSQ_DP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800}, + { 
CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, SP_TOP, + gen8_0_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_cctx_registers, 0xa800}, + { CLUSTER_SP_VS, SLICE, 1, 2, TP0_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP, + gen8_0_0_tpl1_slice_pipe_br_cluster_sp_vs_usptp_registers, 0xb000}, + { CLUSTER_SP_VS, SLICE, 1, 2, TP0_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP, + gen8_0_0_tpl1_slice_pipe_bv_cluster_sp_vs_usptp_registers, 0xb000}, + { CLUSTER_SP_VS, SLICE, 1, 2, TP0_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP, + gen8_0_0_tpl1_slice_pipe_br_cluster_sp_vs_usptp_registers, 0xb000}, + { CLUSTER_SP_VS, SLICE, 1, 2, TP0_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP, + gen8_0_0_tpl1_slice_pipe_bv_cluster_sp_vs_usptp_registers, 0xb000}, + { CLUSTER_SP_PS, SLICE, 1, 2, TP0_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP, + gen8_0_0_tpl1_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xb000}, + { CLUSTER_SP_PS, SLICE, 1, 2, TP0_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP, + gen8_0_0_tpl1_slice_pipe_lpac_cluster_sp_ps_usptp_registers, 0xb000}, + { CLUSTER_SP_PS, SLICE, 1, 2, TP0_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP, + gen8_0_0_tpl1_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xb000}, + { CLUSTER_SP_PS, SLICE, 1, 2, TP0_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP, + gen8_0_0_tpl1_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xb000}, + { CLUSTER_SP_PS, SLICE, 1, 2, TP0_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP, + gen8_0_0_tpl1_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xb000}, +}; +/* + * Before dumping the CP MVC + * Program CP_APERTURE_CNTL_* with pipeID={CP_PIPE} + * Then dump corresponding {Register_PIPE} + */ +static struct gen8_cp_indexed_reg gen8_3_0_cp_indexed_reg_list[] = { + { GEN8_CP_SQE_STAT_ADDR_PIPE, GEN8_CP_SQE_STAT_DATA_PIPE, UNSLICE, PIPE_BR, 0x00040}, + { GEN8_CP_SQE_STAT_ADDR_PIPE, GEN8_CP_SQE_STAT_DATA_PIPE, UNSLICE, PIPE_BV, 0x00040}, + { GEN8_CP_DRAW_STATE_ADDR_PIPE, GEN8_CP_DRAW_STATE_DATA_PIPE, UNSLICE, PIPE_BR, 0x00200}, + { GEN8_CP_DRAW_STATE_ADDR_PIPE, GEN8_CP_DRAW_STATE_DATA_PIPE, UNSLICE, PIPE_BV, 0x00200}, + { 
GEN8_CP_ROQ_DBG_ADDR_PIPE, GEN8_CP_ROQ_DBG_DATA_PIPE, UNSLICE, PIPE_BR, 0x00800}, + { GEN8_CP_ROQ_DBG_ADDR_PIPE, GEN8_CP_ROQ_DBG_DATA_PIPE, UNSLICE, PIPE_BV, 0x00800}, + { GEN8_CP_SQE_UCODE_DBG_ADDR_PIPE, GEN8_CP_SQE_UCODE_DBG_DATA_PIPE, + UNSLICE, PIPE_BR, 0x08000}, + { GEN8_CP_SQE_UCODE_DBG_ADDR_PIPE, GEN8_CP_SQE_UCODE_DBG_DATA_PIPE, + UNSLICE, PIPE_BV, 0x08000}, + { GEN8_CP_RESOURCE_TABLE_DBG_ADDR_BV, GEN8_CP_RESOURCE_TABLE_DBG_DATA_BV, + UNSLICE, PIPE_NONE, 0x04100}, + { GEN8_CP_FIFO_DBG_ADDR_DDE_PIPE, GEN8_CP_FIFO_DBG_DATA_DDE_PIPE, + UNSLICE, PIPE_DDE_BR, 0x01100}, + { GEN8_CP_FIFO_DBG_ADDR_DDE_PIPE, GEN8_CP_FIFO_DBG_DATA_DDE_PIPE, + UNSLICE, PIPE_DDE_BV, 0x01100}, +}; + +static struct gen8_reg_list gen8_3_0_gmu_gx_regs[] = { + { UNSLICE, gen8_3_0_gmugx_registers }, + { SLICE, gen8_3_0_gmugx_slice_registers }, +}; + +/* + * Block : ['GDPM_LKG'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 9 (Regs:26) + */ +static const u32 gen8_3_0_gdpm_lkg_registers[] = { + 0x21c00, 0x21c00, 0x21c08, 0x21c09, 0x21c0e, 0x21c0f, 0x21c4f, 0x21c50, + 0x21c52, 0x21c52, 0x21c54, 0x21c56, 0x21c58, 0x21c5a, 0x21c5c, 0x21c60, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_3_0_gdpm_lkg_registers), 8)); + +/* + * Block : ['GPU_CC_GPU_CC_REG'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 28 (Regs:130) + */ +static const u32 gen8_3_0_gpu_cc_gpu_cc_reg_registers[] = { + 0x25400, 0x25404, 0x25800, 0x25804, 0x25c00, 0x25c04, 0x26000, 0x26004, + 0x26400, 0x26406, 0x26415, 0x2641d, 0x2641f, 0x26437, 0x26439, 0x2643b, + 0x2643d, 0x2643f, 0x26443, 0x26444, 0x26478, 0x2647a, 0x26489, 0x2648a, + 0x2649c, 0x2649e, 0x264a0, 0x264a1, 0x264c5, 0x264c7, 0x264e8, 0x264ea, + 0x264f9, 0x264fc, 0x2650b, 0x2650b, 0x2651c, 0x2651e, 0x26540, 0x2654b, + 0x26554, 0x26556, 0x26558, 0x2655c, 0x2655e, 0x2655f, 0x26563, 0x26563, + 0x2656d, 0x26573, 0x26576, 0x26576, 0x26578, 0x2657a, + UINT_MAX, UINT_MAX, +}; 
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_gpu_cc_gpu_cc_reg_registers), 8)); + +/* + * Block : ['GPU_CC_PLL0_CM_PLL_LUCID_OLE'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 1 (Regs:16) + */ +static const u32 gen8_3_0_gpu_cc_pll0_cm_pll_lucid_ole_registers[] = { + 0x24000, 0x2400f, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_3_0_gpu_cc_pll0_cm_pll_lucid_ole_registers), 8)); + +/* + * Block : ['ACD_ACD'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 10 (Regs:53) + */ +static const u32 gen8_3_0_acd_acd_mnd_registers[] = { + 0x1a400, 0x1a416, 0x1a420, 0x1a42d, 0x1a430, 0x1a431, 0x1a435, 0x1a435, + 0x1a437, 0x1a437, 0x1a43a, 0x1a43a, 0x1a442, 0x1a442, 0x1a456, 0x1a458, + 0x1a45b, 0x1a45d, 0x1a45f, 0x1a462, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_3_0_acd_acd_mnd_registers), 8)); + +/* + * Block : ['GX_CLKCTL_GX_CLKCTL_REG'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 12 (Regs:82) + */ +static const u32 gen8_3_0_gx_clkctl_gx_clkctl_reg_registers[] = { + 0x1a000, 0x1a004, 0x1a008, 0x1a012, 0x1a014, 0x1a014, 0x1a017, 0x1a017, + 0x1a019, 0x1a019, 0x1a022, 0x1a022, 0x1a024, 0x1a029, 0x1a03f, 0x1a05d, + 0x1a060, 0x1a063, 0x1a065, 0x1a066, 0x1a068, 0x1a076, 0x1a078, 0x1a07b, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_3_0_gx_clkctl_gx_clkctl_reg_registers), 8)); + +/* + * Block : ['GX_CLKCTL_PLL0_CM_PLL_LUCID_OLE'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 1 (Regs:16) + */ +static const u32 gen8_3_0_gx_clkctl_pll0_cm_pll_lucid_ole_registers[] = { + 0x19000, 0x1900f, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_3_0_gx_clkctl_pll0_cm_pll_lucid_ole_registers), 8)); + +/* + * Block : ['RSCC_RSC'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 99 (Regs:598) + */ +static const u32 gen8_3_0_rscc_rsc_registers[] = { + 0x14000, 0x14034, 0x14036, 0x14036, 0x14040, 0x14042, 0x14044, 0x14045, + 0x14047, 0x14047, 0x14080, 0x14084, 0x14089, 0x1408c, 
0x14091, 0x14094, + 0x14099, 0x1409c, 0x140a1, 0x140a4, 0x140a9, 0x140ac, 0x14100, 0x14104, + 0x14114, 0x14119, 0x14124, 0x14132, 0x14154, 0x1416b, 0x14340, 0x14341, + 0x14344, 0x14344, 0x14346, 0x1437c, 0x143f0, 0x143f8, 0x143fa, 0x143fe, + 0x14400, 0x14404, 0x14406, 0x1440a, 0x1440c, 0x14410, 0x14412, 0x14416, + 0x14418, 0x1441c, 0x1441e, 0x14422, 0x14424, 0x14424, 0x14498, 0x144a0, + 0x144a2, 0x144a6, 0x144a8, 0x144ac, 0x144ae, 0x144b2, 0x144b4, 0x144b8, + 0x144ba, 0x144be, 0x144c0, 0x144c4, 0x144c6, 0x144ca, 0x144cc, 0x144cc, + 0x14540, 0x14548, 0x1454a, 0x1454e, 0x14550, 0x14554, 0x14556, 0x1455a, + 0x1455c, 0x14560, 0x14562, 0x14566, 0x14568, 0x1456c, 0x1456e, 0x14572, + 0x14574, 0x14574, 0x145e8, 0x145f0, 0x145f2, 0x145f6, 0x145f8, 0x145fc, + 0x145fe, 0x14602, 0x14604, 0x14608, 0x1460a, 0x1460e, 0x14610, 0x14614, + 0x14616, 0x1461a, 0x1461c, 0x1461c, 0x14690, 0x14698, 0x1469a, 0x1469e, + 0x146a0, 0x146a4, 0x146a6, 0x146aa, 0x146ac, 0x146b0, 0x146b2, 0x146b6, + 0x146b8, 0x146bc, 0x146be, 0x146c2, 0x146c4, 0x146c4, 0x14738, 0x14740, + 0x14742, 0x14746, 0x14748, 0x1474c, 0x1474e, 0x14752, 0x14754, 0x14758, + 0x1475a, 0x1475e, 0x14760, 0x14764, 0x14766, 0x1476a, 0x1476c, 0x1476c, + 0x147e0, 0x147e8, 0x147ea, 0x147ee, 0x147f0, 0x147f4, 0x147f6, 0x147fa, + 0x147fc, 0x14800, 0x14802, 0x14806, 0x14808, 0x1480c, 0x1480e, 0x14812, + 0x14814, 0x14814, 0x14888, 0x14890, 0x14892, 0x14896, 0x14898, 0x1489c, + 0x1489e, 0x148a2, 0x148a4, 0x148a8, 0x148aa, 0x148ae, 0x148b0, 0x148b4, + 0x148b6, 0x148ba, 0x148bc, 0x148bc, 0x14930, 0x14938, 0x1493a, 0x1493e, + 0x14940, 0x14944, 0x14946, 0x1494a, 0x1494c, 0x14950, 0x14952, 0x14956, + 0x14958, 0x1495c, 0x1495e, 0x14962, 0x14964, 0x14964, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_3_0_rscc_rsc_registers), 8)); + +static const u32 *gen8_3_0_external_core_regs[] = { + gen8_3_0_gdpm_lkg_registers, + gen8_0_0_gpu_cc_ahb2phy_broadcast_swman_registers, + gen8_0_0_gpu_cc_ahb2phy_swman_registers, + 
gen8_3_0_gpu_cc_gpu_cc_reg_registers, + gen8_3_0_gpu_cc_pll0_cm_pll_lucid_ole_registers, + gen8_3_0_acd_acd_mnd_registers, + gen8_0_0_gx_clkctl_ahb2phy_broadcast_swman_registers, + gen8_0_0_gx_clkctl_ahb2phy_swman_registers, + gen8_3_0_gx_clkctl_pll0_cm_pll_lucid_ole_registers, + gen8_3_0_gx_clkctl_gx_clkctl_reg_registers, +}; +#endif /*_ADRENO_GEN8_3_0_SNAPSHOT_H */ diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index c1679a59cb..bf263808f4 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -6,6 +6,7 @@ #include "adreno.h" #include "adreno_gen8_0_0_snapshot.h" +#include "adreno_gen8_3_0_snapshot.h" #include "adreno_snapshot.h" static struct kgsl_memdesc *gen8_capturescript; @@ -48,6 +49,37 @@ const struct gen8_snapshot_block_list gen8_0_0_snapshot_block_list = { .mempool_index_registers_len = ARRAY_SIZE(gen8_0_0_cp_mempool_reg_list), }; +const struct gen8_snapshot_block_list gen8_3_0_snapshot_block_list = { + .pre_crashdumper_regs = gen8_0_0_ahb_registers, + .num_pre_crashdumper_regs = ARRAY_SIZE(gen8_0_0_ahb_registers), + .debugbus_blocks = gen8_3_0_debugbus_blocks, + .debugbus_blocks_len = ARRAY_SIZE(gen8_3_0_debugbus_blocks), + .gbif_debugbus_blocks = gen8_gbif_debugbus_blocks, + .gbif_debugbus_blocks_len = ARRAY_SIZE(gen8_gbif_debugbus_blocks), + .cx_debugbus_blocks = gen8_cx_debugbus_blocks, + .cx_debugbus_blocks_len = ARRAY_SIZE(gen8_cx_debugbus_blocks), + .external_core_regs = gen8_3_0_external_core_regs, + .num_external_core_regs = ARRAY_SIZE(gen8_3_0_external_core_regs), + .gmu_cx_unsliced_regs = gen8_3_0_gmu_registers, + .gmu_gx_regs = gen8_3_0_gmu_gx_regs, + .num_gmu_gx_regs = ARRAY_SIZE(gen8_3_0_gmu_gx_regs), + .rscc_regs = gen8_3_0_rscc_rsc_registers, + .reg_list = gen8_0_0_reg_list, + .cx_misc_regs = gen8_0_0_cx_misc_registers, + .shader_blocks = gen8_3_0_shader_blocks, + .num_shader_blocks = ARRAY_SIZE(gen8_3_0_shader_blocks), + .cp_clusters = gen8_3_0_cp_clusters, + .num_cp_clusters = ARRAY_SIZE(gen8_3_0_cp_clusters), 
+ .clusters = gen8_3_0_mvc_clusters, + .num_clusters = ARRAY_SIZE(gen8_3_0_mvc_clusters), + .sptp_clusters = gen8_3_0_sptp_clusters, + .num_sptp_clusters = ARRAY_SIZE(gen8_3_0_sptp_clusters), + .index_registers = gen8_3_0_cp_indexed_reg_list, + .index_registers_len = ARRAY_SIZE(gen8_3_0_cp_indexed_reg_list), + .mempool_index_registers = gen8_0_0_cp_mempool_reg_list, + .mempool_index_registers_len = ARRAY_SIZE(gen8_0_0_cp_mempool_reg_list), +}; + #define GEN8_SP_READ_SEL_VAL(_sliceid, _location, _pipe, _statetype, _usptp, _sptp) \ (FIELD_PREP(GENMASK(25, 21), _sliceid) | \ FIELD_PREP(GENMASK(20, 18), _location) | \ diff --git a/gen8_reg.h b/gen8_reg.h index 21f1b491b8..e5ca1aeef7 100644 --- a/gen8_reg.h +++ b/gen8_reg.h @@ -39,6 +39,8 @@ #define GEN8_RBBM_GBIF_CLIENT_QOS_CNTL 0x008 #define GEN8_RBBM_GBIF_HALT 0x00a #define GEN8_RBBM_GBIF_HALT_ACK 0x00b +#define GEN8_RBBM_WAIT_IDLE_CLOCKS_CNTL 0x010 +#define GEN8_RBBM_WAIT_IDLE_CLOCKS_CNTL2 0x011 #define GEN8_RBBM_STATUS 0x012 #define GEN8_RBBM_STATUS1 0x013 #define GEN8_RBBM_GFX_STATUS 0x015 @@ -59,6 +61,7 @@ #define GEN8_RBBM_CLOCK_CNTL_GLOBAL 0x09a #define GEN8_RBBM_CGC_GLOBAL_LOAD_CMD 0x09b #define GEN8_RBBM_CGC_P2S_TRIG_CMD 0x09c +#define GEN8_RBBM_CGC_P2S_CNTL 0x09d #define GEN8_RBBM_CGC_P2S_STATUS 0x09f #define GEN8_RBBM_CGC_0_PC 0x10b From 4142d3c891dd4c78ed61dd8a72676cb558d413f9 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Wed, 8 May 2024 10:22:07 -0700 Subject: [PATCH 0782/1016] msm: kgsl: Pin a process in memory while creating a bind object Bind operations require the referenced mem_entry objects to be currently pinned in memory. To ensure this, reclaim the process to pinned state to bring all its mem_entry objects into memory when creating the bind object. 
Change-Id: I5b46891004d42bed6a0e3a0c66bcdcd25ad52321 Signed-off-by: Lynus Vaz --- kgsl_vbo.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kgsl_vbo.c b/kgsl_vbo.c index bf72c139db..c167a6f45d 100644 --- a/kgsl_vbo.c +++ b/kgsl_vbo.c @@ -12,6 +12,7 @@ #include "kgsl_device.h" #include "kgsl_mmu.h" +#include "kgsl_reclaim.h" #include "kgsl_sharedmem.h" #include "kgsl_trace.h" @@ -423,6 +424,12 @@ kgsl_sharedmem_create_bind_op(struct kgsl_process_private *private, op->nr_ops = ranges_nents; op->target = target; + /* Make sure process is pinned in memory before proceeding */ + atomic_inc(&private->cmd_count); + ret = kgsl_reclaim_to_pinned_state(private); + if (ret) + goto err; + for (i = 0; i < ranges_nents; i++) { struct kgsl_gpumem_bind_range range; struct kgsl_mem_entry *entry; @@ -523,12 +530,14 @@ kgsl_sharedmem_create_bind_op(struct kgsl_process_private *private, ranges += ranges_size; } + atomic_dec(&private->cmd_count); init_completion(&op->comp); kref_init(&op->ref); return op; err: + atomic_dec(&private->cmd_count); kgsl_sharedmem_free_bind_op(op); return ERR_PTR(ret); } From 77952c34790b153d33c5725cbc5e068ffc266a01 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 9 May 2024 22:05:32 +0530 Subject: [PATCH 0783/1016] msm: kgsl: Fix null pointer dereference in get_sg_from_child() for_each_sg_page() expects the number of scatter gather (SG) entries as third parameter. However, get_sg_from_child() incorrectly passes the number of pages to it. When the number of pages is higher than available SG entries, the loop processes the last SG entry. Due to this, target_sg becomes null leading to a null pointer dereference. Fix this by using for_each_sgtable_page(), which internally handles the correct parameter. Additionally, modify get_sg_from_child() to handle the logic for processing number of pages. 
Change-Id: I8851b2c2cea920c2133ac13c14a8268146f58266 Signed-off-by: Kamal Agrawal --- kgsl_iommu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kgsl_iommu.c b/kgsl_iommu.c index aff5cb0c31..e3ba161668 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -130,7 +130,7 @@ static int get_sg_from_child(struct sg_table *sgt, struct kgsl_memdesc *child, int pgoffset = (offset >> PAGE_SHIFT); struct scatterlist *target_sg; struct sg_page_iter iter; - int ret; + int i = 0, ret; if (child->pages) return sg_alloc_table_from_pages(sgt, @@ -143,9 +143,12 @@ static int get_sg_from_child(struct sg_table *sgt, struct kgsl_memdesc *child, target_sg = sgt->sgl; - for_each_sg_page(child->sgt->sgl, &iter, npages, pgoffset) { + for_each_sgtable_page(child->sgt, &iter, pgoffset) { sg_set_page(target_sg, sg_page_iter_page(&iter), PAGE_SIZE, 0); target_sg = sg_next(target_sg); + + if (++i == npages) + break; } return 0; From f7eee9baa921c40b98ebc589cec2777ebdd0bd96 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sun, 18 Feb 2024 11:54:08 +0530 Subject: [PATCH 0784/1016] kgsl: build: Enable CONFIG_QCOM_KGSL_SYNX for niobe GPU Enable CONFIG_QCOM_KGSL_SYNX for niobe GPU to use SYNX APIs for hardware fence. 
Change-Id: I17234ae800e11172e23285c62b9102f8fd7d6cd7 Signed-off-by: Kamal Agrawal --- Android.mk | 2 +- build/kgsl_defs.bzl | 2 +- config/gki_niobe.conf | 6 +++--- config/niobe_consolidate_gpuconf | 1 + config/niobe_gki_gpuconf | 3 ++- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Android.mk b/Android.mk index b8dcdce788..066450788b 100644 --- a/Android.mk +++ b/Android.mk @@ -36,7 +36,7 @@ ifeq ($(TARGET_BOARD_PLATFORM), pineapple) KBUILD_OPTIONS += KBUILD_EXTRA_SYMBOLS+=$(PWD)/$(call intermediates-dir-for,DLKM,hw-fence-module-symvers)/Module.symvers endif -ifeq ($(TARGET_BOARD_PLATFORM), sun) +ifneq ($(filter sun niobe, $(TARGET_BOARD_PLATFORM)),) KBUILD_OPTIONS += KBUILD_EXTRA_SYMBOLS+=$(PWD)/$(call intermediates-dir-for,DLKM,synx-driver-symvers)/synx-driver-symvers endif diff --git a/build/kgsl_defs.bzl b/build/kgsl_defs.bzl index bce5876928..12b1f820c0 100644 --- a/build/kgsl_defs.bzl +++ b/build/kgsl_defs.bzl @@ -98,7 +98,7 @@ def external_deps(target, variant): ] # Add synx-kernel in the dependency list for targets that use it for hardware fences - if target in [ "sun" ]: + if target in [ "sun", "niobe" ]: deplist = deplist + [ "//vendor/qcom/opensource/synx-kernel:{}_modules".format(tv), "//vendor/qcom/opensource/synx-kernel:synx_headers" diff --git a/config/gki_niobe.conf b/config/gki_niobe.conf index 04506a8f0d..22b49f9538 100644 --- a/config/gki_niobe.conf +++ b/config/gki_niobe.conf @@ -1,12 +1,12 @@ # SPDX-License-Identifier: GPL-2.0-only -# Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. +# Copyright (c) 2023-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 CONFIG_QCOM_KGSL_SORT_POOL = y CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT = y CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR = \"msm-adreno-tz\" -CONFIG_QTI_HW_FENCE=y +CONFIG_QCOM_KGSL_SYNX=y ifneq ($(CONFIG_CORESIGHT),) CONFIG_QCOM_KGSL_CORESIGHT = y @@ -17,7 +17,7 @@ ccflags-y += -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ -DCONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=1 \ -DCONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR=\"msm-adreno-tz\" \ - -DCONFIG_QTI_HW_FENCE=1 # This is needed to enable the hw-fence driver APIs in kernel headers + -DCONFIG_QCOM_KGSL_SYNX=1 # This is needed to enable the hw-fence driver APIs in kernel headers ifneq ($(CONFIG_CORESIGHT),) ccflags-y += -DCONFIG_QCOM_KGSL_CORESIGHT=1 diff --git a/config/niobe_consolidate_gpuconf b/config/niobe_consolidate_gpuconf index 07a106aa48..4fad7183a0 100644 --- a/config/niobe_consolidate_gpuconf +++ b/config/niobe_consolidate_gpuconf @@ -8,3 +8,4 @@ CONFIG_QCOM_KGSL_FENCE_TRACE=y CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=y CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR="msm-adreno-tz" +CONFIG_QCOM_KGSL_SYNX=y diff --git a/config/niobe_gki_gpuconf b/config/niobe_gki_gpuconf index 837c2df56d..682128bd54 100644 --- a/config/niobe_gki_gpuconf +++ b/config/niobe_gki_gpuconf @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -# Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. +# Copyright (c) 2023-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
CONFIG_QCOM_KGSL=m CONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 @@ -7,3 +7,4 @@ CONFIG_QCOM_KGSL_SORT_POOL=y CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=y CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR="msm-adreno-tz" +CONFIG_QCOM_KGSL_SYNX=y From 4cb55b6254fbd6ddfa34c68625e18bfa89fe1cd5 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sat, 11 May 2024 17:49:50 +0530 Subject: [PATCH 0785/1016] kgsl: hwsched: Update kernel version check for remote proc APIs Remote proc APIs are needed for gen8_4_0 GPU, which is using kernel v6.1. Required APIs are already supported in v6.1, thus update the kernel version check. Change-Id: I1ebac5af86b2dc2acdd87b2d3c226b6cb9e647ba Signed-off-by: Kamal Agrawal --- kgsl_gmu_core.c | 2 +- kgsl_gmu_core.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kgsl_gmu_core.c b/kgsl_gmu_core.c index e98659ae03..64bb616616 100644 --- a/kgsl_gmu_core.c +++ b/kgsl_gmu_core.c @@ -378,7 +378,7 @@ void gmu_core_reset_trace_header(struct kgsl_gmu_trace *trace) trace->reset_hdr = false; } -#if (KERNEL_VERSION(6, 2, 0) <= LINUX_VERSION_CODE) +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) struct rproc *gmu_core_soccp_vote_init(struct device *dev) { u32 soccp_handle; diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h index e9ff849573..11cb4ffd0a 100644 --- a/kgsl_gmu_core.h +++ b/kgsl_gmu_core.h @@ -8,7 +8,7 @@ #include #include -#if (KERNEL_VERSION(6, 2, 0) <= LINUX_VERSION_CODE) +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) #include #endif From 5b909d691e797145c905a43aa7efbfec97ab2413 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sat, 11 May 2024 01:23:56 +0530 Subject: [PATCH 0786/1016] kgsl: gen8: Skip GMEM space in preemption context record for RB0 Since RB0 - being the highest priority ringbuffer always runs to completion, the GMEM data doesn't need to be saved or restored. Thus, there is no need to reserve GMEM space in preemption context record for RB0. 
Also, update the preemption context record dumping logic in snapshot. Change-Id: I839cde15b01cca520ba70f93e3f7b0ac0987a010 Signed-off-by: Kamal Agrawal --- adreno_gen8_preempt.c | 7 +++++++ adreno_gen8_snapshot.c | 2 ++ 2 files changed, 9 insertions(+) diff --git a/adreno_gen8_preempt.c b/adreno_gen8_preempt.c index 66a85da528..b675a5588a 100644 --- a/adreno_gen8_preempt.c +++ b/adreno_gen8_preempt.c @@ -684,6 +684,13 @@ static int gen8_preemption_ringbuffer_init(struct adreno_device *adreno_dev, if (gen8_core->ctxt_record_size) ctxt_record_size = gen8_core->ctxt_record_size; + /* + * Since RB0 always runs to completion, there is no need to + * save/restore GMEM data for RB0. + */ + if (rb->id == 0) + ctxt_record_size -= adreno_dev->gpucore->gmem_size; + ret = adreno_allocate_global(device, &rb->preemption_desc, ctxt_record_size, SZ_16K, 0, KGSL_MEMDESC_PRIVILEGED, "preemption_desc"); diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index a96d7bb67e..45044e2a09 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -1467,6 +1467,8 @@ static size_t snapshot_preemption_record(struct kgsl_device *device, u64 ctxt_record_size = max_t(u64, GEN8_SNAPSHOT_CTXRECORD_SIZE_IN_BYTES, device->snapshot_ctxt_record_size); + ctxt_record_size = min_t(u64, ctxt_record_size, memdesc->size); + if (remain < (ctxt_record_size + sizeof(*header))) { SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); return 0; From 1969e10a6894767d21fa469857463db2bd261f5c Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Mon, 13 May 2024 15:27:07 +0530 Subject: [PATCH 0787/1016] kgsl: Fix AB voting in gen8_3_0 When GMU-based AB voting is enabled, the vote calculation relies on the number of DDR channels. Currently, this value is hard-coded for each generation. However, it can vary from target to target within the same generation. Thus, introduce a new member in gpu core structure to specify the number of ddr channels for each target. 
If GMU-based AB voting is supported but the number of DDR channels is not specified, print a warning and fallback to interconnect path for AB voting. This helps to fix the AB voting in gen8_3_0. Change-Id: I83cad77d6acd246e9c45a40e20dcf186d38c9a2a Signed-off-by: Kamal Agrawal --- adreno-gpulist.h | 7 +++++++ adreno.h | 2 ++ adreno_gen7_gmu.c | 8 ++++---- adreno_gen7_hwsched.c | 4 +++- adreno_gen8_gmu.c | 8 ++++---- adreno_gen8_hwsched.c | 4 +++- 6 files changed, 23 insertions(+), 10 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index e54067f53c..4789fe7761 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2343,6 +2343,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_9_0 = { .gmem_size = 3 * SZ_1M, .bus_width = 32, .snapshot_size = SZ_8M, + .num_ddr_channels = 4, }, .aqefw_name = "gen70900_aqe.fw", .sqefw_name = "gen70900_sqe.fw", @@ -2381,6 +2382,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_9_1 = { .gmem_size = 3 * SZ_1M, .bus_width = 32, .snapshot_size = SZ_8M, + .num_ddr_channels = 4, }, .aqefw_name = "gen70900_aqe.fw", .sqefw_name = "gen70900_sqe.fw", @@ -2452,6 +2454,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_11_0 = { .gmem_size = SZ_1M + SZ_512K, .bus_width = 32, .snapshot_size = SZ_4M, + .num_ddr_channels = 4, }, .gmu_fw_version = GMU_VERSION(4, 5, 0), .sqefw_name = "gen71100_sqe.fw", @@ -2761,6 +2764,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .gmem_size = 12 * SZ_1M, .bus_width = 32, .snapshot_size = SZ_8M, + .num_ddr_channels = 4, }, .aqefw_name = "gen80000_aqe.fw", .sqefw_name = "gen80000_sqe.fw", @@ -2798,6 +2802,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_1 = { .gmem_size = 12 * SZ_1M, .bus_width = 32, .snapshot_size = SZ_8M, + .num_ddr_channels = 4, }, .aqefw_name = "gen80000_aqe.fw", .sqefw_name = "gen80000_sqe.fw", @@ -2899,6 +2904,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_4_0 = { .gmem_size = 12 * SZ_1M, 
.bus_width = 32, .snapshot_size = SZ_8M, + .num_ddr_channels = 4, }, .aqefw_name = "gen80000_aqe.fw", .sqefw_name = "gen80000_sqe.fw", @@ -2991,6 +2997,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_3_0 = { .gmem_size = (SZ_512K + SZ_64K), .bus_width = 32, .snapshot_size = SZ_8M, + .num_ddr_channels = 2, }, .sqefw_name = "gen80300_sqe.fw", .gmufw_name = "gen80300_gmu.bin", diff --git a/adreno.h b/adreno.h index 2b5eeca983..b150f26df3 100644 --- a/adreno.h +++ b/adreno.h @@ -494,6 +494,8 @@ struct adreno_gpu_core { u32 bus_width; /** @snapshot_size: Size of the static snapshot region in bytes */ u32 snapshot_size; + /** @num_ddr_channels: Number of DDR channels */ + u32 num_ddr_channels; }; /** diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index ce106dab6b..f2d5b374ac 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -1992,7 +1992,9 @@ static int gen7_gmu_first_boot(struct adreno_device *adreno_dev) if (ret) goto err; - if (gen7_hfi_send_get_value(adreno_dev, HFI_VALUE_GMU_AB_VOTE, 0) == 1) { + if (gen7_hfi_send_get_value(adreno_dev, HFI_VALUE_GMU_AB_VOTE, 0) == 1 && + !WARN_ONCE(!adreno_dev->gpucore->num_ddr_channels, + "Number of DDR channel is not specified in gpu core")) { adreno_dev->gmu_ab = true; set_bit(ADRENO_DEVICE_GMU_AB, &adreno_dev->priv); } @@ -2232,8 +2234,6 @@ static int gen7_gmu_bus_set(struct adreno_device *adreno_dev, int buslevel, return ret; } -#define NUM_CHANNELS 4 - u32 gen7_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab) { u16 vote = 0; @@ -2248,7 +2248,7 @@ u32 gen7_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab) * max ddr bandwidth (kbps) = (Max bw in kbps per channel * number of channel) * max ab (Mbps) = max ddr bandwidth (kbps) / 1000 */ - max_bw = pwr->ddr_table[pwr->ddr_table_count - 1] * NUM_CHANNELS; + max_bw = pwr->ddr_table[pwr->ddr_table_count - 1] * adreno_dev->gpucore->num_ddr_channels; max_ab = max_bw / 1000; /* diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 
702ac42797..f358794349 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -560,7 +560,9 @@ static int gen7_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) goto err; } - if (gen7_hwsched_hfi_get_value(adreno_dev, HFI_VALUE_GMU_AB_VOTE) == 1) { + if (gen7_hwsched_hfi_get_value(adreno_dev, HFI_VALUE_GMU_AB_VOTE) == 1 && + !WARN_ONCE(!adreno_dev->gpucore->num_ddr_channels, + "Number of DDR channel is not specified in gpu core")) { adreno_dev->gmu_ab = true; set_bit(ADRENO_DEVICE_GMU_AB, &adreno_dev->priv); } diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index 2197cce544..1734af9b70 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -1861,7 +1861,9 @@ static int gen8_gmu_first_boot(struct adreno_device *adreno_dev) if (ret) goto err; - if (gen8_hfi_send_get_value(adreno_dev, HFI_VALUE_GMU_AB_VOTE, 0) == 1) { + if (gen8_hfi_send_get_value(adreno_dev, HFI_VALUE_GMU_AB_VOTE, 0) == 1 && + !WARN_ONCE(!adreno_dev->gpucore->num_ddr_channels, + "Number of DDR channel is not specified in gpu core")) { adreno_dev->gmu_ab = true; set_bit(ADRENO_DEVICE_GMU_AB, &adreno_dev->priv); } @@ -2094,8 +2096,6 @@ static int gen8_gmu_bus_set(struct adreno_device *adreno_dev, int buslevel, return ret; } -#define NUM_CHANNELS 4 - u32 gen8_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab) { u16 vote = 0; @@ -2110,7 +2110,7 @@ u32 gen8_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab) * max ddr bandwidth (kbps) = (Max bw in kbps per channel * number of channel) * max ab (Mbps) = max ddr bandwidth (kbps) / 1000 */ - max_bw = pwr->ddr_table[pwr->ddr_table_count - 1] * NUM_CHANNELS; + max_bw = pwr->ddr_table[pwr->ddr_table_count - 1] * adreno_dev->gpucore->num_ddr_channels; max_ab = max_bw / 1000; /* diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 9be6a1eff9..ecf48e8a6b 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -581,7 +581,9 @@ static int gen8_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) goto 
err; } - if (gen8_hwsched_hfi_get_value(adreno_dev, HFI_VALUE_GMU_AB_VOTE) == 1) { + if (gen8_hwsched_hfi_get_value(adreno_dev, HFI_VALUE_GMU_AB_VOTE) == 1 && + !WARN_ONCE(!adreno_dev->gpucore->num_ddr_channels, + "Number of DDR channel is not specified in gpu core")) { adreno_dev->gmu_ab = true; set_bit(ADRENO_DEVICE_GMU_AB, &adreno_dev->priv); } From d0fc8a40d2169946d5a729562bc134d98aadb9ae Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Mon, 13 May 2024 17:00:12 +0530 Subject: [PATCH 0788/1016] kgsl: Fix code comments Fix few code comments: 1. Remove obsolete structure fields comments. 2. Update few spellings. Change-Id: Id6421530adcc96ea3d21cb06d7352f9ba6a7bddf Signed-off-by: Kamal Agrawal --- adreno.c | 4 ++-- adreno_a6xx_gmu.h | 8 -------- adreno_gen7_gmu.h | 8 -------- adreno_gen8_gmu.h | 8 -------- 4 files changed, 2 insertions(+), 26 deletions(-) diff --git a/adreno.c b/adreno.c index 2da2434980..5602359d67 100644 --- a/adreno.c +++ b/adreno.c @@ -3180,8 +3180,8 @@ bool adreno_smmu_is_stalled(struct adreno_device *adreno_dev) /* * RBBM_STATUS3:SMMU_STALLED_ON_FAULT (BIT 24) to tells if GPU - * encoutnered a pagefault. Gen8 page fault status checked from - * the software condition as RBBM_STATS3 is not available. + * encountered a pagefault. Gen8 page fault status checked from + * the software condition as RBBM_STATUS3 is not available. 
*/ if (ADRENO_GPUREV(adreno_dev) < ADRENO_REV_GEN8_0_0) { adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS3, &val); diff --git a/adreno_a6xx_gmu.h b/adreno_a6xx_gmu.h index 1e24ec84ca..990522fd76 100644 --- a/adreno_a6xx_gmu.h +++ b/adreno_a6xx_gmu.h @@ -16,19 +16,11 @@ * @ver: GMU Version information * @irq: GMU interrupt number * @fw_image: GMU FW image - * @hfi_mem: pointer to HFI shared memory * @dump_mem: pointer to GMU debug dump memory * @gmu_log: gmu event log memory * @hfi: HFI controller - * @num_gpupwrlevels: number GPU frequencies in GPU freq table - * @num_bwlevel: number of GPU BW levels - * @num_cnocbwlevel: number CNOC BW levels - * @rpmh_votes: RPMh TCS command set for GPU, GMU voltage and bw scaling * @clks: GPU subsystem clocks required for GMU functionality - * @wakeup_pwrlevel: GPU wake up power/DCVS level in case different - * than default power level * @idle_level: Minimal GPU idle power level - * @fault_count: GMU fault count * @mailbox: Messages to AOP for ACD enable/disable go through this * @log_wptr_retention: Store the log wptr offset on slumber */ diff --git a/adreno_gen7_gmu.h b/adreno_gen7_gmu.h index d1a8231e72..c56ff130b2 100644 --- a/adreno_gen7_gmu.h +++ b/adreno_gen7_gmu.h @@ -23,19 +23,11 @@ struct gen7_dcvs_table { * @ver: GMU Version information * @irq: GMU interrupt number * @fw_image: GMU FW image - * @hfi_mem: pointer to HFI shared memory * @dump_mem: pointer to GMU debug dump memory * @gmu_log: gmu event log memory * @hfi: HFI controller - * @num_gpupwrlevels: number GPU frequencies in GPU freq table - * @num_bwlevel: number of GPU BW levels - * @num_cnocbwlevel: number CNOC BW levels - * @rpmh_votes: RPMh TCS command set for GPU, GMU voltage and bw scaling * @clks: GPU subsystem clocks required for GMU functionality - * @wakeup_pwrlevel: GPU wake up power/DCVS level in case different - * than default power level * @idle_level: Minimal GPU idle power level - * @fault_count: GMU fault count * @mailbox: Messages to AOP for 
ACD enable/disable go through this * @log_wptr_retention: Store the log wptr offset on slumber */ diff --git a/adreno_gen8_gmu.h b/adreno_gen8_gmu.h index b468bed19f..4be7adea42 100644 --- a/adreno_gen8_gmu.h +++ b/adreno_gen8_gmu.h @@ -24,19 +24,11 @@ struct gen8_dcvs_table { * @ver: GMU Version information * @irq: GMU interrupt number * @fw_image: GMU FW image - * @hfi_mem: pointer to HFI shared memory * @dump_mem: pointer to GMU debug dump memory * @gmu_log: gmu event log memory * @hfi: HFI controller - * @num_gpupwrlevels: number GPU frequencies in GPU freq table - * @num_bwlevel: number of GPU BW levels - * @num_cnocbwlevel: number CNOC BW levels - * @rpmh_votes: RPMh TCS command set for GPU, GMU voltage and bw scaling * @clks: GPU subsystem clocks required for GMU functionality - * @wakeup_pwrlevel: GPU wake up power/DCVS level in case different - * than default power level * @idle_level: Minimal GPU idle power level - * @fault_count: GMU fault count * @log_wptr_retention: Store the log wptr offset on slumber */ struct gen8_gmu_device { From 7ec726787ca176b2fea265054e228f091a13586b Mon Sep 17 00:00:00 2001 From: Linux Image Build Automation Date: Tue, 14 May 2024 02:03:18 -0700 Subject: [PATCH 0789/1016] Revert "kgsl: gen8: Add support for Gen8_0_1" This reverts commit 468da5c03fee61ec6ad919d7834a6d56d238953d. 
Change-Id: I8748d725c20b8ca55f6002f6b1cd791fece1dda2 Signed-off-by: Linux Image Build Automation --- adreno-gpulist.h | 38 -------------------------------------- adreno.h | 1 - 2 files changed, 39 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 961d4b2367..9f963e2902 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2743,43 +2743,6 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .noc_timeout_us = 3410, /* 3.41 msec */ }; -static const struct adreno_gen8_core adreno_gpu_core_gen8_0_1 = { - .base = { - DEFINE_ADRENO_REV(ADRENO_REV_GEN8_0_1, - UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), - .compatible = "qcom,adreno-gpu-gen8-0-1", - .features = ADRENO_APRIV | ADRENO_IOCOHERENT | - ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | - ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_BCL | - ADRENO_IFPC | ADRENO_HW_FENCE | ADRENO_PREEMPTION | - ADRENO_ACD, - .gpudev = &adreno_gen8_hwsched_gpudev.base, - .perfcounters = &adreno_gen8_perfcounters, - .uche_gmem_alignment = SZ_64M, - .gmem_size = 12 * SZ_1M, - .bus_width = 32, - .snapshot_size = SZ_8M, - }, - .aqefw_name = "gen80000_aqe.fw", - .sqefw_name = "gen80000_sqe.fw", - .gmufw_name = "gen80000_gmu.bin", - .zap_name = "gen80000_zap.mbn", - .ao_hwcg = gen8_ao_hwcg_regs, - .ao_hwcg_count = ARRAY_SIZE(gen8_ao_hwcg_regs), - .gbif = gen8_0_0_gbif_cx_regs, - .gbif_count = ARRAY_SIZE(gen8_0_0_gbif_cx_regs), - .hang_detect_cycles = 0xcfffff, - .protected_regs = gen8_0_0_protected_regs, - .nonctxt_regs = gen8_0_0_nonctxt_regs, - .highest_bank_bit = 16, - .gmu_hub_clk_freq = 200000000, - .gen8_snapshot_block_list = &gen8_0_0_snapshot_block_list, - .fast_bus_hint = true, - .bcl_data = 1, - .acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4), - .noc_timeout_us = 3410, /* 3.41 msec */ -}; - /* GEN8_4_0 noncontext register list */ static const struct gen8_nonctxt_regs gen8_4_0_nonctxt_regs[] = { { GEN8_CP_SMMU_STREAM_ID_LPAC, 0x00000101, BIT(PIPE_NONE) }, @@ -2931,6 +2894,5 @@ static const 
struct adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_gen7_14_0.base, &adreno_gpu_core_gen7_11_0.base, &adreno_gpu_core_gen8_0_0.base, - &adreno_gpu_core_gen8_0_1.base, &adreno_gpu_core_gen8_4_0.base, }; diff --git a/adreno.h b/adreno.h index d80abfa959..70351591a2 100644 --- a/adreno.h +++ b/adreno.h @@ -246,7 +246,6 @@ enum adreno_gpurev { ADRENO_REV_GEN7_14_0 = ADRENO_GPUREV_VALUE(7, 14, 0), ADRENO_REV_GEN7_11_0 = ADRENO_GPUREV_VALUE(7, 11, 0), ADRENO_REV_GEN8_0_0 = ADRENO_GPUREV_VALUE(8, 0, 0), - ADRENO_REV_GEN8_0_1 = ADRENO_GPUREV_VALUE(8, 0, 1), ADRENO_REV_GEN8_4_0 = ADRENO_GPUREV_VALUE(8, 4, 0), }; From 59a7d206c6ba84316535bb1c9e59b8406f12cfce Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Mon, 13 May 2024 14:01:04 +0530 Subject: [PATCH 0790/1016] kgsl: gen8: Update shader block dump in snapshot Replace num_slices with slice_region in gen8_shader_block. Change-Id: I1519e4b1c73aa0e39af8e6bdaa48cb974fcedac2 Signed-off-by: Kamal Agrawal --- adreno_gen8_0_0_snapshot.h | 198 ++++++++++++++++++------------------- adreno_gen8_3_0_snapshot.h | 193 ++++++++++++++++++------------------ adreno_gen8_snapshot.c | 8 +- adreno_gen8_snapshot.h | 4 +- 4 files changed, 203 insertions(+), 200 deletions(-) diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h index 6259281f80..cb924cc162 100644 --- a/adreno_gen8_0_0_snapshot.h +++ b/adreno_gen8_0_0_snapshot.h @@ -10,105 +10,105 @@ #include "adreno_gen8_snapshot.h" static struct gen8_shader_block gen8_0_0_shader_blocks[] = { - { TP0_TMO_DATA, 0x0200, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { TP0_SMO_DATA, 0x0080, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { TP0_MIPMAP_BASE_DATA, 0x0080, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_INST_DATA_3, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_INST_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_INST_DATA_1, 0x0800, 2, 1, PIPE_BR, USPTP, 
GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_0_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_1_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_2_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_3_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_4_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_5_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_6_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_7_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_CB_RAM, 0x0390, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_13_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_14_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_INST_TAG, 0x0100, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_INST_DATA_2, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_TMO_TAG, 0x0080, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_SMO_TAG, 0x0080, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_STATE_DATA, 0x0040, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_HWAVE_RAM, 0x0200, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_L0_INST_BUF, 0x0080, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_8_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_9_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_10_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_11_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_12_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, GEN8_0_0_NUM_PHYSICAL_SLICES, 1}, - { HLSQ_DATAPATH_DSTR_META, 0x0600, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { 
HLSQ_DATAPATH_DSTR_META, 0x0020, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_DESC_REMAP_META, 0x00A0, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_DESC_REMAP_META, 0x000C, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_DESC_REMAP_META, 0x0008, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_SLICE_TOP_META, 0x0048, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_SLICE_TOP_META, 0x0048, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_SLICE_TOP_META, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_L2STC_TAG_RAM, 0x0200, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_L2STC_INFO_CMD, 0x0474, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0100, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0100, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_CPS_BE_CTXT_BUF_RAM_TAG, 0x0100, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CPS_BE_CTXT_BUF_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_CHUNK_CPS_RAM, 0x0300, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CHUNK_CPS_RAM, 0x0180, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_CHUNK_CVS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CHUNK_CVS_RAM_TAG, 0x0040, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_CHUNK_CPS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CHUNK_CPS_RAM_TAG, 0x0040, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_ICB_CVS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_ICB_CVS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_ICB_CPS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_ICB_CPS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_CVS_MISC_RAM, 0x0540, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CVS_MISC_RAM, 0x0540, 
1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_CPS_MISC_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CPS_MISC_RAM, 0x00B0, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_CPS_MISC_RAM_1, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_INST_RAM, 0x0200, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CPS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CPS_CONST_RAM, 0x0800, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_CVS_MISC_RAM_TAG, 0x0060, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CVS_MISC_RAM_TAG, 0x0060, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_CPS_MISC_RAM_TAG, 0x0600, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CPS_MISC_RAM_TAG, 0x0012, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_INST_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_INST_RAM_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_INST_RAM_TAG, 0x0004, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0060, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_LOCAL_MISC_RAM, 0x03C0, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_LOCAL_MISC_RAM, 0x0280, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_LOCAL_MISC_RAM, 0x0050, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_LOCAL_MISC_RAM_TAG, 0x0014, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_LOCAL_MISC_RAM_TAG, 0x000C, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_INST_RAM_1, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_STPROC_META, 0x0400, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_SLICE_BACKEND_META, 
0x0188, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_SLICE_BACKEND_META, 0x0188, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_SLICE_BACKEND_META, 0x00C0, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_INST_RAM_2, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_DATAPATH_META, 0x0020, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_INDIRECT_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_BACKEND_META, 0x0034, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_BACKEND_META, 0x0034, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_BACKEND_META, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { TP0_TMO_DATA, 0x0200, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { TP0_SMO_DATA, 0x0080, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { TP0_MIPMAP_BASE_DATA, 0x0080, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_INST_DATA_3, 0x0800, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_INST_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_INST_DATA_1, 0x0800, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_0_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_1_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_2_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_3_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_4_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_5_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_6_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_7_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_CB_RAM, 0x0390, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_13_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_14_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_INST_TAG, 0x0100, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_INST_DATA_2, 0x0800, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_TMO_TAG, 0x0080, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_SMO_TAG, 0x0080, 2, 1, 
PIPE_BR, USPTP, SLICE, 1}, + { SP_STATE_DATA, 0x0040, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_HWAVE_RAM, 0x0200, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_L0_INST_BUF, 0x0080, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_8_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_9_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_10_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_11_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_12_DATA, 0x0800, 2, 1, PIPE_BR, USPTP, SLICE, 1}, + { HLSQ_DATAPATH_DSTR_META, 0x0600, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_DATAPATH_DSTR_META, 0x0020, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_DESC_REMAP_META, 0x00A0, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_DESC_REMAP_META, 0x000C, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_DESC_REMAP_META, 0x0008, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_SLICE_TOP_META, 0x0048, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_SLICE_TOP_META, 0x0048, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_SLICE_TOP_META, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_L2STC_TAG_RAM, 0x0200, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_L2STC_INFO_CMD, 0x0474, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0100, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0100, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CPS_BE_CTXT_BUF_RAM_TAG, 0x0100, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CPS_BE_CTXT_BUF_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CHUNK_CPS_RAM, 0x0300, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CHUNK_CPS_RAM, 0x0180, 1, 1, 
PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CHUNK_CVS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CHUNK_CVS_RAM_TAG, 0x0040, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CHUNK_CPS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CHUNK_CPS_RAM_TAG, 0x0040, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_ICB_CVS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_ICB_CVS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_ICB_CPS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_ICB_CPS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CVS_MISC_RAM, 0x0540, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CVS_MISC_RAM, 0x0540, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CPS_MISC_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CPS_MISC_RAM, 0x00B0, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CPS_MISC_RAM_1, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INST_RAM, 0x0200, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CPS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CPS_CONST_RAM, 0x0800, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CVS_MISC_RAM_TAG, 0x0060, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CVS_MISC_RAM_TAG, 0x0060, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CPS_MISC_RAM_TAG, 0x0600, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CPS_MISC_RAM_TAG, 0x0012, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INST_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INST_RAM_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INST_RAM_TAG, 0x0004, 
1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0060, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM, 0x03C0, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM, 0x0280, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM, 0x0050, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM_TAG, 0x0014, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM_TAG, 0x000C, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INST_RAM_1, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_STPROC_META, 0x0400, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_SLICE_BACKEND_META, 0x0188, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_SLICE_BACKEND_META, 0x0188, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_SLICE_BACKEND_META, 0x00C0, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INST_RAM_2, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_DATAPATH_META, 0x0020, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INDIRECT_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_BACKEND_META, 0x0034, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_BACKEND_META, 0x0034, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_BACKEND_META, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, }; /* diff --git a/adreno_gen8_3_0_snapshot.h b/adreno_gen8_3_0_snapshot.h index 71a29e2349..6eb5182780 100644 --- a/adreno_gen8_3_0_snapshot.h +++ b/adreno_gen8_3_0_snapshot.h @@ -64,103 +64,104 @@ static const u32 
gen8_3_0_debugbus_blocks[] = { DEBUGBUS_HLSQ_GC_S_0_I_0, DEBUGBUS_VFD_GC_S_0_I_0, }; + static struct gen8_shader_block gen8_3_0_shader_blocks[] = { - { TP0_TMO_DATA, 0x0200, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { TP0_SMO_DATA, 0x0080, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { TP0_MIPMAP_BASE_DATA, 0x0080, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_INST_DATA_3, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_INST_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_INST_DATA_1, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_0_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_1_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_2_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_3_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_4_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_5_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_6_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_7_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_CB_RAM, 0x0390, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_13_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_14_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_INST_TAG, 0x0100, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_INST_DATA_2, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_TMO_TAG, 0x0080, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_SMO_TAG, 0x0080, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_STATE_DATA, 0x0040, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_HWAVE_RAM, 0x0100, 1, 2, 
PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_L0_INST_BUF, 0x0080, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_8_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_9_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_10_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_11_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { SP_LB_12_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, GEN8_3_0_NUM_PHYSICAL_SLICES, 1}, - { HLSQ_DATAPATH_DSTR_META, 0x0170, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_DATAPATH_DSTR_META, 0x0020, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_DESC_REMAP_META, 0x0018, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_DESC_REMAP_META, 0x000C, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_DESC_REMAP_META, 0x0008, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_SLICE_TOP_META, 0x0048, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_SLICE_TOP_META, 0x0048, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_SLICE_TOP_META, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_L2STC_TAG_RAM, 0x0200, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_L2STC_INFO_CMD, 0x0474, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0080, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_CPS_BE_CTXT_BUF_RAM_TAG, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CPS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_CHUNK_CPS_RAM, 0x0300, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CHUNK_CVS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CHUNK_CVS_RAM_TAG, 0x0040, 1, 1, 
PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_CHUNK_CPS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_ICB_CVS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_ICB_CVS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_ICB_CPS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CVS_MISC_RAM, 0x0540, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CVS_MISC_RAM, 0x0540, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_CPS_MISC_RAM, 0x0640, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CPS_MISC_RAM, 0x00B0, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_CPS_MISC_RAM_1, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_INST_RAM, 0x0200, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CPS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CPS_CONST_RAM, 0x0800, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_CVS_MISC_RAM_TAG, 0x0050, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CVS_MISC_RAM_TAG, 0x0050, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_CPS_MISC_RAM_TAG, 0x0050, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_CPS_MISC_RAM_TAG, 0x0008, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_INST_RAM_TAG, 0x0014, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_INST_RAM_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_INST_RAM_TAG, 0x0004, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_LOCAL_MISC_RAM, 0x03C0, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_LOCAL_MISC_RAM, 0x0280, 1, 1, 
PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_LOCAL_MISC_RAM, 0x0050, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_LOCAL_MISC_RAM_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_GFX_LOCAL_MISC_RAM_TAG, 0x0008, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_INST_RAM_1, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_STPROC_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_SLICE_BACKEND_META, 0x00C0, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_SLICE_BACKEND_META, 0x00C0, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_SLICE_BACKEND_META, 0x00C0, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_INST_RAM_2, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_DATAPATH_META, 0x0020, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, - { HLSQ_INDIRECT_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_BACKEND_META, 0x0020, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1}, - { HLSQ_BACKEND_META, 0x0020, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1}, - { HLSQ_BACKEND_META, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1}, + { TP0_TMO_DATA, 0x0200, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { TP0_SMO_DATA, 0x0080, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { TP0_MIPMAP_BASE_DATA, 0x0080, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_INST_DATA_3, 0x0800, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_INST_DATA, 0x0800, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_INST_DATA_1, 0x0800, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_0_DATA, 0x0800, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_1_DATA, 0x0800, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_2_DATA, 0x0800, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_3_DATA, 0x0800, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_4_DATA, 0x0800, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_5_DATA, 0x0800, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_6_DATA, 0x0800, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_7_DATA, 0x0800, 1, 1, 
PIPE_BR, USPTP, SLICE, 1}, + { SP_CB_RAM, 0x0390, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_13_DATA, 0x0800, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_14_DATA, 0x0800, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_INST_TAG, 0x0100, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_INST_DATA_2, 0x0800, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_TMO_TAG, 0x0080, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_SMO_TAG, 0x0080, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_STATE_DATA, 0x0040, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_HWAVE_RAM, 0x0200, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_L0_INST_BUF, 0x0080, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_8_DATA, 0x0800, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_9_DATA, 0x0800, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_10_DATA, 0x0800, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_11_DATA, 0x0800, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_12_DATA, 0x0800, 1, 1, PIPE_BR, USPTP, SLICE, 1}, + { HLSQ_DATAPATH_DSTR_META, 0x0600, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_DATAPATH_DSTR_META, 0x0020, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_DESC_REMAP_META, 0x00A0, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_DESC_REMAP_META, 0x000C, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_DESC_REMAP_META, 0x0008, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_SLICE_TOP_META, 0x0048, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_SLICE_TOP_META, 0x0048, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_SLICE_TOP_META, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_L2STC_TAG_RAM, 0x0200, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_L2STC_INFO_CMD, 0x0474, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0100, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0100, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CPS_BE_CTXT_BUF_RAM_TAG, 0x0100, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { 
HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CPS_BE_CTXT_BUF_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CHUNK_CPS_RAM, 0x0300, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CHUNK_CVS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CHUNK_CVS_RAM_TAG, 0x0040, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CHUNK_CPS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_ICB_CVS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_ICB_CVS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_ICB_CPS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CVS_MISC_RAM, 0x0540, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CVS_MISC_RAM, 0x0540, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CPS_MISC_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CPS_MISC_RAM, 0x00B0, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CPS_MISC_RAM_1, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INST_RAM, 0x0200, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CPS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CPS_CONST_RAM, 0x0800, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CVS_MISC_RAM_TAG, 0x0060, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CVS_MISC_RAM_TAG, 0x0060, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CPS_MISC_RAM_TAG, 0x0600, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CPS_MISC_RAM_TAG, 0x0012, 1, 1, PIPE_LPAC, HLSQ_STATE, 
UNSLICE, 1}, + { HLSQ_INST_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INST_RAM_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INST_RAM_TAG, 0x0004, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0060, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM, 0x03C0, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM, 0x0280, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM, 0x0050, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM_TAG, 0x0014, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM_TAG, 0x000C, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INST_RAM_1, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_STPROC_META, 0x0400, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_SLICE_BACKEND_META, 0x0188, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_SLICE_BACKEND_META, 0x0188, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_SLICE_BACKEND_META, 0x00C0, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INST_RAM_2, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_DATAPATH_META, 0x0020, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INDIRECT_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_BACKEND_META, 0x0034, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_BACKEND_META, 0x0034, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_BACKEND_META, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, }; /* diff --git 
a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index bf263808f4..74283d7dd9 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -635,8 +635,9 @@ static void gen8_snapshot_shader(struct kgsl_device *device, if (CD_SCRIPT_CHECK(device)) { for (i = 0; i < num_shader_blocks; i++) { struct gen8_shader_block *block = &shader_blocks[i]; + u32 slices = NUMBER_OF_SLICES(block->slice_region, ADRENO_DEVICE(device)); - for (slice = 0; slice < block->num_slices; slice++) { + for (slice = 0; slice < slices; slice++) { for (sp = 0; sp < block->num_sps; sp++) { for (usptp = 0; usptp < block->num_usptps; usptp++) { info.block = block; @@ -660,12 +661,13 @@ static void gen8_snapshot_shader(struct kgsl_device *device, for (i = 0; i < num_shader_blocks; i++) { struct gen8_shader_block *block = &shader_blocks[i]; + u32 slices = NUMBER_OF_SLICES(block->slice_region, ADRENO_DEVICE(device)); /* Build the crash script */ ptr = gen8_capturescript->hostptr; offset = 0; - for (slice = 0; slice < block->num_slices; slice++) { + for (slice = 0; slice < slices; slice++) { for (sp = 0; sp < block->num_sps; sp++) { for (usptp = 0; usptp < block->num_usptps; usptp++) { /* Program the aperture */ @@ -689,7 +691,7 @@ static void gen8_snapshot_shader(struct kgsl_device *device, func = gen8_snapshot_shader_memory; offset = 0; - for (slice = 0; slice < block->num_slices; slice++) { + for (slice = 0; slice < slices; slice++) { for (sp = 0; sp < block->num_sps; sp++) { for (usptp = 0; usptp < block->num_usptps; usptp++) { info.block = block; diff --git a/adreno_gen8_snapshot.h b/adreno_gen8_snapshot.h index bacbb4ad0a..f4f2e61e3a 100644 --- a/adreno_gen8_snapshot.h +++ b/adreno_gen8_snapshot.h @@ -79,8 +79,8 @@ struct gen8_shader_block { u32 pipeid; /* location: Location identifier for the block data */ u32 location; - /* num_slices: the number of slices to dump */ - u32 num_slices; + /* slice_region: slice region, if SLICE then loop over all slices */ + u32 slice_region; /* num_ctx: 
repeat id to loop */ u32 num_ctx; /* offset: The offset in the snasphot dump */ From 6331aaef011d47fe758c80240b4790389fb99b5a Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 16 May 2024 17:47:02 +0530 Subject: [PATCH 0791/1016] kgsl: gen8: Disable write slow pointer in data phase queue There is a possibility of deadlock between slow interface clients in UCHE. Fix this by disabling write slow pointer in data phase queue. Change-Id: I71ceda95fd50d5f573ffec94bf6d101af9d863e2 Signed-off-by: Kamal Agrawal --- adreno-gpulist.h | 6 ++++++ adreno_gen8.c | 2 ++ 2 files changed, 8 insertions(+) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index e54067f53c..4e108e4370 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2673,6 +2673,8 @@ static const struct gen8_nonctxt_regs gen8_0_0_nonctxt_regs[] = { /* Limit gmem number of ways for GMEM requests in each set */ { GEN8_UCHE_CCHE_CACHE_WAYS, 0x00000800, BIT(PIPE_NONE)}, { GEN8_UCHE_CACHE_WAYS, 0x00080000, BIT(PIPE_NONE) }, + /* Disable write slow pointer in data phase queue */ + { GEN8_UCHE_HW_DBG_CNTL, BIT(8), BIT(PIPE_NONE) }, /* Configure UCHE to CCU switchthreshold timeout cycles */ { GEN8_UCHE_VARB_IDLE_TIMEOUT, 0x00000020, BIT(PIPE_NONE) }, { GEN8_VFD_DBG_ECO_CNTL, 0x00008000, BIT(PIPE_BR) | BIT(PIPE_BV) }, @@ -2867,6 +2869,8 @@ static const struct gen8_nonctxt_regs gen8_4_0_nonctxt_regs[] = { /* Limit gmem number of ways for GMEM requests in each set */ { GEN8_UCHE_CCHE_CACHE_WAYS, 0x00000800, BIT(PIPE_NONE)}, { GEN8_UCHE_CACHE_WAYS, 0x00080000, BIT(PIPE_NONE) }, + /* Disable write slow pointer in data phase queue */ + { GEN8_UCHE_HW_DBG_CNTL, BIT(8), BIT(PIPE_NONE) }, /* Configure UCHE to CCU switchthreshold timeout cycles */ { GEN8_UCHE_VARB_IDLE_TIMEOUT, 0x00000020, BIT(PIPE_NONE) }, { GEN8_VFD_DBG_ECO_CNTL, 0x00008000, BIT(PIPE_BR) | BIT(PIPE_BV) }, @@ -2965,6 +2969,8 @@ static const struct gen8_nonctxt_regs gen8_3_0_nonctxt_regs[] = { { GEN8_UCHE_MODE_CNTL, 0x00020000, BIT(PIPE_NONE) }, { 
GEN8_UCHE_CCHE_MODE_CNTL, 0x00001000, BIT(PIPE_NONE) }, { GEN8_UCHE_CCHE_CACHE_WAYS, 0x00000800, BIT(PIPE_NONE) }, + /* Disable write slow pointer in data phase queue */ + { GEN8_UCHE_HW_DBG_CNTL, BIT(8), BIT(PIPE_NONE) }, { GEN8_UCHE_CACHE_WAYS, 0x00080000, BIT(PIPE_NONE) }, { GEN8_UCHE_VARB_IDLE_TIMEOUT, 0x00000020, BIT(PIPE_NONE) }, { GEN8_VFD_DBG_ECO_CNTL, 0x00008000, BIT(PIPE_BV) | BIT(PIPE_BR) }, diff --git a/adreno_gen8.c b/adreno_gen8.c index dd51d82b22..cecd975836 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -34,6 +34,7 @@ static const u32 gen8_pwrup_reglist[] = { GEN8_UCHE_CCHE_GC_GMEM_RANGE_MIN_HI, GEN8_UCHE_CCHE_LPAC_GMEM_RANGE_MIN_LO, GEN8_UCHE_CCHE_LPAC_GMEM_RANGE_MIN_HI, + GEN8_UCHE_HW_DBG_CNTL, GEN8_UCHE_WRITE_THRU_BASE_LO, GEN8_UCHE_WRITE_THRU_BASE_HI, GEN8_UCHE_TRAP_BASE_LO, @@ -58,6 +59,7 @@ static const u32 gen8_3_0_pwrup_reglist[] = { GEN8_UCHE_CCHE_CACHE_WAYS, GEN8_UCHE_CCHE_GC_GMEM_RANGE_MIN_LO, GEN8_UCHE_CCHE_GC_GMEM_RANGE_MIN_HI, + GEN8_UCHE_HW_DBG_CNTL, GEN8_UCHE_WRITE_THRU_BASE_LO, GEN8_UCHE_WRITE_THRU_BASE_HI, GEN8_UCHE_TRAP_BASE_LO, From 1f669e5817f95cfa60dbb5c1cdfb421ec2d28ce5 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Fri, 10 May 2024 11:56:23 -0700 Subject: [PATCH 0792/1016] kgsl: snapshot: Implement common gmu version and memory dump Currently gmu version and memory dump functions are implemented in target-specific code. As there is no hardware dependency implement them in common code to reuse across targets. 
Change-Id: I87deb073ddb272291e63517eeb5466deac4083ca Signed-off-by: Hareesh Gundu --- adreno_a6xx_gmu_snapshot.c | 62 ++---------------------------------- adreno_gen7_gmu.h | 12 ------- adreno_gen7_gmu_snapshot.c | 64 ++------------------------------------ adreno_gen7_hwsched.c | 4 +-- adreno_gen8_gmu.h | 12 ------- adreno_gen8_gmu_snapshot.c | 61 ++---------------------------------- adreno_gen8_hwsched.c | 4 +-- adreno_snapshot.c | 51 ++++++++++++++++++++++++++++++ adreno_snapshot.h | 28 ++++++++++++++--- kgsl_snapshot.h | 10 ++++++ 10 files changed, 97 insertions(+), 211 deletions(-) diff --git a/adreno_a6xx_gmu_snapshot.c b/adreno_a6xx_gmu_snapshot.c index 40fde7369d..839c60d956 100644 --- a/adreno_a6xx_gmu_snapshot.c +++ b/adreno_a6xx_gmu_snapshot.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include "a6xx_reg.h" @@ -154,37 +154,6 @@ static const unsigned int a650_rscc_registers[] = { 0x3915B, 0x3915B, }; -static size_t a6xx_snapshot_gmu_mem(struct kgsl_device *device, - u8 *buf, size_t remain, void *priv) -{ - struct kgsl_snapshot_gmu_mem *mem_hdr = - (struct kgsl_snapshot_gmu_mem *)buf; - unsigned int *data = (unsigned int *) - (buf + sizeof(*mem_hdr)); - struct gmu_mem_type_desc *desc = priv; - - if (priv == NULL || desc->memdesc->hostptr == NULL) - return 0; - - if (remain < desc->memdesc->size + sizeof(*mem_hdr)) { - dev_err(device->dev, - "snapshot: Not enough memory for the gmu section %d\n", - desc->type); - return 0; - } - - memset(mem_hdr, 0, sizeof(*mem_hdr)); - mem_hdr->type = desc->type; - mem_hdr->hostaddr = (uintptr_t)desc->memdesc->hostptr; - mem_hdr->gmuaddr = desc->memdesc->gmuaddr; - mem_hdr->gpuaddr = 0; - - /* Just copy the ringbuffer, there are no active IBs */ - memcpy(data, desc->memdesc->hostptr, desc->memdesc->size); - - return desc->memdesc->size + sizeof(*mem_hdr); -} - static size_t a6xx_gmu_snapshot_dtcm(struct kgsl_device *device, u8 *buf, size_t remain, void *priv) { @@ -268,35 +237,10 @@ static void a6xx_gmu_snapshot_memories(struct kgsl_device *device, kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY, - snapshot, a6xx_snapshot_gmu_mem, &desc); + snapshot, adreno_snapshot_gmu_mem, &desc); } } -struct kgsl_snapshot_gmu_version { - uint32_t type; - uint32_t value; -}; - -static size_t a6xx_snapshot_gmu_version(struct kgsl_device *device, - u8 *buf, size_t remain, void *priv) -{ - struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; - uint32_t *data = (uint32_t *) (buf + sizeof(*header)); - struct kgsl_snapshot_gmu_version *ver = priv; - - if (remain < DEBUG_SECTION_SZ(1)) { - SNAPSHOT_ERR_NOMEM(device, "GMU Version"); - return 0; - } - - header->type = ver->type; - header->size = 1; - - *data = ver->value; - - return DEBUG_SECTION_SZ(1); -} - static void 
a6xx_gmu_snapshot_versions(struct kgsl_device *device, struct a6xx_gmu_device *gmu, struct kgsl_snapshot *snapshot) @@ -318,7 +262,7 @@ static void a6xx_gmu_snapshot_versions(struct kgsl_device *device, for (i = 0; i < ARRAY_SIZE(gmu_vers); i++) kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, - snapshot, a6xx_snapshot_gmu_version, + snapshot, adreno_snapshot_gmu_version, &gmu_vers[i]); } diff --git a/adreno_gen7_gmu.h b/adreno_gen7_gmu.h index 423308bfc2..b2930a9f70 100644 --- a/adreno_gen7_gmu.h +++ b/adreno_gen7_gmu.h @@ -484,18 +484,6 @@ void gen7_gmu_send_nmi(struct kgsl_device *device, bool force, */ int gen7_gmu_add_to_minidump(struct adreno_device *adreno_dev); -/** - * gen7_snapshot_gmu_mem - Snapshot a GMU memory descriptor - * @device: Pointer to the kgsl device - * @buf: Destination snapshot buffer - * @remain: Remaining size of the snapshot buffer - * @priv: Opaque handle - * - * Return: Number of bytes written to snapshot buffer - */ -size_t gen7_snapshot_gmu_mem(struct kgsl_device *device, - u8 *buf, size_t remain, void *priv); - /** * gen7_bus_ab_quantize - Calculate the AB vote that needs to be sent to GMU * @adreno_dev: Handle to the adreno device diff --git a/adreno_gen7_gmu_snapshot.c b/adreno_gen7_gmu_snapshot.c index dede529959..d46a430145 100644 --- a/adreno_gen7_gmu_snapshot.c +++ b/adreno_gen7_gmu_snapshot.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include "gen7_reg.h" @@ -13,39 +13,6 @@ #include "adreno_gen7_2_0_snapshot.h" #include "kgsl_device.h" -size_t gen7_snapshot_gmu_mem(struct kgsl_device *device, - u8 *buf, size_t remain, void *priv) -{ - struct kgsl_snapshot_gmu_mem *mem_hdr = - (struct kgsl_snapshot_gmu_mem *)buf; - unsigned int *data = (unsigned int *) - (buf + sizeof(*mem_hdr)); - struct gmu_mem_type_desc *desc = priv; - - if (priv == NULL || desc->memdesc->hostptr == NULL) - return 0; - - if (remain < desc->memdesc->size + sizeof(*mem_hdr)) { - dev_err(device->dev, - "snapshot: Not enough memory for the gmu section %d\n", - desc->type); - return 0; - } - - mem_hdr->type = desc->type; - mem_hdr->hostaddr = (u64)(uintptr_t)desc->memdesc->hostptr; - mem_hdr->gmuaddr = desc->memdesc->gmuaddr; - mem_hdr->gpuaddr = 0; - - /* The hw fence queues are mapped as iomem in the kernel */ - if (desc->type == SNAPSHOT_GMU_MEM_HW_FENCE) - memcpy_fromio(data, desc->memdesc->hostptr, desc->memdesc->size); - else - memcpy(data, desc->memdesc->hostptr, desc->memdesc->size); - - return desc->memdesc->size + sizeof(*mem_hdr); -} - static size_t gen7_gmu_snapshot_dtcm(struct kgsl_device *device, u8 *buf, size_t remain, void *priv) { @@ -147,35 +114,10 @@ static void gen7_gmu_snapshot_memories(struct kgsl_device *device, kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY, - snapshot, gen7_snapshot_gmu_mem, &desc); + snapshot, adreno_snapshot_gmu_mem, &desc); } } -struct kgsl_snapshot_gmu_version { - u32 type; - u32 value; -}; - -static size_t gen7_snapshot_gmu_version(struct kgsl_device *device, - u8 *buf, size_t remain, void *priv) -{ - struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; - u32 *data = (u32 *) (buf + sizeof(*header)); - struct kgsl_snapshot_gmu_version *ver = priv; - - if (remain < DEBUG_SECTION_SZ(1)) { - SNAPSHOT_ERR_NOMEM(device, "GMU Version"); - return 0; - } - - header->type = ver->type; - header->size = 1; - - *data = ver->value; - - return 
DEBUG_SECTION_SZ(1); -} - static void gen7_gmu_snapshot_versions(struct kgsl_device *device, struct gen7_gmu_device *gmu, struct kgsl_snapshot *snapshot) @@ -197,7 +139,7 @@ static void gen7_gmu_snapshot_versions(struct kgsl_device *device, for (i = 0; i < ARRAY_SIZE(gmu_vers); i++) kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, - snapshot, gen7_snapshot_gmu_version, + snapshot, adreno_snapshot_gmu_version, &gmu_vers[i]); } diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 65b40807f2..09e5ecb4c3 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -326,7 +326,7 @@ static int snapshot_context_queue(int id, void *ptr, void *data) kgsl_snapshot_add_section(context->device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY, - snapshot, gen7_snapshot_gmu_mem, &desc); + snapshot, adreno_snapshot_gmu_mem, &desc); return 0; } @@ -439,7 +439,7 @@ void gen7_hwsched_snapshot(struct adreno_device *adreno_dev, kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY, - snapshot, gen7_snapshot_gmu_mem, &desc); + snapshot, adreno_snapshot_gmu_mem, &desc); } } diff --git a/adreno_gen8_gmu.h b/adreno_gen8_gmu.h index 88c45603f7..73f9a2d6c2 100644 --- a/adreno_gen8_gmu.h +++ b/adreno_gen8_gmu.h @@ -470,18 +470,6 @@ void gen8_gmu_send_nmi(struct kgsl_device *device, bool force, */ int gen8_gmu_add_to_minidump(struct adreno_device *adreno_dev); -/** - * gen8_snapshot_gmu_mem - Snapshot a GMU memory descriptor - * @device: Pointer to the kgsl device - * @buf: Destination snapshot buffer - * @remain: Remaining size of the snapshot buffer - * @priv: Opaque handle - * - * Return: Number of bytes written to snapshot buffer - */ -size_t gen8_snapshot_gmu_mem(struct kgsl_device *device, - u8 *buf, size_t remain, void *priv); - /** * gen8_bus_ab_quantize - Calculate the AB vote that needs to be sent to GMU * @adreno_dev: Handle to the adreno device diff --git a/adreno_gen8_gmu_snapshot.c b/adreno_gen8_gmu_snapshot.c index 53eeb5f857..4619c1f26d 100644 --- 
a/adreno_gen8_gmu_snapshot.c +++ b/adreno_gen8_gmu_snapshot.c @@ -12,38 +12,6 @@ #include "gen8_reg.h" #include "kgsl_device.h" -size_t gen8_snapshot_gmu_mem(struct kgsl_device *device, - u8 *buf, size_t remain, void *priv) -{ - struct kgsl_snapshot_gmu_mem *mem_hdr = - (struct kgsl_snapshot_gmu_mem *)buf; - u32 *data = (u32 *)(buf + sizeof(*mem_hdr)); - struct gmu_mem_type_desc *desc = priv; - - if (priv == NULL || desc->memdesc->hostptr == NULL) - return 0; - - if (remain < desc->memdesc->size + sizeof(*mem_hdr)) { - dev_err(device->dev, - "snapshot: Not enough memory for the gmu section %d\n", - desc->type); - return 0; - } - - mem_hdr->type = desc->type; - mem_hdr->hostaddr = (u64)(uintptr_t)desc->memdesc->hostptr; - mem_hdr->gmuaddr = desc->memdesc->gmuaddr; - mem_hdr->gpuaddr = 0; - - /* The hw fence queues are mapped as iomem in the kernel */ - if (desc->type == SNAPSHOT_GMU_MEM_HW_FENCE) - memcpy_fromio(data, desc->memdesc->hostptr, desc->memdesc->size); - else - memcpy(data, desc->memdesc->hostptr, desc->memdesc->size); - - return desc->memdesc->size + sizeof(*mem_hdr); -} - static size_t gen8_gmu_snapshot_dtcm(struct kgsl_device *device, u8 *buf, size_t remain, void *priv) { @@ -137,35 +105,10 @@ static void gen8_gmu_snapshot_memories(struct kgsl_device *device, kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY, - snapshot, gen8_snapshot_gmu_mem, &desc); + snapshot, adreno_snapshot_gmu_mem, &desc); } } -struct kgsl_snapshot_gmu_version { - u32 type; - u32 value; -}; - -static size_t gen8_snapshot_gmu_version(struct kgsl_device *device, - u8 *buf, size_t remain, void *priv) -{ - struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; - u32 *data = (u32 *) (buf + sizeof(*header)); - struct kgsl_snapshot_gmu_version *ver = priv; - - if (remain < DEBUG_SECTION_SZ(1)) { - SNAPSHOT_ERR_NOMEM(device, "GMU Version"); - return 0; - } - - header->type = ver->type; - header->size = 1; - - *data = ver->value; - - return 
DEBUG_SECTION_SZ(1); -} - static void gen8_gmu_snapshot_versions(struct kgsl_device *device, struct gen8_gmu_device *gmu, struct kgsl_snapshot *snapshot) @@ -187,7 +130,7 @@ static void gen8_gmu_snapshot_versions(struct kgsl_device *device, for (i = 0; i < ARRAY_SIZE(gmu_vers); i++) kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, - snapshot, gen8_snapshot_gmu_version, + snapshot, adreno_snapshot_gmu_version, &gmu_vers[i]); } diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 20d722c0a7..774a8bf01b 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -286,7 +286,7 @@ static int snapshot_context_queue(int id, void *ptr, void *data) desc.type = SNAPSHOT_GMU_MEM_CONTEXT_QUEUE; kgsl_snapshot_add_section(context->device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY, - snapshot, gen8_snapshot_gmu_mem, &desc); + snapshot, adreno_snapshot_gmu_mem, &desc); return 0; } @@ -394,7 +394,7 @@ void gen8_hwsched_snapshot(struct adreno_device *adreno_dev, desc.type = SNAPSHOT_GMU_MEM_HW_FENCE; kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY, - snapshot, gen8_snapshot_gmu_mem, &desc); + snapshot, adreno_snapshot_gmu_mem, &desc); } } diff --git a/adreno_snapshot.c b/adreno_snapshot.c index 16bae92bb2..ae0328ca1f 100644 --- a/adreno_snapshot.c +++ b/adreno_snapshot.c @@ -1299,3 +1299,54 @@ size_t adreno_snapshot_registers_v2(struct kgsl_device *device, u8 *buf, return (count * 4); } +size_t adreno_snapshot_gmu_version(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + u32 *data = (u32 *) (buf + sizeof(*header)); + struct kgsl_snapshot_gmu_version *ver = priv; + + if (remain < DEBUG_SECTION_SZ(1)) { + SNAPSHOT_ERR_NOMEM(device, "GMU Version"); + return 0; + } + + header->type = ver->type; + header->size = 1; + + *data = ver->value; + + return DEBUG_SECTION_SZ(1); +} + +size_t adreno_snapshot_gmu_mem(struct kgsl_device *device, + u8 *buf, size_t 
remain, void *priv) +{ + struct kgsl_snapshot_gmu_mem *mem_hdr = + (struct kgsl_snapshot_gmu_mem *)buf; + void *data = buf + sizeof(*mem_hdr); + struct gmu_mem_type_desc *desc = priv; + + if (priv == NULL || desc->memdesc->hostptr == NULL) + return 0; + + if (remain < desc->memdesc->size + sizeof(*mem_hdr)) { + dev_err(device->dev, + "snapshot: Not enough memory for the gmu section %d\n", + desc->type); + return 0; + } + + mem_hdr->type = desc->type; + mem_hdr->hostaddr = (u64)(uintptr_t)desc->memdesc->hostptr; + mem_hdr->gmuaddr = desc->memdesc->gmuaddr; + mem_hdr->gpuaddr = 0; + + /* The hw fence queues are mapped as iomem in the kernel */ + if (desc->type == SNAPSHOT_GMU_MEM_HW_FENCE) + memcpy_fromio(data, desc->memdesc->hostptr, desc->memdesc->size); + else + memcpy(data, desc->memdesc->hostptr, desc->memdesc->size); + + return desc->memdesc->size + sizeof(*mem_hdr); +} diff --git a/adreno_snapshot.h b/adreno_snapshot.h index f85d5b6666..a5d4cbc8ff 100644 --- a/adreno_snapshot.h +++ b/adreno_snapshot.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2013-2015,2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __ADRENO_SNAPSHOT_H #define __ADRENO_SNAPSHOT_H @@ -10,9 +10,6 @@ #define CP_CRASH_DUMPER_TIMEOUT 500 -#define DEBUG_SECTION_SZ(_dwords) (((_dwords) * sizeof(unsigned int)) \ - + sizeof(struct kgsl_snapshot_debug)) - #define SHADER_SECTION_SZ(_dwords) (((_dwords) * sizeof(unsigned int)) \ + sizeof(struct kgsl_snapshot_shader)) @@ -131,4 +128,27 @@ void adreno_snapshot_dump_all_ibs(struct kgsl_device *device, unsigned int *rbptr, struct kgsl_snapshot *snapshot); +/** + * adreno_snapshot_gmu_version - To dump gmu version info to snapshot buffer + * @device: Pointer to the kgsl device + * @buf: Destination snapshot buffer + * @remain: Remaining size of the snapshot buffer + * @priv: Opaque handle + * + * Return: Number of bytes written to snapshot buffer + */ +size_t adreno_snapshot_gmu_version(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv); + +/** + * kgsl_snapshot_gmu_mem - Snapshot a GMU memory descriptor + * @device: Pointer to the kgsl device + * @buf: Destination snapshot buffer + * @remain: Remaining size of the snapshot buffer + * @priv: Opaque handle + * + * Return: Number of bytes written to snapshot buffer + */ +size_t adreno_snapshot_gmu_mem(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv); #endif /*__ADRENO_SNAPSHOT_H */ diff --git a/kgsl_snapshot.h b/kgsl_snapshot.h index 52807bfc57..dbd9d18219 100644 --- a/kgsl_snapshot.h +++ b/kgsl_snapshot.h @@ -14,6 +14,9 @@ /* High word is static, low word is snapshot version ID */ #define SNAPSHOT_MAGIC 0x504D0002 +#define DEBUG_SECTION_SZ(_dwords) (((_dwords) * sizeof(unsigned int)) \ + + sizeof(struct kgsl_snapshot_debug)) + /* GPU ID scheme: * [16:31] - core identifer (0x0002 for 2D or 0x0003 for 3D) * [00:16] - GPU specific identifier @@ -308,6 +311,13 @@ struct kgsl_snapshot_eventlog { u32 size; } __packed; +struct kgsl_snapshot_gmu_version { + /** @type: Type of the GMU version buffer */ + u32 type; + /** @value: GMU FW version value */ + u32 value; +} __packed; 
+ struct kgsl_device; struct kgsl_process_private; From 673f4006a7640900604da322476671e79cfcfd82 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Thu, 23 May 2024 21:06:25 +0530 Subject: [PATCH 0793/1016] kgsl: Reseed GPU CX timer after suspend resume When the device enters in suspend state, the CX can be collapsed causing the GPU CX timer to pause. To ensure synchronization with the CPU timer, reseed the GPU CX timer after resuming from suspend or hibernation. This ensures that the GPU CX timer remains in sync with the CPU timer. Change-Id: Idda014c1878e22eebc4c500fbc9d836fe6d068a4 Signed-off-by: Pankaj Gupta --- adreno.c | 13 +++++++++++++ adreno_gen7.c | 5 ++++- adreno_gen7_gmu.c | 3 +++ adreno_gen7_hwsched.c | 3 +++ adreno_gen8.c | 2 +- adreno_gen8_gmu.c | 3 +++ adreno_gen8_hwsched.c | 3 +++ 7 files changed, 30 insertions(+), 2 deletions(-) diff --git a/adreno.c b/adreno.c index fa4313188a..660b19ca74 100644 --- a/adreno.c +++ b/adreno.c @@ -1611,6 +1611,12 @@ static int adreno_pm_suspend(struct device *dev) if (status) return status; + /* + * When the device enters in suspend state, the CX can be collapsed causing + * the GPU CX timer to pause. Clear the ADRENO_DEVICE_CX_TIMER_INITIALIZED + * flag to ensure that the CX timer is reseeded during resume. + */ + clear_bit(ADRENO_DEVICE_CX_TIMER_INITIALIZED, &adreno_dev->priv); kgsl_reclaim_close(); kthread_flush_worker(device->events_worker); flush_workqueue(kgsl_driver.lockless_workqueue); @@ -3713,6 +3719,13 @@ static int adreno_hibernation_suspend(struct device *dev) if (status) goto err; + /* + * When the device enters in hibernation state, the CX will be collapsed causing + * the GPU CX timer to pause. Clear the ADRENO_DEVICE_CX_TIMER_INITIALIZED flag + * to ensure that the CX timer is reseeded during resume. 
+ */ + clear_bit(ADRENO_DEVICE_CX_TIMER_INITIALIZED, &adreno_dev->priv); + /* * Unload zap shader during device hibernation and reload it * during resume as there is possibility that TZ driver diff --git a/adreno_gen7.c b/adreno_gen7.c index c1209416e3..27b01d2fb4 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -353,7 +353,10 @@ void gen7_cx_timer_init(struct adreno_device *adreno_dev) int i; unsigned long flags; - /* Only gen7_9_x has the CX timer. Set it up just once */ + /* + * Only gen7_9_x has the CX timer. Set it up during first boot or + * after suspend resume. + */ if (!adreno_is_gen7_9_x(adreno_dev) || test_bit(ADRENO_DEVICE_CX_TIMER_INITIALIZED, &adreno_dev->priv)) return; diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 69927d832a..a04b729edf 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -2076,6 +2076,9 @@ static int gen7_gmu_boot(struct adreno_device *adreno_dev) */ gen7_enable_ahb_timeout_detection(adreno_dev); + /* Initialize the CX timer */ + gen7_cx_timer_init(adreno_dev); + ret = gen7_rscc_wakeup_sequence(adreno_dev); if (ret) goto clks_gdsc_off; diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 65b40807f2..40df7cbf55 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -597,6 +597,9 @@ static int gen7_hwsched_gmu_boot(struct adreno_device *adreno_dev) */ gen7_enable_ahb_timeout_detection(adreno_dev); + /* Initialize the CX timer */ + gen7_cx_timer_init(adreno_dev); + ret = gen7_rscc_wakeup_sequence(adreno_dev); if (ret) goto clks_gdsc_off; diff --git a/adreno_gen8.c b/adreno_gen8.c index d56e6d69c6..5683574df3 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -740,7 +740,7 @@ void gen8_cx_timer_init(struct adreno_device *adreno_dev) int i; unsigned long flags; - /* Set up the CX timer just once */ + /* Set it up during first boot or after suspend resume */ if (test_bit(ADRENO_DEVICE_CX_TIMER_INITIALIZED, &adreno_dev->priv)) return; diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index 
e7680698bf..cd94f3c505 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -1927,6 +1927,9 @@ static int gen8_gmu_boot(struct adreno_device *adreno_dev) */ gen8_enable_ahb_timeout_detection(adreno_dev); + /* Initialize the CX timer */ + gen8_cx_timer_init(adreno_dev); + ret = gen8_rscc_wakeup_sequence(adreno_dev); if (ret) goto clks_gdsc_off; diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 20d722c0a7..32887d4e65 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -619,6 +619,9 @@ static int gen8_hwsched_gmu_boot(struct adreno_device *adreno_dev) */ gen8_enable_ahb_timeout_detection(adreno_dev); + /* Initialize the CX timer */ + gen8_cx_timer_init(adreno_dev); + ret = gen8_rscc_wakeup_sequence(adreno_dev); if (ret) goto clks_gdsc_off; From 32aa497142fc8b443acc9295be4acc6b3c267513 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Tue, 7 May 2024 12:28:07 -0700 Subject: [PATCH 0794/1016] msm: kgsl: Remove duplicate bind operation code The bind operation clean up code is duplicated in the worker and the destroy functions. Instead of doing the same steps at both places, simplify the code and do the clean up operations only in the destroy function. 
Change-Id: I14c9e4f31b40cf6c8062c4427461da3db7d2c097 Signed-off-by: Lynus Vaz --- kgsl_vbo.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/kgsl_vbo.c b/kgsl_vbo.c index bf72c139db..d27b7de4e0 100644 --- a/kgsl_vbo.c +++ b/kgsl_vbo.c @@ -369,10 +369,13 @@ static void kgsl_sharedmem_free_bind_op(struct kgsl_sharedmem_bind_op *op) /* Decrement the vbo_count we added when creating the bind_op */ if (op->ops[i].entry) atomic_dec(&op->ops[i].entry->vbo_count); - kgsl_mem_entry_put(op->ops[i].entry); + + /* Release the reference on the child entry */ + kgsl_mem_entry_put_deferred(op->ops[i].entry); } - kgsl_mem_entry_put(op->target); + /* Release the reference on the target entry */ + kgsl_mem_entry_put_deferred(op->target); kvfree(op->ops); kfree(op); @@ -559,23 +562,16 @@ static void kgsl_sharedmem_bind_worker(struct work_struct *work) op->ops[i].start, op->ops[i].last, op->ops[i].entry); - - /* Release the reference on the child entry */ - kgsl_mem_entry_put(op->ops[i].entry); - op->ops[i].entry = NULL; } - /* Release the reference on the target entry */ - kgsl_mem_entry_put(op->target); - op->target = NULL; - /* Wake up any threads waiting for the bind operation */ complete_all(&op->comp); if (op->callback) op->callback(op); - kref_put(&op->ref, kgsl_sharedmem_bind_range_destroy); + /* Put the refcount we took when scheduling the worker */ + kgsl_sharedmem_put_bind_op(op); } void kgsl_sharedmem_bind_ranges(struct kgsl_sharedmem_bind_op *op) From 0f188db27fe37341062f70e721ed143324ac465c Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Wed, 8 May 2024 00:51:47 +0530 Subject: [PATCH 0795/1016] kgsl: Detach context when user context probe fails When there is a failure during user context probe after setting the context, detach the context during cleanup to ensure proper error handling. 
Change-Id: Ib0417534dfc79af44972975b44c4ae67ad217336 Signed-off-by: Pankaj Gupta --- kgsl_iommu.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 2d102b6f2e..0f85f7a9e0 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -2537,8 +2537,10 @@ static int iommu_probe_user_context(struct kgsl_device *device, /* Make the default pagetable */ mmu->defaultpagetable = kgsl_iommu_default_pagetable(mmu); - if (IS_ERR(mmu->defaultpagetable)) - return PTR_ERR(mmu->defaultpagetable); + if (IS_ERR(mmu->defaultpagetable)) { + ret = PTR_ERR(mmu->defaultpagetable); + goto err; + } /* If IOPGTABLE isn't enabled then we are done */ if (!test_bit(KGSL_MMU_IOPGTABLE, &mmu->features)) @@ -2556,16 +2558,15 @@ static int iommu_probe_user_context(struct kgsl_device *device, kgsl_iommu_set_ttbr0(&iommu->lpac_context, mmu, &pt->info.cfg); ret = kgsl_set_smmu_lpac_aperture(device, &iommu->lpac_context); - if (ret < 0) { - kgsl_iommu_detach_context(&iommu->lpac_context); + if (ret < 0) goto err; - } return 0; err: kgsl_mmu_putpagetable(mmu->defaultpagetable); mmu->defaultpagetable = NULL; + kgsl_iommu_detach_context(&iommu->lpac_context); kgsl_iommu_detach_context(&iommu->user_context); return ret; From 0e87816bfb87648f8f6125e3f2dab2688e6df197 Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Wed, 29 May 2024 15:38:54 -0700 Subject: [PATCH 0796/1016] kgsl: gen8: Rearrange some GMU register dumping in snapshot Ensure GX GDSC is ON to dump GXCLKCNTL registers and Debugbus in snapshot. 
Change-Id: I711df499a2499e7114044a9ef37b55c563f2939c Signed-off-by: Urvashi Agrawal --- adreno_gen8_0_0_snapshot.h | 21 ++++--- adreno_gen8_snapshot.c | 123 ++++++++++++++++++++----------------- 2 files changed, 77 insertions(+), 67 deletions(-) diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h index cb924cc162..5b09838f4d 100644 --- a/adreno_gen8_0_0_snapshot.h +++ b/adreno_gen8_0_0_snapshot.h @@ -1883,11 +1883,6 @@ static struct gen8_reg_list gen8_0_0_ahb_registers[] = { { UNSLICE, gen8_0_0_ahb_secure_gpu_registers }, }; -static struct gen8_reg_list gen8_gmu_gx_registers[] = { - { UNSLICE, gen8_0_0_gmugx_registers }, - { SLICE, gen8_0_0_gmugx_slice_registers }, -}; - /* * Block : ['GDPM_LKG'] * REGION : UNSLICE @@ -2062,10 +2057,16 @@ static const u32 *gen8_0_0_external_core_regs[] = { gen8_0_0_gpu_cc_ahb2phy_swman_registers, gen8_0_0_gpu_cc_gpu_cc_reg_registers, gen8_0_0_gpu_cc_pll0_cm_pll_taycan_common_registers, - gen8_0_0_acd_acd_mnd_registers, - gen8_0_0_gx_clkctl_ahb2phy_broadcast_swman_registers, - gen8_0_0_gx_clkctl_ahb2phy_swman_registers, - gen8_0_0_gx_clkctl_pll0_cm_pll_taycan_common_registers, - gen8_0_0_gx_clkctl_gx_clkctl_reg_registers, }; + +static struct gen8_reg_list gen8_gmu_gx_registers[] = { + { UNSLICE, gen8_0_0_gmugx_registers }, + { UNSLICE, gen8_0_0_acd_acd_mnd_registers }, + { UNSLICE, gen8_0_0_gx_clkctl_ahb2phy_broadcast_swman_registers }, + { UNSLICE, gen8_0_0_gx_clkctl_ahb2phy_swman_registers }, + { UNSLICE, gen8_0_0_gx_clkctl_pll0_cm_pll_taycan_common_registers }, + { UNSLICE, gen8_0_0_gx_clkctl_gx_clkctl_reg_registers }, + { SLICE, gen8_0_0_gmugx_slice_registers }, +}; + #endif /*_ADRENO_GEN8_0_0_SNAPSHOT_H */ diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 9915ffa878..eda8485035 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -219,11 +219,13 @@ size_t gen8_legacy_snapshot_registers(struct kgsl_device *device, if (info->regs->sel) kgsl_regwrite(device, 
info->regs->sel->host_reg, info->regs->sel->val); - kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL - (info->slice_id, 0, 0, 0)); + if (info->regs->slice_region) { + kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL + (info->slice_id, 0, 0, 0)); - /* Make sure the previous writes are posted before reading */ - mb(); + /* Make sure the previous writes are posted before reading */ + mb(); + } for (ptr = info->regs->regs; ptr[0] != UINT_MAX; ptr += 2) { count = REG_COUNT(ptr); @@ -1329,6 +1331,60 @@ static size_t gen8_snapshot_cx_side_dbgc_debugbus_block(struct kgsl_device *devi return size; } +static void gen8_snapshot_cx_debugbus(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + u32 i; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLT, + FIELD_PREP(GENMASK(31, 28), 0xf)); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLM, + FIELD_PREP(GENMASK(27, 24), 0xf)); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_0, + FIELD_PREP(GENMASK(3, 0), 0x0) | + FIELD_PREP(GENMASK(7, 4), 0x1) | + FIELD_PREP(GENMASK(11, 8), 0x2) | + FIELD_PREP(GENMASK(15, 12), 0x3) | + FIELD_PREP(GENMASK(19, 16), 0x4) | + FIELD_PREP(GENMASK(23, 20), 0x5) | + FIELD_PREP(GENMASK(27, 24), 0x6) | + FIELD_PREP(GENMASK(31, 28), 0x7)); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_1, + FIELD_PREP(GENMASK(3, 0), 0x8) | + FIELD_PREP(GENMASK(7, 4), 0x9) | + FIELD_PREP(GENMASK(11, 8), 0xa) | + FIELD_PREP(GENMASK(15, 12), 0xb) | + FIELD_PREP(GENMASK(19, 16), 0xc) | + FIELD_PREP(GENMASK(23, 20), 0xd) | + FIELD_PREP(GENMASK(27, 24), 0xe) | + FIELD_PREP(GENMASK(31, 28), 0xf)); + + kgsl_regwrite(device, 
GEN8_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); + + /* Dump the CX debugbus data if the block exists */ + if (kgsl_regmap_valid_offset(&device->regmap, GEN8_CX_DBGC_CFG_DBGBUS_SEL_A)) { + for (i = 0; i < gen8_snapshot_block_list->cx_debugbus_blocks_len; i++) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, gen8_snapshot_cx_dbgc_debugbus_block, + (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, + snapshot, gen8_snapshot_cx_side_dbgc_debugbus_block, + (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); + } + } +} + /* gen8_snapshot_debugbus() - Capture debug bus data */ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) @@ -1371,41 +1427,6 @@ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_MASKL_2, 0); kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_MASKL_3, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLT, - FIELD_PREP(GENMASK(31, 28), 0xf)); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLM, - FIELD_PREP(GENMASK(27, 24), 0xf)); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_0, - FIELD_PREP(GENMASK(3, 0), 0x0) | - FIELD_PREP(GENMASK(7, 4), 0x1) | - FIELD_PREP(GENMASK(11, 8), 0x2) | - FIELD_PREP(GENMASK(15, 12), 0x3) | - FIELD_PREP(GENMASK(19, 16), 0x4) | - FIELD_PREP(GENMASK(23, 20), 0x5) | - FIELD_PREP(GENMASK(27, 24), 0x6) | - FIELD_PREP(GENMASK(31, 28), 0x7)); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_1, - FIELD_PREP(GENMASK(3, 0), 0x8) | - FIELD_PREP(GENMASK(7, 4), 0x9) | - FIELD_PREP(GENMASK(11, 8), 0xa) | 
- FIELD_PREP(GENMASK(15, 12), 0xb) | - FIELD_PREP(GENMASK(19, 16), 0xc) | - FIELD_PREP(GENMASK(23, 20), 0xd) | - FIELD_PREP(GENMASK(27, 24), 0xe) | - FIELD_PREP(GENMASK(31, 28), 0xf)); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_2, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_3, 0); - for (i = 0; i < gen8_snapshot_block_list->debugbus_blocks_len; i++) { kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUGBUS, @@ -1427,20 +1448,6 @@ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, snapshot, gen8_snapshot_dbgc_side_debugbus_block, (void *) &gen8_snapshot_block_list->gbif_debugbus_blocks[i]); } - - /* Dump the CX debugbus data if the block exists */ - if (kgsl_regmap_valid_offset(&device->regmap, GEN8_CX_DBGC_CFG_DBGBUS_SEL_A)) { - for (i = 0; i < gen8_snapshot_block_list->cx_debugbus_blocks_len; i++) { - kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_DEBUGBUS, - snapshot, gen8_snapshot_cx_dbgc_debugbus_block, - (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); - kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, - snapshot, gen8_snapshot_cx_side_dbgc_debugbus_block, - (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); - } - } } /* gen8_snapshot_sqe() - Dump SQE data in snapshot */ @@ -1702,12 +1709,10 @@ void gen8_snapshot(struct adreno_device *adreno_dev, if (!gmu_core_isenabled(device)) gen8_snapshot_external_core_regs(device, snapshot); - gen8_snapshot_trace_buffer(device, snapshot); - - gen8_snapshot_debugbus(adreno_dev, snapshot); - gen8_cx_misc_regs_snapshot(device, snapshot); + gen8_snapshot_cx_debugbus(adreno_dev, snapshot); + /* SQE Firmware */ kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, gen8_snapshot_sqe, NULL); @@ -1719,6 +1724,10 @@ void gen8_snapshot(struct adreno_device *adreno_dev, if 
(!adreno_gx_is_on(adreno_dev)) return; + gen8_snapshot_trace_buffer(device, snapshot); + + gen8_snapshot_debugbus(adreno_dev, snapshot); + is_current_rt = rt_task(current); if (is_current_rt) From 2946351191e40c9f07f3045a2f804290f4db3366 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 2 May 2024 10:27:18 +0530 Subject: [PATCH 0797/1016] kgsl: gen8: Enable ACD on gen8_4_0 GPU Adaptive Clock Distribution feature helps mitigate peak current and voltage droops. Change-Id: I21429b44c4a0bbf80de511f4b40eec5f1c642617 Signed-off-by: Kamal Agrawal --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 7d54013373..b04a326850 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2893,7 +2893,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_4_0 = { .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_IFPC | - ADRENO_BCL, + ADRENO_BCL | ADRENO_ACD, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_64M, From 0e31eb38c25e96adb65e1db8999f21f5c3df9019 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Tue, 7 May 2024 12:28:07 -0700 Subject: [PATCH 0798/1016] msm: kgsl: Remove duplicate bind operation code The bind operation clean up code is duplicated in the worker and the destroy functions. Instead of doing the same steps at both places, simplify the code and do the clean up operations only in the destroy function. 
Change-Id: I14c9e4f31b40cf6c8062c4427461da3db7d2c097 Signed-off-by: Lynus Vaz --- kgsl_vbo.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/kgsl_vbo.c b/kgsl_vbo.c index bf72c139db..d27b7de4e0 100644 --- a/kgsl_vbo.c +++ b/kgsl_vbo.c @@ -369,10 +369,13 @@ static void kgsl_sharedmem_free_bind_op(struct kgsl_sharedmem_bind_op *op) /* Decrement the vbo_count we added when creating the bind_op */ if (op->ops[i].entry) atomic_dec(&op->ops[i].entry->vbo_count); - kgsl_mem_entry_put(op->ops[i].entry); + + /* Release the reference on the child entry */ + kgsl_mem_entry_put_deferred(op->ops[i].entry); } - kgsl_mem_entry_put(op->target); + /* Release the reference on the target entry */ + kgsl_mem_entry_put_deferred(op->target); kvfree(op->ops); kfree(op); @@ -559,23 +562,16 @@ static void kgsl_sharedmem_bind_worker(struct work_struct *work) op->ops[i].start, op->ops[i].last, op->ops[i].entry); - - /* Release the reference on the child entry */ - kgsl_mem_entry_put(op->ops[i].entry); - op->ops[i].entry = NULL; } - /* Release the reference on the target entry */ - kgsl_mem_entry_put(op->target); - op->target = NULL; - /* Wake up any threads waiting for the bind operation */ complete_all(&op->comp); if (op->callback) op->callback(op); - kref_put(&op->ref, kgsl_sharedmem_bind_range_destroy); + /* Put the refcount we took when scheduling the worker */ + kgsl_sharedmem_put_bind_op(op); } void kgsl_sharedmem_bind_ranges(struct kgsl_sharedmem_bind_op *op) From 176f02051816902621fa14512813305ac4d14b95 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Wed, 8 May 2024 10:22:07 -0700 Subject: [PATCH 0799/1016] msm: kgsl: Pin a process in memory while creating a bind object Bind operations require the referenced mem_entry objects to be currently pinned in memory. To ensure this, reclaim the process to pinned state to bring all its mem_entry objects into memory when creating the bind object. 
Change-Id: I5b46891004d42bed6a0e3a0c66bcdcd25ad52321 Signed-off-by: Lynus Vaz --- kgsl_vbo.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kgsl_vbo.c b/kgsl_vbo.c index d27b7de4e0..a8232dac5a 100644 --- a/kgsl_vbo.c +++ b/kgsl_vbo.c @@ -12,6 +12,7 @@ #include "kgsl_device.h" #include "kgsl_mmu.h" +#include "kgsl_reclaim.h" #include "kgsl_sharedmem.h" #include "kgsl_trace.h" @@ -426,6 +427,12 @@ kgsl_sharedmem_create_bind_op(struct kgsl_process_private *private, op->nr_ops = ranges_nents; op->target = target; + /* Make sure process is pinned in memory before proceeding */ + atomic_inc(&private->cmd_count); + ret = kgsl_reclaim_to_pinned_state(private); + if (ret) + goto err; + for (i = 0; i < ranges_nents; i++) { struct kgsl_gpumem_bind_range range; struct kgsl_mem_entry *entry; @@ -526,12 +533,14 @@ kgsl_sharedmem_create_bind_op(struct kgsl_process_private *private, ranges += ranges_size; } + atomic_dec(&private->cmd_count); init_completion(&op->comp); kref_init(&op->ref); return op; err: + atomic_dec(&private->cmd_count); kgsl_sharedmem_free_bind_op(op); return ERR_PTR(ret); } From 1cff0e8a37e44b12897d5834ec50e1c82d79a400 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Wed, 22 May 2024 13:41:07 -0600 Subject: [PATCH 0800/1016] kgsl: hwfence: Take context reference in drain_guilty_context_hw_fences Say, a context is invalidated, and we drain all its hardware fences in drawctxt->hw_fence_list as part of recovery sequence. However, the KGSL_CONTEXT_PRIV_INVALID remains set. Say, as part of replay, another context causes a GPU fault and we again run recovery. If the invalidated context has not been destroyed yet, after recovery we will again get a handle to that context. And say if context gets destroyed concurrently, we will end up de-referencing invalid drawctxt->hw_fence_list pointer. To fix this, try to take context reference before de-referencing drawctxt->hw_fence_list. 
In case the context is in the middle of getting destroyed, but hasn't been removed from the device->context_idr, _kgsl_context_get() will fail and avoid de-referencing the invalid drawctxt->hw_fence_list pointer. Change-Id: I93d8b7ad23c8cd6a805ed82a70b1b573f15c79ae Signed-off-by: Harshdeep Dhatt --- adreno_gen7_hwsched.c | 16 +++++++++++++--- adreno_gen8_hwsched.c | 16 +++++++++++++--- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 348e7d00f8..4c6767ae57 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -1705,11 +1705,12 @@ static int drain_guilty_context_hw_fences(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct kgsl_context *context = NULL; struct adreno_context *guilty = NULL; - int id; + int id, ret = 0; read_lock(&device->context_lock); idr_for_each_entry(&device->context_idr, context, id) { - if (test_bit(KGSL_CONTEXT_PRIV_INVALID, &context->priv)) { + if (test_bit(KGSL_CONTEXT_PRIV_INVALID, &context->priv) && + _kgsl_context_get(context)) { guilty = ADRENO_CONTEXT(context); break; } @@ -1719,7 +1720,16 @@ static int drain_guilty_context_hw_fences(struct adreno_device *adreno_dev) if (!guilty) return 0; - return gen7_hwsched_drain_context_hw_fences(adreno_dev, guilty); + /* + * We don't need drawctxt spinlock to signal these fences since the only other place + * which can access these fences is the context detach path and device mutex + * ensures mutual exclusion between recovery thread and detach thread. 
+ */ + ret = gen7_hwsched_drain_context_hw_fences(adreno_dev, guilty); + + kgsl_context_put(&guilty->base); + + return ret; } static int handle_hw_fences_after_reset(struct adreno_device *adreno_dev) diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 5e3c1b3ef3..38e76585eb 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -1708,11 +1708,12 @@ static int drain_guilty_context_hw_fences(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct kgsl_context *context = NULL; struct adreno_context *guilty = NULL; - int id; + int id, ret = 0; read_lock(&device->context_lock); idr_for_each_entry(&device->context_idr, context, id) { - if (test_bit(KGSL_CONTEXT_PRIV_INVALID, &context->priv)) { + if (test_bit(KGSL_CONTEXT_PRIV_INVALID, &context->priv) && + _kgsl_context_get(context)) { guilty = ADRENO_CONTEXT(context); break; } @@ -1722,7 +1723,16 @@ static int drain_guilty_context_hw_fences(struct adreno_device *adreno_dev) if (!guilty) return 0; - return gen8_hwsched_drain_context_hw_fences(adreno_dev, guilty); + /* + * We don't need drawctxt spinlock to signal these fences since the only other place + * which can retire these fences is the context detach path and device mutex + * ensures mutual exclusion between recovery thread and detach thread. + */ + ret = gen8_hwsched_drain_context_hw_fences(adreno_dev, guilty); + + kgsl_context_put(&guilty->base); + + return ret; } static int handle_hw_fences_after_reset(struct adreno_device *adreno_dev) From c3401ca426c64c1de3ef923783999b1ac23789eb Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Thu, 23 May 2024 14:08:48 -0600 Subject: [PATCH 0801/1016] kgsl: hwfence: Fix GMU fault when soccp vote fails Currently, if soccp vote fails during SLUMBER exit, we still end up sending hardware fences to GMU. This causes GMU fault when GMU tries to store this fence to the context's hardware fence buffer, which was not allocated by kgsl. 
Fix this by destroying any pending hardware fences so that they don't get sent to GMU. Also, force a cold boot since GMU doesn't need to send hardware fence related mem alloc requests. Change-Id: I17d87f25df3d3971ce773d7bf815a17e2c59746c Signed-off-by: Harshdeep Dhatt --- adreno_gen7_hwsched.c | 6 +++--- adreno_gen7_hwsched_hfi.c | 36 ++++++++++-------------------------- adreno_gen7_hwsched_hfi.h | 10 +--------- adreno_gen8_hwsched.c | 13 +++++++------ adreno_gen8_hwsched_hfi.c | 36 ++++++++++-------------------------- adreno_gen8_hwsched_hfi.h | 9 +-------- adreno_hwsched.c | 19 ++++++++++++++++++- adreno_hwsched.h | 14 ++++++++++++-- 8 files changed, 62 insertions(+), 81 deletions(-) diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 4c6767ae57..24f83712f1 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -1140,7 +1140,7 @@ static void drain_ctx_hw_fences_cpu(struct adreno_device *adreno_dev, spin_lock(&drawctxt->lock); list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_inflight_list, node) { kgsl_hw_fence_trigger_cpu(KGSL_DEVICE(adreno_dev), entry->kfence); - gen7_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); } spin_unlock(&drawctxt->lock); } @@ -1607,7 +1607,7 @@ static void process_context_hw_fences_after_reset(struct adreno_device *adreno_d /* Delete the fences that GMU has sent to the TxQueue */ if (timestamp_cmp(hdr->out_fence_ts, (u32)entry->cmd.ts) >= 0) { - gen7_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); continue; } @@ -1692,7 +1692,7 @@ static int process_detached_hw_fences_after_reset(struct adreno_device *adreno_d context = &entry->drawctxt->base; - gen7_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); kgsl_context_put(context); } diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index ee9662cda7..c73c574608 100644 --- 
a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -146,21 +146,6 @@ static void gen7_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd) MSG_HDR_GET_SEQNUM(waiters[i])); } -/* This function is called while holding the drawctxt spinlock */ -void gen7_remove_hw_fence_entry(struct adreno_device *adreno_dev, - struct adreno_hw_fence_entry *entry) -{ - struct adreno_hwsched *hwsched = &adreno_dev->hwsched; - struct adreno_context *drawctxt = entry->drawctxt; - - atomic_dec(&hwsched->hw_fence_count); - drawctxt->hw_fence_count--; - - dma_fence_put(&entry->kfence->fence); - list_del_init(&entry->node); - kmem_cache_free(hwsched->hw_fence_cache, entry); -} - static void _retire_inflight_hw_fences(struct adreno_device *adreno_dev, struct kgsl_context *context) { @@ -182,7 +167,7 @@ static void _retire_inflight_hw_fences(struct adreno_device *adreno_dev, if (timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0) break; - gen7_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); } spin_unlock(&drawctxt->lock); } @@ -1087,7 +1072,7 @@ static int _send_deferred_hw_fence(struct adreno_device *adreno_dev, if (!retired) list_move_tail(&entry->node, &drawctxt->hw_fence_inflight_list); else - gen7_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); spin_unlock(&drawctxt->lock); return 0; @@ -3217,7 +3202,7 @@ int gen7_hwsched_check_context_inflight_hw_fences(struct adreno_device *adreno_d break; } - gen7_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); } spin_unlock(&drawctxt->lock); @@ -3245,7 +3230,7 @@ static void move_detached_context_hardware_fences(struct adreno_device *adreno_d continue; } - gen7_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); } /* Also grab all the hardware fences which were never sent to GMU */ @@ -3281,7 +3266,7 @@ static int 
check_detached_context_hardware_fences(struct adreno_device *adreno_d ret = -EINVAL; goto fault; } - gen7_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); } /* Send hardware fences (to TxQueue) that were not dispatched to GMU */ @@ -3292,7 +3277,7 @@ static int check_detached_context_hardware_fences(struct adreno_device *adreno_d if (ret) goto fault; - gen7_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); } return 0; @@ -3557,7 +3542,7 @@ void gen7_hwsched_create_hw_fence(struct adreno_device *adreno_dev, if (__ratelimit(&_rs)) dev_err(&gmu->pdev->dev, "hw fence for ctx:%d ts:%d ret:%d may not be destroyed\n", kfence->context_id, kfence->timestamp, ret); - gen7_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); kgsl_hw_fence_destroy(kfence); goto done; } @@ -3628,8 +3613,7 @@ static void process_hw_fence_queue(struct adreno_device *adreno_dev, * A fence that is sent to GMU must be added to the drawctxt->hw_fence_inflight_list * so that we can keep track of when GMU sends it to the TxQueue */ - list_del_init(&entry->node); - list_add_tail(&entry->node, &drawctxt->hw_fence_inflight_list); + list_move_tail(&entry->node, &drawctxt->hw_fence_inflight_list); } } @@ -3917,7 +3901,7 @@ static void drain_context_hw_fence_cpu(struct adreno_device *adreno_dev, kgsl_hw_fence_trigger_cpu(KGSL_DEVICE(adreno_dev), entry->kfence); - gen7_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); } } @@ -3936,7 +3920,7 @@ int gen7_hwsched_drain_context_hw_fences(struct adreno_device *adreno_dev, if (ret) break; - gen7_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); } return ret; diff --git a/adreno_gen7_hwsched_hfi.h b/adreno_gen7_hwsched_hfi.h index 80f52c94a3..3afcac0e60 100644 --- a/adreno_gen7_hwsched_hfi.h +++ 
b/adreno_gen7_hwsched_hfi.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _ADRENO_GEN7_HWSCHED_HFI_H_ @@ -311,14 +311,6 @@ int gen7_hwsched_drain_context_hw_fences(struct adreno_device *adreno_dev, int gen7_hwsched_check_context_inflight_hw_fences(struct adreno_device *adreno_dev, struct adreno_context *drawctxt); -/** - * gen7_remove_hw_fence_entry - Remove hardware fence entry - * @adreno_dev: pointer to the adreno device - * @entry: Pointer to the hardware fence entry - */ -void gen7_remove_hw_fence_entry(struct adreno_device *adreno_dev, - struct adreno_hw_fence_entry *entry); - /** * gen7_hwsched_disable_hw_fence_throttle - Disable hardware fence throttling after reset * @adreno_dev: pointer to the adreno device diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 38e76585eb..d384c917f0 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -465,14 +465,15 @@ static void gen8_hwsched_soccp_vote(struct adreno_device *adreno_dev, bool pwr_o set_bit(GEN8_HWSCHED_HW_FENCE_ABORT_BIT, &hw_hfi->hw_fence.flags); spin_unlock(&hw_hfi->hw_fence.lock); - clear_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags); - /* * It is possible that some hardware fences were created while we were in slumber. Since * soccp power vote failed, these hardware fences may never be signaled. Hence, log them * for debug purposes. 
*/ - adreno_hwsched_log_pending_hw_fences(adreno_dev, &gmu->pdev->dev); + adreno_hwsched_log_destroy_pending_hw_fences(adreno_dev, &gmu->pdev->dev); + adreno_mark_for_coldboot(adreno_dev); + + adreno_hwsched_deregister_hw_fence(adreno_dev); } static int gen8_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) @@ -1145,7 +1146,7 @@ static void drain_ctx_hw_fences_cpu(struct adreno_device *adreno_dev, spin_lock(&drawctxt->lock); list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_inflight_list, node) { kgsl_hw_fence_trigger_cpu(KGSL_DEVICE(adreno_dev), entry->kfence); - gen8_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); } spin_unlock(&drawctxt->lock); } @@ -1610,7 +1611,7 @@ static void process_context_hw_fences_after_reset(struct adreno_device *adreno_d /* Delete the fences that GMU has sent to the TxQueue */ if (timestamp_cmp(hdr->out_fence_ts, (u32)entry->cmd.ts) >= 0) { - gen8_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); continue; } @@ -1695,7 +1696,7 @@ static int process_detached_hw_fences_after_reset(struct adreno_device *adreno_d context = &entry->drawctxt->base; - gen8_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); kgsl_context_put(context); } diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index abae3cf580..09f5120b5b 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -126,21 +126,6 @@ static void gen8_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd) MSG_HDR_GET_SEQNUM(waiters[i])); } -/* This function is called while holding the drawctxt spinlock */ -void gen8_remove_hw_fence_entry(struct adreno_device *adreno_dev, - struct adreno_hw_fence_entry *entry) -{ - struct adreno_hwsched *hwsched = &adreno_dev->hwsched; - struct adreno_context *drawctxt = entry->drawctxt; - - atomic_dec(&hwsched->hw_fence_count); - 
drawctxt->hw_fence_count--; - - dma_fence_put(&entry->kfence->fence); - list_del_init(&entry->node); - kmem_cache_free(hwsched->hw_fence_cache, entry); -} - static void _retire_inflight_hw_fences(struct adreno_device *adreno_dev, struct kgsl_context *context) { @@ -162,7 +147,7 @@ static void _retire_inflight_hw_fences(struct adreno_device *adreno_dev, if (timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0) break; - gen8_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); } spin_unlock(&drawctxt->lock); } @@ -868,7 +853,7 @@ static int _send_deferred_hw_fence(struct adreno_device *adreno_dev, if (!retired) list_move_tail(&entry->node, &drawctxt->hw_fence_inflight_list); else - gen8_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); spin_unlock(&drawctxt->lock); return 0; @@ -2903,7 +2888,7 @@ int gen8_hwsched_check_context_inflight_hw_fences(struct adreno_device *adreno_d break; } - gen8_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); } spin_unlock(&drawctxt->lock); @@ -2931,7 +2916,7 @@ static void move_detached_context_hardware_fences(struct adreno_device *adreno_d continue; } - gen8_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); } /* Also grab all the hardware fences which were never sent to GMU */ @@ -2967,7 +2952,7 @@ static int check_detached_context_hardware_fences(struct adreno_device *adreno_d ret = -EINVAL; goto fault; } - gen8_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); } /* Send hardware fences (to TxQueue) that were not dispatched to GMU */ @@ -2978,7 +2963,7 @@ static int check_detached_context_hardware_fences(struct adreno_device *adreno_d if (ret) goto fault; - gen8_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); } return 0; @@ 
-3246,7 +3231,7 @@ void gen8_hwsched_create_hw_fence(struct adreno_device *adreno_dev, "hw fence for ctx:%d ts:%d ret:%d may not be destroyed\n", kfence->context_id, kfence->timestamp, ret); kgsl_hw_fence_destroy(kfence); - gen8_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); goto done; } @@ -3316,8 +3301,7 @@ static void process_hw_fence_queue(struct adreno_device *adreno_dev, * A fence that is sent to GMU must be added to the drawctxt->hw_fence_inflight_list * so that we can keep track of when GMU sends it to the TxQueue */ - list_del_init(&entry->node); - list_add_tail(&entry->node, &drawctxt->hw_fence_inflight_list); + list_move_tail(&entry->node, &drawctxt->hw_fence_inflight_list); } } @@ -3538,7 +3522,7 @@ static void drain_context_hw_fence_cpu(struct adreno_device *adreno_dev, kgsl_hw_fence_trigger_cpu(KGSL_DEVICE(adreno_dev), entry->kfence); - gen8_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); } } @@ -3557,7 +3541,7 @@ int gen8_hwsched_drain_context_hw_fences(struct adreno_device *adreno_dev, if (ret) break; - gen8_remove_hw_fence_entry(adreno_dev, entry); + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); } return ret; diff --git a/adreno_gen8_hwsched_hfi.h b/adreno_gen8_hwsched_hfi.h index 5b08a315c8..9abca4a052 100644 --- a/adreno_gen8_hwsched_hfi.h +++ b/adreno_gen8_hwsched_hfi.h @@ -35,6 +35,7 @@ /* * This is used to avoid creating any more hardware fences until concurrent reset/recovery completes + * or when soccp vote fails */ #define GEN8_HWSCHED_HW_FENCE_ABORT_BIT 0x2 @@ -309,14 +310,6 @@ int gen8_hwsched_drain_context_hw_fences(struct adreno_device *adreno_dev, int gen8_hwsched_check_context_inflight_hw_fences(struct adreno_device *adreno_dev, struct adreno_context *drawctxt); -/** - * gen8_remove_hw_fence_entry - Remove hardware fence entry - * @adreno_dev: pointer to the adreno device - * @entry: Pointer to the hardware fence entry 
- */ -void gen8_remove_hw_fence_entry(struct adreno_device *adreno_dev, - struct adreno_hw_fence_entry *entry); - /** * gen8_hwsched_disable_hw_fence_throttle - Disable hardware fence throttling after reset * @adreno_dev: pointer to the adreno device diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 53ce6f28d5..6b656e4233 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -2393,7 +2393,8 @@ u32 adreno_hwsched_parse_payload(struct payload_section *payload, u32 key) return 0; } -void adreno_hwsched_log_pending_hw_fences(struct adreno_device *adreno_dev, struct device *dev) +void adreno_hwsched_log_destroy_pending_hw_fences(struct adreno_device *adreno_dev, + struct device *dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct adreno_hw_fence_entry entries[5]; @@ -2412,6 +2413,7 @@ void adreno_hwsched_log_pending_hw_fences(struct adreno_device *adreno_dev, stru if (count < ARRAY_SIZE(entries)) memcpy(&entries[count], entry, sizeof(*entry)); count++; + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); } spin_unlock(&drawctxt->lock); @@ -2536,3 +2538,18 @@ done: return 0; } + +/* This function can be called while holding the drawctxt spinlock */ +void adreno_hwsched_remove_hw_fence_entry(struct adreno_device *adreno_dev, + struct adreno_hw_fence_entry *entry) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct adreno_context *drawctxt = entry->drawctxt; + + atomic_dec(&hwsched->hw_fence_count); + drawctxt->hw_fence_count--; + + dma_fence_put(&entry->kfence->fence); + list_del_init(&entry->node); + kmem_cache_free(hwsched->hw_fence_cache, entry); +} diff --git a/adreno_hwsched.h b/adreno_hwsched.h index 729615a425..1288bc1f80 100644 --- a/adreno_hwsched.h +++ b/adreno_hwsched.h @@ -245,11 +245,13 @@ u32 adreno_hwsched_parse_payload(struct payload_section *payload, u32 key); u32 adreno_hwsched_gpu_fault(struct adreno_device *adreno_dev); /** - * adreno_hwsched_log_pending_fences - Log any pending hardware fences if soccp 
vote failed + * adreno_hwsched_log_destroy_pending_fences - Log and destroy any pending hardware fences if soccp + * vote failed * @adreno_dev: pointer to the adreno device * @dev: Pointer to the gmu pdev device */ -void adreno_hwsched_log_pending_hw_fences(struct adreno_device *adreno_dev, struct device *dev); +void adreno_hwsched_log_destroy_pending_hw_fences(struct adreno_device *adreno_dev, + struct device *dev); /** * adreno_hwsched_syncobj_kfence_put - Put back kfence context refcounts for this sync object @@ -279,4 +281,12 @@ bool adreno_hwsched_log_nonfatal_gpu_fault(struct adreno_device *adreno_dev, */ int adreno_hwsched_poll_msg_queue_write_index(struct kgsl_memdesc *hfi_mem); +/** + * adreno_hwsched_remove_hw_fence_entry - Remove hardware fence entry + * @adreno_dev: pointer to the adreno device + * @entry: Pointer to the hardware fence entry + */ +void adreno_hwsched_remove_hw_fence_entry(struct adreno_device *adreno_dev, + struct adreno_hw_fence_entry *entry); + #endif From 7e5d873e22191094181aeacd97c205ab5ce83efc Mon Sep 17 00:00:00 2001 From: Amit Kushwaha Date: Wed, 22 May 2024 15:27:22 +0530 Subject: [PATCH 0802/1016] kgsl: build: Enable perf build config for monaco Add perf build config and header rule for monaco. This fixed KGSL compilation using bazel build. 
Change-Id: Ia333d6682700727c0c117144e6717ceeda9d7776 Signed-off-by: Amit Kushwaha --- build/kgsl_defs.bzl | 5 +++++ config/monaco_perf_gpuconf | 10 ++++++++++ 2 files changed, 15 insertions(+) create mode 100644 config/monaco_perf_gpuconf diff --git a/build/kgsl_defs.bzl b/build/kgsl_defs.bzl index 12b1f820c0..3df6ce4fd3 100644 --- a/build/kgsl_defs.bzl +++ b/build/kgsl_defs.bzl @@ -104,6 +104,11 @@ def external_deps(target, variant): "//vendor/qcom/opensource/synx-kernel:synx_headers" ] + if target in [ "monaco" ]: + deplist = deplist + [ + "//vendor/qcom/opensource/mm-drivers/hw_fence:hw_fence_headers" + ] + native.genrule( name = "{}_defconfig".format(tv), srcs = defconfigs + [ "config/{}_gpuconf".format(tv) ], diff --git a/config/monaco_perf_gpuconf b/config/monaco_perf_gpuconf new file mode 100644 index 0000000000..22f5e75c5b --- /dev/null +++ b/config/monaco_perf_gpuconf @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + +CONFIG_QCOM_KGSL=m +CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=y +CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=y +CONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 +CONFIG_QCOM_KGSL_SORT_POOL=y +CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y +CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR="msm-adreno-tz" From 9f26d21679789f9984096b5e6fd9d03f1d205068 Mon Sep 17 00:00:00 2001 From: Piyush Mehta Date: Thu, 6 Jun 2024 11:17:55 +0530 Subject: [PATCH 0803/1016] kgsl: gen7: Update snapshot registers for gen7_6_0 Few registers from gen7_2_0 are not available on gen7_6_0. Hence, update snapshot registers for gen7_6_0. 
Change-Id: Ide4803b355696a52c360baceea60b906aa3503ee Signed-off-by: Piyush Mehta --- adreno-gpulist.h | 3 ++- adreno_gen7_6_0_snapshot.h | 46 ++++++++++++++++++++++++++++++++++++++ adreno_gen7_gmu_snapshot.c | 1 + adreno_gen7_snapshot.c | 27 ++++++++++++++++++++++ 4 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 adreno_gen7_6_0_snapshot.h diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 5941239600..0fa259d907 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2048,6 +2048,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_0_1 = { }; extern const struct gen7_snapshot_block_list gen7_2_0_snapshot_block_list; +extern const struct gen7_snapshot_block_list gen7_6_0_snapshot_block_list; static const struct kgsl_regmap_list gen7_2_0_gbif_regs[] = { { GEN7_GBIF_QSB_SIDE0, 0x00071620 }, @@ -2266,7 +2267,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_6_0 = { .highest_bank_bit = 16, .gmu_hub_clk_freq = 200000000, .bcl_data = 1, - .gen7_snapshot_block_list = &gen7_2_0_snapshot_block_list, + .gen7_snapshot_block_list = &gen7_6_0_snapshot_block_list, .qos_value = gen7_6_0_gbif_client_qos_values, .preempt_level = 1, .ctxt_record_size = (4192 * SZ_1K), diff --git a/adreno_gen7_6_0_snapshot.h b/adreno_gen7_6_0_snapshot.h new file mode 100644 index 0000000000..2a37bd7e1e --- /dev/null +++ b/adreno_gen7_6_0_snapshot.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ +#ifndef __ADRENO_GEN7_6_0_SNAPSHOT_H +#define __ADRENO_GEN7_6_0_SNAPSHOT_H + +#include "adreno_gen7_snapshot.h" + +static const u32 gen7_6_0_gpucc_registers[] = { + 0x24000, 0x2400f, 0x24400, 0x2440f, 0x24800, 0x24805, 0x24c00, 0x24cff, + 0x25400, 0x25404, 0x25800, 0x25804, 0x25c00, 0x25c04, 0x26000, 0x26004, + 0x26400, 0x26405, 0x26414, 0x2641d, 0x2642a, 0x26430, 0x26432, 0x26432, + 0x26441, 0x2644b, 0x2644d, 0x26455, 0x26466, 0x26468, 0x26478, 0x2647a, + 0x26489, 0x2648a, 0x2649c, 0x2649e, 0x264a0, 0x264a3, 0x264c5, 0x264c7, + 0x264d6, 0x264d8, 0x264e8, 0x264e9, 0x264f9, 0x264fc, 0x2651c, 0x2651e, + 0x26540, 0x26573, 0x26576, 0x26576, 0x26600, 0x26616, 0x26620, 0x2662d, + 0x26630, 0x26631, 0x26635, 0x26635, 0x26637, 0x26637, 0x2663a, 0x2663a, + 0x26642, 0x26642, 0x26656, 0x26658, 0x2665b, 0x2665d, 0x2665f, 0x26662, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_6_0_gpucc_registers), 8)); + +static const u32 gen7_6_0_cpr_registers[] = { + 0x26800, 0x26805, 0x26808, 0x2680c, 0x26814, 0x26814, 0x2681c, 0x2681c, + 0x26820, 0x26838, 0x26840, 0x26840, 0x26848, 0x26848, 0x26850, 0x26850, + 0x26880, 0x2689e, 0x26980, 0x269b0, 0x269c0, 0x269c8, 0x269e0, 0x269ee, + 0x269fb, 0x269ff, 0x26a02, 0x26a07, 0x26a09, 0x26a0b, 0x26a10, 0x26b0f, + 0x27440, 0x27441, 0x27444, 0x27444, 0x27480, 0x274a2, 0x274ac, 0x274ac, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_6_0_cpr_registers), 8)); + +static const u32 gen7_6_0_dpm_lkg_registers[] = { + 0x21c00, 0x21c00, 0x21c08, 0x21c09, 0x21c0e, 0x21c0f, 0x21c4f, 0x21c50, + 0x21c52, 0x21c52, 0x21c54, 0x21c56, 0x21c58, 0x21c5a, 0x21c5c, 0x21c60, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_6_0_dpm_lkg_registers), 8)); + +static const u32 *gen7_6_0_external_core_regs[] = { + gen7_6_0_gpucc_registers, + gen7_6_0_cpr_registers, + gen7_6_0_dpm_lkg_registers, +}; +#endif /*_ADRENO_GEN7_6_0_SNAPSHOT_H */ diff --git a/adreno_gen7_gmu_snapshot.c b/adreno_gen7_gmu_snapshot.c index 
d46a430145..c4f32a8c58 100644 --- a/adreno_gen7_gmu_snapshot.c +++ b/adreno_gen7_gmu_snapshot.c @@ -11,6 +11,7 @@ #include "adreno_snapshot.h" #include "adreno_gen7_0_0_snapshot.h" #include "adreno_gen7_2_0_snapshot.h" +#include "adreno_gen7_6_0_snapshot.h" #include "kgsl_device.h" static size_t gen7_gmu_snapshot_dtcm(struct kgsl_device *device, diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c index 2819dfb316..bc109d81de 100644 --- a/adreno_gen7_snapshot.c +++ b/adreno_gen7_snapshot.c @@ -8,6 +8,7 @@ #include "adreno_snapshot.h" #include "adreno_gen7_0_0_snapshot.h" #include "adreno_gen7_2_0_snapshot.h" +#include "adreno_gen7_6_0_snapshot.h" #include "adreno_gen7_9_0_snapshot.h" #include "adreno_gen7_14_0_snapshot.h" #include "adreno_gen7_11_0_snapshot.h" @@ -73,6 +74,32 @@ const struct gen7_snapshot_block_list gen7_2_0_snapshot_block_list = { .index_registers_len = ARRAY_SIZE(gen7_cp_indexed_reg_list), }; +const struct gen7_snapshot_block_list gen7_6_0_snapshot_block_list = { + .pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers, + .debugbus_blocks = gen7_2_0_debugbus_blocks, + .debugbus_blocks_len = ARRAY_SIZE(gen7_2_0_debugbus_blocks), + .gbif_debugbus_blocks = gen7_gbif_debugbus_blocks, + .gbif_debugbus_blocks_len = ARRAY_SIZE(gen7_gbif_debugbus_blocks), + .cx_debugbus_blocks = gen7_cx_dbgc_debugbus_blocks, + .cx_debugbus_blocks_len = ARRAY_SIZE(gen7_cx_dbgc_debugbus_blocks), + .external_core_regs = gen7_6_0_external_core_regs, + .num_external_core_regs = ARRAY_SIZE(gen7_6_0_external_core_regs), + .gmu_regs = gen7_2_0_gmu_registers, + .gmu_gx_regs = gen7_2_0_gmugx_registers, + .rscc_regs = gen7_2_0_rscc_registers, + .reg_list = gen7_2_0_reg_list, + .cx_misc_regs = gen7_0_0_cx_misc_registers, + .shader_blocks = gen7_2_0_shader_blocks, + .num_shader_blocks = ARRAY_SIZE(gen7_2_0_shader_blocks), + .clusters = gen7_2_0_clusters, + .num_clusters = ARRAY_SIZE(gen7_2_0_clusters), + .sptp_clusters = gen7_2_0_sptp_clusters, + .num_sptp_clusters = 
ARRAY_SIZE(gen7_2_0_sptp_clusters), + .post_crashdumper_regs = gen7_0_0_post_crashdumper_registers, + .index_registers = gen7_cp_indexed_reg_list, + .index_registers_len = ARRAY_SIZE(gen7_cp_indexed_reg_list), +}; + const struct gen7_snapshot_block_list gen7_9_0_snapshot_block_list = { .pre_crashdumper_regs = gen7_9_0_pre_crashdumper_gpu_registers, .debugbus_blocks = gen7_9_0_debugbus_blocks, From a9ab2d9a9681a748ca8fa202c18c1eafcef7dd49 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 30 May 2024 12:22:39 +0530 Subject: [PATCH 0804/1016] kgsl: gen8: Remove VSC_BIN_SIZE from non-context register list Based on latest recommendation, there is no need to program VSC_BIN_SIZE from KGSL. Hence, remove it from non-context register list. Change-Id: Ie4500637d2d4eaab309250c75de3d12c90efecea Signed-off-by: Kamal Agrawal --- adreno-gpulist.h | 3 --- adreno_gen8.c | 2 -- gen8_reg.h | 1 - 3 files changed, 6 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 0fa259d907..4fe7025f43 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2687,7 +2687,6 @@ static const struct gen8_nonctxt_regs gen8_0_0_nonctxt_regs[] = { { GEN8_VFD_CB_BUSY_REQ_CNT, 0x00200020, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_VFD_CB_LP_REQ_CNT, 0x00100020, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_VPC_FLATSHADE_MODE_CNTL, 0x00000001, BIT(PIPE_BR) | BIT(PIPE_BV) }, - { GEN8_VSC_BIN_SIZE, 0x00010001, BIT(PIPE_NONE) }, /* Disable redundant tile data optimization */ { GEN8_VSC_KMD_DBG_ECO_CNTL, BIT(11), BIT(PIPE_NONE)}, { 0 }, @@ -2885,7 +2884,6 @@ static const struct gen8_nonctxt_regs gen8_4_0_nonctxt_regs[] = { { GEN8_VFD_CB_BUSY_REQ_CNT, 0x00200020, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_VFD_CB_LP_REQ_CNT, 0x00100020, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_VPC_FLATSHADE_MODE_CNTL, 0x00000001, BIT(PIPE_BR) | BIT(PIPE_BV) }, - { GEN8_VSC_BIN_SIZE, 0x00010001, BIT(PIPE_NONE) }, /* Disable redundant tile data optimization */ { GEN8_VSC_KMD_DBG_ECO_CNTL, BIT(11), BIT(PIPE_NONE)}, { 0 }, @@ 
-2988,7 +2986,6 @@ static const struct gen8_nonctxt_regs gen8_3_0_nonctxt_regs[] = { { GEN8_VFD_CB_BUSY_REQ_CNT, 0x00200020, BIT(PIPE_BV) | BIT(PIPE_BR) }, { GEN8_VFD_CB_LP_REQ_CNT, 0x00100020, BIT(PIPE_BV) | BIT(PIPE_BR) }, { GEN8_VPC_FLATSHADE_MODE_CNTL, 0x00000001, BIT(PIPE_BV) | BIT(PIPE_BR) }, - { GEN8_VSC_BIN_SIZE, 0x00010001, BIT(PIPE_NONE) }, { GEN8_RB_GC_GMEM_PROTECT, 0x00900000, BIT(PIPE_BR) }, { 0 }, }; diff --git a/adreno_gen8.c b/adreno_gen8.c index e7893d291e..65e08efb36 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -140,7 +140,6 @@ static const u32 gen8_pwrup_reglist[] = { GEN8_UCHE_TRAP_BASE_LO, GEN8_UCHE_TRAP_BASE_HI, GEN8_UCHE_CLIENT_PF, - GEN8_VSC_BIN_SIZE, GEN8_VSC_KMD_DBG_ECO_CNTL, GEN8_RB_CMP_NC_MODE_CNTL, GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, @@ -165,7 +164,6 @@ static const u32 gen8_3_0_pwrup_reglist[] = { GEN8_UCHE_TRAP_BASE_LO, GEN8_UCHE_TRAP_BASE_HI, GEN8_UCHE_CLIENT_PF, - GEN8_VSC_BIN_SIZE, GEN8_RB_CMP_NC_MODE_CNTL, GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_LO, diff --git a/gen8_reg.h b/gen8_reg.h index e5ca1aeef7..a60908583c 100644 --- a/gen8_reg.h +++ b/gen8_reg.h @@ -510,7 +510,6 @@ #define GEN8_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD 0x5e8 #define GEN8_RBBM_SLICE_PERFCTR_FLUSH_HOST_CMD 0x5eb #define GEN8_RBBM_SLICE_NC_MODE_CNTL 0x5ec -#define GEN8_VSC_BIN_SIZE 0xc02 #define GEN8_VSC_KMD_DBG_ECO_CNTL 0xdf0 /* DBGC_CFG registers */ From d33681dedc2b3aae29abbfedf2f4c74ad5820e29 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 6 Jun 2024 19:41:51 +0530 Subject: [PATCH 0805/1016] kgsl: gen8: Clear aperture register properly At few places, aperture register is not cleared after programming registers behind aperture. Fix them by clearing aperture register properly. 
Change-Id: If82346a317d95218b04e396536df611e11d3e89e Signed-off-by: Kamal Agrawal --- adreno_gen8.c | 6 ++++++ adreno_gen8_snapshot.c | 3 +++ 2 files changed, 9 insertions(+) diff --git a/adreno_gen8.c b/adreno_gen8.c index 65e08efb36..695ffe3e69 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -918,6 +918,9 @@ static void gen8_protect_init(struct adreno_device *adreno_dev) FIELD_PREP(GENMASK(30, 18), count) | FIELD_PREP(BIT(31), regs[i].noaccess), PIPE_LPAC, 0, 0); + + /* Clear aperture register */ + gen8_host_aperture_set(adreno_dev, 0, 0, 0); } static void gen8_nonctxt_regconfig(struct adreno_device *adreno_dev) @@ -1164,6 +1167,9 @@ static void gen8_patch_pwrup_reglist(struct adreno_device *adreno_dev) } mutex_unlock(&gen8_dev->nc_mutex); + /* Clear aperture register */ + gen8_host_aperture_set(adreno_dev, 0, 0, 0); + lock->dynamic_list_len = gen8_dev->ext_pwrup_list_len; } diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index cfb5ea63ea..878ce0d215 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -1743,6 +1743,9 @@ void gen8_snapshot(struct adreno_device *adreno_dev, &snapshot->ib2size_lpac, PIPE_LPAC, 0, 0); } + /* Clear aperture register */ + gen8_host_aperture_set(adreno_dev, 0, 0, 0); + /* Assert the isStatic bit before triggering snapshot */ kgsl_regwrite(device, GEN8_RBBM_SNAPSHOT_STATUS, 0x1); From 22e5035c66b4b2743da121630e2094031a67dbbc Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Wed, 7 Dec 2022 14:10:53 +0530 Subject: [PATCH 0806/1016] kgsl: iommu: Fallback to SMMU fault handler for global fault Whenever there is a fault in global address range, fall back to smmu fault handler which prints useful debug information. 
Change-Id: If77dfed8f4651508efd5b31899cf4a36692f3c3a Signed-off-by: Kamal Agrawal --- kgsl_iommu.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 2d102b6f2e..a165bd45a1 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -1257,6 +1257,13 @@ static int kgsl_iommu_fault_handler(struct kgsl_mmu *mmu, kgsl_context_put(context); kgsl_process_private_put(private); + /* + * Fallback to smmu fault handler during globals faults to print useful + * debug information. + */ + if (!stall && kgsl_iommu_addr_is_global(mmu, addr)) + return -ENOSYS; + /* Return -EBUSY to keep the IOMMU driver from resuming on a stall */ return stall ? -EBUSY : 0; } @@ -2467,6 +2474,8 @@ static int kgsl_iommu_setup_context(struct kgsl_mmu *mmu, return -ENODEV; } + qcom_iommu_set_fault_model(context->domain, QCOM_IOMMU_FAULT_MODEL_NON_FATAL); + _enable_gpuhtw_llc(mmu, context->domain); ret = iommu_attach_device(context->domain, &context->pdev->dev); From bf842dd45db7956728714c97801c73b6ad83b06c Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Mon, 6 May 2024 23:40:35 -0700 Subject: [PATCH 0807/1016] kgsl: snapshot: Make SQE firmware dumping target independent Snapshot SQE firmware dumping is not dependent on hardware. Hence make it target independent. 
Change-Id: I9c1bdb3fead51aebbdc056cb99eea6af7004bc28 Signed-off-by: Hareesh Gundu --- adreno_a6xx_snapshot.c | 28 ---------------------------- adreno_gen7_snapshot.c | 26 -------------------------- adreno_gen7_snapshot.h | 5 +---- adreno_gen8_snapshot.c | 26 -------------------------- adreno_gen8_snapshot.h | 3 --- adreno_snapshot.c | 34 ++++++++++++++++++++++++++++++++++ adreno_snapshot.h | 3 +++ 7 files changed, 38 insertions(+), 87 deletions(-) diff --git a/adreno_a6xx_snapshot.c b/adreno_a6xx_snapshot.c index 1a8e2d0bb1..838d22c37f 100644 --- a/adreno_a6xx_snapshot.c +++ b/adreno_a6xx_snapshot.c @@ -1554,30 +1554,6 @@ static void a6xx_snapshot_debugbus(struct adreno_device *adreno_dev, } } - - -/* a6xx_snapshot_sqe() - Dump SQE data in snapshot */ -static size_t a6xx_snapshot_sqe(struct kgsl_device *device, u8 *buf, - size_t remain, void *priv) -{ - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; - unsigned int *data = (unsigned int *)(buf + sizeof(*header)); - struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE); - - if (remain < DEBUG_SECTION_SZ(1)) { - SNAPSHOT_ERR_NOMEM(device, "SQE VERSION DEBUG"); - return 0; - } - - /* Dump the SQE firmware version */ - header->type = SNAPSHOT_DEBUG_SQE_VERSION; - header->size = 1; - *data = fw->version; - - return DEBUG_SECTION_SZ(1); -} - static void _a6xx_do_crashdump(struct kgsl_device *device) { u32 val = 0; @@ -1757,10 +1733,6 @@ void a6xx_snapshot(struct adreno_device *adreno_dev, sptprac_on = a6xx_gmu_sptprac_is_on(adreno_dev); - /* SQE Firmware */ - kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, - snapshot, a6xx_snapshot_sqe, NULL); - if (!adreno_gx_is_on(adreno_dev)) return; diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c index bc109d81de..1540141986 100644 --- a/adreno_gen7_snapshot.c +++ b/adreno_gen7_snapshot.c @@ -1399,28 +1399,6 @@ static void gen7_snapshot_debugbus(struct 
adreno_device *adreno_dev, } } -/* gen7_snapshot_sqe() - Dump SQE data in snapshot */ -static size_t gen7_snapshot_sqe(struct kgsl_device *device, u8 *buf, - size_t remain, void *priv) -{ - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; - unsigned int *data = (unsigned int *)(buf + sizeof(*header)); - struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE); - - if (remain < DEBUG_SECTION_SZ(GEN7_SQE_FW_SNAPSHOT_DWORDS)) { - SNAPSHOT_ERR_NOMEM(device, "SQE VERSION DEBUG"); - return 0; - } - - /* Dump the SQE firmware version */ - header->type = SNAPSHOT_DEBUG_SQE_VERSION; - header->size = GEN7_SQE_FW_SNAPSHOT_DWORDS; - memcpy(data, fw->memdesc->hostptr, (GEN7_SQE_FW_SNAPSHOT_DWORDS * sizeof(u32))); - - return DEBUG_SECTION_SZ(GEN7_SQE_FW_SNAPSHOT_DWORDS); -} - /* gen7_snapshot_aqe() - Dump AQE data in snapshot */ static size_t gen7_snapshot_aqe(struct kgsl_device *device, u8 *buf, size_t remain, void *priv) @@ -1736,10 +1714,6 @@ void gen7_snapshot(struct adreno_device *adreno_dev, gen7_cx_misc_regs_snapshot(device, snapshot); - /* SQE Firmware */ - kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, - snapshot, gen7_snapshot_sqe, NULL); - /* AQE Firmware */ kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, gen7_snapshot_aqe, NULL); diff --git a/adreno_gen7_snapshot.h b/adreno_gen7_snapshot.h index 5a06d4df18..bc985844fd 100644 --- a/adreno_gen7_snapshot.h +++ b/adreno_gen7_snapshot.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __ADRENO_GEN7_SNAPSHOT_H #define __ADRENO_GEN7_SNAPSHOT_H @@ -32,9 +32,6 @@ #define GEN7_DEBUGBUS_BLOCK_SIZE 0x100 -/* Number of dword to dump in snapshot for CP SQE */ -#define GEN7_SQE_FW_SNAPSHOT_DWORDS 5 - struct gen7_sel_reg { unsigned int host_reg; unsigned int cd_reg; diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 878ce0d215..2030279566 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -1438,28 +1438,6 @@ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, } } -/* gen8_snapshot_sqe() - Dump SQE data in snapshot */ -static size_t gen8_snapshot_sqe(struct kgsl_device *device, u8 *buf, - size_t remain, void *priv) -{ - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; - u32 *data = (u32 *)(buf + sizeof(*header)); - struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE); - - if (remain < DEBUG_SECTION_SZ(GEN8_SQE_FW_SNAPSHOT_DWORDS)) { - SNAPSHOT_ERR_NOMEM(device, "SQE VERSION DEBUG"); - return 0; - } - - /* Dump the SQE firmware version */ - header->type = SNAPSHOT_DEBUG_SQE_VERSION; - header->size = GEN8_SQE_FW_SNAPSHOT_DWORDS; - memcpy(data, fw->memdesc->hostptr, (GEN8_SQE_FW_SNAPSHOT_DWORDS * sizeof(u32))); - - return DEBUG_SECTION_SZ(GEN8_SQE_FW_SNAPSHOT_DWORDS); -} - /* gen8_snapshot_aqe() - Dump AQE data in snapshot */ static size_t gen8_snapshot_aqe(struct kgsl_device *device, u8 *buf, size_t remain, void *priv) @@ -1703,10 +1681,6 @@ void gen8_snapshot(struct adreno_device *adreno_dev, gen8_cx_misc_regs_snapshot(device, snapshot); - /* SQE Firmware */ - kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, - snapshot, gen8_snapshot_sqe, NULL); - /* AQE Firmware */ kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, gen8_snapshot_aqe, NULL); diff --git a/adreno_gen8_snapshot.h b/adreno_gen8_snapshot.h index f4f2e61e3a..1ea6ba2550 100644 --- a/adreno_gen8_snapshot.h +++ 
b/adreno_gen8_snapshot.h @@ -46,9 +46,6 @@ enum location_id { #define GEN8_DEBUGBUS_BLOCK_SIZE 0x100 -/* Number of dword to dump in snapshot for CP SQE */ -#define GEN8_SQE_FW_SNAPSHOT_DWORDS 5 - struct sel_reg { u32 host_reg; u32 cd_reg; diff --git a/adreno_snapshot.c b/adreno_snapshot.c index ae0328ca1f..5ed2820923 100644 --- a/adreno_snapshot.c +++ b/adreno_snapshot.c @@ -1103,6 +1103,36 @@ static void adreno_static_ib_dump(struct kgsl_device *device, } +/** + * adreno_snapshot_sqe - Dump the SQE firmware version and few dwords + * @device: Pointer to the kgsl device + * @buf: Pointer to the snapshot buffer + * @remain: Remaining size in snapshot buffer + * @priv: Private pointer to pass to the function + * + * Return: Number of bytes written to the snapshot buffer + */ +static size_t adreno_snapshot_sqe(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + void *data = buf + sizeof(*header); + struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE); + + if (remain < DEBUG_SECTION_SZ(SQE_FW_SNAPSHOT_DWORDS)) { + SNAPSHOT_ERR_NOMEM(device, "SQE VERSION DEBUG"); + return 0; + } + + /* Dump the SQE firmware version and few dwords */ + header->type = SNAPSHOT_DEBUG_SQE_VERSION; + header->size = SQE_FW_SNAPSHOT_DWORDS; + memcpy(data, fw->memdesc->hostptr, (SQE_FW_SNAPSHOT_DWORDS * sizeof(u32))); + + return DEBUG_SECTION_SZ(SQE_FW_SNAPSHOT_DWORDS); +} + /* adreno_snapshot - Snapshot the Adreno GPU state * @device - KGSL device to snapshot * @snapshot - Pointer to the snapshot instance @@ -1142,6 +1172,10 @@ void adreno_snapshot(struct kgsl_device *device, struct kgsl_snapshot *snapshot, snapshot->process = setup_fault_process(device, snapshot, context); snapshot->process_lpac = setup_fault_process(device, snapshot, context_lpac); + /* Dump SQE Firmware version and few dwords */ + 
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_sqe, NULL); + /* Add GPU specific sections - registers mainly, but other stuff too */ if (gpudev->snapshot) gpudev->snapshot(adreno_dev, snapshot); diff --git a/adreno_snapshot.h b/adreno_snapshot.h index a5d4cbc8ff..c9c1e87e7b 100644 --- a/adreno_snapshot.h +++ b/adreno_snapshot.h @@ -8,6 +8,9 @@ #include "kgsl_snapshot.h" +/* Number of dwords to dump in snapshot for CP SQE */ +#define SQE_FW_SNAPSHOT_DWORDS 5 + #define CP_CRASH_DUMPER_TIMEOUT 500 #define SHADER_SECTION_SZ(_dwords) (((_dwords) * sizeof(unsigned int)) \ From 921aa0164bb548d93a05ad6ba85f501ae3fc9880 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Mon, 6 May 2024 23:48:54 -0700 Subject: [PATCH 0808/1016] kgsl: snapshot: Implement adreno_snapshot_firmware() Implement generic adreno_snapshot_firmware() for snapshot SQE and AQE firmware dump info in one common place. Change-Id: Ia847b3895e01dfea68979c4e7625c095de2697c8 Signed-off-by: Hareesh Gundu --- adreno_gen7_snapshot.c | 29 ---------------------------- adreno_gen8_snapshot.c | 29 ---------------------------- adreno_snapshot.c | 43 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 40 insertions(+), 61 deletions(-) diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c index 1540141986..0280484787 100644 --- a/adreno_gen7_snapshot.c +++ b/adreno_gen7_snapshot.c @@ -1399,31 +1399,6 @@ static void gen7_snapshot_debugbus(struct adreno_device *adreno_dev, } } -/* gen7_snapshot_aqe() - Dump AQE data in snapshot */ -static size_t gen7_snapshot_aqe(struct kgsl_device *device, u8 *buf, - size_t remain, void *priv) -{ - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; - u32 *data = (unsigned int *)(buf + sizeof(*header)); - struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_AQE); - - if (!ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) - return 0; - - if (remain < 
DEBUG_SECTION_SZ(1)) { - SNAPSHOT_ERR_NOMEM(device, "AQE VERSION DEBUG"); - return 0; - } - - /* Dump the AQE firmware version */ - header->type = SNAPSHOT_DEBUG_AQE_VERSION; - header->size = 1; - *data = fw->version; - - return DEBUG_SECTION_SZ(1); -} - /* Snapshot the preemption related buffers */ static size_t snapshot_preemption_record(struct kgsl_device *device, u8 *buf, size_t remain, void *priv) @@ -1714,10 +1689,6 @@ void gen7_snapshot(struct adreno_device *adreno_dev, gen7_cx_misc_regs_snapshot(device, snapshot); - /* AQE Firmware */ - kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, - snapshot, gen7_snapshot_aqe, NULL); - if (!adreno_gx_is_on(adreno_dev)) return; diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 2030279566..6191e7d338 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -1438,31 +1438,6 @@ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, } } -/* gen8_snapshot_aqe() - Dump AQE data in snapshot */ -static size_t gen8_snapshot_aqe(struct kgsl_device *device, u8 *buf, - size_t remain, void *priv) -{ - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; - u32 *data = (u32 *)(buf + sizeof(*header)); - struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_AQE); - - if (!ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) - return 0; - - if (remain < DEBUG_SECTION_SZ(1)) { - SNAPSHOT_ERR_NOMEM(device, "AQE VERSION DEBUG"); - return 0; - } - - /* Dump the AQE firmware version */ - header->type = SNAPSHOT_DEBUG_AQE_VERSION; - header->size = 1; - *data = fw->version; - - return DEBUG_SECTION_SZ(1); -} - /* Snapshot the preemption related buffers */ static size_t snapshot_preemption_record(struct kgsl_device *device, u8 *buf, size_t remain, void *priv) @@ -1681,10 +1656,6 @@ void gen8_snapshot(struct adreno_device *adreno_dev, gen8_cx_misc_regs_snapshot(device, snapshot); - /* AQE Firmware */ - 
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, - snapshot, gen8_snapshot_aqe, NULL); - if (!adreno_gx_is_on(adreno_dev)) return; diff --git a/adreno_snapshot.c b/adreno_snapshot.c index 5ed2820923..e0b5195f64 100644 --- a/adreno_snapshot.c +++ b/adreno_snapshot.c @@ -1133,6 +1133,44 @@ static size_t adreno_snapshot_sqe(struct kgsl_device *device, u8 *buf, return DEBUG_SECTION_SZ(SQE_FW_SNAPSHOT_DWORDS); } +/* adreno_snapshot_aqe() - Dump AQE data in snapshot */ +static size_t adreno_snapshot_aqe(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + u32 *data = (u32 *)(buf + sizeof(*header)); + struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_AQE); + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) + return 0; + + if (remain < DEBUG_SECTION_SZ(1)) { + SNAPSHOT_ERR_NOMEM(device, "AQE VERSION DEBUG"); + return 0; + } + + /* Dump the AQE firmware version */ + header->type = SNAPSHOT_DEBUG_AQE_VERSION; + header->size = 1; + *data = fw->version; + + return DEBUG_SECTION_SZ(1); +} + +static void adreno_snapshot_firmware(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + + /* Dump SQE firmware version and few dwords */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_sqe, NULL); + + /* Dump AQE firmware version */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_aqe, NULL); +} + /* adreno_snapshot - Snapshot the Adreno GPU state * @device - KGSL device to snapshot * @snapshot - Pointer to the snapshot instance @@ -1172,9 +1210,8 @@ void adreno_snapshot(struct kgsl_device *device, struct kgsl_snapshot *snapshot, snapshot->process = setup_fault_process(device, snapshot, context); snapshot->process_lpac = setup_fault_process(device, snapshot, context_lpac); - /* Dump SQE Firmware version and few 
dwords */ - kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, - snapshot, adreno_snapshot_sqe, NULL); + /* Dump firmware version and few dwords */ + adreno_snapshot_firmware(device, snapshot); /* Add GPU specific sections - registers mainly, but other stuff too */ if (gpudev->snapshot) From e5dbd5e9d259506f94752c40b04b467986b0e98c Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Mon, 3 Jun 2024 12:29:13 -0700 Subject: [PATCH 0809/1016] kgsl: gen8: Add missing pieces for CP error handling New error codes for CP are missing from error handling, add these to make sure we do not see an unknown fault in valid cases. Change-Id: I7470bad8b2d257c0eca28b7fb8459447b4bcfc22 Signed-off-by: Urvashi Agrawal --- adreno_gen7_hwsched_hfi.c | 8 +-- adreno_gen8_hwsched_hfi.c | 122 +++++++++++++++++++++++++++++++++++++- adreno_hfi.h | 38 +++++++++++- 3 files changed, 160 insertions(+), 8 deletions(-) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index c73c574608..19ba5607b0 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -454,7 +454,7 @@ static void log_gpu_fault_legacy(struct adreno_device *adreno_dev) gen7_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, KEY_SWFUSE_VIOLATION_FAULT)); break; - case GMU_GPU_AQE0_OPCODE_ERRROR: + case GMU_GPU_AQE0_OPCODE_ERROR: dev_crit_ratelimited(dev, "AQE0 opcode error | opcode=0x%8.8x\n", gen7_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, KEY_AQE0_OPCODE_ERROR)); @@ -470,7 +470,7 @@ static void log_gpu_fault_legacy(struct adreno_device *adreno_dev) case GMU_GPU_AQE0_ILLEGAL_INST_ERROR: dev_crit_ratelimited(dev, "AQE0 Illegal instruction error\n"); break; - case GMU_GPU_AQE1_OPCODE_ERRROR: + case GMU_GPU_AQE1_OPCODE_ERROR: dev_crit_ratelimited(dev, "AQE1 opcode error | opcode=0x%8.8x\n", gen7_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, KEY_AQE1_OPCODE_ERROR)); @@ -691,7 +691,7 @@ static bool log_gpu_fault(struct adreno_device 
*adreno_dev) gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_SWFUSE_VIOLATION_FAULT)); break; - case GMU_GPU_AQE0_OPCODE_ERRROR: + case GMU_GPU_AQE0_OPCODE_ERROR: dev_crit_ratelimited(dev, "AQE0 opcode error | opcode=0x%8.8x\n", gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_AQE0_OPCODE_ERROR)); @@ -707,7 +707,7 @@ static bool log_gpu_fault(struct adreno_device *adreno_dev) case GMU_GPU_AQE0_ILLEGAL_INST_ERROR: dev_crit_ratelimited(dev, "AQE0 Illegal instruction error\n"); break; - case GMU_GPU_AQE1_OPCODE_ERRROR: + case GMU_GPU_AQE1_OPCODE_ERROR: dev_crit_ratelimited(dev, "AQE1 opcode error | opcode=0x%8.8x\n", gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_AQE1_OPCODE_ERROR)); diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 09f5120b5b..c5c6ab97e6 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -482,7 +482,7 @@ static bool log_gpu_fault(struct adreno_device *adreno_dev) gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_SWFUSE_VIOLATION_FAULT)); break; - case GMU_GPU_AQE0_OPCODE_ERRROR: + case GMU_GPU_AQE0_OPCODE_ERROR: dev_crit_ratelimited(dev, "AQE0 opcode error | opcode=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_AQE0_OPCODE_ERROR)); @@ -498,7 +498,7 @@ static bool log_gpu_fault(struct adreno_device *adreno_dev) case GMU_GPU_AQE0_ILLEGAL_INST_ERROR: dev_crit_ratelimited(dev, "AQE0 Illegal instruction error\n"); break; - case GMU_GPU_AQE1_OPCODE_ERRROR: + case GMU_GPU_AQE1_OPCODE_ERROR: dev_crit_ratelimited(dev, "AQE1 opcode error | opcode=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_AQE1_OPCODE_ERROR)); @@ -519,6 +519,124 @@ static bool log_gpu_fault(struct adreno_device *adreno_dev) cmd->gc.ctxt_id, cmd->gc.ts); find_timeout_syncobj(adreno_dev, cmd->gc.ctxt_id, cmd->gc.ts); break; + case GMU_CP_DDEBR_HW_FAULT_ERROR: + dev_crit_ratelimited(dev, + "CP DDE BR | Ringbuffer HW 
fault | status=0x%8.8x\n", + gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_DDEBR_HW_FAULT)); + break; + case GMU_CP_DDEBR_OPCODE_ERROR: + dev_crit_ratelimited(dev, + "CP DDE BR opcode error | opcode=0x%8.8x\n", + gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_DDEBR_OPCODE_ERROR)); + break; + case GMU_CP_DDEBR_UCODE_ERROR: + dev_crit_ratelimited(dev, "CP DDE BR ucode error\n"); + break; + case GMU_CP_DDEBR_PROTECTED_ERROR: { + u32 status = gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_DDEBR_PROTECTED_ERROR); + + dev_crit_ratelimited(dev, + "CP DDE BR | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", + status & (1 << 20) ? "READ" : "WRITE", + status & 0x3FFFF, status); + } + break; + case GMU_CP_DDEBR_ILLEGAL_INST_ERROR: + dev_crit_ratelimited(dev, "CP DDEBR Illegal instruction error\n"); + break; + case GMU_CP_DDEBV_HW_FAULT_ERROR: + dev_crit_ratelimited(dev, + "CP DDE BV | Ringbuffer HW fault | status=0x%8.8x\n", + gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_DDEBV_HW_FAULT)); + break; + case GMU_CP_DDEBV_OPCODE_ERROR: + dev_crit_ratelimited(dev, + "CP DDE BV opcode error | opcode=0x%8.8x\n", + gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_DDEBV_OPCODE_ERROR)); + break; + case GMU_CP_DDEBV_UCODE_ERROR: + dev_crit_ratelimited(dev, "CP DDE BV ucode error\n"); + break; + case GMU_CP_DDEBV_PROTECTED_ERROR: { + u32 status = gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_DDEBV_PROTECTED_ERROR); + + dev_crit_ratelimited(dev, + "CP DDE BV | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", + status & (1 << 20) ? 
"READ" : "WRITE", + status & 0x3FFFF, status); + } + break; + case GMU_CP_DDEBV_ILLEGAL_INST_ERROR: + dev_crit_ratelimited(dev, "CP DDE BV Illegal instruction error\n"); + break; + case GMU_CP_BR_SW_FAULT_ERROR: + dev_crit_ratelimited(dev, + "CP BR | SW fault | status=0x%8.8x\n", + gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_BR_SW_FAULT)); + break; + case GMU_CP_BV_SW_FAULT_ERROR: + dev_crit_ratelimited(dev, + "CP BV | SW fault | status=0x%8.8x\n", + gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_BV_SW_FAULT)); + break; + case GMU_CP_LPAC_SW_FAULT_ERROR: + dev_crit_ratelimited(dev, + "CP LPAC | SW fault | status=0x%8.8x\n", + gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_LPAC_SW_FAULT)); + break; + case GMU_CP_AQE0_SW_FAULT_ERROR: + dev_crit_ratelimited(dev, + "CP AQE0 | SW fault | status=0x%8.8x\n", + gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_AQE0_SW_FAULT)); + break; + case GMU_CP_AQE1_SW_FAULT_ERROR: + dev_crit_ratelimited(dev, + "CP AQE1 | SW fault | status=0x%8.8x\n", + gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_AQE1_SW_FAULT)); + break; + case GMU_CP_AQE0_PROTECTED_ERROR: { + u32 status = gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_AQE0_PROTECTED_ERROR); + + dev_crit_ratelimited(dev, + "CP AQE0 | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", + status & (1 << 20) ? "READ" : "WRITE", + status & 0x3FFFF, status); + } + break; + case GMU_CP_AQE1_PROTECTED_ERROR: { + u32 status = gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_AQE1_PROTECTED_ERROR); + + dev_crit_ratelimited(dev, + "CP AQE1 | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", + status & (1 << 20) ? 
"READ" : "WRITE", + status & 0x3FFFF, status); + } + break; + case GMU_CP_DDEBR_SW_FAULT_ERROR: + dev_crit_ratelimited(dev, + "CP DDE BR | SW fault | status=0x%8.8x\n", + gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_DDEBR_SW_FAULT)); + break; + case GMU_CP_DDEBV_SW_FAULT_ERROR: + dev_crit_ratelimited(dev, + "CP DDE BV | SW fault | status=0x%8.8x\n", + gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_DDEBV_SW_FAULT)); + break; case GMU_CP_UNKNOWN_ERROR: fallthrough; default: diff --git a/adreno_hfi.h b/adreno_hfi.h index 9fd602b54a..f48529b73d 100644 --- a/adreno_hfi.h +++ b/adreno_hfi.h @@ -1157,6 +1157,21 @@ struct payload_section { #define KEY_AQE0_HW_FAULT 12 #define KEY_AQE1_OPCODE_ERROR 13 #define KEY_AQE1_HW_FAULT 14 +#define KEY_CP_BR_SW_FAULT 15 +#define KEY_CP_BV_SW_FAULT 16 +#define KEY_CP_LPAC_SW_FAULT 17 +#define KEY_CP_AQE0_SW_FAULT 18 +#define KEY_CP_AQE0_PROTECTED_ERROR 19 +#define KEY_CP_AQE1_SW_FAULT 20 +#define KEY_CP_AQE1_PROTECTED_ERROR 21 +#define KEY_CP_DDEBR_OPCODE_ERROR 22 +#define KEY_CP_DDEBR_PROTECTED_ERROR 23 +#define KEY_CP_DDEBR_HW_FAULT 24 +#define KEY_CP_DDEBR_SW_FAULT 25 +#define KEY_CP_DDEBV_OPCODE_ERROR 26 +#define KEY_CP_DDEBV_PROTECTED_ERROR 27 +#define KEY_CP_DDEBV_HW_FAULT 28 +#define KEY_CP_DDEBV_SW_FAULT 29 #define KEY_CP_AHB_ERROR 30 #define KEY_TSB_WRITE_ERROR 31 @@ -1221,16 +1236,35 @@ struct payload_section { /* Fault due to software fuse violation interrupt */ #define GMU_GPU_SW_FUSE_VIOLATION 621 /* AQE related error codes */ -#define GMU_GPU_AQE0_OPCODE_ERRROR 622 +#define GMU_GPU_AQE0_OPCODE_ERROR 622 #define GMU_GPU_AQE0_UCODE_ERROR 623 #define GMU_GPU_AQE0_HW_FAULT_ERROR 624 #define GMU_GPU_AQE0_ILLEGAL_INST_ERROR 625 -#define GMU_GPU_AQE1_OPCODE_ERRROR 626 +#define GMU_GPU_AQE1_OPCODE_ERROR 626 #define GMU_GPU_AQE1_UCODE_ERROR 627 #define GMU_GPU_AQE1_HW_FAULT_ERROR 628 #define GMU_GPU_AQE1_ILLEGAL_INST_ERROR 629 /* GMU encountered a sync object which is signaled via 
software but not via hardware */ #define GMU_SYNCOBJ_TIMEOUT_ERROR 630 +#define GMU_CP_DDEBR_HW_FAULT_ERROR 631 +#define GMU_CP_DDEBR_OPCODE_ERROR 632 +#define GMU_CP_DDEBR_UCODE_ERROR 633 +#define GMU_CP_DDEBR_PROTECTED_ERROR 634 +#define GMU_CP_DDEBR_ILLEGAL_INST_ERROR 635 +#define GMU_CP_DDEBV_HW_FAULT_ERROR 636 +#define GMU_CP_DDEBV_OPCODE_ERROR 637 +#define GMU_CP_DDEBV_UCODE_ERROR 638 +#define GMU_CP_DDEBV_PROTECTED_ERROR 639 +#define GMU_CP_DDEBV_ILLEGAL_INST_ERROR 640 +#define GMU_CP_BR_SW_FAULT_ERROR 641 +#define GMU_CP_BV_SW_FAULT_ERROR 642 +#define GMU_CP_LPAC_SW_FAULT_ERROR 643 +#define GMU_CP_AQE0_SW_FAULT_ERROR 644 +#define GMU_CP_AQE1_SW_FAULT_ERROR 645 +#define GMU_CP_AQE0_PROTECTED_ERROR 646 +#define GMU_CP_AQE1_PROTECTED_ERROR 647 +#define GMU_CP_DDEBR_SW_FAULT_ERROR 648 +#define GMU_CP_DDEBV_SW_FAULT_ERROR 649 /* Non fatal GPU error codes */ #define GMU_CP_AHB_ERROR 650 #define GMU_ATB_ASYNC_FIFO_OVERFLOW 651 From 130cb0be8ff8b4cefa2d16b05b0d5d925b9a3c25 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sun, 7 Apr 2024 21:10:00 +0530 Subject: [PATCH 0810/1016] kgsl: gmu: Fix unclocked access to RBBM_SW_RESET_CMD register Currently, RBBM_SW_RESET is performed when GX rail and clock are enabled. GX rail and clock are enabled at the start of IFPC exit sequence. However, there are scenarios where the IFPC exit sequence is still in progress, and the necessary clocks (such as gx_cxo_clk) for RBBM_SW_RESET may not be enabled. This situation leads to unclocked access. Fix this by triggering RBBM_SW_RESET_CMD only when GPU is fully active i.e., IFPC sequence is completed. 
Change-Id: I3ea3d6799150b1ed5f13ba9d5047a7b9d2fcfee3 Signed-off-by: Kamal Agrawal --- adreno_a6xx_gmu.c | 17 ++++++++++++++++- adreno_gen7_gmu.c | 17 ++++++++++++++++- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index 87f3f8d4c1..4eb1232a20 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -1209,6 +1209,14 @@ bool a619_holi_gx_is_on(struct adreno_device *adreno_dev) return is_on(val); } +static bool a6xx_gmu_rpmh_pwr_state_is_active(struct kgsl_device *device) +{ + u32 val; + + gmu_core_regread(device, A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE, &val); + return (val == GPU_HW_ACTIVE) ? true : false; +} + /* * a6xx_gmu_sptprac_is_on() - Check if SPTP is on using pwr status register * @adreno_dev - Pointer to adreno_device @@ -1881,7 +1889,14 @@ static void a6xx_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) kgsl_regwrite(device, A6XX_GBIF_HALT, 0x0); } - if (a6xx_gmu_gx_is_on(adreno_dev)) + /* + * GX_CXO_CLK is needed to access RBBM_SW_RESET_CMD register. There are + * scenarios where the IFPC exit sequence is still in progress, and the + * above clock may not be enabled. This situation leads to unclocked + * access. Thus, trigger RBBM_SW_RESET_CMD only when GPU is fully active + * i.e., IFPC sequence is completed. + */ + if (a6xx_gmu_gx_is_on(adreno_dev) && a6xx_gmu_rpmh_pwr_state_is_active(device)) kgsl_regwrite(device, A6XX_RBBM_SW_RESET_CMD, 0x1); /* Make sure above writes are posted before turning off power resources */ diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index aa2663ab7f..f4c1e80a3b 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -772,6 +772,14 @@ bool gen7_gmu_gx_is_on(struct adreno_device *adreno_dev) return is_on(val); } +static bool gen7_gmu_rpmh_pwr_state_is_active(struct kgsl_device *device) +{ + u32 val; + + gmu_core_regread(device, GEN7_GPU_GMU_CX_GMU_RPMH_POWER_STATE, &val); + return (val == GPU_HW_ACTIVE) ? 
true : false; +} + static const char *idle_level_name(int level) { if (level == GPU_HW_ACTIVE) @@ -1489,7 +1497,14 @@ static void gen7_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) _do_gbif_halt(device, GEN7_GBIF_HALT, GEN7_GBIF_HALT_ACK, GEN7_GBIF_ARB_HALT_MASK, "CX"); - if (gen7_gmu_gx_is_on(adreno_dev)) + /* + * GX_CXO_CLK is needed to access RBBM_SW_RESET_CMD register. There are + * scenarios where the IFPC exit sequence is still in progress, and the + * above clock may not be enabled. This situation leads to unclocked + * access. Thus, trigger RBBM_SW_RESET_CMD only when GPU is fully active + * i.e., IFPC sequence is completed. + */ + if (gen7_gmu_gx_is_on(adreno_dev) && gen7_gmu_rpmh_pwr_state_is_active(device)) kgsl_regwrite(device, GEN7_RBBM_SW_RESET_CMD, 0x1); /* Make sure above writes are posted before turning off power resources */ From 06ede7c29d3f4de10b5f95e7a4e8fab3c65da739 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 6 Jun 2024 20:14:43 +0530 Subject: [PATCH 0811/1016] kgsl: Update documentation for IOCTL_KGSL_GPUOBJ_ALLOC For 32 bit MMUs, IOCTL_KGSL_GPUOBJ_ALLOC returns 32-bit virtual address. For 64-bit MMUs, if KGSL_MEMFLAGS_FORCE_32BIT is not set, this IOCTL will return 64-bit VA for both 64-bit and compat tasks. Update the documentation in header file to reflect the same. Change-Id: I0d5d8f54ed35c0b8f9f6264b708697a136365486 Signed-off-by: Kamal Agrawal --- include/uapi/linux/msm_kgsl.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/uapi/linux/msm_kgsl.h b/include/uapi/linux/msm_kgsl.h index 4d93db001b..9e1a1f0348 100644 --- a/include/uapi/linux/msm_kgsl.h +++ b/include/uapi/linux/msm_kgsl.h @@ -1355,6 +1355,11 @@ struct kgsl_gpuobj_alloc { /* Let the user know that this header supports the gpuobj metadata */ #define KGSL_GPUOBJ_ALLOC_METADATA_MAX 64 +/* + * For 32 bit MMUs, this IOCTL returns 32-bit virtual address. 
+ * For 64-bit MMUs, if KGSL_MEMFLAGS_FORCE_32BIT is not set, this + * IOCTL will return 64-bit VA for both 64-bit and compat tasks. + */ #define IOCTL_KGSL_GPUOBJ_ALLOC \ _IOWR(KGSL_IOC_TYPE, 0x45, struct kgsl_gpuobj_alloc) From 69135dbbe816413054b691f923a6302e99a1cda2 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Thu, 23 May 2024 12:48:03 -0600 Subject: [PATCH 0812/1016] kgsl: hwfence: Handle more than one invalidated context With LPAC enabled, it is possible that two contexts get invalidated during single reset-recovery sequence. Hence, add support to drain hardware fences from all invalidated contexts during reset recovery sequence. Also, move gen8/7_hwsched_drain_context_hw_fences() to adreno_gen8/7_hwsched.c which is the only place it is used. Change-Id: I68028f46fb16ccb342e673a24691c9c468482179 Signed-off-by: Harshdeep Dhatt --- adreno_drawctxt.c | 3 +++ adreno_gen7_hwsched.c | 49 ++++++++++++++++++++++++++++++++------- adreno_gen7_hwsched_hfi.c | 21 ----------------- adreno_gen7_hwsched_hfi.h | 12 ---------- adreno_gen8_hwsched.c | 49 ++++++++++++++++++++++++++++++++------- adreno_gen8_hwsched_hfi.c | 21 ----------------- adreno_gen8_hwsched_hfi.h | 12 ---------- kgsl_device.h | 3 +++ 8 files changed, 88 insertions(+), 82 deletions(-) diff --git a/adreno_drawctxt.c b/adreno_drawctxt.c index 14d8208c9b..e302e74e23 100644 --- a/adreno_drawctxt.c +++ b/adreno_drawctxt.c @@ -265,6 +265,9 @@ void adreno_drawctxt_invalidate(struct kgsl_device *device, spin_lock(&drawctxt->lock); set_bit(KGSL_CONTEXT_PRIV_INVALID, &context->priv); + if (!list_empty(&drawctxt->hw_fence_list)) + set_bit(KGSL_CONTEXT_PRIV_INVALID_DRAIN_HW_FENCE, &context->priv); + /* * set the timestamp to the last value since the context is invalidated * and we want the pending events for this context to go away diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 24f83712f1..156c85b841 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -1700,16 +1700,36 @@ static 
int process_detached_hw_fences_after_reset(struct adreno_device *adreno_d return ret; } -static int drain_guilty_context_hw_fences(struct adreno_device *adreno_dev) +static int gen7_hwsched_drain_context_hw_fences(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + struct adreno_hw_fence_entry *entry, *tmp; + int ret = 0; + + /* We don't need the drawctxt lock here as this context has already been invalidated */ + list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) { + + /* Any error here is fatal */ + ret = gen7_send_hw_fence_hfi_wait_ack(adreno_dev, entry, + HW_FENCE_FLAG_SKIP_MEMSTORE); + if (ret) + break; + + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); + } + + return ret; +} + +static struct adreno_context *_get_guilty_context(struct kgsl_device *device) { - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct kgsl_context *context = NULL; struct adreno_context *guilty = NULL; - int id, ret = 0; + int id; read_lock(&device->context_lock); idr_for_each_entry(&device->context_idr, context, id) { - if (test_bit(KGSL_CONTEXT_PRIV_INVALID, &context->priv) && + if (test_and_clear_bit(KGSL_CONTEXT_PRIV_INVALID_DRAIN_HW_FENCE, &context->priv) && _kgsl_context_get(context)) { guilty = ADRENO_CONTEXT(context); break; @@ -1717,17 +1737,30 @@ static int drain_guilty_context_hw_fences(struct adreno_device *adreno_dev) } read_unlock(&device->context_lock); - if (!guilty) - return 0; + return guilty; +} + +static int drain_guilty_context_hw_fences(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_context *guilty = _get_guilty_context(device); + int ret = 0; /* * We don't need drawctxt spinlock to signal these fences since the only other place * which can access these fences is the context detach path and device mutex * ensures mutual exclusion between recovery thread and detach thread. 
*/ - ret = gen7_hwsched_drain_context_hw_fences(adreno_dev, guilty); + while (guilty) { + ret = gen7_hwsched_drain_context_hw_fences(adreno_dev, guilty); - kgsl_context_put(&guilty->base); + kgsl_context_put(&guilty->base); + + if (ret) + break; + + guilty = _get_guilty_context(device); + } return ret; } diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 19ba5607b0..7f362120cc 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -3905,27 +3905,6 @@ static void drain_context_hw_fence_cpu(struct adreno_device *adreno_dev, } } -int gen7_hwsched_drain_context_hw_fences(struct adreno_device *adreno_dev, - struct adreno_context *drawctxt) -{ - struct adreno_hw_fence_entry *entry, *tmp; - int ret = 0; - - /* We don't need the drawctxt lock here as this context has already been invalidated */ - list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) { - - /* Any error here is fatal */ - ret = gen7_send_hw_fence_hfi_wait_ack(adreno_dev, entry, - HW_FENCE_FLAG_SKIP_MEMSTORE); - if (ret) - break; - - adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); - } - - return ret; -} - static void trigger_context_unregister_fault(struct adreno_device *adreno_dev, struct adreno_context *drawctxt) { diff --git a/adreno_gen7_hwsched_hfi.h b/adreno_gen7_hwsched_hfi.h index 3afcac0e60..a42e7663e9 100644 --- a/adreno_gen7_hwsched_hfi.h +++ b/adreno_gen7_hwsched_hfi.h @@ -285,18 +285,6 @@ int gen7_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev, void gen7_hwsched_create_hw_fence(struct adreno_device *adreno_dev, struct kgsl_sync_fence *kfence); -/** - * gen7_hwsched_drain_context_hw_fences - Drain context's hardware fences via GMU - * @adreno_dev: Pointer to adreno device - * @drawctxt: Pointer to the adreno context which is to be flushed - * - * Trigger hardware fences that were never dispatched to GMU - * - * Return: Zero on success or negative error on failure - */ -int gen7_hwsched_drain_context_hw_fences(struct 
adreno_device *adreno_dev, - struct adreno_context *drawctxt); - /** * gen7_hwsched_check_context_inflight_hw_fences - Check whether all hardware fences * from a context have been sent to the TxQueue or not diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index d384c917f0..4694de955c 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -1704,16 +1704,36 @@ static int process_detached_hw_fences_after_reset(struct adreno_device *adreno_d return ret; } -static int drain_guilty_context_hw_fences(struct adreno_device *adreno_dev) +static int gen8_hwsched_drain_context_hw_fences(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + struct adreno_hw_fence_entry *entry, *tmp; + int ret = 0; + + /* We don't need the drawctxt lock here as this context has already been invalidated */ + list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) { + + /* Any error here is fatal */ + ret = gen8_send_hw_fence_hfi_wait_ack(adreno_dev, entry, + HW_FENCE_FLAG_SKIP_MEMSTORE); + if (ret) + break; + + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); + } + + return ret; +} + +static struct adreno_context *_get_guilty_context(struct kgsl_device *device) { - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct kgsl_context *context = NULL; struct adreno_context *guilty = NULL; - int id, ret = 0; + int id; read_lock(&device->context_lock); idr_for_each_entry(&device->context_idr, context, id) { - if (test_bit(KGSL_CONTEXT_PRIV_INVALID, &context->priv) && + if (test_and_clear_bit(KGSL_CONTEXT_PRIV_INVALID_DRAIN_HW_FENCE, &context->priv) && _kgsl_context_get(context)) { guilty = ADRENO_CONTEXT(context); break; @@ -1721,17 +1741,30 @@ static int drain_guilty_context_hw_fences(struct adreno_device *adreno_dev) } read_unlock(&device->context_lock); - if (!guilty) - return 0; + return guilty; +} + +static int drain_guilty_context_hw_fences(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = 
KGSL_DEVICE(adreno_dev); + struct adreno_context *guilty = _get_guilty_context(device); + int ret = 0; /* * We don't need drawctxt spinlock to signal these fences since the only other place * which can retire these fences is the context detach path and device mutex * ensures mutual exclusion between recovery thread and detach thread. */ - ret = gen8_hwsched_drain_context_hw_fences(adreno_dev, guilty); + while (guilty) { + ret = gen8_hwsched_drain_context_hw_fences(adreno_dev, guilty); - kgsl_context_put(&guilty->base); + kgsl_context_put(&guilty->base); + + if (ret) + break; + + guilty = _get_guilty_context(device); + } return ret; } diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index c5c6ab97e6..0943195fa4 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -3644,27 +3644,6 @@ static void drain_context_hw_fence_cpu(struct adreno_device *adreno_dev, } } -int gen8_hwsched_drain_context_hw_fences(struct adreno_device *adreno_dev, - struct adreno_context *drawctxt) -{ - struct adreno_hw_fence_entry *entry, *tmp; - int ret = 0; - - /* We don't need the drawctxt lock here as this context has already been invalidated */ - list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) { - - /* Any error here is fatal */ - ret = gen8_send_hw_fence_hfi_wait_ack(adreno_dev, entry, - HW_FENCE_FLAG_SKIP_MEMSTORE); - if (ret) - break; - - adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); - } - - return ret; -} - static void trigger_context_unregister_fault(struct adreno_device *adreno_dev, struct adreno_context *drawctxt) { diff --git a/adreno_gen8_hwsched_hfi.h b/adreno_gen8_hwsched_hfi.h index 9abca4a052..3930a6ff40 100644 --- a/adreno_gen8_hwsched_hfi.h +++ b/adreno_gen8_hwsched_hfi.h @@ -284,18 +284,6 @@ int gen8_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev, void gen8_hwsched_create_hw_fence(struct adreno_device *adreno_dev, struct kgsl_sync_fence *kfence); -/** - * gen8_hwsched_drain_context_hw_fences - 
Drain context's hardware fences via GMU - * @adreno_dev: Pointer to adreno device - * @drawctxt: Pointer to the adreno context which is to be flushed - * - * Trigger hardware fences that were never dispatched to GMU - * - * Return: Zero on success or negative error on failure - */ -int gen8_hwsched_drain_context_hw_fences(struct adreno_device *adreno_dev, - struct adreno_context *drawctxt); - /** * gen8_hwsched_check_context_inflight_hw_fences - Check whether all hardware fences * from a context have been sent to the TxQueue or not diff --git a/kgsl_device.h b/kgsl_device.h index 56c547ad21..d458649aac 100644 --- a/kgsl_device.h +++ b/kgsl_device.h @@ -364,6 +364,8 @@ struct kgsl_device { * @KGSL_CONTEXT_PRIV_PAGEFAULT - The context has caused a page fault. * @KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC - this value and higher values are * reserved for devices specific use. + * @KGSL_CONTEXT_PRIV_INVALID_DRAIN_HW_FENCE - this context got invalidated + * and needs its hardware fences drained after device reset */ enum kgsl_context_priv { KGSL_CONTEXT_PRIV_SUBMITTED = 0, @@ -371,6 +373,7 @@ enum kgsl_context_priv { KGSL_CONTEXT_PRIV_INVALID, KGSL_CONTEXT_PRIV_PAGEFAULT, KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC = 16, + KGSL_CONTEXT_PRIV_INVALID_DRAIN_HW_FENCE, }; struct kgsl_process_private; From 667189a1c75aa7a1f00b62c774de6f060e6fa46e Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Thu, 23 May 2024 12:56:40 -0600 Subject: [PATCH 0813/1016] kgsl: hwfence: Take context refcount upon hardware fence creation This ensures that the adreno context doesn't get destroyed until all its hardware fences have been destroyed. The context destroy must happen in a deferred manner since we cannot call kgsl_context_destroy() from atomic context. 
Change-Id: I51e8816c31e70d56194101fbe9ca053dbec865bb Signed-off-by: Harshdeep Dhatt --- adreno_gen7_hwsched_hfi.c | 10 ++++++++-- adreno_gen8_hwsched_hfi.c | 10 ++++++++-- adreno_hwsched.c | 1 + kgsl.c | 18 ++++++++++++++++++ kgsl.h | 6 ++++++ kgsl_device.h | 15 +++++++++++++++ 6 files changed, 56 insertions(+), 4 deletions(-) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 7f362120cc..8024ca8f53 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -3373,15 +3373,21 @@ static struct adreno_hw_fence_entry *allocate_hw_fence_entry(struct adreno_devic if (!DRAWCTXT_SLOT_AVAILABLE(drawctxt->hw_fence_count)) return NULL; - entry = kmem_cache_zalloc(hwsched->hw_fence_cache, GFP_ATOMIC); - if (!entry) + if (_kgsl_context_get(&drawctxt->base)) return NULL; + entry = kmem_cache_zalloc(hwsched->hw_fence_cache, GFP_ATOMIC); + if (!entry) { + kgsl_context_put_deferred(&drawctxt->base); + return NULL; + } + entry->kfence = kfence; entry->drawctxt = drawctxt; if (setup_hw_fence_info_cmd(adreno_dev, entry)) { kmem_cache_free(hwsched->hw_fence_cache, entry); + kgsl_context_put_deferred(&drawctxt->base); return NULL; } diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 0943195fa4..1e641ea2e2 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -3178,15 +3178,21 @@ static struct adreno_hw_fence_entry *allocate_hw_fence_entry(struct adreno_devic if (!DRAWCTXT_SLOT_AVAILABLE(drawctxt->hw_fence_count)) return NULL; - entry = kmem_cache_zalloc(hwsched->hw_fence_cache, GFP_ATOMIC); - if (!entry) + if (_kgsl_context_get(&drawctxt->base)) return NULL; + entry = kmem_cache_zalloc(hwsched->hw_fence_cache, GFP_ATOMIC); + if (!entry) { + kgsl_context_put_deferred(&drawctxt->base); + return NULL; + } + entry->kfence = kfence; entry->drawctxt = drawctxt; if (setup_hw_fence_info_cmd(adreno_dev, entry)) { kmem_cache_free(hwsched->hw_fence_cache, entry); + kgsl_context_put_deferred(&drawctxt->base); return 
NULL; } diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 6b656e4233..f5ffca5def 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -2552,4 +2552,5 @@ void adreno_hwsched_remove_hw_fence_entry(struct adreno_device *adreno_dev, dma_fence_put(&entry->kfence->fence); list_del_init(&entry->node); kmem_cache_free(hwsched->hw_fence_cache, entry); + kgsl_context_put_deferred(&drawctxt->base); } diff --git a/kgsl.c b/kgsl.c index 634568230b..da8ae3ec85 100644 --- a/kgsl.c +++ b/kgsl.c @@ -441,6 +441,24 @@ void kgsl_mem_entry_destroy_deferred(struct kref *kref) queue_work(kgsl_driver.lockless_workqueue, &entry->work); } +/* Scheduled by kgsl_context_destroy_deferred() */ +static void _deferred_context_destroy(struct work_struct *work) +{ + struct kgsl_context *context = + container_of(work, struct kgsl_context, deferred_destroy_ws); + + kgsl_context_destroy(&context->refcount); +} + +void kgsl_context_destroy_deferred(struct kref *kref) +{ + struct kgsl_context *context = + container_of(kref, struct kgsl_context, refcount); + + INIT_WORK(&context->deferred_destroy_ws, _deferred_context_destroy); + queue_work(kgsl_driver.lockless_workqueue, &context->deferred_destroy_ws); +} + /* Commit the entry to the process so it can be accessed by other operations */ static void kgsl_mem_entry_commit_process(struct kgsl_mem_entry *entry) { diff --git a/kgsl.h b/kgsl.h index be8fe948ab..a275225203 100644 --- a/kgsl.h +++ b/kgsl.h @@ -707,4 +707,10 @@ static inline bool kgsl_addr_range_overlap(uint64_t gpuaddr1, */ void kgsl_work_period_update(struct kgsl_device *device, struct gpu_work_period *period, u64 active); + +/** + * kgsl_context_destroy_deferred() - Destroy context in a deferred manner + * @kref: Pointer to context refcount + */ +void kgsl_context_destroy_deferred(struct kref *kref); #endif /* __KGSL_H */ diff --git a/kgsl_device.h b/kgsl_device.h index d458649aac..b7841e8466 100644 --- a/kgsl_device.h +++ b/kgsl_device.h @@ -454,6 +454,8 @@ struct kgsl_context { 
struct list_head faults; /** @fault_lock: Mutex to protect faults */ struct mutex fault_lock; + /** @deferred_destroy_ws: Work struct used to destroy context in a deferred manner */ + struct work_struct deferred_destroy_ws; }; #define _context_comm(_c) \ @@ -821,6 +823,19 @@ kgsl_context_put(struct kgsl_context *context) kref_put(&context->refcount, kgsl_context_destroy); } +/* + * kgsl_context_put_deferred() - Puts refcount and triggers deferred + * context destroy when refcount is the last refcount. + * @context: context to put + * + * Use this to put a context from within atomic context + */ +static inline void kgsl_context_put_deferred(struct kgsl_context *context) +{ + if (context) + kref_put(&context->refcount, kgsl_context_destroy_deferred); +} + /** * kgsl_context_detached() - check if a context is detached * @context: the context From 25489fcf43637e1ceda38f73b5e79327a9a47bcc Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Mon, 6 May 2024 23:58:47 -0700 Subject: [PATCH 0814/1016] kgsl: hwsched: Implement generic ringbuffer snapshot Hwsched ringbuffer information is extracted from the F2H_MSG_CONTEXT_BAD packet. The extracted information is the same for different target variants. Hence implement generic functions to parse F2H_MSG_CONTEXT_BAD packet and dump ringbuffer data into snapshot. 
Change-Id: Iac646b311cf57f537287ce59e2d468fa22be74cc Signed-off-by: Hareesh Gundu --- adreno.h | 2 + adreno_a6xx_hwsched.c | 193 +------------------------------------ adreno_a6xx_hwsched_hfi.c | 18 ++++ adreno_a6xx_hwsched_hfi.h | 12 ++- adreno_gen7.c | 2 + adreno_gen7_hwsched.c | 196 +------------------------------------- adreno_gen7_hwsched_hfi.c | 19 ++++ adreno_gen7_hwsched_hfi.h | 10 ++ adreno_gen8.c | 2 + adreno_gen8_hwsched.c | 164 +------------------------------ adreno_gen8_hwsched_hfi.c | 38 ++++---- adreno_gen8_hwsched_hfi.h | 22 ++--- adreno_hwsched.c | 172 +++++++++++++++++++++++++++++++++ adreno_hwsched.h | 11 ++- 14 files changed, 287 insertions(+), 574 deletions(-) diff --git a/adreno.h b/adreno.h index b150f26df3..b55c4c58cf 100644 --- a/adreno.h +++ b/adreno.h @@ -832,6 +832,8 @@ enum adreno_regs { ADRENO_REG_CP_ME_RAM_DATA, ADRENO_REG_CP_RB_BASE, ADRENO_REG_CP_RB_BASE_HI, + ADRENO_REG_CP_LPAC_RB_BASE, + ADRENO_REG_CP_LPAC_RB_BASE_HI, ADRENO_REG_CP_RB_RPTR_ADDR_LO, ADRENO_REG_CP_RB_RPTR_ADDR_HI, ADRENO_REG_CP_RB_RPTR, diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index 69b8504411..fbe285e2cc 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -17,33 +17,6 @@ #include "kgsl_device.h" #include "kgsl_trace.h" -static size_t adreno_hwsched_snapshot_rb(struct kgsl_device *device, u8 *buf, - size_t remain, void *priv) -{ - struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf; - u32 *data = (u32 *)(buf + sizeof(*header)); - struct kgsl_memdesc *rb = (struct kgsl_memdesc *)priv; - - if (remain < rb->size + sizeof(*header)) { - SNAPSHOT_ERR_NOMEM(device, "RB"); - return 0; - } - - header->start = 0; - header->end = rb->size >> 2; - header->rptr = 0; - header->rbsize = rb->size >> 2; - header->count = rb->size >> 2; - header->timestamp_queued = 0; - header->timestamp_retired = 0; - header->gpuaddr = rb->gpuaddr; - header->id = 0; - - memcpy(data, rb->hostptr, rb->size); - - return rb->size + sizeof(*header); -} - 
static void a6xx_hwsched_snapshot_preemption_record(struct kgsl_device *device, struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md, u64 offset) { @@ -104,167 +77,6 @@ static void snapshot_preemption_records(struct kgsl_device *device, offset); } -static void *get_rb_hostptr(struct adreno_device *adreno_dev, - u64 gpuaddr, u32 size) -{ - struct a6xx_hwsched_hfi *hw_hfi = to_a6xx_hwsched_hfi(adreno_dev); - u64 offset; - u32 i; - - for (i = 0; i < hw_hfi->mem_alloc_entries; i++) { - struct kgsl_memdesc *md = hw_hfi->mem_alloc_table[i].md; - - if (md && (gpuaddr >= md->gpuaddr) && - ((gpuaddr + size) <= (md->gpuaddr + md->size))) { - offset = gpuaddr - md->gpuaddr; - return md->hostptr + offset; - } - } - return NULL; -} - -static u32 a6xx_copy_gpu_global(void *out, void *in, u32 size) -{ - if (out && in) { - memcpy(out, in, size); - return size; - } - - return 0; -} - - -static void adreno_hwsched_snapshot_rb_payload(struct adreno_device *adreno_dev, - struct kgsl_snapshot *snapshot, struct payload_section *payload) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct kgsl_snapshot_section_header *section_header = - (struct kgsl_snapshot_section_header *)snapshot->ptr; - u8 *buf = snapshot->ptr + sizeof(*section_header); - struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf; - u32 *data = (u32 *)(buf + sizeof(*header)); - u32 size = adreno_hwsched_parse_payload(payload, KEY_RB_SIZEDWORDS) << 2; - u64 lo, hi, gpuaddr; - void *rb_hostptr; - char str[16]; - - lo = adreno_hwsched_parse_payload(payload, KEY_RB_GPUADDR_LO); - hi = adreno_hwsched_parse_payload(payload, KEY_RB_GPUADDR_HI); - gpuaddr = hi << 32 | lo; - - /* Sanity check to make sure there is enough for the header */ - if (snapshot->remain < sizeof(*section_header)) - goto err; - - rb_hostptr = get_rb_hostptr(adreno_dev, gpuaddr, size); - - /* If the gpuaddress and size don't match any allocation, then abort */ - if (((snapshot->remain - sizeof(*section_header)) < - (size 
+ sizeof(*header))) || - !a6xx_copy_gpu_global(data, rb_hostptr, size)) - goto err; - - if (device->dump_all_ibs) { - u64 rbaddr; - - kgsl_regread64(device, A6XX_CP_RB_BASE, - A6XX_CP_RB_BASE_HI, &rbaddr); - - /* Parse all IBs from current RB */ - if (rbaddr == gpuaddr) - adreno_snapshot_dump_all_ibs(device, rb_hostptr, snapshot); - } - - header->start = 0; - header->end = size >> 2; - header->rptr = adreno_hwsched_parse_payload(payload, KEY_RB_RPTR); - header->wptr = adreno_hwsched_parse_payload(payload, KEY_RB_WPTR); - header->rbsize = size >> 2; - header->count = size >> 2; - header->timestamp_queued = adreno_hwsched_parse_payload(payload, - KEY_RB_QUEUED_TS); - header->timestamp_retired = adreno_hwsched_parse_payload(payload, - KEY_RB_RETIRED_TS); - header->gpuaddr = gpuaddr; - header->id = adreno_hwsched_parse_payload(payload, KEY_RB_ID); - - section_header->magic = SNAPSHOT_SECTION_MAGIC; - section_header->id = KGSL_SNAPSHOT_SECTION_RB_V2; - section_header->size = size + sizeof(*header) + sizeof(*section_header); - - snapshot->ptr += section_header->size; - snapshot->remain -= section_header->size; - snapshot->size += section_header->size; - - return; -err: - snprintf(str, sizeof(str), "RB addr:0x%llx", gpuaddr); - SNAPSHOT_ERR_NOMEM(device, str); -} - -static bool parse_payload_rb_legacy(struct adreno_device *adreno_dev, - struct kgsl_snapshot *snapshot) -{ - struct hfi_context_bad_cmd_legacy *cmd = adreno_dev->hwsched.ctxt_bad; - u32 i = 0, payload_bytes; - void *start; - bool ret = false; - - /* Skip if we didn't receive a context bad HFI */ - if (!cmd->hdr) - return false; - - payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - - offsetof(struct hfi_context_bad_cmd_legacy, payload); - - start = &cmd->payload[0]; - - while (i < payload_bytes) { - struct payload_section *payload = start + i; - - if (payload->type == PAYLOAD_RB) { - adreno_hwsched_snapshot_rb_payload(adreno_dev, snapshot, payload); - ret = true; - } - - i += sizeof(*payload) + 
(payload->dwords << 2); - } - - return ret; -} - -static bool parse_payload_rb(struct adreno_device *adreno_dev, - struct kgsl_snapshot *snapshot) -{ - struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; - u32 i = 0, payload_bytes; - void *start; - bool ret = false; - - /* Skip if we didn't receive a context bad HFI */ - if (!cmd->hdr) - return false; - - payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - - offsetof(struct hfi_context_bad_cmd, payload); - - start = &cmd->payload[0]; - - while (i < payload_bytes) { - struct payload_section *payload = start + i; - - if (payload->type == PAYLOAD_RB) { - adreno_hwsched_snapshot_rb_payload(adreno_dev, - snapshot, payload); - ret = true; - } - - i += sizeof(*payload) + (payload->dwords << 2); - } - - return ret; -} - void a6xx_hwsched_snapshot(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) { @@ -286,9 +98,9 @@ void a6xx_hwsched_snapshot(struct adreno_device *adreno_dev, * based on MEMKIND_RB */ if (GMU_VER_MINOR(gmu->ver.hfi) < 2) - parse_payload = parse_payload_rb_legacy(adreno_dev, snapshot); + parse_payload = adreno_hwsched_parse_payload_rb_legacy(adreno_dev, snapshot); else - parse_payload = parse_payload_rb(adreno_dev, snapshot); + parse_payload = adreno_hwsched_parse_payload_rb(adreno_dev, snapshot); if (parse_payload) skip_memkind_rb = true; @@ -1282,6 +1094,7 @@ const struct adreno_power_ops a6xx_hwsched_power_ops = { const struct adreno_hwsched_ops a6xx_hwsched_ops = { .submit_drawobj = a6xx_hwsched_submit_drawobj, .preempt_count = a6xx_hwsched_preempt_count_get, + .get_rb_hostptr = a6xx_hwsched_get_rb_hostptr, }; int a6xx_hwsched_probe(struct platform_device *pdev, diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index 7bb3945ecc..cdc386a8ed 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -2159,3 +2159,21 @@ done: return rc ? 
0 : pending_ack.results[2]; } + +void *a6xx_hwsched_get_rb_hostptr(struct adreno_device *adreno_dev, + u64 gpuaddr, u32 size) +{ + struct a6xx_hwsched_hfi *hw_hfi = to_a6xx_hwsched_hfi(adreno_dev); + u64 offset; + u32 i; + + for (i = 0; i < hw_hfi->mem_alloc_entries; i++) { + struct kgsl_memdesc *md = hw_hfi->mem_alloc_table[i].md; + + if (kgsl_gpuaddr_in_memdesc(md, gpuaddr, size)) { + offset = gpuaddr - md->gpuaddr; + return md->hostptr + offset; + } + } + return NULL; +} diff --git a/adreno_a6xx_hwsched_hfi.h b/adreno_a6xx_hwsched_hfi.h index 8e508723fc..27ec2d2292 100644 --- a/adreno_a6xx_hwsched_hfi.h +++ b/adreno_a6xx_hwsched_hfi.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _ADRENO_A6XX_HWSCHED_HFI_H_ @@ -149,4 +149,14 @@ struct a6xx_hwsched_hfi *to_a6xx_hwsched_hfi(struct adreno_device *adreno_dev); */ u32 a6xx_hwsched_preempt_count_get(struct adreno_device *adreno_dev); +/** + * a6xx_hwsched_get_rb_hostptr - Get ringbuffer host pointer + * @adreno_dev: pointer to the adreno device + * @gpuaddr: ringbuffer gpu address + * @size: size of the ringbuffer + * + * Return: Host pointer of the gpu ringbuffer + */ +void *a6xx_hwsched_get_rb_hostptr(struct adreno_device *adreno_dev, + u64 gpuaddr, u32 size); #endif diff --git a/adreno_gen7.c b/adreno_gen7.c index 27b01d2fb4..d0108ee562 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -1705,6 +1705,8 @@ int gen7_probe_common(struct platform_device *pdev, static unsigned int gen7_register_offsets[ADRENO_REG_REGISTER_MAX] = { ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, GEN7_CP_RB_BASE), ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, GEN7_CP_RB_BASE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_LPAC_RB_BASE, GEN7_CP_LPAC_RB_BASE), +
ADRENO_REG_DEFINE(ADRENO_REG_CP_LPAC_RB_BASE_HI, GEN7_CP_LPAC_RB_BASE_HI), ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, GEN7_CP_RB_RPTR), ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, GEN7_CP_RB_WPTR), ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, GEN7_CP_SQE_CNTL), diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 156c85b841..6221ea3413 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -61,33 +61,6 @@ void gen7_hwsched_fault(struct adreno_device *adreno_dev, u32 fault) adreno_hwsched_fault(adreno_dev, fault); } -static size_t adreno_hwsched_snapshot_rb(struct kgsl_device *device, u8 *buf, - size_t remain, void *priv) -{ - struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf; - u32 *data = (u32 *)(buf + sizeof(*header)); - struct kgsl_memdesc *rb = (struct kgsl_memdesc *)priv; - - if (remain < rb->size + sizeof(*header)) { - SNAPSHOT_ERR_NOMEM(device, "RB"); - return 0; - } - - header->start = 0; - header->end = rb->size >> 2; - header->rptr = 0; - header->rbsize = rb->size >> 2; - header->count = rb->size >> 2; - header->timestamp_queued = 0; - header->timestamp_retired = 0; - header->gpuaddr = rb->gpuaddr; - header->id = 0; - - memcpy(data, rb->hostptr, rb->size); - - return rb->size + sizeof(*header); -} - static void gen7_hwsched_snapshot_preemption_record(struct kgsl_device *device, struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md, u64 offset) { @@ -147,170 +120,6 @@ static void snapshot_preemption_records(struct kgsl_device *device, offset); } -static void *get_rb_hostptr(struct adreno_device *adreno_dev, - u64 gpuaddr, u32 size) -{ - struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev); - u64 offset; - u32 i; - - for (i = 0; i < hw_hfi->mem_alloc_entries; i++) { - struct kgsl_memdesc *md = hw_hfi->mem_alloc_table[i].md; - - if (md && (gpuaddr >= md->gpuaddr) && - ((gpuaddr + size) <= (md->gpuaddr + md->size))) { - offset = gpuaddr - md->gpuaddr; - return md->hostptr + offset; - } - } - - return NULL; -} 
- -static u32 gen7_copy_gpu_global(void *out, void *in, u32 size) -{ - if (out && in) { - memcpy(out, in, size); - return size; - } - - return 0; -} - -static void adreno_hwsched_snapshot_rb_payload(struct adreno_device *adreno_dev, - struct kgsl_snapshot *snapshot, struct payload_section *payload) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct kgsl_snapshot_section_header *section_header = - (struct kgsl_snapshot_section_header *)snapshot->ptr; - u8 *buf = snapshot->ptr + sizeof(*section_header); - struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf; - u32 *data = (u32 *)(buf + sizeof(*header)); - u32 size = adreno_hwsched_parse_payload(payload, KEY_RB_SIZEDWORDS) << 2; - u64 lo, hi, gpuaddr; - void *rb_hostptr; - char str[16]; - - lo = adreno_hwsched_parse_payload(payload, KEY_RB_GPUADDR_LO); - hi = adreno_hwsched_parse_payload(payload, KEY_RB_GPUADDR_HI); - gpuaddr = hi << 32 | lo; - - /* Sanity check to make sure there is enough for the header */ - if (snapshot->remain < sizeof(*section_header)) - goto err; - - rb_hostptr = get_rb_hostptr(adreno_dev, gpuaddr, size); - - /* If the gpuaddress and size don't match any allocation, then abort */ - if (((snapshot->remain - sizeof(*section_header)) < - (size + sizeof(*header))) || - !gen7_copy_gpu_global(data, rb_hostptr, size)) - goto err; - - if (device->dump_all_ibs) { - u64 rbaddr, lpac_rbaddr; - - kgsl_regread64(device, GEN7_CP_RB_BASE, - GEN7_CP_RB_BASE_HI, &rbaddr); - kgsl_regread64(device, GEN7_CP_LPAC_RB_BASE, - GEN7_CP_LPAC_RB_BASE_HI, &lpac_rbaddr); - - /* Parse all IBs from current RB */ - if ((rbaddr == gpuaddr) || (lpac_rbaddr == gpuaddr)) - adreno_snapshot_dump_all_ibs(device, rb_hostptr, snapshot); - } - - header->start = 0; - header->end = size >> 2; - header->rptr = adreno_hwsched_parse_payload(payload, KEY_RB_RPTR); - header->wptr = adreno_hwsched_parse_payload(payload, KEY_RB_WPTR); - header->rbsize = size >> 2; - header->count = size >> 2; - 
header->timestamp_queued = adreno_hwsched_parse_payload(payload, - KEY_RB_QUEUED_TS); - header->timestamp_retired = adreno_hwsched_parse_payload(payload, - KEY_RB_RETIRED_TS); - header->gpuaddr = gpuaddr; - header->id = adreno_hwsched_parse_payload(payload, KEY_RB_ID); - - section_header->magic = SNAPSHOT_SECTION_MAGIC; - section_header->id = KGSL_SNAPSHOT_SECTION_RB_V2; - section_header->size = size + sizeof(*header) + sizeof(*section_header); - - snapshot->ptr += section_header->size; - snapshot->remain -= section_header->size; - snapshot->size += section_header->size; - - return; -err: - snprintf(str, sizeof(str), "RB addr:0x%llx", gpuaddr); - SNAPSHOT_ERR_NOMEM(device, str); -} - -static bool parse_payload_rb_legacy(struct adreno_device *adreno_dev, - struct kgsl_snapshot *snapshot) -{ - struct hfi_context_bad_cmd_legacy *cmd = adreno_dev->hwsched.ctxt_bad; - u32 i = 0, payload_bytes; - void *start; - bool ret = false; - - /* Skip if we didn't receive a context bad HFI */ - if (!cmd->hdr) - return false; - - payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - - offsetof(struct hfi_context_bad_cmd_legacy, payload); - - start = &cmd->payload[0]; - - while (i < payload_bytes) { - struct payload_section *payload = start + i; - - if (payload->type == PAYLOAD_RB) { - adreno_hwsched_snapshot_rb_payload(adreno_dev, - snapshot, payload); - ret = true; - } - - i += sizeof(*payload) + (payload->dwords << 2); - } - - return ret; -} - -static bool parse_payload_rb(struct adreno_device *adreno_dev, - struct kgsl_snapshot *snapshot) -{ - struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; - u32 i = 0, payload_bytes; - void *start; - bool ret = false; - - /* Skip if we didn't receive a context bad HFI */ - if (!cmd->hdr) - return false; - - payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - - offsetof(struct hfi_context_bad_cmd, payload); - - start = &cmd->payload[0]; - - while (i < payload_bytes) { - struct payload_section *payload = start + i; - - if 
(payload->type == PAYLOAD_RB) { - adreno_hwsched_snapshot_rb_payload(adreno_dev, - snapshot, payload); - ret = true; - } - - i += sizeof(*payload) + (payload->dwords << 2); - } - - return ret; -} - static int snapshot_context_queue(int id, void *ptr, void *data) { struct kgsl_snapshot *snapshot = data; @@ -381,9 +190,9 @@ void gen7_hwsched_snapshot(struct adreno_device *adreno_dev, * based on MEMKIND_RB */ if (GMU_VER_MINOR(gmu->ver.hfi) < 2) - parse_payload = parse_payload_rb_legacy(adreno_dev, snapshot); + parse_payload = adreno_hwsched_parse_payload_rb_legacy(adreno_dev, snapshot); else - parse_payload = parse_payload_rb(adreno_dev, snapshot); + parse_payload = adreno_hwsched_parse_payload_rb(adreno_dev, snapshot); if (parse_payload) skip_memkind_rb = true; @@ -1861,6 +1670,7 @@ const struct adreno_hwsched_ops gen7_hwsched_ops = { .submit_drawobj = gen7_hwsched_submit_drawobj, .preempt_count = gen7_hwsched_preempt_count_get, .create_hw_fence = gen7_hwsched_create_hw_fence, + .get_rb_hostptr = gen7_hwsched_get_rb_hostptr, }; int gen7_hwsched_probe(struct platform_device *pdev, diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 8024ca8f53..47cdf27550 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -4208,3 +4208,22 @@ done: return ret; } + +void *gen7_hwsched_get_rb_hostptr(struct adreno_device *adreno_dev, + u64 gpuaddr, u32 size) +{ + struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev); + u64 offset; + u32 i; + + for (i = 0; i < hw_hfi->mem_alloc_entries; i++) { + struct kgsl_memdesc *md = hw_hfi->mem_alloc_table[i].md; + + if (kgsl_gpuaddr_in_memdesc(md, gpuaddr, size)) { + offset = gpuaddr - md->gpuaddr; + return md->hostptr + offset; + } + } + + return NULL; +} diff --git a/adreno_gen7_hwsched_hfi.h b/adreno_gen7_hwsched_hfi.h index a42e7663e9..2b05d2e94f 100644 --- a/adreno_gen7_hwsched_hfi.h +++ b/adreno_gen7_hwsched_hfi.h @@ -329,4 +329,14 @@ void gen7_hwsched_process_msgq(struct adreno_device 
*adreno_dev); */ int gen7_hwsched_boot_gpu(struct adreno_device *adreno_dev); +/** + * gen7_hwsched_get_rb_hostptr - Get ringbuffer host pointer + * @adreno_dev: pointer to the adreno device + * @gpuaddr: ringbuffer gpu address + * @size: size of the ringbuffer + * + * Return: Host pointer of the gpu ringbuffer + */ +void *gen7_hwsched_get_rb_hostptr(struct adreno_device *adreno_dev, + u64 gpuaddr, u32 size); #endif diff --git a/adreno_gen8.c b/adreno_gen8.c index 695ffe3e69..c949b539c5 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -2401,6 +2401,8 @@ int gen8_probe_common(struct platform_device *pdev, static u32 gen8_register_offsets[ADRENO_REG_REGISTER_MAX] = { ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, GEN8_CP_RB_BASE_LO_GC), ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, GEN8_CP_RB_BASE_HI_GC), + ADRENO_REG_DEFINE(ADRENO_REG_CP_LPAC_RB_BASE, GEN8_CP_RB_BASE_LO_LPAC), + ADRENO_REG_DEFINE(ADRENO_REG_CP_LPAC_RB_BASE_HI, GEN8_CP_RB_BASE_HI_LPAC), ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, GEN8_CP_RB_RPTR_BR), ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, GEN8_CP_RB_WPTR_GC), ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, GEN8_CP_SQE_CNTL), diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 4694de955c..4ff3deb756 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -62,33 +62,6 @@ void gen8_hwsched_fault(struct adreno_device *adreno_dev, u32 fault) adreno_hwsched_fault(adreno_dev, fault); } -static size_t gen8_hwsched_snapshot_rb(struct kgsl_device *device, u8 *buf, - size_t remain, void *priv) -{ - struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf; - u32 *data = (u32 *)(buf + sizeof(*header)); - struct kgsl_memdesc *rb = (struct kgsl_memdesc *)priv; - - if (remain < rb->size + sizeof(*header)) { - SNAPSHOT_ERR_NOMEM(device, "RB"); - return 0; - } - - header->start = 0; - header->end = rb->size >> 2; - header->rptr = 0; - header->rbsize = rb->size >> 2; - header->count = rb->size >> 2; - header->timestamp_queued = 0; -
header->timestamp_retired = 0; - header->gpuaddr = rb->gpuaddr; - header->id = 0; - - memcpy(data, rb->hostptr, rb->size); - - return rb->size + sizeof(*header); -} - static void gen8_hwsched_snapshot_preemption_record(struct kgsl_device *device, struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md, u64 offset) { @@ -140,138 +113,6 @@ static void snapshot_preemption_records(struct kgsl_device *device, offset); } -static void *get_rb_hostptr(struct adreno_device *adreno_dev, - u64 gpuaddr, u32 size) -{ - struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev); - u64 offset; - u32 i; - - for (i = 0; i < hw_hfi->mem_alloc_entries; i++) { - struct kgsl_memdesc *md = hw_hfi->mem_alloc_table[i].md; - - if (md && (gpuaddr >= md->gpuaddr) && - ((gpuaddr + size) <= (md->gpuaddr + md->size))) { - offset = gpuaddr - md->gpuaddr; - return md->hostptr + offset; - } - } - - return NULL; -} - -static u32 gen8_copy_gpu_global(void *out, void *in, u32 size) -{ - if (out && in) { - memcpy(out, in, size); - return size; - } - - return 0; -} - -static void adreno_hwsched_snapshot_rb_payload(struct adreno_device *adreno_dev, - struct kgsl_snapshot *snapshot, struct payload_section *payload) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct kgsl_snapshot_section_header *section_header = - (struct kgsl_snapshot_section_header *)snapshot->ptr; - u8 *buf = snapshot->ptr + sizeof(*section_header); - struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf; - u32 *data = (u32 *)(buf + sizeof(*header)); - u32 size = gen8_hwsched_parse_payload(payload, KEY_RB_SIZEDWORDS) << 2; - u64 lo, hi, gpuaddr; - void *rb_hostptr; - char str[16]; - - lo = gen8_hwsched_parse_payload(payload, KEY_RB_GPUADDR_LO); - hi = gen8_hwsched_parse_payload(payload, KEY_RB_GPUADDR_HI); - gpuaddr = hi << 32 | lo; - - /* Sanity check to make sure there is enough for the header */ - if (snapshot->remain < sizeof(*section_header)) - goto err; - - rb_hostptr = 
get_rb_hostptr(adreno_dev, gpuaddr, size); - - /* If the gpuaddress and size don't match any allocation, then abort */ - if (((snapshot->remain - sizeof(*section_header)) < - (size + sizeof(*header))) || - !gen8_copy_gpu_global(data, rb_hostptr, size)) - goto err; - - if (device->dump_all_ibs) { - u64 rbaddr, lpac_rbaddr; - - kgsl_regread64(device, GEN8_CP_RB_BASE_LO_GC, - GEN8_CP_RB_BASE_HI_GC, &rbaddr); - kgsl_regread64(device, GEN8_CP_RB_BASE_LO_LPAC, - GEN8_CP_RB_BASE_HI_LPAC, &lpac_rbaddr); - - /* Parse all IBs from current RB */ - if ((rbaddr == gpuaddr) || (lpac_rbaddr == gpuaddr)) - adreno_snapshot_dump_all_ibs(device, rb_hostptr, snapshot); - } - - header->start = 0; - header->end = size >> 2; - header->rptr = gen8_hwsched_parse_payload(payload, KEY_RB_RPTR); - header->wptr = gen8_hwsched_parse_payload(payload, KEY_RB_WPTR); - header->rbsize = size >> 2; - header->count = size >> 2; - header->timestamp_queued = gen8_hwsched_parse_payload(payload, - KEY_RB_QUEUED_TS); - header->timestamp_retired = gen8_hwsched_parse_payload(payload, - KEY_RB_RETIRED_TS); - header->gpuaddr = gpuaddr; - header->id = gen8_hwsched_parse_payload(payload, KEY_RB_ID); - - section_header->magic = SNAPSHOT_SECTION_MAGIC; - section_header->id = KGSL_SNAPSHOT_SECTION_RB_V2; - section_header->size = size + sizeof(*header) + sizeof(*section_header); - - snapshot->ptr += section_header->size; - snapshot->remain -= section_header->size; - snapshot->size += section_header->size; - - return; -err: - snprintf(str, sizeof(str), "RB addr:0x%llx", gpuaddr); - SNAPSHOT_ERR_NOMEM(device, str); -} - -static bool parse_payload_rb(struct adreno_device *adreno_dev, - struct kgsl_snapshot *snapshot) -{ - struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; - u32 i = 0, payload_bytes; - void *start; - bool ret = false; - - /* Skip if we didn't receive a context bad HFI */ - if (!cmd->hdr) - return false; - - payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - - offsetof(struct 
hfi_context_bad_cmd, payload); - - start = &cmd->payload[0]; - - while (i < payload_bytes) { - struct payload_section *payload = start + i; - - if (payload->type == PAYLOAD_RB) { - adreno_hwsched_snapshot_rb_payload(adreno_dev, - snapshot, payload); - ret = true; - } - - i += sizeof(*payload) + (payload->dwords << 2); - } - - return ret; -} - static int snapshot_context_queue(int id, void *ptr, void *data) { struct kgsl_snapshot *snapshot = data; @@ -339,7 +180,7 @@ void gen8_hwsched_snapshot(struct adreno_device *adreno_dev, * payloads are not present, fall back to dumping ringbuffers * based on MEMKIND_RB */ - parse_payload = parse_payload_rb(adreno_dev, snapshot); + parse_payload = adreno_hwsched_parse_payload_rb(adreno_dev, snapshot); if (parse_payload) skip_memkind_rb = true; @@ -350,7 +191,7 @@ void gen8_hwsched_snapshot(struct adreno_device *adreno_dev, if (entry->desc.mem_kind == HFI_MEMKIND_RB && !skip_memkind_rb) kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_RB_V2, - snapshot, gen8_hwsched_snapshot_rb, + snapshot, adreno_hwsched_snapshot_rb, entry->md); if (entry->desc.mem_kind == HFI_MEMKIND_SCRATCH) @@ -1865,6 +1706,7 @@ const struct adreno_hwsched_ops gen8_hwsched_ops = { .submit_drawobj = gen8_hwsched_submit_drawobj, .preempt_count = gen8_hwsched_preempt_count_get, .create_hw_fence = gen8_hwsched_create_hw_fence, + .get_rb_hostptr = gen8_hwsched_get_rb_hostptr, }; int gen8_hwsched_probe(struct platform_device *pdev, diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 1e641ea2e2..701af8c8d2 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -191,19 +191,6 @@ static void log_profiling_info(struct adreno_device *adreno_dev, u32 *rcvd) kgsl_context_put(context); } -u32 gen8_hwsched_parse_payload(struct payload_section *payload, u32 key) -{ - u32 i; - - /* Each key-value pair is 2 dwords */ - for (i = 0; i < payload->dwords; i += 2) { - if (payload->data[i] == key) - return payload->data[i + 1]; - } - - 
return 0; -} - struct syncobj_flags { unsigned long mask; const char *name; @@ -311,7 +298,7 @@ static u32 gen8_hwsched_lookup_key_value(struct adreno_device *adreno_dev, struct payload_section *payload = start + i; if (payload->type == type) - return gen8_hwsched_parse_payload(payload, key); + return adreno_hwsched_parse_payload(payload, key); i += struct_size(payload, data, payload->dwords); } @@ -338,10 +325,10 @@ static u32 get_payload_rb_key(struct adreno_device *adreno_dev, struct payload_section *payload = start + i; if (payload->type == PAYLOAD_RB) { - u32 id = gen8_hwsched_parse_payload(payload, KEY_RB_ID); + u32 id = adreno_hwsched_parse_payload(payload, KEY_RB_ID); if (id == rb_id) - return gen8_hwsched_parse_payload(payload, key); + return adreno_hwsched_parse_payload(payload, key); } i += struct_size(payload, data, payload->dwords); @@ -3804,3 +3791,22 @@ done: return ret; } + +void *gen8_hwsched_get_rb_hostptr(struct adreno_device *adreno_dev, + u64 gpuaddr, u32 size) +{ + struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev); + u64 offset; + u32 i; + + for (i = 0; i < hw_hfi->mem_alloc_entries; i++) { + struct kgsl_memdesc *md = hw_hfi->mem_alloc_table[i].md; + + if (kgsl_gpuaddr_in_memdesc(md, gpuaddr, size)) { + offset = gpuaddr - md->gpuaddr; + return md->hostptr + offset; + } + } + + return NULL; +} diff --git a/adreno_gen8_hwsched_hfi.h b/adreno_gen8_hwsched_hfi.h index 3930a6ff40..9c37cc6340 100644 --- a/adreno_gen8_hwsched_hfi.h +++ b/adreno_gen8_hwsched_hfi.h @@ -209,18 +209,6 @@ struct gen8_hwsched_hfi *to_gen8_hwsched_hfi(struct adreno_device *adreno_dev); */ u32 gen8_hwsched_preempt_count_get(struct adreno_device *adreno_dev); -/** - * gen8_hwsched_parse_payload - Parse payload to look up a key - * @payload: Pointer to a payload section - * @key: The key who's value is to be looked up - * - * This function parses the payload data which is a sequence - * of key-value pairs. 
- * - * Return: The value of the key or 0 if key is not found - */ -u32 gen8_hwsched_parse_payload(struct payload_section *payload, u32 key); - /** * gen8_hwsched_lpac_cp_init - Send CP_INIT to LPAC via HFI * @adreno_dev: Pointer to adreno device structure @@ -328,4 +316,14 @@ void gen8_hwsched_process_msgq(struct adreno_device *adreno_dev); */ int gen8_hwsched_boot_gpu(struct adreno_device *adreno_dev); +/** + * gen8_hwsched_get_rb_hostptr - Get ringbuffer host pointer + * @adreno_dev: pointer to the adreno device + * @gpuaddr: ringbuffer gpu address + * @size: size of the ringbuffer + * + * Return: Host pointer of the gpu ringbuffer + */ +void *gen8_hwsched_get_rb_hostptr(struct adreno_device *adreno_dev, + u64 gpuaddr, u32 size); #endif diff --git a/adreno_hwsched.c b/adreno_hwsched.c index f5ffca5def..6701c26c18 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -2554,3 +2554,175 @@ void adreno_hwsched_remove_hw_fence_entry(struct adreno_device *adreno_dev, kmem_cache_free(hwsched->hw_fence_cache, entry); kgsl_context_put_deferred(&drawctxt->base); } + +size_t adreno_hwsched_snapshot_rb(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf; + u32 *data = (u32 *)(buf + sizeof(*header)); + struct kgsl_memdesc *rb = (struct kgsl_memdesc *)priv; + + if (remain < rb->size + sizeof(*header)) { + SNAPSHOT_ERR_NOMEM(device, "RB"); + return 0; + } + + header->start = 0; + header->end = rb->size >> 2; + header->rptr = 0; + header->rbsize = rb->size >> 2; + header->count = rb->size >> 2; + header->timestamp_queued = 0; + header->timestamp_retired = 0; + header->gpuaddr = rb->gpuaddr; + header->id = 0; + + memcpy(data, rb->hostptr, rb->size); + + return rb->size + sizeof(*header); +} + +static u32 copy_gpu_global(void *out, void *in, u32 size) +{ + if (out && in) { + memcpy(out, in, size); + return size; + } + + return 0; +} + +static void adreno_hwsched_snapshot_rb_payload(struct 
adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot, struct payload_section *payload) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_snapshot_section_header *section_header = + (struct kgsl_snapshot_section_header *)snapshot->ptr; + u8 *buf = snapshot->ptr + sizeof(*section_header); + struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf; + u32 *data = (u32 *)(buf + sizeof(*header)); + u32 size = adreno_hwsched_parse_payload(payload, KEY_RB_SIZEDWORDS) << 2; + const struct adreno_hwsched_ops *hwsched_ops = adreno_dev->hwsched.hwsched_ops; + u64 lo, hi, gpuaddr; + void *rb_hostptr = NULL; + char str[16]; + + lo = adreno_hwsched_parse_payload(payload, KEY_RB_GPUADDR_LO); + hi = adreno_hwsched_parse_payload(payload, KEY_RB_GPUADDR_HI); + gpuaddr = hi << 32 | lo; + + /* Sanity check to make sure there is enough for the header */ + if (snapshot->remain < sizeof(*section_header)) + goto err; + + if (hwsched_ops->get_rb_hostptr) + rb_hostptr = hwsched_ops->get_rb_hostptr(adreno_dev, gpuaddr, size); + + /* If the gpuaddress and size don't match any allocation, then abort */ + if (((snapshot->remain - sizeof(*section_header)) < (size + sizeof(*header))) || + !copy_gpu_global(data, rb_hostptr, size)) + goto err; + + if (device->dump_all_ibs) { + u64 rbaddr, lpac_rbaddr; + + adreno_readreg64(adreno_dev, ADRENO_REG_CP_RB_BASE, + ADRENO_REG_CP_RB_BASE_HI, &rbaddr); + + adreno_readreg64(adreno_dev, ADRENO_REG_CP_LPAC_RB_BASE, + ADRENO_REG_CP_LPAC_RB_BASE_HI, &lpac_rbaddr); + + /* Parse all IBs from current RB */ + if ((rbaddr == gpuaddr) || (lpac_rbaddr == gpuaddr)) + adreno_snapshot_dump_all_ibs(device, rb_hostptr, snapshot); + } + + header->start = 0; + header->end = size >> 2; + header->rptr = adreno_hwsched_parse_payload(payload, KEY_RB_RPTR); + header->wptr = adreno_hwsched_parse_payload(payload, KEY_RB_WPTR); + header->rbsize = size >> 2; + header->count = size >> 2; + header->timestamp_queued = 
adreno_hwsched_parse_payload(payload, + KEY_RB_QUEUED_TS); + header->timestamp_retired = adreno_hwsched_parse_payload(payload, + KEY_RB_RETIRED_TS); + header->gpuaddr = gpuaddr; + header->id = adreno_hwsched_parse_payload(payload, KEY_RB_ID); + + section_header->magic = SNAPSHOT_SECTION_MAGIC; + section_header->id = KGSL_SNAPSHOT_SECTION_RB_V2; + section_header->size = size + sizeof(*header) + sizeof(*section_header); + + snapshot->ptr += section_header->size; + snapshot->remain -= section_header->size; + snapshot->size += section_header->size; + + return; +err: + snprintf(str, sizeof(str), "RB addr:0x%llx", gpuaddr); + SNAPSHOT_ERR_NOMEM(device, str); +} + +bool adreno_hwsched_parse_payload_rb_legacy(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct hfi_context_bad_cmd_legacy *cmd = adreno_dev->hwsched.ctxt_bad; + u32 i = 0, payload_bytes; + void *start; + bool ret = false; + + /* Skip if we didn't receive a context bad HFI */ + if (!cmd->hdr) + return false; + + payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - + offsetof(struct hfi_context_bad_cmd_legacy, payload); + + start = &cmd->payload[0]; + + while (i < payload_bytes) { + struct payload_section *payload = start + i; + + if (payload->type == PAYLOAD_RB) { + adreno_hwsched_snapshot_rb_payload(adreno_dev, snapshot, payload); + ret = true; + } + + i += sizeof(*payload) + (payload->dwords << 2); + } + + return ret; +} + +bool adreno_hwsched_parse_payload_rb(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; + u32 i = 0, payload_bytes; + void *start; + bool ret = false; + + /* Skip if we didn't receive a context bad HFI */ + if (!cmd->hdr) + return false; + + payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - + offsetof(struct hfi_context_bad_cmd, payload); + + start = &cmd->payload[0]; + + while (i < payload_bytes) { + struct payload_section *payload = start + i; + + if (payload->type == 
PAYLOAD_RB) { + adreno_hwsched_snapshot_rb_payload(adreno_dev, + snapshot, payload); + ret = true; + } + + i += sizeof(*payload) + (payload->dwords << 2); + } + + return ret; +} diff --git a/adreno_hwsched.h b/adreno_hwsched.h index 1288bc1f80..3ffbf4fcc3 100644 --- a/adreno_hwsched.h +++ b/adreno_hwsched.h @@ -52,7 +52,10 @@ struct adreno_hwsched_ops { */ void (*create_hw_fence)(struct adreno_device *adreno_dev, struct kgsl_sync_fence *kfence); - + /** + * @get_rb_hostptr - Target specific function to get ringbuffer host pointer + */ + void *(*get_rb_hostptr)(struct adreno_device *adreno_dev, u64 gpuaddr, u32 size); }; /** @@ -289,4 +292,10 @@ int adreno_hwsched_poll_msg_queue_write_index(struct kgsl_memdesc *hfi_mem); void adreno_hwsched_remove_hw_fence_entry(struct adreno_device *adreno_dev, struct adreno_hw_fence_entry *entry); +bool adreno_hwsched_parse_payload_rb_legacy(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); +bool adreno_hwsched_parse_payload_rb(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); +size_t adreno_hwsched_snapshot_rb(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv); #endif From a52560c32c4effed09bb02ed7d53f8e678020efd Mon Sep 17 00:00:00 2001 From: Bruce Levy Date: Tue, 18 Jun 2024 23:11:06 -0700 Subject: [PATCH 0815/1016] Revert "msm: kgsl: Pin a process in memory while creating a bind object" This reverts commit 176f02051816902621fa14512813305ac4d14b95. 
Signed-off-by: Bruce Levy Change-Id: I9246ae4373d5a3d5d28e92d661ff1e44a512fcd6 --- kgsl_vbo.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/kgsl_vbo.c b/kgsl_vbo.c index a8232dac5a..d27b7de4e0 100644 --- a/kgsl_vbo.c +++ b/kgsl_vbo.c @@ -12,7 +12,6 @@ #include "kgsl_device.h" #include "kgsl_mmu.h" -#include "kgsl_reclaim.h" #include "kgsl_sharedmem.h" #include "kgsl_trace.h" @@ -427,12 +426,6 @@ kgsl_sharedmem_create_bind_op(struct kgsl_process_private *private, op->nr_ops = ranges_nents; op->target = target; - /* Make sure process is pinned in memory before proceeding */ - atomic_inc(&private->cmd_count); - ret = kgsl_reclaim_to_pinned_state(private); - if (ret) - goto err; - for (i = 0; i < ranges_nents; i++) { struct kgsl_gpumem_bind_range range; struct kgsl_mem_entry *entry; @@ -533,14 +526,12 @@ kgsl_sharedmem_create_bind_op(struct kgsl_process_private *private, ranges += ranges_size; } - atomic_dec(&private->cmd_count); init_completion(&op->comp); kref_init(&op->ref); return op; err: - atomic_dec(&private->cmd_count); kgsl_sharedmem_free_bind_op(op); return ERR_PTR(ret); } From 33226ccd655f843df2e380c4c7a53237d5cdc35d Mon Sep 17 00:00:00 2001 From: Bruce Levy Date: Tue, 18 Jun 2024 23:11:09 -0700 Subject: [PATCH 0816/1016] Revert "msm: kgsl: Remove duplicate bind operation code" This reverts commit 0e31eb38c25e96adb65e1db8999f21f5c3df9019. 
Signed-off-by: Bruce Levy Change-Id: I532e37947209c99b88259ffe3675b4804da08c75 --- kgsl_vbo.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/kgsl_vbo.c b/kgsl_vbo.c index d27b7de4e0..bf72c139db 100644 --- a/kgsl_vbo.c +++ b/kgsl_vbo.c @@ -369,13 +369,10 @@ static void kgsl_sharedmem_free_bind_op(struct kgsl_sharedmem_bind_op *op) /* Decrement the vbo_count we added when creating the bind_op */ if (op->ops[i].entry) atomic_dec(&op->ops[i].entry->vbo_count); - - /* Release the reference on the child entry */ - kgsl_mem_entry_put_deferred(op->ops[i].entry); + kgsl_mem_entry_put(op->ops[i].entry); } - /* Release the reference on the target entry */ - kgsl_mem_entry_put_deferred(op->target); + kgsl_mem_entry_put(op->target); kvfree(op->ops); kfree(op); @@ -562,16 +559,23 @@ static void kgsl_sharedmem_bind_worker(struct work_struct *work) op->ops[i].start, op->ops[i].last, op->ops[i].entry); + + /* Release the reference on the child entry */ + kgsl_mem_entry_put(op->ops[i].entry); + op->ops[i].entry = NULL; } + /* Release the reference on the target entry */ + kgsl_mem_entry_put(op->target); + op->target = NULL; + /* Wake up any threads waiting for the bind operation */ complete_all(&op->comp); if (op->callback) op->callback(op); - /* Put the refcount we took when scheduling the worker */ - kgsl_sharedmem_put_bind_op(op); + kref_put(&op->ref, kgsl_sharedmem_bind_range_destroy); } void kgsl_sharedmem_bind_ranges(struct kgsl_sharedmem_bind_op *op) From 9afc9d6a4807212f74e4d93fc50e55e55a9bc71d Mon Sep 17 00:00:00 2001 From: Bruce Levy Date: Tue, 18 Jun 2024 23:11:12 -0700 Subject: [PATCH 0817/1016] Revert "kgsl: gen8: Rearrange some GMU register dumping in snapshot" This reverts commit 0e87816bfb87648f8f6125e3f2dab2688e6df197. 
Signed-off-by: Bruce Levy Change-Id: I8f78c7dcb7cf244fc560a9f55bc20bed75499e75 --- adreno_gen8_0_0_snapshot.h | 21 +++---- adreno_gen8_snapshot.c | 123 +++++++++++++++++-------------------- 2 files changed, 67 insertions(+), 77 deletions(-) diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h index 5b09838f4d..cb924cc162 100644 --- a/adreno_gen8_0_0_snapshot.h +++ b/adreno_gen8_0_0_snapshot.h @@ -1883,6 +1883,11 @@ static struct gen8_reg_list gen8_0_0_ahb_registers[] = { { UNSLICE, gen8_0_0_ahb_secure_gpu_registers }, }; +static struct gen8_reg_list gen8_gmu_gx_registers[] = { + { UNSLICE, gen8_0_0_gmugx_registers }, + { SLICE, gen8_0_0_gmugx_slice_registers }, +}; + /* * Block : ['GDPM_LKG'] * REGION : UNSLICE @@ -2057,16 +2062,10 @@ static const u32 *gen8_0_0_external_core_regs[] = { gen8_0_0_gpu_cc_ahb2phy_swman_registers, gen8_0_0_gpu_cc_gpu_cc_reg_registers, gen8_0_0_gpu_cc_pll0_cm_pll_taycan_common_registers, + gen8_0_0_acd_acd_mnd_registers, + gen8_0_0_gx_clkctl_ahb2phy_broadcast_swman_registers, + gen8_0_0_gx_clkctl_ahb2phy_swman_registers, + gen8_0_0_gx_clkctl_pll0_cm_pll_taycan_common_registers, + gen8_0_0_gx_clkctl_gx_clkctl_reg_registers, }; - -static struct gen8_reg_list gen8_gmu_gx_registers[] = { - { UNSLICE, gen8_0_0_gmugx_registers }, - { UNSLICE, gen8_0_0_acd_acd_mnd_registers }, - { UNSLICE, gen8_0_0_gx_clkctl_ahb2phy_broadcast_swman_registers }, - { UNSLICE, gen8_0_0_gx_clkctl_ahb2phy_swman_registers }, - { UNSLICE, gen8_0_0_gx_clkctl_pll0_cm_pll_taycan_common_registers }, - { UNSLICE, gen8_0_0_gx_clkctl_gx_clkctl_reg_registers }, - { SLICE, gen8_0_0_gmugx_slice_registers }, -}; - #endif /*_ADRENO_GEN8_0_0_SNAPSHOT_H */ diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index eda8485035..9915ffa878 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -219,13 +219,11 @@ size_t gen8_legacy_snapshot_registers(struct kgsl_device *device, if (info->regs->sel) kgsl_regwrite(device, info->regs->sel->host_reg, 
info->regs->sel->val); - if (info->regs->slice_region) { - kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL - (info->slice_id, 0, 0, 0)); + kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL + (info->slice_id, 0, 0, 0)); - /* Make sure the previous writes are posted before reading */ - mb(); - } + /* Make sure the previous writes are posted before reading */ + mb(); for (ptr = info->regs->regs; ptr[0] != UINT_MAX; ptr += 2) { count = REG_COUNT(ptr); @@ -1331,60 +1329,6 @@ static size_t gen8_snapshot_cx_side_dbgc_debugbus_block(struct kgsl_device *devi return size; } -static void gen8_snapshot_cx_debugbus(struct adreno_device *adreno_dev, - struct kgsl_snapshot *snapshot) -{ - u32 i; - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLT, - FIELD_PREP(GENMASK(31, 28), 0xf)); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLM, - FIELD_PREP(GENMASK(27, 24), 0xf)); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_0, - FIELD_PREP(GENMASK(3, 0), 0x0) | - FIELD_PREP(GENMASK(7, 4), 0x1) | - FIELD_PREP(GENMASK(11, 8), 0x2) | - FIELD_PREP(GENMASK(15, 12), 0x3) | - FIELD_PREP(GENMASK(19, 16), 0x4) | - FIELD_PREP(GENMASK(23, 20), 0x5) | - FIELD_PREP(GENMASK(27, 24), 0x6) | - FIELD_PREP(GENMASK(31, 28), 0x7)); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_1, - FIELD_PREP(GENMASK(3, 0), 0x8) | - FIELD_PREP(GENMASK(7, 4), 0x9) | - FIELD_PREP(GENMASK(11, 8), 0xa) | - FIELD_PREP(GENMASK(15, 12), 0xb) | - FIELD_PREP(GENMASK(19, 16), 0xc) | - FIELD_PREP(GENMASK(23, 20), 0xd) | - FIELD_PREP(GENMASK(27, 24), 0xe) | - FIELD_PREP(GENMASK(31, 28), 0xf)); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); - 
kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); - - /* Dump the CX debugbus data if the block exists */ - if (kgsl_regmap_valid_offset(&device->regmap, GEN8_CX_DBGC_CFG_DBGBUS_SEL_A)) { - for (i = 0; i < gen8_snapshot_block_list->cx_debugbus_blocks_len; i++) { - kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_DEBUGBUS, - snapshot, gen8_snapshot_cx_dbgc_debugbus_block, - (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); - kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, - snapshot, gen8_snapshot_cx_side_dbgc_debugbus_block, - (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); - } - } -} - /* gen8_snapshot_debugbus() - Capture debug bus data */ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) @@ -1427,6 +1371,41 @@ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_MASKL_2, 0); kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_MASKL_3, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLT, + FIELD_PREP(GENMASK(31, 28), 0xf)); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLM, + FIELD_PREP(GENMASK(27, 24), 0xf)); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_0, + FIELD_PREP(GENMASK(3, 0), 0x0) | + FIELD_PREP(GENMASK(7, 4), 0x1) | + FIELD_PREP(GENMASK(11, 8), 0x2) | + FIELD_PREP(GENMASK(15, 12), 0x3) | + FIELD_PREP(GENMASK(19, 16), 0x4) | + FIELD_PREP(GENMASK(23, 20), 0x5) | + FIELD_PREP(GENMASK(27, 24), 0x6) | + FIELD_PREP(GENMASK(31, 28), 0x7)); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_1, + FIELD_PREP(GENMASK(3, 0), 0x8) | + FIELD_PREP(GENMASK(7, 4), 0x9) | + FIELD_PREP(GENMASK(11, 8), 0xa) | + FIELD_PREP(GENMASK(15, 12), 0xb) | + 
FIELD_PREP(GENMASK(19, 16), 0xc) | + FIELD_PREP(GENMASK(23, 20), 0xd) | + FIELD_PREP(GENMASK(27, 24), 0xe) | + FIELD_PREP(GENMASK(31, 28), 0xf)); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_2, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_3, 0); + for (i = 0; i < gen8_snapshot_block_list->debugbus_blocks_len; i++) { kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUGBUS, @@ -1448,6 +1427,20 @@ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, snapshot, gen8_snapshot_dbgc_side_debugbus_block, (void *) &gen8_snapshot_block_list->gbif_debugbus_blocks[i]); } + + /* Dump the CX debugbus data if the block exists */ + if (kgsl_regmap_valid_offset(&device->regmap, GEN8_CX_DBGC_CFG_DBGBUS_SEL_A)) { + for (i = 0; i < gen8_snapshot_block_list->cx_debugbus_blocks_len; i++) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, gen8_snapshot_cx_dbgc_debugbus_block, + (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, + snapshot, gen8_snapshot_cx_side_dbgc_debugbus_block, + (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); + } + } } /* gen8_snapshot_sqe() - Dump SQE data in snapshot */ @@ -1709,9 +1702,11 @@ void gen8_snapshot(struct adreno_device *adreno_dev, if (!gmu_core_isenabled(device)) gen8_snapshot_external_core_regs(device, snapshot); - gen8_cx_misc_regs_snapshot(device, snapshot); + gen8_snapshot_trace_buffer(device, snapshot); - gen8_snapshot_cx_debugbus(adreno_dev, snapshot); + gen8_snapshot_debugbus(adreno_dev, snapshot); + + gen8_cx_misc_regs_snapshot(device, snapshot); /* SQE Firmware */ kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, @@ -1724,10 +1719,6 @@ void gen8_snapshot(struct adreno_device *adreno_dev, if (!adreno_gx_is_on(adreno_dev)) return; - 
gen8_snapshot_trace_buffer(device, snapshot); - - gen8_snapshot_debugbus(adreno_dev, snapshot); - is_current_rt = rt_task(current); if (is_current_rt) From cc6408ebb4b40905cf101a8416a58528b48ef7c6 Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Wed, 29 May 2024 15:38:54 -0700 Subject: [PATCH 0818/1016] kgsl: gen8: Rearrange some GMU register dumping in snapshot Ensure GX GDSC is ON to dump GXCLKCNTL registers and Debugbus in snapshot. Signed-off-by: Urvashi Agrawal Signed-off-by: Xhoendi Collaku Change-Id: I3778108f3f12fdba47beb3d8a94f4a583fab4736 --- adreno_gen8_0_0_snapshot.h | 21 +++--- adreno_gen8_gmu.c | 11 ++++ adreno_gen8_gmu.h | 1 + adreno_gen8_gmu_snapshot.c | 2 +- adreno_gen8_snapshot.c | 127 ++++++++++++++++++++----------------- 5 files changed, 92 insertions(+), 70 deletions(-) diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h index cb924cc162..5b09838f4d 100644 --- a/adreno_gen8_0_0_snapshot.h +++ b/adreno_gen8_0_0_snapshot.h @@ -1883,11 +1883,6 @@ static struct gen8_reg_list gen8_0_0_ahb_registers[] = { { UNSLICE, gen8_0_0_ahb_secure_gpu_registers }, }; -static struct gen8_reg_list gen8_gmu_gx_registers[] = { - { UNSLICE, gen8_0_0_gmugx_registers }, - { SLICE, gen8_0_0_gmugx_slice_registers }, -}; - /* * Block : ['GDPM_LKG'] * REGION : UNSLICE @@ -2062,10 +2057,16 @@ static const u32 *gen8_0_0_external_core_regs[] = { gen8_0_0_gpu_cc_ahb2phy_swman_registers, gen8_0_0_gpu_cc_gpu_cc_reg_registers, gen8_0_0_gpu_cc_pll0_cm_pll_taycan_common_registers, - gen8_0_0_acd_acd_mnd_registers, - gen8_0_0_gx_clkctl_ahb2phy_broadcast_swman_registers, - gen8_0_0_gx_clkctl_ahb2phy_swman_registers, - gen8_0_0_gx_clkctl_pll0_cm_pll_taycan_common_registers, - gen8_0_0_gx_clkctl_gx_clkctl_reg_registers, }; + +static struct gen8_reg_list gen8_gmu_gx_registers[] = { + { UNSLICE, gen8_0_0_gmugx_registers }, + { UNSLICE, gen8_0_0_acd_acd_mnd_registers }, + { UNSLICE, gen8_0_0_gx_clkctl_ahb2phy_broadcast_swman_registers }, + { UNSLICE, 
gen8_0_0_gx_clkctl_ahb2phy_swman_registers }, + { UNSLICE, gen8_0_0_gx_clkctl_pll0_cm_pll_taycan_common_registers }, + { UNSLICE, gen8_0_0_gx_clkctl_gx_clkctl_reg_registers }, + { SLICE, gen8_0_0_gmugx_slice_registers }, +}; + #endif /*_ADRENO_GEN8_0_0_SNAPSHOT_H */ diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index de6fcc89b3..6a900072ad 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -684,6 +684,14 @@ bool gen8_gmu_gx_is_on(struct adreno_device *adreno_dev) return is_on(val); } +bool gen8_gmu_rpmh_pwr_state_is_active(struct kgsl_device *device) +{ + u32 val; + + gmu_core_regread(device, GEN8_GMUCX_RPMH_POWER_STATE, &val); + return (val == GPU_HW_ACTIVE) ? true : false; +} + static const char *idle_level_name(int level) { if (level == GPU_HW_ACTIVE) @@ -855,6 +863,9 @@ void gen8_gmu_register_config(struct adreno_device *adreno_dev) /* Clear any previously set cm3 fault */ atomic_set(&gmu->cm3_fault, 0); + /* Init the power state register before GMU turns on GX */ + gmu_core_regwrite(device, GEN8_GMUCX_RPMH_POWER_STATE, 0xDEADD00D); + /* Vote veto for FAL10 */ gmu_core_regwrite(device, GEN8_GMUCX_CX_FALNEXT_INTF, 0x1); gmu_core_regwrite(device, GEN8_GMUCX_CX_FAL_INTF, 0x1); diff --git a/adreno_gen8_gmu.h b/adreno_gen8_gmu.h index 73f9a2d6c2..39bfcc993a 100644 --- a/adreno_gen8_gmu.h +++ b/adreno_gen8_gmu.h @@ -488,4 +488,5 @@ u32 gen8_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab); */ int gen8_gmu_clock_set_rate(struct adreno_device *adreno_dev, u32 req_freq); +bool gen8_gmu_rpmh_pwr_state_is_active(struct kgsl_device *device); #endif diff --git a/adreno_gen8_gmu_snapshot.c b/adreno_gen8_gmu_snapshot.c index 4619c1f26d..179bb6a4fb 100644 --- a/adreno_gen8_gmu_snapshot.c +++ b/adreno_gen8_gmu_snapshot.c @@ -207,7 +207,7 @@ static void gen8_gmu_device_snapshot(struct kgsl_device *device, adreno_snapshot_registers_v2, (void *) gen8_snapshot_block_list->gmu_cx_unsliced_regs); - if (!gen8_gmu_gx_is_on(adreno_dev)) + if 
(!gen8_gmu_rpmh_pwr_state_is_active(device)) goto dtcm; /* Set fence to ALLOW mode so registers can be read */ diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index cfb5ea63ea..e994f909da 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -219,11 +219,13 @@ size_t gen8_legacy_snapshot_registers(struct kgsl_device *device, if (info->regs->sel) kgsl_regwrite(device, info->regs->sel->host_reg, info->regs->sel->val); - kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL - (info->slice_id, 0, 0, 0)); + if (info->regs->slice_region) { + kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL + (info->slice_id, 0, 0, 0)); - /* Make sure the previous writes are posted before reading */ - mb(); + /* Make sure the previous writes are posted before reading */ + mb(); + } for (ptr = info->regs->regs; ptr[0] != UINT_MAX; ptr += 2) { count = REG_COUNT(ptr); @@ -1324,6 +1326,60 @@ static size_t gen8_snapshot_cx_side_dbgc_debugbus_block(struct kgsl_device *devi return size; } +static void gen8_snapshot_cx_debugbus(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + u32 i; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLT, + FIELD_PREP(GENMASK(31, 28), 0xf)); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLM, + FIELD_PREP(GENMASK(27, 24), 0xf)); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_0, + FIELD_PREP(GENMASK(3, 0), 0x0) | + FIELD_PREP(GENMASK(7, 4), 0x1) | + FIELD_PREP(GENMASK(11, 8), 0x2) | + FIELD_PREP(GENMASK(15, 12), 0x3) | + FIELD_PREP(GENMASK(19, 16), 0x4) | + FIELD_PREP(GENMASK(23, 20), 0x5) | + FIELD_PREP(GENMASK(27, 24), 0x6) | + FIELD_PREP(GENMASK(31, 28), 
0x7)); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_1, + FIELD_PREP(GENMASK(3, 0), 0x8) | + FIELD_PREP(GENMASK(7, 4), 0x9) | + FIELD_PREP(GENMASK(11, 8), 0xa) | + FIELD_PREP(GENMASK(15, 12), 0xb) | + FIELD_PREP(GENMASK(19, 16), 0xc) | + FIELD_PREP(GENMASK(23, 20), 0xd) | + FIELD_PREP(GENMASK(27, 24), 0xe) | + FIELD_PREP(GENMASK(31, 28), 0xf)); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); + + /* Dump the CX debugbus data if the block exists */ + if (kgsl_regmap_valid_offset(&device->regmap, GEN8_CX_DBGC_CFG_DBGBUS_SEL_A)) { + for (i = 0; i < gen8_snapshot_block_list->cx_debugbus_blocks_len; i++) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, gen8_snapshot_cx_dbgc_debugbus_block, + (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, + snapshot, gen8_snapshot_cx_side_dbgc_debugbus_block, + (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); + } + } +} + /* gen8_snapshot_debugbus() - Capture debug bus data */ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) @@ -1366,41 +1422,6 @@ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_MASKL_2, 0); kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_MASKL_3, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLT, - FIELD_PREP(GENMASK(31, 28), 0xf)); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLM, - FIELD_PREP(GENMASK(27, 24), 0xf)); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_0, - FIELD_PREP(GENMASK(3, 0), 0x0) | - FIELD_PREP(GENMASK(7, 4), 0x1) | - 
FIELD_PREP(GENMASK(11, 8), 0x2) | - FIELD_PREP(GENMASK(15, 12), 0x3) | - FIELD_PREP(GENMASK(19, 16), 0x4) | - FIELD_PREP(GENMASK(23, 20), 0x5) | - FIELD_PREP(GENMASK(27, 24), 0x6) | - FIELD_PREP(GENMASK(31, 28), 0x7)); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_1, - FIELD_PREP(GENMASK(3, 0), 0x8) | - FIELD_PREP(GENMASK(7, 4), 0x9) | - FIELD_PREP(GENMASK(11, 8), 0xa) | - FIELD_PREP(GENMASK(15, 12), 0xb) | - FIELD_PREP(GENMASK(19, 16), 0xc) | - FIELD_PREP(GENMASK(23, 20), 0xd) | - FIELD_PREP(GENMASK(27, 24), 0xe) | - FIELD_PREP(GENMASK(31, 28), 0xf)); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_2, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_3, 0); - for (i = 0; i < gen8_snapshot_block_list->debugbus_blocks_len; i++) { kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUGBUS, @@ -1422,20 +1443,6 @@ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, snapshot, gen8_snapshot_dbgc_side_debugbus_block, (void *) &gen8_snapshot_block_list->gbif_debugbus_blocks[i]); } - - /* Dump the CX debugbus data if the block exists */ - if (kgsl_regmap_valid_offset(&device->regmap, GEN8_CX_DBGC_CFG_DBGBUS_SEL_A)) { - for (i = 0; i < gen8_snapshot_block_list->cx_debugbus_blocks_len; i++) { - kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_DEBUGBUS, - snapshot, gen8_snapshot_cx_dbgc_debugbus_block, - (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); - kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, - snapshot, gen8_snapshot_cx_side_dbgc_debugbus_block, - (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); - } - } } /* gen8_snapshot_sqe() - Dump SQE data in snapshot */ @@ -1619,7 +1626,7 @@ static void gen8_cx_misc_regs_snapshot(struct kgsl_device *device, u64 *ptr, offset = 0; const u32 *regs_ptr = (const u32 *)gen8_snapshot_block_list->cx_misc_regs; - if 
(CD_SCRIPT_CHECK(device) || !adreno_gx_is_on(ADRENO_DEVICE(device))) + if (CD_SCRIPT_CHECK(device) || !gen8_gmu_rpmh_pwr_state_is_active(device)) goto legacy_snapshot; /* Build the crash script */ @@ -1697,12 +1704,10 @@ void gen8_snapshot(struct adreno_device *adreno_dev, if (!gmu_core_isenabled(device)) gen8_snapshot_external_core_regs(device, snapshot); - gen8_snapshot_trace_buffer(device, snapshot); - - gen8_snapshot_debugbus(adreno_dev, snapshot); - gen8_cx_misc_regs_snapshot(device, snapshot); + gen8_snapshot_cx_debugbus(adreno_dev, snapshot); + /* SQE Firmware */ kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, gen8_snapshot_sqe, NULL); @@ -1711,9 +1716,13 @@ void gen8_snapshot(struct adreno_device *adreno_dev, kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, gen8_snapshot_aqe, NULL); - if (!adreno_gx_is_on(adreno_dev)) + if (!gen8_gmu_rpmh_pwr_state_is_active(device)) return; + gen8_snapshot_trace_buffer(device, snapshot); + + gen8_snapshot_debugbus(adreno_dev, snapshot); + is_current_rt = rt_task(current); if (is_current_rt) From bffc8e58b6f595f33e82062a36bceb9bfe6dc836 Mon Sep 17 00:00:00 2001 From: Bruce Levy Date: Mon, 17 Jun 2024 18:23:39 -0700 Subject: [PATCH 0819/1016] kgsl: Add missing include of linux/of_device.h Resolve compilation errors by adding missing dependency on linux/of_device.h. Change-Id: I8b233e28dfdda3783ec4466a9b0a13519b712c53 Signed-off-by: Bruce Levy Signed-off-by: Carter Cooper --- kgsl_iommu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 2d102b6f2e..6c033c2cf8 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include From 3d2317ace56a6cb0049e8671a6c33ab0a83a9bc4 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 27 Jun 2024 06:09:23 +0530 Subject: [PATCH 0820/1016] kgsl: gen8: Update CLX parameters Update CLX parameters for gen8 targets based on latest recommendation. 
Change-Id: I78883b786bbe4436454ab49b2753269909861e25 Signed-off-by: Kamal Agrawal --- adreno_gen8_hfi.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/adreno_gen8_hfi.c b/adreno_gen8_hfi.c index 1f53eae337..1a79a9d1d3 100644 --- a/adreno_gen8_hfi.c +++ b/adreno_gen8_hfi.c @@ -582,17 +582,29 @@ int gen8_hfi_send_clx_feature_ctrl(struct adreno_device *adreno_dev) return ret; cmd.version = FIELD_PREP(GENMASK(31, 16), 0x2) | FIELD_PREP(GENMASK(15, 0), 0x1); - /* cmd.domain[0] is never used but needed per hfi spec */ + /* GFX domain */ + cmd.domain[0].data0 = FIELD_PREP(GENMASK(31, 29), 1) | + FIELD_PREP(GENMASK(28, 28), 1) | + FIELD_PREP(GENMASK(27, 22), 4) | + FIELD_PREP(GENMASK(21, 16), 55) | + FIELD_PREP(GENMASK(15, 0), 0); + cmd.domain[0].clxt = 0; + cmd.domain[0].clxh = 0; + cmd.domain[0].urgmode = 1; + cmd.domain[0].lkgen = 0; + cmd.domain[0].currbudget = 100; + + /* MxG domain */ cmd.domain[1].data0 = FIELD_PREP(GENMASK(31, 29), 1) | FIELD_PREP(GENMASK(28, 28), 1) | FIELD_PREP(GENMASK(27, 22), 1) | - FIELD_PREP(GENMASK(21, 16), 40) | + FIELD_PREP(GENMASK(21, 16), 55) | FIELD_PREP(GENMASK(15, 0), 0); cmd.domain[1].clxt = 0; cmd.domain[1].clxh = 0; cmd.domain[1].urgmode = 1; cmd.domain[1].lkgen = 0; - cmd.domain[1].currbudget = 50; + cmd.domain[1].currbudget = 100; return gen8_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd)); } From 4d74ae8854c49af29903b6b617c6f876b01e1018 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Sat, 13 Apr 2024 07:40:19 -0700 Subject: [PATCH 0821/1016] kgsl: snapshot: Implement generic function for AQE firmware dump Snapshot AQE firmware dumping is not dependent on hardware. Hence implement target independent generic function to dump AQE firmware in snapshot. 
Change-Id: Id59cf903022f1b4928766cea9756ad87887c1a98 Signed-off-by: Hareesh Gundu --- adreno_gen7_hwsched.c | 31 +------------------------------ adreno_gen8_hwsched.c | 31 +------------------------------ adreno_hwsched.c | 28 ++++++++++++++++++++++++++++ adreno_hwsched.h | 2 ++ 4 files changed, 32 insertions(+), 60 deletions(-) diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 6221ea3413..a07d543334 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -140,35 +140,6 @@ static int snapshot_context_queue(int id, void *ptr, void *data) return 0; } -/* Snapshot AQE buffer */ -static size_t snapshot_aqe_buffer(struct kgsl_device *device, u8 *buf, - size_t remain, void *priv) -{ - struct kgsl_memdesc *memdesc = priv; - - struct kgsl_snapshot_gpu_object_v2 *header = - (struct kgsl_snapshot_gpu_object_v2 *)buf; - - u8 *ptr = buf + sizeof(*header); - - if (IS_ERR_OR_NULL(memdesc) || memdesc->size == 0) - return 0; - - if (remain < (memdesc->size + sizeof(*header))) { - SNAPSHOT_ERR_NOMEM(device, "AQE BUFFER"); - return 0; - } - - header->size = memdesc->size >> 2; - header->gpuaddr = memdesc->gpuaddr; - header->ptbase = MMU_DEFAULT_TTBR0(device); - header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; - - memcpy(ptr, memdesc->hostptr, memdesc->size); - - return memdesc->size + sizeof(*header); -} - void gen7_hwsched_snapshot(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) { @@ -237,7 +208,7 @@ void gen7_hwsched_snapshot(struct adreno_device *adreno_dev, if (entry->desc.mem_kind == HFI_MEMKIND_AQE_BUFFER) kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, - snapshot, snapshot_aqe_buffer, + snapshot, adreno_hwsched_snapshot_aqe_buffer, entry->md); if (entry->desc.mem_kind == HFI_MEMKIND_HW_FENCE) { diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 4ff3deb756..a606ffed60 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -132,35 +132,6 @@ static int snapshot_context_queue(int id, void *ptr, 
void *data) return 0; } -/* Snapshot AQE buffer */ -static size_t snapshot_aqe_buffer(struct kgsl_device *device, u8 *buf, - size_t remain, void *priv) -{ - struct kgsl_memdesc *memdesc = priv; - - struct kgsl_snapshot_gpu_object_v2 *header = - (struct kgsl_snapshot_gpu_object_v2 *)buf; - - u8 *ptr = buf + sizeof(*header); - - if (IS_ERR_OR_NULL(memdesc) || memdesc->size == 0) - return 0; - - if (remain < (memdesc->size + sizeof(*header))) { - SNAPSHOT_ERR_NOMEM(device, "AQE BUFFER"); - return 0; - } - - header->size = memdesc->size >> 2; - header->gpuaddr = memdesc->gpuaddr; - header->ptbase = MMU_DEFAULT_TTBR0(device); - header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; - - memcpy(ptr, memdesc->hostptr, memdesc->size); - - return memdesc->size + sizeof(*header); -} - void gen8_hwsched_snapshot(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) { @@ -225,7 +196,7 @@ void gen8_hwsched_snapshot(struct adreno_device *adreno_dev, if (entry->desc.mem_kind == HFI_MEMKIND_AQE_BUFFER) kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, - snapshot, snapshot_aqe_buffer, + snapshot, adreno_hwsched_snapshot_aqe_buffer, entry->md); if (entry->desc.mem_kind == HFI_MEMKIND_HW_FENCE) { diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 6701c26c18..967b1b95bc 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -2726,3 +2726,31 @@ bool adreno_hwsched_parse_payload_rb(struct adreno_device *adreno_dev, return ret; } + +size_t adreno_hwsched_snapshot_aqe_buffer(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_memdesc *memdesc = priv; + + struct kgsl_snapshot_gpu_object_v2 *header = + (struct kgsl_snapshot_gpu_object_v2 *)buf; + + u8 *ptr = buf + sizeof(*header); + + if (IS_ERR_OR_NULL(memdesc) || memdesc->size == 0) + return 0; + + if (remain < (memdesc->size + sizeof(*header))) { + SNAPSHOT_ERR_NOMEM(device, "AQE BUFFER"); + return 0; + } + + header->size = memdesc->size >> 2; + header->gpuaddr = memdesc->gpuaddr; + 
header->ptbase = MMU_DEFAULT_TTBR0(device); + header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; + + memcpy(ptr, memdesc->hostptr, memdesc->size); + + return memdesc->size + sizeof(*header); +} diff --git a/adreno_hwsched.h b/adreno_hwsched.h index 3ffbf4fcc3..dcaa0651c1 100644 --- a/adreno_hwsched.h +++ b/adreno_hwsched.h @@ -298,4 +298,6 @@ bool adreno_hwsched_parse_payload_rb(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot); size_t adreno_hwsched_snapshot_rb(struct kgsl_device *device, u8 *buf, size_t remain, void *priv); +size_t adreno_hwsched_snapshot_aqe_buffer(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv); #endif From e1f751abfedf5304d8f651f8cdd69c4fc439d05e Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Sat, 13 Apr 2024 08:23:13 -0700 Subject: [PATCH 0822/1016] kgsl: snapshot: Remove target specific context queue dumping Context queues are generic software managed queues which are independent from hardware. Hence replace target specific functions with the generic function implementation. 
Change-Id: Iebdcb63f9348292a6c06e8e7f58d55c5da21c350 Signed-off-by: Hareesh Gundu --- adreno_gen7_hwsched.c | 28 ++-------------------------- adreno_gen8_hwsched.c | 23 +---------------------- adreno_hwsched.c | 29 +++++++++++++++++++++++++++++ adreno_hwsched.h | 2 ++ 4 files changed, 34 insertions(+), 48 deletions(-) diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index a07d543334..a3a405891e 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -120,26 +120,6 @@ static void snapshot_preemption_records(struct kgsl_device *device, offset); } -static int snapshot_context_queue(int id, void *ptr, void *data) -{ - struct kgsl_snapshot *snapshot = data; - struct kgsl_context *context = ptr; - struct adreno_context *drawctxt = ADRENO_CONTEXT(context); - struct gmu_mem_type_desc desc; - - if (!context->gmu_registered) - return 0; - - desc.memdesc = &drawctxt->gmu_context_queue; - desc.type = SNAPSHOT_GMU_MEM_CONTEXT_QUEUE; - - kgsl_snapshot_add_section(context->device, - KGSL_SNAPSHOT_SECTION_GMU_MEMORY, - snapshot, adreno_snapshot_gmu_mem, &desc); - - return 0; -} - void gen7_hwsched_snapshot(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) { @@ -224,12 +204,8 @@ void gen7_hwsched_snapshot(struct adreno_device *adreno_dev, } - if (!adreno_hwsched_context_queue_enabled(adreno_dev)) - return; - - read_lock(&device->context_lock); - idr_for_each(&device->context_idr, snapshot_context_queue, snapshot); - read_unlock(&device->context_lock); + if (adreno_hwsched_context_queue_enabled(adreno_dev)) + adreno_hwsched_snapshot_context_queue(adreno_dev, snapshot); } static int gen7_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index a606ffed60..0958935cc3 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -113,25 +113,6 @@ static void snapshot_preemption_records(struct kgsl_device *device, offset); } -static int snapshot_context_queue(int id, void *ptr, void 
*data) -{ - struct kgsl_snapshot *snapshot = data; - struct kgsl_context *context = ptr; - struct adreno_context *drawctxt = ADRENO_CONTEXT(context); - struct gmu_mem_type_desc desc; - - if (!context->gmu_registered) - return 0; - - desc.memdesc = &drawctxt->gmu_context_queue; - desc.type = SNAPSHOT_GMU_MEM_CONTEXT_QUEUE; - kgsl_snapshot_add_section(context->device, - KGSL_SNAPSHOT_SECTION_GMU_MEMORY, - snapshot, adreno_snapshot_gmu_mem, &desc); - - return 0; -} - void gen8_hwsched_snapshot(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) { @@ -211,9 +192,7 @@ void gen8_hwsched_snapshot(struct adreno_device *adreno_dev, } - read_lock(&device->context_lock); - idr_for_each(&device->context_idr, snapshot_context_queue, snapshot); - read_unlock(&device->context_lock); + adreno_hwsched_snapshot_context_queue(adreno_dev, snapshot); } static void _get_hw_fence_entries(struct adreno_device *adreno_dev) diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 967b1b95bc..e0a1ddfec9 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -2754,3 +2754,32 @@ size_t adreno_hwsched_snapshot_aqe_buffer(struct kgsl_device *device, u8 *buf, return memdesc->size + sizeof(*header); } + +static int snapshot_context_queue(int id, void *ptr, void *data) +{ + struct kgsl_snapshot *snapshot = data; + struct kgsl_context *context = ptr; + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + struct gmu_mem_type_desc desc; + + if (!context->gmu_registered) + return 0; + + desc.memdesc = &drawctxt->gmu_context_queue; + desc.type = SNAPSHOT_GMU_MEM_CONTEXT_QUEUE; + kgsl_snapshot_add_section(context->device, + KGSL_SNAPSHOT_SECTION_GMU_MEMORY, + snapshot, adreno_snapshot_gmu_mem, &desc); + + return 0; +} + +void adreno_hwsched_snapshot_context_queue(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + read_lock(&device->context_lock); + idr_for_each(&device->context_idr, 
snapshot_context_queue, snapshot); + read_unlock(&device->context_lock); +} diff --git a/adreno_hwsched.h b/adreno_hwsched.h index dcaa0651c1..d9a18f5f83 100644 --- a/adreno_hwsched.h +++ b/adreno_hwsched.h @@ -300,4 +300,6 @@ size_t adreno_hwsched_snapshot_rb(struct kgsl_device *device, u8 *buf, size_t remain, void *priv); size_t adreno_hwsched_snapshot_aqe_buffer(struct kgsl_device *device, u8 *buf, size_t remain, void *priv); +void adreno_hwsched_snapshot_context_queue(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); #endif From 0d62f775d60ff87245c4da1c0d19741d2662ffd2 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Mon, 20 May 2024 11:47:07 -0700 Subject: [PATCH 0823/1016] kgsl: snapshot: Implement generic function for preemption buffer dump Preemption buffer dumping is not dependent on hardware variant. Hence implement generic function to dump preemption buffers in snapshot. Change-Id: Ic456c66e453599a229393925b93e607a4cead518 Signed-off-by: Hareesh Gundu --- adreno_a5xx_snapshot.c | 42 ++-------------------------- adreno_a6xx_hwsched.c | 62 +----------------------------------------- adreno_a6xx_snapshot.c | 42 +--------------------------- adreno_gen7_hwsched.c | 61 +---------------------------------------- adreno_gen7_snapshot.c | 43 ++--------------------------- adreno_gen8_hwsched.c | 53 +----------------------------------- adreno_gen8_snapshot.c | 40 ++------------------------- adreno_hwsched.c | 49 +++++++++++++++++++++++++++++++++ adreno_hwsched.h | 2 ++ adreno_snapshot.c | 43 +++++++++++++++++++++++++++++ adreno_snapshot.h | 11 ++++++++ 11 files changed, 115 insertions(+), 333 deletions(-) diff --git a/adreno_a5xx_snapshot.c b/adreno_a5xx_snapshot.c index 232319a61e..71fecc05c2 100644 --- a/adreno_a5xx_snapshot.c +++ b/adreno_a5xx_snapshot.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2015-2020, The Linux Foundation. All rights reserved. - * Copyright (c) 2023, Qualcomm Innovation Center, Inc. 
All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include "adreno.h" @@ -721,34 +721,6 @@ out: return (count * 8) + sizeof(*header); } -/* Snapshot a preemption record buffer */ -static size_t snapshot_preemption_record(struct kgsl_device *device, u8 *buf, - size_t remain, void *priv) -{ - struct kgsl_memdesc *memdesc = priv; - - struct kgsl_snapshot_gpu_object_v2 *header = - (struct kgsl_snapshot_gpu_object_v2 *)buf; - - u8 *ptr = buf + sizeof(*header); - - if (remain < (SZ_64K + sizeof(*header))) { - SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); - return 0; - } - - header->size = SZ_64K >> 2; - header->gpuaddr = memdesc->gpuaddr; - header->ptbase = - kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); - header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; - - memcpy(ptr, memdesc->hostptr, SZ_64K); - - return SZ_64K + sizeof(*header); -} - - static void _a5xx_do_crashdump(struct kgsl_device *device) { unsigned long wait_time; @@ -938,9 +910,7 @@ void a5xx_snapshot(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - unsigned int i; u32 hi, lo; - struct adreno_ringbuffer *rb; struct registers regs; /* Disable Clock gating temporarily for the debug bus to work */ @@ -1038,15 +1008,7 @@ void a5xx_snapshot(struct adreno_device *adreno_dev, a5xx_snapshot_shader(device, snapshot); /* Preemption record */ - if (adreno_is_preemption_enabled(adreno_dev)) { - FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { - kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, - snapshot, snapshot_preemption_record, - rb->preemption_desc); - } - } - + adreno_snapshot_preemption_record(device, snapshot); } static int _a5xx_crashdump_init_shader(struct a5xx_shader_block *block, diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index fbe285e2cc..ee29aa13fe 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -17,66 +17,6 @@ 
#include "kgsl_device.h" #include "kgsl_trace.h" -static void a6xx_hwsched_snapshot_preemption_record(struct kgsl_device *device, - struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md, u64 offset) -{ - struct kgsl_snapshot_section_header *section_header = - (struct kgsl_snapshot_section_header *)snapshot->ptr; - u8 *dest = snapshot->ptr + sizeof(*section_header); - struct kgsl_snapshot_gpu_object_v2 *header = - (struct kgsl_snapshot_gpu_object_v2 *)dest; - const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(ADRENO_DEVICE(device)); - u64 ctxt_record_size = A6XX_CP_CTXRECORD_SIZE_IN_BYTES; - size_t section_size; - - if (a6xx_core->ctxt_record_size) - ctxt_record_size = a6xx_core->ctxt_record_size; - - ctxt_record_size = min_t(u64, ctxt_record_size, device->snapshot_ctxt_record_size); - - section_size = sizeof(*section_header) + sizeof(*header) + ctxt_record_size; - - if (snapshot->remain < section_size) { - SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); - return; - } - - section_header->magic = SNAPSHOT_SECTION_MAGIC; - section_header->id = KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2; - section_header->size = section_size; - - header->size = ctxt_record_size >> 2; - header->gpuaddr = md->gpuaddr + offset; - header->ptbase = - kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); - header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; - - dest += sizeof(*header); - - memcpy(dest, md->hostptr + offset, ctxt_record_size); - - snapshot->ptr += section_header->size; - snapshot->remain -= section_header->size; - snapshot->size += section_header->size; -} - -static void snapshot_preemption_records(struct kgsl_device *device, - struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md) -{ - const struct adreno_a6xx_core *a6xx_core = - to_a6xx_core(ADRENO_DEVICE(device)); - u64 ctxt_record_size = A6XX_CP_CTXRECORD_SIZE_IN_BYTES; - u64 offset; - - if (a6xx_core->ctxt_record_size) - ctxt_record_size = a6xx_core->ctxt_record_size; - - /* All preemption records exist as a single mem 
alloc entry */ - for (offset = 0; offset < md->size; offset += ctxt_record_size) - a6xx_hwsched_snapshot_preemption_record(device, snapshot, md, - offset); -} - void a6xx_hwsched_snapshot(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) { @@ -133,7 +73,7 @@ void a6xx_hwsched_snapshot(struct adreno_device *adreno_dev, entry->md); if (entry->desc.mem_kind == HFI_MEMKIND_CSW_PRIV_NON_SECURE) - snapshot_preemption_records(device, snapshot, + adreno_hwsched_snapshot_preemption_records(device, snapshot, entry->md); if (entry->desc.mem_kind == HFI_MEMKIND_PREEMPT_SCRATCH) diff --git a/adreno_a6xx_snapshot.c b/adreno_a6xx_snapshot.c index 838d22c37f..0261fe8f64 100644 --- a/adreno_a6xx_snapshot.c +++ b/adreno_a6xx_snapshot.c @@ -1614,38 +1614,6 @@ static void _a6xx_do_crashdump(struct kgsl_device *device) crash_dump_valid = true; } -/* Snapshot the preemption related buffers */ -static size_t snapshot_preemption_record(struct kgsl_device *device, - u8 *buf, size_t remain, void *priv) -{ - struct kgsl_memdesc *memdesc = priv; - struct kgsl_snapshot_gpu_object_v2 *header = - (struct kgsl_snapshot_gpu_object_v2 *)buf; - u8 *ptr = buf + sizeof(*header); - const struct adreno_a6xx_core *gpucore = to_a6xx_core(ADRENO_DEVICE(device)); - u64 ctxt_record_size = A6XX_CP_CTXRECORD_SIZE_IN_BYTES; - - if (gpucore->ctxt_record_size) - ctxt_record_size = gpucore->ctxt_record_size; - - ctxt_record_size = min_t(u64, ctxt_record_size, device->snapshot_ctxt_record_size); - - if (remain < (ctxt_record_size + sizeof(*header))) { - SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); - return 0; - } - - header->size = ctxt_record_size >> 2; - header->gpuaddr = memdesc->gpuaddr; - header->ptbase = - kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); - header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; - - memcpy(ptr, memdesc->hostptr, ctxt_record_size); - - return ctxt_record_size + sizeof(*header); -} - static size_t a6xx_snapshot_cp_roq(struct kgsl_device *device, u8 *buf, size_t 
remain, void *priv) { @@ -1694,7 +1662,6 @@ void a6xx_snapshot(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct adreno_ringbuffer *rb; bool sptprac_on; unsigned int i; u32 hi, lo; @@ -1863,14 +1830,7 @@ void a6xx_snapshot(struct adreno_device *adreno_dev, } /* Preemption record */ - if (adreno_is_preemption_enabled(adreno_dev)) { - FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { - kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, - snapshot, snapshot_preemption_record, - rb->preemption_desc); - } - } + adreno_snapshot_preemption_record(device, snapshot); } static int _a6xx_crashdump_init_mvc(struct adreno_device *adreno_dev, diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index a3a405891e..27ce36a2df 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -61,65 +61,6 @@ void gen7_hwsched_fault(struct adreno_device *adreno_dev, u32 fault) adreno_hwsched_fault(adreno_dev, fault); } -static void gen7_hwsched_snapshot_preemption_record(struct kgsl_device *device, - struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md, u64 offset) -{ - struct kgsl_snapshot_section_header *section_header = - (struct kgsl_snapshot_section_header *)snapshot->ptr; - u8 *dest = snapshot->ptr + sizeof(*section_header); - struct kgsl_snapshot_gpu_object_v2 *header = - (struct kgsl_snapshot_gpu_object_v2 *)dest; - const struct adreno_gen7_core *gen7_core = to_gen7_core(ADRENO_DEVICE(device)); - u64 ctxt_record_size = GEN7_CP_CTXRECORD_SIZE_IN_BYTES; - size_t section_size; - - if (gen7_core->ctxt_record_size) - ctxt_record_size = gen7_core->ctxt_record_size; - - ctxt_record_size = min_t(u64, ctxt_record_size, device->snapshot_ctxt_record_size); - - section_size = sizeof(*section_header) + sizeof(*header) + ctxt_record_size; - if (snapshot->remain < section_size) { - SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); - return; - } - - section_header->magic = 
SNAPSHOT_SECTION_MAGIC; - section_header->id = KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2; - section_header->size = section_size; - - header->size = ctxt_record_size >> 2; - header->gpuaddr = md->gpuaddr + offset; - header->ptbase = - kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); - header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; - - dest += sizeof(*header); - - memcpy(dest, md->hostptr + offset, ctxt_record_size); - - snapshot->ptr += section_header->size; - snapshot->remain -= section_header->size; - snapshot->size += section_header->size; -} - -static void snapshot_preemption_records(struct kgsl_device *device, - struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md) -{ - const struct adreno_gen7_core *gen7_core = - to_gen7_core(ADRENO_DEVICE(device)); - u64 ctxt_record_size = GEN7_CP_CTXRECORD_SIZE_IN_BYTES; - u64 offset; - - if (gen7_core->ctxt_record_size) - ctxt_record_size = gen7_core->ctxt_record_size; - - /* All preemption records exist as a single mem alloc entry */ - for (offset = 0; offset < md->size; offset += ctxt_record_size) - gen7_hwsched_snapshot_preemption_record(device, snapshot, md, - offset); -} - void gen7_hwsched_snapshot(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) { @@ -176,7 +117,7 @@ void gen7_hwsched_snapshot(struct adreno_device *adreno_dev, entry->md); if (entry->desc.mem_kind == HFI_MEMKIND_CSW_PRIV_NON_SECURE) - snapshot_preemption_records(device, snapshot, + adreno_hwsched_snapshot_preemption_records(device, snapshot, entry->md); if (entry->desc.mem_kind == HFI_MEMKIND_PREEMPT_SCRATCH) diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c index 0280484787..4d5b21a284 100644 --- a/adreno_gen7_snapshot.c +++ b/adreno_gen7_snapshot.c @@ -1399,38 +1399,6 @@ static void gen7_snapshot_debugbus(struct adreno_device *adreno_dev, } } -/* Snapshot the preemption related buffers */ -static size_t snapshot_preemption_record(struct kgsl_device *device, - u8 *buf, size_t remain, void *priv) -{ - struct 
kgsl_memdesc *memdesc = priv; - struct kgsl_snapshot_gpu_object_v2 *header = - (struct kgsl_snapshot_gpu_object_v2 *)buf; - u8 *ptr = buf + sizeof(*header); - const struct adreno_gen7_core *gpucore = to_gen7_core(ADRENO_DEVICE(device)); - u64 ctxt_record_size = GEN7_CP_CTXRECORD_SIZE_IN_BYTES; - - if (gpucore->ctxt_record_size) - ctxt_record_size = gpucore->ctxt_record_size; - - ctxt_record_size = min_t(u64, ctxt_record_size, device->snapshot_ctxt_record_size); - - if (remain < (ctxt_record_size + sizeof(*header))) { - SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); - return 0; - } - - header->size = ctxt_record_size >> 2; - header->gpuaddr = memdesc->gpuaddr; - header->ptbase = - kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); - header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; - - memcpy(ptr, memdesc->hostptr, ctxt_record_size); - - return ctxt_record_size + sizeof(*header); -} - static void gen7_reglist_snapshot(struct kgsl_device *device, struct kgsl_snapshot *snapshot) { @@ -1646,7 +1614,6 @@ void gen7_snapshot(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct adreno_ringbuffer *rb; unsigned int i; u32 hi, lo, cgc = 0, cgc1 = 0, cgc2 = 0; const struct adreno_gen7_core *gpucore = to_gen7_core(ADRENO_DEVICE(device)); @@ -1783,14 +1750,8 @@ void gen7_snapshot(struct adreno_device *adreno_dev, kgsl_regwrite(device, GEN7_RBBM_SNAPSHOT_STATUS, 0x0); /* Preemption record */ - if (adreno_is_preemption_enabled(adreno_dev)) { - FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { - kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, - snapshot, snapshot_preemption_record, - rb->preemption_desc); - } - } + adreno_snapshot_preemption_record(device, snapshot); + if (is_current_rt) sched_set_fifo(current); } diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 0958935cc3..7303e9112b 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -62,57 +62,6 @@ void 
gen8_hwsched_fault(struct adreno_device *adreno_dev, u32 fault) adreno_hwsched_fault(adreno_dev, fault); } -static void gen8_hwsched_snapshot_preemption_record(struct kgsl_device *device, - struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md, u64 offset) -{ - struct kgsl_snapshot_section_header *section_header = - (struct kgsl_snapshot_section_header *)snapshot->ptr; - u8 *dest = snapshot->ptr + sizeof(*section_header); - struct kgsl_snapshot_gpu_object_v2 *header = - (struct kgsl_snapshot_gpu_object_v2 *)dest; - u64 ctxt_record_size = max_t(u64, GEN8_SNAPSHOT_CTXRECORD_SIZE_IN_BYTES, - device->snapshot_ctxt_record_size); - size_t section_size; - - section_size = sizeof(*section_header) + sizeof(*header) + ctxt_record_size; - if (snapshot->remain < section_size) { - SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); - return; - } - - section_header->magic = SNAPSHOT_SECTION_MAGIC; - section_header->id = KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2; - section_header->size = section_size; - - header->size = ctxt_record_size >> 2; - header->gpuaddr = md->gpuaddr + offset; - header->ptbase = - kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); - header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; - - dest += sizeof(*header); - - memcpy(dest, md->hostptr + offset, ctxt_record_size); - - snapshot->ptr += section_header->size; - snapshot->remain -= section_header->size; - snapshot->size += section_header->size; -} - -static void snapshot_preemption_records(struct kgsl_device *device, - struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md) -{ - u64 ctxt_record_size = md->size; - u64 offset; - - do_div(ctxt_record_size, KGSL_PRIORITY_MAX_RB_LEVELS); - - /* All preemption records exist as a single mem alloc entry */ - for (offset = 0; offset < md->size; offset += ctxt_record_size) - gen8_hwsched_snapshot_preemption_record(device, snapshot, md, - offset); -} - void gen8_hwsched_snapshot(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) { @@ -165,7 +114,7 @@ void 
gen8_hwsched_snapshot(struct adreno_device *adreno_dev, entry->md); if (entry->desc.mem_kind == HFI_MEMKIND_CSW_PRIV_NON_SECURE) - snapshot_preemption_records(device, snapshot, + adreno_hwsched_snapshot_preemption_records(device, snapshot, entry->md); if (entry->desc.mem_kind == HFI_MEMKIND_PREEMPT_SCRATCH) diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 6191e7d338..2072d7db01 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -1438,35 +1438,6 @@ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, } } -/* Snapshot the preemption related buffers */ -static size_t snapshot_preemption_record(struct kgsl_device *device, - u8 *buf, size_t remain, void *priv) -{ - struct kgsl_memdesc *memdesc = priv; - struct kgsl_snapshot_gpu_object_v2 *header = - (struct kgsl_snapshot_gpu_object_v2 *)buf; - u8 *ptr = buf + sizeof(*header); - u64 ctxt_record_size = max_t(u64, GEN8_SNAPSHOT_CTXRECORD_SIZE_IN_BYTES, - device->snapshot_ctxt_record_size); - - ctxt_record_size = min_t(u64, ctxt_record_size, memdesc->size); - - if (remain < (ctxt_record_size + sizeof(*header))) { - SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); - return 0; - } - - header->size = ctxt_record_size >> 2; - header->gpuaddr = memdesc->gpuaddr; - header->ptbase = - kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); - header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; - - memcpy(ptr, memdesc->hostptr, ctxt_record_size); - - return ctxt_record_size + sizeof(*header); -} - static void gen8_reglist_snapshot(struct kgsl_device *device, struct kgsl_snapshot *snapshot) { @@ -1638,7 +1609,6 @@ void gen8_snapshot(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct adreno_ringbuffer *rb; u32 i; const struct adreno_gen8_core *gpucore = to_gen8_core(ADRENO_DEVICE(device)); int is_current_rt; @@ -1743,14 +1713,8 @@ void gen8_snapshot(struct adreno_device *adreno_dev, kgsl_regwrite(device, 
GEN8_RBBM_SNAPSHOT_STATUS, 0x0); /* Preemption record */ - if (adreno_is_preemption_enabled(adreno_dev)) { - FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { - kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, - snapshot, snapshot_preemption_record, - rb->preemption_desc); - } - } + adreno_snapshot_preemption_record(device, snapshot); + if (is_current_rt) sched_set_fifo(current); } diff --git a/adreno_hwsched.c b/adreno_hwsched.c index e0a1ddfec9..70653063c8 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -2783,3 +2783,52 @@ void adreno_hwsched_snapshot_context_queue(struct adreno_device *adreno_dev, idr_for_each(&device->context_idr, snapshot_context_queue, snapshot); read_unlock(&device->context_lock); } + +static void hwsched_snapshot_preemption_record(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md, u64 offset) +{ + struct kgsl_snapshot_section_header *section_header = + (struct kgsl_snapshot_section_header *)snapshot->ptr; + u8 *dest = snapshot->ptr + sizeof(*section_header); + struct kgsl_snapshot_gpu_object_v2 *header = + (struct kgsl_snapshot_gpu_object_v2 *)dest; + u64 ctxt_record_size = min_t(u64, device->snapshot_ctxt_record_size, md->size); + size_t section_size; + + section_size = sizeof(*section_header) + sizeof(*header) + ctxt_record_size; + if (snapshot->remain < section_size) { + SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); + return; + } + + section_header->magic = SNAPSHOT_SECTION_MAGIC; + section_header->id = KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2; + section_header->size = section_size; + + header->size = ctxt_record_size >> 2; + header->gpuaddr = md->gpuaddr + offset; + header->ptbase = + kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); + header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; + + dest += sizeof(*header); + + memcpy(dest, md->hostptr + offset, ctxt_record_size); + + snapshot->ptr += section_header->size; + snapshot->remain -= section_header->size; + snapshot->size += 
section_header->size; +} + +void adreno_hwsched_snapshot_preemption_records(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md) +{ + u64 ctxt_record_size = md->size; + u64 offset; + + do_div(ctxt_record_size, KGSL_PRIORITY_MAX_RB_LEVELS); + + /* All preemption records exist as a single mem alloc entry */ + for (offset = 0; offset < md->size; offset += ctxt_record_size) + hwsched_snapshot_preemption_record(device, snapshot, md, offset); +} diff --git a/adreno_hwsched.h b/adreno_hwsched.h index d9a18f5f83..ee7158c82f 100644 --- a/adreno_hwsched.h +++ b/adreno_hwsched.h @@ -302,4 +302,6 @@ size_t adreno_hwsched_snapshot_aqe_buffer(struct kgsl_device *device, u8 *buf, size_t remain, void *priv); void adreno_hwsched_snapshot_context_queue(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot); +void adreno_hwsched_snapshot_preemption_records(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md); #endif diff --git a/adreno_snapshot.c b/adreno_snapshot.c index e0b5195f64..c20f700ee3 100644 --- a/adreno_snapshot.c +++ b/adreno_snapshot.c @@ -1421,3 +1421,46 @@ size_t adreno_snapshot_gmu_mem(struct kgsl_device *device, return desc->memdesc->size + sizeof(*mem_hdr); } + +/* Snapshot the preemption related buffers */ +static size_t snapshot_preemption_record(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_memdesc *memdesc = priv; + struct kgsl_snapshot_gpu_object_v2 *header = + (struct kgsl_snapshot_gpu_object_v2 *)buf; + u8 *ptr = buf + sizeof(*header); + u64 ctxt_record_size = min_t(u64, device->snapshot_ctxt_record_size, memdesc->size); + + if (remain < (ctxt_record_size + sizeof(*header))) { + SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); + return 0; + } + + header->size = ctxt_record_size >> 2; + header->gpuaddr = memdesc->gpuaddr; + header->ptbase = + kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); + header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; 
+ + memcpy(ptr, memdesc->hostptr, ctxt_record_size); + + return ctxt_record_size + sizeof(*header); +} + +void adreno_snapshot_preemption_record(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_ringbuffer *rb; + u32 i; + + if (adreno_is_preemption_enabled(adreno_dev)) { + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, snapshot_preemption_record, + rb->preemption_desc); + } + } +} diff --git a/adreno_snapshot.h b/adreno_snapshot.h index c9c1e87e7b..ce5822567a 100644 --- a/adreno_snapshot.h +++ b/adreno_snapshot.h @@ -154,4 +154,15 @@ size_t adreno_snapshot_gmu_version(struct kgsl_device *device, */ size_t adreno_snapshot_gmu_mem(struct kgsl_device *device, u8 *buf, size_t remain, void *priv); + +/** + * adreno_snapshot_preemption_record - To dump preemption related buffers + * @device: Pointer to the kgsl device + * @snapshot: Pointer to the snapshot structure + * + * Snapshot the preemption related buffers . + */ +void adreno_snapshot_preemption_record(struct kgsl_device *device, + struct kgsl_snapshot *snapshot); + #endif /*__ADRENO_SNAPSHOT_H */ From 713b1608f2cb24e8bcd086a8cad2f655f46216c5 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Tue, 7 May 2024 00:41:13 -0700 Subject: [PATCH 0824/1016] kgsl: snapshot: Consolidate hwsched snapshot related functions Consolidate hwsched snapshot related functions into single file for better code maintenance. 
Change-Id: Ib613e85b9af3515e39265881de4c3564920fca34 Signed-off-by: Hareesh Gundu --- Kbuild | 1 + adreno_gen7_hwsched.c | 2 +- adreno_gen8_hwsched.c | 2 +- adreno_hwsched.c | 277 ------------------------------------ adreno_hwsched.h | 13 -- adreno_hwsched_snapshot.c | 287 ++++++++++++++++++++++++++++++++++++++ adreno_hwsched_snapshot.h | 27 ++++ adreno_snapshot.h | 1 + build/kgsl_defs.bzl | 1 + 9 files changed, 319 insertions(+), 292 deletions(-) create mode 100644 adreno_hwsched_snapshot.c create mode 100644 adreno_hwsched_snapshot.h diff --git a/Kbuild b/Kbuild index 3e0afc05f3..9da1a02433 100644 --- a/Kbuild +++ b/Kbuild @@ -146,6 +146,7 @@ msm_kgsl-y += \ adreno_gen8_rpmh.o \ adreno_gen8_snapshot.o \ adreno_hwsched.o \ + adreno_hwsched_snapshot.o \ adreno_ioctl.o \ adreno_perfcounter.o \ adreno_ringbuffer.o \ diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 27ce36a2df..e9ef028388 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -146,7 +146,7 @@ void gen7_hwsched_snapshot(struct adreno_device *adreno_dev, } if (adreno_hwsched_context_queue_enabled(adreno_dev)) - adreno_hwsched_snapshot_context_queue(adreno_dev, snapshot); + adreno_hwsched_snapshot_context_queue(device, snapshot); } static int gen7_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 7303e9112b..e3e416a1b9 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -141,7 +141,7 @@ void gen8_hwsched_snapshot(struct adreno_device *adreno_dev, } - adreno_hwsched_snapshot_context_queue(adreno_dev, snapshot); + adreno_hwsched_snapshot_context_queue(device, snapshot); } static void _get_hw_fence_entries(struct adreno_device *adreno_dev) diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 70653063c8..1b5528ad08 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -2555,280 +2555,3 @@ void adreno_hwsched_remove_hw_fence_entry(struct adreno_device *adreno_dev, 
kgsl_context_put_deferred(&drawctxt->base); } -size_t adreno_hwsched_snapshot_rb(struct kgsl_device *device, u8 *buf, - size_t remain, void *priv) -{ - struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf; - u32 *data = (u32 *)(buf + sizeof(*header)); - struct kgsl_memdesc *rb = (struct kgsl_memdesc *)priv; - - if (remain < rb->size + sizeof(*header)) { - SNAPSHOT_ERR_NOMEM(device, "RB"); - return 0; - } - - header->start = 0; - header->end = rb->size >> 2; - header->rptr = 0; - header->rbsize = rb->size >> 2; - header->count = rb->size >> 2; - header->timestamp_queued = 0; - header->timestamp_retired = 0; - header->gpuaddr = rb->gpuaddr; - header->id = 0; - - memcpy(data, rb->hostptr, rb->size); - - return rb->size + sizeof(*header); -} - -static u32 copy_gpu_global(void *out, void *in, u32 size) -{ - if (out && in) { - memcpy(out, in, size); - return size; - } - - return 0; -} - -static void adreno_hwsched_snapshot_rb_payload(struct adreno_device *adreno_dev, - struct kgsl_snapshot *snapshot, struct payload_section *payload) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct kgsl_snapshot_section_header *section_header = - (struct kgsl_snapshot_section_header *)snapshot->ptr; - u8 *buf = snapshot->ptr + sizeof(*section_header); - struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf; - u32 *data = (u32 *)(buf + sizeof(*header)); - u32 size = adreno_hwsched_parse_payload(payload, KEY_RB_SIZEDWORDS) << 2; - const struct adreno_hwsched_ops *hwsched_ops = adreno_dev->hwsched.hwsched_ops; - u64 lo, hi, gpuaddr; - void *rb_hostptr; - char str[16]; - - lo = adreno_hwsched_parse_payload(payload, KEY_RB_GPUADDR_LO); - hi = adreno_hwsched_parse_payload(payload, KEY_RB_GPUADDR_HI); - gpuaddr = hi << 32 | lo; - - /* Sanity check to make sure there is enough for the header */ - if (snapshot->remain < sizeof(*section_header)) - goto err; - - if (hwsched_ops->get_rb_hostptr) - rb_hostptr = 
hwsched_ops->get_rb_hostptr(adreno_dev, gpuaddr, size); - - /* If the gpuaddress and size don't match any allocation, then abort */ - if (((snapshot->remain - sizeof(*section_header)) < (size + sizeof(*header))) || - !copy_gpu_global(data, rb_hostptr, size)) - goto err; - - if (device->dump_all_ibs) { - u64 rbaddr, lpac_rbaddr; - - adreno_readreg64(adreno_dev, ADRENO_REG_CP_RB_BASE, - ADRENO_REG_CP_RB_BASE_HI, &rbaddr); - - adreno_readreg64(adreno_dev, ADRENO_REG_CP_LPAC_RB_BASE, - ADRENO_REG_CP_LPAC_RB_BASE_HI, &lpac_rbaddr); - - /* Parse all IBs from current RB */ - if ((rbaddr == gpuaddr) || (lpac_rbaddr == gpuaddr)) - adreno_snapshot_dump_all_ibs(device, rb_hostptr, snapshot); - } - - header->start = 0; - header->end = size >> 2; - header->rptr = adreno_hwsched_parse_payload(payload, KEY_RB_RPTR); - header->wptr = adreno_hwsched_parse_payload(payload, KEY_RB_WPTR); - header->rbsize = size >> 2; - header->count = size >> 2; - header->timestamp_queued = adreno_hwsched_parse_payload(payload, - KEY_RB_QUEUED_TS); - header->timestamp_retired = adreno_hwsched_parse_payload(payload, - KEY_RB_RETIRED_TS); - header->gpuaddr = gpuaddr; - header->id = adreno_hwsched_parse_payload(payload, KEY_RB_ID); - - section_header->magic = SNAPSHOT_SECTION_MAGIC; - section_header->id = KGSL_SNAPSHOT_SECTION_RB_V2; - section_header->size = size + sizeof(*header) + sizeof(*section_header); - - snapshot->ptr += section_header->size; - snapshot->remain -= section_header->size; - snapshot->size += section_header->size; - - return; -err: - snprintf(str, sizeof(str), "RB addr:0x%llx", gpuaddr); - SNAPSHOT_ERR_NOMEM(device, str); -} - -bool adreno_hwsched_parse_payload_rb_legacy(struct adreno_device *adreno_dev, - struct kgsl_snapshot *snapshot) -{ - struct hfi_context_bad_cmd_legacy *cmd = adreno_dev->hwsched.ctxt_bad; - u32 i = 0, payload_bytes; - void *start; - bool ret = false; - - /* Skip if we didn't receive a context bad HFI */ - if (!cmd->hdr) - return false; - - payload_bytes = 
(MSG_HDR_GET_SIZE(cmd->hdr) << 2) - - offsetof(struct hfi_context_bad_cmd_legacy, payload); - - start = &cmd->payload[0]; - - while (i < payload_bytes) { - struct payload_section *payload = start + i; - - if (payload->type == PAYLOAD_RB) { - adreno_hwsched_snapshot_rb_payload(adreno_dev, snapshot, payload); - ret = true; - } - - i += sizeof(*payload) + (payload->dwords << 2); - } - - return ret; -} - -bool adreno_hwsched_parse_payload_rb(struct adreno_device *adreno_dev, - struct kgsl_snapshot *snapshot) -{ - struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; - u32 i = 0, payload_bytes; - void *start; - bool ret = false; - - /* Skip if we didn't receive a context bad HFI */ - if (!cmd->hdr) - return false; - - payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - - offsetof(struct hfi_context_bad_cmd, payload); - - start = &cmd->payload[0]; - - while (i < payload_bytes) { - struct payload_section *payload = start + i; - - if (payload->type == PAYLOAD_RB) { - adreno_hwsched_snapshot_rb_payload(adreno_dev, - snapshot, payload); - ret = true; - } - - i += sizeof(*payload) + (payload->dwords << 2); - } - - return ret; -} - -size_t adreno_hwsched_snapshot_aqe_buffer(struct kgsl_device *device, u8 *buf, - size_t remain, void *priv) -{ - struct kgsl_memdesc *memdesc = priv; - - struct kgsl_snapshot_gpu_object_v2 *header = - (struct kgsl_snapshot_gpu_object_v2 *)buf; - - u8 *ptr = buf + sizeof(*header); - - if (IS_ERR_OR_NULL(memdesc) || memdesc->size == 0) - return 0; - - if (remain < (memdesc->size + sizeof(*header))) { - SNAPSHOT_ERR_NOMEM(device, "AQE BUFFER"); - return 0; - } - - header->size = memdesc->size >> 2; - header->gpuaddr = memdesc->gpuaddr; - header->ptbase = MMU_DEFAULT_TTBR0(device); - header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; - - memcpy(ptr, memdesc->hostptr, memdesc->size); - - return memdesc->size + sizeof(*header); -} - -static int snapshot_context_queue(int id, void *ptr, void *data) -{ - struct kgsl_snapshot *snapshot = data; - struct 
kgsl_context *context = ptr; - struct adreno_context *drawctxt = ADRENO_CONTEXT(context); - struct gmu_mem_type_desc desc; - - if (!context->gmu_registered) - return 0; - - desc.memdesc = &drawctxt->gmu_context_queue; - desc.type = SNAPSHOT_GMU_MEM_CONTEXT_QUEUE; - kgsl_snapshot_add_section(context->device, - KGSL_SNAPSHOT_SECTION_GMU_MEMORY, - snapshot, adreno_snapshot_gmu_mem, &desc); - - return 0; -} - -void adreno_hwsched_snapshot_context_queue(struct adreno_device *adreno_dev, - struct kgsl_snapshot *snapshot) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - read_lock(&device->context_lock); - idr_for_each(&device->context_idr, snapshot_context_queue, snapshot); - read_unlock(&device->context_lock); -} - -static void hwsched_snapshot_preemption_record(struct kgsl_device *device, - struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md, u64 offset) -{ - struct kgsl_snapshot_section_header *section_header = - (struct kgsl_snapshot_section_header *)snapshot->ptr; - u8 *dest = snapshot->ptr + sizeof(*section_header); - struct kgsl_snapshot_gpu_object_v2 *header = - (struct kgsl_snapshot_gpu_object_v2 *)dest; - u64 ctxt_record_size = min_t(u64, device->snapshot_ctxt_record_size, md->size); - size_t section_size; - - section_size = sizeof(*section_header) + sizeof(*header) + ctxt_record_size; - if (snapshot->remain < section_size) { - SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); - return; - } - - section_header->magic = SNAPSHOT_SECTION_MAGIC; - section_header->id = KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2; - section_header->size = section_size; - - header->size = ctxt_record_size >> 2; - header->gpuaddr = md->gpuaddr + offset; - header->ptbase = - kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); - header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; - - dest += sizeof(*header); - - memcpy(dest, md->hostptr + offset, ctxt_record_size); - - snapshot->ptr += section_header->size; - snapshot->remain -= section_header->size; - snapshot->size += 
section_header->size; -} - -void adreno_hwsched_snapshot_preemption_records(struct kgsl_device *device, - struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md) -{ - u64 ctxt_record_size = md->size; - u64 offset; - - do_div(ctxt_record_size, KGSL_PRIORITY_MAX_RB_LEVELS); - - /* All preemption records exist as a single mem alloc entry */ - for (offset = 0; offset < md->size; offset += ctxt_record_size) - hwsched_snapshot_preemption_record(device, snapshot, md, offset); -} diff --git a/adreno_hwsched.h b/adreno_hwsched.h index ee7158c82f..d78bc871da 100644 --- a/adreno_hwsched.h +++ b/adreno_hwsched.h @@ -291,17 +291,4 @@ int adreno_hwsched_poll_msg_queue_write_index(struct kgsl_memdesc *hfi_mem); */ void adreno_hwsched_remove_hw_fence_entry(struct adreno_device *adreno_dev, struct adreno_hw_fence_entry *entry); - -bool adreno_hwsched_parse_payload_rb_legacy(struct adreno_device *adreno_dev, - struct kgsl_snapshot *snapshot); -bool adreno_hwsched_parse_payload_rb(struct adreno_device *adreno_dev, - struct kgsl_snapshot *snapshot); -size_t adreno_hwsched_snapshot_rb(struct kgsl_device *device, u8 *buf, - size_t remain, void *priv); -size_t adreno_hwsched_snapshot_aqe_buffer(struct kgsl_device *device, u8 *buf, - size_t remain, void *priv); -void adreno_hwsched_snapshot_context_queue(struct adreno_device *adreno_dev, - struct kgsl_snapshot *snapshot); -void adreno_hwsched_snapshot_preemption_records(struct kgsl_device *device, - struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md); #endif diff --git a/adreno_hwsched_snapshot.c b/adreno_hwsched_snapshot.c new file mode 100644 index 0000000000..29e5a0ed44 --- /dev/null +++ b/adreno_hwsched_snapshot.c @@ -0,0 +1,287 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ + +#include "adreno.h" +#include "adreno_hwsched_snapshot.h" +#include "adreno_snapshot.h" + +size_t adreno_hwsched_snapshot_rb(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf; + u32 *data = (u32 *)(buf + sizeof(*header)); + struct kgsl_memdesc *rb = (struct kgsl_memdesc *)priv; + + if (remain < rb->size + sizeof(*header)) { + SNAPSHOT_ERR_NOMEM(device, "RB"); + return 0; + } + + header->start = 0; + header->end = rb->size >> 2; + header->rptr = 0; + header->rbsize = rb->size >> 2; + header->count = rb->size >> 2; + header->timestamp_queued = 0; + header->timestamp_retired = 0; + header->gpuaddr = rb->gpuaddr; + header->id = 0; + + memcpy(data, rb->hostptr, rb->size); + + return rb->size + sizeof(*header); +} + +static u32 copy_gpu_global(void *out, void *in, u32 size) +{ + if (out && in) { + memcpy(out, in, size); + return size; + } + + return 0; +} + +static void adreno_hwsched_snapshot_rb_payload(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot, struct payload_section *payload) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_snapshot_section_header *section_header = + (struct kgsl_snapshot_section_header *)snapshot->ptr; + u8 *buf = snapshot->ptr + sizeof(*section_header); + struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf; + u32 *data = (u32 *)(buf + sizeof(*header)); + u32 size = adreno_hwsched_parse_payload(payload, KEY_RB_SIZEDWORDS) << 2; + const struct adreno_hwsched_ops *hwsched_ops = adreno_dev->hwsched.hwsched_ops; + u64 lo, hi, gpuaddr; + void *rb_hostptr; + char str[16]; + + lo = adreno_hwsched_parse_payload(payload, KEY_RB_GPUADDR_LO); + hi = adreno_hwsched_parse_payload(payload, KEY_RB_GPUADDR_HI); + gpuaddr = hi << 32 | lo; + + /* Sanity check to make sure there is enough for the header */ + if (snapshot->remain < sizeof(*section_header)) + goto err; + + if (hwsched_ops->get_rb_hostptr) 
+ rb_hostptr = hwsched_ops->get_rb_hostptr(adreno_dev, gpuaddr, size); + + /* If the gpuaddress and size don't match any allocation, then abort */ + if (((snapshot->remain - sizeof(*section_header)) < (size + sizeof(*header))) || + !copy_gpu_global(data, rb_hostptr, size)) + goto err; + + if (device->dump_all_ibs) { + u64 rbaddr, lpac_rbaddr; + + adreno_readreg64(adreno_dev, ADRENO_REG_CP_RB_BASE, + ADRENO_REG_CP_RB_BASE_HI, &rbaddr); + + adreno_readreg64(adreno_dev, ADRENO_REG_CP_LPAC_RB_BASE, + ADRENO_REG_CP_LPAC_RB_BASE_HI, &lpac_rbaddr); + + /* Parse all IBs from current RB */ + if ((rbaddr == gpuaddr) || (lpac_rbaddr == gpuaddr)) + adreno_snapshot_dump_all_ibs(device, rb_hostptr, snapshot); + } + + header->start = 0; + header->end = size >> 2; + header->rptr = adreno_hwsched_parse_payload(payload, KEY_RB_RPTR); + header->wptr = adreno_hwsched_parse_payload(payload, KEY_RB_WPTR); + header->rbsize = size >> 2; + header->count = size >> 2; + header->timestamp_queued = adreno_hwsched_parse_payload(payload, + KEY_RB_QUEUED_TS); + header->timestamp_retired = adreno_hwsched_parse_payload(payload, + KEY_RB_RETIRED_TS); + header->gpuaddr = gpuaddr; + header->id = adreno_hwsched_parse_payload(payload, KEY_RB_ID); + + section_header->magic = SNAPSHOT_SECTION_MAGIC; + section_header->id = KGSL_SNAPSHOT_SECTION_RB_V2; + section_header->size = size + sizeof(*header) + sizeof(*section_header); + + snapshot->ptr += section_header->size; + snapshot->remain -= section_header->size; + snapshot->size += section_header->size; + + return; +err: + snprintf(str, sizeof(str), "RB addr:0x%llx", gpuaddr); + SNAPSHOT_ERR_NOMEM(device, str); +} + +bool adreno_hwsched_parse_payload_rb_legacy(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct hfi_context_bad_cmd_legacy *cmd = adreno_dev->hwsched.ctxt_bad; + u32 i = 0, payload_bytes; + void *start; + bool ret = false; + + /* Skip if we didn't receive a context bad HFI */ + if (!cmd->hdr) + return false; + + 
payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - + offsetof(struct hfi_context_bad_cmd_legacy, payload); + + start = &cmd->payload[0]; + + while (i < payload_bytes) { + struct payload_section *payload = start + i; + + if (payload->type == PAYLOAD_RB) { + adreno_hwsched_snapshot_rb_payload(adreno_dev, snapshot, payload); + ret = true; + } + + i += sizeof(*payload) + (payload->dwords << 2); + } + + return ret; +} + +bool adreno_hwsched_parse_payload_rb(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; + u32 i = 0, payload_bytes; + void *start; + bool ret = false; + + /* Skip if we didn't receive a context bad HFI */ + if (!cmd->hdr) + return false; + + payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - + offsetof(struct hfi_context_bad_cmd, payload); + + start = &cmd->payload[0]; + + while (i < payload_bytes) { + struct payload_section *payload = start + i; + + if (payload->type == PAYLOAD_RB) { + adreno_hwsched_snapshot_rb_payload(adreno_dev, + snapshot, payload); + ret = true; + } + + i += sizeof(*payload) + (payload->dwords << 2); + } + + return ret; +} + +size_t adreno_hwsched_snapshot_aqe_buffer(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_memdesc *memdesc = priv; + + struct kgsl_snapshot_gpu_object_v2 *header = + (struct kgsl_snapshot_gpu_object_v2 *)buf; + + u8 *ptr = buf + sizeof(*header); + + if (IS_ERR_OR_NULL(memdesc) || memdesc->size == 0) + return 0; + + if (remain < (memdesc->size + sizeof(*header))) { + SNAPSHOT_ERR_NOMEM(device, "AQE BUFFER"); + return 0; + } + + header->size = memdesc->size >> 2; + header->gpuaddr = memdesc->gpuaddr; + header->ptbase = MMU_DEFAULT_TTBR0(device); + header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; + + memcpy(ptr, memdesc->hostptr, memdesc->size); + + return memdesc->size + sizeof(*header); +} + +static int snapshot_context_queue(int id, void *ptr, void *data) +{ + struct kgsl_snapshot *snapshot = 
data; + struct kgsl_context *context = ptr; + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + struct gmu_mem_type_desc desc; + + if (!context->gmu_registered) + return 0; + + desc.memdesc = &drawctxt->gmu_context_queue; + desc.type = SNAPSHOT_GMU_MEM_CONTEXT_QUEUE; + kgsl_snapshot_add_section(context->device, + KGSL_SNAPSHOT_SECTION_GMU_MEMORY, + snapshot, adreno_snapshot_gmu_mem, &desc); + + return 0; +} + +void adreno_hwsched_snapshot_context_queue(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + if (!adreno_hwsched_context_queue_enabled(ADRENO_DEVICE(device))) + return; + + read_lock(&device->context_lock); + idr_for_each(&device->context_idr, snapshot_context_queue, snapshot); + read_unlock(&device->context_lock); +} + +static void hwsched_snapshot_preemption_record(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md, u64 offset) +{ + struct kgsl_snapshot_section_header *section_header = + (struct kgsl_snapshot_section_header *)snapshot->ptr; + u8 *dest = snapshot->ptr + sizeof(*section_header); + struct kgsl_snapshot_gpu_object_v2 *header = + (struct kgsl_snapshot_gpu_object_v2 *)dest; + u64 ctxt_record_size = min_t(u64, device->snapshot_ctxt_record_size, md->size); + size_t section_size; + + section_size = sizeof(*section_header) + sizeof(*header) + ctxt_record_size; + if (snapshot->remain < section_size) { + SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); + return; + } + + section_header->magic = SNAPSHOT_SECTION_MAGIC; + section_header->id = KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2; + section_header->size = section_size; + + header->size = ctxt_record_size >> 2; + header->gpuaddr = md->gpuaddr + offset; + header->ptbase = + kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); + header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; + + dest += sizeof(*header); + + memcpy(dest, md->hostptr + offset, ctxt_record_size); + + snapshot->ptr += section_header->size; + snapshot->remain -= section_header->size; 
+ snapshot->size += section_header->size; +} + +void adreno_hwsched_snapshot_preemption_records(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md) +{ + u64 ctxt_record_size = md->size; + u64 offset; + + do_div(ctxt_record_size, KGSL_PRIORITY_MAX_RB_LEVELS); + + /* All preemption records exist as a single mem alloc entry */ + for (offset = 0; offset < md->size; offset += ctxt_record_size) + hwsched_snapshot_preemption_record(device, snapshot, md, offset); +} diff --git a/adreno_hwsched_snapshot.h b/adreno_hwsched_snapshot.h new file mode 100644 index 0000000000..c3e10516a5 --- /dev/null +++ b/adreno_hwsched_snapshot.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _ADRENO_HWSCHED_SNAPSHOT_H_ +#define _ADRENO_HWSCHED_SNAPSHOT_H_ + +bool adreno_hwsched_parse_payload_rb_legacy(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); + +bool adreno_hwsched_parse_payload_rb(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); + +size_t adreno_hwsched_snapshot_rb(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv); + +size_t adreno_hwsched_snapshot_aqe_buffer(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv); + +void adreno_hwsched_snapshot_context_queue(struct kgsl_device *device, + struct kgsl_snapshot *snapshot); + +void adreno_hwsched_snapshot_preemption_records(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md); + +#endif diff --git a/adreno_snapshot.h b/adreno_snapshot.h index ce5822567a..6d330624a2 100644 --- a/adreno_snapshot.h +++ b/adreno_snapshot.h @@ -6,6 +6,7 @@ #ifndef __ADRENO_SNAPSHOT_H #define __ADRENO_SNAPSHOT_H +#include "adreno_hwsched_snapshot.h" #include "kgsl_snapshot.h" /* Number of dwords to dump in snapshot for CP SQE */ diff --git a/build/kgsl_defs.bzl b/build/kgsl_defs.bzl index 3df6ce4fd3..745550fb2f 
100644 --- a/build/kgsl_defs.bzl +++ b/build/kgsl_defs.bzl @@ -53,6 +53,7 @@ def kgsl_get_srcs(): "adreno_gen8_rpmh.c", "adreno_gen8_snapshot.c", "adreno_hwsched.c", + "adreno_hwsched_snapshot.c", "adreno_ioctl.c", "adreno_perfcounter.c", "adreno_ringbuffer.c", From 3e8d5dabcebfb7eb89d841f7b144512f26ac18f9 Mon Sep 17 00:00:00 2001 From: Carter Cooper Date: Wed, 13 Mar 2024 11:27:37 -0600 Subject: [PATCH 0825/1016] kgsl: gen8: Program CGC related registers with recommended value Several CGC registers require non-reset values before bringing up the GPU. Update the list of register/values that are programmed. Change-Id: Ie9be8ee8cc6e9cbfae45016cea8a5edec51fac24 Signed-off-by: Carter Cooper --- adreno-gpulist.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 5941239600..75f349aecf 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2652,6 +2652,9 @@ static const struct gen8_nonctxt_regs gen8_0_0_nonctxt_regs[] = { { GEN8_RB_CMP_DBG_ECO_CNTL, 0x00004000, BIT(PIPE_BR) }, { GEN8_RBBM_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, { GEN8_RBBM_SLICE_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, + { GEN8_RBBM_WAIT_IDLE_CLOCKS_CNTL, 0x00000030, BIT(PIPE_NONE) }, + { GEN8_RBBM_WAIT_IDLE_CLOCKS_CNTL2, 0x00000030, BIT(PIPE_NONE) }, + { GEN8_RBBM_CGC_P2S_CNTL, 0x00000040, BIT(PIPE_NONE) }, /* Enable contribution of all shader stages to SP perfcounters */ { GEN8_SP_PERFCTR_SHADER_MASK, 0x0000003f, BIT(PIPE_NONE) }, /* @@ -2850,6 +2853,9 @@ static const struct gen8_nonctxt_regs gen8_4_0_nonctxt_regs[] = { { GEN8_RB_CMP_DBG_ECO_CNTL, 0x00004000, BIT(PIPE_BR) }, { GEN8_RBBM_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, { GEN8_RBBM_SLICE_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, + { GEN8_RBBM_WAIT_IDLE_CLOCKS_CNTL, 0x00000030, BIT(PIPE_NONE) }, + { GEN8_RBBM_WAIT_IDLE_CLOCKS_CNTL2, 0x00000030, BIT(PIPE_NONE) }, + { GEN8_RBBM_CGC_P2S_CNTL, 0x00000040, BIT(PIPE_NONE) }, /* Enable contribution of all shader stages to SP perfcounters */ { 
GEN8_SP_PERFCTR_SHADER_MASK, 0x0000003f, BIT(PIPE_NONE) }, /* From 5e328cce921e2834deaf4e9ff8ded65ae0dd3fcb Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 30 May 2024 13:21:04 +0530 Subject: [PATCH 0826/1016] kgsl: gen8: Update snapshot dumping logic for preemption record GMU doesn't reserve GMEM space in preemption context record for RB0. Thus, update the preemption context record dumping logic in snapshot for gen8 hardware scheduler. As the updated logic relies on context record size, specify it in gpulist. Change-Id: I54a1ba068d223736357f0d8d0d483a1ab661992c Signed-off-by: Kamal Agrawal --- adreno-gpulist.h | 3 +++ adreno_a6xx_hwsched.c | 16 +++++++++++++++- adreno_gen7_hwsched.c | 16 +++++++++++++++- adreno_gen8_hwsched.c | 32 +++++++++++++++++++++++++++++++- adreno_hwsched_snapshot.c | 25 +++++++++---------------- adreno_hwsched_snapshot.h | 4 ++-- 6 files changed, 75 insertions(+), 21 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 4fe7025f43..b3eb36eb89 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2786,6 +2786,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .bcl_data = 1, .acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4), .noc_timeout_us = 3410, /* 3.41 msec */ + .ctxt_record_size = (13536 * SZ_1K), }; static const struct adreno_gen8_core adreno_gpu_core_gen8_0_1 = { @@ -2824,6 +2825,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_1 = { .bcl_data = 1, .acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4), .noc_timeout_us = 3410, /* 3.41 msec */ + .ctxt_record_size = (13536 * SZ_1K), }; /* GEN8_4_0 noncontext register list */ @@ -2927,6 +2929,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_4_0 = { .qos_value = gen8_4_0_gbif_client_qos_values, .acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4), .bcl_data = 1, + .ctxt_record_size = (13536 * SZ_1K), }; extern const struct gen8_snapshot_block_list gen8_3_0_snapshot_block_list; diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c 
index ee29aa13fe..e31f3f6f80 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -17,6 +17,20 @@ #include "kgsl_device.h" #include "kgsl_trace.h" +static void a6xx_hwsched_snapshot_preemption_records(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md) +{ + u64 ctxt_record_size = md->size; + u64 offset; + + do_div(ctxt_record_size, KGSL_PRIORITY_MAX_RB_LEVELS); + + /* All preemption records exist as a single mem alloc entry */ + for (offset = 0; offset < md->size; offset += ctxt_record_size) + adreno_hwsched_snapshot_preemption_record(device, snapshot, md, + offset, ctxt_record_size); +} + void a6xx_hwsched_snapshot(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) { @@ -73,7 +87,7 @@ void a6xx_hwsched_snapshot(struct adreno_device *adreno_dev, entry->md); if (entry->desc.mem_kind == HFI_MEMKIND_CSW_PRIV_NON_SECURE) - adreno_hwsched_snapshot_preemption_records(device, snapshot, + a6xx_hwsched_snapshot_preemption_records(device, snapshot, entry->md); if (entry->desc.mem_kind == HFI_MEMKIND_PREEMPT_SCRATCH) diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index e9ef028388..444091dcf3 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -61,6 +61,20 @@ void gen7_hwsched_fault(struct adreno_device *adreno_dev, u32 fault) adreno_hwsched_fault(adreno_dev, fault); } +static void gen7_hwsched_snapshot_preemption_records(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md) +{ + u64 ctxt_record_size = md->size; + u64 offset; + + do_div(ctxt_record_size, KGSL_PRIORITY_MAX_RB_LEVELS); + + /* All preemption records exist as a single mem alloc entry */ + for (offset = 0; offset < md->size; offset += ctxt_record_size) + adreno_hwsched_snapshot_preemption_record(device, snapshot, md, + offset, ctxt_record_size); +} + void gen7_hwsched_snapshot(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) { @@ -117,7 +131,7 @@ void 
gen7_hwsched_snapshot(struct adreno_device *adreno_dev, entry->md); if (entry->desc.mem_kind == HFI_MEMKIND_CSW_PRIV_NON_SECURE) - adreno_hwsched_snapshot_preemption_records(device, snapshot, + gen7_hwsched_snapshot_preemption_records(device, snapshot, entry->md); if (entry->desc.mem_kind == HFI_MEMKIND_PREEMPT_SCRATCH) diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index e3e416a1b9..72976fa719 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -62,6 +62,36 @@ void gen8_hwsched_fault(struct adreno_device *adreno_dev, u32 fault) adreno_hwsched_fault(adreno_dev, fault); } +static void gen8_hwsched_snapshot_preemption_records(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); + u64 offset = 0, ctxt_record_size = md->size; + u64 rb0_ctxt_record_size = PAGE_ALIGN(gen8_core->ctxt_record_size); + int i; + + /* Check whether GMU has removed GMEM size from RB0 context record */ + if (md->size == (rb0_ctxt_record_size * KGSL_PRIORITY_MAX_RB_LEVELS)) { + do_div(ctxt_record_size, KGSL_PRIORITY_MAX_RB_LEVELS); + } else { + rb0_ctxt_record_size -= PAGE_ALIGN(adreno_dev->gpucore->gmem_size); + ctxt_record_size -= rb0_ctxt_record_size; + do_div(ctxt_record_size, KGSL_PRIORITY_MAX_RB_LEVELS - 1); + } + + adreno_hwsched_snapshot_preemption_record(device, snapshot, md, offset, + rb0_ctxt_record_size); + offset += rb0_ctxt_record_size; + + /* All preemption records exist as a single mem alloc entry */ + for (i = 1; i < KGSL_PRIORITY_MAX_RB_LEVELS; i++) { + adreno_hwsched_snapshot_preemption_record(device, snapshot, md, + offset, ctxt_record_size); + offset += ctxt_record_size; + } +} + void gen8_hwsched_snapshot(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) { @@ -114,7 +144,7 @@ void gen8_hwsched_snapshot(struct adreno_device *adreno_dev, entry->md); if 
(entry->desc.mem_kind == HFI_MEMKIND_CSW_PRIV_NON_SECURE) - adreno_hwsched_snapshot_preemption_records(device, snapshot, + gen8_hwsched_snapshot_preemption_records(device, snapshot, entry->md); if (entry->desc.mem_kind == HFI_MEMKIND_PREEMPT_SCRATCH) diff --git a/adreno_hwsched_snapshot.c b/adreno_hwsched_snapshot.c index 29e5a0ed44..7cc369ea89 100644 --- a/adreno_hwsched_snapshot.c +++ b/adreno_hwsched_snapshot.c @@ -237,17 +237,23 @@ void adreno_hwsched_snapshot_context_queue(struct kgsl_device *device, read_unlock(&device->context_lock); } -static void hwsched_snapshot_preemption_record(struct kgsl_device *device, - struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md, u64 offset) +void adreno_hwsched_snapshot_preemption_record(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md, u64 offset, u64 size) { struct kgsl_snapshot_section_header *section_header = (struct kgsl_snapshot_section_header *)snapshot->ptr; u8 *dest = snapshot->ptr + sizeof(*section_header); struct kgsl_snapshot_gpu_object_v2 *header = (struct kgsl_snapshot_gpu_object_v2 *)dest; - u64 ctxt_record_size = min_t(u64, device->snapshot_ctxt_record_size, md->size); + u64 ctxt_record_size = min_t(u64, device->snapshot_ctxt_record_size, size); size_t section_size; + if (WARN_RATELIMIT((ctxt_record_size > md->size) || + (offset > (md->size - ctxt_record_size)), + "Invalid preemption context record size: md_size: 0x%llx, ctxt_record_size: 0x%llx\n", + md->size, ctxt_record_size)) + return; + section_size = sizeof(*section_header) + sizeof(*header) + ctxt_record_size; if (snapshot->remain < section_size) { SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); @@ -272,16 +278,3 @@ static void hwsched_snapshot_preemption_record(struct kgsl_device *device, snapshot->remain -= section_header->size; snapshot->size += section_header->size; } - -void adreno_hwsched_snapshot_preemption_records(struct kgsl_device *device, - struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md) 
-{ - u64 ctxt_record_size = md->size; - u64 offset; - - do_div(ctxt_record_size, KGSL_PRIORITY_MAX_RB_LEVELS); - - /* All preemption records exist as a single mem alloc entry */ - for (offset = 0; offset < md->size; offset += ctxt_record_size) - hwsched_snapshot_preemption_record(device, snapshot, md, offset); -} diff --git a/adreno_hwsched_snapshot.h b/adreno_hwsched_snapshot.h index c3e10516a5..edbe9c17ab 100644 --- a/adreno_hwsched_snapshot.h +++ b/adreno_hwsched_snapshot.h @@ -21,7 +21,7 @@ size_t adreno_hwsched_snapshot_aqe_buffer(struct kgsl_device *device, u8 *buf, void adreno_hwsched_snapshot_context_queue(struct kgsl_device *device, struct kgsl_snapshot *snapshot); -void adreno_hwsched_snapshot_preemption_records(struct kgsl_device *device, - struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md); +void adreno_hwsched_snapshot_preemption_record(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md, u64 offset, u64 size); #endif From b3b9cc31e45548bbf5ff39b96b8957c51fbb4b41 Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Mon, 6 May 2024 09:37:06 +0530 Subject: [PATCH 0827/1016] kgsl: Fix invalid pointer access in snapshot dump_all IBs path In the adreno_snapshot_dump_all_ibs() call there is a chance that the rbptr index goes beyond max RB size which would result in a kernel panic. Fix this by wrapping around the index. 
Change-Id: I1f0932e174e8e7cb02ace6e9c096933f780e3c6b Signed-off-by: SIVA MULLATI --- adreno_snapshot.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/adreno_snapshot.c b/adreno_snapshot.c index c20f700ee3..d25ffeb9f2 100644 --- a/adreno_snapshot.c +++ b/adreno_snapshot.c @@ -284,13 +284,13 @@ void adreno_snapshot_dump_all_ibs(struct kgsl_device *device, uint64_t ibsize; if (ADRENO_LEGACY_PM4(adreno_dev)) { - ibaddr = rbptr[index + 1]; - ibsize = rbptr[index + 2]; + ibaddr = rbptr[(index + 1) % KGSL_RB_DWORDS]; + ibsize = rbptr[(index + 2) % KGSL_RB_DWORDS]; index += 3; } else { - ibaddr = rbptr[index + 2]; - ibaddr = ibaddr << 32 | rbptr[index + 1]; - ibsize = rbptr[index + 3]; + ibaddr = rbptr[(index + 2) % KGSL_RB_DWORDS]; + ibaddr = ibaddr << 32 | rbptr[(index + 1) % KGSL_RB_DWORDS]; + ibsize = rbptr[(index + 3) % KGSL_RB_DWORDS]; index += 4; } From 55f94e81cca749d180bf315eef673994bf0a9f1b Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 18 Jun 2024 21:27:51 +0530 Subject: [PATCH 0828/1016] kgsl: gen8: Clear aperture register after mempool dump Currently, aperture register is not cleared after dumping mempool debug data in snapshot. Fix this by clearing the aperture register. 
Change-Id: I3cb5e82da7ae17196323867561c1d58ec53c7786 Signed-off-by: Kamal Agrawal --- adreno_gen8_snapshot.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 2072d7db01..847b7ce966 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -749,6 +749,9 @@ static void gen8_snapshot_mempool(struct kgsl_device *device, cp_indexed_reg->pipe_id, j, 1); } } + + /* Clear aperture register */ + gen8_host_aperture_set(ADRENO_DEVICE(device), 0, 0, 0); } static u32 gen8_read_dbgahb(struct kgsl_device *device, From a0faa843a0e3de470f27bdcb91dc1c1c930a5a50 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 13 Jun 2024 17:46:39 +0530 Subject: [PATCH 0829/1016] kgsl: gen8: Clear capturescript before each CD invocation Currently, the capturescript accumulates content from previous invocation, leading to repeated execution of the entire script. Ensure that the content from previous invocation is cleared before each execution. This optimization significantly reduces snapshot latency for gen8. 
Change-Id: I9d8c53c97650d5872f60c223727cc1d1e9ae1046 Signed-off-by: Kamal Agrawal --- adreno_gen8_snapshot.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 847b7ce966..4da7c50d4c 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -896,9 +896,6 @@ static void gen8_snapshot_dbgahb_regs(struct kgsl_device *device, return; } - /* Build the crash script */ - ptr = gen8_capturescript->hostptr; - for (i = 0; i < num_sptp_clusters; i++) { struct gen8_sptp_cluster_registers *cluster = &sptp_clusters[i]; @@ -922,6 +919,9 @@ static void gen8_snapshot_dbgahb_regs(struct kgsl_device *device, info.context_id = cluster->context_id; info.offset = offset; + /* Build the crash script */ + ptr = gen8_capturescript->hostptr; + /* Program the aperture */ ptr += CD_WRITE(ptr, GEN8_SP_READ_SEL, GEN8_SP_READ_SEL_VAL (j, cluster->location_id, cluster->pipe_id, @@ -1082,9 +1082,6 @@ static void gen8_snapshot_mvc_regs(struct kgsl_device *device, return; } - /* Build the crash script */ - ptr = gen8_capturescript->hostptr; - for (i = 0; i < num_cluster; i++) { struct gen8_cluster_registers *cluster = &clusters[i]; @@ -1101,6 +1098,9 @@ static void gen8_snapshot_mvc_regs(struct kgsl_device *device, info.slice_id = SLICE_ID(cluster->slice_region, j); info.offset = offset; + /* Build the crash script */ + ptr = gen8_capturescript->hostptr; + ptr += CD_WRITE(ptr, GEN8_CP_APERTURE_CNTL_CD, GEN8_CP_APERTURE_REG_VAL (j, cluster->pipe_id, cluster->cluster_id, cluster->context_id)); @@ -1466,9 +1466,6 @@ static void gen8_reglist_snapshot(struct kgsl_device *device, return; } - /* Build the crash script */ - ptr = (u64 *)gen8_capturescript->hostptr; - for (i = 0; reg_list[i].regs; i++) { struct gen8_reg_list *regs = ®_list[i]; @@ -1478,6 +1475,9 @@ static void gen8_reglist_snapshot(struct kgsl_device *device, for (j = 0; j < slices; j++) { const u32 *regs_ptr = regs->regs; + /* Build the crash 
script */ + ptr = gen8_capturescript->hostptr; + ptr += CD_WRITE(ptr, GEN8_CP_APERTURE_CNTL_CD, GEN8_CP_APERTURE_REG_VAL (j, 0, 0, 0)); /* Program the SEL_CNTL_CD register appropriately */ From b3f61f90202f84bfd2470350f4a5198acbcd9fda Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Mon, 17 Jun 2024 12:36:59 +0530 Subject: [PATCH 0830/1016] kgsl: gen8: Dump CPR registers in snapshot for gen8_0_x GPUs Dump core power reduction (CPR) registers in snapshot for gen8_0_x GPUs. Change-Id: Icf6e4088ee5f74c4bd87ea897313a078d0b7773b Signed-off-by: Kamal Agrawal --- adreno_gen8_0_0_snapshot.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h index cb924cc162..050854acda 100644 --- a/adreno_gen8_0_0_snapshot.h +++ b/adreno_gen8_0_0_snapshot.h @@ -2056,6 +2056,22 @@ static const u32 gen8_0_0_rscc_rsc_registers[] = { }; static_assert(IS_ALIGNED(sizeof(gen8_0_0_rscc_rsc_registers), 8)); +/* + * Block : ['CPR'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 20 (Regs:479) + */ +static const u32 gen8_0_0_cpr_registers[] = { + 0x26800, 0x26805, 0x26808, 0x2680c, 0x26814, 0x26814, 0x2681c, 0x2681c, + 0x26820, 0x26838, 0x26840, 0x26840, 0x26848, 0x26848, 0x26850, 0x26850, + 0x26880, 0x2689f, 0x26980, 0x269b0, 0x269c0, 0x269c8, 0x269e0, 0x269ee, + 0x269fb, 0x269ff, 0x26a02, 0x26a07, 0x26a09, 0x26a0b, 0x26a10, 0x26b0f, + 0x27440, 0x27441, 0x27444, 0x27444, 0x27480, 0x274a2, 0x274ac, 0x274c4, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_cpr_registers), 8)); + static const u32 *gen8_0_0_external_core_regs[] = { gen8_0_0_gdpm_lkg_registers, gen8_0_0_gpu_cc_ahb2phy_broadcast_swman_registers, @@ -2067,5 +2083,6 @@ static const u32 *gen8_0_0_external_core_regs[] = { gen8_0_0_gx_clkctl_ahb2phy_swman_registers, gen8_0_0_gx_clkctl_pll0_cm_pll_taycan_common_registers, gen8_0_0_gx_clkctl_gx_clkctl_reg_registers, + gen8_0_0_cpr_registers, }; #endif /*_ADRENO_GEN8_0_0_SNAPSHOT_H */ From 
390f3ec0b4a2bdad13214f070e20b0de1c09d981 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 13 Jun 2024 18:55:33 +0530 Subject: [PATCH 0831/1016] kgsl: gen8: Fix debug AHB register collection in legacy path Currently, the state type is not specified during debug AHB register collection through the legacy path. Fix this by specifying state type properly. Change-Id: I31549a740c8024587a565c77561bc223e65d0a8d Signed-off-by: Kamal Agrawal --- adreno_gen8_snapshot.c | 1 + 1 file changed, 1 insertion(+) diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 4da7c50d4c..a51b71d881 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -884,6 +884,7 @@ static void gen8_snapshot_dbgahb_regs(struct kgsl_device *device, info.usptp_id = usptp; info.sp_id = sp; info.slice_id = SLICE_ID(cluster->slice_region, j); + info.statetype_id = cluster->statetype; info.cluster_id = cluster->cluster_id; info.context_id = cluster->context_id; kgsl_snapshot_add_section(device, From d3fc2f46100eb365021df533117366246f4d39de Mon Sep 17 00:00:00 2001 From: Archana Sriram Date: Fri, 31 May 2024 15:32:24 +0530 Subject: [PATCH 0832/1016] kgsl: build: Add changes to compile graphics-kernel for parrot Add changes to compile graphics kernel code for parrot. 
Change-Id: Id21ba484593bf6de4836782cc4021faeeac790c8 Signed-off-by: Archana Sriram Signed-off-by: Himanshu Agrawal --- Kbuild | 3 +++ build/kgsl_defs.bzl | 2 +- config/parrot_consolidate_gpuconf | 11 +++++++++++ config/parrot_perf_gpuconf | 1 + 4 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 config/parrot_consolidate_gpuconf create mode 120000 config/parrot_perf_gpuconf diff --git a/Kbuild b/Kbuild index 9da1a02433..091d00e061 100644 --- a/Kbuild +++ b/Kbuild @@ -67,6 +67,9 @@ endif ifeq ($(CONFIG_ARCH_BENGAL), y) include $(KGSL_PATH)/config/gki_bengal.conf endif +ifeq ($(CONFIG_ARCH_PARROT), y) + include $(KGSL_PATH)/config/gki_parrot.conf +endif ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERNEL_SRC)/drivers/devfreq diff --git a/build/kgsl_defs.bzl b/build/kgsl_defs.bzl index 745550fb2f..b969b687e4 100644 --- a/build/kgsl_defs.bzl +++ b/build/kgsl_defs.bzl @@ -105,7 +105,7 @@ def external_deps(target, variant): "//vendor/qcom/opensource/synx-kernel:synx_headers" ] - if target in [ "monaco" ]: + if target in [ "monaco", "parrot" ]: deplist = deplist + [ "//vendor/qcom/opensource/mm-drivers/hw_fence:hw_fence_headers" ] diff --git a/config/parrot_consolidate_gpuconf b/config/parrot_consolidate_gpuconf new file mode 100644 index 0000000000..8d287a42ef --- /dev/null +++ b/config/parrot_consolidate_gpuconf @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+ +CONFIG_QCOM_KGSL=m +CONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 +CONFIG_QCOM_KGSL_SORT_POOL=y +CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y +CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=y +CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR="msm-adreno-tz" +CONFIG_QCOM_KGSL_USE_SHMEM=y +CONFIG_QCOM_KGSL_PROCESS_RECLAIM=y diff --git a/config/parrot_perf_gpuconf b/config/parrot_perf_gpuconf new file mode 120000 index 0000000000..9cb6bff14a --- /dev/null +++ b/config/parrot_perf_gpuconf @@ -0,0 +1 @@ +parrot_consolidate_gpuconf \ No newline at end of file From e03ac42159e22d8302a4da09a1fd2cf495720113 Mon Sep 17 00:00:00 2001 From: Mohammed Mirza Mandayappurath Manzoor Date: Tue, 11 Jun 2024 16:22:30 -0700 Subject: [PATCH 0833/1016] kgsl: Add ability to vote for dependent domains for GX levels Modify perf table to add support for new MxG corner for each power levels. Additional voltage corners are being introduced on some targets for MxG, which will not exist on MxA. Repurpose the cx_votes variable to support other dependent domain votes as well. Change-Id: Ifa5b75a1863d329ae07af367756494ec619b8b46 Signed-off-by: Mohammed Mirza Mandayappurath Manzoor --- adreno_a6xx_rpmh.c | 8 +-- adreno_gen7_rpmh.c | 6 +- adreno_gen8_rpmh.c | 150 +++++++++++++++++++++++++++++---------------- adreno_hfi.h | 4 +- 4 files changed, 107 insertions(+), 61 deletions(-) diff --git a/adreno_a6xx_rpmh.c b/adreno_a6xx_rpmh.c index 9d4a10f4d2..0d451ae3ca 100644 --- a/adreno_a6xx_rpmh.c +++ b/adreno_a6xx_rpmh.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -354,10 +354,10 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, memset(vlvl_tbl, 0, sizeof(vlvl_tbl)); table->gx_votes[0].freq = 0; - table->gx_votes[0].cx_vote = 0; + table->gx_votes[0].dep_vote = 0; /* Disable cx vote in gmu dcvs table if it is not supported in DT */ if (pwr->pwrlevels[0].cx_level == 0xffffffff) - table->gx_votes[0].cx_vote = 0xffffffff; + table->gx_votes[0].dep_vote = 0xffffffff; /* GMU power levels are in ascending order */ for (index = 1, i = pwr->num_pwrlevels - 1; i >= 0; i--, index++) { @@ -367,7 +367,7 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, table->gx_votes[index].freq = pwr->pwrlevels[i].gpu_freq / 1000; ret = to_cx_hlvl(cx_rail, cx_vlvl, - &table->gx_votes[index].cx_vote); + &table->gx_votes[index].dep_vote); if (ret) { dev_err(&gmu->pdev->dev, "Unsupported cx corner: %u\n", cx_vlvl); diff --git a/adreno_gen7_rpmh.c b/adreno_gen7_rpmh.c index be6cac5b7c..88a173bf58 100644 --- a/adreno_gen7_rpmh.c +++ b/adreno_gen7_rpmh.c @@ -350,10 +350,10 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, memset(vlvl_tbl, 0, sizeof(vlvl_tbl)); table->gx_votes[0].freq = 0; - table->gx_votes[0].cx_vote = 0; + table->gx_votes[0].dep_vote = 0; /* Disable cx vote in gmu dcvs table if it is not supported in DT */ if (pwr->pwrlevels[0].cx_level == 0xffffffff) - table->gx_votes[0].cx_vote = 0xffffffff; + table->gx_votes[0].dep_vote = 0xffffffff; /* GMU power levels are in ascending order */ for (index = 1, i = pwr->num_pwrlevels - 1; i >= 0; i--, index++) { @@ -363,7 +363,7 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, table->gx_votes[index].freq = pwr->pwrlevels[i].gpu_freq / 1000; ret = to_cx_hlvl(cx_rail, cx_vlvl, - &table->gx_votes[index].cx_vote); + &table->gx_votes[index].dep_vote); if (ret) { dev_err(device->dev, "Unsupported cx corner: %u\n", cx_vlvl); diff --git a/adreno_gen8_rpmh.c b/adreno_gen8_rpmh.c index 361a436b04..525626740c 100644 --- 
a/adreno_gen8_rpmh.c +++ b/adreno_gen8_rpmh.c @@ -293,27 +293,64 @@ static int setup_cx_arc_votes(struct gen8_gmu_device *gmu, return ret; } -static int to_cx_hlvl(struct rpmh_arc_vals *cx_rail, u32 vlvl, u32 *hlvl) +#define GEN8_DEP_VOTE_SET(cx, mx) \ + (FIELD_PREP(GENMASK(31, 14), 0x3FFFF) | \ + FIELD_PREP(GENMASK(13, 8), mx) | \ + FIELD_PREP(GENMASK(7, 0), cx)) + +static int setup_dependency_domain_tbl(u32 *votes, + struct rpmh_arc_vals *dep_rail, struct rpmh_arc_vals *cx_rail, + u16 *vlvl, u32 *cx_vlvl, u32 num_entries) { - u32 i; + u32 cx_vote, mx_vote; + int i, j; - /* - * This means that the Gx level doesn't have a dependency on Cx level. - * Return the same value to disable cx voting at GMU. - */ - if (vlvl == 0xffffffff) { - *hlvl = vlvl; - return 0; - } + for (i = 1; i < num_entries; i++) { + bool found_match = false; - for (i = 0; i < cx_rail->num; i++) { - if (cx_rail->val[i] >= vlvl) { - *hlvl = i; - return 0; + if (cx_vlvl[i] == 0xffffffff) { + /* This means that the Gx level doesn't have a dependency on Cx level */ + cx_vote = 0xff; + found_match = true; + } else { + for (j = 0; j < cx_rail->num; j++) { + if (cx_rail->val[j] >= cx_vlvl[i]) { + cx_vote = j; + found_match = true; + break; + } + } } + + /* If we did not find a matching VLVL level then abort */ + if (!found_match) { + pr_err("kgsl: Unsupported cx corner: %u\n", cx_vlvl[i]); + return -EINVAL; + } + + /* + * Set Mx dependency domain votes for Gx level. 
Look for indexes + * whose vlvl value is greater than or equal to the vlvl value + * of the corresponding index of dependency rail + */ + for (j = 0; j < dep_rail->num; j++) { + if (dep_rail->val[j] >= vlvl[i]) { + mx_vote = j; + found_match = true; + break; + } + } + + /* If we did not find a matching VLVL level then abort */ + if (!found_match) { + pr_err("kgsl: Unsupported mx corner: %u\n", vlvl[i]); + return -EINVAL; + } + + votes[i] = GEN8_DEP_VOTE_SET(cx_vote, mx_vote); } - return -EINVAL; + return 0; } /* @@ -321,71 +358,77 @@ static int to_cx_hlvl(struct rpmh_arc_vals *cx_rail, u32 vlvl, u32 *hlvl) * @hfi: Pointer to hfi device * @pri_rail: Pointer to primary power rail vlvl table * @sec_rail: Pointer to second/dependent power rail vlvl table + * @gmxc_rail: Pointer to MxG power rail vlvl table * * This function initializes the gx votes for all gpu frequencies * for gpu dcvs */ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail, - struct rpmh_arc_vals *cx_rail) + struct rpmh_arc_vals *gmxc_rail, struct rpmh_arc_vals *cx_rail) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct kgsl_pwrctrl *pwr = &device->pwrctrl; struct gen8_dcvs_table *table = &gmu->dcvs_table; - u32 index; u16 vlvl_tbl[MAX_GX_LEVELS]; + u32 cx_vlvl_tbl[MAX_GX_LEVELS]; u32 gx_votes[MAX_GX_LEVELS]; + u32 dep_votes[MAX_GX_LEVELS]; int ret, i; - if (pwr->num_pwrlevels + 1 > ARRAY_SIZE(vlvl_tbl)) { + table->gpu_level_num = pwr->num_pwrlevels + 1; + + if (table->gpu_level_num > ARRAY_SIZE(vlvl_tbl)) { dev_err(device->dev, "Defined more GPU DCVS levels than RPMh can support\n"); return -ERANGE; } - /* Add the zero powerlevel for the perf table */ - table->gpu_level_num = pwr->num_pwrlevels + 1; - + /* Initialize vlvl tables */ memset(vlvl_tbl, 0, sizeof(vlvl_tbl)); + memset(cx_vlvl_tbl, 0, sizeof(cx_vlvl_tbl)); + /* Fill the vlvl tables. 
GMU power levels are in ascending order */ + for (i = 1; i < table->gpu_level_num; i++) { + vlvl_tbl[i] = pwr->pwrlevels[pwr->num_pwrlevels - i].voltage_level; + cx_vlvl_tbl[i] = pwr->pwrlevels[pwr->num_pwrlevels - i].cx_level; + } + + /* If the target does not have a dedicated Mx rail, use secondary rail */ + if (gmxc_rail == NULL) + ret = setup_volt_dependency_tbl(gx_votes, pri_rail, sec_rail, + vlvl_tbl, table->gpu_level_num); + else + ret = setup_volt_dependency_tbl(gx_votes, pri_rail, gmxc_rail, + vlvl_tbl, table->gpu_level_num); + if (ret) + return ret; + + ret = setup_dependency_domain_tbl(dep_votes, sec_rail, cx_rail, + vlvl_tbl, cx_vlvl_tbl, table->gpu_level_num); + if (ret) + return ret; + + /* Populate DCVS table with all the votes */ + for (i = 1; i < table->gpu_level_num; i++) { + table->gx_votes[i].freq = pwr->pwrlevels[pwr->num_pwrlevels - i].gpu_freq / 1000; + table->gx_votes[i].vote = gx_votes[i]; + table->gx_votes[i].dep_vote = dep_votes[i]; + } + + /* Add the zero powerlevel for the perf table */ table->gx_votes[0].freq = 0; - table->gx_votes[0].cx_vote = 0; - /* Disable cx vote in gmu dcvs table if it is not supported in DT */ - if (pwr->pwrlevels[0].cx_level == 0xffffffff) - table->gx_votes[0].cx_vote = 0xffffffff; - - /* GMU power levels are in ascending order */ - for (index = 1, i = pwr->num_pwrlevels - 1; i >= 0; i--, index++) { - u32 cx_vlvl = pwr->pwrlevels[i].cx_level; - - vlvl_tbl[index] = pwr->pwrlevels[i].voltage_level; - table->gx_votes[index].freq = pwr->pwrlevels[i].gpu_freq / 1000; - - ret = to_cx_hlvl(cx_rail, cx_vlvl, - &table->gx_votes[index].cx_vote); - if (ret) { - dev_err(device->dev, "Unsupported cx corner: %u\n", - cx_vlvl); - return ret; - } - } - - ret = setup_volt_dependency_tbl(gx_votes, pri_rail, - sec_rail, vlvl_tbl, table->gpu_level_num); - if (!ret) { - for (i = 0; i < table->gpu_level_num; i++) - table->gx_votes[i].vote = gx_votes[i]; - } + table->gx_votes[0].vote = 0; + table->gx_votes[0].dep_vote = 0xFFFFFFFF; 
return ret; - } static int build_dcvs_table(struct adreno_device *adreno_dev) { struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - struct rpmh_arc_vals gx_arc, cx_arc, mx_arc; + struct rpmh_arc_vals gx_arc, cx_arc, mx_arc, gmxc_arc; int ret; ret = rpmh_arc_cmds(&gx_arc, "gfx.lvl"); @@ -406,12 +449,15 @@ static int build_dcvs_table(struct adreno_device *adreno_dev) /* If the target supports dedicated MxC rail, read the same */ if (cmd_db_read_addr("gmxc.lvl")) { - ret = rpmh_arc_cmds(&mx_arc, "gmxc.lvl"); + ret = rpmh_arc_cmds(&gmxc_arc, "gmxc.lvl"); if (ret) return ret; + ret = setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, &gmxc_arc, &cx_arc); + } else { + ret = setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, NULL, &cx_arc); } - return setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, &cx_arc); + return ret; } /* diff --git a/adreno_hfi.h b/adreno_hfi.h index f48529b73d..b7f1c26cb3 100644 --- a/adreno_hfi.h +++ b/adreno_hfi.h @@ -541,8 +541,8 @@ struct hfi_bwtable_cmd { struct opp_gx_desc { u32 vote; - /* This is 'acdLvl' in gmu fw which is now repurposed for cx vote */ - u32 cx_vote; + /* This is 'acdLvl' in gmu fw which is now repurposed for various dependency votes */ + u32 dep_vote; u32 freq; } __packed; From b9ccedb6eee8680350a677549ed351cf0aafcb15 Mon Sep 17 00:00:00 2001 From: Hemasri Yallanki Date: Mon, 27 May 2024 11:11:19 +0530 Subject: [PATCH 0834/1016] kgsl: gen8: Remove DBGC register programming during GPU boot During GPU boot-up, there is no need to write DBGC registers. Thus, remove them from non-context register list for gen8_3_0 . 
Change-Id: I39efc028192ef9a0771f13c529e1a78a0ef419f3 Signed-off-by: Hemasri Yallanki --- adreno-gpulist.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index b3eb36eb89..f11c77ba1b 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2945,12 +2945,6 @@ static const struct kgsl_regmap_list gen8_3_0_gbif_cx_regs[] = { /* GEN8_3_0 noncontext register list */ static const struct gen8_nonctxt_regs gen8_3_0_nonctxt_regs[] = { { GEN8_CP_SMMU_STREAM_ID_LPAC, 0x00000101, BIT(PIPE_NONE) }, - { GEN8_DBGC_CFG_DBGBUS_CNTLT, 0xf0004000, BIT(PIPE_NONE) }, - { GEN8_DBGC_CFG_DBGBUS_MASKL_0, 0x00000003, BIT(PIPE_NONE) }, - { GEN8_DBGC_CFG_DBGBUS_MASKL_2, 0xffffffff, BIT(PIPE_NONE) }, - { GEN8_DBGC_CFG_DBGBUS_MASKL_3, 0xffffffff, BIT(PIPE_NONE) }, - { GEN8_DBGC_CFG_DBGBUS_BYTEL_0, 0x00000008, BIT(PIPE_NONE) }, - { GEN8_DBGC_CFG_DBGBUS_BYTEL_1, 0x76543210, BIT(PIPE_NONE) }, { GEN8_GRAS_DBG_ECO_CNTL, 0x00f80800, BIT(PIPE_BV) | BIT(PIPE_BR) }, { GEN8_PC_AUTO_VERTEX_STRIDE, 0x00000001, BIT(PIPE_BV) | BIT(PIPE_BR) }, { GEN8_PC_VIS_STREAM_CNTL, 0x10010000, BIT(PIPE_BV) | BIT(PIPE_BR) }, From ff8057f0006429267651dc09c95ff1bf01471da3 Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Mon, 6 May 2024 14:15:49 +0530 Subject: [PATCH 0835/1016] kgsl: gen7: Fix GEN7 register programming Currently, the BV pipe of the GEN7_GRAS_NC_MODE_CNTL register is not programmed correctly. Due to this it shows reset value. To fix this, program both the BR and BV pipes using the GEN7_CP_APERTURE_CNTL_HOST then, write to the GEN7_GRAS_NC_MODE_CNTL register for BR and BV respectively. For gen7 there are pipe specific registers which need to be part of the static powerup list for CP to restore at IFPC interval. Hence add support to include all the pipe register to external powerup reglist. 
Change-Id: I756b0fb38cf4b808203ee4bc5a00c08884ce2c4e Signed-off-by: Sanjay Yadav --- adreno.h | 6 ++ adreno_gen7.c | 165 +++++++++++++++++++++++++++++++++++++++++--------- adreno_gen7.h | 14 +++++ 3 files changed, 157 insertions(+), 28 deletions(-) diff --git a/adreno.h b/adreno.h index b55c4c58cf..339f2be92e 100644 --- a/adreno.h +++ b/adreno.h @@ -1308,6 +1308,12 @@ static inline int adreno_is_gen8_0_x_family(struct adreno_device *adreno_dev) adreno_is_gen8_4_0(adreno_dev); } +/* Gen7 target which does not support concurrent binning */ +static inline int adreno_is_gen7_no_cb_family(struct adreno_device *adreno_dev) +{ + return adreno_is_gen7_14_0(adreno_dev); +} + /* * adreno_checkreg_off() - Checks the validity of a register enum * @adreno_dev: Pointer to adreno device diff --git a/adreno_gen7.c b/adreno_gen7.c index d0108ee562..f65aab31d3 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -35,8 +35,6 @@ static const u32 gen7_pwrup_reglist[] = { GEN7_UCHE_CACHE_WAYS, GEN7_UCHE_MODE_CNTL, GEN7_RB_NC_MODE_CNTL, - GEN7_RB_CMP_DBG_ECO_CNTL, - GEN7_GRAS_NC_MODE_CNTL, GEN7_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE, GEN7_UCHE_GBIF_GX_CONFIG, GEN7_UCHE_CLIENT_PF, @@ -55,9 +53,7 @@ static const u32 gen7_0_0_pwrup_reglist[] = { GEN7_UCHE_CACHE_WAYS, GEN7_UCHE_MODE_CNTL, GEN7_RB_NC_MODE_CNTL, - GEN7_RB_CMP_DBG_ECO_CNTL, GEN7_SP_NC_MODE_CNTL, - GEN7_GRAS_NC_MODE_CNTL, GEN7_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE, GEN7_UCHE_GBIF_GX_CONFIG, GEN7_UCHE_CLIENT_PF, @@ -183,6 +179,31 @@ static const u32 gen7_9_x_ifpc_pwrup_reglist[] = { GEN7_TPL1_BICUBIC_WEIGHTS_TABLE_4, }; +static const struct gen7_pwrup_extlist gen7_pwrup_extlist_cb[] = { + { GEN7_GRAS_NC_MODE_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR)}, + { GEN7_RB_CMP_DBG_ECO_CNTL, BIT(PIPE_BR)}, +}; + +static const u32 gen7_pwrup_reglist_no_cb[] = { + GEN7_UCHE_TRAP_BASE_LO, + GEN7_UCHE_TRAP_BASE_HI, + GEN7_UCHE_WRITE_THRU_BASE_LO, + GEN7_UCHE_WRITE_THRU_BASE_HI, + GEN7_UCHE_GMEM_RANGE_MIN_LO, + GEN7_UCHE_GMEM_RANGE_MIN_HI, + 
GEN7_UCHE_GMEM_RANGE_MAX_LO, + GEN7_UCHE_GMEM_RANGE_MAX_HI, + GEN7_UCHE_CACHE_WAYS, + GEN7_UCHE_MODE_CNTL, + GEN7_RB_NC_MODE_CNTL, + GEN7_RB_CMP_DBG_ECO_CNTL, + GEN7_GRAS_NC_MODE_CNTL, + GEN7_SP_NC_MODE_CNTL, + GEN7_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE, + GEN7_UCHE_GBIF_GX_CONFIG, + GEN7_UCHE_CLIENT_PF, +}; + static int acd_calibrate_set(void *data, u64 val) { struct kgsl_device *device = data; @@ -425,6 +446,41 @@ void gen7_get_gpu_feature_info(struct adreno_device *adreno_dev) adreno_dev->feature_fuse = feature_fuse; } +void gen7_host_aperture_set(struct adreno_device *adreno_dev, u32 pipe_id) +{ + struct gen7_device *gen7_dev = container_of(adreno_dev, + struct gen7_device, adreno_dev); + u32 aperture_val = (FIELD_PREP(GENMASK(13, 12), pipe_id)); + + /* Check if we already set the aperture */ + if (gen7_dev->aperture == aperture_val) + return; + + kgsl_regwrite(KGSL_DEVICE(adreno_dev), GEN7_CP_APERTURE_CNTL_HOST, aperture_val); + /* Make sure the write finishes for respective pipe_id */ + mb(); + + gen7_dev->aperture = aperture_val; +} + +static inline void gen7_regwrite_aperture(struct kgsl_device *device, + u32 offsetwords, u32 value, u32 pipe) +{ + gen7_host_aperture_set(ADRENO_DEVICE(device), pipe); + + kgsl_regmap_write(&device->regmap, value, offsetwords); + /* Ensure that the previous register write has occurred */ + mb(); +} + +void gen7_regread_aperture(struct kgsl_device *device, + u32 offsetwords, u32 *value, u32 pipe) +{ + gen7_host_aperture_set(ADRENO_DEVICE(device), pipe); + + *value = kgsl_regmap_read(&device->regmap, offsetwords); +} + #define GEN7_PROTECT_DEFAULT (BIT(0) | BIT(1) | BIT(3)) static void gen7_protect_init(struct adreno_device *adreno_dev) { @@ -520,14 +576,16 @@ static void gen7_hwcg_set(struct adreno_device *adreno_dev, bool on) static void gen7_patch_pwrup_reglist(struct adreno_device *adreno_dev) { + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_device *gen7_dev = container_of(adreno_dev, struct gen7_device, 
adreno_dev); struct adreno_reglist_list reglist[3]; void *ptr = adreno_dev->pwrup_reglist->hostptr; struct cpu_gpu_lock *lock = ptr; - u32 items = 0, i, j; + u32 items = 0, i, j, pipe_id; u32 *dest = ptr + sizeof(*lock); - /* Static IFPC-only registers */ - if (adreno_is_gen7_0_x_family(adreno_dev) || adreno_is_gen7_14_0(adreno_dev)) { + /* Static IFPC restore only registers */ + if (adreno_is_gen7_0_x_family(adreno_dev) || adreno_is_gen7_no_cb_family(adreno_dev)) { reglist[items].regs = gen7_0_0_ifpc_pwrup_reglist; reglist[items].count = ARRAY_SIZE(gen7_0_0_ifpc_pwrup_reglist); } else { @@ -545,7 +603,10 @@ static void gen7_patch_pwrup_reglist(struct adreno_device *adreno_dev) } /* Static IFPC + preemption registers */ - if (adreno_is_gen7_0_x_family(adreno_dev) || adreno_is_gen7_14_0(adreno_dev)) { + if (adreno_is_gen7_no_cb_family(adreno_dev)) { + reglist[items].regs = gen7_pwrup_reglist_no_cb; + reglist[items].count = ARRAY_SIZE(gen7_pwrup_reglist_no_cb); + } else if (adreno_is_gen7_0_x_family(adreno_dev)) { reglist[items].regs = gen7_0_0_pwrup_reglist; reglist[items].count = ARRAY_SIZE(gen7_0_0_pwrup_reglist); } else { @@ -564,13 +625,13 @@ static void gen7_patch_pwrup_reglist(struct adreno_device *adreno_dev) for (j = 0; j < reglist[i].count; j++) { *dest++ = r[j]; - kgsl_regread(KGSL_DEVICE(adreno_dev), r[j], dest++); + kgsl_regread(device, r[j], dest++); } } /* * The overall register list is composed of - * 1. Static IFPC-only registers + * 1. Static IFPC restore only registers * 2. Static IFPC + preemption registers * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects) * @@ -582,8 +643,39 @@ static void gen7_patch_pwrup_reglist(struct adreno_device *adreno_dev) * dynamic list with triplets as * (
), and the length is * stored as number for triplets in dynamic_list_len. + * + * Starting with Gen7, some of the registers that are initialized statically + * by the kernel are pipe-specific. Because only the dynamic list is able to + * support specifying a pipe ID, these registers are bundled along with any + * dynamic entries such as perf counter selects into a single dynamic list. */ - lock->dynamic_list_len = 0; + gen7_dev->ext_pwrup_list_len = 0; + + /* + * Write external pipe specific regs (
- triplets) + * offset and the current value into GPU buffer + */ + if (adreno_is_gen7_no_cb_family(adreno_dev)) { + lock->dynamic_list_len = 0; + return; + } + + for (pipe_id = PIPE_BR; pipe_id <= PIPE_BV; pipe_id++) { + for (i = 0; i < ARRAY_SIZE(gen7_pwrup_extlist_cb); i++) { + unsigned long pipe = (unsigned long)gen7_pwrup_extlist_cb[i].pipelines; + + if (!test_bit(pipe_id, &pipe)) + continue; + + *dest++ = FIELD_PREP(GENMASK(13, 12), pipe_id); + *dest++ = gen7_pwrup_extlist_cb[i].offset; + gen7_regread_aperture(device, gen7_pwrup_extlist_cb[i].offset, + dest++, pipe_id); + gen7_dev->ext_pwrup_list_len++; + } + } + + lock->dynamic_list_len = gen7_dev->ext_pwrup_list_len; } /* _llc_configure_gpu_scid() - Program the sub-cache ID for all GPU blocks */ @@ -682,6 +774,7 @@ int gen7_start(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev); u32 mal, mode = 0, rgb565_predicator = 0; + struct gen7_device *gen7_dev = container_of(adreno_dev, struct gen7_device, adreno_dev); /* * HBB values 13 to 16 can represented LSB of HBB from 0 to 3. * Any HBB value beyond 16 needs programming MSB of HBB. @@ -744,7 +837,7 @@ int gen7_start(struct adreno_device *adreno_dev) * CP takes care of the restore during IFPC exit. We need to restore at slumber * boundary as well */ - if (pwrup_lock->dynamic_list_len > 0) + if (pwrup_lock->dynamic_list_len - gen7_dev->ext_pwrup_list_len > 0) kgsl_regwrite(device, GEN7_RBBM_PERFCTR_CNTL, 0x1); /* Turn on the IFPC counter (countable 4 on XOCLK4) */ @@ -797,9 +890,19 @@ int gen7_start(struct adreno_device *adreno_dev) ((mal == 64) ? 
BIT(3) : 0) | FIELD_PREP(GENMASK(2, 1), hbb_lo)); - kgsl_regwrite(device, GEN7_GRAS_NC_MODE_CNTL, - FIELD_PREP(GENMASK(8, 5), - (adreno_dev->highest_bank_bit - 13))); + if (!adreno_is_gen7_no_cb_family(adreno_dev)) { + gen7_regwrite_aperture(device, GEN7_GRAS_NC_MODE_CNTL, FIELD_PREP(GENMASK(8, 5), + (adreno_dev->highest_bank_bit - 13)), PIPE_BR); + + gen7_regwrite_aperture(device, GEN7_GRAS_NC_MODE_CNTL, FIELD_PREP(GENMASK(8, 5), + (adreno_dev->highest_bank_bit - 13)), PIPE_BV); + + /* Clear aperture register */ + gen7_host_aperture_set(adreno_dev, 0); + } else { + kgsl_regwrite(device, GEN7_GRAS_NC_MODE_CNTL, FIELD_PREP(GENMASK(8, 5), + (adreno_dev->highest_bank_bit - 13))); + } kgsl_regwrite(device, GEN7_UCHE_MODE_CNTL, ((mal == 64) ? BIT(23) : 0) | @@ -1763,22 +1866,24 @@ int gen7_perfcounter_remove(struct adreno_device *adreno_dev, struct adreno_perfcount_register *reg, u32 groupid) { const struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + struct gen7_device *gen7_dev = container_of(adreno_dev, struct gen7_device, adreno_dev); const struct adreno_perfcount_group *group; void *ptr = adreno_dev->pwrup_reglist->hostptr; struct cpu_gpu_lock *lock = ptr; - u32 *data = ptr + sizeof(*lock); - int offset = (lock->ifpc_list_len + lock->preemption_list_len) * 2; - int i, second_last_offset, last_offset; bool remove_counter = false; - u32 pipe = FIELD_PREP(GENMASK(13, 12), _get_pipeid(groupid)); + u32 offset = ((lock->ifpc_list_len + lock->preemption_list_len) * 2) + + (gen7_dev->ext_pwrup_list_len * 3); + int i, last_offset, second_last_offset; + u32 *data = ptr + sizeof(*lock), pipe = FIELD_PREP(GENMASK(13, 12), _get_pipeid(groupid)); + u16 perfcntr_list_len = lock->dynamic_list_len - gen7_dev->ext_pwrup_list_len; - if (!lock->dynamic_list_len) + if (!perfcntr_list_len) return -EINVAL; group = &(counters->groups[groupid]); if (!(group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE)) { - if (lock->dynamic_list_len != 1) + if (perfcntr_list_len != 1) 
return 0; if (kgsl_hwlock(lock)) { @@ -1831,9 +1936,9 @@ disable_perfcounter: * If dynamic list length is 1 and no_restore_count is 0, then we can remove the * only entry in the list, which is the GEN7_RBBM_PERFCTRL_CNTL. */ - if (lock->dynamic_list_len == 1 && !adreno_dev->no_restore_count) { + if (perfcntr_list_len == 1 && !adreno_dev->no_restore_count) { memset(&data[offset], 0, 3 * sizeof(u32)); - lock->dynamic_list_len = 0; + lock->dynamic_list_len = gen7_dev->ext_pwrup_list_len; } kgsl_hwunlock(lock); @@ -1843,14 +1948,18 @@ disable_perfcounter: int gen7_perfcounter_update(struct adreno_device *adreno_dev, struct adreno_perfcount_register *reg, bool update_reg, u32 pipe, unsigned long flags) { + struct gen7_device *gen7_dev = container_of(adreno_dev, struct gen7_device, adreno_dev); void *ptr = adreno_dev->pwrup_reglist->hostptr; struct cpu_gpu_lock *lock = ptr; + u32 offset = ((lock->ifpc_list_len + lock->preemption_list_len) * 2) + + (gen7_dev->ext_pwrup_list_len * 3); u32 *data = ptr + sizeof(*lock); - int i, offset = (lock->ifpc_list_len + lock->preemption_list_len) * 2; + int i; + u16 perfcntr_list_len = lock->dynamic_list_len - gen7_dev->ext_pwrup_list_len; bool select_reg_present = false; if (flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) { - for (i = 0; i < lock->dynamic_list_len; i++) { + for (i = 0; i < perfcntr_list_len; i++) { if ((data[offset + 1] == reg->select) && (data[offset] == pipe)) { select_reg_present = true; break; @@ -1861,7 +1970,7 @@ int gen7_perfcounter_update(struct adreno_device *adreno_dev, offset += 3; } - } else if (lock->dynamic_list_len) { + } else if (perfcntr_list_len) { goto update; } @@ -1882,8 +1991,8 @@ int gen7_perfcounter_update(struct adreno_device *adreno_dev, } /* Initialize the lock->dynamic_list_len to account for GEN7_RBBM_PERFCTR_CNTL */ - if (!lock->dynamic_list_len) - lock->dynamic_list_len = 1; + if (!perfcntr_list_len) + lock->dynamic_list_len = gen7_dev->ext_pwrup_list_len + 1; /* * For all targets 
GEN7_RBBM_PERFCTR_CNTL needs to be the last entry, diff --git a/adreno_gen7.h b/adreno_gen7.h index 74d082eca0..21c276af7f 100644 --- a/adreno_gen7.h +++ b/adreno_gen7.h @@ -41,6 +41,20 @@ struct gen7_device { struct gen7_gmu_device gmu; /** @adreno_dev: Container for the generic adreno device */ struct adreno_device adreno_dev; + /** @aperture: The last value that the host aperture register was programmed to */ + u32 aperture; + /** @ext_pwrup_list_len: External pwrup reglist length */ + u16 ext_pwrup_list_len; +}; + +/** + * struct gen7_pwrup_extlist - container for powerup external reglist + */ +struct gen7_pwrup_extlist { + /** offset: Dword offset of the register to write */ + u32 offset; + /** pipelines: pipelines to write */ + u32 pipelines; }; /** From 4141aa9238ae4885aae156694b641b518af3e9c5 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Wed, 26 Jun 2024 00:00:46 +0530 Subject: [PATCH 0836/1016] kgsl: hwfence: Update adreno_ext_hw_fence_signal print string adreno_input_hw_fence event uses 'ctx' key for draw object context identifier and 'id' for fence context. Use the same key string in adreno_ext_hw_fence_signal event to avoid confusion. 
Change-Id: I47f0c04958b1b32ded95392a4f06b9f2fc7701e2 Signed-off-by: Kamal Agrawal --- adreno_trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno_trace.h b/adreno_trace.h index 86c4da7345..16974a83f7 100644 --- a/adreno_trace.h +++ b/adreno_trace.h @@ -968,7 +968,7 @@ TRACE_EVENT(adreno_ext_hw_fence_signal, __entry->flags = flags; __entry->ticks = gmu_ticks; ), - TP_printk("ctx=%llu seqno=%llu flags=0x%x ticks=%llu", + TP_printk("id=%llu seqno=%llu flags=0x%x ticks=%llu", __entry->context, __entry->seq_no, __entry->flags, __entry->ticks ) ); From 094986bb2fc5d6b4e554bac6649296c03eea3d41 Mon Sep 17 00:00:00 2001 From: Archana Sriram Date: Fri, 14 Jun 2024 18:17:36 +0530 Subject: [PATCH 0837/1016] kgsl: gen7: Add functions to check supported features in GMU Add functions to check if: 1) Chipid should be passed to GMU instead of GPU REV 2) AB voting is supported through GMU. Change-Id: Ib477cf1b5c7cc033dad306b19b36b042a1ba6f16 Signed-off-by: Archana Sriram --- adreno_gen7_gmu.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index f4c1e80a3b..d65311dad5 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -59,6 +59,28 @@ static struct gmu_vma_entry gen7_gmu_vma[] = { }, }; +/** + * adreno_gmu_chipid_based() - Return true for targets where Chipid + * should be passed to GMU instead of GPU REV + * @adreno_dev: A pointer to the adreno_device + */ +static inline int adreno_gmu_chipid_based(struct adreno_device *adreno_dev) +{ + return adreno_is_gen7_0_0(adreno_dev) || adreno_is_gen7_0_1(adreno_dev) || + adreno_is_gen7_4_0(adreno_dev) || adreno_is_gen7_3_0(adreno_dev); +} + +/** + * adreno_gmu_ab_support() - Return true for targets where AB voting + * is supported through GMU + * @adreno_dev: A pointer to the adreno_device + */ +static inline int adreno_gmu_ab_support(struct adreno_device *adreno_dev) +{ + return adreno_is_gen7_9_0(adreno_dev) || 
adreno_is_gen7_9_1(adreno_dev) || + adreno_is_gen7_11_0(adreno_dev); +} + static ssize_t log_stream_enable_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { @@ -987,6 +1009,8 @@ void gen7_gmu_register_config(struct adreno_device *adreno_dev) /* Pass chipid to GMU FW, must happen before starting GMU */ gmu_core_regwrite(device, GEN7_GMU_GENERAL_10, + adreno_gmu_chipid_based(adreno_dev) ? + ADRENO_GMU_CHIPID(adreno_dev->chipid) : ADRENO_GMU_REV(ADRENO_GPUREV(adreno_dev))); /* Log size is encoded in (number of 4K units - 1) */ @@ -2036,7 +2060,8 @@ static int gen7_gmu_first_boot(struct adreno_device *adreno_dev) if (ret) goto err; - if (gen7_hfi_send_get_value(adreno_dev, HFI_VALUE_GMU_AB_VOTE, 0) == 1 && + if (adreno_gmu_ab_support(adreno_dev) && + gen7_hfi_send_get_value(adreno_dev, HFI_VALUE_GMU_AB_VOTE, 0) == 1 && !WARN_ONCE(!adreno_dev->gpucore->num_ddr_channels, "Number of DDR channel is not specified in gpu core")) { adreno_dev->gmu_ab = true; From f268a3badf7770033125cd1b999c82c0c1625b2b Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Wed, 29 May 2024 15:38:54 -0700 Subject: [PATCH 0838/1016] kgsl: gen8: Rearrange some GMU register dumping in snapshot 1. Ensure GX GDSC is ON to dump GMU registers on GX headswitch 2. Check for RPMH state to make sure GPU is active when dumping the registers. 3. CX debugbus does not need GX ON, move it to its own function so that it can be called independently. 4. Move the GX debugbus and tracebuffer under the GX check to ensure we do not read/write these registers without checking for GX ON. 
Change-Id: I711df499a2499e7114044a9ef37b55c563f2939c Signed-off-by: Urvashi Agrawal --- adreno_gen8.c | 4 +- adreno_gen8_0_0_snapshot.h | 21 ++++--- adreno_gen8_gmu.c | 11 ++++ adreno_gen8_gmu.h | 7 +++ adreno_gen8_gmu_snapshot.c | 3 +- adreno_gen8_snapshot.c | 125 ++++++++++++++++++++----------------- 6 files changed, 101 insertions(+), 70 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index c949b539c5..442ec114a3 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -2870,7 +2870,7 @@ static void gen8_lpac_fault_header(struct adreno_device *adreno_dev, pr_context(device, drawobj->context, "lpac cmdline: %s\n", drawctxt->base.proc_priv->cmdline); - if (!gx_on) + if (!gen8_gmu_rpmh_pwr_state_is_active(device) || !gx_on) goto done; kgsl_regread(device, GEN8_RBBM_LPAC_STATUS, &status); @@ -2926,7 +2926,7 @@ static void gen8_fault_header(struct adreno_device *adreno_dev, drawctxt->base.proc_priv->cmdline); } - if (!gx_on) + if (!gen8_gmu_rpmh_pwr_state_is_active(device) || !gx_on) goto done; kgsl_regread(device, GEN8_RBBM_STATUS, &status); diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h index 050854acda..f5f04344ec 100644 --- a/adreno_gen8_0_0_snapshot.h +++ b/adreno_gen8_0_0_snapshot.h @@ -1883,11 +1883,6 @@ static struct gen8_reg_list gen8_0_0_ahb_registers[] = { { UNSLICE, gen8_0_0_ahb_secure_gpu_registers }, }; -static struct gen8_reg_list gen8_gmu_gx_registers[] = { - { UNSLICE, gen8_0_0_gmugx_registers }, - { SLICE, gen8_0_0_gmugx_slice_registers }, -}; - /* * Block : ['GDPM_LKG'] * REGION : UNSLICE @@ -2078,11 +2073,17 @@ static const u32 *gen8_0_0_external_core_regs[] = { gen8_0_0_gpu_cc_ahb2phy_swman_registers, gen8_0_0_gpu_cc_gpu_cc_reg_registers, gen8_0_0_gpu_cc_pll0_cm_pll_taycan_common_registers, - gen8_0_0_acd_acd_mnd_registers, - gen8_0_0_gx_clkctl_ahb2phy_broadcast_swman_registers, - gen8_0_0_gx_clkctl_ahb2phy_swman_registers, - gen8_0_0_gx_clkctl_pll0_cm_pll_taycan_common_registers, - 
gen8_0_0_gx_clkctl_gx_clkctl_reg_registers, gen8_0_0_cpr_registers, }; + +static struct gen8_reg_list gen8_gmu_gx_registers[] = { + { UNSLICE, gen8_0_0_gmugx_registers }, + { UNSLICE, gen8_0_0_gx_clkctl_ahb2phy_broadcast_swman_registers }, + { UNSLICE, gen8_0_0_gx_clkctl_ahb2phy_swman_registers }, + { UNSLICE, gen8_0_0_gx_clkctl_pll0_cm_pll_taycan_common_registers }, + { UNSLICE, gen8_0_0_gx_clkctl_gx_clkctl_reg_registers }, + { UNSLICE, gen8_0_0_acd_acd_mnd_registers }, + { SLICE, gen8_0_0_gmugx_slice_registers }, +}; + #endif /*_ADRENO_GEN8_0_0_SNAPSHOT_H */ diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index de6fcc89b3..291fb52822 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -684,6 +684,14 @@ bool gen8_gmu_gx_is_on(struct adreno_device *adreno_dev) return is_on(val); } +bool gen8_gmu_rpmh_pwr_state_is_active(struct kgsl_device *device) +{ + u32 val; + + gmu_core_regread(device, GEN8_GMUCX_RPMH_POWER_STATE, &val); + return (val == GPU_HW_ACTIVE) ? true : false; +} + static const char *idle_level_name(int level) { if (level == GPU_HW_ACTIVE) @@ -855,6 +863,9 @@ void gen8_gmu_register_config(struct adreno_device *adreno_dev) /* Clear any previously set cm3 fault */ atomic_set(&gmu->cm3_fault, 0); + /* Init the power state register before GMU turns on GX */ + gmu_core_regwrite(device, GEN8_GMUCX_RPMH_POWER_STATE, 0xF); + /* Vote veto for FAL10 */ gmu_core_regwrite(device, GEN8_GMUCX_CX_FALNEXT_INTF, 0x1); gmu_core_regwrite(device, GEN8_GMUCX_CX_FAL_INTF, 0x1); diff --git a/adreno_gen8_gmu.h b/adreno_gen8_gmu.h index 73f9a2d6c2..d4831f3c4d 100644 --- a/adreno_gen8_gmu.h +++ b/adreno_gen8_gmu.h @@ -488,4 +488,11 @@ u32 gen8_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab); */ int gen8_gmu_clock_set_rate(struct adreno_device *adreno_dev, u32 req_freq); +/** + * gen8_gmu_rpmh_pwr_state_is_active - Check the state of GPU HW + * @device: Pointer to the kgsl device + * + * Returns true on active or false otherwise + */ +bool 
gen8_gmu_rpmh_pwr_state_is_active(struct kgsl_device *device); #endif diff --git a/adreno_gen8_gmu_snapshot.c b/adreno_gen8_gmu_snapshot.c index 4619c1f26d..7c3bd4e536 100644 --- a/adreno_gen8_gmu_snapshot.c +++ b/adreno_gen8_gmu_snapshot.c @@ -207,7 +207,8 @@ static void gen8_gmu_device_snapshot(struct kgsl_device *device, adreno_snapshot_registers_v2, (void *) gen8_snapshot_block_list->gmu_cx_unsliced_regs); - if (!gen8_gmu_gx_is_on(adreno_dev)) + if (!gen8_gmu_rpmh_pwr_state_is_active(device) || + !gen8_gmu_gx_is_on(adreno_dev)) goto dtcm; /* Set fence to ALLOW mode so registers can be read */ diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index a51b71d881..1e0fe73ae8 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -219,8 +219,9 @@ size_t gen8_legacy_snapshot_registers(struct kgsl_device *device, if (info->regs->sel) kgsl_regwrite(device, info->regs->sel->host_reg, info->regs->sel->val); - kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL - (info->slice_id, 0, 0, 0)); + if (info->regs->slice_region) + kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL + (info->slice_id, 0, 0, 0)); /* Make sure the previous writes are posted before reading */ mb(); @@ -1328,6 +1329,61 @@ static size_t gen8_snapshot_cx_side_dbgc_debugbus_block(struct kgsl_device *devi return size; } +static void gen8_snapshot_cx_debugbus(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + u32 i; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLT, + FIELD_PREP(GENMASK(31, 28), 0xf)); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLM, + FIELD_PREP(GENMASK(27, 24), 0xf)); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); + + 
kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_0, + FIELD_PREP(GENMASK(3, 0), 0x0) | + FIELD_PREP(GENMASK(7, 4), 0x1) | + FIELD_PREP(GENMASK(11, 8), 0x2) | + FIELD_PREP(GENMASK(15, 12), 0x3) | + FIELD_PREP(GENMASK(19, 16), 0x4) | + FIELD_PREP(GENMASK(23, 20), 0x5) | + FIELD_PREP(GENMASK(27, 24), 0x6) | + FIELD_PREP(GENMASK(31, 28), 0x7)); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_1, + FIELD_PREP(GENMASK(3, 0), 0x8) | + FIELD_PREP(GENMASK(7, 4), 0x9) | + FIELD_PREP(GENMASK(11, 8), 0xa) | + FIELD_PREP(GENMASK(15, 12), 0xb) | + FIELD_PREP(GENMASK(19, 16), 0xc) | + FIELD_PREP(GENMASK(23, 20), 0xd) | + FIELD_PREP(GENMASK(27, 24), 0xe) | + FIELD_PREP(GENMASK(31, 28), 0xf)); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); + + /* Dump the CX debugbus data if the block exists */ + if (!kgsl_regmap_valid_offset(&device->regmap, GEN8_CX_DBGC_CFG_DBGBUS_SEL_A)) + return; + + for (i = 0; i < gen8_snapshot_block_list->cx_debugbus_blocks_len; i++) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, gen8_snapshot_cx_dbgc_debugbus_block, + (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, + snapshot, gen8_snapshot_cx_side_dbgc_debugbus_block, + (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); + } +} + /* gen8_snapshot_debugbus() - Capture debug bus data */ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) @@ -1370,41 +1426,6 @@ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_MASKL_2, 0); kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_MASKL_3, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLT, - FIELD_PREP(GENMASK(31, 28), 0xf)); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLM, - FIELD_PREP(GENMASK(27, 24), 0xf)); - - kgsl_regwrite(device, 
GEN8_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_0, - FIELD_PREP(GENMASK(3, 0), 0x0) | - FIELD_PREP(GENMASK(7, 4), 0x1) | - FIELD_PREP(GENMASK(11, 8), 0x2) | - FIELD_PREP(GENMASK(15, 12), 0x3) | - FIELD_PREP(GENMASK(19, 16), 0x4) | - FIELD_PREP(GENMASK(23, 20), 0x5) | - FIELD_PREP(GENMASK(27, 24), 0x6) | - FIELD_PREP(GENMASK(31, 28), 0x7)); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_1, - FIELD_PREP(GENMASK(3, 0), 0x8) | - FIELD_PREP(GENMASK(7, 4), 0x9) | - FIELD_PREP(GENMASK(11, 8), 0xa) | - FIELD_PREP(GENMASK(15, 12), 0xb) | - FIELD_PREP(GENMASK(19, 16), 0xc) | - FIELD_PREP(GENMASK(23, 20), 0xd) | - FIELD_PREP(GENMASK(27, 24), 0xe) | - FIELD_PREP(GENMASK(31, 28), 0xf)); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_2, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_3, 0); - for (i = 0; i < gen8_snapshot_block_list->debugbus_blocks_len; i++) { kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUGBUS, @@ -1426,20 +1447,6 @@ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, snapshot, gen8_snapshot_dbgc_side_debugbus_block, (void *) &gen8_snapshot_block_list->gbif_debugbus_blocks[i]); } - - /* Dump the CX debugbus data if the block exists */ - if (kgsl_regmap_valid_offset(&device->regmap, GEN8_CX_DBGC_CFG_DBGBUS_SEL_A)) { - for (i = 0; i < gen8_snapshot_block_list->cx_debugbus_blocks_len; i++) { - kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_DEBUGBUS, - snapshot, gen8_snapshot_cx_dbgc_debugbus_block, - (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); - kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, - snapshot, 
gen8_snapshot_cx_side_dbgc_debugbus_block, - (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); - } - } } static void gen8_reglist_snapshot(struct kgsl_device *device, @@ -1547,7 +1554,8 @@ static void gen8_cx_misc_regs_snapshot(struct kgsl_device *device, u64 *ptr, offset = 0; const u32 *regs_ptr = (const u32 *)gen8_snapshot_block_list->cx_misc_regs; - if (CD_SCRIPT_CHECK(device) || !adreno_gx_is_on(ADRENO_DEVICE(device))) + if (CD_SCRIPT_CHECK(device) || !gen8_gmu_rpmh_pwr_state_is_active(device) + || !gen8_gmu_gx_is_on(ADRENO_DEVICE(device))) goto legacy_snapshot; /* Build the crash script */ @@ -1624,15 +1632,18 @@ void gen8_snapshot(struct adreno_device *adreno_dev, if (!gmu_core_isenabled(device)) gen8_snapshot_external_core_regs(device, snapshot); + gen8_cx_misc_regs_snapshot(device, snapshot); + + gen8_snapshot_cx_debugbus(adreno_dev, snapshot); + + if (!gen8_gmu_rpmh_pwr_state_is_active(device) || + !gen8_gmu_gx_is_on(adreno_dev)) + return; + gen8_snapshot_trace_buffer(device, snapshot); gen8_snapshot_debugbus(adreno_dev, snapshot); - gen8_cx_misc_regs_snapshot(device, snapshot); - - if (!adreno_gx_is_on(adreno_dev)) - return; - is_current_rt = rt_task(current); if (is_current_rt) From c98badf88c4595aab6c3a8362f9c6b068e453059 Mon Sep 17 00:00:00 2001 From: NISARG SHETH Date: Fri, 7 Jun 2024 15:18:02 +0530 Subject: [PATCH 0839/1016] kgsl: Enable AHB timeout detection for gen_8_4_0 Define noc_timeout_us for gen_8_4_0 GPU to enable AHB timeout detection Change-Id: I175539ca4be961fd268cb03e00c342e30a3b0864 Signed-off-by: NISARG SHETH --- adreno-gpulist.h | 1 + 1 file changed, 1 insertion(+) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index f11c77ba1b..6522fadceb 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2930,6 +2930,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_4_0 = { .acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4), .bcl_data = 1, .ctxt_record_size = (13536 * SZ_1K), + .noc_timeout_us = 3410, /* 3.41 msec */ }; 
extern const struct gen8_snapshot_block_list gen8_3_0_snapshot_block_list; From c0b731371a77abd3dcefed85e79e0918bf324140 Mon Sep 17 00:00:00 2001 From: Archana Sriram Date: Sun, 9 Jun 2024 20:58:34 +0530 Subject: [PATCH 0840/1016] kgsl: gen7: Add support for gen7_3_0 GPU Add changes to support gen7_3_0 GPU. Change-Id: I5115a46616ff0022578a98e70754405564e30bf1 Signed-off-by: Archana Sriram Signed-off-by: Himanshu Agrawal --- adreno-gpulist.h | 96 +++++++++++ adreno.h | 8 +- adreno_gen7.c | 79 +++++---- adreno_gen7_3_0_snapshot.h | 342 +++++++++++++++++++++++++++++++++++++ adreno_gen7_snapshot.c | 36 +++- 5 files changed, 518 insertions(+), 43 deletions(-) create mode 100644 adreno_gen7_3_0_snapshot.h diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 6522fadceb..0d7b54c6c2 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2059,6 +2059,16 @@ static const struct kgsl_regmap_list gen7_2_0_gbif_regs[] = { { GEN7_GMU_CX_MRC_GBIF_QOS_CTRL, 0x33 }, }; +extern const struct gen7_snapshot_block_list gen7_3_0_snapshot_block_list; + +static const struct kgsl_regmap_list gen7_3_0_gbif_regs[] = { + { GEN7_GBIF_QSB_SIDE0, 0x00071620 }, + { GEN7_GBIF_QSB_SIDE1, 0x00071620 }, + { GEN7_GBIF_QSB_SIDE2, 0x00071620 }, + { GEN7_GBIF_QSB_SIDE3, 0x00071620 }, + { GEN7_RBBM_GBIF_CLIENT_QOS_CNTL, 0x00000003 }, +}; + static const u32 gen7_6_0_gbif_client_qos_values[KGSL_PRIORITY_MAX_RB_LEVELS] = { 0x03230323, }; @@ -2120,6 +2130,58 @@ static const struct kgsl_regmap_list gen7_2_0_hwcg_regs[] = { { GEN7_RBBM_CLOCK_HYST_GMU_GX, 0x00000555 }, }; +static const struct kgsl_regmap_list gen7_3_0_hwcg_regs[] = { + { GEN7_RBBM_CLOCK_CNTL_SP0, 0x02222222 }, + { GEN7_RBBM_CLOCK_CNTL2_SP0, 0x02022222 }, + { GEN7_RBBM_CLOCK_HYST_SP0, 0x0000f3cf }, + { GEN7_RBBM_CLOCK_DELAY_SP0, 0x00000080 }, + { GEN7_RBBM_CLOCK_CNTL_TP0, 0x22222220 }, + { GEN7_RBBM_CLOCK_CNTL2_TP0, 0x22222222 }, + { GEN7_RBBM_CLOCK_CNTL3_TP0, 0x22222222 }, + { GEN7_RBBM_CLOCK_CNTL4_TP0, 0x00222222 }, + { 
GEN7_RBBM_CLOCK_HYST_TP0, 0x77777777 }, + { GEN7_RBBM_CLOCK_HYST2_TP0, 0x77777777 }, + { GEN7_RBBM_CLOCK_HYST3_TP0, 0x77777777 }, + { GEN7_RBBM_CLOCK_HYST4_TP0, 0x00077777 }, + { GEN7_RBBM_CLOCK_DELAY_TP0, 0x11111111 }, + { GEN7_RBBM_CLOCK_DELAY2_TP0, 0x11111111 }, + { GEN7_RBBM_CLOCK_DELAY3_TP0, 0x11111111 }, + { GEN7_RBBM_CLOCK_DELAY4_TP0, 0x00011111 }, + { GEN7_RBBM_CLOCK_CNTL_UCHE, 0x22222222 }, + { GEN7_RBBM_CLOCK_HYST_UCHE, 0x00000004 }, + { GEN7_RBBM_CLOCK_DELAY_UCHE, 0x00000002 }, + { GEN7_RBBM_CLOCK_CNTL_RB0, 0x22222222 }, + { GEN7_RBBM_CLOCK_CNTL2_RB0, 0x01002222 }, + { GEN7_RBBM_CLOCK_CNTL_CCU0, 0x00002220 }, + { GEN7_RBBM_CLOCK_HYST_RB_CCU0, 0x44000f00 }, + { GEN7_RBBM_CLOCK_CNTL_RAC, 0x25222022 }, + { GEN7_RBBM_CLOCK_CNTL2_RAC, 0x00555555 }, + { GEN7_RBBM_CLOCK_DELAY_RAC, 0x00000011 }, + { GEN7_RBBM_CLOCK_HYST_RAC, 0x00440044 }, + { GEN7_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222 }, + { GEN7_RBBM_CLOCK_MODE2_GRAS, 0x00000222 }, + { GEN7_RBBM_CLOCK_MODE_GPC, 0x02222223 }, + { GEN7_RBBM_CLOCK_MODE_VFD, 0x00002222 }, + { GEN7_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000 }, + { GEN7_RBBM_CLOCK_HYST_GPC, 0x04104004 }, + { GEN7_RBBM_CLOCK_HYST_VFD, 0x00000000 }, + { GEN7_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000 }, + { GEN7_RBBM_CLOCK_DELAY_GPC, 0x00000200 }, + { GEN7_RBBM_CLOCK_DELAY_VFD, 0x00002222 }, + { GEN7_RBBM_CLOCK_MODE_HLSQ, 0x00002222 }, + { GEN7_RBBM_CLOCK_DELAY_HLSQ, 0x00000000 }, + { GEN7_RBBM_CLOCK_HYST_HLSQ, 0x00000000 }, + { GEN7_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002 }, + { GEN7_RBBM_CLOCK_MODE_CP, 0x00000223 }, + { GEN7_RBBM_CLOCK_CNTL, 0x8aa8aa82 }, + { GEN7_RBBM_ISDB_CNT, 0x00000182 }, + { GEN7_RBBM_RAC_THRESHOLD_CNT, 0x00000000 }, + { GEN7_RBBM_SP_HYST_CNT, 0x00000000 }, + { GEN7_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222 }, + { GEN7_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111 }, + { GEN7_RBBM_CLOCK_HYST_GMU_GX, 0x00000555 }, +}; + static const struct kgsl_regmap_list gen7_2_0_ao_hwcg_regs[] = { { GEN7_GPU_GMU_AO_GMU_CGC_MODE_CNTL, 0x00020202 }, { 
GEN7_GPU_GMU_AO_GMU_CGC_DELAY_CNTL, 0x00010111 }, @@ -2200,6 +2262,39 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_2_1 = { .fast_bus_hint = true, }; +static const struct adreno_gen7_core adreno_gpu_core_gen7_3_0 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_GEN7_3_0, + UINT_MAX, UINT_MAX, UINT_MAX, 0), + .compatible = "qcom,adreno-gpu-gen7-3-0", + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | + ADRENO_CONTENT_PROTECTION | ADRENO_IFPC | + ADRENO_PREEMPTION | ADRENO_BCL | ADRENO_ACD, + .gpudev = &adreno_gen7_gmu_gpudev.base, + .perfcounters = &adreno_gen7_no_cb_perfcounters, + .uche_gmem_alignment = 0, + .gmem_size = SZ_512K, + .bus_width = 32, + .snapshot_size = SZ_2M, + }, + .gmu_fw_version = GMU_VERSION(4, 0, 0), + .sqefw_name = "a710_sqe.fw", + .gmufw_name = "gmu_gen70000.bin", + .zap_name = "a710_zap.mdt", + .hwcg = gen7_3_0_hwcg_regs, + .hwcg_count = ARRAY_SIZE(gen7_3_0_hwcg_regs), + .ao_hwcg = gen7_0_0_ao_hwcg_regs, + .ao_hwcg_count = ARRAY_SIZE(gen7_0_0_ao_hwcg_regs), + .gbif = gen7_3_0_gbif_regs, + .gbif_count = ARRAY_SIZE(gen7_3_0_gbif_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = gen7_0_0_protected_regs, + .highest_bank_bit = 15, + .gen7_snapshot_block_list = &gen7_3_0_snapshot_block_list, + .preempt_level = 1, + .ctxt_record_size = (862 * SZ_1K), +}; + static const struct adreno_gen7_core adreno_gpu_core_gen7_4_0 = { .base = { DEFINE_ADRENO_REV(ADRENO_REV_GEN7_4_0, @@ -3063,6 +3158,7 @@ static const struct adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_a662.base, &adreno_gpu_core_gen7_2_0.base, &adreno_gpu_core_gen7_2_1.base, + &adreno_gpu_core_gen7_3_0.base, &adreno_gpu_core_gen7_4_0.base, &adreno_gpu_core_gen7_6_0.base, &adreno_gpu_core_gen7_9_0.base, diff --git a/adreno.h b/adreno.h index 339f2be92e..3fc604eb7c 100644 --- a/adreno.h +++ b/adreno.h @@ -240,6 +240,7 @@ enum adreno_gpurev { ADRENO_REV_GEN7_0_1 = ADRENO_GPUREV_VALUE(7, 0, 1), ADRENO_REV_GEN7_2_0 = ADRENO_GPUREV_VALUE(7, 2, 0), ADRENO_REV_GEN7_2_1 = 
ADRENO_GPUREV_VALUE(7, 2, 1), + ADRENO_REV_GEN7_3_0 = ADRENO_GPUREV_VALUE(7, 3, 0), ADRENO_REV_GEN7_4_0 = ADRENO_GPUREV_VALUE(7, 4, 0), ADRENO_REV_GEN7_6_0 = ADRENO_GPUREV_VALUE(7, 6, 0), ADRENO_REV_GEN7_9_0 = ADRENO_GPUREV_VALUE(7, 9, 0), @@ -1273,6 +1274,7 @@ ADRENO_TARGET(gen7_0_0, ADRENO_REV_GEN7_0_0) ADRENO_TARGET(gen7_0_1, ADRENO_REV_GEN7_0_1) ADRENO_TARGET(gen7_2_0, ADRENO_REV_GEN7_2_0) ADRENO_TARGET(gen7_2_1, ADRENO_REV_GEN7_2_1) +ADRENO_TARGET(gen7_3_0, ADRENO_REV_GEN7_3_0) ADRENO_TARGET(gen7_4_0, ADRENO_REV_GEN7_4_0) ADRENO_TARGET(gen7_6_0, ADRENO_REV_GEN7_6_0) ADRENO_TARGET(gen7_9_0, ADRENO_REV_GEN7_9_0) @@ -1292,7 +1294,7 @@ static inline int adreno_is_gen7_9_x(struct adreno_device *adreno_dev) static inline int adreno_is_gen7_0_x_family(struct adreno_device *adreno_dev) { return adreno_is_gen7_0_0(adreno_dev) || adreno_is_gen7_0_1(adreno_dev) || - adreno_is_gen7_4_0(adreno_dev); + adreno_is_gen7_4_0(adreno_dev) || adreno_is_gen7_3_0(adreno_dev); } static inline int adreno_is_gen7_2_x_family(struct adreno_device *adreno_dev) @@ -1308,10 +1310,10 @@ static inline int adreno_is_gen8_0_x_family(struct adreno_device *adreno_dev) adreno_is_gen8_4_0(adreno_dev); } -/* Gen7 target which does not support concurrent binning */ +/* Gen7 targets which do not support concurrent binning */ static inline int adreno_is_gen7_no_cb_family(struct adreno_device *adreno_dev) { - return adreno_is_gen7_14_0(adreno_dev); + return adreno_is_gen7_14_0(adreno_dev) || adreno_is_gen7_3_0(adreno_dev); } /* diff --git a/adreno_gen7.c b/adreno_gen7.c index f65aab31d3..be1d9529c9 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -944,11 +944,12 @@ int gen7_start(struct adreno_device *adreno_dev) kgsl_regwrite(device, GEN7_CP_APRIV_CNTL, GEN7_BR_APRIV_DEFAULT); - /* gen7_14_0 does not have BV and LPAC hence skip regwrite */ - if (!adreno_is_gen7_14_0(adreno_dev)) { + /* Skip this regwrite for Gen7 targets that do not have BV and LPAC */ + if (!adreno_is_gen7_no_cb_family(adreno_dev)) 
kgsl_regwrite(device, GEN7_CP_BV_APRIV_CNTL, GEN7_APRIV_DEFAULT); + + if (ADRENO_FEATURE(adreno_dev, ADRENO_LPAC)) kgsl_regwrite(device, GEN7_CP_LPAC_APRIV_CNTL, GEN7_APRIV_DEFAULT); - } /* Marking AQE Instruction cache fetches as privileged */ if (ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) @@ -1210,8 +1211,8 @@ int gen7_rb_start(struct adreno_device *adreno_dev) kgsl_regwrite(device, GEN7_CP_RB_RPTR_ADDR_LO, lower_32_bits(addr)); kgsl_regwrite(device, GEN7_CP_RB_RPTR_ADDR_HI, upper_32_bits(addr)); - /* gen7_14_0 does not have BV hence skip regwrite */ - if (!adreno_is_gen7_14_0(adreno_dev)) { + /* Skip this regwrite for Gen7 targets that do not have BV */ + if (!adreno_is_gen7_no_cb_family(adreno_dev)) { addr = SCRATCH_RB_GPU_ADDR(device, rb->id, bv_rptr); kgsl_regwrite(device, GEN7_CP_BV_RB_RPTR_ADDR_LO, lower_32_bits(addr)); kgsl_regwrite(device, GEN7_CP_BV_RB_RPTR_ADDR_HI, upper_32_bits(addr)); @@ -1357,46 +1358,50 @@ static void gen7_cp_hw_err_callback(struct adreno_device *adreno_dev, int bit) if (status1 & BIT(CP_INT_ILLEGALINSTRUCTION)) dev_crit_ratelimited(dev, "CP Illegal instruction error\n"); - if (status1 & BIT(CP_INT_OPCODEERRORLPAC)) - dev_crit_ratelimited(dev, "CP Opcode error LPAC\n"); + if (ADRENO_FEATURE(adreno_dev, ADRENO_LPAC)) { + if (status1 & BIT(CP_INT_OPCODEERRORLPAC)) + dev_crit_ratelimited(dev, "CP Opcode error LPAC\n"); - if (status1 & BIT(CP_INT_UCODEERRORLPAC)) - dev_crit_ratelimited(dev, "CP ucode error LPAC\n"); + if (status1 & BIT(CP_INT_UCODEERRORLPAC)) + dev_crit_ratelimited(dev, "CP ucode error LPAC\n"); - if (status1 & BIT(CP_INT_CPHWFAULTLPAC)) - dev_crit_ratelimited(dev, "CP hw fault LPAC\n"); + if (status1 & BIT(CP_INT_CPHWFAULTLPAC)) + dev_crit_ratelimited(dev, "CP hw fault LPAC\n"); - if (status1 & BIT(CP_INT_REGISTERPROTECTIONLPAC)) - dev_crit_ratelimited(dev, "CP register protection LPAC\n"); + if (status1 & BIT(CP_INT_REGISTERPROTECTIONLPAC)) + dev_crit_ratelimited(dev, "CP register protection LPAC\n"); - if (status1 & 
BIT(CP_INT_ILLEGALINSTRUCTIONLPAC)) - dev_crit_ratelimited(dev, "CP illegal instruction LPAC\n"); - - if (status1 & BIT(CP_INT_OPCODEERRORBV)) { - kgsl_regwrite(device, GEN7_CP_BV_SQE_STAT_ADDR, 1); - kgsl_regread(device, GEN7_CP_BV_SQE_STAT_DATA, &opcode); - dev_crit_ratelimited(dev, "CP opcode error BV | opcode=0x%8.8x\n", opcode); + if (status1 & BIT(CP_INT_ILLEGALINSTRUCTIONLPAC)) + dev_crit_ratelimited(dev, "CP illegal instruction LPAC\n"); } - if (status1 & BIT(CP_INT_UCODEERRORBV)) - dev_crit_ratelimited(dev, "CP ucode error BV\n"); + if (!adreno_is_gen7_no_cb_family(adreno_dev)) { + if (status1 & BIT(CP_INT_OPCODEERRORBV)) { + kgsl_regwrite(device, GEN7_CP_BV_SQE_STAT_ADDR, 1); + kgsl_regread(device, GEN7_CP_BV_SQE_STAT_DATA, &opcode); + dev_crit_ratelimited(dev, "CP opcode error BV | opcode=0x%8.8x\n", opcode); + } - if (status1 & BIT(CP_INT_CPHWFAULTBV)) { - kgsl_regread(device, GEN7_CP_BV_HW_FAULT, &status2); - dev_crit_ratelimited(dev, - "CP BV | Ringbuffer HW fault | status=%x\n", status2); + if (status1 & BIT(CP_INT_UCODEERRORBV)) + dev_crit_ratelimited(dev, "CP ucode error BV\n"); + + if (status1 & BIT(CP_INT_CPHWFAULTBV)) { + kgsl_regread(device, GEN7_CP_BV_HW_FAULT, &status2); + dev_crit_ratelimited(dev, + "CP BV | Ringbuffer HW fault | status=%x\n", status2); + } + + if (status1 & BIT(CP_INT_REGISTERPROTECTIONBV)) { + kgsl_regread(device, GEN7_CP_BV_PROTECT_STATUS, &status2); + dev_crit_ratelimited(dev, + "CP BV | Protected mode error | %s | addr=%x | status=%x\n", + status2 & BIT(20) ? "READ" : "WRITE", + status2 & 0x3ffff, status2); + } + + if (status1 & BIT(CP_INT_ILLEGALINSTRUCTIONBV)) + dev_crit_ratelimited(dev, "CP illegal instruction BV\n"); } - - if (status1 & BIT(CP_INT_REGISTERPROTECTIONBV)) { - kgsl_regread(device, GEN7_CP_BV_PROTECT_STATUS, &status2); - dev_crit_ratelimited(dev, - "CP BV | Protected mode error | %s | addr=%x | status=%x\n", - status2 & BIT(20) ? 
"READ" : "WRITE", - status2 & 0x3ffff, status2); - } - - if (status1 & BIT(CP_INT_ILLEGALINSTRUCTIONBV)) - dev_crit_ratelimited(dev, "CP illegal instruction BV\n"); } static void gen7_err_callback(struct adreno_device *adreno_dev, int bit) diff --git a/adreno_gen7_3_0_snapshot.h b/adreno_gen7_3_0_snapshot.h new file mode 100644 index 0000000000..7d27facccf --- /dev/null +++ b/adreno_gen7_3_0_snapshot.h @@ -0,0 +1,342 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + */ +#ifndef __ADRENO_GEN7_3_0_SNAPSHOT_H +#define __ADRENO_GEN7_3_0_SNAPSHOT_H + +#include "adreno_gen7_snapshot.h" +#include "adreno_gen7_0_0_snapshot.h" + +static const u32 gen7_3_0_debugbus_blocks[] = { + DEBUGBUS_CP_0_0, + DEBUGBUS_CP_0_1, + DEBUGBUS_RBBM, + DEBUGBUS_HLSQ, + DEBUGBUS_UCHE_0, + DEBUGBUS_TESS_BR, + DEBUGBUS_PC_BR, + DEBUGBUS_VFDP_BR, + DEBUGBUS_VPC_BR, + DEBUGBUS_TSE_BR, + DEBUGBUS_RAS_BR, + DEBUGBUS_VSC, + DEBUGBUS_COM_0, + DEBUGBUS_LRZ_BR, + DEBUGBUS_UFC_0, + DEBUGBUS_UFC_1, + DEBUGBUS_GMU_GX, + DEBUGBUS_DBGC, + DEBUGBUS_GPC_BR, + DEBUGBUS_LARC, + DEBUGBUS_HLSQ_SPTP, + DEBUGBUS_RB_0, + DEBUGBUS_UCHE_WRAPPER, + DEBUGBUS_CCU_0, + DEBUGBUS_VFD_BR_0, + DEBUGBUS_VFD_BR_1, + DEBUGBUS_USP_0, + DEBUGBUS_TP_0, + DEBUGBUS_TP_1, + DEBUGBUS_USPTP_0, + DEBUGBUS_USPTP_1, +}; + +static struct gen7_shader_block gen7_3_0_shader_blocks[] = { + {HLSQ_CPS_MISC_RAM_1, 0x200, 1, 1, PIPE_BR, HLSQ_STATE}, + {SP_LB_0_DATA, 0x800, 1, 2, PIPE_BR, USPTP}, + {SP_LB_1_DATA, 0x800, 1, 2, PIPE_BR, USPTP}, + {SP_LB_2_DATA, 0x800, 1, 2, PIPE_BR, USPTP}, + {SP_LB_3_DATA, 0x800, 1, 2, PIPE_BR, USPTP}, + {SP_LB_4_DATA, 0x800, 1, 2, PIPE_BR, USPTP}, + {SP_LB_5_DATA, 0x800, 1, 2, PIPE_BR, USPTP}, + {TP0_TMO_DATA, 0x200, 1, 2, PIPE_BR, USPTP}, + {TP0_SMO_DATA, 0x80, 1, 2, PIPE_BR, USPTP}, + {TP0_MIPMAP_BASE_DATA, 0x3c0, 1, 2, PIPE_BR, USPTP}, + {SP_INST_DATA, 0x800, 1, 2, 
PIPE_BR, USPTP}, + {SP_INST_DATA_1, 0x800, 1, 2, PIPE_BR, USPTP}, + {SP_CB_RAM, 0x390, 1, 2, PIPE_BR, USPTP,}, + {SP_INST_TAG, 0x90, 1, 2, PIPE_BR, USPTP}, + {SP_TMO_TAG, 0x80, 1, 2, PIPE_BR, USPTP}, + {SP_SMO_TAG, 0x80, 1, 2, PIPE_BR, USPTP}, + {SP_STATE_DATA, 0x40, 1, 2, PIPE_BR, USPTP}, + {SP_HWAVE_RAM, 0x100, 1, 2, PIPE_BR, USPTP}, + {SP_L0_INST_BUF, 0x50, 1, 2, PIPE_BR, USPTP}, + {HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x10, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_CPS_BE_CTXT_BUF_RAM_TAG, 0x10, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x300, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_GFX_CPS_BE_CTXT_BUF_RAM, 0x300, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_CHUNK_CVS_RAM, 0x1c0, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_CHUNK_CPS_RAM, 0x300, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_CHUNK_CVS_RAM_TAG, 0x40, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_CHUNK_CPS_RAM_TAG, 0x40, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_ICB_CVS_CB_BASE_TAG, 0x10, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_ICB_CPS_CB_BASE_TAG, 0x10, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_CVS_MISC_RAM, 0x280, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_CPS_MISC_RAM, 0x800, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_INST_RAM, 0x800, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_GFX_CVS_CONST_RAM, 0x800, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_GFX_CPS_CONST_RAM, 0x800, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_CVS_MISC_RAM_TAG, 0x10, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_CPS_MISC_RAM_TAG, 0x10, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_INST_RAM_TAG, 0x80, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_GFX_CVS_CONST_RAM_TAG, 0x64, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_GFX_CPS_CONST_RAM_TAG, 0x64, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_INST_RAM_1, 0x800, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_STPROC_META, 0x10, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_BV_BE_META, 0x10, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_DATAPATH_META, 0x20, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_FRONTEND_META, 0x40, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_INDIRECT_META, 0x10, 1, 1, PIPE_BR, HLSQ_STATE}, + {HLSQ_BACKEND_META, 0x40, 1, 1, PIPE_BR, 
HLSQ_STATE}, +}; + +static const u32 gen7_3_0_gpu_registers[] = { + 0x00000, 0x00000, 0x00002, 0x00002, 0x00011, 0x00012, 0x00016, 0x0001b, + 0x0001f, 0x00032, 0x00038, 0x0003c, 0x00042, 0x00042, 0x00044, 0x00044, + 0x00047, 0x00047, 0x00049, 0x0004a, 0x0004c, 0x0004c, 0x00050, 0x00050, + 0x00056, 0x00056, 0x00073, 0x00075, 0x000ad, 0x000ae, 0x000b0, 0x000b0, + 0x000b4, 0x000b4, 0x000b8, 0x000b8, 0x000bc, 0x000bc, 0x000c0, 0x000c0, + 0x000c4, 0x000c4, 0x000c8, 0x000c8, 0x000cc, 0x000cc, 0x000d0, 0x000d0, + 0x000d4, 0x000d4, 0x000d8, 0x000d8, 0x000dc, 0x000dc, 0x000e0, 0x000e0, + 0x000e4, 0x000e4, 0x000e8, 0x000e8, 0x000ec, 0x000ec, 0x000f0, 0x000f0, + 0x000f4, 0x000f4, 0x000f8, 0x000f8, 0x00100, 0x00100, 0x00104, 0x0010b, + 0x0010f, 0x0011d, 0x0012f, 0x0012f, 0x00200, 0x0020d, 0x00211, 0x00211, + 0x00215, 0x00243, 0x00260, 0x00268, 0x00272, 0x00274, 0x00286, 0x00286, + 0x0028a, 0x0028a, 0x0028c, 0x0028c, 0x00300, 0x00401, 0x00500, 0x00500, + 0x00507, 0x0050b, 0x0050f, 0x0050f, 0x00511, 0x00511, 0x00533, 0x00534, + 0x00536, 0x00536, 0x00540, 0x00555, 0x00564, 0x00567, 0x00800, 0x00808, + 0x00810, 0x00813, 0x00820, 0x00821, 0x00823, 0x00827, 0x00830, 0x00834, + 0x00840, 0x00841, 0x00843, 0x00847, 0x0084f, 0x00886, 0x008a0, 0x008ab, + 0x008c0, 0x008c0, 0x008c4, 0x008c5, 0x008d0, 0x008dd, 0x008f0, 0x008f3, + 0x00900, 0x00903, 0x00908, 0x00911, 0x00928, 0x0093e, 0x00942, 0x0094d, + 0x00980, 0x00984, 0x0098d, 0x0098f, 0x009b0, 0x009b4, 0x009c2, 0x009c9, + 0x009ce, 0x009d7, 0x00a00, 0x00a00, 0x00a02, 0x00a03, 0x00a10, 0x00a4f, + 0x00a67, 0x00a6c, 0x00a9c, 0x00a9f, 0x00c00, 0x00c00, 0x00c02, 0x00c04, + 0x00c06, 0x00c06, 0x00c10, 0x00cd9, 0x00ce0, 0x00d0c, 0x00df0, 0x00df4, + 0x00e01, 0x00e02, 0x00e07, 0x00e0e, 0x00e10, 0x00e12, 0x00e17, 0x00e17, + 0x00e19, 0x00e19, 0x00e1b, 0x00e2b, 0x00e30, 0x00e32, 0x00e38, 0x00e3c, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_3_0_gpu_registers), 8)); + +static const u32 gen7_3_0_gmu_registers[] = { + 0x10001, 0x10001, 
0x10003, 0x10003, 0x10401, 0x10401, 0x10403, 0x10403, + 0x10801, 0x10801, 0x10803, 0x10803, 0x10c01, 0x10c01, 0x10c03, 0x10c03, + 0x11001, 0x11001, 0x11003, 0x11003, 0x11401, 0x11401, 0x11403, 0x11403, + 0x11801, 0x11801, 0x11803, 0x11803, 0x11c01, 0x11c01, 0x11c03, 0x11c03, + 0x1f400, 0x1f40b, 0x1f40f, 0x1f411, 0x1f500, 0x1f500, 0x1f507, 0x1f507, + 0x1f509, 0x1f50b, 0x1f800, 0x1f804, 0x1f807, 0x1f808, 0x1f80b, 0x1f80c, + 0x1f80f, 0x1f80f, 0x1f811, 0x1f811, 0x1f813, 0x1f817, 0x1f819, 0x1f81c, + 0x1f824, 0x1f82a, 0x1f82d, 0x1f830, 0x1f840, 0x1f853, 0x1f860, 0x1f860, + 0x1f870, 0x1f879, 0x1f87f, 0x1f87f, 0x1f888, 0x1f889, 0x1f8a0, 0x1f8a2, + 0x1f8a4, 0x1f8af, 0x1f8c0, 0x1f8c1, 0x1f8c3, 0x1f8c4, 0x1f8d0, 0x1f8d0, + 0x1f8ec, 0x1f8ec, 0x1f8f0, 0x1f8f1, 0x1f910, 0x1f911, 0x1f920, 0x1f921, + 0x1f924, 0x1f925, 0x1f928, 0x1f929, 0x1f92c, 0x1f92d, 0x1f940, 0x1f940, + 0x1f942, 0x1f944, 0x1f948, 0x1f94a, 0x1f980, 0x1f981, 0x1f984, 0x1f986, + 0x1f992, 0x1f993, 0x1f996, 0x1f99e, 0x1f9c0, 0x1f9c0, 0x1f9c5, 0x1f9d4, + 0x1f9f1, 0x1f9f1, 0x1f9f8, 0x1f9fa, 0x1fa00, 0x1fa03, 0x20000, 0x20005, + 0x20008, 0x20009, 0x20010, 0x20012, 0x20018, 0x20018, 0x20020, 0x20023, + 0x20030, 0x20031, 0x23801, 0x23801, 0x23803, 0x23803, 0x23805, 0x23805, + 0x23807, 0x23807, 0x23809, 0x23809, 0x2380b, 0x2380b, 0x2380d, 0x2380d, + 0x2380f, 0x2380f, 0x23811, 0x23811, 0x23813, 0x23813, 0x23815, 0x23815, + 0x23817, 0x23817, 0x23819, 0x23819, 0x2381b, 0x2381b, 0x2381d, 0x2381d, + 0x2381f, 0x23820, 0x23822, 0x23822, 0x23824, 0x23824, 0x23826, 0x23826, + 0x23828, 0x23828, 0x2382a, 0x2382a, 0x2382c, 0x2382c, 0x2382e, 0x2382e, + 0x23830, 0x23830, 0x23832, 0x23832, 0x23834, 0x23834, 0x23836, 0x23836, + 0x23838, 0x23838, 0x2383a, 0x2383a, 0x2383c, 0x2383c, 0x2383e, 0x2383e, + 0x23840, 0x23847, 0x23b00, 0x23b01, 0x23b03, 0x23b03, 0x23b05, 0x23b0e, + 0x23b10, 0x23b13, 0x23b15, 0x23b16, 0x23b20, 0x23b20, 0x23b28, 0x23b28, + 0x23b30, 0x23b30, + UINT_MAX, UINT_MAX, +}; 
+static_assert(IS_ALIGNED(sizeof(gen7_3_0_gmu_registers), 8)); + +static const u32 gen7_3_0_gmu_gx_registers[] = { + 0x1a802, 0x1a802, 0x1a883, 0x1a884, 0x1a900, 0x1a92b, 0x1a940, 0x1a940, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_3_0_gmu_gx_registers), 8)); + +static const u32 gen7_3_0_noncontext_pipe_bv_registers[] = { + 0x00887, 0x0088c, 0x08600, 0x08600, 0x08602, 0x08602, 0x08610, 0x0861b, + 0x08620, 0x08620, 0x08630, 0x08630, 0x08637, 0x08639, 0x08640, 0x08640, + 0x09600, 0x09600, 0x09602, 0x09603, 0x0960a, 0x09616, 0x09624, 0x0963a, + 0x09640, 0x09640, 0x0a600, 0x0a600, 0x0a603, 0x0a603, 0x0a610, 0x0a61f, + 0x0a630, 0x0a631, 0x0a638, 0x0a638, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_3_0_noncontext_pipe_bv_registers), 8)); + +static const u32 gen7_3_0_noncontext_rb_rac_pipe_br_registers[] = { + 0x08e10, 0x08e1c, 0x08e20, 0x08e25, 0x08e51, 0x08e54, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_3_0_noncontext_rb_rac_pipe_br_registers), 8)); + +/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_LPAC Location: HLSQ_STATE */ +static const u32 gen7_3_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers[] = { + 0x0aa40, 0x0aabf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_3_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers), 8)); + +/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_LPAC Location: uSPTP */ +static const u32 gen7_3_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers[] = { + 0x0aa40, 0x0aabf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_3_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers), 8)); + +static const struct gen7_sel_reg gen7_3_0_rb_rac_sel = { + .host_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_HOST, + .cd_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_CD, + .val = 0x0, +}; + +static const struct gen7_sel_reg gen7_3_0_rb_rbp_sel = { + .host_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_HOST, + .cd_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_CD, + .val = 0x9, +}; + +static struct gen7_cluster_registers 
gen7_3_0_clusters[] = { + { CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT, + gen7_0_0_noncontext_pipe_br_registers, }, + { CLUSTER_NONE, PIPE_BV, STATE_NON_CONTEXT, + gen7_3_0_noncontext_pipe_bv_registers, }, + { CLUSTER_NONE, PIPE_LPAC, STATE_NON_CONTEXT, + gen7_0_0_noncontext_pipe_lpac_registers, }, + { CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT, + gen7_3_0_noncontext_rb_rac_pipe_br_registers, &gen7_3_0_rb_rac_sel, }, + { CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT, + gen7_0_0_noncontext_rb_rbp_pipe_br_registers, &gen7_3_0_rb_rbp_sel, }, + { CLUSTER_GRAS, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_0_0_gras_cluster_gras_pipe_br_registers, }, + { CLUSTER_GRAS, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_0_0_gras_cluster_gras_pipe_br_registers, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_0_0_pc_cluster_fe_pipe_br_registers, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_0_0_pc_cluster_fe_pipe_br_registers, }, + { CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_0_0_rb_rac_cluster_ps_pipe_br_registers, &gen7_3_0_rb_rac_sel, }, + { CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_0_0_rb_rac_cluster_ps_pipe_br_registers, &gen7_3_0_rb_rac_sel, }, + { CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers, &gen7_3_0_rb_rbp_sel, }, + { CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers, &gen7_3_0_rb_rbp_sel, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_0_0_vfd_cluster_fe_pipe_br_registers, }, + { CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0, + gen7_0_0_vfd_cluster_fe_pipe_bv_registers, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_0_0_vfd_cluster_fe_pipe_br_registers, }, + { CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1, + gen7_0_0_vfd_cluster_fe_pipe_bv_registers, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_0_0_vpc_cluster_fe_pipe_br_registers, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_0_0_vpc_cluster_fe_pipe_br_registers, }, + { CLUSTER_PC_VS, PIPE_BR, STATE_FORCE_CTXT_0, + 
gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers, }, + { CLUSTER_PC_VS, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers, }, + { CLUSTER_VPC_PS, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers, }, + { CLUSTER_VPC_PS, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers, }, +}; + +static struct gen7_sptp_cluster_registers gen7_3_0_sptp_clusters[] = { + { CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, HLSQ_STATE, + gen7_0_0_sp_noncontext_pipe_br_hlsq_state_registers, 0xae00 }, + { CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, SP_TOP, + gen7_0_0_sp_noncontext_pipe_br_sp_top_registers, 0xae00 }, + { CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, USPTP, + gen7_0_0_sp_noncontext_pipe_br_usptp_registers, 0xae00 }, + { CLUSTER_NONE, TP0_NCTX_REG, PIPE_BR, 0, USPTP, + gen7_0_0_tpl1_noncontext_pipe_br_registers, 0xb600 }, + { CLUSTER_NONE, TP0_NCTX_REG, PIPE_LPAC, 0, USPTP, + gen7_0_0_tpl1_noncontext_pipe_lpac_registers, 0xb780 }, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE, + gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa980 }, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_DP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa980 }, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, SP_TOP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa980 }, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa980 }, + { CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE, + gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa980 }, + { CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_DP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa980 }, + { CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, SP_TOP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa980 }, + { CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa980 }, + { 
CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE, + gen7_3_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers, 0xa980 }, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP, + gen7_3_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers, 0xa980 }, + { CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE, + gen7_0_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers, 0xa800 }, + { CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, SP_TOP, + gen7_0_0_sp_cluster_sp_vs_pipe_br_sp_top_registers, 0xa800 }, + { CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP, + gen7_0_0_sp_cluster_sp_vs_pipe_br_usptp_registers, 0xa800 }, + { CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE, + gen7_0_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers, 0xa800 }, + { CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, SP_TOP, + gen7_0_0_sp_cluster_sp_vs_pipe_br_sp_top_registers, 0xa800 }, + { CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP, + gen7_0_0_sp_cluster_sp_vs_pipe_br_usptp_registers, 0xa800 }, + { CLUSTER_SP_PS, TP0_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP, + gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb180 }, + { CLUSTER_SP_PS, TP0_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP, + gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb180 }, + { CLUSTER_SP_PS, TP0_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP, + gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb180 }, + { CLUSTER_SP_PS, TP0_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP, + gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb180 }, + { CLUSTER_SP_VS, TP0_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP, + gen7_0_0_tpl1_cluster_sp_vs_pipe_br_registers, 0xb000 }, + { CLUSTER_SP_VS, TP0_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP, + gen7_0_0_tpl1_cluster_sp_vs_pipe_bv_registers, 0xb000 }, + { CLUSTER_SP_VS, TP0_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP, + gen7_0_0_tpl1_cluster_sp_vs_pipe_br_registers, 0xb000 }, + { CLUSTER_SP_VS, TP0_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP, + gen7_0_0_tpl1_cluster_sp_vs_pipe_bv_registers, 0xb000 }, +}; + +static const u32 gen7_3_0_cpr_registers[] 
= { + 0x26800, 0x26805, 0x26808, 0x2680c, 0x26814, 0x26814, 0x2681c, 0x2681c, + 0x26820, 0x26838, 0x26840, 0x26840, 0x26848, 0x26848, 0x26850, 0x26850, + 0x26880, 0x26889, 0x26980, 0x269b0, 0x269c0, 0x269c8, 0x269e0, 0x269ee, + 0x269fb, 0x269ff, 0x26a02, 0x26a07, 0x26a09, 0x26a0b, 0x26a10, 0x26b0f, + 0x27440, 0x27441, 0x27444, 0x27444, 0x27480, 0x274a2, 0x274ac, 0x274ac, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_3_0_cpr_registers), 8)); + +static struct gen7_reg_list gen7_3_0_reg_list[] = { + { gen7_3_0_gpu_registers, NULL }, + { gen7_0_0_cx_misc_registers, NULL }, + { gen7_0_0_dpm_registers, NULL }, + { NULL, NULL }, +}; + +static const u32 *gen7_3_0_external_core_regs[] = { + gen7_0_0_gpucc_registers, + gen7_3_0_cpr_registers, +}; + +static struct gen7_cp_indexed_reg gen7_3_0_cp_indexed_reg_list[] = { + { GEN7_CP_SQE_STAT_ADDR, GEN7_CP_SQE_STAT_DATA, 0x33}, + { GEN7_CP_DRAW_STATE_ADDR, GEN7_CP_DRAW_STATE_DATA, 0x100}, + { GEN7_CP_SQE_UCODE_DBG_ADDR, GEN7_CP_SQE_UCODE_DBG_DATA, 0x8000}, +}; + +#endif /*_ADRENO_GEN7_3_0_SNAPSHOT_H */ diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c index 4d5b21a284..1d4b63bd85 100644 --- a/adreno_gen7_snapshot.c +++ b/adreno_gen7_snapshot.c @@ -8,6 +8,7 @@ #include "adreno_snapshot.h" #include "adreno_gen7_0_0_snapshot.h" #include "adreno_gen7_2_0_snapshot.h" +#include "adreno_gen7_3_0_snapshot.h" #include "adreno_gen7_6_0_snapshot.h" #include "adreno_gen7_9_0_snapshot.h" #include "adreno_gen7_14_0_snapshot.h" @@ -74,6 +75,32 @@ const struct gen7_snapshot_block_list gen7_2_0_snapshot_block_list = { .index_registers_len = ARRAY_SIZE(gen7_cp_indexed_reg_list), }; +const struct gen7_snapshot_block_list gen7_3_0_snapshot_block_list = { + .pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers, + .debugbus_blocks = gen7_3_0_debugbus_blocks, + .debugbus_blocks_len = ARRAY_SIZE(gen7_3_0_debugbus_blocks), + .gbif_debugbus_blocks = gen7_gbif_debugbus_blocks, + .gbif_debugbus_blocks_len = 
ARRAY_SIZE(gen7_gbif_debugbus_blocks), + .cx_debugbus_blocks = gen7_cx_dbgc_debugbus_blocks, + .cx_debugbus_blocks_len = ARRAY_SIZE(gen7_cx_dbgc_debugbus_blocks), + .external_core_regs = gen7_3_0_external_core_regs, + .num_external_core_regs = ARRAY_SIZE(gen7_3_0_external_core_regs), + .gmu_regs = gen7_3_0_gmu_registers, + .gmu_gx_regs = gen7_3_0_gmu_gx_registers, + .rscc_regs = gen7_0_0_rscc_registers, + .reg_list = gen7_3_0_reg_list, + .cx_misc_regs = gen7_0_0_cx_misc_registers, + .shader_blocks = gen7_3_0_shader_blocks, + .num_shader_blocks = ARRAY_SIZE(gen7_3_0_shader_blocks), + .clusters = gen7_3_0_clusters, + .num_clusters = ARRAY_SIZE(gen7_3_0_clusters), + .sptp_clusters = gen7_3_0_sptp_clusters, + .num_sptp_clusters = ARRAY_SIZE(gen7_3_0_sptp_clusters), + .post_crashdumper_regs = gen7_0_0_post_crashdumper_registers, + .index_registers = gen7_3_0_cp_indexed_reg_list, + .index_registers_len = ARRAY_SIZE(gen7_3_0_cp_indexed_reg_list), +}; + const struct gen7_snapshot_block_list gen7_6_0_snapshot_block_list = { .pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers, .debugbus_blocks = gen7_2_0_debugbus_blocks, @@ -773,7 +800,7 @@ static void gen7_snapshot_mempool(struct kgsl_device *device, GEN7_CP_MEM_POOL_DBG_ADDR, GEN7_CP_MEM_POOL_DBG_DATA, 0, 0x2200); - if (!adreno_is_gen7_14_0(ADRENO_DEVICE(device))) { + if (!adreno_is_gen7_no_cb_family(ADRENO_DEVICE(device))) { kgsl_regrmw(device, GEN7_CP_BV_CHICKEN_DBG, 0x4, 0x4); kgsl_snapshot_indexed_registers(device, snapshot, GEN7_CP_BV_MEM_POOL_DBG_ADDR, GEN7_CP_BV_MEM_POOL_DBG_DATA, @@ -1726,10 +1753,13 @@ void gen7_snapshot(struct adreno_device *adreno_dev, if (!adreno_is_gen7_9_x(adreno_dev)) gen7_snapshot_br_roq(device, snapshot); - if (!adreno_is_gen7_9_x(adreno_dev) && !adreno_is_gen7_14_0(adreno_dev)) { + if (!adreno_is_gen7_9_x(adreno_dev) && + !adreno_is_gen7_no_cb_family(adreno_dev)) gen7_snapshot_bv_roq(device, snapshot); + + if (!adreno_is_gen7_9_x(adreno_dev) && + ADRENO_FEATURE(adreno_dev, 
ADRENO_LPAC)) gen7_snapshot_lpac_roq(device, snapshot); - } /* Mempool debug data */ gen7_snapshot_mempool(device, snapshot); From 8c9925287daf41ec586e958af1053e25cdc2a3c4 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Wed, 12 Jun 2024 11:29:41 +0530 Subject: [PATCH 0841/1016] kgsl: a6x: Update HFI feature value to HFI_FEATURE_KPROF Use HFI_FEATURE_KPROF (16) to align with the feature values defined in GMU. Change-Id: Ia93e397f27cbe09121a6ac31cad167327e074cc2 Signed-off-by: Pankaj Gupta --- adreno_a6xx_hwsched_hfi.c | 3 +-- adreno_hfi.h | 3 --- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index cdc386a8ed..136a0241bb 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -1238,8 +1238,7 @@ int a6xx_hwsched_hfi_start(struct adreno_device *adreno_dev) if (ret) goto err; - ret = a6xx_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_A6XX_KPROF, - 1, 0); + ret = a6xx_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_KPROF, 1, 0); if (ret) goto err; diff --git a/adreno_hfi.h b/adreno_hfi.h index f48529b73d..b4fadd2337 100644 --- a/adreno_hfi.h +++ b/adreno_hfi.h @@ -92,9 +92,6 @@ enum hfi_table_type { HFI_TABLE_MAX, }; -/* A6xx uses a different value for KPROF */ -#define HFI_FEATURE_A6XX_KPROF 14 - /* For Gen7 & Gen8 ACD */ #define F_PWR_ACD_CALIBRATE 78 From 748cfb08e95149ce31afdbebdd5ede0ee394fad3 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 2 Jul 2024 14:54:24 +0530 Subject: [PATCH 0842/1016] kgsl: pwrctrl: Fix sysfs show for max_clock_mhz and max_gpuclk The max_clock_mhz and max_gpuclk sysfs nodes utilize the pm qos framework to set the thermal limit. However, the current show API for these sysfs nodes does not take PM QOS max power level into account. Fix it by consolidating the thermal and PM QOS power levels. 
Change-Id: I2476f793da384e1082a2a6e632c67d1d5500309d Signed-off-by: Kamal Agrawal --- kgsl_pwrctrl.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 409974375e..dcd46fe677 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -528,9 +528,11 @@ static ssize_t max_gpuclk_show(struct device *dev, { struct kgsl_device *device = dev_get_drvdata(dev); struct kgsl_pwrctrl *pwr = &device->pwrctrl; + u32 max_pwrlevel = max_t(u32, READ_ONCE(pwr->thermal_pwrlevel), + READ_ONCE(pwr->pmqos_max_pwrlevel)); return scnprintf(buf, PAGE_SIZE, "%d\n", - device->pwrctrl.pwrlevels[pwr->thermal_pwrlevel].gpu_freq); + device->pwrctrl.pwrlevels[max_pwrlevel].gpu_freq); } static ssize_t gpuclk_store(struct device *dev, @@ -939,9 +941,11 @@ static ssize_t min_clock_mhz_store(struct device *dev, static ssize_t _max_clock_mhz_show(struct kgsl_device *device, char *buf) { struct kgsl_pwrctrl *pwr = &device->pwrctrl; + u32 max_pwrlevel = max_t(u32, READ_ONCE(pwr->thermal_pwrlevel), + READ_ONCE(pwr->pmqos_max_pwrlevel)); return scnprintf(buf, PAGE_SIZE, "%d\n", - pwr->pwrlevels[pwr->thermal_pwrlevel].gpu_freq / 1000000); + pwr->pwrlevels[max_pwrlevel].gpu_freq / 1000000); } static ssize_t max_clock_mhz_show(struct device *dev, From 92fa83cd69a9478484630e3dbf43dc3d4c750397 Mon Sep 17 00:00:00 2001 From: Linux Image Build Automation Date: Wed, 3 Jul 2024 13:41:50 -0700 Subject: [PATCH 0843/1016] Revert "kgsl: gen8: Rearrange some GMU register dumping in snapshot" This reverts commit cc6408ebb4b40905cf101a8416a58528b48ef7c6. 
Change-Id: I899fc46bedbc1242d5defe655da58a0105663b4f Signed-off-by: Linux Image Build Automation --- adreno_gen8_0_0_snapshot.h | 21 +++--- adreno_gen8_gmu.c | 11 ---- adreno_gen8_gmu.h | 1 - adreno_gen8_gmu_snapshot.c | 2 +- adreno_gen8_snapshot.c | 127 +++++++++++++++++-------------------- 5 files changed, 70 insertions(+), 92 deletions(-) diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h index 5b09838f4d..cb924cc162 100644 --- a/adreno_gen8_0_0_snapshot.h +++ b/adreno_gen8_0_0_snapshot.h @@ -1883,6 +1883,11 @@ static struct gen8_reg_list gen8_0_0_ahb_registers[] = { { UNSLICE, gen8_0_0_ahb_secure_gpu_registers }, }; +static struct gen8_reg_list gen8_gmu_gx_registers[] = { + { UNSLICE, gen8_0_0_gmugx_registers }, + { SLICE, gen8_0_0_gmugx_slice_registers }, +}; + /* * Block : ['GDPM_LKG'] * REGION : UNSLICE @@ -2057,16 +2062,10 @@ static const u32 *gen8_0_0_external_core_regs[] = { gen8_0_0_gpu_cc_ahb2phy_swman_registers, gen8_0_0_gpu_cc_gpu_cc_reg_registers, gen8_0_0_gpu_cc_pll0_cm_pll_taycan_common_registers, + gen8_0_0_acd_acd_mnd_registers, + gen8_0_0_gx_clkctl_ahb2phy_broadcast_swman_registers, + gen8_0_0_gx_clkctl_ahb2phy_swman_registers, + gen8_0_0_gx_clkctl_pll0_cm_pll_taycan_common_registers, + gen8_0_0_gx_clkctl_gx_clkctl_reg_registers, }; - -static struct gen8_reg_list gen8_gmu_gx_registers[] = { - { UNSLICE, gen8_0_0_gmugx_registers }, - { UNSLICE, gen8_0_0_acd_acd_mnd_registers }, - { UNSLICE, gen8_0_0_gx_clkctl_ahb2phy_broadcast_swman_registers }, - { UNSLICE, gen8_0_0_gx_clkctl_ahb2phy_swman_registers }, - { UNSLICE, gen8_0_0_gx_clkctl_pll0_cm_pll_taycan_common_registers }, - { UNSLICE, gen8_0_0_gx_clkctl_gx_clkctl_reg_registers }, - { SLICE, gen8_0_0_gmugx_slice_registers }, -}; - #endif /*_ADRENO_GEN8_0_0_SNAPSHOT_H */ diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index 6a900072ad..de6fcc89b3 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -684,14 +684,6 @@ bool gen8_gmu_gx_is_on(struct adreno_device *adreno_dev) 
return is_on(val); } -bool gen8_gmu_rpmh_pwr_state_is_active(struct kgsl_device *device) -{ - u32 val; - - gmu_core_regread(device, GEN8_GMUCX_RPMH_POWER_STATE, &val); - return (val == GPU_HW_ACTIVE) ? true : false; -} - static const char *idle_level_name(int level) { if (level == GPU_HW_ACTIVE) @@ -863,9 +855,6 @@ void gen8_gmu_register_config(struct adreno_device *adreno_dev) /* Clear any previously set cm3 fault */ atomic_set(&gmu->cm3_fault, 0); - /* Init the power state register before GMU turns on GX */ - gmu_core_regwrite(device, GEN8_GMUCX_RPMH_POWER_STATE, 0xDEADD00D); - /* Vote veto for FAL10 */ gmu_core_regwrite(device, GEN8_GMUCX_CX_FALNEXT_INTF, 0x1); gmu_core_regwrite(device, GEN8_GMUCX_CX_FAL_INTF, 0x1); diff --git a/adreno_gen8_gmu.h b/adreno_gen8_gmu.h index 39bfcc993a..73f9a2d6c2 100644 --- a/adreno_gen8_gmu.h +++ b/adreno_gen8_gmu.h @@ -488,5 +488,4 @@ u32 gen8_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab); */ int gen8_gmu_clock_set_rate(struct adreno_device *adreno_dev, u32 req_freq); -bool gen8_gmu_rpmh_pwr_state_is_active(struct kgsl_device *device); #endif diff --git a/adreno_gen8_gmu_snapshot.c b/adreno_gen8_gmu_snapshot.c index 179bb6a4fb..4619c1f26d 100644 --- a/adreno_gen8_gmu_snapshot.c +++ b/adreno_gen8_gmu_snapshot.c @@ -207,7 +207,7 @@ static void gen8_gmu_device_snapshot(struct kgsl_device *device, adreno_snapshot_registers_v2, (void *) gen8_snapshot_block_list->gmu_cx_unsliced_regs); - if (!gen8_gmu_rpmh_pwr_state_is_active(device)) + if (!gen8_gmu_gx_is_on(adreno_dev)) goto dtcm; /* Set fence to ALLOW mode so registers can be read */ diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index e994f909da..cfb5ea63ea 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -219,13 +219,11 @@ size_t gen8_legacy_snapshot_registers(struct kgsl_device *device, if (info->regs->sel) kgsl_regwrite(device, info->regs->sel->host_reg, info->regs->sel->val); - if (info->regs->slice_region) { - kgsl_regwrite(device, 
GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL - (info->slice_id, 0, 0, 0)); + kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL + (info->slice_id, 0, 0, 0)); - /* Make sure the previous writes are posted before reading */ - mb(); - } + /* Make sure the previous writes are posted before reading */ + mb(); for (ptr = info->regs->regs; ptr[0] != UINT_MAX; ptr += 2) { count = REG_COUNT(ptr); @@ -1326,60 +1324,6 @@ static size_t gen8_snapshot_cx_side_dbgc_debugbus_block(struct kgsl_device *devi return size; } -static void gen8_snapshot_cx_debugbus(struct adreno_device *adreno_dev, - struct kgsl_snapshot *snapshot) -{ - u32 i; - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLT, - FIELD_PREP(GENMASK(31, 28), 0xf)); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLM, - FIELD_PREP(GENMASK(27, 24), 0xf)); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_0, - FIELD_PREP(GENMASK(3, 0), 0x0) | - FIELD_PREP(GENMASK(7, 4), 0x1) | - FIELD_PREP(GENMASK(11, 8), 0x2) | - FIELD_PREP(GENMASK(15, 12), 0x3) | - FIELD_PREP(GENMASK(19, 16), 0x4) | - FIELD_PREP(GENMASK(23, 20), 0x5) | - FIELD_PREP(GENMASK(27, 24), 0x6) | - FIELD_PREP(GENMASK(31, 28), 0x7)); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_1, - FIELD_PREP(GENMASK(3, 0), 0x8) | - FIELD_PREP(GENMASK(7, 4), 0x9) | - FIELD_PREP(GENMASK(11, 8), 0xa) | - FIELD_PREP(GENMASK(15, 12), 0xb) | - FIELD_PREP(GENMASK(19, 16), 0xc) | - FIELD_PREP(GENMASK(23, 20), 0xd) | - FIELD_PREP(GENMASK(27, 24), 0xe) | - FIELD_PREP(GENMASK(31, 28), 0xf)); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); - - /* Dump the CX debugbus 
data if the block exists */ - if (kgsl_regmap_valid_offset(&device->regmap, GEN8_CX_DBGC_CFG_DBGBUS_SEL_A)) { - for (i = 0; i < gen8_snapshot_block_list->cx_debugbus_blocks_len; i++) { - kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_DEBUGBUS, - snapshot, gen8_snapshot_cx_dbgc_debugbus_block, - (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); - kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, - snapshot, gen8_snapshot_cx_side_dbgc_debugbus_block, - (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); - } - } -} - /* gen8_snapshot_debugbus() - Capture debug bus data */ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) @@ -1422,6 +1366,41 @@ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_MASKL_2, 0); kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_MASKL_3, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLT, + FIELD_PREP(GENMASK(31, 28), 0xf)); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLM, + FIELD_PREP(GENMASK(27, 24), 0xf)); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_0, + FIELD_PREP(GENMASK(3, 0), 0x0) | + FIELD_PREP(GENMASK(7, 4), 0x1) | + FIELD_PREP(GENMASK(11, 8), 0x2) | + FIELD_PREP(GENMASK(15, 12), 0x3) | + FIELD_PREP(GENMASK(19, 16), 0x4) | + FIELD_PREP(GENMASK(23, 20), 0x5) | + FIELD_PREP(GENMASK(27, 24), 0x6) | + FIELD_PREP(GENMASK(31, 28), 0x7)); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_1, + FIELD_PREP(GENMASK(3, 0), 0x8) | + FIELD_PREP(GENMASK(7, 4), 0x9) | + FIELD_PREP(GENMASK(11, 8), 0xa) | + FIELD_PREP(GENMASK(15, 12), 0xb) | + FIELD_PREP(GENMASK(19, 16), 0xc) | + FIELD_PREP(GENMASK(23, 20), 0xd) | + 
FIELD_PREP(GENMASK(27, 24), 0xe) | + FIELD_PREP(GENMASK(31, 28), 0xf)); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_2, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_3, 0); + for (i = 0; i < gen8_snapshot_block_list->debugbus_blocks_len; i++) { kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUGBUS, @@ -1443,6 +1422,20 @@ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, snapshot, gen8_snapshot_dbgc_side_debugbus_block, (void *) &gen8_snapshot_block_list->gbif_debugbus_blocks[i]); } + + /* Dump the CX debugbus data if the block exists */ + if (kgsl_regmap_valid_offset(&device->regmap, GEN8_CX_DBGC_CFG_DBGBUS_SEL_A)) { + for (i = 0; i < gen8_snapshot_block_list->cx_debugbus_blocks_len; i++) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, gen8_snapshot_cx_dbgc_debugbus_block, + (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, + snapshot, gen8_snapshot_cx_side_dbgc_debugbus_block, + (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); + } + } } /* gen8_snapshot_sqe() - Dump SQE data in snapshot */ @@ -1626,7 +1619,7 @@ static void gen8_cx_misc_regs_snapshot(struct kgsl_device *device, u64 *ptr, offset = 0; const u32 *regs_ptr = (const u32 *)gen8_snapshot_block_list->cx_misc_regs; - if (CD_SCRIPT_CHECK(device) || !gen8_gmu_rpmh_pwr_state_is_active(device)) + if (CD_SCRIPT_CHECK(device) || !adreno_gx_is_on(ADRENO_DEVICE(device))) goto legacy_snapshot; /* Build the crash script */ @@ -1704,9 +1697,11 @@ void gen8_snapshot(struct adreno_device *adreno_dev, if (!gmu_core_isenabled(device)) gen8_snapshot_external_core_regs(device, snapshot); - gen8_cx_misc_regs_snapshot(device, snapshot); + gen8_snapshot_trace_buffer(device, snapshot); - gen8_snapshot_cx_debugbus(adreno_dev, 
snapshot); + gen8_snapshot_debugbus(adreno_dev, snapshot); + + gen8_cx_misc_regs_snapshot(device, snapshot); /* SQE Firmware */ kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, @@ -1716,13 +1711,9 @@ void gen8_snapshot(struct adreno_device *adreno_dev, kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, gen8_snapshot_aqe, NULL); - if (!gen8_gmu_rpmh_pwr_state_is_active(device)) + if (!adreno_gx_is_on(adreno_dev)) return; - gen8_snapshot_trace_buffer(device, snapshot); - - gen8_snapshot_debugbus(adreno_dev, snapshot); - is_current_rt = rt_task(current); if (is_current_rt) From d61fe296a3b1ef6f26a81f5571d22b4ca8eec308 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Wed, 10 Jul 2024 22:51:14 +0530 Subject: [PATCH 0844/1016] kgsl: gen8: Use aperture set API during perfcounter enablement CP_APERTURE_CNTL_HOST register read and write may be reordered during perfcounter enablement. This may lead to incorrect aperture being restored. Also, aperture is programmed before accessing the registers behind aperture. Thus, there is no need to restore the older aperture. 
Change-Id: I33ca63ea8c411061097254cea6f53ca7f5b8778b Signed-off-by: Kamal Agrawal --- adreno_gen8_perfcounter.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/adreno_gen8_perfcounter.c b/adreno_gen8_perfcounter.c index 60e4b5918a..4aecca1b19 100644 --- a/adreno_gen8_perfcounter.c +++ b/adreno_gen8_perfcounter.c @@ -55,21 +55,15 @@ static int gen8_counter_br_enable(struct adreno_device *adreno_dev, const struct adreno_perfcount_group *group, u32 counter, u32 countable) { - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct adreno_perfcount_register *reg = &group->regs[counter]; - int ret = 0; - u32 val = 0; + int ret; - kgsl_regread(device, GEN8_CP_APERTURE_CNTL_HOST, &val); - kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, FIELD_PREP(GENMASK(15, 12), PIPE_BR)); + gen8_host_aperture_set(adreno_dev, PIPE_BR, 0, 0); ret = gen8_perfcounter_update(adreno_dev, reg, true, FIELD_PREP(GENMASK(15, 12), PIPE_BR), group->flags); - kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, val); - - /* Ensure all writes are posted before reading the piped register */ - mb(); + gen8_host_aperture_set(adreno_dev, 0, 0, 0); if (!ret) reg->value = 0; @@ -81,21 +75,15 @@ static int gen8_counter_bv_enable(struct adreno_device *adreno_dev, const struct adreno_perfcount_group *group, u32 counter, u32 countable) { - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct adreno_perfcount_register *reg = &group->regs[counter]; - int ret = 0; - u32 val = 0; + int ret; - kgsl_regread(device, GEN8_CP_APERTURE_CNTL_HOST, &val); - kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, FIELD_PREP(GENMASK(15, 12), PIPE_BV)); + gen8_host_aperture_set(adreno_dev, PIPE_BV, 0, 0); ret = gen8_perfcounter_update(adreno_dev, reg, true, FIELD_PREP(GENMASK(15, 12), PIPE_BV), group->flags); - kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, val); - - /* Ensure all writes are posted before reading the piped register */ - mb(); + 
gen8_host_aperture_set(adreno_dev, 0, 0, 0); if (!ret) reg->value = 0; From 53e2e15c0513e9cc02f75fa1d454bee9cc468435 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Wed, 10 Jul 2024 22:03:50 +0530 Subject: [PATCH 0845/1016] kgsl: gen8: Add memory barrier before polling perfctr flush status RBBM_SLICE_PERFCTR_FLUSH_HOST_CMD register write and RBBM_PERFCTR_FLUSH_HOST_STATUS register read may be re-ordered. Ensure that the writes are posted before polling for perf counter flush status register. Change-Id: Ia6b6b580bf762bf75b21cbe6254cf11de6cd1cb5 Signed-off-by: Kamal Agrawal --- adreno_gen8_perfcounter.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/adreno_gen8_perfcounter.c b/adreno_gen8_perfcounter.c index 60e4b5918a..4109a8d79b 100644 --- a/adreno_gen8_perfcounter.c +++ b/adreno_gen8_perfcounter.c @@ -24,6 +24,9 @@ static void gen8_rbbm_perfctr_flush(struct kgsl_device *device) kgsl_regwrite(device, GEN8_RBBM_PERFCTR_FLUSH_HOST_CMD, BIT(0)); kgsl_regwrite(device, GEN8_RBBM_SLICE_PERFCTR_FLUSH_HOST_CMD, BIT(0)); + /* Ensure all writes are posted before polling status register */ + wmb(); + ret = kgsl_regmap_read_poll_timeout(&device->regmap, GEN8_RBBM_PERFCTR_FLUSH_HOST_STATUS, val, (val & PERFCOUNTER_FLUSH_DONE_MASK) == PERFCOUNTER_FLUSH_DONE_MASK, 100, 100 * 1000); From bd362cd220c4613ca6a6770455e438035c3fa09b Mon Sep 17 00:00:00 2001 From: Mohammed Mirza Mandayappurath Manzoor Date: Tue, 11 Jun 2024 16:22:30 -0700 Subject: [PATCH 0846/1016] kgsl: Add ability to vote for dependent domains for GX levels Modify perf table to add support for new MxG corner for each power levels. Additional voltage corners are being introduced on some targets for MxG, which will not exist on MxA. Repurpose the cx_votes variable to support other dependent domain votes as well. 
Change-Id: I26e0f00984c98f5e9efd9b3719a30929a6d0e627 Signed-off-by: Mohammed Mirza Mandayappurath Manzoor --- adreno_a6xx_rpmh.c | 8 +-- adreno_gen7_rpmh.c | 6 +- adreno_gen8_rpmh.c | 150 +++++++++++++++++++++++++++++---------------- adreno_hfi.h | 4 +- 4 files changed, 107 insertions(+), 61 deletions(-) diff --git a/adreno_a6xx_rpmh.c b/adreno_a6xx_rpmh.c index 9d4a10f4d2..0d451ae3ca 100644 --- a/adreno_a6xx_rpmh.c +++ b/adreno_a6xx_rpmh.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -354,10 +354,10 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, memset(vlvl_tbl, 0, sizeof(vlvl_tbl)); table->gx_votes[0].freq = 0; - table->gx_votes[0].cx_vote = 0; + table->gx_votes[0].dep_vote = 0; /* Disable cx vote in gmu dcvs table if it is not supported in DT */ if (pwr->pwrlevels[0].cx_level == 0xffffffff) - table->gx_votes[0].cx_vote = 0xffffffff; + table->gx_votes[0].dep_vote = 0xffffffff; /* GMU power levels are in ascending order */ for (index = 1, i = pwr->num_pwrlevels - 1; i >= 0; i--, index++) { @@ -367,7 +367,7 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, table->gx_votes[index].freq = pwr->pwrlevels[i].gpu_freq / 1000; ret = to_cx_hlvl(cx_rail, cx_vlvl, - &table->gx_votes[index].cx_vote); + &table->gx_votes[index].dep_vote); if (ret) { dev_err(&gmu->pdev->dev, "Unsupported cx corner: %u\n", cx_vlvl); diff --git a/adreno_gen7_rpmh.c b/adreno_gen7_rpmh.c index be6cac5b7c..88a173bf58 100644 --- a/adreno_gen7_rpmh.c +++ b/adreno_gen7_rpmh.c @@ -350,10 +350,10 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, memset(vlvl_tbl, 0, sizeof(vlvl_tbl)); table->gx_votes[0].freq = 0; - table->gx_votes[0].cx_vote = 0; + table->gx_votes[0].dep_vote = 0; /* Disable cx 
vote in gmu dcvs table if it is not supported in DT */ if (pwr->pwrlevels[0].cx_level == 0xffffffff) - table->gx_votes[0].cx_vote = 0xffffffff; + table->gx_votes[0].dep_vote = 0xffffffff; /* GMU power levels are in ascending order */ for (index = 1, i = pwr->num_pwrlevels - 1; i >= 0; i--, index++) { @@ -363,7 +363,7 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, table->gx_votes[index].freq = pwr->pwrlevels[i].gpu_freq / 1000; ret = to_cx_hlvl(cx_rail, cx_vlvl, - &table->gx_votes[index].cx_vote); + &table->gx_votes[index].dep_vote); if (ret) { dev_err(device->dev, "Unsupported cx corner: %u\n", cx_vlvl); diff --git a/adreno_gen8_rpmh.c b/adreno_gen8_rpmh.c index 361a436b04..525626740c 100644 --- a/adreno_gen8_rpmh.c +++ b/adreno_gen8_rpmh.c @@ -293,27 +293,64 @@ static int setup_cx_arc_votes(struct gen8_gmu_device *gmu, return ret; } -static int to_cx_hlvl(struct rpmh_arc_vals *cx_rail, u32 vlvl, u32 *hlvl) +#define GEN8_DEP_VOTE_SET(cx, mx) \ + (FIELD_PREP(GENMASK(31, 14), 0x3FFFF) | \ + FIELD_PREP(GENMASK(13, 8), mx) | \ + FIELD_PREP(GENMASK(7, 0), cx)) + +static int setup_dependency_domain_tbl(u32 *votes, + struct rpmh_arc_vals *dep_rail, struct rpmh_arc_vals *cx_rail, + u16 *vlvl, u32 *cx_vlvl, u32 num_entries) { - u32 i; + u32 cx_vote, mx_vote; + int i, j; - /* - * This means that the Gx level doesn't have a dependency on Cx level. - * Return the same value to disable cx voting at GMU. 
- */ - if (vlvl == 0xffffffff) { - *hlvl = vlvl; - return 0; - } + for (i = 1; i < num_entries; i++) { + bool found_match = false; - for (i = 0; i < cx_rail->num; i++) { - if (cx_rail->val[i] >= vlvl) { - *hlvl = i; - return 0; + if (cx_vlvl[i] == 0xffffffff) { + /* This means that the Gx level doesn't have a dependency on Cx level */ + cx_vote = 0xff; + found_match = true; + } else { + for (j = 0; j < cx_rail->num; j++) { + if (cx_rail->val[j] >= cx_vlvl[i]) { + cx_vote = j; + found_match = true; + break; + } + } } + + /* If we did not find a matching VLVL level then abort */ + if (!found_match) { + pr_err("kgsl: Unsupported cx corner: %u\n", cx_vlvl[i]); + return -EINVAL; + } + + /* + * Set Mx dependency domain votes for Gx level. Look for indexes + * whose vlvl value is greater than or equal to the vlvl value + * of the corresponding index of dependency rail + */ + for (j = 0; j < dep_rail->num; j++) { + if (dep_rail->val[j] >= vlvl[i]) { + mx_vote = j; + found_match = true; + break; + } + } + + /* If we did not find a matching VLVL level then abort */ + if (!found_match) { + pr_err("kgsl: Unsupported mx corner: %u\n", vlvl[i]); + return -EINVAL; + } + + votes[i] = GEN8_DEP_VOTE_SET(cx_vote, mx_vote); } - return -EINVAL; + return 0; } /* @@ -321,71 +358,77 @@ static int to_cx_hlvl(struct rpmh_arc_vals *cx_rail, u32 vlvl, u32 *hlvl) * @hfi: Pointer to hfi device * @pri_rail: Pointer to primary power rail vlvl table * @sec_rail: Pointer to second/dependent power rail vlvl table + * @gmxc_rail: Pointer to MxG power rail vlvl table * * This function initializes the gx votes for all gpu frequencies * for gpu dcvs */ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail, - struct rpmh_arc_vals *cx_rail) + struct rpmh_arc_vals *gmxc_rail, struct rpmh_arc_vals *cx_rail) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct kgsl_pwrctrl 
*pwr = &device->pwrctrl; struct gen8_dcvs_table *table = &gmu->dcvs_table; - u32 index; u16 vlvl_tbl[MAX_GX_LEVELS]; + u32 cx_vlvl_tbl[MAX_GX_LEVELS]; u32 gx_votes[MAX_GX_LEVELS]; + u32 dep_votes[MAX_GX_LEVELS]; int ret, i; - if (pwr->num_pwrlevels + 1 > ARRAY_SIZE(vlvl_tbl)) { + table->gpu_level_num = pwr->num_pwrlevels + 1; + + if (table->gpu_level_num > ARRAY_SIZE(vlvl_tbl)) { dev_err(device->dev, "Defined more GPU DCVS levels than RPMh can support\n"); return -ERANGE; } - /* Add the zero powerlevel for the perf table */ - table->gpu_level_num = pwr->num_pwrlevels + 1; - + /* Initialize vlvl tables */ memset(vlvl_tbl, 0, sizeof(vlvl_tbl)); + memset(cx_vlvl_tbl, 0, sizeof(cx_vlvl_tbl)); + /* Fill the vlvl tables. GMU power levels are in ascending order */ + for (i = 1; i < table->gpu_level_num; i++) { + vlvl_tbl[i] = pwr->pwrlevels[pwr->num_pwrlevels - i].voltage_level; + cx_vlvl_tbl[i] = pwr->pwrlevels[pwr->num_pwrlevels - i].cx_level; + } + + /* If the target does not have a dedicated Mx rail, use secondary rail */ + if (gmxc_rail == NULL) + ret = setup_volt_dependency_tbl(gx_votes, pri_rail, sec_rail, + vlvl_tbl, table->gpu_level_num); + else + ret = setup_volt_dependency_tbl(gx_votes, pri_rail, gmxc_rail, + vlvl_tbl, table->gpu_level_num); + if (ret) + return ret; + + ret = setup_dependency_domain_tbl(dep_votes, sec_rail, cx_rail, + vlvl_tbl, cx_vlvl_tbl, table->gpu_level_num); + if (ret) + return ret; + + /* Populate DCVS table with all the votes */ + for (i = 1; i < table->gpu_level_num; i++) { + table->gx_votes[i].freq = pwr->pwrlevels[pwr->num_pwrlevels - i].gpu_freq / 1000; + table->gx_votes[i].vote = gx_votes[i]; + table->gx_votes[i].dep_vote = dep_votes[i]; + } + + /* Add the zero powerlevel for the perf table */ table->gx_votes[0].freq = 0; - table->gx_votes[0].cx_vote = 0; - /* Disable cx vote in gmu dcvs table if it is not supported in DT */ - if (pwr->pwrlevels[0].cx_level == 0xffffffff) - table->gx_votes[0].cx_vote = 0xffffffff; - - /* GMU power 
levels are in ascending order */ - for (index = 1, i = pwr->num_pwrlevels - 1; i >= 0; i--, index++) { - u32 cx_vlvl = pwr->pwrlevels[i].cx_level; - - vlvl_tbl[index] = pwr->pwrlevels[i].voltage_level; - table->gx_votes[index].freq = pwr->pwrlevels[i].gpu_freq / 1000; - - ret = to_cx_hlvl(cx_rail, cx_vlvl, - &table->gx_votes[index].cx_vote); - if (ret) { - dev_err(device->dev, "Unsupported cx corner: %u\n", - cx_vlvl); - return ret; - } - } - - ret = setup_volt_dependency_tbl(gx_votes, pri_rail, - sec_rail, vlvl_tbl, table->gpu_level_num); - if (!ret) { - for (i = 0; i < table->gpu_level_num; i++) - table->gx_votes[i].vote = gx_votes[i]; - } + table->gx_votes[0].vote = 0; + table->gx_votes[0].dep_vote = 0xFFFFFFFF; return ret; - } static int build_dcvs_table(struct adreno_device *adreno_dev) { struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - struct rpmh_arc_vals gx_arc, cx_arc, mx_arc; + struct rpmh_arc_vals gx_arc, cx_arc, mx_arc, gmxc_arc; int ret; ret = rpmh_arc_cmds(&gx_arc, "gfx.lvl"); @@ -406,12 +449,15 @@ static int build_dcvs_table(struct adreno_device *adreno_dev) /* If the target supports dedicated MxC rail, read the same */ if (cmd_db_read_addr("gmxc.lvl")) { - ret = rpmh_arc_cmds(&mx_arc, "gmxc.lvl"); + ret = rpmh_arc_cmds(&gmxc_arc, "gmxc.lvl"); if (ret) return ret; + ret = setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, &gmxc_arc, &cx_arc); + } else { + ret = setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, NULL, &cx_arc); } - return setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, &cx_arc); + return ret; } /* diff --git a/adreno_hfi.h b/adreno_hfi.h index f48529b73d..b7f1c26cb3 100644 --- a/adreno_hfi.h +++ b/adreno_hfi.h @@ -541,8 +541,8 @@ struct hfi_bwtable_cmd { struct opp_gx_desc { u32 vote; - /* This is 'acdLvl' in gmu fw which is now repurposed for cx vote */ - u32 cx_vote; + /* This is 'acdLvl' in gmu fw which is now repurposed for various dependency votes */ + u32 dep_vote; u32 freq; } __packed; From 
334fbadc748d4502004cadb57fdbf63eda98a801 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Mon, 17 Jun 2024 12:36:59 +0530 Subject: [PATCH 0847/1016] kgsl: gen8: Dump CPR registers in snapshot for gen8_0_x GPUs Dump core power reduction (CPR) registers in snapshot for gen8_0_x GPUs. Change-Id: I3b1d370ee15be31dcef0a8fc4a904543e4f1c630 Signed-off-by: Kamal Agrawal --- adreno_gen8_0_0_snapshot.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h index cb924cc162..050854acda 100644 --- a/adreno_gen8_0_0_snapshot.h +++ b/adreno_gen8_0_0_snapshot.h @@ -2056,6 +2056,22 @@ static const u32 gen8_0_0_rscc_rsc_registers[] = { }; static_assert(IS_ALIGNED(sizeof(gen8_0_0_rscc_rsc_registers), 8)); +/* + * Block : ['CPR'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 20 (Regs:479) + */ +static const u32 gen8_0_0_cpr_registers[] = { + 0x26800, 0x26805, 0x26808, 0x2680c, 0x26814, 0x26814, 0x2681c, 0x2681c, + 0x26820, 0x26838, 0x26840, 0x26840, 0x26848, 0x26848, 0x26850, 0x26850, + 0x26880, 0x2689f, 0x26980, 0x269b0, 0x269c0, 0x269c8, 0x269e0, 0x269ee, + 0x269fb, 0x269ff, 0x26a02, 0x26a07, 0x26a09, 0x26a0b, 0x26a10, 0x26b0f, + 0x27440, 0x27441, 0x27444, 0x27444, 0x27480, 0x274a2, 0x274ac, 0x274c4, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_0_0_cpr_registers), 8)); + static const u32 *gen8_0_0_external_core_regs[] = { gen8_0_0_gdpm_lkg_registers, gen8_0_0_gpu_cc_ahb2phy_broadcast_swman_registers, @@ -2067,5 +2083,6 @@ static const u32 *gen8_0_0_external_core_regs[] = { gen8_0_0_gx_clkctl_ahb2phy_swman_registers, gen8_0_0_gx_clkctl_pll0_cm_pll_taycan_common_registers, gen8_0_0_gx_clkctl_gx_clkctl_reg_registers, + gen8_0_0_cpr_registers, }; #endif /*_ADRENO_GEN8_0_0_SNAPSHOT_H */ From 4a56d40193f04adb3104299d034c970e50d53429 Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Wed, 29 May 2024 15:38:54 -0700 Subject: [PATCH 0848/1016] kgsl: gen8: Rearrange some GMU register 
dumping in snapshot 1. Ensure GX GDSC is ON to dump GMU registers on GX headswitch 2. Check for RPMH state to make sure GPU is active when dumping the registers. 3. CX debugbus does not need GX ON, move it to its own function so that it can be called independently. 4. Move the GX debugbus and tracebuffer under the GX check to ensure we do not read/write these registers without checking for GX ON. Change-Id: Ib3410746a583f79fb90c42117981d1a6bd860b5e Signed-off-by: Urvashi Agrawal --- adreno_gen8.c | 4 +- adreno_gen8_0_0_snapshot.h | 21 ++++--- adreno_gen8_gmu.c | 11 ++++ adreno_gen8_gmu.h | 7 +++ adreno_gen8_gmu_snapshot.c | 3 +- adreno_gen8_snapshot.c | 125 ++++++++++++++++++++----------------- 6 files changed, 101 insertions(+), 70 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index c949b539c5..442ec114a3 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -2870,7 +2870,7 @@ static void gen8_lpac_fault_header(struct adreno_device *adreno_dev, pr_context(device, drawobj->context, "lpac cmdline: %s\n", drawctxt->base.proc_priv->cmdline); - if (!gx_on) + if (!gen8_gmu_rpmh_pwr_state_is_active(device) || !gx_on) goto done; kgsl_regread(device, GEN8_RBBM_LPAC_STATUS, &status); @@ -2926,7 +2926,7 @@ static void gen8_fault_header(struct adreno_device *adreno_dev, drawctxt->base.proc_priv->cmdline); } - if (!gx_on) + if (!gen8_gmu_rpmh_pwr_state_is_active(device) || !gx_on) goto done; kgsl_regread(device, GEN8_RBBM_STATUS, &status); diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h index 050854acda..f5f04344ec 100644 --- a/adreno_gen8_0_0_snapshot.h +++ b/adreno_gen8_0_0_snapshot.h @@ -1883,11 +1883,6 @@ static struct gen8_reg_list gen8_0_0_ahb_registers[] = { { UNSLICE, gen8_0_0_ahb_secure_gpu_registers }, }; -static struct gen8_reg_list gen8_gmu_gx_registers[] = { - { UNSLICE, gen8_0_0_gmugx_registers }, - { SLICE, gen8_0_0_gmugx_slice_registers }, -}; - /* * Block : ['GDPM_LKG'] * REGION : UNSLICE @@ -2078,11 +2073,17 @@ static const u32 
*gen8_0_0_external_core_regs[] = { gen8_0_0_gpu_cc_ahb2phy_swman_registers, gen8_0_0_gpu_cc_gpu_cc_reg_registers, gen8_0_0_gpu_cc_pll0_cm_pll_taycan_common_registers, - gen8_0_0_acd_acd_mnd_registers, - gen8_0_0_gx_clkctl_ahb2phy_broadcast_swman_registers, - gen8_0_0_gx_clkctl_ahb2phy_swman_registers, - gen8_0_0_gx_clkctl_pll0_cm_pll_taycan_common_registers, - gen8_0_0_gx_clkctl_gx_clkctl_reg_registers, gen8_0_0_cpr_registers, }; + +static struct gen8_reg_list gen8_gmu_gx_registers[] = { + { UNSLICE, gen8_0_0_gmugx_registers }, + { UNSLICE, gen8_0_0_gx_clkctl_ahb2phy_broadcast_swman_registers }, + { UNSLICE, gen8_0_0_gx_clkctl_ahb2phy_swman_registers }, + { UNSLICE, gen8_0_0_gx_clkctl_pll0_cm_pll_taycan_common_registers }, + { UNSLICE, gen8_0_0_gx_clkctl_gx_clkctl_reg_registers }, + { UNSLICE, gen8_0_0_acd_acd_mnd_registers }, + { SLICE, gen8_0_0_gmugx_slice_registers }, +}; + #endif /*_ADRENO_GEN8_0_0_SNAPSHOT_H */ diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index de6fcc89b3..291fb52822 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -684,6 +684,14 @@ bool gen8_gmu_gx_is_on(struct adreno_device *adreno_dev) return is_on(val); } +bool gen8_gmu_rpmh_pwr_state_is_active(struct kgsl_device *device) +{ + u32 val; + + gmu_core_regread(device, GEN8_GMUCX_RPMH_POWER_STATE, &val); + return (val == GPU_HW_ACTIVE) ? 
true : false; +} + static const char *idle_level_name(int level) { if (level == GPU_HW_ACTIVE) @@ -855,6 +863,9 @@ void gen8_gmu_register_config(struct adreno_device *adreno_dev) /* Clear any previously set cm3 fault */ atomic_set(&gmu->cm3_fault, 0); + /* Init the power state register before GMU turns on GX */ + gmu_core_regwrite(device, GEN8_GMUCX_RPMH_POWER_STATE, 0xF); + /* Vote veto for FAL10 */ gmu_core_regwrite(device, GEN8_GMUCX_CX_FALNEXT_INTF, 0x1); gmu_core_regwrite(device, GEN8_GMUCX_CX_FAL_INTF, 0x1); diff --git a/adreno_gen8_gmu.h b/adreno_gen8_gmu.h index 73f9a2d6c2..d4831f3c4d 100644 --- a/adreno_gen8_gmu.h +++ b/adreno_gen8_gmu.h @@ -488,4 +488,11 @@ u32 gen8_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab); */ int gen8_gmu_clock_set_rate(struct adreno_device *adreno_dev, u32 req_freq); +/** + * gen8_gmu_rpmh_pwr_state_is_active - Check the state of GPU HW + * @device: Pointer to the kgsl device + * + * Returns true on active or false otherwise + */ +bool gen8_gmu_rpmh_pwr_state_is_active(struct kgsl_device *device); #endif diff --git a/adreno_gen8_gmu_snapshot.c b/adreno_gen8_gmu_snapshot.c index 4619c1f26d..7c3bd4e536 100644 --- a/adreno_gen8_gmu_snapshot.c +++ b/adreno_gen8_gmu_snapshot.c @@ -207,7 +207,8 @@ static void gen8_gmu_device_snapshot(struct kgsl_device *device, adreno_snapshot_registers_v2, (void *) gen8_snapshot_block_list->gmu_cx_unsliced_regs); - if (!gen8_gmu_gx_is_on(adreno_dev)) + if (!gen8_gmu_rpmh_pwr_state_is_active(device) || + !gen8_gmu_gx_is_on(adreno_dev)) goto dtcm; /* Set fence to ALLOW mode so registers can be read */ diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 2072d7db01..f16ba10a3c 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -219,8 +219,9 @@ size_t gen8_legacy_snapshot_registers(struct kgsl_device *device, if (info->regs->sel) kgsl_regwrite(device, info->regs->sel->host_reg, info->regs->sel->val); - kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, 
GEN8_CP_APERTURE_REG_VAL - (info->slice_id, 0, 0, 0)); + if (info->regs->slice_region) + kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL + (info->slice_id, 0, 0, 0)); /* Make sure the previous writes are posted before reading */ mb(); @@ -1324,6 +1325,61 @@ static size_t gen8_snapshot_cx_side_dbgc_debugbus_block(struct kgsl_device *devi return size; } +static void gen8_snapshot_cx_debugbus(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + u32 i; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLT, + FIELD_PREP(GENMASK(31, 28), 0xf)); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLM, + FIELD_PREP(GENMASK(27, 24), 0xf)); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_0, + FIELD_PREP(GENMASK(3, 0), 0x0) | + FIELD_PREP(GENMASK(7, 4), 0x1) | + FIELD_PREP(GENMASK(11, 8), 0x2) | + FIELD_PREP(GENMASK(15, 12), 0x3) | + FIELD_PREP(GENMASK(19, 16), 0x4) | + FIELD_PREP(GENMASK(23, 20), 0x5) | + FIELD_PREP(GENMASK(27, 24), 0x6) | + FIELD_PREP(GENMASK(31, 28), 0x7)); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_1, + FIELD_PREP(GENMASK(3, 0), 0x8) | + FIELD_PREP(GENMASK(7, 4), 0x9) | + FIELD_PREP(GENMASK(11, 8), 0xa) | + FIELD_PREP(GENMASK(15, 12), 0xb) | + FIELD_PREP(GENMASK(19, 16), 0xc) | + FIELD_PREP(GENMASK(23, 20), 0xd) | + FIELD_PREP(GENMASK(27, 24), 0xe) | + FIELD_PREP(GENMASK(31, 28), 0xf)); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); + + /* Dump the CX debugbus data if the block exists */ + if (!kgsl_regmap_valid_offset(&device->regmap, GEN8_CX_DBGC_CFG_DBGBUS_SEL_A)) + return; + + for (i = 0; i < 
gen8_snapshot_block_list->cx_debugbus_blocks_len; i++) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, gen8_snapshot_cx_dbgc_debugbus_block, + (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, + snapshot, gen8_snapshot_cx_side_dbgc_debugbus_block, + (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); + } +} + /* gen8_snapshot_debugbus() - Capture debug bus data */ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) @@ -1366,41 +1422,6 @@ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_MASKL_2, 0); kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_MASKL_3, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLT, - FIELD_PREP(GENMASK(31, 28), 0xf)); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLM, - FIELD_PREP(GENMASK(27, 24), 0xf)); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_0, - FIELD_PREP(GENMASK(3, 0), 0x0) | - FIELD_PREP(GENMASK(7, 4), 0x1) | - FIELD_PREP(GENMASK(11, 8), 0x2) | - FIELD_PREP(GENMASK(15, 12), 0x3) | - FIELD_PREP(GENMASK(19, 16), 0x4) | - FIELD_PREP(GENMASK(23, 20), 0x5) | - FIELD_PREP(GENMASK(27, 24), 0x6) | - FIELD_PREP(GENMASK(31, 28), 0x7)); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_1, - FIELD_PREP(GENMASK(3, 0), 0x8) | - FIELD_PREP(GENMASK(7, 4), 0x9) | - FIELD_PREP(GENMASK(11, 8), 0xa) | - FIELD_PREP(GENMASK(15, 12), 0xb) | - FIELD_PREP(GENMASK(19, 16), 0xc) | - FIELD_PREP(GENMASK(23, 20), 0xd) | - FIELD_PREP(GENMASK(27, 24), 0xe) | - FIELD_PREP(GENMASK(31, 28), 0xf)); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); - 
kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_2, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_3, 0); - for (i = 0; i < gen8_snapshot_block_list->debugbus_blocks_len; i++) { kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUGBUS, @@ -1422,20 +1443,6 @@ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, snapshot, gen8_snapshot_dbgc_side_debugbus_block, (void *) &gen8_snapshot_block_list->gbif_debugbus_blocks[i]); } - - /* Dump the CX debugbus data if the block exists */ - if (kgsl_regmap_valid_offset(&device->regmap, GEN8_CX_DBGC_CFG_DBGBUS_SEL_A)) { - for (i = 0; i < gen8_snapshot_block_list->cx_debugbus_blocks_len; i++) { - kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_DEBUGBUS, - snapshot, gen8_snapshot_cx_dbgc_debugbus_block, - (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); - kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, - snapshot, gen8_snapshot_cx_side_dbgc_debugbus_block, - (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); - } - } } static void gen8_reglist_snapshot(struct kgsl_device *device, @@ -1543,7 +1550,8 @@ static void gen8_cx_misc_regs_snapshot(struct kgsl_device *device, u64 *ptr, offset = 0; const u32 *regs_ptr = (const u32 *)gen8_snapshot_block_list->cx_misc_regs; - if (CD_SCRIPT_CHECK(device) || !adreno_gx_is_on(ADRENO_DEVICE(device))) + if (CD_SCRIPT_CHECK(device) || !gen8_gmu_rpmh_pwr_state_is_active(device) + || !gen8_gmu_gx_is_on(ADRENO_DEVICE(device))) goto legacy_snapshot; /* Build the crash script */ @@ -1620,15 +1628,18 @@ void gen8_snapshot(struct adreno_device *adreno_dev, if (!gmu_core_isenabled(device)) gen8_snapshot_external_core_regs(device, snapshot); + gen8_cx_misc_regs_snapshot(device, snapshot); + + gen8_snapshot_cx_debugbus(adreno_dev, snapshot); + + if (!gen8_gmu_rpmh_pwr_state_is_active(device) || + !gen8_gmu_gx_is_on(adreno_dev)) + return; + 
gen8_snapshot_trace_buffer(device, snapshot); gen8_snapshot_debugbus(adreno_dev, snapshot); - gen8_cx_misc_regs_snapshot(device, snapshot); - - if (!adreno_gx_is_on(adreno_dev)) - return; - is_current_rt = rt_task(current); if (is_current_rt) From 2f227522b46ea4f4d621c86f7fd20b841f95b873 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Thu, 11 Jul 2024 10:55:08 -0600 Subject: [PATCH 0849/1016] kgsl: Mark gx power domain as syscore device This is to disallow genPD framework from toggling the gx gdsc during system suspend/resume path. Change-Id: I2e203dee76ca04103bc9ff31fc330a467d612ecf Signed-off-by: Harshdeep Dhatt --- kgsl_pwrctrl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 409974375e..ad4b81a35d 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -1518,6 +1518,7 @@ static int kgsl_pwrctrl_probe_gx_gdsc(struct kgsl_device *device, struct platfor return IS_ERR(gx_pd) ? PTR_ERR(gx_pd) : -EINVAL; } pwr->gx_pd = gx_pd; + dev_pm_syscore_device(pwr->gx_pd, true); } else { struct regulator *gx_regulator = devm_regulator_get(&pdev->dev, "vdd"); From 923ca968cfd36d055382d314024c454b1edc2b1c Mon Sep 17 00:00:00 2001 From: Archana Sriram Date: Thu, 11 Jul 2024 15:26:46 +0530 Subject: [PATCH 0850/1016] kgsl: Update AB vote for targets with no gmu_ab support Currently, KGSL doesn't release AB vote during slumber for targets that supports AB voting through legacy ICC path. This is because AB vote 0 is treated as invalid DCVS vote. Fix this by updating the condition to determine AB vote. 
Change-Id: Icc71089f5a45205a91fe82aa82933ce046c07527 Signed-off-by: Archana Sriram --- adreno_gen7_gmu.c | 2 +- adreno_gen7_hwsched.c | 2 +- adreno_gen8_gmu.c | 2 +- adreno_gen8_hwsched.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index d65311dad5..16ba8167a8 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -2282,7 +2282,7 @@ static int gen7_gmu_bus_set(struct adreno_device *adreno_dev, int buslevel, if (buslevel == pwr->cur_buslevel) buslevel = INVALID_DCVS_IDX; - if ((ab == pwr->cur_ab) || (ab == 0)) + if ((ab == pwr->cur_ab) || ((ab == 0) && (adreno_dev->gmu_ab))) ab = INVALID_AB_VALUE; if ((ab == INVALID_AB_VALUE) && (buslevel == INVALID_DCVS_IDX)) diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 444091dcf3..ad4829cbee 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -1178,7 +1178,7 @@ static int gen7_hwsched_bus_set(struct adreno_device *adreno_dev, int buslevel, if (buslevel == pwr->cur_buslevel) buslevel = INVALID_DCVS_IDX; - if ((ab == pwr->cur_ab) || (ab == 0)) + if ((ab == pwr->cur_ab) || ((ab == 0) && (adreno_dev->gmu_ab))) ab = INVALID_AB_VALUE; if ((ab == INVALID_AB_VALUE) && (buslevel == INVALID_DCVS_IDX)) diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index 291fb52822..f7c36c5402 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -2097,7 +2097,7 @@ static int gen8_gmu_bus_set(struct adreno_device *adreno_dev, int buslevel, if (buslevel == pwr->cur_buslevel) buslevel = INVALID_DCVS_IDX; - if ((ab == pwr->cur_ab) || (ab == 0)) + if ((ab == pwr->cur_ab) || ((ab == 0) && (adreno_dev->gmu_ab))) ab = INVALID_AB_VALUE; if ((ab == INVALID_AB_VALUE) && (buslevel == INVALID_DCVS_IDX)) diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 72976fa719..e68e0e1d89 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -1241,7 +1241,7 @@ static int gen8_hwsched_bus_set(struct adreno_device *adreno_dev, int buslevel, if (buslevel 
== pwr->cur_buslevel) buslevel = INVALID_DCVS_IDX; - if ((ab == pwr->cur_ab) || (ab == 0)) + if ((ab == pwr->cur_ab) || ((ab == 0) && (adreno_dev->gmu_ab))) ab = INVALID_AB_VALUE; if ((ab == INVALID_AB_VALUE) && (buslevel == INVALID_DCVS_IDX)) From 510c0ba1be34afe94aca6d4025d69c311975c2d5 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 4 Jul 2024 22:24:18 +0530 Subject: [PATCH 0851/1016] kgsl: hwfence: Fix out of bound access during context queue read The context queue in kgsl is a circular buffer. However, the `queue` variable in `find_timeout_syncobj()` and the `cmd` variable in `log_syncobj()` are decoded without considering the circular nature of the context queue. As a result, this can lead to out-of-bounds access. Fix it by introducing a function that correctly reads the context queue. Change-Id: I751f3b16782b56a5a32efe616d98f1e2fe4dd781 Signed-off-by: Kamal Agrawal --- adreno_gen7_hwsched_hfi.c | 24 ++++++++++++++++-------- adreno_gen8_hwsched_hfi.c | 24 ++++++++++++++++-------- adreno_hwsched.c | 21 +++++++++++++++++++++ adreno_hwsched.h | 12 ++++++++++++ 4 files changed, 65 insertions(+), 16 deletions(-) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 47cdf27550..39c25257a4 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -298,16 +298,22 @@ static void _get_syncobj_string(char *str, u32 max_size, struct hfi_syncobj_lega } } -static void log_syncobj(struct gen7_gmu_device *gmu, struct hfi_submit_syncobj *cmd) +static void log_syncobj(struct gen7_gmu_device *gmu, struct adreno_context *drawctxt, + struct hfi_submit_syncobj *cmd, u32 syncobj_read_idx) { - struct hfi_syncobj_legacy *syncobj = (struct hfi_syncobj_legacy *)&cmd[1]; + struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; + struct hfi_syncobj_legacy syncobj; char str[128]; u32 i = 0; for (i = 0; i < cmd->num_syncobj; i++) { - _get_syncobj_string(str, sizeof(str), syncobj, i); + if 
(adreno_gmu_context_queue_read(drawctxt, (u32 *) &syncobj, syncobj_read_idx, + sizeof(syncobj) >> 2)) + break; + + _get_syncobj_string(str, sizeof(str), &syncobj, i); dev_err(&gmu->pdev->dev, "%s\n", str); - syncobj++; + syncobj_read_idx = (syncobj_read_idx + (sizeof(syncobj) >> 2)) % hdr->queue_size; } } @@ -318,7 +324,7 @@ static void find_timeout_syncobj(struct adreno_device *adreno_dev, u32 ctxt_id, struct adreno_context *drawctxt; struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gmu_context_queue_header *hdr; - struct hfi_submit_syncobj *cmd; + struct hfi_submit_syncobj cmd; u32 *queue, i; int ret; @@ -342,10 +348,12 @@ static void find_timeout_syncobj(struct adreno_device *adreno_dev, u32 ctxt_id, continue; } - cmd = (struct hfi_submit_syncobj *)&queue[i]; + if (adreno_gmu_context_queue_read(drawctxt, (u32 *) &cmd, i, sizeof(cmd) >> 2)) + break; - if (cmd->timestamp == ts) { - log_syncobj(gmu, cmd); + if (cmd.timestamp == ts) { + log_syncobj(gmu, drawctxt, &cmd, + (i + (sizeof(cmd) >> 2)) % hdr->queue_size); break; } i = (i + MSG_HDR_GET_SIZE(queue[i])) % hdr->queue_size; diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 701af8c8d2..930604622d 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -218,16 +218,22 @@ static void _get_syncobj_string(char *str, u32 max_size, struct hfi_syncobj *syn } } -static void log_syncobj(struct gen8_gmu_device *gmu, struct hfi_submit_syncobj *cmd) +static void log_syncobj(struct gen8_gmu_device *gmu, struct adreno_context *drawctxt, + struct hfi_submit_syncobj *cmd, u32 syncobj_read_idx) { - struct hfi_syncobj *syncobj = (struct hfi_syncobj *)&cmd[1]; + struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; + struct hfi_syncobj syncobj; char str[128]; u32 i = 0; for (i = 0; i < cmd->num_syncobj; i++) { - _get_syncobj_string(str, sizeof(str), syncobj, i); + if (adreno_gmu_context_queue_read(drawctxt, (u32 *) &syncobj, syncobj_read_idx, + 
sizeof(syncobj) >> 2)) + break; + + _get_syncobj_string(str, sizeof(str), &syncobj, i); dev_err(&gmu->pdev->dev, "%s\n", str); - syncobj++; + syncobj_read_idx = (syncobj_read_idx + (sizeof(syncobj) >> 2)) % hdr->queue_size; } } @@ -238,7 +244,7 @@ static void find_timeout_syncobj(struct adreno_device *adreno_dev, u32 ctxt_id, struct adreno_context *drawctxt; struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gmu_context_queue_header *hdr; - struct hfi_submit_syncobj *cmd; + struct hfi_submit_syncobj cmd; u32 *queue, i; int ret; @@ -262,10 +268,12 @@ static void find_timeout_syncobj(struct adreno_device *adreno_dev, u32 ctxt_id, continue; } - cmd = (struct hfi_submit_syncobj *)&queue[i]; + if (adreno_gmu_context_queue_read(drawctxt, (u32 *) &cmd, i, sizeof(cmd) >> 2)) + break; - if (cmd->timestamp == ts) { - log_syncobj(gmu, cmd); + if (cmd.timestamp == ts) { + log_syncobj(gmu, drawctxt, &cmd, + (i + (sizeof(cmd) >> 2)) % hdr->queue_size); break; } i = (i + MSG_HDR_GET_SIZE(queue[i])) % hdr->queue_size; diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 1b5528ad08..4bc4a8052a 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -2166,6 +2166,27 @@ void adreno_hwsched_unregister_contexts(struct adreno_device *adreno_dev) hwsched->global_ctxt_gmu_registered = false; } +int adreno_gmu_context_queue_read(struct adreno_context *drawctxt, u32 *output, + u32 read_idx, u32 size_dwords) +{ + struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; + u32 *queue = drawctxt->gmu_context_queue.hostptr + sizeof(*hdr); + u32 i; + + if ((size_dwords > hdr->queue_size) || (read_idx >= hdr->queue_size)) + return -EINVAL; + + /* Clear the output data before populating */ + memset(output, 0, size_dwords << 2); + + for (i = 0; i < size_dwords; i++) { + output[i] = queue[read_idx]; + read_idx = (read_idx + 1) % hdr->queue_size; + } + + return 0; +} + static int hwsched_idle(struct adreno_device *adreno_dev) { struct kgsl_device *device = 
KGSL_DEVICE(adreno_dev); diff --git a/adreno_hwsched.h b/adreno_hwsched.h index d78bc871da..46da5f66a3 100644 --- a/adreno_hwsched.h +++ b/adreno_hwsched.h @@ -291,4 +291,16 @@ int adreno_hwsched_poll_msg_queue_write_index(struct kgsl_memdesc *hfi_mem); */ void adreno_hwsched_remove_hw_fence_entry(struct adreno_device *adreno_dev, struct adreno_hw_fence_entry *entry); + +/** + * adreno_gmu_context_queue_read - Read data from context queue + * @drawctxt: Pointer to the adreno draw context + * @output: Pointer to read the data into + * @read_idx: Index to read the data from + * @size: Number of dwords to read from the context queue + * + * Return: 0 on success or negative error on failure + */ +int adreno_gmu_context_queue_read(struct adreno_context *drawctxt, u32 *output, + u32 read_idx, u32 size); #endif From 3ecd4cacd86f0ecb8be0573bf507ba38d38ac85d Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Fri, 12 Jul 2024 10:47:13 +0530 Subject: [PATCH 0852/1016] kgsl: hwsched: Make few functions generation agnostic gmu_context_queue_write() and add_profile_events() are independent of GPU generation. Thus, move it to generic file to reduce code duplication. 
Change-Id: I3122f2fc8c39ff00fe352191d923006d4b00dd09 Signed-off-by: Kamal Agrawal --- adreno.h | 14 ++++ adreno_a6xx_hwsched_hfi.c | 64 +---------------- adreno_dispatch.h | 2 + adreno_gen7_hwsched_hfi.c | 140 ++------------------------------------ adreno_gen8_hwsched_hfi.c | 137 ++----------------------------------- adreno_hwsched.c | 133 ++++++++++++++++++++++++++++++++++++ adreno_hwsched.h | 26 +++++++ adreno_ringbuffer.h | 17 +---- 8 files changed, 185 insertions(+), 348 deletions(-) diff --git a/adreno.h b/adreno.h index 3fc604eb7c..3aaad52dd8 100644 --- a/adreno.h +++ b/adreno.h @@ -822,6 +822,20 @@ struct adreno_drawobj_profile_entry { ((_index) * sizeof(struct adreno_drawobj_profile_entry) \ + offsetof(struct adreno_drawobj_profile_entry, _member)) +/** + * struct adreno_submit_time - utility structure to store the wall clock / GPU + * ticks at command submit time + * @ticks: GPU ticks at submit time (from the 19.2Mhz timer) + * @ktime: local clock time (in nanoseconds) + * @utime: Wall clock time + * @drawobj: the object that we want to profile + */ +struct adreno_submit_time { + u64 ticks; + u64 ktime; + struct timespec64 utime; + struct kgsl_drawobj *drawobj; +}; /** * adreno_regs: List of registers that are used in kgsl driver for all diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index cdc386a8ed..6ce07193ea 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -5,7 +5,6 @@ */ #include -#include #include #include "adreno.h" @@ -1570,67 +1569,6 @@ void a6xx_hwsched_hfi_remove(struct adreno_device *adreno_dev) kthread_stop(hw_hfi->f2h_task); } -static void a6xx_add_profile_events(struct adreno_device *adreno_dev, - struct kgsl_drawobj_cmd *cmdobj, struct adreno_submit_time *time) -{ - unsigned long flags; - u64 time_in_s; - unsigned long time_in_ns; - struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); - struct kgsl_context *context = drawobj->context; - struct submission_info info = {0}; - - if (!time) - return; - - 
/* - * Here we are attempting to create a mapping between the - * GPU time domain (alwayson counter) and the CPU time domain - * (local_clock) by sampling both values as close together as - * possible. This is useful for many types of debugging and - * profiling. In order to make this mapping as accurate as - * possible, we must turn off interrupts to avoid running - * interrupt handlers between the two samples. - */ - - local_irq_save(flags); - - /* Read always on registers */ - time->ticks = a6xx_read_alwayson(adreno_dev); - - /* Trace the GPU time to create a mapping to ftrace time */ - trace_adreno_cmdbatch_sync(context->id, context->priority, - drawobj->timestamp, time->ticks); - - /* Get the kernel clock for time since boot */ - time->ktime = local_clock(); - - /* Get the timeofday for the wall time (for the user) */ - ktime_get_real_ts64(&time->utime); - - local_irq_restore(flags); - - /* Return kernel clock time to the client if requested */ - time_in_s = time->ktime; - time_in_ns = do_div(time_in_s, 1000000000); - - info.inflight = -1; - info.rb_id = adreno_get_level(context); - info.gmu_dispatch_queue = context->gmu_dispatch_queue; - - cmdobj->submit_ticks = time->ticks; - - msm_perf_events_update(MSM_PERF_GFX, MSM_PERF_SUBMIT, - pid_nr(context->proc_priv->pid), - context->id, drawobj->timestamp, - !!(drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME)); - trace_adreno_cmdbatch_submitted(drawobj, &info, time->ticks, - (unsigned long) time_in_s, time_in_ns / 1000, 0); - - log_kgsl_cmdbatch_submitted_event(context->id, drawobj->timestamp, - context->priority, drawobj->flags); -} - static u32 get_next_dq(u32 priority) { struct dq_info *info = &a6xx_hfi_dqs[priority]; @@ -1820,7 +1758,7 @@ static int a6xx_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, uint3 if (!cmdobj) goto done; - a6xx_add_profile_events(adreno_dev, cmdobj, time); + adreno_hwsched_add_profile_events(adreno_dev, cmdobj, time); /* * Put the profiling information in the user profiling 
buffer. diff --git a/adreno_dispatch.h b/adreno_dispatch.h index 397dbe27dd..3b2a17ce6f 100644 --- a/adreno_dispatch.h +++ b/adreno_dispatch.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef ____ADRENO_DISPATCHER_H @@ -91,6 +92,7 @@ enum adreno_dispatcher_flags { }; struct adreno_device; +struct adreno_submit_time; struct kgsl_device; void adreno_dispatcher_start(struct kgsl_device *device); diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 39c25257a4..f840b567c4 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -5,7 +5,6 @@ */ #include -#include #include #include "adreno.h" @@ -2749,69 +2748,6 @@ void gen7_hwsched_hfi_remove(struct adreno_device *adreno_dev) kthread_stop(hw_hfi->f2h_task); } -static void gen7_add_profile_events(struct adreno_device *adreno_dev, - struct kgsl_drawobj_cmd *cmdobj, struct adreno_submit_time *time) -{ - unsigned long flags; - u64 time_in_s; - unsigned long time_in_ns; - struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); - struct kgsl_context *context = drawobj->context; - struct submission_info info = {0}; - struct adreno_hwsched *hwsched = &adreno_dev->hwsched; - const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); - - if (!time) - return; - - /* - * Here we are attempting to create a mapping between the - * GPU time domain (alwayson counter) and the CPU time domain - * (local_clock) by sampling both values as close together as - * possible. This is useful for many types of debugging and - * profiling. In order to make this mapping as accurate as - * possible, we must turn off interrupts to avoid running - * interrupt handlers between the two samples. 
- */ - - local_irq_save(flags); - - /* Read always on registers */ - time->ticks = gpudev->read_alwayson(adreno_dev); - - /* Trace the GPU time to create a mapping to ftrace time */ - trace_adreno_cmdbatch_sync(context->id, context->priority, - drawobj->timestamp, time->ticks); - - /* Get the kernel clock for time since boot */ - time->ktime = local_clock(); - - /* Get the timeofday for the wall time (for the user) */ - ktime_get_real_ts64(&time->utime); - - local_irq_restore(flags); - - /* Return kernel clock time to the client if requested */ - time_in_s = time->ktime; - time_in_ns = do_div(time_in_s, 1000000000); - - info.inflight = hwsched->inflight; - info.rb_id = adreno_get_level(context); - info.gmu_dispatch_queue = context->gmu_dispatch_queue; - - cmdobj->submit_ticks = time->ticks; - - msm_perf_events_update(MSM_PERF_GFX, MSM_PERF_SUBMIT, - pid_nr(context->proc_priv->pid), - context->id, drawobj->timestamp, - !!(drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME)); - trace_adreno_cmdbatch_submitted(drawobj, &info, time->ticks, - (unsigned long) time_in_s, time_in_ns / 1000, 0); - - log_kgsl_cmdbatch_submitted_event(context->id, drawobj->timestamp, - context->priority, drawobj->flags); -} - static void init_gmu_context_queue(struct adreno_context *drawctxt) { struct kgsl_memdesc *md = &drawctxt->gmu_context_queue; @@ -3019,74 +2955,6 @@ static void populate_ibs(struct adreno_device *adreno_dev, #define DISPQ_IRQ_BIT(_idx) BIT((_idx) + HFI_DSP_IRQ_BASE) #define DISPQ_SYNC_IRQ_BIT(_idx) ((DISPQ_IRQ_BIT(_idx) << (KGSL_PRIORITY_MAX_RB_LEVELS + 1))) -int gen7_gmu_context_queue_write(struct adreno_device *adreno_dev, - struct kgsl_memdesc *gmu_context_queue, u32 *msg, u32 size_bytes, - struct kgsl_drawobj *drawobj, struct adreno_submit_time *time) -{ - struct gmu_context_queue_header *hdr = gmu_context_queue->hostptr; - const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); - u32 *queue = gmu_context_queue->hostptr + sizeof(*hdr); - u32 i, empty_space, 
write_idx = hdr->write_index, read_idx = hdr->read_index; - u32 size_dwords = size_bytes >> 2; - u32 align_size = ALIGN(size_dwords, SZ_4); - u32 id = MSG_HDR_GET_ID(*msg); - struct kgsl_drawobj_cmd *cmdobj = NULL; - - empty_space = (write_idx >= read_idx) ? - (hdr->queue_size - (write_idx - read_idx)) - : (read_idx - write_idx); - - if (empty_space <= align_size) - return -ENOSPC; - - if (!IS_ALIGNED(size_bytes, sizeof(u32))) - return -EINVAL; - - for (i = 0; i < size_dwords; i++) { - queue[write_idx] = msg[i]; - write_idx = (write_idx + 1) % hdr->queue_size; - } - - /* Cookify any non used data at the end of the write buffer */ - for (; i < align_size; i++) { - queue[write_idx] = 0xfafafafa; - write_idx = (write_idx + 1) % hdr->queue_size; - } - - /* Ensure packet is written out before proceeding */ - wmb(); - - if (!drawobj) - goto done; - - if (drawobj->type & SYNCOBJ_TYPE) { - struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); - - trace_adreno_syncobj_submitted(drawobj->context->id, drawobj->timestamp, - syncobj->num_hw_fence, gpudev->read_alwayson(adreno_dev)); - goto done; - } - - cmdobj = CMDOBJ(drawobj); - - gen7_add_profile_events(adreno_dev, cmdobj, time); - - /* - * Put the profiling information in the user profiling buffer. - * The hfi_update_write_idx below has a wmb() before the actual - * write index update to ensure that the GMU does not see the - * packet before the profile data is written out. 
- */ - adreno_profile_submit_time(time); - -done: - trace_kgsl_hfi_send(id, size_dwords, MSG_HDR_GET_SEQNUM(*msg)); - - hfi_update_write_idx(&hdr->write_index, write_idx); - - return 0; -} - static u32 get_irq_bit(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj) { if (!adreno_hwsched_context_queue_enabled(adreno_dev)) @@ -3187,7 +3055,7 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_SYNCOBJ, HFI_MSG_CMD); cmd->hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd->hdr, seqnum, cmd_sizebytes >> 2); - return gen7_gmu_context_queue_write(adreno_dev, &drawctxt->gmu_context_queue, + return adreno_gmu_context_queue_write(adreno_dev, &drawctxt->gmu_context_queue, (u32 *)cmd, cmd_sizebytes, drawobj, NULL); } @@ -3675,7 +3543,7 @@ static int gen7_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, u32 q if (!cmdobj) goto done; - gen7_add_profile_events(adreno_dev, cmdobj, time); + adreno_hwsched_add_profile_events(adreno_dev, cmdobj, time); /* * Put the profiling information in the user profiling buffer. 
@@ -3782,7 +3650,7 @@ skipib: cmd->hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd->hdr, seqnum, cmd_sizebytes >> 2); if (adreno_hwsched_context_queue_enabled(adreno_dev)) - ret = gen7_gmu_context_queue_write(adreno_dev, + ret = adreno_gmu_context_queue_write(adreno_dev, &drawctxt->gmu_context_queue, (u32 *)cmd, cmd_sizebytes, drawobj, &time); else ret = gen7_hfi_dispatch_queue_write(adreno_dev, @@ -4125,7 +3993,7 @@ static int submit_global_ctxt_cmd(struct adreno_device *adreno_dev, u64 gpuaddr, cmd.submit_cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.submit_cmd.hdr, seqnum, cmd_size >> 2); if (adreno_hwsched_context_queue_enabled(adreno_dev)) - ret = gen7_gmu_context_queue_write(adreno_dev, + ret = adreno_gmu_context_queue_write(adreno_dev, &hwsched->global_ctxtq, (u32 *)&cmd, cmd_size, NULL, NULL); else ret = gen7_hfi_dispatch_queue_write(adreno_dev, HFI_DSP_ID_0, diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 930604622d..ccd6f0df5f 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -5,7 +5,6 @@ */ #include -#include #include #include "adreno.h" @@ -17,7 +16,6 @@ #include "kgsl_device.h" #include "kgsl_eventlog.h" #include "kgsl_pwrctrl.h" -#include "kgsl_trace.h" #include "kgsl_util.h" #define HFI_QUEUE_MAX (HFI_QUEUE_DEFAULT_CNT) @@ -2570,69 +2568,6 @@ void gen8_hwsched_hfi_remove(struct adreno_device *adreno_dev) kthread_stop(hw_hfi->f2h_task); } -static void gen8_add_profile_events(struct adreno_device *adreno_dev, - struct kgsl_drawobj_cmd *cmdobj, struct adreno_submit_time *time) -{ - unsigned long flags; - u64 time_in_s; - unsigned long time_in_ns; - struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); - struct kgsl_context *context = drawobj->context; - struct submission_info info = {0}; - struct adreno_hwsched *hwsched = &adreno_dev->hwsched; - const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); - - if (!time) - return; - - /* - * Here we are attempting to create a mapping between the - * GPU time domain (alwayson 
counter) and the CPU time domain - * (local_clock) by sampling both values as close together as - * possible. This is useful for many types of debugging and - * profiling. In order to make this mapping as accurate as - * possible, we must turn off interrupts to avoid running - * interrupt handlers between the two samples. - */ - - local_irq_save(flags); - - /* Read always on registers */ - time->ticks = gpudev->read_alwayson(adreno_dev); - - /* Trace the GPU time to create a mapping to ftrace time */ - trace_adreno_cmdbatch_sync(context->id, context->priority, - drawobj->timestamp, time->ticks); - - /* Get the kernel clock for time since boot */ - time->ktime = local_clock(); - - /* Get the timeofday for the wall time (for the user) */ - ktime_get_real_ts64(&time->utime); - - local_irq_restore(flags); - - /* Return kernel clock time to the client if requested */ - time_in_s = time->ktime; - time_in_ns = do_div(time_in_s, 1000000000); - - info.inflight = hwsched->inflight; - info.rb_id = adreno_get_level(context); - info.gmu_dispatch_queue = context->gmu_dispatch_queue; - - cmdobj->submit_ticks = time->ticks; - - msm_perf_events_update(MSM_PERF_GFX, MSM_PERF_SUBMIT, - pid_nr(context->proc_priv->pid), - context->id, drawobj->timestamp, - !!(drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME)); - trace_adreno_cmdbatch_submitted(drawobj, &info, time->ticks, - (unsigned long) time_in_s, time_in_ns / 1000, 0); - - log_kgsl_cmdbatch_submitted_event(context->id, drawobj->timestamp, - context->priority, drawobj->flags); -} - static void init_gmu_context_queue(struct adreno_context *drawctxt) { struct kgsl_memdesc *md = &drawctxt->gmu_context_queue; @@ -2815,70 +2750,6 @@ static void populate_ibs(struct adreno_device *adreno_dev, #define DISPQ_IRQ_BIT(_idx) BIT((_idx) + HFI_DSP_IRQ_BASE) #define DISPQ_SYNC_IRQ_BIT(_idx) ((DISPQ_IRQ_BIT(_idx) << (KGSL_PRIORITY_MAX_RB_LEVELS + 1))) -int gen8_gmu_context_queue_write(struct adreno_device *adreno_dev, - struct adreno_context *drawctxt, 
u32 *msg, u32 size_bytes, - struct kgsl_drawobj *drawobj, struct adreno_submit_time *time) -{ - struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; - const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); - u32 *queue = drawctxt->gmu_context_queue.hostptr + sizeof(*hdr); - u32 i, empty_space, write_idx = hdr->write_index, read_idx = hdr->read_index; - u32 size_dwords = size_bytes >> 2; - u32 align_size = ALIGN(size_dwords, SZ_4); - u32 id = MSG_HDR_GET_ID(*msg); - struct kgsl_drawobj_cmd *cmdobj = NULL; - - empty_space = (write_idx >= read_idx) ? - (hdr->queue_size - (write_idx - read_idx)) - : (read_idx - write_idx); - - if (empty_space <= align_size) - return -ENOSPC; - - if (!IS_ALIGNED(size_bytes, sizeof(u32))) - return -EINVAL; - - for (i = 0; i < size_dwords; i++) { - queue[write_idx] = msg[i]; - write_idx = (write_idx + 1) % hdr->queue_size; - } - - /* Cookify any non used data at the end of the write buffer */ - for (; i < align_size; i++) { - queue[write_idx] = 0xfafafafa; - write_idx = (write_idx + 1) % hdr->queue_size; - } - - /* Ensure packet is written out before proceeding */ - wmb(); - - if (drawobj->type & SYNCOBJ_TYPE) { - struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); - - trace_adreno_syncobj_submitted(drawobj->context->id, drawobj->timestamp, - syncobj->num_hw_fence, gpudev->read_alwayson(adreno_dev)); - goto done; - } - - cmdobj = CMDOBJ(drawobj); - - gen8_add_profile_events(adreno_dev, cmdobj, time); - - /* - * Put the profiling information in the user profiling buffer. - * The hfi_update_write_idx below has a wmb() before the actual - * write index update to ensure that the GMU does not see the - * packet before the profile data is written out. 
- */ - adreno_profile_submit_time(time); - -done: - trace_kgsl_hfi_send(id, size_dwords, MSG_HDR_GET_SEQNUM(*msg)); - - hfi_update_write_idx(&hdr->write_index, write_idx); - - return 0; -} static u32 get_irq_bit(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj) { @@ -2978,8 +2849,8 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, seqnum = atomic_inc_return(&adreno_dev->hwsched.submission_seqnum); cmd->hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd->hdr, seqnum, cmd_sizebytes >> 2); - return gen8_gmu_context_queue_write(adreno_dev, drawctxt, (u32 *)cmd, cmd_sizebytes, - drawobj, NULL); + return adreno_gmu_context_queue_write(adreno_dev, &drawctxt->gmu_context_queue, + (u32 *)cmd, cmd_sizebytes, drawobj, NULL); } int gen8_hwsched_check_context_inflight_hw_fences(struct adreno_device *adreno_dev, @@ -3512,8 +3383,8 @@ skipib: seqnum = atomic_inc_return(&adreno_dev->hwsched.submission_seqnum); cmd->hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd->hdr, seqnum, cmd_sizebytes >> 2); - ret = gen8_gmu_context_queue_write(adreno_dev, drawctxt, (u32 *)cmd, cmd_sizebytes, drawobj, - &time); + ret = adreno_gmu_context_queue_write(adreno_dev, &drawctxt->gmu_context_queue, + (u32 *)cmd, cmd_sizebytes, drawobj, &time); if (ret) return ret; diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 4bc4a8052a..6f6c5297dc 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -9,8 +9,11 @@ #include "adreno_snapshot.h" #include "adreno_sysfs.h" #include "adreno_trace.h" +#include "kgsl_eventlog.h" #include "kgsl_timeline.h" +#include "kgsl_trace.h" #include +#include #include #define POLL_SLEEP_US 100 @@ -2576,3 +2579,133 @@ void adreno_hwsched_remove_hw_fence_entry(struct adreno_device *adreno_dev, kgsl_context_put_deferred(&drawctxt->base); } +void adreno_hwsched_add_profile_events(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj, struct adreno_submit_time *time) +{ + unsigned long flags; + u64 time_in_s; + unsigned long time_in_ns; + struct kgsl_drawobj *drawobj 
= DRAWOBJ(cmdobj); + struct kgsl_context *context = drawobj->context; + struct submission_info info = {0}; + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (!time) + return; + + /* + * Here we are attempting to create a mapping between the + * GPU time domain (alwayson counter) and the CPU time domain + * (local_clock) by sampling both values as close together as + * possible. This is useful for many types of debugging and + * profiling. In order to make this mapping as accurate as + * possible, we must turn off interrupts to avoid running + * interrupt handlers between the two samples. + */ + + local_irq_save(flags); + + /* Read always on registers */ + time->ticks = gpudev->read_alwayson(adreno_dev); + + /* Trace the GPU time to create a mapping to ftrace time */ + trace_adreno_cmdbatch_sync(context->id, context->priority, + drawobj->timestamp, time->ticks); + + /* Get the kernel clock for time since boot */ + time->ktime = local_clock(); + + /* Get the timeofday for the wall time (for the user) */ + ktime_get_real_ts64(&time->utime); + + local_irq_restore(flags); + + /* Return kernel clock time to the client if requested */ + time_in_s = time->ktime; + time_in_ns = do_div(time_in_s, 1000000000); + + info.inflight = hwsched->inflight; + info.rb_id = adreno_get_level(context); + info.gmu_dispatch_queue = context->gmu_dispatch_queue; + + cmdobj->submit_ticks = time->ticks; + + msm_perf_events_update(MSM_PERF_GFX, MSM_PERF_SUBMIT, + pid_nr(context->proc_priv->pid), + context->id, drawobj->timestamp, + !!(drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME)); + trace_adreno_cmdbatch_submitted(drawobj, &info, time->ticks, + (unsigned long) time_in_s, time_in_ns / 1000, 0); + + log_kgsl_cmdbatch_submitted_event(context->id, drawobj->timestamp, + context->priority, drawobj->flags); +} + +int adreno_gmu_context_queue_write(struct adreno_device *adreno_dev, + struct kgsl_memdesc 
*gmu_context_queue, u32 *msg, u32 size_bytes, + struct kgsl_drawobj *drawobj, struct adreno_submit_time *time) +{ + struct gmu_context_queue_header *hdr = gmu_context_queue->hostptr; + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + u32 *queue = gmu_context_queue->hostptr + sizeof(*hdr); + u32 i, empty_space, write_idx = hdr->write_index, read_idx = hdr->read_index; + u32 size_dwords = size_bytes >> 2; + u32 align_size = ALIGN(size_dwords, SZ_4); + u32 id = MSG_HDR_GET_ID(*msg); + struct kgsl_drawobj_cmd *cmdobj = NULL; + + empty_space = (write_idx >= read_idx) ? + (hdr->queue_size - (write_idx - read_idx)) + : (read_idx - write_idx); + + if (empty_space <= align_size) + return -ENOSPC; + + if (!IS_ALIGNED(size_bytes, sizeof(u32))) + return -EINVAL; + + for (i = 0; i < size_dwords; i++) { + queue[write_idx] = msg[i]; + write_idx = (write_idx + 1) % hdr->queue_size; + } + + /* Cookify any non used data at the end of the write buffer */ + for (; i < align_size; i++) { + queue[write_idx] = 0xfafafafa; + write_idx = (write_idx + 1) % hdr->queue_size; + } + + /* Ensure packet is written out before proceeding */ + wmb(); + + if (!drawobj) + goto done; + + if (drawobj->type & SYNCOBJ_TYPE) { + struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); + + trace_adreno_syncobj_submitted(drawobj->context->id, drawobj->timestamp, + syncobj->num_hw_fence, gpudev->read_alwayson(adreno_dev)); + goto done; + } + + cmdobj = CMDOBJ(drawobj); + + adreno_hwsched_add_profile_events(adreno_dev, cmdobj, time); + + /* + * Put the profiling information in the user profiling buffer. + * The hfi_update_write_idx below has a wmb() before the actual + * write index update to ensure that the GMU does not see the + * packet before the profile data is written out. 
+ */ + adreno_profile_submit_time(time); + +done: + trace_kgsl_hfi_send(id, size_dwords, MSG_HDR_GET_SEQNUM(*msg)); + + hfi_update_write_idx(&hdr->write_index, write_idx); + + return 0; +} diff --git a/adreno_hwsched.h b/adreno_hwsched.h index 46da5f66a3..f9668874e9 100644 --- a/adreno_hwsched.h +++ b/adreno_hwsched.h @@ -303,4 +303,30 @@ void adreno_hwsched_remove_hw_fence_entry(struct adreno_device *adreno_dev, */ int adreno_gmu_context_queue_read(struct adreno_context *drawctxt, u32 *output, u32 read_idx, u32 size); + +/** + * adreno_gmu_context_queue_write - Write data to context queue + * + * @adreno_dev: Pointer to adreno device structure + * @gmu_context_queue: Pointer to the memory descriptor for context queue + * @msg: Pointer to the message data to be written + * @size_bytes: Size of the message data in bytes + * @drawobj: Pointer to the draw object + * @time: Pointer to the submission time information + * + * Return: 0 on success or negative error on failure + */ +int adreno_gmu_context_queue_write(struct adreno_device *adreno_dev, + struct kgsl_memdesc *gmu_context_queue, u32 *msg, u32 size_bytes, + struct kgsl_drawobj *drawobj, struct adreno_submit_time *time); + +/** + * adreno_hwsched_add_profile_events - Add profiling events + * + * @adreno_dev: Pointer to the adreno device structure + * @cmdobj: Pointer to the command object + * @time: Pointer to the submission time information + */ +void adreno_hwsched_add_profile_events(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj, struct adreno_submit_time *time); #endif diff --git a/adreno_ringbuffer.h b/adreno_ringbuffer.h index 64a4d16d65..b02376057f 100644 --- a/adreno_ringbuffer.h +++ b/adreno_ringbuffer.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024 Qualcomm Innovation Center, Inc. 
All rights reserved. */ #ifndef __ADRENO_RINGBUFFER_H #define __ADRENO_RINGBUFFER_H @@ -52,21 +52,6 @@ struct kgsl_device; struct kgsl_device_private; -/** - * struct adreno_submit_time - utility structure to store the wall clock / GPU - * ticks at command submit time - * @ticks: GPU ticks at submit time (from the 19.2Mhz timer) - * @ktime: local clock time (in nanoseconds) - * @utime: Wall clock time - * @drawobj: the object that we want to profile - */ -struct adreno_submit_time { - uint64_t ticks; - u64 ktime; - struct timespec64 utime; - struct kgsl_drawobj *drawobj; -}; - /** * This is to keep track whether the SET_PSEUDO_REGISTER packet needs to be submitted * or not From 8e6565deb140806ae4dd883dc8372750af4b7f96 Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Mon, 15 Jul 2024 12:44:27 +0530 Subject: [PATCH 0853/1016] kgsl: pwrctrl: Add sub_type constraint check to update expiry When a context is submitted with a MAX constraint sub_type, followed by back-to-back submissions of a MIN constraint sub_type, the expiry of the MAX constraint is increased. This results in the max_pwrlevel being retained for a longer period. To fix this, add sub_type constraint check also. This ensures that expiry is updated only if the previous and current sub_type constraint are the same. 
Change-Id: I46cd6efa3c2bd698bf26febfb1928f4fcd8ce30b Signed-off-by: Sanjay Yadav --- kgsl_pwrctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 409974375e..bb7a1d33c6 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -322,7 +322,7 @@ void kgsl_pwrctrl_set_constraint(struct kgsl_device *device, kgsl_pwrctrl_pwrlevel_change(device, constraint); /* Trace the constraint being set by the driver */ trace_kgsl_constraint(device, pwrc_old->type, constraint, 1); - } else if (pwrc_old->type == pwrc->type) { + } else if ((pwrc_old->type == pwrc->type) && (pwrc_old->sub_type == pwrc->sub_type)) { pwrc_old->owner_id = id; pwrc_old->owner_timestamp = ts; pwrc_old->expires = jiffies + From 760c7057d4be6598c2d625851e0f0d0b00b8a467 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sun, 14 Jul 2024 19:20:47 +0530 Subject: [PATCH 0854/1016] kgsl: pwrctrl: Remove thermal_pwrlevel_floor variable The thermal_pwrlevel_floor hint is no longer sent by the thermal subsystem. As a result, we can safely remove this variable. Also, replace unsigned int with u32 in _adjust_pwrlevel(). Change-Id: I082fcad75b23900dc66de33ae4ec5dce180f4e13 Signed-off-by: Kamal Agrawal --- kgsl_pwrctrl.c | 25 +++++++++---------------- kgsl_pwrctrl.h | 2 -- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 409974375e..b04590469f 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -66,31 +66,25 @@ static void _bimc_clk_prepare_enable(struct kgsl_device *device, /** * _adjust_pwrlevel() - Given a requested power level do bounds checking on the * constraints and return the nearest possible level - * @device: Pointer to the kgsl_device struct + * @pwr: kgsl_pwrctrl structure for the device * @level: Requested level * @pwrc: Pointer to the power constraint to be applied * - * Apply thermal and max/min limits first. Then force the level with a + * Apply thermal and max/min limits first. 
Then force the level with a * constraint if one exists. */ -static unsigned int _adjust_pwrlevel(struct kgsl_pwrctrl *pwr, int level, - struct kgsl_pwr_constraint *pwrc) +static u32 _adjust_pwrlevel(struct kgsl_pwrctrl *pwr, u32 level, struct kgsl_pwr_constraint *pwrc) { - unsigned int thermal_pwrlevel = READ_ONCE(pwr->thermal_pwrlevel); - unsigned int max_pwrlevel = max_t(unsigned int, thermal_pwrlevel, + u32 thermal_pwrlevel = READ_ONCE(pwr->thermal_pwrlevel); + u32 max_pwrlevel = max_t(u32, thermal_pwrlevel, pwr->max_pwrlevel); - unsigned int min_pwrlevel = min_t(unsigned int, - pwr->thermal_pwrlevel_floor, - pwr->min_pwrlevel); + u32 min_pwrlevel = pwr->min_pwrlevel; /* Ensure that max pwrlevel is within pmqos max limit */ - max_pwrlevel = max_t(unsigned int, max_pwrlevel, - READ_ONCE(pwr->pmqos_max_pwrlevel)); + max_pwrlevel = max_t(u32, max_pwrlevel, READ_ONCE(pwr->pmqos_max_pwrlevel)); - /* Ensure that max/min pwrlevels are within thermal max/min limits */ - max_pwrlevel = min_t(unsigned int, max_pwrlevel, - pwr->thermal_pwrlevel_floor); - min_pwrlevel = max_t(unsigned int, min_pwrlevel, thermal_pwrlevel); + /* Ensure that min pwrlevel is within thermal limit */ + min_pwrlevel = max_t(u32, min_pwrlevel, thermal_pwrlevel); switch (pwrc->type) { case KGSL_CONSTRAINT_PWRLEVEL: { @@ -1983,7 +1977,6 @@ int kgsl_pwrctrl_init(struct kgsl_device *device) /* Initialize the thermal clock constraints */ pwr->thermal_pwrlevel = 0; - pwr->thermal_pwrlevel_floor = pwr->num_pwrlevels - 1; result = dev_pm_qos_add_request(&pdev->dev, &pwr->sysfs_thermal_req, DEV_PM_QOS_MAX_FREQUENCY, PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index e17aac4ffd..a567d0feca 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -84,7 +84,6 @@ struct kgsl_pwrlevel { * @active_pwrlevel - The currently active power level * @previous_pwrlevel - The power level before transition * @thermal_pwrlevel - maximum powerlevel constraint from thermal - * 
@thermal_pwrlevel_floor - minimum powerlevel constraint from thermal * @default_pwrlevel - device wake up power level * @max_pwrlevel - maximum allowable powerlevel per the user * @min_pwrlevel - minimum allowable powerlevel per the user @@ -139,7 +138,6 @@ struct kgsl_pwrctrl { unsigned int active_pwrlevel; unsigned int previous_pwrlevel; unsigned int thermal_pwrlevel; - unsigned int thermal_pwrlevel_floor; unsigned int default_pwrlevel; unsigned int max_pwrlevel; unsigned int min_pwrlevel; From a251a8ddbebdbeec0117c90cbbbd647fab7cd365 Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Thu, 16 May 2024 14:20:31 +0530 Subject: [PATCH 0855/1016] kgsl: swsched: Flush LRZ before every page table switch When the incoming context tries to access the previous contexts LRZ buffer, it would result in page fault. Fix this by flushing LRZ before every page table switch. Change-Id: Ie28e7b46586935493e0ac08d3af7d00929f3c149 Signed-off-by: Sanjay Yadav --- adreno_gen7_ringbuffer.c | 12 +++++++++++- adreno_gen8_ringbuffer.c | 12 +++++++++++- adreno_pm4types.h | 3 ++- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/adreno_gen7_ringbuffer.c b/adreno_gen7_ringbuffer.c index 11d9036594..875fc86b50 100644 --- a/adreno_gen7_ringbuffer.c +++ b/adreno_gen7_ringbuffer.c @@ -65,7 +65,7 @@ static int gen7_rb_context_switch(struct adreno_device *adreno_dev, adreno_drawctxt_get_pagetable(drawctxt); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int count = 0; - u32 cmds[55]; + u32 cmds[61]; /* Sync both threads */ cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1); @@ -84,6 +84,16 @@ static int gen7_rb_context_switch(struct adreno_device *adreno_dev, cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BR; if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable) { + /* Flush LRZ before every pagetable switch */ + cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[count++] = CP_SET_THREAD_BOTH; + + cmds[count++] = cp_type7_packet(CP_EVENT_WRITE, 1); + 
/* Add event ID for LRZ flush as packet payload */ + cmds[count++] = LRZ_CACHE_FLUSH; + + cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BR; /* Clear performance counters during context switches */ if (!adreno_dev->perfcounter) { diff --git a/adreno_gen8_ringbuffer.c b/adreno_gen8_ringbuffer.c index a959e89450..ad37e8b036 100644 --- a/adreno_gen8_ringbuffer.c +++ b/adreno_gen8_ringbuffer.c @@ -65,7 +65,7 @@ static int gen8_rb_context_switch(struct adreno_device *adreno_dev, adreno_drawctxt_get_pagetable(drawctxt); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int count = 0; - u32 cmds[57]; + u32 cmds[63]; /* Sync both threads */ cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1); @@ -84,6 +84,16 @@ static int gen8_rb_context_switch(struct adreno_device *adreno_dev, cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BR; if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable) { + /* Flush LRZ before every pagetable switch */ + cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[count++] = CP_SET_THREAD_BOTH; + + cmds[count++] = cp_type7_packet(CP_EVENT_WRITE, 1); + /* Add event ID for LRZ flush as packet payload */ + cmds[count++] = LRZ_CACHE_FLUSH; + + cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BR; /* Clear performance counters during context switches */ if (!adreno_dev->perfcounter) { diff --git a/adreno_pm4types.h b/adreno_pm4types.h index 7e2dc23003..455e988cb0 100644 --- a/adreno_pm4types.h +++ b/adreno_pm4types.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __ADRENO_PM4TYPES_H #define __ADRENO_PM4TYPES_H @@ -163,6 +163,7 @@ /* This is a commonly used CP_EVENT_WRITE */ #define CACHE_FLUSH_TS 4 #define CACHE_CLEAN 0x31 +#define LRZ_CACHE_FLUSH 0x26 /* Controls which threads execute the PM4 commands the follow this packet */ #define CP_THREAD_CONTROL 0x17 From 3e1075f57865ae4d32b265d3012879db2d9a05ce Mon Sep 17 00:00:00 2001 From: Linux Image Build Automation Date: Wed, 17 Jul 2024 01:28:51 -0700 Subject: [PATCH 0856/1016] Revert "kgsl: gen8: Rearrange some GMU register dumping in snapshot" This reverts commit 4a56d40193f04adb3104299d034c970e50d53429. Change-Id: If6f0e58561eb6ebf8a388b4690e5a05396f531fe Signed-off-by: Linux Image Build Automation --- adreno_gen8.c | 4 +- adreno_gen8_0_0_snapshot.h | 21 +++---- adreno_gen8_gmu.c | 11 ---- adreno_gen8_gmu.h | 7 --- adreno_gen8_gmu_snapshot.c | 3 +- adreno_gen8_snapshot.c | 125 +++++++++++++++++-------------------- 6 files changed, 70 insertions(+), 101 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index 442ec114a3..c949b539c5 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -2870,7 +2870,7 @@ static void gen8_lpac_fault_header(struct adreno_device *adreno_dev, pr_context(device, drawobj->context, "lpac cmdline: %s\n", drawctxt->base.proc_priv->cmdline); - if (!gen8_gmu_rpmh_pwr_state_is_active(device) || !gx_on) + if (!gx_on) goto done; kgsl_regread(device, GEN8_RBBM_LPAC_STATUS, &status); @@ -2926,7 +2926,7 @@ static void gen8_fault_header(struct adreno_device *adreno_dev, drawctxt->base.proc_priv->cmdline); } - if (!gen8_gmu_rpmh_pwr_state_is_active(device) || !gx_on) + if (!gx_on) goto done; kgsl_regread(device, GEN8_RBBM_STATUS, &status); diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h index f5f04344ec..050854acda 100644 --- a/adreno_gen8_0_0_snapshot.h +++ b/adreno_gen8_0_0_snapshot.h @@ -1883,6 +1883,11 @@ static struct gen8_reg_list gen8_0_0_ahb_registers[] = { { UNSLICE, gen8_0_0_ahb_secure_gpu_registers }, }; +static 
struct gen8_reg_list gen8_gmu_gx_registers[] = { + { UNSLICE, gen8_0_0_gmugx_registers }, + { SLICE, gen8_0_0_gmugx_slice_registers }, +}; + /* * Block : ['GDPM_LKG'] * REGION : UNSLICE @@ -2073,17 +2078,11 @@ static const u32 *gen8_0_0_external_core_regs[] = { gen8_0_0_gpu_cc_ahb2phy_swman_registers, gen8_0_0_gpu_cc_gpu_cc_reg_registers, gen8_0_0_gpu_cc_pll0_cm_pll_taycan_common_registers, + gen8_0_0_acd_acd_mnd_registers, + gen8_0_0_gx_clkctl_ahb2phy_broadcast_swman_registers, + gen8_0_0_gx_clkctl_ahb2phy_swman_registers, + gen8_0_0_gx_clkctl_pll0_cm_pll_taycan_common_registers, + gen8_0_0_gx_clkctl_gx_clkctl_reg_registers, gen8_0_0_cpr_registers, }; - -static struct gen8_reg_list gen8_gmu_gx_registers[] = { - { UNSLICE, gen8_0_0_gmugx_registers }, - { UNSLICE, gen8_0_0_gx_clkctl_ahb2phy_broadcast_swman_registers }, - { UNSLICE, gen8_0_0_gx_clkctl_ahb2phy_swman_registers }, - { UNSLICE, gen8_0_0_gx_clkctl_pll0_cm_pll_taycan_common_registers }, - { UNSLICE, gen8_0_0_gx_clkctl_gx_clkctl_reg_registers }, - { UNSLICE, gen8_0_0_acd_acd_mnd_registers }, - { SLICE, gen8_0_0_gmugx_slice_registers }, -}; - #endif /*_ADRENO_GEN8_0_0_SNAPSHOT_H */ diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index 291fb52822..de6fcc89b3 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -684,14 +684,6 @@ bool gen8_gmu_gx_is_on(struct adreno_device *adreno_dev) return is_on(val); } -bool gen8_gmu_rpmh_pwr_state_is_active(struct kgsl_device *device) -{ - u32 val; - - gmu_core_regread(device, GEN8_GMUCX_RPMH_POWER_STATE, &val); - return (val == GPU_HW_ACTIVE) ? 
true : false; -} - static const char *idle_level_name(int level) { if (level == GPU_HW_ACTIVE) @@ -863,9 +855,6 @@ void gen8_gmu_register_config(struct adreno_device *adreno_dev) /* Clear any previously set cm3 fault */ atomic_set(&gmu->cm3_fault, 0); - /* Init the power state register before GMU turns on GX */ - gmu_core_regwrite(device, GEN8_GMUCX_RPMH_POWER_STATE, 0xF); - /* Vote veto for FAL10 */ gmu_core_regwrite(device, GEN8_GMUCX_CX_FALNEXT_INTF, 0x1); gmu_core_regwrite(device, GEN8_GMUCX_CX_FAL_INTF, 0x1); diff --git a/adreno_gen8_gmu.h b/adreno_gen8_gmu.h index d4831f3c4d..73f9a2d6c2 100644 --- a/adreno_gen8_gmu.h +++ b/adreno_gen8_gmu.h @@ -488,11 +488,4 @@ u32 gen8_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab); */ int gen8_gmu_clock_set_rate(struct adreno_device *adreno_dev, u32 req_freq); -/** - * gen8_gmu_rpmh_pwr_state_is_active - Check the state of GPU HW - * @device: Pointer to the kgsl device - * - * Returns true on active or false otherwise - */ -bool gen8_gmu_rpmh_pwr_state_is_active(struct kgsl_device *device); #endif diff --git a/adreno_gen8_gmu_snapshot.c b/adreno_gen8_gmu_snapshot.c index 7c3bd4e536..4619c1f26d 100644 --- a/adreno_gen8_gmu_snapshot.c +++ b/adreno_gen8_gmu_snapshot.c @@ -207,8 +207,7 @@ static void gen8_gmu_device_snapshot(struct kgsl_device *device, adreno_snapshot_registers_v2, (void *) gen8_snapshot_block_list->gmu_cx_unsliced_regs); - if (!gen8_gmu_rpmh_pwr_state_is_active(device) || - !gen8_gmu_gx_is_on(adreno_dev)) + if (!gen8_gmu_gx_is_on(adreno_dev)) goto dtcm; /* Set fence to ALLOW mode so registers can be read */ diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index f16ba10a3c..2072d7db01 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -219,9 +219,8 @@ size_t gen8_legacy_snapshot_registers(struct kgsl_device *device, if (info->regs->sel) kgsl_regwrite(device, info->regs->sel->host_reg, info->regs->sel->val); - if (info->regs->slice_region) - kgsl_regwrite(device, 
GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL - (info->slice_id, 0, 0, 0)); + kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL + (info->slice_id, 0, 0, 0)); /* Make sure the previous writes are posted before reading */ mb(); @@ -1325,61 +1324,6 @@ static size_t gen8_snapshot_cx_side_dbgc_debugbus_block(struct kgsl_device *devi return size; } -static void gen8_snapshot_cx_debugbus(struct adreno_device *adreno_dev, - struct kgsl_snapshot *snapshot) -{ - u32 i; - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLT, - FIELD_PREP(GENMASK(31, 28), 0xf)); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLM, - FIELD_PREP(GENMASK(27, 24), 0xf)); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_0, - FIELD_PREP(GENMASK(3, 0), 0x0) | - FIELD_PREP(GENMASK(7, 4), 0x1) | - FIELD_PREP(GENMASK(11, 8), 0x2) | - FIELD_PREP(GENMASK(15, 12), 0x3) | - FIELD_PREP(GENMASK(19, 16), 0x4) | - FIELD_PREP(GENMASK(23, 20), 0x5) | - FIELD_PREP(GENMASK(27, 24), 0x6) | - FIELD_PREP(GENMASK(31, 28), 0x7)); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_1, - FIELD_PREP(GENMASK(3, 0), 0x8) | - FIELD_PREP(GENMASK(7, 4), 0x9) | - FIELD_PREP(GENMASK(11, 8), 0xa) | - FIELD_PREP(GENMASK(15, 12), 0xb) | - FIELD_PREP(GENMASK(19, 16), 0xc) | - FIELD_PREP(GENMASK(23, 20), 0xd) | - FIELD_PREP(GENMASK(27, 24), 0xe) | - FIELD_PREP(GENMASK(31, 28), 0xf)); - - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); - kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); - - /* Dump the CX debugbus data if the block exists */ - if (!kgsl_regmap_valid_offset(&device->regmap, GEN8_CX_DBGC_CFG_DBGBUS_SEL_A)) - return; - - for (i = 0; i < 
gen8_snapshot_block_list->cx_debugbus_blocks_len; i++) { - kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_DEBUGBUS, - snapshot, gen8_snapshot_cx_dbgc_debugbus_block, - (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); - kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, - snapshot, gen8_snapshot_cx_side_dbgc_debugbus_block, - (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); - } -} - /* gen8_snapshot_debugbus() - Capture debug bus data */ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) @@ -1422,6 +1366,41 @@ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_MASKL_2, 0); kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_MASKL_3, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLT, + FIELD_PREP(GENMASK(31, 28), 0xf)); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLM, + FIELD_PREP(GENMASK(27, 24), 0xf)); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_0, + FIELD_PREP(GENMASK(3, 0), 0x0) | + FIELD_PREP(GENMASK(7, 4), 0x1) | + FIELD_PREP(GENMASK(11, 8), 0x2) | + FIELD_PREP(GENMASK(15, 12), 0x3) | + FIELD_PREP(GENMASK(19, 16), 0x4) | + FIELD_PREP(GENMASK(23, 20), 0x5) | + FIELD_PREP(GENMASK(27, 24), 0x6) | + FIELD_PREP(GENMASK(31, 28), 0x7)); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_1, + FIELD_PREP(GENMASK(3, 0), 0x8) | + FIELD_PREP(GENMASK(7, 4), 0x9) | + FIELD_PREP(GENMASK(11, 8), 0xa) | + FIELD_PREP(GENMASK(15, 12), 0xb) | + FIELD_PREP(GENMASK(19, 16), 0xc) | + FIELD_PREP(GENMASK(23, 20), 0xd) | + FIELD_PREP(GENMASK(27, 24), 0xe) | + FIELD_PREP(GENMASK(31, 28), 0xf)); + + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); + 
kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_2, 0); + kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_3, 0); + for (i = 0; i < gen8_snapshot_block_list->debugbus_blocks_len; i++) { kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUGBUS, @@ -1443,6 +1422,20 @@ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, snapshot, gen8_snapshot_dbgc_side_debugbus_block, (void *) &gen8_snapshot_block_list->gbif_debugbus_blocks[i]); } + + /* Dump the CX debugbus data if the block exists */ + if (kgsl_regmap_valid_offset(&device->regmap, GEN8_CX_DBGC_CFG_DBGBUS_SEL_A)) { + for (i = 0; i < gen8_snapshot_block_list->cx_debugbus_blocks_len; i++) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, gen8_snapshot_cx_dbgc_debugbus_block, + (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, + snapshot, gen8_snapshot_cx_side_dbgc_debugbus_block, + (void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]); + } + } } static void gen8_reglist_snapshot(struct kgsl_device *device, @@ -1550,8 +1543,7 @@ static void gen8_cx_misc_regs_snapshot(struct kgsl_device *device, u64 *ptr, offset = 0; const u32 *regs_ptr = (const u32 *)gen8_snapshot_block_list->cx_misc_regs; - if (CD_SCRIPT_CHECK(device) || !gen8_gmu_rpmh_pwr_state_is_active(device) - || !gen8_gmu_gx_is_on(ADRENO_DEVICE(device))) + if (CD_SCRIPT_CHECK(device) || !adreno_gx_is_on(ADRENO_DEVICE(device))) goto legacy_snapshot; /* Build the crash script */ @@ -1628,18 +1620,15 @@ void gen8_snapshot(struct adreno_device *adreno_dev, if (!gmu_core_isenabled(device)) gen8_snapshot_external_core_regs(device, snapshot); - gen8_cx_misc_regs_snapshot(device, snapshot); - - gen8_snapshot_cx_debugbus(adreno_dev, snapshot); - - if (!gen8_gmu_rpmh_pwr_state_is_active(device) || - !gen8_gmu_gx_is_on(adreno_dev)) - return; - 
gen8_snapshot_trace_buffer(device, snapshot); gen8_snapshot_debugbus(adreno_dev, snapshot); + gen8_cx_misc_regs_snapshot(device, snapshot); + + if (!adreno_gx_is_on(adreno_dev)) + return; + is_current_rt = rt_task(current); if (is_current_rt) From 6234e13d74fd020da164a24ebbd5a819f97caa6a Mon Sep 17 00:00:00 2001 From: Linux Image Build Automation Date: Wed, 17 Jul 2024 01:29:03 -0700 Subject: [PATCH 0857/1016] Revert "kgsl: gen8: Dump CPR registers in snapshot for gen8_0_x GPUs" This reverts commit 334fbadc748d4502004cadb57fdbf63eda98a801. Change-Id: Ia87a3cba54122744a6e06b460038387313bbb7f3 Signed-off-by: Linux Image Build Automation --- adreno_gen8_0_0_snapshot.h | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/adreno_gen8_0_0_snapshot.h b/adreno_gen8_0_0_snapshot.h index 050854acda..cb924cc162 100644 --- a/adreno_gen8_0_0_snapshot.h +++ b/adreno_gen8_0_0_snapshot.h @@ -2056,22 +2056,6 @@ static const u32 gen8_0_0_rscc_rsc_registers[] = { }; static_assert(IS_ALIGNED(sizeof(gen8_0_0_rscc_rsc_registers), 8)); -/* - * Block : ['CPR'] - * REGION : UNSLICE - * Pipeline: PIPE_NONE - * pairs : 20 (Regs:479) - */ -static const u32 gen8_0_0_cpr_registers[] = { - 0x26800, 0x26805, 0x26808, 0x2680c, 0x26814, 0x26814, 0x2681c, 0x2681c, - 0x26820, 0x26838, 0x26840, 0x26840, 0x26848, 0x26848, 0x26850, 0x26850, - 0x26880, 0x2689f, 0x26980, 0x269b0, 0x269c0, 0x269c8, 0x269e0, 0x269ee, - 0x269fb, 0x269ff, 0x26a02, 0x26a07, 0x26a09, 0x26a0b, 0x26a10, 0x26b0f, - 0x27440, 0x27441, 0x27444, 0x27444, 0x27480, 0x274a2, 0x274ac, 0x274c4, - UINT_MAX, UINT_MAX, -}; -static_assert(IS_ALIGNED(sizeof(gen8_0_0_cpr_registers), 8)); - static const u32 *gen8_0_0_external_core_regs[] = { gen8_0_0_gdpm_lkg_registers, gen8_0_0_gpu_cc_ahb2phy_broadcast_swman_registers, @@ -2083,6 +2067,5 @@ static const u32 *gen8_0_0_external_core_regs[] = { gen8_0_0_gx_clkctl_ahb2phy_swman_registers, gen8_0_0_gx_clkctl_pll0_cm_pll_taycan_common_registers, 
gen8_0_0_gx_clkctl_gx_clkctl_reg_registers, - gen8_0_0_cpr_registers, }; #endif /*_ADRENO_GEN8_0_0_SNAPSHOT_H */ From 6871ef71e0093dcc35779a96d7890f3f9b160546 Mon Sep 17 00:00:00 2001 From: Linux Image Build Automation Date: Wed, 17 Jul 2024 01:29:16 -0700 Subject: [PATCH 0858/1016] Revert "kgsl: Add ability to vote for dependent domains for GX levels" This reverts commit bd362cd220c4613ca6a6770455e438035c3fa09b. Change-Id: Ib9aca1ff89901035685e006bda02e1a7d3d1fa01 Signed-off-by: Linux Image Build Automation --- adreno_a6xx_rpmh.c | 8 +-- adreno_gen7_rpmh.c | 6 +- adreno_gen8_rpmh.c | 156 ++++++++++++++++----------------------------- adreno_hfi.h | 4 +- 4 files changed, 64 insertions(+), 110 deletions(-) diff --git a/adreno_a6xx_rpmh.c b/adreno_a6xx_rpmh.c index 0d451ae3ca..9d4a10f4d2 100644 --- a/adreno_a6xx_rpmh.c +++ b/adreno_a6xx_rpmh.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -354,10 +354,10 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, memset(vlvl_tbl, 0, sizeof(vlvl_tbl)); table->gx_votes[0].freq = 0; - table->gx_votes[0].dep_vote = 0; + table->gx_votes[0].cx_vote = 0; /* Disable cx vote in gmu dcvs table if it is not supported in DT */ if (pwr->pwrlevels[0].cx_level == 0xffffffff) - table->gx_votes[0].dep_vote = 0xffffffff; + table->gx_votes[0].cx_vote = 0xffffffff; /* GMU power levels are in ascending order */ for (index = 1, i = pwr->num_pwrlevels - 1; i >= 0; i--, index++) { @@ -367,7 +367,7 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, table->gx_votes[index].freq = pwr->pwrlevels[i].gpu_freq / 1000; ret = to_cx_hlvl(cx_rail, cx_vlvl, - &table->gx_votes[index].dep_vote); + &table->gx_votes[index].cx_vote); if (ret) { dev_err(&gmu->pdev->dev, "Unsupported cx corner: %u\n", cx_vlvl); diff --git a/adreno_gen7_rpmh.c b/adreno_gen7_rpmh.c index 88a173bf58..be6cac5b7c 100644 --- a/adreno_gen7_rpmh.c +++ b/adreno_gen7_rpmh.c @@ -350,10 +350,10 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, memset(vlvl_tbl, 0, sizeof(vlvl_tbl)); table->gx_votes[0].freq = 0; - table->gx_votes[0].dep_vote = 0; + table->gx_votes[0].cx_vote = 0; /* Disable cx vote in gmu dcvs table if it is not supported in DT */ if (pwr->pwrlevels[0].cx_level == 0xffffffff) - table->gx_votes[0].dep_vote = 0xffffffff; + table->gx_votes[0].cx_vote = 0xffffffff; /* GMU power levels are in ascending order */ for (index = 1, i = pwr->num_pwrlevels - 1; i >= 0; i--, index++) { @@ -363,7 +363,7 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, table->gx_votes[index].freq = pwr->pwrlevels[i].gpu_freq / 1000; ret = to_cx_hlvl(cx_rail, cx_vlvl, - &table->gx_votes[index].dep_vote); + &table->gx_votes[index].cx_vote); if (ret) { dev_err(device->dev, "Unsupported cx corner: %u\n", cx_vlvl); diff --git a/adreno_gen8_rpmh.c b/adreno_gen8_rpmh.c index 525626740c..361a436b04 100644 --- 
a/adreno_gen8_rpmh.c +++ b/adreno_gen8_rpmh.c @@ -293,64 +293,27 @@ static int setup_cx_arc_votes(struct gen8_gmu_device *gmu, return ret; } -#define GEN8_DEP_VOTE_SET(cx, mx) \ - (FIELD_PREP(GENMASK(31, 14), 0x3FFFF) | \ - FIELD_PREP(GENMASK(13, 8), mx) | \ - FIELD_PREP(GENMASK(7, 0), cx)) - -static int setup_dependency_domain_tbl(u32 *votes, - struct rpmh_arc_vals *dep_rail, struct rpmh_arc_vals *cx_rail, - u16 *vlvl, u32 *cx_vlvl, u32 num_entries) +static int to_cx_hlvl(struct rpmh_arc_vals *cx_rail, u32 vlvl, u32 *hlvl) { - u32 cx_vote, mx_vote; - int i, j; + u32 i; - for (i = 1; i < num_entries; i++) { - bool found_match = false; - - if (cx_vlvl[i] == 0xffffffff) { - /* This means that the Gx level doesn't have a dependency on Cx level */ - cx_vote = 0xff; - found_match = true; - } else { - for (j = 0; j < cx_rail->num; j++) { - if (cx_rail->val[j] >= cx_vlvl[i]) { - cx_vote = j; - found_match = true; - break; - } - } - } - - /* If we did not find a matching VLVL level then abort */ - if (!found_match) { - pr_err("kgsl: Unsupported cx corner: %u\n", cx_vlvl[i]); - return -EINVAL; - } - - /* - * Set Mx dependency domain votes for Gx level. Look for indexes - * whose vlvl value is greater than or equal to the vlvl value - * of the corresponding index of dependency rail - */ - for (j = 0; j < dep_rail->num; j++) { - if (dep_rail->val[j] >= vlvl[i]) { - mx_vote = j; - found_match = true; - break; - } - } - - /* If we did not find a matching VLVL level then abort */ - if (!found_match) { - pr_err("kgsl: Unsupported mx corner: %u\n", vlvl[i]); - return -EINVAL; - } - - votes[i] = GEN8_DEP_VOTE_SET(cx_vote, mx_vote); + /* + * This means that the Gx level doesn't have a dependency on Cx level. + * Return the same value to disable cx voting at GMU. 
+ */ + if (vlvl == 0xffffffff) { + *hlvl = vlvl; + return 0; } - return 0; + for (i = 0; i < cx_rail->num; i++) { + if (cx_rail->val[i] >= vlvl) { + *hlvl = i; + return 0; + } + } + + return -EINVAL; } /* @@ -358,77 +321,71 @@ static int setup_dependency_domain_tbl(u32 *votes, * @hfi: Pointer to hfi device * @pri_rail: Pointer to primary power rail vlvl table * @sec_rail: Pointer to second/dependent power rail vlvl table - * @gmxc_rail: Pointer to MxG power rail vlvl table * * This function initializes the gx votes for all gpu frequencies * for gpu dcvs */ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail, - struct rpmh_arc_vals *gmxc_rail, struct rpmh_arc_vals *cx_rail) + struct rpmh_arc_vals *cx_rail) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct kgsl_pwrctrl *pwr = &device->pwrctrl; struct gen8_dcvs_table *table = &gmu->dcvs_table; + u32 index; u16 vlvl_tbl[MAX_GX_LEVELS]; - u32 cx_vlvl_tbl[MAX_GX_LEVELS]; u32 gx_votes[MAX_GX_LEVELS]; - u32 dep_votes[MAX_GX_LEVELS]; int ret, i; - table->gpu_level_num = pwr->num_pwrlevels + 1; - - if (table->gpu_level_num > ARRAY_SIZE(vlvl_tbl)) { + if (pwr->num_pwrlevels + 1 > ARRAY_SIZE(vlvl_tbl)) { dev_err(device->dev, "Defined more GPU DCVS levels than RPMh can support\n"); return -ERANGE; } - /* Initialize vlvl tables */ - memset(vlvl_tbl, 0, sizeof(vlvl_tbl)); - memset(cx_vlvl_tbl, 0, sizeof(cx_vlvl_tbl)); - - /* Fill the vlvl tables. 
GMU power levels are in ascending order */ - for (i = 1; i < table->gpu_level_num; i++) { - vlvl_tbl[i] = pwr->pwrlevels[pwr->num_pwrlevels - i].voltage_level; - cx_vlvl_tbl[i] = pwr->pwrlevels[pwr->num_pwrlevels - i].cx_level; - } - - /* If the target does not have a dedicated Mx rail, use secondary rail */ - if (gmxc_rail == NULL) - ret = setup_volt_dependency_tbl(gx_votes, pri_rail, sec_rail, - vlvl_tbl, table->gpu_level_num); - else - ret = setup_volt_dependency_tbl(gx_votes, pri_rail, gmxc_rail, - vlvl_tbl, table->gpu_level_num); - if (ret) - return ret; - - ret = setup_dependency_domain_tbl(dep_votes, sec_rail, cx_rail, - vlvl_tbl, cx_vlvl_tbl, table->gpu_level_num); - if (ret) - return ret; - - /* Populate DCVS table with all the votes */ - for (i = 1; i < table->gpu_level_num; i++) { - table->gx_votes[i].freq = pwr->pwrlevels[pwr->num_pwrlevels - i].gpu_freq / 1000; - table->gx_votes[i].vote = gx_votes[i]; - table->gx_votes[i].dep_vote = dep_votes[i]; - } - /* Add the zero powerlevel for the perf table */ + table->gpu_level_num = pwr->num_pwrlevels + 1; + + memset(vlvl_tbl, 0, sizeof(vlvl_tbl)); + table->gx_votes[0].freq = 0; - table->gx_votes[0].vote = 0; - table->gx_votes[0].dep_vote = 0xFFFFFFFF; + table->gx_votes[0].cx_vote = 0; + /* Disable cx vote in gmu dcvs table if it is not supported in DT */ + if (pwr->pwrlevels[0].cx_level == 0xffffffff) + table->gx_votes[0].cx_vote = 0xffffffff; + + /* GMU power levels are in ascending order */ + for (index = 1, i = pwr->num_pwrlevels - 1; i >= 0; i--, index++) { + u32 cx_vlvl = pwr->pwrlevels[i].cx_level; + + vlvl_tbl[index] = pwr->pwrlevels[i].voltage_level; + table->gx_votes[index].freq = pwr->pwrlevels[i].gpu_freq / 1000; + + ret = to_cx_hlvl(cx_rail, cx_vlvl, + &table->gx_votes[index].cx_vote); + if (ret) { + dev_err(device->dev, "Unsupported cx corner: %u\n", + cx_vlvl); + return ret; + } + } + + ret = setup_volt_dependency_tbl(gx_votes, pri_rail, + sec_rail, vlvl_tbl, table->gpu_level_num); + if (!ret) { 
+ for (i = 0; i < table->gpu_level_num; i++) + table->gx_votes[i].vote = gx_votes[i]; + } return ret; + } static int build_dcvs_table(struct adreno_device *adreno_dev) { struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - struct rpmh_arc_vals gx_arc, cx_arc, mx_arc, gmxc_arc; + struct rpmh_arc_vals gx_arc, cx_arc, mx_arc; int ret; ret = rpmh_arc_cmds(&gx_arc, "gfx.lvl"); @@ -449,15 +406,12 @@ static int build_dcvs_table(struct adreno_device *adreno_dev) /* If the target supports dedicated MxC rail, read the same */ if (cmd_db_read_addr("gmxc.lvl")) { - ret = rpmh_arc_cmds(&gmxc_arc, "gmxc.lvl"); + ret = rpmh_arc_cmds(&mx_arc, "gmxc.lvl"); if (ret) return ret; - ret = setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, &gmxc_arc, &cx_arc); - } else { - ret = setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, NULL, &cx_arc); } - return ret; + return setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, &cx_arc); } /* diff --git a/adreno_hfi.h b/adreno_hfi.h index b7f1c26cb3..f48529b73d 100644 --- a/adreno_hfi.h +++ b/adreno_hfi.h @@ -541,8 +541,8 @@ struct hfi_bwtable_cmd { struct opp_gx_desc { u32 vote; - /* This is 'acdLvl' in gmu fw which is now repurposed for various dependency votes */ - u32 dep_vote; + /* This is 'acdLvl' in gmu fw which is now repurposed for cx vote */ + u32 cx_vote; u32 freq; } __packed; From b4d88f72dc15082dd120b140e4faab0f6d9708bb Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 21 May 2024 16:24:53 +0530 Subject: [PATCH 0859/1016] kgsl: gmu: Make RPMH utilities generation agnostic RPMH utility functions and structures are independent of GPU generation. So, relocate them to generation agnostic code. 
Change-Id: Id402ca6bef7813258600625e98b5d9038a58aacc Signed-off-by: Kamal Agrawal --- Kbuild | 1 + adreno_a6xx_rpmh.c | 337 +++----------------------------------------- adreno_gen7_rpmh.c | 335 ++----------------------------------------- adreno_gen8_rpmh.c | 314 ++--------------------------------------- adreno_rpmh.c | 266 ++++++++++++++++++++++++++++++++++ adreno_rpmh.h | 121 ++++++++++++++++ build/kgsl_defs.bzl | 1 + 7 files changed, 435 insertions(+), 940 deletions(-) create mode 100644 adreno_rpmh.c create mode 100644 adreno_rpmh.h diff --git a/Kbuild b/Kbuild index 091d00e061..a327cd6dcc 100644 --- a/Kbuild +++ b/Kbuild @@ -148,6 +148,7 @@ msm_kgsl-y += \ adreno_gen8_ringbuffer.o \ adreno_gen8_rpmh.o \ adreno_gen8_snapshot.o \ + adreno_rpmh.o \ adreno_hwsched.o \ adreno_hwsched_snapshot.o \ adreno_ioctl.o \ diff --git a/adreno_a6xx_rpmh.c b/adreno_a6xx_rpmh.c index 0d451ae3ca..5703967c8a 100644 --- a/adreno_a6xx_rpmh.c +++ b/adreno_a6xx_rpmh.c @@ -11,248 +11,10 @@ #include "adreno.h" #include "adreno_a6xx.h" #include "adreno_hfi.h" +#include "adreno_rpmh.h" #include "kgsl_bus.h" #include "kgsl_device.h" -struct rpmh_arc_vals { - u32 num; - const u16 *val; -}; - -struct bcm { - const char *name; - u32 buswidth; - u32 channels; - u32 unit; - u16 width; - u8 vcd; - bool fixed; -}; - -struct bcm_data { - __le32 unit; - __le16 width; - u8 vcd; - u8 reserved; -}; - -struct rpmh_bw_votes { - u32 wait_bitmask; - u32 num_cmds; - u32 *addrs; - u32 num_levels; - u32 **cmds; -}; - -#define ARC_VOTE_SET(pri, sec, vlvl) \ - ((((vlvl) & 0xFFFF) << 16) | (((sec) & 0xFF) << 8) | ((pri) & 0xFF)) - -static int rpmh_arc_cmds(struct rpmh_arc_vals *arc, const char *res_id) -{ - size_t len = 0; - - arc->val = cmd_db_read_aux_data(res_id, &len); - - /* - * cmd_db_read_aux_data() gives us a zero-padded table of - * size len that contains the arc values. 
To determine the - * number of arc values, we loop through the table and count - * them until we get to the end of the buffer or hit the - * zero padding. - */ - for (arc->num = 1; arc->num < (len >> 1); arc->num++) { - if (arc->val[arc->num - 1] != 0 && arc->val[arc->num] == 0) - break; - } - - return 0; -} - -static int setup_volt_dependency_tbl(uint32_t *votes, - struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail, - u16 *vlvl, unsigned int num_entries) -{ - int i, j, k; - uint16_t cur_vlvl; - bool found_match; - - /* i tracks current KGSL GPU frequency table entry - * j tracks secondary rail voltage table entry - * k tracks primary rail voltage table entry - */ - for (i = 0; i < num_entries; i++) { - found_match = false; - - /* Look for a primary rail voltage that matches a VLVL level */ - for (k = 0; k < pri_rail->num; k++) { - if (pri_rail->val[k] >= vlvl[i]) { - cur_vlvl = pri_rail->val[k]; - found_match = true; - break; - } - } - - /* If we did not find a matching VLVL level then abort */ - if (!found_match) - return -EINVAL; - - /* - * Look for a secondary rail index whose VLVL value - * is greater than or equal to the VLVL value of the - * corresponding index of the primary rail - */ - for (j = 0; j < sec_rail->num; j++) { - if (sec_rail->val[j] >= cur_vlvl || - j + 1 == sec_rail->num) - break; - } - - if (j == sec_rail->num) - j = 0; - - votes[i] = ARC_VOTE_SET(k, j, cur_vlvl); - } - - return 0; -} - -/* Generate a set of bandwidth votes for the list of BCMs */ -static void tcs_cmd_data(struct bcm *bcms, int count, u32 ab, u32 ib, - u32 *data) -{ - int i; - - for (i = 0; i < count; i++) { - bool valid = true; - bool commit = false; - u64 avg, peak, x, y; - - if (i == count - 1 || bcms[i].vcd != bcms[i + 1].vcd) - commit = true; - - /* - * On a660, the "ACV" y vote should be 0x08 if there is a valid - * vote and 0x00 if not. 
This is kind of hacky and a660 specific - * but we can clean it up when we add a new target - */ - if (bcms[i].fixed) { - if (!ab && !ib) - data[i] = BCM_TCS_CMD(commit, false, 0x0, 0x0); - else - data[i] = BCM_TCS_CMD(commit, true, 0x0, 0x8); - continue; - } - - /* Multiple the bandwidth by the width of the connection */ - avg = ((u64) ab) * bcms[i].width; - - /* And then divide by the total width across channels */ - do_div(avg, bcms[i].buswidth * bcms[i].channels); - - peak = ((u64) ib) * bcms[i].width; - do_div(peak, bcms[i].buswidth); - - /* Input bandwidth value is in KBps */ - x = avg * 1000ULL; - do_div(x, bcms[i].unit); - - /* Input bandwidth value is in KBps */ - y = peak * 1000ULL; - do_div(y, bcms[i].unit); - - /* - * If a bandwidth value was specified but the calculation ends - * rounding down to zero, set a minimum level - */ - if (ab && x == 0) - x = 1; - - if (ib && y == 0) - y = 1; - - x = min_t(u64, x, BCM_TCS_CMD_VOTE_MASK); - y = min_t(u64, y, BCM_TCS_CMD_VOTE_MASK); - - if (!x && !y) - valid = false; - - data[i] = BCM_TCS_CMD(commit, valid, x, y); - } -} - -static void free_rpmh_bw_votes(struct rpmh_bw_votes *votes) -{ - int i; - - if (!votes) - return; - - for (i = 0; votes->cmds && i < votes->num_levels; i++) - kfree(votes->cmds[i]); - - kfree(votes->cmds); - kfree(votes->addrs); - kfree(votes); -} - -/* Build the votes table from the specified bandwidth levels */ -static struct rpmh_bw_votes *build_rpmh_bw_votes(struct bcm *bcms, - int bcm_count, u32 *levels, int levels_count) -{ - struct rpmh_bw_votes *votes; - int i; - - votes = kzalloc(sizeof(*votes), GFP_KERNEL); - if (!votes) - return ERR_PTR(-ENOMEM); - - votes->addrs = kcalloc(bcm_count, sizeof(*votes->cmds), GFP_KERNEL); - if (!votes->addrs) { - free_rpmh_bw_votes(votes); - return ERR_PTR(-ENOMEM); - } - - votes->cmds = kcalloc(levels_count, sizeof(*votes->cmds), GFP_KERNEL); - if (!votes->cmds) { - free_rpmh_bw_votes(votes); - return ERR_PTR(-ENOMEM); - } - - votes->num_cmds = 
bcm_count; - votes->num_levels = levels_count; - - /* Get the cmd-db information for each BCM */ - for (i = 0; i < bcm_count; i++) { - size_t l; - const struct bcm_data *data; - - data = cmd_db_read_aux_data(bcms[i].name, &l); - - votes->addrs[i] = cmd_db_read_addr(bcms[i].name); - - bcms[i].unit = le32_to_cpu(data->unit); - bcms[i].width = le16_to_cpu(data->width); - bcms[i].vcd = data->vcd; - } - - for (i = 0; i < bcm_count; i++) { - if (i == (bcm_count - 1) || bcms[i].vcd != bcms[i + 1].vcd) - votes->wait_bitmask |= (1 << i); - } - - for (i = 0; i < levels_count; i++) { - votes->cmds[i] = kcalloc(bcm_count, sizeof(u32), GFP_KERNEL); - if (!votes->cmds[i]) { - free_rpmh_bw_votes(votes); - return ERR_PTR(-ENOMEM); - } - - tcs_cmd_data(bcms, bcm_count, 0, levels[i], votes->cmds[i]); - } - - return votes; -} - /* * setup_gmu_arc_votes - Build the gmu voting table * @adreno_dev: Pointer to adreno device @@ -286,7 +48,7 @@ static int setup_cx_arc_votes(struct adreno_device *adreno_dev, table->cx_votes[1].freq = freqs[0] / 1000; table->cx_votes[2].freq = freqs[1] / 1000; - ret = setup_volt_dependency_tbl(cx_votes, pri_rail, + ret = adreno_rpmh_setup_volt_dependency_tbl(cx_votes, pri_rail, sec_rail, gmu_cx_vlvl, table->gmu_level_num); if (!ret) { for (i = 0; i < table->gmu_level_num; i++) @@ -296,29 +58,6 @@ static int setup_cx_arc_votes(struct adreno_device *adreno_dev, return ret; } -static int to_cx_hlvl(struct rpmh_arc_vals *cx_rail, u32 vlvl, u32 *hlvl) -{ - u32 i; - - /* - * This means that the Gx level doesn't have a dependency on Cx level. - * Return the same value to disable cx voting at GMU. 
- */ - if (vlvl == 0xffffffff) { - *hlvl = vlvl; - return 0; - } - - for (i = 0; i < cx_rail->num; i++) { - if (cx_rail->val[i] >= vlvl) { - *hlvl = i; - return 0; - } - } - - return -EINVAL; -} - /* * setup_gx_arc_votes - Build the gpu dcvs voting table * @hfi: Pointer to hfi device @@ -366,7 +105,7 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, vlvl_tbl[index] = pwr->pwrlevels[i].voltage_level; table->gx_votes[index].freq = pwr->pwrlevels[i].gpu_freq / 1000; - ret = to_cx_hlvl(cx_rail, cx_vlvl, + ret = adreno_rpmh_to_cx_hlvl(cx_rail, cx_vlvl, &table->gx_votes[index].dep_vote); if (ret) { dev_err(&gmu->pdev->dev, "Unsupported cx corner: %u\n", @@ -375,7 +114,7 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, } } - ret = setup_volt_dependency_tbl(gx_votes, pri_rail, + ret = adreno_rpmh_setup_volt_dependency_tbl(gx_votes, pri_rail, sec_rail, vlvl_tbl, table->gpu_level_num); if (!ret) { for (i = 0; i < table->gpu_level_num; i++) @@ -397,15 +136,15 @@ static int build_dcvs_table(struct adreno_device *adreno_dev) if (ret) return ret; - ret = rpmh_arc_cmds(&gx_arc, "gfx.lvl"); + ret = adreno_rpmh_arc_cmds(&gx_arc, "gfx.lvl"); if (ret) return ret; - ret = rpmh_arc_cmds(&cx_arc, "cx.lvl"); + ret = adreno_rpmh_arc_cmds(&cx_arc, "cx.lvl"); if (ret) return ret; - ret = rpmh_arc_cmds(&mx_arc, "mx.lvl"); + ret = adreno_rpmh_arc_cmds(&mx_arc, "mx.lvl"); if (ret) return ret; @@ -417,50 +156,8 @@ static int build_dcvs_table(struct adreno_device *adreno_dev) return setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, &cx_arc); } -/* - * List of Bus Control Modules (BCMs) that need to be configured for the GPU - * to access DDR. 
For each bus level we will generate a vote each BC - */ -static struct bcm a660_ddr_bcms[] = { - { .name = "SH0", .buswidth = 16 }, - { .name = "MC0", .buswidth = 4 }, - { .name = "ACV", .fixed = true }, -}; - -/* Same as above, but for the CNOC BCMs */ -static struct bcm a660_cnoc_bcms[] = { - { .name = "CN0", .buswidth = 4 }, -}; - -static void build_bw_table_cmd(struct hfi_bwtable_cmd *cmd, - struct rpmh_bw_votes *ddr, struct rpmh_bw_votes *cnoc) -{ - u32 i, j; - - cmd->bw_level_num = ddr->num_levels; - cmd->ddr_cmds_num = ddr->num_cmds; - cmd->ddr_wait_bitmask = ddr->wait_bitmask; - - for (i = 0; i < ddr->num_cmds; i++) - cmd->ddr_cmd_addrs[i] = ddr->addrs[i]; - - for (i = 0; i < ddr->num_levels; i++) - for (j = 0; j < ddr->num_cmds; j++) - cmd->ddr_cmd_data[i][j] = (u32) ddr->cmds[i][j]; - - if (!cnoc) - return; - - cmd->cnoc_cmds_num = cnoc->num_cmds; - cmd->cnoc_wait_bitmask = cnoc->wait_bitmask; - - for (i = 0; i < cnoc->num_cmds; i++) - cmd->cnoc_cmd_addrs[i] = cnoc->addrs[i]; - - for (i = 0; i < cnoc->num_levels; i++) - for (j = 0; j < cnoc->num_cmds; j++) - cmd->cnoc_cmd_data[i][j] = (u32) cnoc->cmds[i][j]; -} +/* BIT(3) is used to vote for GPU performance mode through GMU */ +#define ACV_GPU_PERFMODE_VOTE BIT(3) static int build_bw_table(struct adreno_device *adreno_dev) { @@ -472,8 +169,8 @@ static int build_bw_table(struct adreno_device *adreno_dev) u32 count; int ret; - ddr = build_rpmh_bw_votes(a660_ddr_bcms, ARRAY_SIZE(a660_ddr_bcms), - pwr->ddr_table, pwr->ddr_table_count); + ddr = adreno_rpmh_build_bw_votes(adreno_ddr_bcms, ARRAY_SIZE(adreno_ddr_bcms), + pwr->ddr_table, pwr->ddr_table_count, ACV_GPU_PERFMODE_VOTE, 0); if (IS_ERR(ddr)) return PTR_ERR(ddr); @@ -481,13 +178,13 @@ static int build_bw_table(struct adreno_device *adreno_dev) &count); if (count > 0) - cnoc = build_rpmh_bw_votes(a660_cnoc_bcms, - ARRAY_SIZE(a660_cnoc_bcms), cnoc_table, count); + cnoc = adreno_rpmh_build_bw_votes(adreno_cnoc_bcms, + ARRAY_SIZE(adreno_cnoc_bcms), 
cnoc_table, count, 0, 0); kfree(cnoc_table); if (IS_ERR(cnoc)) { - free_rpmh_bw_votes(ddr); + adreno_rpmh_free_bw_votes(ddr); return PTR_ERR(cnoc); } @@ -495,10 +192,10 @@ static int build_bw_table(struct adreno_device *adreno_dev) if (ret) return ret; - build_bw_table_cmd(&gmu->hfi.bw_table, ddr, cnoc); + adreno_rpmh_build_bw_table_cmd(&gmu->hfi.bw_table, ddr, cnoc); - free_rpmh_bw_votes(ddr); - free_rpmh_bw_votes(cnoc); + adreno_rpmh_free_bw_votes(ddr); + adreno_rpmh_free_bw_votes(cnoc); return 0; } diff --git a/adreno_gen7_rpmh.c b/adreno_gen7_rpmh.c index 88a173bf58..568ed9f1f2 100644 --- a/adreno_gen7_rpmh.c +++ b/adreno_gen7_rpmh.c @@ -10,249 +10,10 @@ #include "adreno.h" #include "adreno_gen7.h" +#include "adreno_rpmh.h" #include "kgsl_bus.h" #include "kgsl_device.h" -struct rpmh_arc_vals { - u32 num; - const u16 *val; -}; - -struct bcm { - const char *name; - u32 buswidth; - u32 channels; - u32 unit; - u16 width; - u8 vcd; - bool fixed; -}; - -struct bcm_data { - __le32 unit; - __le16 width; - u8 vcd; - u8 reserved; -}; - -struct rpmh_bw_votes { - u32 wait_bitmask; - u32 num_cmds; - u32 *addrs; - u32 num_levels; - u32 **cmds; -}; - -#define ARC_VOTE_SET(pri, sec, vlvl) \ - (FIELD_PREP(GENMASK(31, 16), vlvl) | \ - FIELD_PREP(GENMASK(15, 8), sec) | \ - FIELD_PREP(GENMASK(7, 0), pri)) - -static int rpmh_arc_cmds(struct rpmh_arc_vals *arc, const char *res_id) -{ - size_t len = 0; - - arc->val = cmd_db_read_aux_data(res_id, &len); - - /* - * cmd_db_read_aux_data() gives us a zero-padded table of - * size len that contains the arc values. To determine the - * number of arc values, we loop through the table and count - * them until we get to the end of the buffer or hit the - * zero padding. 
- */ - for (arc->num = 1; arc->num < (len >> 1); arc->num++) { - if (arc->val[arc->num - 1] != 0 && arc->val[arc->num] == 0) - break; - } - - return 0; -} - -static int setup_volt_dependency_tbl(u32 *votes, - struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail, - u16 *vlvl, unsigned int num_entries) -{ - int i, j, k; - uint16_t cur_vlvl; - bool found_match; - - /* i tracks current KGSL GPU frequency table entry - * j tracks secondary rail voltage table entry - * k tracks primary rail voltage table entry - */ - for (i = 0; i < num_entries; i++) { - found_match = false; - - /* Look for a primary rail voltage that matches a VLVL level */ - for (k = 0; k < pri_rail->num; k++) { - if (pri_rail->val[k] >= vlvl[i]) { - cur_vlvl = pri_rail->val[k]; - found_match = true; - break; - } - } - - /* If we did not find a matching VLVL level then abort */ - if (!found_match) - return -EINVAL; - - /* - * Look for a secondary rail index whose VLVL value - * is greater than or equal to the VLVL value of the - * corresponding index of the primary rail - */ - for (j = 0; j < sec_rail->num; j++) { - if (sec_rail->val[j] >= cur_vlvl || - j + 1 == sec_rail->num) - break; - } - - if (j == sec_rail->num) - j = 0; - - votes[i] = ARC_VOTE_SET(k, j, cur_vlvl); - } - - return 0; -} - -/* Generate a set of bandwidth votes for the list of BCMs */ -static void tcs_cmd_data(struct bcm *bcms, int count, - u32 ab, u32 ib, u32 *data, u32 perfmode_vote, bool set_perfmode) -{ - int i; - - for (i = 0; i < count; i++) { - bool valid = true; - bool commit = false; - u64 avg, peak, x, y; - - if (i == count - 1 || bcms[i].vcd != bcms[i + 1].vcd) - commit = true; - - if (bcms[i].fixed) { - if (!ab && !ib) - data[i] = BCM_TCS_CMD(commit, false, 0x0, 0x0); - else - data[i] = BCM_TCS_CMD(commit, true, 0x0, - set_perfmode ? 
perfmode_vote : 0x0); - continue; - } - - /* Multiple the bandwidth by the width of the connection */ - avg = ((u64) ab) * bcms[i].width; - - /* And then divide by the total width */ - do_div(avg, bcms[i].buswidth); - - peak = ((u64) ib) * bcms[i].width; - do_div(peak, bcms[i].buswidth); - - /* Input bandwidth value is in KBps */ - x = avg * 1000ULL; - do_div(x, bcms[i].unit); - - /* Input bandwidth value is in KBps */ - y = peak * 1000ULL; - do_div(y, bcms[i].unit); - - /* - * If a bandwidth value was specified but the calculation ends - * rounding down to zero, set a minimum level - */ - if (ab && x == 0) - x = 1; - - if (ib && y == 0) - y = 1; - - x = min_t(u64, x, BCM_TCS_CMD_VOTE_MASK); - y = min_t(u64, y, BCM_TCS_CMD_VOTE_MASK); - - if (!x && !y) - valid = false; - - data[i] = BCM_TCS_CMD(commit, valid, x, y); - } -} - -static void free_rpmh_bw_votes(struct rpmh_bw_votes *votes) -{ - int i; - - if (!votes) - return; - - for (i = 0; votes->cmds && i < votes->num_levels; i++) - kfree(votes->cmds[i]); - - kfree(votes->cmds); - kfree(votes->addrs); - kfree(votes); -} - -/* Build the votes table from the specified bandwidth levels */ -static struct rpmh_bw_votes *build_rpmh_bw_votes(struct bcm *bcms, - int bcm_count, u32 *levels, int levels_count, u32 perfmode_vote, u32 perfmode_lvl) -{ - struct rpmh_bw_votes *votes; - bool set_perfmode; - int i; - - votes = kzalloc(sizeof(*votes), GFP_KERNEL); - if (!votes) - return ERR_PTR(-ENOMEM); - - votes->addrs = kcalloc(bcm_count, sizeof(*votes->cmds), GFP_KERNEL); - if (!votes->addrs) { - free_rpmh_bw_votes(votes); - return ERR_PTR(-ENOMEM); - } - - votes->cmds = kcalloc(levels_count, sizeof(*votes->cmds), GFP_KERNEL); - if (!votes->cmds) { - free_rpmh_bw_votes(votes); - return ERR_PTR(-ENOMEM); - } - - votes->num_cmds = bcm_count; - votes->num_levels = levels_count; - - /* Get the cmd-db information for each BCM */ - for (i = 0; i < bcm_count; i++) { - size_t l; - const struct bcm_data *data; - - data = 
cmd_db_read_aux_data(bcms[i].name, &l); - - votes->addrs[i] = cmd_db_read_addr(bcms[i].name); - - bcms[i].unit = le32_to_cpu(data->unit); - bcms[i].width = le16_to_cpu(data->width); - bcms[i].vcd = data->vcd; - } - - for (i = 0; i < bcm_count; i++) { - if (i == (bcm_count - 1) || bcms[i].vcd != bcms[i + 1].vcd) - votes->wait_bitmask |= (1 << i); - } - - for (i = 0; i < levels_count; i++) { - votes->cmds[i] = kcalloc(bcm_count, sizeof(u32), GFP_KERNEL); - if (!votes->cmds[i]) { - free_rpmh_bw_votes(votes); - return ERR_PTR(-ENOMEM); - } - - set_perfmode = (i >= perfmode_lvl) ? true : false; - tcs_cmd_data(bcms, bcm_count, levels[i], levels[i], votes->cmds[i], - perfmode_vote, set_perfmode); - } - - return votes; -} - /* * setup_gmu_arc_votes - Build the gmu voting table * @gmu: Pointer to gmu device @@ -283,7 +44,7 @@ static int setup_cx_arc_votes(struct gen7_gmu_device *gmu, table->cx_votes[1].freq = freqs[0] / 1000; table->cx_votes[2].freq = freqs[1] / 1000; - ret = setup_volt_dependency_tbl(cx_votes, pri_rail, + ret = adreno_rpmh_setup_volt_dependency_tbl(cx_votes, pri_rail, sec_rail, gmu_cx_vlvl, table->gmu_level_num); if (!ret) { for (i = 0; i < table->gmu_level_num; i++) @@ -293,29 +54,6 @@ static int setup_cx_arc_votes(struct gen7_gmu_device *gmu, return ret; } -static int to_cx_hlvl(struct rpmh_arc_vals *cx_rail, u32 vlvl, u32 *hlvl) -{ - u32 i; - - /* - * This means that the Gx level doesn't have a dependency on Cx level. - * Return the same value to disable cx voting at GMU. 
- */ - if (vlvl == 0xffffffff) { - *hlvl = vlvl; - return 0; - } - - for (i = 0; i < cx_rail->num; i++) { - if (cx_rail->val[i] >= vlvl) { - *hlvl = i; - return 0; - } - } - - return -EINVAL; -} - /* * setup_gx_arc_votes - Build the gpu dcvs voting table * @hfi: Pointer to hfi device @@ -362,7 +100,7 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, vlvl_tbl[index] = pwr->pwrlevels[i].voltage_level; table->gx_votes[index].freq = pwr->pwrlevels[i].gpu_freq / 1000; - ret = to_cx_hlvl(cx_rail, cx_vlvl, + ret = adreno_rpmh_to_cx_hlvl(cx_rail, cx_vlvl, &table->gx_votes[index].dep_vote); if (ret) { dev_err(device->dev, "Unsupported cx corner: %u\n", @@ -371,7 +109,7 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, } } - ret = setup_volt_dependency_tbl(gx_votes, pri_rail, + ret = adreno_rpmh_setup_volt_dependency_tbl(gx_votes, pri_rail, sec_rail, vlvl_tbl, table->gpu_level_num); if (!ret) { for (i = 0; i < table->gpu_level_num; i++) @@ -388,15 +126,15 @@ static int build_dcvs_table(struct adreno_device *adreno_dev) struct rpmh_arc_vals gx_arc, cx_arc, mx_arc; int ret; - ret = rpmh_arc_cmds(&gx_arc, "gfx.lvl"); + ret = adreno_rpmh_arc_cmds(&gx_arc, "gfx.lvl"); if (ret) return ret; - ret = rpmh_arc_cmds(&cx_arc, "cx.lvl"); + ret = adreno_rpmh_arc_cmds(&cx_arc, "cx.lvl"); if (ret) return ret; - ret = rpmh_arc_cmds(&mx_arc, "mx.lvl"); + ret = adreno_rpmh_arc_cmds(&mx_arc, "mx.lvl"); if (ret) return ret; @@ -407,51 +145,6 @@ static int build_dcvs_table(struct adreno_device *adreno_dev) return setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, &cx_arc); } -/* - * List of Bus Control Modules (BCMs) that need to be configured for the GPU - * to access DDR. 
For each bus level we will generate a vote each BC - */ -static struct bcm gen7_ddr_bcms[] = { - { .name = "SH0", .buswidth = 16 }, - { .name = "MC0", .buswidth = 4 }, - { .name = "ACV", .fixed = true }, -}; - -/* Same as above, but for the CNOC BCMs */ -static struct bcm gen7_cnoc_bcms[] = { - { .name = "CN0", .buswidth = 4 }, -}; - -static void build_bw_table_cmd(struct hfi_bwtable_cmd *cmd, - struct rpmh_bw_votes *ddr, struct rpmh_bw_votes *cnoc) -{ - u32 i, j; - - cmd->bw_level_num = ddr->num_levels; - cmd->ddr_cmds_num = ddr->num_cmds; - cmd->ddr_wait_bitmask = ddr->wait_bitmask; - - for (i = 0; i < ddr->num_cmds; i++) - cmd->ddr_cmd_addrs[i] = ddr->addrs[i]; - - for (i = 0; i < ddr->num_levels; i++) - for (j = 0; j < ddr->num_cmds; j++) - cmd->ddr_cmd_data[i][j] = (u32) ddr->cmds[i][j]; - - if (!cnoc) - return; - - cmd->cnoc_cmds_num = cnoc->num_cmds; - cmd->cnoc_wait_bitmask = cnoc->wait_bitmask; - - for (i = 0; i < cnoc->num_cmds; i++) - cmd->cnoc_cmd_addrs[i] = cnoc->addrs[i]; - - for (i = 0; i < cnoc->num_levels; i++) - for (j = 0; j < cnoc->num_cmds; j++) - cmd->cnoc_cmd_data[i][j] = (u32) cnoc->cmds[i][j]; -} - static int build_bw_table(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); @@ -470,7 +163,7 @@ static int build_bw_table(struct adreno_device *adreno_dev) if (!perfmode_vote) perfmode_vote = BIT(3); - ddr = build_rpmh_bw_votes(gen7_ddr_bcms, ARRAY_SIZE(gen7_ddr_bcms), + ddr = adreno_rpmh_build_bw_votes(adreno_ddr_bcms, ARRAY_SIZE(adreno_ddr_bcms), pwr->ddr_table, pwr->ddr_table_count, perfmode_vote, perfmode_lvl); if (IS_ERR(ddr)) return PTR_ERR(ddr); @@ -479,13 +172,13 @@ static int build_bw_table(struct adreno_device *adreno_dev) &count); if (count > 0) - cnoc = build_rpmh_bw_votes(gen7_cnoc_bcms, - ARRAY_SIZE(gen7_cnoc_bcms), cnoc_table, count, 0, 0); + cnoc = adreno_rpmh_build_bw_votes(adreno_cnoc_bcms, + ARRAY_SIZE(adreno_cnoc_bcms), cnoc_table, count, 0, 0); kfree(cnoc_table); if (IS_ERR(cnoc)) { - 
free_rpmh_bw_votes(ddr); + adreno_rpmh_free_bw_votes(ddr); return PTR_ERR(cnoc); } @@ -493,10 +186,10 @@ static int build_bw_table(struct adreno_device *adreno_dev) if (ret) return ret; - build_bw_table_cmd(&gmu->hfi.bw_table, ddr, cnoc); + adreno_rpmh_build_bw_table_cmd(&gmu->hfi.bw_table, ddr, cnoc); - free_rpmh_bw_votes(ddr); - free_rpmh_bw_votes(cnoc); + adreno_rpmh_free_bw_votes(ddr); + adreno_rpmh_free_bw_votes(cnoc); return 0; } diff --git a/adreno_gen8_rpmh.c b/adreno_gen8_rpmh.c index 525626740c..6392799aa2 100644 --- a/adreno_gen8_rpmh.c +++ b/adreno_gen8_rpmh.c @@ -10,249 +10,10 @@ #include "adreno.h" #include "adreno_gen8.h" +#include "adreno_rpmh.h" #include "kgsl_bus.h" #include "kgsl_device.h" -struct rpmh_arc_vals { - u32 num; - const u16 *val; -}; - -struct bcm { - const char *name; - u32 buswidth; - u32 channels; - u32 unit; - u16 width; - u8 vcd; - bool fixed; -}; - -struct bcm_data { - __le32 unit; - __le16 width; - u8 vcd; - u8 reserved; -}; - -struct rpmh_bw_votes { - u32 wait_bitmask; - u32 num_cmds; - u32 *addrs; - u32 num_levels; - u32 **cmds; -}; - -#define ARC_VOTE_SET(pri, sec, vlvl) \ - (FIELD_PREP(GENMASK(31, 16), vlvl) | \ - FIELD_PREP(GENMASK(15, 8), sec) | \ - FIELD_PREP(GENMASK(7, 0), pri)) - -static int rpmh_arc_cmds(struct rpmh_arc_vals *arc, const char *res_id) -{ - size_t len = 0; - - arc->val = cmd_db_read_aux_data(res_id, &len); - - /* - * cmd_db_read_aux_data() gives us a zero-padded table of - * size len that contains the arc values. To determine the - * number of arc values, we loop through the table and count - * them until we get to the end of the buffer or hit the - * zero padding. 
- */ - for (arc->num = 1; arc->num < (len >> 1); arc->num++) { - if (arc->val[arc->num - 1] != 0 && arc->val[arc->num] == 0) - break; - } - - return 0; -} - -static int setup_volt_dependency_tbl(u32 *votes, - struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail, - u16 *vlvl, u32 num_entries) -{ - int i, j, k; - uint16_t cur_vlvl; - bool found_match; - - /* i tracks current KGSL GPU frequency table entry - * j tracks secondary rail voltage table entry - * k tracks primary rail voltage table entry - */ - for (i = 0; i < num_entries; i++) { - found_match = false; - - /* Look for a primary rail voltage that matches a VLVL level */ - for (k = 0; k < pri_rail->num; k++) { - if (pri_rail->val[k] >= vlvl[i]) { - cur_vlvl = pri_rail->val[k]; - found_match = true; - break; - } - } - - /* If we did not find a matching VLVL level then abort */ - if (!found_match) - return -EINVAL; - - /* - * Look for a secondary rail index whose VLVL value - * is greater than or equal to the VLVL value of the - * corresponding index of the primary rail - */ - for (j = 0; j < sec_rail->num; j++) { - if (sec_rail->val[j] >= cur_vlvl || - j + 1 == sec_rail->num) - break; - } - - if (j == sec_rail->num) - j = 0; - - votes[i] = ARC_VOTE_SET(k, j, cur_vlvl); - } - - return 0; -} - -/* Generate a set of bandwidth votes for the list of BCMs */ -static void tcs_cmd_data(struct bcm *bcms, int count, - u32 ab, u32 ib, u32 *data, u32 perfmode_vote, bool set_perfmode) -{ - int i; - - for (i = 0; i < count; i++) { - bool valid = true; - bool commit = false; - u64 avg, peak, x, y; - - if (i == count - 1 || bcms[i].vcd != bcms[i + 1].vcd) - commit = true; - - if (bcms[i].fixed) { - if (!ab && !ib) - data[i] = BCM_TCS_CMD(commit, false, 0x0, 0x0); - else - data[i] = BCM_TCS_CMD(commit, true, 0x0, - set_perfmode ? 
perfmode_vote : 0x0); - continue; - } - - /* Multiple the bandwidth by the width of the connection */ - avg = ((u64) ab) * bcms[i].width; - - /* And then divide by the total width */ - do_div(avg, bcms[i].buswidth); - - peak = ((u64) ib) * bcms[i].width; - do_div(peak, bcms[i].buswidth); - - /* Input bandwidth value is in KBps */ - x = avg * 1000ULL; - do_div(x, bcms[i].unit); - - /* Input bandwidth value is in KBps */ - y = peak * 1000ULL; - do_div(y, bcms[i].unit); - - /* - * If a bandwidth value was specified but the calculation ends - * rounding down to zero, set a minimum level - */ - if (ab && x == 0) - x = 1; - - if (ib && y == 0) - y = 1; - - x = min_t(u64, x, BCM_TCS_CMD_VOTE_MASK); - y = min_t(u64, y, BCM_TCS_CMD_VOTE_MASK); - - if (!x && !y) - valid = false; - - data[i] = BCM_TCS_CMD(commit, valid, x, y); - } -} - -static void free_rpmh_bw_votes(struct rpmh_bw_votes *votes) -{ - int i; - - if (!votes) - return; - - for (i = 0; votes->cmds && i < votes->num_levels; i++) - kfree(votes->cmds[i]); - - kfree(votes->cmds); - kfree(votes->addrs); - kfree(votes); -} - -/* Build the votes table from the specified bandwidth levels */ -static struct rpmh_bw_votes *build_rpmh_bw_votes(struct bcm *bcms, - int bcm_count, u32 *levels, int levels_count, u32 perfmode_vote, u32 perfmode_lvl) -{ - struct rpmh_bw_votes *votes; - bool set_perfmode; - int i; - - votes = kzalloc(sizeof(*votes), GFP_KERNEL); - if (!votes) - return ERR_PTR(-ENOMEM); - - votes->addrs = kcalloc(bcm_count, sizeof(*votes->cmds), GFP_KERNEL); - if (!votes->addrs) { - free_rpmh_bw_votes(votes); - return ERR_PTR(-ENOMEM); - } - - votes->cmds = kcalloc(levels_count, sizeof(*votes->cmds), GFP_KERNEL); - if (!votes->cmds) { - free_rpmh_bw_votes(votes); - return ERR_PTR(-ENOMEM); - } - - votes->num_cmds = bcm_count; - votes->num_levels = levels_count; - - /* Get the cmd-db information for each BCM */ - for (i = 0; i < bcm_count; i++) { - size_t l; - const struct bcm_data *data; - - data = 
cmd_db_read_aux_data(bcms[i].name, &l); - - votes->addrs[i] = cmd_db_read_addr(bcms[i].name); - - bcms[i].unit = le32_to_cpu(data->unit); - bcms[i].width = le16_to_cpu(data->width); - bcms[i].vcd = data->vcd; - } - - for (i = 0; i < bcm_count; i++) { - if (i == (bcm_count - 1) || bcms[i].vcd != bcms[i + 1].vcd) - votes->wait_bitmask |= (1 << i); - } - - for (i = 0; i < levels_count; i++) { - votes->cmds[i] = kcalloc(bcm_count, sizeof(u32), GFP_KERNEL); - if (!votes->cmds[i]) { - free_rpmh_bw_votes(votes); - return ERR_PTR(-ENOMEM); - } - - set_perfmode = (i >= perfmode_lvl) ? true : false; - tcs_cmd_data(bcms, bcm_count, levels[i], levels[i], votes->cmds[i], - perfmode_vote, set_perfmode); - } - - return votes; -} - /* * setup_gmu_arc_votes - Build the gmu voting table * @gmu: Pointer to gmu device @@ -283,7 +44,7 @@ static int setup_cx_arc_votes(struct gen8_gmu_device *gmu, table->cx_votes[1].freq = freqs[0] / 1000; table->cx_votes[2].freq = freqs[1] / 1000; - ret = setup_volt_dependency_tbl(cx_votes, pri_rail, + ret = adreno_rpmh_setup_volt_dependency_tbl(cx_votes, pri_rail, sec_rail, gmu_cx_vlvl, table->gmu_level_num); if (!ret) { for (i = 0; i < table->gmu_level_num; i++) @@ -397,10 +158,10 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, /* If the target does not have a dedicated Mx rail, use secondary rail */ if (gmxc_rail == NULL) - ret = setup_volt_dependency_tbl(gx_votes, pri_rail, sec_rail, + ret = adreno_rpmh_setup_volt_dependency_tbl(gx_votes, pri_rail, sec_rail, vlvl_tbl, table->gpu_level_num); else - ret = setup_volt_dependency_tbl(gx_votes, pri_rail, gmxc_rail, + ret = adreno_rpmh_setup_volt_dependency_tbl(gx_votes, pri_rail, gmxc_rail, vlvl_tbl, table->gpu_level_num); if (ret) return ret; @@ -431,15 +192,15 @@ static int build_dcvs_table(struct adreno_device *adreno_dev) struct rpmh_arc_vals gx_arc, cx_arc, mx_arc, gmxc_arc; int ret; - ret = rpmh_arc_cmds(&gx_arc, "gfx.lvl"); + ret = adreno_rpmh_arc_cmds(&gx_arc, "gfx.lvl"); if 
(ret) return ret; - ret = rpmh_arc_cmds(&cx_arc, "cx.lvl"); + ret = adreno_rpmh_arc_cmds(&cx_arc, "cx.lvl"); if (ret) return ret; - ret = rpmh_arc_cmds(&mx_arc, "mx.lvl"); + ret = adreno_rpmh_arc_cmds(&mx_arc, "mx.lvl"); if (ret) return ret; @@ -449,7 +210,7 @@ static int build_dcvs_table(struct adreno_device *adreno_dev) /* If the target supports dedicated MxC rail, read the same */ if (cmd_db_read_addr("gmxc.lvl")) { - ret = rpmh_arc_cmds(&gmxc_arc, "gmxc.lvl"); + ret = adreno_rpmh_arc_cmds(&gmxc_arc, "gmxc.lvl"); if (ret) return ret; ret = setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, &gmxc_arc, &cx_arc); @@ -460,51 +221,6 @@ static int build_dcvs_table(struct adreno_device *adreno_dev) return ret; } -/* - * List of Bus Control Modules (BCMs) that need to be configured for the GPU - * to access DDR. For each bus level we will generate a vote each BC - */ -static struct bcm gen8_ddr_bcms[] = { - { .name = "SH0", .buswidth = 16 }, - { .name = "MC0", .buswidth = 4 }, - { .name = "ACV", .fixed = true }, -}; - -/* Same as above, but for the CNOC BCMs */ -static struct bcm gen8_cnoc_bcms[] = { - { .name = "CN0", .buswidth = 4 }, -}; - -static void build_bw_table_cmd(struct hfi_bwtable_cmd *cmd, - struct rpmh_bw_votes *ddr, struct rpmh_bw_votes *cnoc) -{ - u32 i, j; - - cmd->bw_level_num = ddr->num_levels; - cmd->ddr_cmds_num = ddr->num_cmds; - cmd->ddr_wait_bitmask = ddr->wait_bitmask; - - for (i = 0; i < ddr->num_cmds; i++) - cmd->ddr_cmd_addrs[i] = ddr->addrs[i]; - - for (i = 0; i < ddr->num_levels; i++) - for (j = 0; j < ddr->num_cmds; j++) - cmd->ddr_cmd_data[i][j] = (u32) ddr->cmds[i][j]; - - if (!cnoc) - return; - - cmd->cnoc_cmds_num = cnoc->num_cmds; - cmd->cnoc_wait_bitmask = cnoc->wait_bitmask; - - for (i = 0; i < cnoc->num_cmds; i++) - cmd->cnoc_cmd_addrs[i] = cnoc->addrs[i]; - - for (i = 0; i < cnoc->num_levels; i++) - for (j = 0; j < cnoc->num_cmds; j++) - cmd->cnoc_cmd_data[i][j] = (u32) cnoc->cmds[i][j]; -} - /* BIT(2) is used to vote for GPU 
performance mode through GMU */ #define ACV_GPU_PERFMODE_VOTE BIT(2) @@ -521,7 +237,7 @@ static int build_bw_table(struct adreno_device *adreno_dev) u32 count; int ret; - ddr = build_rpmh_bw_votes(gen8_ddr_bcms, ARRAY_SIZE(gen8_ddr_bcms), + ddr = adreno_rpmh_build_bw_votes(adreno_ddr_bcms, ARRAY_SIZE(adreno_ddr_bcms), pwr->ddr_table, pwr->ddr_table_count, ACV_GPU_PERFMODE_VOTE, perfmode_lvl); if (IS_ERR(ddr)) return PTR_ERR(ddr); @@ -530,13 +246,13 @@ static int build_bw_table(struct adreno_device *adreno_dev) &count); if (count > 0) - cnoc = build_rpmh_bw_votes(gen8_cnoc_bcms, - ARRAY_SIZE(gen8_cnoc_bcms), cnoc_table, count, 0, 0); + cnoc = adreno_rpmh_build_bw_votes(adreno_cnoc_bcms, + ARRAY_SIZE(adreno_cnoc_bcms), cnoc_table, count, 0, 0); kfree(cnoc_table); if (IS_ERR(cnoc)) { - free_rpmh_bw_votes(ddr); + adreno_rpmh_free_bw_votes(ddr); return PTR_ERR(cnoc); } @@ -544,10 +260,10 @@ static int build_bw_table(struct adreno_device *adreno_dev) if (ret) return ret; - build_bw_table_cmd(&gmu->hfi.bw_table, ddr, cnoc); + adreno_rpmh_build_bw_table_cmd(&gmu->hfi.bw_table, ddr, cnoc); - free_rpmh_bw_votes(ddr); - free_rpmh_bw_votes(cnoc); + adreno_rpmh_free_bw_votes(ddr); + adreno_rpmh_free_bw_votes(cnoc); return 0; } diff --git a/adreno_rpmh.c b/adreno_rpmh.c new file mode 100644 index 0000000000..3c1bcfc002 --- /dev/null +++ b/adreno_rpmh.c @@ -0,0 +1,266 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#include +#include +#include + +#include "adreno.h" +#include "adreno_rpmh.h" + +int adreno_rpmh_arc_cmds(struct rpmh_arc_vals *arc, const char *res_id) +{ + size_t len = 0; + + arc->val = cmd_db_read_aux_data(res_id, &len); + + /* + * cmd_db_read_aux_data() gives us a zero-padded table of + * size len that contains the arc values. 
To determine the + * number of arc values, we loop through the table and count + * them until we get to the end of the buffer or hit the + * zero padding. + */ + for (arc->num = 1; arc->num < (len >> 1); arc->num++) { + if (arc->val[arc->num - 1] != 0 && arc->val[arc->num] == 0) + break; + } + + return 0; +} + +int adreno_rpmh_setup_volt_dependency_tbl(u32 *votes, struct rpmh_arc_vals *pri_rail, + struct rpmh_arc_vals *sec_rail, u16 *vlvl, u32 num_entries) +{ + int i, j, k; + uint16_t cur_vlvl; + bool found_match; + + /* + * i tracks current KGSL GPU frequency table entry + * j tracks secondary rail voltage table entry + * k tracks primary rail voltage table entry + */ + for (i = 0; i < num_entries; i++) { + found_match = false; + + /* Look for a primary rail voltage that matches a VLVL level */ + for (k = 0; k < pri_rail->num; k++) { + if (pri_rail->val[k] >= vlvl[i]) { + cur_vlvl = pri_rail->val[k]; + found_match = true; + break; + } + } + + /* If we did not find a matching VLVL level then abort */ + if (!found_match) + return -EINVAL; + + /* + * Look for a secondary rail index whose VLVL value + * is greater than or equal to the VLVL value of the + * corresponding index of the primary rail + */ + for (j = 0; j < sec_rail->num; j++) { + if (sec_rail->val[j] >= cur_vlvl || + j + 1 == sec_rail->num) + break; + } + + if (j == sec_rail->num) + j = 0; + + votes[i] = ARC_VOTE_SET(k, j, cur_vlvl); + } + + return 0; +} + +/* Generate a set of bandwidth votes for the list of BCMs */ +static void tcs_cmd_data(struct bcm *bcms, int count, u32 ab, u32 ib, u32 *data, + u32 perfmode_vote) +{ + int i; + + for (i = 0; i < count; i++) { + bool valid = true; + bool commit = false; + u64 avg, peak, x, y; + + if (i == count - 1 || bcms[i].vcd != bcms[i + 1].vcd) + commit = true; + + if (bcms[i].fixed) { + if (!ab && !ib) + data[i] = BCM_TCS_CMD(commit, false, 0x0, 0x0); + else + data[i] = BCM_TCS_CMD(commit, true, 0x0, perfmode_vote); + continue; + } + + /* Multiply the bandwidth by
the width of the connection */ + avg = ((u64) ab) * bcms[i].width; + + /* And then divide by the total width */ + do_div(avg, bcms[i].buswidth); + + peak = ((u64) ib) * bcms[i].width; + do_div(peak, bcms[i].buswidth); + + /* Input bandwidth value is in KBps */ + x = avg * 1000ULL; + do_div(x, bcms[i].unit); + + /* Input bandwidth value is in KBps */ + y = peak * 1000ULL; + do_div(y, bcms[i].unit); + + /* + * If a bandwidth value was specified but the calculation ends + * rounding down to zero, set a minimum level + */ + if (ab && x == 0) + x = 1; + + if (ib && y == 0) + y = 1; + + x = min_t(u64, x, BCM_TCS_CMD_VOTE_MASK); + y = min_t(u64, y, BCM_TCS_CMD_VOTE_MASK); + + if (!x && !y) + valid = false; + + data[i] = BCM_TCS_CMD(commit, valid, x, y); + } +} + +void adreno_rpmh_free_bw_votes(struct rpmh_bw_votes *votes) +{ + int i; + + if (!votes) + return; + + for (i = 0; votes->cmds && i < votes->num_levels; i++) + kfree(votes->cmds[i]); + + kfree(votes->cmds); + kfree(votes->addrs); + kfree(votes); +} + +struct rpmh_bw_votes *adreno_rpmh_build_bw_votes(struct bcm *bcms, int bcm_count, + u32 *levels, int levels_count, u32 perfmode_vote, u32 perfmode_lvl) +{ + struct rpmh_bw_votes *votes; + int i; + + votes = kzalloc(sizeof(*votes), GFP_KERNEL); + if (!votes) + return ERR_PTR(-ENOMEM); + + votes->addrs = kcalloc(bcm_count, sizeof(*votes->cmds), GFP_KERNEL); + if (!votes->addrs) { + adreno_rpmh_free_bw_votes(votes); + return ERR_PTR(-ENOMEM); + } + + votes->cmds = kcalloc(levels_count, sizeof(*votes->cmds), GFP_KERNEL); + if (!votes->cmds) { + adreno_rpmh_free_bw_votes(votes); + return ERR_PTR(-ENOMEM); + } + + votes->num_cmds = bcm_count; + votes->num_levels = levels_count; + + /* Get the cmd-db information for each BCM */ + for (i = 0; i < bcm_count; i++) { + size_t l; + const struct bcm_data *data; + + data = cmd_db_read_aux_data(bcms[i].name, &l); + + votes->addrs[i] = cmd_db_read_addr(bcms[i].name); + + bcms[i].unit = le32_to_cpu(data->unit); + bcms[i].width = 
le16_to_cpu(data->width); + bcms[i].vcd = data->vcd; + } + + for (i = 0; i < bcm_count; i++) { + if (i == (bcm_count - 1) || bcms[i].vcd != bcms[i + 1].vcd) + votes->wait_bitmask |= (1 << i); + } + + for (i = 0; i < levels_count; i++) { + votes->cmds[i] = kcalloc(bcm_count, sizeof(u32), GFP_KERNEL); + if (!votes->cmds[i]) { + adreno_rpmh_free_bw_votes(votes); + return ERR_PTR(-ENOMEM); + } + + tcs_cmd_data(bcms, bcm_count, levels[i], levels[i], votes->cmds[i], + (i >= perfmode_lvl) ? perfmode_vote : 0x0); + } + + return votes; +} + +int adreno_rpmh_to_cx_hlvl(struct rpmh_arc_vals *cx_rail, u32 vlvl, u32 *hlvl) +{ + u32 i; + + /* + * This means that the Gx level doesn't have a dependency on Cx level. + * Return the same value to disable cx voting at GMU. + */ + if (vlvl == 0xffffffff) { + *hlvl = vlvl; + return 0; + } + + for (i = 0; i < cx_rail->num; i++) { + if (cx_rail->val[i] >= vlvl) { + *hlvl = i; + return 0; + } + } + + return -EINVAL; +} + +void adreno_rpmh_build_bw_table_cmd(struct hfi_bwtable_cmd *cmd, struct rpmh_bw_votes *ddr, + struct rpmh_bw_votes *cnoc) +{ + u32 i, j; + + cmd->bw_level_num = ddr->num_levels; + cmd->ddr_cmds_num = ddr->num_cmds; + cmd->ddr_wait_bitmask = ddr->wait_bitmask; + + for (i = 0; i < ddr->num_cmds; i++) + cmd->ddr_cmd_addrs[i] = ddr->addrs[i]; + + for (i = 0; i < ddr->num_levels; i++) + for (j = 0; j < ddr->num_cmds; j++) + cmd->ddr_cmd_data[i][j] = (u32) ddr->cmds[i][j]; + + if (!cnoc) + return; + + cmd->cnoc_cmds_num = cnoc->num_cmds; + cmd->cnoc_wait_bitmask = cnoc->wait_bitmask; + + for (i = 0; i < cnoc->num_cmds; i++) + cmd->cnoc_cmd_addrs[i] = cnoc->addrs[i]; + + for (i = 0; i < cnoc->num_levels; i++) + for (j = 0; j < cnoc->num_cmds; j++) + cmd->cnoc_cmd_data[i][j] = (u32) cnoc->cmds[i][j]; +} diff --git a/adreno_rpmh.h b/adreno_rpmh.h new file mode 100644 index 0000000000..7bdc13b75a --- /dev/null +++ b/adreno_rpmh.h @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux 
Foundation. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _ADRENO_GMU_RPMH_H +#define _ADRENO_GMU_RPMH_H + +struct rpmh_arc_vals { + u32 num; + const u16 *val; +}; + +struct bcm { + const char *name; + u32 buswidth; + u32 channels; + u32 unit; + u16 width; + u8 vcd; + bool fixed; +}; + +struct bcm_data { + __le32 unit; + __le16 width; + u8 vcd; + u8 reserved; +}; + +struct rpmh_bw_votes { + u32 wait_bitmask; + u32 num_cmds; + u32 *addrs; + u32 num_levels; + u32 **cmds; +}; + +#define ARC_VOTE_SET(pri, sec, vlvl) \ + (FIELD_PREP(GENMASK(31, 16), vlvl) | \ + FIELD_PREP(GENMASK(15, 8), sec) | \ + FIELD_PREP(GENMASK(7, 0), pri)) + +/* + * List of Bus Control Modules (BCMs) that need to be configured for the GPU + * to access DDR. For each bus level we will generate a vote each BC + */ +static struct bcm adreno_ddr_bcms[] = { + { .name = "SH0", .buswidth = 16 }, + { .name = "MC0", .buswidth = 4 }, + { .name = "ACV", .fixed = true }, +}; + +/* Same as above, but for the CNOC BCMs */ +static struct bcm adreno_cnoc_bcms[] = { + { .name = "CN0", .buswidth = 4 }, +}; + +/** + * adreno_rpmh_arc_cmds - Read RPMh ARC values from command DB + * @arc: Pointer to the rpmh_arc_vals structure + * @res_id: Resource ID for which ARC values are requested + * + * Return: 0 on success or negative error on failure + */ +int adreno_rpmh_arc_cmds(struct rpmh_arc_vals *arc, const char *res_id); + +/** + * adreno_rpmh_setup_volt_dependency_tbl - Set up voltage dependency table + * @votes: Pointer to an array to store the resulting ARC votes + * @pri_rail: Pointer to the primary rail ARC values + * @sec_rail: Pointer to the secondary rail ARC values + * @vlvl: Array of voltage levels corresponding to each frequency corner + * @num_entries: Number of entries in the voltage dependency table + * + * Return: 0 on success or negative error on failure + */ +int adreno_rpmh_setup_volt_dependency_tbl(u32 *votes, struct 
rpmh_arc_vals *pri_rail, + struct rpmh_arc_vals *sec_rail, u16 *vlvl, u32 num_entries); + +/** + * adreno_rpmh_free_bw_votes - Free memory associated with RPMh bandwidth votes + * @votes: Pointer to the rpmh_bw_votes structure + */ +void adreno_rpmh_free_bw_votes(struct rpmh_bw_votes *votes); + +/** + * adreno_rpmh_build_bw_votes - Build the votes table from the specified + * bandwidth levels + * @bcms: Array of BCM structures representing BCMs + * @bcm_count: Number of BCMs in the array + * @levels: Array of performance levels + * @levels_count: Number of performance levels + * @perfmode_vote: Performance mode vote + * @perfmode_lvl: Initial performance level for performance mode vote + * + * Return: Pointer to the rpmh_bw_votes structure on success, or an error pointer failure + */ +struct rpmh_bw_votes *adreno_rpmh_build_bw_votes(struct bcm *bcms, int bcm_count, + u32 *levels, int levels_count, u32 perfmode_vote, u32 perfmode_lvl); + +/** + * adreno_rpmh_to_cx_hlvl - Convert RPMh VLVL to CX HLVL level + * @cx_rail: Pointer to the RPMh ARC values for the CX rail + * @vlvl: Voltage level to convert + * @hlvl: Pointer to store the resulting CX level + * + * Return: 0 on success or negative error on failure + */ +int adreno_rpmh_to_cx_hlvl(struct rpmh_arc_vals *cx_rail, u32 vlvl, u32 *hlvl); + +/** + * adreno_rpmh_build_bw_table_cmd - Build bandwidth table command + * @cmd: Pointer to the hfi_bwtable_cmd structure + * @ddr: Pointer to the DDR RPMh bandwidth votes + * @cnoc: Pointer to the CNOC RPMh bandwidth votes + */ +void adreno_rpmh_build_bw_table_cmd(struct hfi_bwtable_cmd *cmd, struct rpmh_bw_votes *ddr, + struct rpmh_bw_votes *cnoc); +#endif diff --git a/build/kgsl_defs.bzl b/build/kgsl_defs.bzl index b969b687e4..a1dd18540b 100644 --- a/build/kgsl_defs.bzl +++ b/build/kgsl_defs.bzl @@ -52,6 +52,7 @@ def kgsl_get_srcs(): "adreno_gen8_ringbuffer.c", "adreno_gen8_rpmh.c", "adreno_gen8_snapshot.c", + "adreno_rpmh.c", "adreno_hwsched.c", 
"adreno_hwsched_snapshot.c", "adreno_ioctl.c", From 135eebd9b4eb6bb3a60ed5e66eaeea94ea0f6fa7 Mon Sep 17 00:00:00 2001 From: Raviteja Narayanam Date: Mon, 24 Jun 2024 12:15:30 +0530 Subject: [PATCH 0860/1016] kgsl: gen7: Update uche gmem alignment for gen7_3_0 UCHE GMEM address should be 16M aligned for gen7_3_0. Change-Id: I323c39a721a007424a22464b913a6e4df3523667 Signed-off-by: Raviteja Narayanam --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 0d7b54c6c2..97139acf20 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2272,7 +2272,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_3_0 = { ADRENO_PREEMPTION | ADRENO_BCL | ADRENO_ACD, .gpudev = &adreno_gen7_gmu_gpudev.base, .perfcounters = &adreno_gen7_no_cb_perfcounters, - .uche_gmem_alignment = 0, + .uche_gmem_alignment = SZ_16M, .gmem_size = SZ_512K, .bus_width = 32, .snapshot_size = SZ_2M, From c784a4fdffa9e9df6f7404c4c4b054c08aacde8f Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Mon, 22 Jul 2024 14:12:41 -0600 Subject: [PATCH 0861/1016] kgsl: hwfence: Fix context refcount check Currently, hardware fences are not created because hardware fence entry allocation path is not checking the return value of _kgsl_context_get() correctly. Fix this to ensure hardware fences are created. 
Change-Id: Ic37b8ffd3ed00ed44ea02e4e26885475c1444a16 Signed-off-by: Harshdeep Dhatt --- adreno_gen7_hwsched_hfi.c | 2 +- adreno_gen8_hwsched_hfi.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 47cdf27550..9d4a6a5a84 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -3373,7 +3373,7 @@ static struct adreno_hw_fence_entry *allocate_hw_fence_entry(struct adreno_devic if (!DRAWCTXT_SLOT_AVAILABLE(drawctxt->hw_fence_count)) return NULL; - if (_kgsl_context_get(&drawctxt->base)) + if (!_kgsl_context_get(&drawctxt->base)) return NULL; entry = kmem_cache_zalloc(hwsched->hw_fence_cache, GFP_ATOMIC); diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 701af8c8d2..7b1a31c27a 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -3165,7 +3165,7 @@ static struct adreno_hw_fence_entry *allocate_hw_fence_entry(struct adreno_devic if (!DRAWCTXT_SLOT_AVAILABLE(drawctxt->hw_fence_count)) return NULL; - if (_kgsl_context_get(&drawctxt->base)) + if (!_kgsl_context_get(&drawctxt->base)) return NULL; entry = kmem_cache_zalloc(hwsched->hw_fence_cache, GFP_ATOMIC); From 0a6138f1a75178172d5906f8ddd466a6d1d488de Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Thu, 20 Jun 2024 11:11:59 -0600 Subject: [PATCH 0862/1016] kgsl: Make fault recovery and suspend mutually exclusive It is possible that kgsl is doing fault recovery when a system suspend request comes in. The system suspend path increments power domain prepared_count and that causes fault recovery to fail because the cx and gx gdscs are not successfully toggled. To fix this, introduce a new mutex to make sure fault recovery and system suspend are mutually exclusive. Also, introduce a fault recovery completion to make sure we don't attempt fault recovery until we are sure that the system path will not be able to increment the power domain prepared_count while fault recovery is in place. 
Change-Id: I0eb51e7afb3147d195ba4b19bcb0f2087ed6d752 Signed-off-by: Harshdeep Dhatt --- adreno.c | 99 ++++++++++++++++++++++++--- adreno.h | 46 +++++++++++-- adreno_a5xx.c | 7 +- adreno_a5xx_perfcounter.c | 3 +- adreno_a5xx_preempt.c | 10 +-- adreno_a6xx.c | 8 +-- adreno_a6xx_gmu.c | 6 +- adreno_a6xx_hwsched.c | 8 +-- adreno_a6xx_hwsched_hfi.c | 18 ++--- adreno_a6xx_perfcounter.c | 4 +- adreno_a6xx_preempt.c | 14 ++-- adreno_a6xx_rgmu.c | 2 +- adreno_a6xx_ringbuffer.c | 2 +- adreno_dispatch.c | 100 ++++++++++++++++------------ adreno_dispatch.h | 14 +--- adreno_drawctxt.c | 2 +- adreno_gen7.c | 10 ++- adreno_gen7_gmu.c | 6 +- adreno_gen7_hwsched.c | 6 +- adreno_gen7_hwsched_hfi.c | 13 ++-- adreno_gen7_perfcounter.c | 2 +- adreno_gen7_preempt.c | 14 ++-- adreno_gen7_ringbuffer.c | 2 +- adreno_gen8.c | 10 ++- adreno_gen8_gmu.c | 6 +- adreno_gen8_hwsched.c | 6 +- adreno_gen8_hwsched_hfi.c | 13 ++-- adreno_gen8_preempt.c | 14 ++-- adreno_gen8_ringbuffer.c | 2 +- adreno_hwsched.c | 137 ++++++++++++++++---------------------- adreno_hwsched.h | 30 --------- kgsl_iommu.c | 4 +- 32 files changed, 331 insertions(+), 287 deletions(-) diff --git a/adreno.c b/adreno.c index 660b19ca74..1f98add59e 100644 --- a/adreno.c +++ b/adreno.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -379,7 +380,7 @@ void adreno_hang_int_callback(struct adreno_device *adreno_dev, int bit) adreno_irqctrl(adreno_dev, 0); /* Trigger a fault in the dispatcher - this will effect a restart */ - adreno_dispatcher_fault(adreno_dev, ADRENO_HARD_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_HARD_FAULT); } /* @@ -391,9 +392,7 @@ void adreno_hang_int_callback(struct adreno_device *adreno_dev, int bit) */ void adreno_cp_callback(struct adreno_device *adreno_dev, int bit) { - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - adreno_dispatcher_schedule(device); + adreno_scheduler_queue(adreno_dev); } static irqreturn_t adreno_irq_handler(int irq, void *data) @@ 
-1164,10 +1163,13 @@ static void adreno_setup_device(struct adreno_device *adreno_dev) init_completion(&adreno_dev->dev.hwaccess_gate); init_completion(&adreno_dev->dev.halt_gate); + init_completion(&adreno_dev->suspend_recovery_gate); + complete_all(&adreno_dev->suspend_recovery_gate); idr_init(&adreno_dev->dev.context_idr); mutex_init(&adreno_dev->dev.mutex); + mutex_init(&adreno_dev->fault_recovery_mutex); INIT_LIST_HEAD(&adreno_dev->dev.globals); /* Set the fault tolerance policy to replay, skip, throttle */ @@ -1220,6 +1222,73 @@ static int adreno_irq_setup(struct platform_device *pdev, return kgsl_request_irq(pdev, "kgsl_3d0_irq", adreno_irq_handler, KGSL_DEVICE(adreno_dev)); } +static int adreno_pm_notifier(struct notifier_block *nb, unsigned long event, void *unused) +{ + struct adreno_device *adreno_dev = container_of(nb, struct adreno_device, pm_nb); + struct kgsl_pwrctrl *pwr = &adreno_dev->dev.pwrctrl; + struct generic_pm_domain *pd = NULL; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if ((event != PM_SUSPEND_PREPARE) && (event != PM_POST_SUSPEND)) + return NOTIFY_DONE; + + if (pwr->gx_pd) { + pd = container_of(pwr->gx_pd->pm_domain, struct generic_pm_domain, domain); + + if (pd->prepared_count) { + dev_err_ratelimited(device->dev, + "unexpected gx pd prepared_count:%d event:%lu\n", + pd->prepared_count, event); + return NOTIFY_BAD; + } + } + + if (pwr->cx_pd) { + pd = container_of(pwr->cx_pd->pm_domain, struct generic_pm_domain, domain); + + if (pd->prepared_count) { + dev_err_ratelimited(device->dev, + "unexpected cx pd prepared_count:%d event:%lu\n", + pd->prepared_count, event); + return NOTIFY_BAD; + } + } + + if (event == PM_SUSPEND_PREPARE) { + /* + * In presence of a hardware fault, cancel system suspend (by returning NOTIFY_BAD) + * here to make sure system suspend doesn't increment the pd prepared_count. This + * ensures that cx and gx gdscs can be toggled successfully during fault recovery. 
+ */ + if (adreno_gpu_fault(adreno_dev)) { + dev_err_ratelimited(device->dev, "cancelling suspend because of fault\n"); + complete_all(&adreno_dev->suspend_recovery_gate); + adreno_scheduler_queue(adreno_dev); + return NOTIFY_BAD; + } + + reinit_completion(&adreno_dev->suspend_recovery_gate); + return NOTIFY_DONE; + } + + /* + * We get PM_POST_SUSPEND if we failed kgsl suspend in the presence of a hardware fault, + * or when system resume finishes. In either case, this means the system has come out of + * suspend and has put back the power domain prepared_count. This means we are safe to + * perform fault recovery. + */ + if (event == PM_POST_SUSPEND) { + complete_all(&adreno_dev->suspend_recovery_gate); + /* + * Queue the gpu scheduler to proceed with fault recovery in case there was a + * fault + */ + adreno_scheduler_queue(adreno_dev); + } + + return NOTIFY_DONE; +} + int adreno_device_probe(struct platform_device *pdev, struct adreno_device *adreno_dev) { @@ -1403,6 +1472,18 @@ int adreno_device_probe(struct platform_device *pdev, } #endif + /* + * With power domains, we cannot perform recovery during a concurrent system suspend because + * system suspend path increments power domain prepared_count, which prevents successful + * toggling of the power domain gdsc while system is in suspend path. Hence, get + * notifications when system has come out of suspend completely, so that we can perform + * fault recovery. 
+ */ + if (device->pwrctrl.gx_pd || device->pwrctrl.cx_pd) { + adreno_dev->pm_nb.notifier_call = adreno_pm_notifier; + register_pm_notifier(&adreno_dev->pm_nb); + } + kgsl_qcom_va_md_register(device); KGSL_BOOT_MARKER("GPU Ready"); @@ -1598,6 +1679,10 @@ static int adreno_pm_suspend(struct device *dev) adreno_dev = ADRENO_DEVICE(device); ops = ADRENO_POWER_OPS(adreno_dev); + /* Return early if fault recovery is in progress */ + if (!mutex_trylock(&adreno_dev->fault_recovery_mutex)) + return -EDEADLK; + mutex_lock(&device->mutex); status = ops->pm_suspend(adreno_dev); @@ -1607,6 +1692,7 @@ static int adreno_pm_suspend(struct device *dev) #endif mutex_unlock(&device->mutex); + mutex_unlock(&adreno_dev->fault_recovery_mutex); if (status) return status; @@ -3237,10 +3323,7 @@ bool adreno_smmu_is_stalled(struct adreno_device *adreno_dev) return (val & BIT(24)); } - if (WARN_ON(!adreno_dev->dispatch_ops || !adreno_dev->dispatch_ops->get_fault)) - return false; - - fault = adreno_dev->dispatch_ops->get_fault(adreno_dev); + fault = adreno_gpu_fault(adreno_dev); return ((fault & ADRENO_IOMMU_PAGE_FAULT) && test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &mmu->pfpolicy)) ? 
true : false; diff --git a/adreno.h b/adreno.h index 3fc604eb7c..537e53b304 100644 --- a/adreno.h +++ b/adreno.h @@ -514,11 +514,8 @@ struct adreno_dispatch_ops { struct adreno_context *drawctxt); void (*setup_context)(struct adreno_device *adreno_dev, struct adreno_context *drawctxt); - void (*fault)(struct adreno_device *adreno_dev, u32 fault); /* @create_hw_fence: Create a hardware fence */ void (*create_hw_fence)(struct adreno_device *adreno_dev, struct kgsl_sync_fence *kfence); - /* @get_fault: Get the GPU fault status */ - u32 (*get_fault)(struct adreno_device *adreno_dev); }; /** @@ -757,8 +754,23 @@ struct adreno_device { struct adreno_fault_proc fault_procs[ADRENO_MAX_FAULTING_PROCS]; /** @fault_stats_lock: A R/W lock to protect GPU fault statistics */ rwlock_t fault_stats_lock; + /** @fault_recovery_mutex: Mutex taken during fault handling in the dispatcher */ + struct mutex fault_recovery_mutex; + /** @pm_nb: Notifier block for defining a callback that gets called during system suspend */ + struct notifier_block pm_nb; + /** @suspend_recovery_gate: Gate to wait on for system to come out of suspend */ + struct completion suspend_recovery_gate; + /** @scheduler_worker: kthread worker for scheduling gpu commands */ + struct kthread_worker *scheduler_worker; + /** @scheduler_work: work_struct to put the gpu command scheduler in a work queue */ + struct kthread_work scheduler_work; + /** @scheduler_fault: Atomic to trigger scheduler based fault recovery */ + atomic_t scheduler_fault; }; +/* Time to wait for suspend recovery gate to complete */ +#define ADRENO_SUSPEND_RECOVERY_GATE_TIMEOUT_MS 5000 + /** * enum adreno_device_flags - Private flags for the adreno_device * @ADRENO_DEVICE_PWRON - Set during init after a power collapse @@ -1424,7 +1436,7 @@ static inline unsigned int adreno_gpu_fault(struct adreno_device *adreno_dev) { /* make sure we're reading the latest value */ smp_rmb(); - return atomic_read(&adreno_dev->dispatcher.fault); + return 
atomic_read(&adreno_dev->scheduler_fault); } /** @@ -1437,7 +1449,7 @@ static inline void adreno_set_gpu_fault(struct adreno_device *adreno_dev, int state) { /* only set the fault bit w/o overwriting other bits */ - atomic_or(state, &adreno_dev->dispatcher.fault); + atomic_or(state, &adreno_dev->scheduler_fault); /* make sure other CPUs see the update */ smp_wmb(); @@ -1449,15 +1461,35 @@ static inline void adreno_set_gpu_fault(struct adreno_device *adreno_dev, * * Clear the GPU fault status for the adreno device */ - static inline void adreno_clear_gpu_fault(struct adreno_device *adreno_dev) { - atomic_set(&adreno_dev->dispatcher.fault, 0); + atomic_set(&adreno_dev->scheduler_fault, 0); /* make sure other CPUs see the update */ smp_wmb(); } +/** + * adreno_scheduler_queue - Queue the scheduler kthread + * @adreno_dev: Adreno device handle + */ +static inline void adreno_scheduler_queue(struct adreno_device *adreno_dev) +{ + kthread_queue_work(adreno_dev->scheduler_worker, &adreno_dev->scheduler_work); +} + +/** + * adreno_scheduler_fault() - Set GPU fault and trigger fault recovery + * @adreno_dev: A pointer to an adreno_device structure + * @fault: Type of fault + */ +static inline void adreno_scheduler_fault(struct adreno_device *adreno_dev, + u32 fault) +{ + adreno_set_gpu_fault(adreno_dev, fault); + adreno_scheduler_queue(adreno_dev); +} + /** * adreno_gpu_halt() - Return the GPU halt refcount * @adreno_dev: A pointer to the adreno_device diff --git a/adreno_a5xx.c b/adreno_a5xx.c index e48c93a2f2..7ab1d85bf1 100644 --- a/adreno_a5xx.c +++ b/adreno_a5xx.c @@ -2112,7 +2112,7 @@ static void a5xx_irq_storm_worker(struct work_struct *work) mutex_unlock(&device->mutex); /* Reschedule just to make sure everything retires */ - adreno_dispatcher_schedule(device); + adreno_scheduler_queue(adreno_dev); } static void a5xx_cp_callback(struct adreno_device *adreno_dev, int bit) @@ -2162,7 +2162,7 @@ static void a5xx_cp_callback(struct adreno_device *adreno_dev, int bit) } 
a5xx_preemption_trigger(adreno_dev); - adreno_dispatcher_schedule(device); + adreno_scheduler_queue(adreno_dev); } static const char *gpmu_int_msg[32] = { @@ -2237,8 +2237,7 @@ static void a5x_gpc_err_int_callback(struct adreno_device *adreno_dev, int bit) adreno_irqctrl(adreno_dev, 0); /* Trigger a fault in the dispatcher - this will effect a restart */ - adreno_dispatcher_fault(adreno_dev, ADRENO_SOFT_FAULT); - adreno_dispatcher_schedule(device); + adreno_scheduler_fault(adreno_dev, ADRENO_SOFT_FAULT); } u64 a5xx_read_alwayson(struct adreno_device *adreno_dev) diff --git a/adreno_a5xx_perfcounter.c b/adreno_a5xx_perfcounter.c index 8886ee24ba..1b4f8a0ed5 100644 --- a/adreno_a5xx_perfcounter.c +++ b/adreno_a5xx_perfcounter.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #include "adreno.h" @@ -91,7 +92,7 @@ static int a5xx_counter_inline_enable(struct adreno_device *adreno_dev, * rb[0] will not get scheduled to run */ if (adreno_dev->cur_rb != rb) - adreno_dispatcher_schedule(device); + adreno_scheduler_queue(adreno_dev); /* wait for the above commands submitted to complete */ ret = adreno_ringbuffer_waittimestamp(rb, rb->timestamp, diff --git a/adreno_a5xx_preempt.c b/adreno_a5xx_preempt.c index 4428beb246..5dffcd0ef7 100644 --- a/adreno_a5xx_preempt.c +++ b/adreno_a5xx_preempt.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2014-2017,2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include "adreno.h" @@ -68,7 +68,7 @@ static void _a5xx_preemption_done(struct adreno_device *adreno_dev) adreno_dev->next_rb->wptr); /* Set a fault and restart */ - adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_PREEMPT_FAULT); return; } @@ -109,7 +109,7 @@ static void _a5xx_preemption_fault(struct adreno_device *adreno_dev) adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE); - adreno_dispatcher_schedule(device); + adreno_scheduler_queue(adreno_dev); return; } } @@ -123,7 +123,7 @@ static void _a5xx_preemption_fault(struct adreno_device *adreno_dev) adreno_get_rptr(adreno_dev->next_rb), adreno_dev->next_rb->wptr); - adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_PREEMPT_FAULT); } static void _a5xx_preemption_worker(struct work_struct *work) @@ -275,7 +275,7 @@ void a5xx_preempt_callback(struct adreno_device *adreno_dev, int bit) * there then we have to assume something bad happened */ adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE); - adreno_dispatcher_schedule(device); + adreno_scheduler_queue(adreno_dev); return; } diff --git a/adreno_a6xx.c b/adreno_a6xx.c index 9ba53209de..8996aca245 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -1736,12 +1736,10 @@ static const char *a6xx_iommu_fault_block(struct kgsl_device *device, static void a6xx_cp_callback(struct adreno_device *adreno_dev, int bit) { - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - if (adreno_is_preemption_enabled(adreno_dev)) a6xx_preemption_trigger(adreno_dev, true); - adreno_dispatcher_schedule(device); + adreno_scheduler_queue(adreno_dev); } /* @@ -1763,7 +1761,7 @@ static void a6xx_gpc_err_int_callback(struct adreno_device *adreno_dev, int bit) adreno_irqctrl(adreno_dev, 0); /* Trigger a fault in the dispatcher - this will effect a restart */ - adreno_dispatcher_fault(adreno_dev, ADRENO_SOFT_FAULT); + adreno_scheduler_fault(adreno_dev, 
ADRENO_SOFT_FAULT); } static const struct adreno_irq_funcs a6xx_irq_funcs[32] = { @@ -1891,7 +1889,7 @@ static irqreturn_t a6xx_irq_handler(struct adreno_device *adreno_dev) a6xx_gpu_keepalive(adreno_dev, true); if (a6xx_irq_poll_fence(adreno_dev)) { - adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT); goto done; } diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index 4eb1232a20..436821424c 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -908,9 +908,7 @@ static void trigger_reset_recovery(struct adreno_device *adreno_dev, if (req != oob_perfcntr) return; - if (adreno_dev->dispatch_ops && adreno_dev->dispatch_ops->fault) - adreno_dev->dispatch_ops->fault(adreno_dev, - ADRENO_GMU_FAULT_SKIP_SNAPSHOT); + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } int a6xx_gmu_oob_set(struct kgsl_device *device, @@ -2077,7 +2075,7 @@ static int a6xx_gmu_dcvs_set(struct adreno_device *adreno_dev, * dispatcher based reset and recovery. 
*/ if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) - adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT | + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT | ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index e31f3f6f80..d1f566847c 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -149,7 +149,7 @@ static int a6xx_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) icc_set_bw(pwr->icc_path, 0, kBps_to_icc(pwr->ddr_table[level])); /* Clear any hwsched faults that might have been left over */ - adreno_hwsched_clear_fault(adreno_dev); + adreno_clear_gpu_fault(adreno_dev); ret = a6xx_gmu_device_start(adreno_dev); if (ret) @@ -223,7 +223,7 @@ static int a6xx_hwsched_gmu_boot(struct adreno_device *adreno_dev) a6xx_gmu_irq_enable(adreno_dev); /* Clear any hwsched faults that might have been left over */ - adreno_hwsched_clear_fault(adreno_dev); + adreno_clear_gpu_fault(adreno_dev); ret = a6xx_gmu_device_start(adreno_dev); if (ret) @@ -829,7 +829,7 @@ static int a6xx_hwsched_dcvs_set(struct adreno_device *adreno_dev, * dispatcher based reset and recovery. 
*/ if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) - adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT); } if (req.freq != INVALID_DCVS_IDX) @@ -960,7 +960,7 @@ void a6xx_hwsched_handle_watchdog(struct adreno_device *adreno_dev) dev_err_ratelimited(&gmu->pdev->dev, "GMU watchdog expired interrupt received\n"); - adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT); } static void a6xx_hwsched_pm_resume(struct adreno_device *adreno_dev) diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index cdc386a8ed..31aa0c2eff 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -439,7 +439,7 @@ static void process_ctx_bad(struct adreno_device *adreno_dev) else log_gpu_fault(adreno_dev); - adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_HARD_FAULT); } static u32 peek_next_header(struct a6xx_gmu_device *gmu, uint32_t queue_idx) @@ -495,7 +495,7 @@ static void a6xx_hwsched_process_msgq(struct adreno_device *adreno_dev) if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) { a6xx_receive_ack_async(adreno_dev, rcvd); } else if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_TS_RETIRE) { - adreno_hwsched_trigger(adreno_dev); + adreno_scheduler_queue(adreno_dev); log_profiling_info(adreno_dev, rcvd); } else if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_GMU_CNTR_RELEASE) { struct hfi_gmu_cntr_release_cmd *cmd = @@ -554,7 +554,7 @@ static void a6xx_hwsched_process_dbgq(struct adreno_device *adreno_dev, bool lim if (!recovery) return; - adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT); } /* HFI interrupt handler */ @@ -583,7 +583,7 @@ static irqreturn_t a6xx_hwsched_hfi_handler(int irq, void *data) if (status & (HFI_IRQ_MSGQ_MASK | HFI_IRQ_DBGQ_MASK)) { wake_up_interruptible(&hfi->f2h_wq); - adreno_hwsched_trigger(adreno_dev); + adreno_scheduler_queue(adreno_dev); 
} if (status & HFI_IRQ_CM3_FAULT_MASK) { atomic_set(&gmu->cm3_fault, 1); @@ -594,7 +594,7 @@ static irqreturn_t a6xx_hwsched_hfi_handler(int irq, void *data) dev_err_ratelimited(&gmu->pdev->dev, "GMU CM3 fault interrupt received\n"); - adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT); } /* Ignore OOB bits */ @@ -1717,7 +1717,7 @@ static int hfi_context_register(struct adreno_device *adreno_dev, context->id, ret); if (device->gmu_fault) - adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT); return ret; } @@ -1729,7 +1729,7 @@ static int hfi_context_register(struct adreno_device *adreno_dev, context->id, ret); if (device->gmu_fault) - adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT); return ret; } @@ -1936,7 +1936,7 @@ int a6xx_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, int ret; static bool active; - if (adreno_gpu_halt(adreno_dev) || adreno_hwsched_gpu_fault(adreno_dev)) + if (adreno_gpu_halt(adreno_dev) || adreno_gpu_fault(adreno_dev)) return -EBUSY; if (test_bit(CMDOBJ_RECURRING_STOP, &cmdobj->priv)) { @@ -2027,7 +2027,7 @@ static void trigger_context_unregister_fault(struct adreno_device *adreno_dev, * replayed after recovery. */ adreno_drawctxt_set_guilty(device, context); - adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT); } static int send_context_unregister_hfi(struct adreno_device *adreno_dev, diff --git a/adreno_a6xx_perfcounter.c b/adreno_a6xx_perfcounter.c index 7d07cfc53e..5bf1bd6f05 100644 --- a/adreno_a6xx_perfcounter.c +++ b/adreno_a6xx_perfcounter.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. 
+ * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #include "adreno.h" @@ -135,7 +135,7 @@ static int a6xx_counter_inline_enable(struct adreno_device *adreno_dev, * rb[0] will not get scheduled to run */ if (adreno_dev->cur_rb != rb) - adreno_dispatcher_schedule(device); + adreno_scheduler_queue(adreno_dev); /* wait for the above commands submitted to complete */ ret = adreno_ringbuffer_waittimestamp(rb, rb->timestamp, diff --git a/adreno_a6xx_preempt.c b/adreno_a6xx_preempt.c index 4a129967cc..2d5d6920cc 100644 --- a/adreno_a6xx_preempt.c +++ b/adreno_a6xx_preempt.c @@ -59,7 +59,7 @@ static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer, /* If WPTR update fails, set the fault and trigger recovery */ if (ret) { gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); - adreno_dispatcher_fault(adreno_dev, + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } } @@ -117,7 +117,7 @@ static void _a6xx_preemption_done(struct adreno_device *adreno_dev) adreno_dev->next_rb->wptr); /* Set a fault and restart */ - adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_PREEMPT_FAULT); return; } @@ -174,7 +174,7 @@ static void _a6xx_preemption_fault(struct adreno_device *adreno_dev) adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE); - adreno_dispatcher_schedule(device); + adreno_scheduler_queue(adreno_dev); return; } } @@ -188,7 +188,7 @@ static void _a6xx_preemption_fault(struct adreno_device *adreno_dev) adreno_get_rptr(adreno_dev->next_rb), adreno_dev->next_rb->wptr); - adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_PREEMPT_FAULT); } static void _a6xx_preemption_worker(struct work_struct *work) @@ -403,10 +403,10 @@ err: /* If fenced write fails, take inline snapshot and trigger recovery */ if (!atomic) { gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); - 
adreno_dispatcher_fault(adreno_dev, + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } else { - adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT); } adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE); /* Clear the keep alive */ @@ -437,7 +437,7 @@ void a6xx_preemption_callback(struct adreno_device *adreno_dev, int bit) * there then we have to assume something bad happened */ adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE); - adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev)); + adreno_scheduler_queue(adreno_dev); return; } diff --git a/adreno_a6xx_rgmu.c b/adreno_a6xx_rgmu.c index de71a099a8..f5f8a94471 100644 --- a/adreno_a6xx_rgmu.c +++ b/adreno_a6xx_rgmu.c @@ -115,7 +115,7 @@ static irqreturn_t a6xx_oob_irq_handler(int irq, void *data) dev_err_ratelimited(&rgmu->pdev->dev, "RGMU oob irq error\n"); - adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT); } if (status & ~RGMU_OOB_IRQ_MASK) dev_err_ratelimited(&rgmu->pdev->dev, diff --git a/adreno_a6xx_ringbuffer.c b/adreno_a6xx_ringbuffer.c index 47db9cadcd..0bb78f1912 100644 --- a/adreno_a6xx_ringbuffer.c +++ b/adreno_a6xx_ringbuffer.c @@ -168,7 +168,7 @@ int a6xx_ringbuffer_submit(struct adreno_ringbuffer *rb, * recovery. 
*/ gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); - adreno_dispatcher_fault(adreno_dev, + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } diff --git a/adreno_dispatch.c b/adreno_dispatch.c index 1cceaec899..be515083d4 100644 --- a/adreno_dispatch.c +++ b/adreno_dispatch.c @@ -884,7 +884,7 @@ static void adreno_dispatcher_issuecmds(struct adreno_device *adreno_dev) _decrement_submit_now(device); return; done: - adreno_dispatcher_schedule(device); + adreno_scheduler_queue(adreno_dev); } /** @@ -1889,10 +1889,29 @@ static int dispatcher_do_fault(struct adreno_device *adreno_dev) int halt; bool gx_on; - fault = atomic_xchg(&dispatcher->fault, 0); + fault = adreno_gpu_fault(adreno_dev); if (fault == 0) return 0; + /* + * Return early if there is a concurrent suspend in progress. The suspend thread will error + * out in the presence of this hwsched fault and requeue the dispatcher to handle this fault + */ + if (!mutex_trylock(&adreno_dev->fault_recovery_mutex)) + return 1; + + /* + * Wait long enough to allow the system to come out of suspend completely, which can take + * variable amount of time especially if it has to rewind suspend processes and devices. 
+ */ + if (!wait_for_completion_timeout(&adreno_dev->suspend_recovery_gate, + msecs_to_jiffies(ADRENO_SUSPEND_RECOVERY_GATE_TIMEOUT_MS))) { + dev_err(device->dev, "suspend recovery gate timeout\n"); + adreno_scheduler_queue(adreno_dev); + mutex_unlock(&adreno_dev->fault_recovery_mutex); + return 1; + } + mutex_lock(&device->mutex); /* @@ -1901,6 +1920,7 @@ static int dispatcher_do_fault(struct adreno_device *adreno_dev) */ if (!kgsl_state_is_awake(device)) { mutex_unlock(&device->mutex); + mutex_unlock(&adreno_dev->fault_recovery_mutex); return 0; } @@ -1924,6 +1944,7 @@ static int dispatcher_do_fault(struct adreno_device *adreno_dev) if (!(fault & ADRENO_IOMMU_PAGE_FAULT) && gx_on) { if (adreno_smmu_is_stalled(adreno_dev)) { mutex_unlock(&device->mutex); + mutex_unlock(&adreno_dev->fault_recovery_mutex); dev_err(device->dev, "SMMU is stalled without a pagefault\n"); return -EBUSY; @@ -2032,13 +2053,15 @@ static int dispatcher_do_fault(struct adreno_device *adreno_dev) ret = adreno_reset(device, fault); - mutex_unlock(&device->mutex); - /* If adreno_reset() fails then what hope do we have for the future? 
*/ BUG_ON(ret); /* if any other fault got in until reset then ignore */ - atomic_set(&dispatcher->fault, 0); + adreno_clear_gpu_fault(adreno_dev); + + mutex_unlock(&device->mutex); + + mutex_unlock(&adreno_dev->fault_recovery_mutex); /* recover all the dispatch_q's starting with the one that hung */ if (dispatch_q) @@ -2295,10 +2318,9 @@ static void _dispatcher_power_down(struct adreno_device *adreno_dev) static void adreno_dispatcher_work(struct kthread_work *work) { - struct adreno_dispatcher *dispatcher = - container_of(work, struct adreno_dispatcher, work); struct adreno_device *adreno_dev = - container_of(dispatcher, struct adreno_device, dispatcher); + container_of(work, struct adreno_device, scheduler_work); + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; struct kgsl_device *device = KGSL_DEVICE(adreno_dev); const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); int count = 0; @@ -2348,14 +2370,6 @@ static void adreno_dispatcher_work(struct kthread_work *work) mutex_unlock(&dispatcher->mutex); } -void adreno_dispatcher_schedule(struct kgsl_device *device) -{ - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; - - kthread_queue_work(dispatcher->worker, &dispatcher->work); -} - /* * Put a draw context on the dispatcher pending queue and schedule the * dispatcher. 
This is used to reschedule changes that might have been blocked @@ -2365,14 +2379,7 @@ static void adreno_dispatcher_queue_context(struct adreno_device *adreno_dev, struct adreno_context *drawctxt) { dispatcher_queue_context(adreno_dev, drawctxt); - adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev)); -} - -void adreno_dispatcher_fault(struct adreno_device *adreno_dev, - u32 fault) -{ - adreno_set_gpu_fault(adreno_dev, fault); - adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev)); + adreno_scheduler_queue(adreno_dev); } /* @@ -2385,7 +2392,7 @@ static void adreno_dispatcher_timer(struct timer_list *t) struct adreno_device *adreno_dev = container_of(dispatcher, struct adreno_device, dispatcher); - adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev)); + adreno_scheduler_queue(adreno_dev); } /** @@ -2398,7 +2405,7 @@ void adreno_dispatcher_start(struct kgsl_device *device) complete_all(&device->halt_gate); /* Schedule the work loop to get things going */ - adreno_dispatcher_schedule(device); + adreno_scheduler_queue(ADRENO_DEVICE(device)); } /** @@ -2544,7 +2551,7 @@ static void adreno_dispatcher_close(struct adreno_device *adreno_dev) mutex_unlock(&dispatcher->mutex); - kthread_destroy_worker(dispatcher->worker); + kthread_destroy_worker(adreno_dev->scheduler_worker); adreno_set_dispatch_ops(adreno_dev, NULL); @@ -2687,8 +2694,6 @@ static const struct adreno_dispatch_ops swsched_ops = { .queue_cmds = adreno_dispatcher_queue_cmds, .setup_context = adreno_dispatcher_setup_context, .queue_context = adreno_dispatcher_queue_context, - .fault = adreno_dispatcher_fault, - .get_fault = adreno_gpu_fault, }; /** @@ -2711,10 +2716,10 @@ int adreno_dispatcher_init(struct adreno_device *adreno_dev) if (ret) return ret; - dispatcher->worker = kthread_create_worker(0, "kgsl_dispatcher"); - if (IS_ERR(dispatcher->worker)) { + adreno_dev->scheduler_worker = kthread_create_worker(0, "kgsl_dispatcher"); + if (IS_ERR(adreno_dev->scheduler_worker)) { kobject_put(&dispatcher->kobj); - 
return PTR_ERR(dispatcher->worker); + return PTR_ERR(adreno_dev->scheduler_worker); } WARN_ON(sysfs_create_files(&device->dev->kobj, _dispatch_attr_list)); @@ -2723,7 +2728,7 @@ int adreno_dispatcher_init(struct adreno_device *adreno_dev) timer_setup(&dispatcher->timer, adreno_dispatcher_timer, 0); - kthread_init_work(&dispatcher->work, adreno_dispatcher_work); + kthread_init_work(&adreno_dev->scheduler_work, adreno_dispatcher_work); init_completion(&dispatcher->idle_gate); complete_all(&dispatcher->idle_gate); @@ -2737,7 +2742,7 @@ int adreno_dispatcher_init(struct adreno_device *adreno_dev) adreno_set_dispatch_ops(adreno_dev, &swsched_ops); - sched_set_fifo(dispatcher->worker->task); + sched_set_fifo(adreno_dev->scheduler_worker->task); set_bit(ADRENO_DISPATCHER_INIT, &dispatcher->priv); @@ -2779,17 +2784,24 @@ int adreno_dispatcher_idle(struct adreno_device *adreno_dev) * or pending dispatcher works on worker are * finished */ - kthread_flush_worker(dispatcher->worker); + kthread_flush_worker(adreno_dev->scheduler_worker); - ret = wait_for_completion_timeout(&dispatcher->idle_gate, - msecs_to_jiffies(ADRENO_IDLE_TIMEOUT)); - if (ret == 0) { - ret = -ETIMEDOUT; - WARN(1, "Dispatcher halt timeout\n"); - } else if (ret < 0) { - dev_err(device->dev, "Dispatcher halt failed %d\n", ret); + if (adreno_gpu_fault(adreno_dev) != 0) { + ret = -EDEADLK; } else { - ret = 0; + ret = wait_for_completion_timeout(&dispatcher->idle_gate, + msecs_to_jiffies(ADRENO_IDLE_TIMEOUT)); + if (ret == 0) { + ret = -ETIMEDOUT; + WARN(1, "Dispatcher halt timeout\n"); + } else if (ret < 0) { + dev_err(device->dev, "Dispatcher halt failed %d\n", ret); + } else { + ret = 0; + } + + if (adreno_gpu_fault(adreno_dev) != 0) + ret = -EDEADLK; } mutex_lock(&device->mutex); @@ -2798,6 +2810,6 @@ int adreno_dispatcher_idle(struct adreno_device *adreno_dev) * requeue dispatcher work to resubmit pending commands * that may have been blocked due to this idling request */ - 
adreno_dispatcher_schedule(device); + adreno_scheduler_queue(adreno_dev); return ret; } diff --git a/adreno_dispatch.h b/adreno_dispatch.h index 397dbe27dd..167324c256 100644 --- a/adreno_dispatch.h +++ b/adreno_dispatch.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef ____ADRENO_DISPATCHER_H @@ -61,9 +62,7 @@ struct adreno_dispatch_job { * @state: Current state of the dispatcher (active or paused) * @timer: Timer to monitor the progress of the drawobjs * @inflight: Number of drawobj operations pending in the ringbuffer - * @fault: Non-zero if a fault was detected. * @pending: Priority list of contexts waiting to submit drawobjs - * @work: work_struct to put the dispatcher in a work queue * @kobj: kobject for the dispatcher directory in the device sysfs node * @idle_gate: Gate to wait on for dispatcher to idle */ @@ -73,15 +72,12 @@ struct adreno_dispatcher { struct timer_list timer; struct timer_list fault_timer; unsigned int inflight; - atomic_t fault; /** @jobs - Array of dispatch job lists for each priority level */ struct llist_head jobs[16]; /** @requeue - Array of lists for dispatch jobs that got requeued */ struct llist_head requeue[16]; - struct kthread_work work; struct kobject kobj; struct completion idle_gate; - struct kthread_worker *worker; }; enum adreno_dispatcher_flags { @@ -101,12 +97,4 @@ void adreno_dispatcher_stop(struct adreno_device *adreno_dev); void adreno_dispatcher_start_fault_timer(struct adreno_device *adreno_dev); void adreno_dispatcher_stop_fault_timer(struct kgsl_device *device); -void adreno_dispatcher_schedule(struct kgsl_device *device); - -/** - * adreno_dispatcher_fault - Set dispatcher fault to request recovery - * @adreno_dev: A handle to adreno device - * @fault: The type of fault - */ -void adreno_dispatcher_fault(struct adreno_device *adreno_dev, u32 
fault); #endif /* __ADRENO_DISPATCHER_H */ diff --git a/adreno_drawctxt.c b/adreno_drawctxt.c index e302e74e23..16f7643e74 100644 --- a/adreno_drawctxt.c +++ b/adreno_drawctxt.c @@ -567,7 +567,7 @@ static void wait_for_timestamp_rb(struct kgsl_device *device, mutex_unlock(&device->mutex); /* Schedule dispatcher to kick in recovery */ - adreno_dispatcher_schedule(device); + adreno_scheduler_queue(adreno_dev); /* Wait for context to be invalidated and release context */ wait_event_interruptible_timeout(drawctxt->timeout, diff --git a/adreno_gen7.c b/adreno_gen7.c index be1d9529c9..5292e9a836 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -1552,12 +1552,10 @@ static const char *gen7_iommu_fault_block(struct kgsl_device *device, static void gen7_cp_callback(struct adreno_device *adreno_dev, int bit) { - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - if (adreno_is_preemption_enabled(adreno_dev)) gen7_preemption_trigger(adreno_dev, true); - adreno_dispatcher_schedule(device); + adreno_scheduler_queue(adreno_dev); } /* @@ -1579,7 +1577,7 @@ static void gen7_gpc_err_int_callback(struct adreno_device *adreno_dev, int bit) adreno_irqctrl(adreno_dev, 0); /* Trigger a fault in the dispatcher - this will effect a restart */ - adreno_dispatcher_fault(adreno_dev, ADRENO_SOFT_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_SOFT_FAULT); } /* @@ -1615,7 +1613,7 @@ static void gen7_swfuse_violation_callback(struct adreno_device *adreno_dev, int /* Trigger a fault in the dispatcher for LPAC and RAYTRACING violation */ if (status & GENMASK(GEN7_RAYTRACING_SW_FUSE, GEN7_LPAC_SW_FUSE)) { adreno_irqctrl(adreno_dev, 0); - adreno_dispatcher_fault(adreno_dev, ADRENO_HARD_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_HARD_FAULT); } } @@ -1740,7 +1738,7 @@ static irqreturn_t gen7_irq_handler(struct adreno_device *adreno_dev) gen7_gpu_keepalive(adreno_dev, true); if (gen7_irq_poll_fence(adreno_dev)) { - adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT); + 
adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT); goto done; } diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index d65311dad5..a6a633ab01 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -617,9 +617,7 @@ static void trigger_reset_recovery(struct adreno_device *adreno_dev, if (req != oob_perfcntr) return; - if (adreno_dev->dispatch_ops && adreno_dev->dispatch_ops->fault) - adreno_dev->dispatch_ops->fault(adreno_dev, - ADRENO_GMU_FAULT_SKIP_SNAPSHOT); + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } int gen7_gmu_oob_set(struct kgsl_device *device, @@ -1664,7 +1662,7 @@ static int gen7_gmu_dcvs_set(struct adreno_device *adreno_dev, * dispatcher based reset and recovery. */ if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) - adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT | + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT | ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 444091dcf3..18c23d01bb 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -58,7 +58,7 @@ void gen7_hwsched_fault(struct adreno_device *adreno_dev, u32 fault) */ _wakeup_hw_fence_waiters(adreno_dev, fault); - adreno_hwsched_fault(adreno_dev, fault); + adreno_scheduler_fault(adreno_dev, fault); } static void gen7_hwsched_snapshot_preemption_records(struct kgsl_device *device, @@ -234,7 +234,7 @@ static int gen7_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) icc_set_bw(pwr->icc_path, 0, kBps_to_icc(pwr->ddr_table[level])); /* Clear any hwsched faults that might have been left over */ - adreno_hwsched_clear_fault(adreno_dev); + adreno_clear_gpu_fault(adreno_dev); ret = gen7_gmu_device_start(adreno_dev); if (ret) @@ -326,7 +326,7 @@ static int gen7_hwsched_gmu_boot(struct adreno_device *adreno_dev) gen7_gmu_irq_enable(adreno_dev); /* Clear any hwsched faults that might have been left over */ - adreno_hwsched_clear_fault(adreno_dev); + adreno_clear_gpu_fault(adreno_dev); ret = 
gen7_gmu_device_start(adreno_dev); if (ret) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 47cdf27550..7abe747ce5 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -928,7 +928,6 @@ static void gen7_trigger_syncobj_query(struct adreno_device *adreno_dev, u32 *rcvd) { struct syncobj_query_work *query_work; - struct adreno_hwsched *hwsched = &adreno_dev->hwsched; struct hfi_syncobj_query_cmd *cmd = (struct hfi_syncobj_query_cmd *)rcvd; struct kgsl_context *context = NULL; struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -961,7 +960,7 @@ static void gen7_trigger_syncobj_query(struct adreno_device *adreno_dev, memcpy(&query_work->cmd, cmd, sizeof(*cmd)); query_work->context = context; - kthread_queue_work(hwsched->worker, &query_work->work); + kthread_queue_work(adreno_dev->scheduler_worker, &query_work->work); } /* @@ -1125,7 +1124,7 @@ static void _disable_hw_fence_throttle(struct adreno_device *adreno_dev, bool cl /* Wake up dispatcher and any sleeping threads that want to create hardware fences */ if (max) { adreno_put_gpu_halt(adreno_dev); - adreno_hwsched_trigger(adreno_dev); + adreno_scheduler_queue(adreno_dev); wake_up_all(&hfi->hw_fence.unack_wq); } } @@ -1219,7 +1218,7 @@ static void process_hw_fence_ack(struct adreno_device *adreno_dev, u32 received_ */ if (drawctxt) { kthread_init_work(&hfi->defer_hw_fence_work, gen7_defer_hw_fence_work); - kthread_queue_work(adreno_dev->hwsched.worker, &hfi->defer_hw_fence_work); + kthread_queue_work(adreno_dev->scheduler_worker, &hfi->defer_hw_fence_work); return; } @@ -1270,7 +1269,7 @@ void gen7_hwsched_process_msgq(struct adreno_device *adreno_dev) break; case F2H_MSG_TS_RETIRE: log_profiling_info(adreno_dev, rcvd); - adreno_hwsched_trigger(adreno_dev); + adreno_scheduler_queue(adreno_dev); break; case F2H_MSG_SYNCOBJ_QUERY: gen7_trigger_syncobj_query(adreno_dev, rcvd); @@ -1365,7 +1364,7 @@ static irqreturn_t gen7_hwsched_hfi_handler(int irq, void *data) if 
(status & (HFI_IRQ_MSGQ_MASK | HFI_IRQ_DBGQ_MASK)) { wake_up_interruptible(&hfi->f2h_wq); - adreno_hwsched_trigger(adreno_dev); + adreno_scheduler_queue(adreno_dev); } if (status & HFI_IRQ_CM3_FAULT_MASK) { atomic_set(&gmu->cm3_fault, 1); @@ -3819,7 +3818,7 @@ int gen7_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, int ret; static bool active; - if (adreno_gpu_halt(adreno_dev) || adreno_hwsched_gpu_fault(adreno_dev)) + if (adreno_gpu_halt(adreno_dev) || adreno_gpu_fault(adreno_dev)) return -EBUSY; if (test_bit(CMDOBJ_RECURRING_STOP, &cmdobj->priv)) { diff --git a/adreno_gen7_perfcounter.c b/adreno_gen7_perfcounter.c index 76b5acd093..e1ec9a9eb7 100644 --- a/adreno_gen7_perfcounter.c +++ b/adreno_gen7_perfcounter.c @@ -139,7 +139,7 @@ static int gen7_counter_inline_enable(struct adreno_device *adreno_dev, * rb[0] will not get scheduled to run */ if (adreno_dev->cur_rb != rb) - adreno_dispatcher_schedule(device); + adreno_scheduler_queue(adreno_dev); /* wait for the above commands submitted to complete */ ret = adreno_ringbuffer_waittimestamp(rb, rb->timestamp, diff --git a/adreno_gen7_preempt.c b/adreno_gen7_preempt.c index 38cfbb5d27..bf19d17a4b 100644 --- a/adreno_gen7_preempt.c +++ b/adreno_gen7_preempt.c @@ -59,7 +59,7 @@ static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer, /* If WPTR update fails, set the fault and trigger recovery */ if (ret) { gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); - adreno_dispatcher_fault(adreno_dev, + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } } @@ -99,7 +99,7 @@ static void _gen7_preemption_done(struct adreno_device *adreno_dev) adreno_dev->next_rb->wptr); /* Set a fault and restart */ - adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_PREEMPT_FAULT); return; } @@ -145,7 +145,7 @@ static void _gen7_preemption_fault(struct adreno_device *adreno_dev) adreno_set_preempt_state(adreno_dev, 
ADRENO_PREEMPT_COMPLETE); - adreno_dispatcher_schedule(device); + adreno_scheduler_queue(adreno_dev); return; } } @@ -159,7 +159,7 @@ static void _gen7_preemption_fault(struct adreno_device *adreno_dev) adreno_get_rptr(adreno_dev->next_rb), adreno_dev->next_rb->wptr); - adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_PREEMPT_FAULT); } static void _gen7_preemption_worker(struct work_struct *work) @@ -379,10 +379,10 @@ err: /* If fenced write fails, take inline snapshot and trigger recovery */ if (!in_interrupt()) { gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); - adreno_dispatcher_fault(adreno_dev, + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } else { - adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT); } adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE); /* Clear the keep alive */ @@ -413,7 +413,7 @@ void gen7_preemption_callback(struct adreno_device *adreno_dev, int bit) * there then we have to assume something bad happened */ adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE); - adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev)); + adreno_scheduler_queue(adreno_dev); return; } diff --git a/adreno_gen7_ringbuffer.c b/adreno_gen7_ringbuffer.c index 11d9036594..9356df94e2 100644 --- a/adreno_gen7_ringbuffer.c +++ b/adreno_gen7_ringbuffer.c @@ -195,7 +195,7 @@ int gen7_ringbuffer_submit(struct adreno_ringbuffer *rb, * recovery. 
*/ gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); - adreno_dispatcher_fault(adreno_dev, + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } diff --git a/adreno_gen8.c b/adreno_gen8.c index 442ec114a3..fd0d22258c 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -2121,12 +2121,10 @@ static const char *gen8_iommu_fault_block(struct kgsl_device *device, static void gen8_cp_callback(struct adreno_device *adreno_dev, int bit) { - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - if (adreno_is_preemption_enabled(adreno_dev)) gen8_preemption_trigger(adreno_dev, true); - adreno_dispatcher_schedule(device); + adreno_scheduler_queue(adreno_dev); } /* @@ -2148,7 +2146,7 @@ static void gen8_gpc_err_int_callback(struct adreno_device *adreno_dev, int bit) adreno_irqctrl(adreno_dev, 0); /* Trigger a fault in the dispatcher - this will effect a restart */ - adreno_dispatcher_fault(adreno_dev, ADRENO_SOFT_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_SOFT_FAULT); } /* @@ -2184,7 +2182,7 @@ static void gen8_swfuse_violation_callback(struct adreno_device *adreno_dev, int /* Trigger a fault in the dispatcher for LPAC and RAYTRACING violation */ if (status & GENMASK(GEN8_RAYTRACING_SW_FUSE, GEN8_LPAC_SW_FUSE)) { adreno_irqctrl(adreno_dev, 0); - adreno_dispatcher_fault(adreno_dev, ADRENO_HARD_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_HARD_FAULT); } } @@ -2309,7 +2307,7 @@ static irqreturn_t gen8_irq_handler(struct adreno_device *adreno_dev) gen8_gpu_keepalive(adreno_dev, true); if (gen8_irq_poll_fence(adreno_dev)) { - adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT); goto done; } diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index 291fb52822..0cf50b1e2a 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -512,9 +512,7 @@ static void trigger_reset_recovery(struct adreno_device *adreno_dev, if (req != oob_perfcntr) return; - if (adreno_dev->dispatch_ops && 
adreno_dev->dispatch_ops->fault) - adreno_dev->dispatch_ops->fault(adreno_dev, - ADRENO_GMU_FAULT_SKIP_SNAPSHOT); + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } int gen8_gmu_oob_set(struct kgsl_device *device, @@ -1529,7 +1527,7 @@ static int gen8_gmu_dcvs_set(struct adreno_device *adreno_dev, * dispatcher based reset and recovery. */ if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) - adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT | + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT | ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 72976fa719..4d1f7db36e 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -59,7 +59,7 @@ void gen8_hwsched_fault(struct adreno_device *adreno_dev, u32 fault) */ _wakeup_hw_fence_waiters(adreno_dev, fault); - adreno_hwsched_fault(adreno_dev, fault); + adreno_scheduler_fault(adreno_dev, fault); } static void gen8_hwsched_snapshot_preemption_records(struct kgsl_device *device, @@ -315,7 +315,7 @@ static int gen8_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) gen8_hwsched_soccp_vote(adreno_dev, true); /* Clear any hwsched faults that might have been left over */ - adreno_hwsched_clear_fault(adreno_dev); + adreno_clear_gpu_fault(adreno_dev); ret = gen8_gmu_device_start(adreno_dev); if (ret) @@ -410,7 +410,7 @@ static int gen8_hwsched_gmu_boot(struct adreno_device *adreno_dev) gen8_hwsched_soccp_vote(adreno_dev, true); /* Clear any hwsched faults that might have been left over */ - adreno_hwsched_clear_fault(adreno_dev); + adreno_clear_gpu_fault(adreno_dev); ret = gen8_gmu_device_start(adreno_dev); if (ret) diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 701af8c8d2..21c7deec27 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -815,7 +815,6 @@ static void gen8_trigger_syncobj_query(struct adreno_device *adreno_dev, u32 *rcvd) { struct syncobj_query_work *query_work; - struct 
adreno_hwsched *hwsched = &adreno_dev->hwsched; struct hfi_syncobj_query_cmd *cmd = (struct hfi_syncobj_query_cmd *)rcvd; struct kgsl_context *context = NULL; struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -848,7 +847,7 @@ static void gen8_trigger_syncobj_query(struct adreno_device *adreno_dev, memcpy(&query_work->cmd, cmd, sizeof(*cmd)); query_work->context = context; - kthread_queue_work(hwsched->worker, &query_work->work); + kthread_queue_work(adreno_dev->scheduler_worker, &query_work->work); } /* @@ -1011,7 +1010,7 @@ static void _disable_hw_fence_throttle(struct adreno_device *adreno_dev, bool cl /* Wake up dispatcher and any sleeping threads that want to create hardware fences */ if (max) { adreno_put_gpu_halt(adreno_dev); - adreno_hwsched_trigger(adreno_dev); + adreno_scheduler_queue(adreno_dev); wake_up_all(&hfi->hw_fence.unack_wq); } } @@ -1106,7 +1105,7 @@ static void process_hw_fence_ack(struct adreno_device *adreno_dev, u32 received_ */ if (drawctxt) { kthread_init_work(&hfi->defer_hw_fence_work, gen8_defer_hw_fence_work); - kthread_queue_work(adreno_dev->hwsched.worker, &hfi->defer_hw_fence_work); + kthread_queue_work(adreno_dev->scheduler_worker, &hfi->defer_hw_fence_work); return; } @@ -1157,7 +1156,7 @@ void gen8_hwsched_process_msgq(struct adreno_device *adreno_dev) break; case F2H_MSG_TS_RETIRE: log_profiling_info(adreno_dev, rcvd); - adreno_hwsched_trigger(adreno_dev); + adreno_scheduler_queue(adreno_dev); break; case F2H_MSG_SYNCOBJ_QUERY: gen8_trigger_syncobj_query(adreno_dev, rcvd); @@ -1252,7 +1251,7 @@ static irqreturn_t gen8_hwsched_hfi_handler(int irq, void *data) if (status & (HFI_IRQ_MSGQ_MASK | HFI_IRQ_DBGQ_MASK)) { wake_up_interruptible(&hfi->f2h_wq); - adreno_hwsched_trigger(adreno_dev); + adreno_scheduler_queue(adreno_dev); } if (status & HFI_IRQ_CM3_FAULT_MASK) { atomic_set(&gmu->cm3_fault, 1); @@ -3545,7 +3544,7 @@ int gen8_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, int ret; static bool active; - if 
(adreno_gpu_halt(adreno_dev) || adreno_hwsched_gpu_fault(adreno_dev)) + if (adreno_gpu_halt(adreno_dev) || adreno_gpu_fault(adreno_dev)) return -EBUSY; if (test_bit(CMDOBJ_RECURRING_STOP, &cmdobj->priv)) { diff --git a/adreno_gen8_preempt.c b/adreno_gen8_preempt.c index b675a5588a..d6e07b767c 100644 --- a/adreno_gen8_preempt.c +++ b/adreno_gen8_preempt.c @@ -59,7 +59,7 @@ static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer, /* If WPTR update fails, set the fault and trigger recovery */ if (ret) { gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); - adreno_dispatcher_fault(adreno_dev, + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } } @@ -99,7 +99,7 @@ static void _gen8_preemption_done(struct adreno_device *adreno_dev) adreno_dev->next_rb->wptr); /* Set a fault and restart */ - adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_PREEMPT_FAULT); return; } @@ -145,7 +145,7 @@ static void _gen8_preemption_fault(struct adreno_device *adreno_dev) adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE); - adreno_dispatcher_schedule(device); + adreno_scheduler_queue(adreno_dev); return; } } @@ -159,7 +159,7 @@ static void _gen8_preemption_fault(struct adreno_device *adreno_dev) adreno_get_rptr(adreno_dev->next_rb), adreno_dev->next_rb->wptr); - adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_PREEMPT_FAULT); } static void _gen8_preemption_worker(struct work_struct *work) @@ -379,10 +379,10 @@ err: /* If fenced write fails, take inline snapshot and trigger recovery */ if (!in_interrupt()) { gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); - adreno_dispatcher_fault(adreno_dev, + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } else { - adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT); } adreno_set_preempt_state(adreno_dev, 
ADRENO_PREEMPT_NONE); /* Clear the keep alive */ @@ -413,7 +413,7 @@ void gen8_preemption_callback(struct adreno_device *adreno_dev, int bit) * there then we have to assume something bad happened */ adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE); - adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev)); + adreno_scheduler_queue(adreno_dev); return; } diff --git a/adreno_gen8_ringbuffer.c b/adreno_gen8_ringbuffer.c index a959e89450..87c5592fe6 100644 --- a/adreno_gen8_ringbuffer.c +++ b/adreno_gen8_ringbuffer.c @@ -197,7 +197,7 @@ int gen8_ringbuffer_submit(struct adreno_ringbuffer *rb, * recovery. */ gmu_core_fault_snapshot(device, GMU_FAULT_PANIC_NONE); - adreno_dispatcher_fault(adreno_dev, + adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 1b5528ad08..b0c3e4e0e0 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -376,13 +376,6 @@ static int hwsched_queue_context(struct adreno_device *adreno_dev, return 0; } -void adreno_hwsched_flush(struct adreno_device *adreno_dev) -{ - struct adreno_hwsched *hwsched = &adreno_dev->hwsched; - - kthread_flush_worker(hwsched->worker); -} - /** * is_marker_skip() - Check if the draw object is a MARKEROBJ_TYPE and CMDOBJ_SKIP bit is set */ @@ -403,12 +396,10 @@ static bool is_marker_skip(struct kgsl_drawobj *drawobj) static bool _abort_submission(struct adreno_device *adreno_dev) { - struct adreno_hwsched *hwsched = &adreno_dev->hwsched; - /* We only need a single barrier before reading all the atomic variables below */ smp_rmb(); - if (atomic_read(&adreno_dev->halt) || atomic_read(&hwsched->fault)) + if (atomic_read(&adreno_dev->halt) || atomic_read(&adreno_dev->scheduler_fault)) return true; return false; @@ -693,13 +684,6 @@ static void hwsched_issuecmds(struct adreno_device *adreno_dev) hwsched_handle_jobs(adreno_dev, i); } -void adreno_hwsched_trigger(struct adreno_device *adreno_dev) -{ - struct adreno_hwsched *hwsched = 
&adreno_dev->hwsched; - - kthread_queue_work(hwsched->worker, &hwsched->work); -} - static inline void _decrement_submit_now(struct kgsl_device *device) { spin_lock(&device->submit_lock); @@ -707,13 +691,6 @@ static inline void _decrement_submit_now(struct kgsl_device *device) spin_unlock(&device->submit_lock); } -u32 adreno_hwsched_gpu_fault(struct adreno_device *adreno_dev) -{ - /* make sure we're reading the latest value */ - smp_rmb(); - return atomic_read(&adreno_dev->hwsched.fault); -} - /** * adreno_hwsched_issuecmds() - Issue commmands from pending contexts * @adreno_dev: Pointer to the adreno device struct @@ -740,7 +717,7 @@ static void adreno_hwsched_issuecmds(struct adreno_device *adreno_dev) goto done; } - if (!adreno_hwsched_gpu_fault(adreno_dev)) + if (!adreno_gpu_fault(adreno_dev)) hwsched_issuecmds(adreno_dev); if (hwsched->inflight > 0) { @@ -755,7 +732,7 @@ static void adreno_hwsched_issuecmds(struct adreno_device *adreno_dev) return; done: - adreno_hwsched_trigger(adreno_dev); + adreno_scheduler_queue(adreno_dev); } /** @@ -1235,7 +1212,7 @@ static void adreno_hwsched_queue_context(struct adreno_device *adreno_dev, struct adreno_context *drawctxt) { hwsched_queue_context(adreno_dev, drawctxt); - adreno_hwsched_trigger(adreno_dev); + adreno_scheduler_queue(adreno_dev); } void adreno_hwsched_start(struct adreno_device *adreno_dev) @@ -1244,7 +1221,7 @@ void adreno_hwsched_start(struct adreno_device *adreno_dev) complete_all(&device->halt_gate); - adreno_hwsched_trigger(adreno_dev); + adreno_scheduler_queue(adreno_dev); } static void change_preemption(struct adreno_device *adreno_dev, void *priv) @@ -1328,8 +1305,8 @@ static void adreno_hwsched_dispatcher_close(struct adreno_device *adreno_dev) struct adreno_hwsched *hwsched = &adreno_dev->hwsched; struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - if (!IS_ERR_OR_NULL(hwsched->worker)) - kthread_destroy_worker(hwsched->worker); + if (!IS_ERR_OR_NULL(adreno_dev->scheduler_worker)) + 
kthread_destroy_worker(adreno_dev->scheduler_worker); adreno_set_dispatch_ops(adreno_dev, NULL); @@ -1885,10 +1862,29 @@ static bool adreno_hwsched_do_fault(struct adreno_device *adreno_dev) struct adreno_hwsched *hwsched = &adreno_dev->hwsched; int fault; - fault = atomic_xchg(&hwsched->fault, 0); + fault = adreno_gpu_fault(adreno_dev); if (fault == 0) return false; + /* + * Return early if there is a concurrent suspend in progress. The suspend thread will error + * out in the presence of this hwsched fault and requeue the dispatcher to handle this fault + */ + if (!mutex_trylock(&adreno_dev->fault_recovery_mutex)) + return true; + + /* + * Wait long enough to allow the system to come out of suspend completely, which can take + * variable amount of time especially if it has to rewind suspend processes and devices. + */ + if (!wait_for_completion_timeout(&adreno_dev->suspend_recovery_gate, + msecs_to_jiffies(ADRENO_SUSPEND_RECOVERY_GATE_TIMEOUT_MS))) { + dev_err(device->dev, "suspend recovery gate timeout\n"); + adreno_scheduler_queue(adreno_dev); + mutex_unlock(&adreno_dev->fault_recovery_mutex); + return true; + } + mutex_lock(&device->mutex); if (test_bit(ADRENO_HWSCHED_CTX_BAD_LEGACY, &hwsched->flags)) @@ -1896,19 +1892,19 @@ static bool adreno_hwsched_do_fault(struct adreno_device *adreno_dev) else adreno_hwsched_reset_and_snapshot(adreno_dev, fault); - adreno_hwsched_trigger(adreno_dev); + adreno_scheduler_queue(adreno_dev); mutex_unlock(&device->mutex); + mutex_unlock(&adreno_dev->fault_recovery_mutex); return true; } static void adreno_hwsched_work(struct kthread_work *work) { - struct adreno_hwsched *hwsched = container_of(work, - struct adreno_hwsched, work); - struct adreno_device *adreno_dev = container_of(hwsched, - struct adreno_device, hwsched); + struct adreno_device *adreno_dev = container_of(work, + struct adreno_device, scheduler_work); + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; struct kgsl_device *device = KGSL_DEVICE(adreno_dev); 
mutex_lock(&hwsched->mutex); @@ -1942,28 +1938,6 @@ static void adreno_hwsched_work(struct kthread_work *work) mutex_unlock(&hwsched->mutex); } -void adreno_hwsched_fault(struct adreno_device *adreno_dev, - u32 fault) -{ - struct adreno_hwsched *hwsched = &adreno_dev->hwsched; - u32 curr = atomic_read(&hwsched->fault); - - atomic_set(&hwsched->fault, curr | fault); - - /* make sure fault is written before triggering dispatcher */ - smp_wmb(); - - adreno_hwsched_trigger(adreno_dev); -} - -void adreno_hwsched_clear_fault(struct adreno_device *adreno_dev) -{ - atomic_set(&adreno_dev->hwsched.fault, 0); - - /* make sure other CPUs see the update */ - smp_wmb(); -} - static void adreno_hwsched_create_hw_fence(struct adreno_device *adreno_dev, struct kgsl_sync_fence *kfence) { @@ -1990,9 +1964,7 @@ static const struct adreno_dispatch_ops hwsched_ops = { .close = adreno_hwsched_dispatcher_close, .queue_cmds = adreno_hwsched_queue_cmds, .queue_context = adreno_hwsched_queue_context, - .fault = adreno_hwsched_fault, .create_hw_fence = adreno_hwsched_create_hw_fence, - .get_fault = adreno_hwsched_gpu_fault, }; static void hwsched_lsr_check(struct work_struct *work) @@ -2032,15 +2004,15 @@ int adreno_hwsched_init(struct adreno_device *adreno_dev, if (!hwsched->ctxt_bad) return -ENOMEM; - hwsched->worker = kthread_create_worker(0, "kgsl_hwsched"); - if (IS_ERR(hwsched->worker)) { + adreno_dev->scheduler_worker = kthread_create_worker(0, "kgsl_hwsched"); + if (IS_ERR(adreno_dev->scheduler_worker)) { kfree(hwsched->ctxt_bad); - return PTR_ERR(hwsched->worker); + return PTR_ERR(adreno_dev->scheduler_worker); } mutex_init(&hwsched->mutex); - kthread_init_work(&hwsched->work, adreno_hwsched_work); + kthread_init_work(&adreno_dev->scheduler_work, adreno_hwsched_work); jobs_cache = KMEM_CACHE(adreno_dispatch_job, 0); obj_cache = KMEM_CACHE(cmd_list_obj, 0); @@ -2052,7 +2024,7 @@ int adreno_hwsched_init(struct adreno_device *adreno_dev, init_llist_head(&hwsched->requeue[i]); } - 
sched_set_fifo(hwsched->worker->task); + sched_set_fifo(adreno_dev->scheduler_worker->task); WARN_ON(sysfs_create_files(&device->dev->kobj, _hwsched_attr_list)); adreno_set_dispatch_ops(adreno_dev, &hwsched_ops); @@ -2182,17 +2154,25 @@ static int hwsched_idle(struct adreno_device *adreno_dev) * or pending dispatcher works on worker are * finished */ - adreno_hwsched_flush(adreno_dev); + kthread_flush_worker(adreno_dev->scheduler_worker); - ret = wait_for_completion_timeout(&hwsched->idle_gate, - msecs_to_jiffies(ADRENO_IDLE_TIMEOUT)); - if (ret == 0) { - ret = -ETIMEDOUT; - WARN(1, "hwsched halt timeout\n"); - } else if (ret < 0) { - dev_err(device->dev, "hwsched halt failed %d\n", ret); + if (adreno_gpu_fault(adreno_dev)) { + ret = -EDEADLK; } else { - ret = 0; + + ret = wait_for_completion_timeout(&hwsched->idle_gate, + msecs_to_jiffies(ADRENO_IDLE_TIMEOUT)); + if (ret == 0) { + ret = -ETIMEDOUT; + WARN(1, "hwsched halt timeout\n"); + } else if (ret < 0) { + dev_err(device->dev, "hwsched halt failed %d\n", ret); + } else { + ret = 0; + } + + if (adreno_gpu_fault(adreno_dev)) + ret = -EDEADLK; } mutex_lock(&device->mutex); @@ -2203,11 +2183,6 @@ static int hwsched_idle(struct adreno_device *adreno_dev) */ adreno_put_gpu_halt(adreno_dev); - /* - * Requeue dispatcher work to resubmit pending commands - * that may have been blocked due to this idling request - */ - adreno_hwsched_trigger(adreno_dev); return ret; } @@ -2229,7 +2204,7 @@ int adreno_hwsched_idle(struct adreno_device *adreno_dev) return ret; do { - if (adreno_hwsched_gpu_fault(adreno_dev)) + if (adreno_gpu_fault(adreno_dev)) return -EDEADLK; if (gpudev->hw_isidle(adreno_dev)) @@ -2241,7 +2216,7 @@ int adreno_hwsched_idle(struct adreno_device *adreno_dev) * without checking if the gpu is idle. check one last time before we * return failure. 
*/ - if (adreno_hwsched_gpu_fault(adreno_dev)) + if (adreno_gpu_fault(adreno_dev)) return -EDEADLK; if (gpudev->hw_isidle(adreno_dev)) diff --git a/adreno_hwsched.h b/adreno_hwsched.h index d78bc871da..59440ddec6 100644 --- a/adreno_hwsched.h +++ b/adreno_hwsched.h @@ -72,13 +72,8 @@ struct adreno_hwsched { struct llist_head jobs[16]; /** @requeue - Array of lists for dispatch jobs that got requeued */ struct llist_head requeue[16]; - /** @work: The work structure to execute dispatcher function */ - struct kthread_work work; /** @cmd_list: List of objects submitted to dispatch queues */ struct list_head cmd_list; - /** @fault: Atomic to record a fault */ - atomic_t fault; - struct kthread_worker *worker; /** @hwsched_ops: Container for target specific hwscheduler ops */ const struct adreno_hwsched_ops *hwsched_ops; /** @ctxt_bad: Container for the context bad hfi packet */ @@ -124,14 +119,6 @@ enum adreno_hwsched_flags { ADRENO_HWSCHED_HW_FENCE, }; -/** - * adreno_hwsched_trigger - Function to schedule the hwsched thread - * @adreno_dev: A handle to adreno device - * - * Schedule the hw dispatcher for retiring and submitting command objects - */ -void adreno_hwsched_trigger(struct adreno_device *adreno_dev); - /** * adreno_hwsched_start() - activate the hwsched dispatcher * @adreno_dev: pointer to the adreno device @@ -150,21 +137,6 @@ void adreno_hwsched_start(struct adreno_device *adreno_dev); int adreno_hwsched_init(struct adreno_device *adreno_dev, const struct adreno_hwsched_ops *hwsched_ops); -/** - * adreno_hwsched_fault - Set hwsched fault to request recovery - * @adreno_dev: A handle to adreno device - * @fault: The type of fault - */ -void adreno_hwsched_fault(struct adreno_device *adreno_dev, u32 fault); - -/** - * adreno_hwsched_clear_fault() - Clear the hwsched fault - * @adreno_dev: A pointer to an adreno_device structure - * - * Clear the hwsched fault status for adreno device - */ -void adreno_hwsched_clear_fault(struct adreno_device *adreno_dev); - 
/** * adreno_hwsched_parse_fault_ib - Parse the faulty submission * @adreno_dev: pointer to the adreno device @@ -176,8 +148,6 @@ void adreno_hwsched_clear_fault(struct adreno_device *adreno_dev); void adreno_hwsched_parse_fault_cmdobj(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot); -void adreno_hwsched_flush(struct adreno_device *adreno_dev); - /** * adreno_hwsched_unregister_contexts - Reset context gmu_registered bit * @adreno_dev: pointer to the adreno device diff --git a/kgsl_iommu.c b/kgsl_iommu.c index f7d96fdc45..44f09dca4a 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -1250,9 +1250,7 @@ static int kgsl_iommu_fault_handler(struct kgsl_mmu *mmu, ctx->stalled_on_fault = true; /* Go ahead with recovery*/ - if (adreno_dev->dispatch_ops && adreno_dev->dispatch_ops->fault) - adreno_dev->dispatch_ops->fault(adreno_dev, - ADRENO_IOMMU_PAGE_FAULT); + adreno_scheduler_fault(adreno_dev, ADRENO_IOMMU_PAGE_FAULT); } kgsl_context_put(context); From 03a6ece1b20cb782eba1af94914645651a0286e5 Mon Sep 17 00:00:00 2001 From: Raviteja Narayanam Date: Mon, 22 Jul 2024 11:42:41 +0530 Subject: [PATCH 0863/1016] kgsl: gen8: Fix UCHE_CLIENT_PF nc_register override Currently, the value set for GEN8_UCHE_CLIENT_PF register from debugfs via nonctxt_override is not reflected as there is a subsequent write to the same register in gen8_start() function. So, move the default value regwrite before nc_override. 
Change-Id: If9cdbac748b03c5e783ae0f5c50eed682fd65b67 Signed-off-by: Raviteja Narayanam --- adreno_gen8.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index 442ec114a3..a79bfe6c2f 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -1443,6 +1443,9 @@ int gen8_start(struct adreno_device *adreno_dev) kgsl_regmap_multi_write(&device->regmap, gen8_0_0_bicubic_regs, ARRAY_SIZE(gen8_0_0_bicubic_regs)); + kgsl_regwrite(device, GEN8_UCHE_CLIENT_PF, BIT(7) | + FIELD_PREP(GENMASK(6, 0), adreno_dev->uche_client_pf)); + /* Program noncontext registers */ gen8_nonctxt_regconfig(adreno_dev); @@ -1451,9 +1454,6 @@ int gen8_start(struct adreno_device *adreno_dev) FIELD_PREP(GENMASK(27, 0), gen8_core->hang_detect_cycles)); kgsl_regwrite(device, GEN8_RBBM_SLICE_INTERFACE_HANG_INT_CNTL, BIT(30)); - kgsl_regwrite(device, GEN8_UCHE_CLIENT_PF, BIT(7) | - FIELD_PREP(GENMASK(6, 0), adreno_dev->uche_client_pf)); - /* Enable the GMEM save/restore feature for preemption */ if (adreno_is_preemption_enabled(adreno_dev)) { gen8_regwrite_aperture(device, From 6c8ce59852486cc44bb35750e9b695348f505ae3 Mon Sep 17 00:00:00 2001 From: Rakesh Naidu Bhaviripudi Date: Fri, 23 Feb 2024 08:48:39 +0530 Subject: [PATCH 0864/1016] kgsl: Add support for A611 GPU Add new entry in the gpulist to support A611 GPU. This squashes following commits commit c7713baf4c58 ("kgsl: Add support for A611 GPU with updated sequence") commit 69a55ba20856 ("kgsl: build: Enable shmem memory and process reclaim for pitti"). 
Change-Id: I82a94a896f7cd5ef9ac2cddc31a54dd595e04087 Signed-off-by: Harshitha Sai Neelati Signed-off-by: Rakesh Naidu Bhaviripudi --- adreno-gpulist.h | 26 ++++++++++++++++++++++++++ adreno.h | 10 ++++++++++ adreno_a6xx.c | 12 ++++++------ config/gki_pitti.conf | 12 +++++++++++- config/pitti_consolidate_gpuconf | 4 +++- 5 files changed, 56 insertions(+), 8 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 28403c1733..9ba15743ad 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -1510,6 +1510,31 @@ static const struct adreno_a6xx_core adreno_gpu_core_a610 = { .highest_bank_bit = 14, }; +static const struct adreno_a6xx_core adreno_gpu_core_a611 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A611, 6, 1, 1, ANY_ID), + .compatible = "qcom,adreno-gpu-a611", + .features = ADRENO_CONTENT_PROTECTION | + ADRENO_PREEMPTION, + .gpudev = &adreno_a6xx_gpudev, + .perfcounters = &adreno_a6xx_legacy_perfcounters, + .uche_gmem_alignment = SZ_1M, + .gmem_size = (SZ_128K + SZ_4K), + .bus_width = 32, + .snapshot_size = SZ_1M, + }, + .prim_fifo_threshold = 0x00080000, + .sqefw_name = "a630_sqe.fw", + .zap_name = "a610_zap.mbn", + .hwcg = a612_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a612_hwcg_regs), + .vbif = a640_vbif_regs, + .vbif_count = ARRAY_SIZE(a640_vbif_regs), + .hang_detect_cycles = 0x3ffff, + .protected_regs = a630_protected_regs, + .highest_bank_bit = 14, +}; + static const struct kgsl_regmap_list a660_hwcg_regs[] = { {A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, {A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, @@ -3157,6 +3182,7 @@ static const struct adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_a612.base, &adreno_gpu_core_a616.base, &adreno_gpu_core_a610.base, + &adreno_gpu_core_a611.base, &adreno_gpu_core_a660_shima.base, &adreno_gpu_core_a702.base, &adreno_gpu_core_gen7_0_0.base, diff --git a/adreno.h b/adreno.h index 537e53b304..2e77efa431 100644 --- a/adreno.h +++ b/adreno.h @@ -214,6 +214,7 @@ enum adreno_gpurev { ADRENO_REV_A530 = 530, ADRENO_REV_A540 
= 540, ADRENO_REV_A610 = 610, + ADRENO_REV_A611 = 611, ADRENO_REV_A612 = 612, ADRENO_REV_A615 = 615, ADRENO_REV_A616 = 616, @@ -1066,6 +1067,7 @@ extern const struct adreno_gpudev adreno_a5xx_gpudev; extern const struct adreno_gpudev adreno_a6xx_gpudev; extern const struct adreno_gpudev adreno_a6xx_rgmu_gpudev; extern const struct adreno_gpudev adreno_a619_holi_gpudev; +extern const struct adreno_gpudev adreno_a611_gpudev; extern int adreno_wake_nice; extern unsigned int adreno_wake_timeout; @@ -1189,6 +1191,7 @@ static inline int adreno_is_a660_shima(struct adreno_device *adreno_dev) } ADRENO_TARGET(a610, ADRENO_REV_A610) +ADRENO_TARGET(a611, ADRENO_REV_A611) ADRENO_TARGET(a612, ADRENO_REV_A612) ADRENO_TARGET(a618, ADRENO_REV_A618) ADRENO_TARGET(a619, ADRENO_REV_A619) @@ -1264,6 +1267,13 @@ static inline int adreno_is_a620(struct adreno_device *adreno_dev) return (rev == ADRENO_REV_A620 || rev == ADRENO_REV_A621); } +static inline int adreno_is_a610_family(struct adreno_device *adreno_dev) +{ + u32 rev = ADRENO_GPUREV(adreno_dev); + + return (rev == ADRENO_REV_A610 || rev == ADRENO_REV_A611); +} + static inline int adreno_is_a640v2(struct adreno_device *adreno_dev) { return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A640) && diff --git a/adreno_a6xx.c b/adreno_a6xx.c index 8996aca245..3e5b8269ba 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -275,7 +275,7 @@ __get_rbbm_clock_cntl_on(struct adreno_device *adreno_dev) { if (adreno_is_a630(adreno_dev)) return 0x8AA8AA02; - else if (adreno_is_a612(adreno_dev) || adreno_is_a610(adreno_dev)) + else if (adreno_is_a612(adreno_dev) || adreno_is_a610_family(adreno_dev)) return 0xAAA8AA82; else if (adreno_is_a702(adreno_dev)) return 0xAAAAAA82; @@ -378,7 +378,7 @@ static void a6xx_hwcg_set(struct adreno_device *adreno_dev, bool on) */ if (gmu_core_isenabled(device) && !adreno_is_a612(adreno_dev) && - !adreno_is_a610(adreno_dev) && !adreno_is_a702(adreno_dev)) + !adreno_is_a610_family(adreno_dev) && 
!adreno_is_a702(adreno_dev)) gmu_core_regrmw(device, A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0); else if (adreno_is_a619_holi(adreno_dev)) @@ -398,7 +398,7 @@ static void a6xx_hwcg_set(struct adreno_device *adreno_dev, bool on) * Hence skip GMU_GX registers for A612. */ if (gmu_core_isenabled(device) && !adreno_is_a612(adreno_dev) && - !adreno_is_a610(adreno_dev) && !adreno_is_a702(adreno_dev)) + !adreno_is_a610_family(adreno_dev) && !adreno_is_a702(adreno_dev)) gmu_core_regrmw(device, A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1); else if (adreno_is_a619_holi(adreno_dev)) @@ -624,8 +624,8 @@ void a6xx_start(struct adreno_device *adreno_dev) !adreno_is_a702(adreno_dev)) { kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140); kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362C); - } else if (adreno_is_a612(adreno_dev) || adreno_is_a610(adreno_dev) || - adreno_is_a702(adreno_dev)) { + } else if (adreno_is_a612(adreno_dev) || adreno_is_a610_family(adreno_dev) || + adreno_is_a702(adreno_dev)) { kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_2, 0x00800060); kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_1, 0x40201b16); } else { @@ -642,7 +642,7 @@ void a6xx_start(struct adreno_device *adreno_dev) kgsl_regwrite(device, A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020); } - if (adreno_is_a612(adreno_dev) || adreno_is_a610(adreno_dev)) { + if (adreno_is_a612(adreno_dev) || adreno_is_a610_family(adreno_dev)) { /* For A612 and A610 Mem pool size is reduced to 48 */ kgsl_regwrite(device, A6XX_CP_MEM_POOL_SIZE, 48); kgsl_regwrite(device, A6XX_CP_MEM_POOL_DBG_ADDR, 47); diff --git a/config/gki_pitti.conf b/config/gki_pitti.conf index a414481466..6febff2590 100644 --- a/config/gki_pitti.conf +++ b/config/gki_pitti.conf @@ -1,11 +1,16 @@ # SPDX-License-Identifier: GPL-2.0-only -# Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. +# Copyright (c) 2023-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
CONFIG_QCOM_KGSL_SORT_POOL = y CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR = \"msm-adreno-tz\" +ifneq ($(CONFIG_SHMEM),) + CONFIG_QCOM_KGSL_USE_SHMEM = y + CONFIG_QCOM_KGSL_PROCESS_RECLAIM = y +endif + ifneq ($(CONFIG_CORESIGHT),) CONFIG_QCOM_KGSL_CORESIGHT = y endif @@ -18,3 +23,8 @@ ccflags-y += -DCONFIG_QCOM_KGSL_SORT_POOL=1 \ ifneq ($(CONFIG_CORESIGHT),) ccflags-y += -DCONFIG_QCOM_KGSL_CORESIGHT=1 endif + +ifneq ($(CONFIG_SHMEM),) + ccflags-y += -DCONFIG_QCOM_KGSL_USE_SHMEM=1 \ + -DCONFIG_QCOM_KGSL_PROCESS_RECLAIM=1 +endif diff --git a/config/pitti_consolidate_gpuconf b/config/pitti_consolidate_gpuconf index 953bb31391..8547359329 100644 --- a/config/pitti_consolidate_gpuconf +++ b/config/pitti_consolidate_gpuconf @@ -1,8 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only -# Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. +# Copyright (c) 2023-2024 Qualcomm Innovation Center, Inc. All rights reserved. CONFIG_QCOM_KGSL=m CONFIG_QCOM_KGSL_SORT_POOL=y CONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR="msm-adreno-tz" +CONFIG_QCOM_KGSL_USE_SHMEM=y +CONFIG_QCOM_KGSL_PROCESS_RECLAIM=y From 8484d7c0200d42c063da1da8e0a215d3240324f9 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Wed, 3 Jul 2024 17:08:20 -0600 Subject: [PATCH 0865/1016] kgsl: hwfence: Vote for soccp before calling synx_signal It is possible that synx driver may send interrupts to soccp when a client calls synx_signal(). Make sure soccp is voted on to be able to handle these interrupts. 
Change-Id: Ia6c1c06c32782f41aedb0cece7a649d4c790b27f Signed-off-by: Harshdeep Dhatt --- adreno_gen8_hwsched.c | 2 +- adreno_gen8_hwsched.h | 9 +++++- adreno_gen8_hwsched_hfi.c | 61 +++++++++++++++++++++++++++++++-------- kgsl_sync.c | 5 ++++ 4 files changed, 63 insertions(+), 14 deletions(-) diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 72976fa719..c452288cdb 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -218,7 +218,7 @@ static void gen8_hwsched_soccp_vote_init(struct adreno_device *adreno_dev) clear_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags); } -static void gen8_hwsched_soccp_vote(struct adreno_device *adreno_dev, bool pwr_on) +void gen8_hwsched_soccp_vote(struct adreno_device *adreno_dev, bool pwr_on) { struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev); diff --git a/adreno_gen8_hwsched.h b/adreno_gen8_hwsched.h index 0d166f8114..88befbdaa8 100644 --- a/adreno_gen8_hwsched.h +++ b/adreno_gen8_hwsched.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef _ADRENO_GEN8_HWSCHED_H_ @@ -103,4 +103,11 @@ int gen8_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, */ void gen8_hwsched_fault(struct adreno_device *adreno_dev, u32 fault); +/** + * gen8_hwsched_soccp_vote - Vote for soccp power + * @adreno_dev: A handle to adreno device + * @pwr_on: Boolean to turn soccp on/off + */ +void gen8_hwsched_soccp_vote(struct adreno_device *adreno_dev, bool pwr_on); + #endif diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 701af8c8d2..fc016a0353 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -3031,6 +3031,31 @@ static void move_detached_context_hardware_fences(struct adreno_device *adreno_d } } +static int drain_context_hw_fence_gmu(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + struct adreno_hw_fence_entry *entry, *tmp; + int ret = 0; + + list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) { + + ret = gen8_send_hw_fence_hfi_wait_ack(adreno_dev, entry, + HW_FENCE_FLAG_SKIP_MEMSTORE); + if (ret) + break; + + adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); + } + + if (ret) { + move_detached_context_hardware_fences(adreno_dev, drawctxt); + gmu_core_fault_snapshot(KGSL_DEVICE(adreno_dev), GMU_FAULT_HW_FENCE); + gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); + } + + return ret; +} + /** * check_detached_context_hardware_fences - When this context has been un-registered with the GMU, * make sure all the hardware fences(that were sent to GMU) for this context have been sent to @@ -3061,17 +3086,7 @@ static int check_detached_context_hardware_fences(struct adreno_device *adreno_d } /* Send hardware fences (to TxQueue) that were not dispatched to GMU */ - list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) { - - ret = gen8_send_hw_fence_hfi_wait_ack(adreno_dev, entry, - HW_FENCE_FLAG_SKIP_MEMSTORE); - if (ret) - goto fault; - - adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); - } - - 
return 0; + return drain_context_hw_fence_gmu(adreno_dev, drawctxt); fault: move_detached_context_hardware_fences(adreno_dev, drawctxt); @@ -3629,12 +3644,34 @@ static void drain_context_hw_fence_cpu(struct adreno_device *adreno_dev, { struct adreno_hw_fence_entry *entry, *tmp; + /* + * Triggering these fences from HLOS may send interrupts to soccp. Hence, vote for soccp + * here + */ + gen8_hwsched_soccp_vote(adreno_dev, true); + list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) { kgsl_hw_fence_trigger_cpu(KGSL_DEVICE(adreno_dev), entry->kfence); adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); } + + gen8_hwsched_soccp_vote(adreno_dev, false); +} + +static void drain_context_hw_fences(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + + if (list_empty(&drawctxt->hw_fence_list)) + return; + + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + drain_context_hw_fence_gmu(adreno_dev, drawctxt); + else + drain_context_hw_fence_cpu(adreno_dev, drawctxt); } static void trigger_context_unregister_fault(struct adreno_device *adreno_dev, @@ -3661,7 +3698,7 @@ static int send_context_unregister_hfi(struct adreno_device *adreno_dev, /* Only send HFI if device is not in SLUMBER */ if (!context->gmu_registered || !test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) { - drain_context_hw_fence_cpu(adreno_dev, drawctxt); + drain_context_hw_fences(adreno_dev, drawctxt); return 0; } diff --git a/kgsl_sync.c b/kgsl_sync.c index aeaa847745..e24cad95de 100644 --- a/kgsl_sync.c +++ b/kgsl_sync.c @@ -10,6 +10,7 @@ #include #include "kgsl_device.h" +#include "kgsl_gmu_core.h" #include "kgsl_sync.h" static const struct dma_fence_ops kgsl_sync_fence_ops; @@ -143,6 +144,10 @@ void kgsl_hw_fence_destroy(struct kgsl_sync_fence *kfence) void kgsl_hw_fence_trigger_cpu(struct kgsl_device *device, struct kgsl_sync_fence *kfence) { + /* soccp should be powered on */ + 
WARN_RATELIMIT(!test_bit(GMU_PRIV_SOCCP_VOTE_ON, &device->gmu_core.flags), + "signaling hw fence via cpu without soccp powered up\n"); + synx_signal(kgsl_synx.handle, (u32)kfence->hw_fence_index, SYNX_STATE_SIGNALED_SUCCESS); } From ee9b6f3e0fb6233ee5269179fb6c7cb4fc32fd13 Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Wed, 17 Jul 2024 12:58:29 -0700 Subject: [PATCH 0866/1016] kgsl: gen8: Fix the warmboot raw command error handling If the raw commands fails in the warmboot path current flow was not checking for the error in the ack received. Make sure the error code is checked along with the ack so that we do not miss a timeout error from GMU. Change-Id: I4fcb7620c3197ec4d0e9fb9c68feb540ca869e52 Signed-off-by: Urvashi Agrawal --- adreno_gen8_hwsched_hfi.c | 54 ++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 701af8c8d2..1cf909b06c 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -2044,6 +2044,13 @@ err: return ret; } +static void print_warmboot_gpu_error(struct device *dev, struct pending_cmd *ret_cmd) +{ + dev_err(dev, + "HFI ACK: Req=0x%8.8X, Result=0x%8.8X Error:0x%8.8X\n", + ret_cmd->results[1], ret_cmd->results[2], ret_cmd->results[3]); +} + static int gen8_hwsched_warmboot_gpu(struct adreno_device *adreno_dev) { struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); @@ -2051,25 +2058,42 @@ static int gen8_hwsched_warmboot_gpu(struct adreno_device *adreno_dev) int ret = 0; ret = gen8_hwsched_hfi_warmboot_gpu_cmd(adreno_dev, &ret_cmd); - if (!ret) - return ret; - - if (MSG_HDR_GET_TYPE(ret_cmd.results[1]) != H2F_MSG_WARMBOOT_CMD) + if (ret) goto err; - switch (MSG_HDR_GET_TYPE(ret_cmd.results[2])) { - case H2F_MSG_ISSUE_CMD_RAW: { - if (ret_cmd.results[2] == gmu->cp_init_hdr) - gen8_spin_idle_debug(adreno_dev, - "CP initialization failed to idle\n"); - else if (ret_cmd.results[2] == gmu->switch_to_unsec_hdr) - 
gen8_spin_idle_debug(adreno_dev, - "Switch to unsecure failed to idle\n"); + /* Check if the ack belongs to the warmboot command */ + if (MSG_HDR_GET_ID(ret_cmd.results[0]) != H2F_MSG_WARMBOOT_CMD) { + ret = -EINVAL; + goto err; + } + + switch (ret_cmd.results[3]) { + /* If ack has no error code then GPU executed raw commands just fine */ + case GMU_SUCCESS: + return ret_cmd.results[2]; + /* If GPU timedout processing raw commands, check which raw command failed */ + case GMU_ERROR_TIMEOUT: { + if (MSG_HDR_GET_ID(ret_cmd.results[1]) == H2F_MSG_ISSUE_CMD_RAW) { + + /* Based on sequence number we can differentiate which command failed */ + if (ret_cmd.results[1] == gmu->cp_init_hdr) + gen8_spin_idle_debug(adreno_dev, + "CP initialization failed to idle\n"); + else if (ret_cmd.results[1] == gmu->switch_to_unsec_hdr) + gen8_spin_idle_debug(adreno_dev, + "Switch to unsecure failed to idle\n"); + } else if (MSG_HDR_GET_ID(ret_cmd.results[1]) == H2F_MSG_ISSUE_LPAC_CMD_RAW) { + gen8_spin_idle_debug_lpac(adreno_dev, + "LPAC CP initialization failed to idle\n"); + } else { + print_warmboot_gpu_error(&gmu->pdev->dev, &ret_cmd); } + ret = -EINVAL; break; - case H2F_MSG_ISSUE_LPAC_CMD_RAW: - gen8_spin_idle_debug_lpac(adreno_dev, - "LPAC CP initialization failed to idle\n"); + } + default: + print_warmboot_gpu_error(&gmu->pdev->dev, &ret_cmd); + ret = -EINVAL; break; } err: From 3c2221066621688465efa9007ade1de83d521148 Mon Sep 17 00:00:00 2001 From: Carter Cooper Date: Tue, 16 Jul 2024 08:37:30 -0600 Subject: [PATCH 0867/1016] kgsl: snapshot: Initialize host address pointer to NULL Initialize a local input variable to NULL in case hwsched_ops->get_rb_hostptr is not defined. 
Change-Id: Id61776074f74e1dd3f1a25235c8eabe3958a0958 Signed-off-by: Carter Cooper --- adreno_hwsched_snapshot.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/adreno_hwsched_snapshot.c b/adreno_hwsched_snapshot.c index 7cc369ea89..9990af43e1 100644 --- a/adreno_hwsched_snapshot.c +++ b/adreno_hwsched_snapshot.c @@ -56,7 +56,7 @@ static void adreno_hwsched_snapshot_rb_payload(struct adreno_device *adreno_dev, u32 size = adreno_hwsched_parse_payload(payload, KEY_RB_SIZEDWORDS) << 2; const struct adreno_hwsched_ops *hwsched_ops = adreno_dev->hwsched.hwsched_ops; u64 lo, hi, gpuaddr; - void *rb_hostptr; + void *rb_hostptr = NULL; char str[16]; lo = adreno_hwsched_parse_payload(payload, KEY_RB_GPUADDR_LO); @@ -70,6 +70,9 @@ static void adreno_hwsched_snapshot_rb_payload(struct adreno_device *adreno_dev, if (hwsched_ops->get_rb_hostptr) rb_hostptr = hwsched_ops->get_rb_hostptr(adreno_dev, gpuaddr, size); + if (rb_hostptr == NULL) + goto err; + /* If the gpuaddress and size don't match any allocation, then abort */ if (((snapshot->remain - sizeof(*section_header)) < (size + sizeof(*header))) || !copy_gpu_global(data, rb_hostptr, size)) From c24810cd5d7f33130034f9e146f229be3683e185 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 25 Jul 2024 10:52:18 +0530 Subject: [PATCH 0868/1016] kgsl: iommu: Print context type for the context on a pagefault When a pagefault occurs, print the context type information for the context that resulted in a pagefault. Change-Id: I5f462c28a790e6e0d5f02f4fad1844449d296437 Signed-off-by: Kamal Agrawal --- kgsl_iommu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 9ce250963e..d9ec0db889 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -1079,6 +1079,7 @@ static void kgsl_iommu_print_fault(struct kgsl_mmu *mmu, const char *fault_type = NULL; const char *comm = NULL; u32 ptname = KGSL_MMU_GLOBAL_PT; + struct adreno_context *drawctxt = context ? 
ADRENO_CONTEXT(context) : NULL; int id; if (private) { @@ -1112,8 +1113,9 @@ static void kgsl_iommu_print_fault(struct kgsl_mmu *mmu, return; dev_crit(device->dev, - "GPU PAGE FAULT: addr = %lX pid= %d name=%s drawctxt=%d context pid = %d\n", addr, - ptname, comm, contextid, context ? context->tid : 0); + "GPU PAGE FAULT: addr = %lX group id= %d name=%s drawctxt=%d context pid = %d ctx_type=%s\n", + addr, ptname, comm, contextid, context ? context->tid : 0, + drawctxt ? kgsl_context_type(drawctxt->type) : "ANY"); dev_crit(device->dev, "context=%s TTBR0=0x%llx (%s %s fault)\n", From 67dd27dfd6fb783f13bb40f2b59ed42f21091da6 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Fri, 26 Jul 2024 12:30:34 +0530 Subject: [PATCH 0869/1016] kgsl: pwrctrl: Honor thermal requests properly Currently, the pmqos max limit is not utilized to determine the minimum power level. Consolidate both the pm qos max limit and thermal power level to determine the correct minimum power level. Change-Id: I84b8f0c34ec1254475d992538517307fd09e631b Signed-off-by: Kamal Agrawal --- kgsl_pwrctrl.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index d832d94b18..a7ca3c2cff 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -75,16 +75,12 @@ static void _bimc_clk_prepare_enable(struct kgsl_device *device, */ static u32 _adjust_pwrlevel(struct kgsl_pwrctrl *pwr, u32 level, struct kgsl_pwr_constraint *pwrc) { - u32 thermal_pwrlevel = READ_ONCE(pwr->thermal_pwrlevel); - u32 max_pwrlevel = max_t(u32, thermal_pwrlevel, - pwr->max_pwrlevel); - u32 min_pwrlevel = pwr->min_pwrlevel; - + u32 thermal_pwrlevel = max_t(u32, READ_ONCE(pwr->thermal_pwrlevel), + READ_ONCE(pwr->pmqos_max_pwrlevel)); /* Ensure that max pwrlevel is within pmqos max limit */ - max_pwrlevel = max_t(u32, max_pwrlevel, READ_ONCE(pwr->pmqos_max_pwrlevel)); - + u32 max_pwrlevel = max_t(u32, pwr->max_pwrlevel, thermal_pwrlevel); /* Ensure that min pwrlevel is within thermal 
limit */ - min_pwrlevel = max_t(u32, min_pwrlevel, thermal_pwrlevel); + u32 min_pwrlevel = max_t(u32, pwr->min_pwrlevel, thermal_pwrlevel); switch (pwrc->type) { case KGSL_CONSTRAINT_PWRLEVEL: { From 4e7a4869f152fb30732448f8c435b88b7ecd89da Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Tue, 4 Jun 2024 12:06:24 -0700 Subject: [PATCH 0870/1016] kgsl: adreno: Add module params to override soc_code and speed_bin Enable override of soc_code and speed_bin properties on internal SKUs. This allows simulating device variants to test features that depend on these device properties. Change-Id: Ia019fd9a0f4a6efdf09348d70d0f64135a109e15 Signed-off-by: Lynus Vaz --- adreno.c | 43 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/adreno.c b/adreno.c index 1f98add59e..0ba9b20d54 100644 --- a/adreno.c +++ b/adreno.c @@ -48,6 +48,8 @@ static struct device_node * static struct adreno_device device_3d0; static bool adreno_preemption_enable; +static u32 kgsl_gpu_sku_override = U32_MAX; +static u32 kgsl_gpu_speed_bin_override = U32_MAX; /* Nice level for the higher priority GPU start thread */ int adreno_wake_nice = -7; @@ -773,19 +775,11 @@ static int adreno_of_get_pwrlevels(struct adreno_device *adreno_dev, { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct device_node *node, *child; - int feature_code, pcode; node = of_find_node_by_name(parent, "qcom,gpu-pwrlevel-bins"); if (node == NULL) return adreno_of_get_legacy_pwrlevels(adreno_dev, parent); - feature_code = max_t(int, socinfo_get_feature_code(), SOCINFO_FC_UNKNOWN); - pcode = (feature_code >= SOCINFO_FC_Y0 && feature_code < SOCINFO_FC_INT_RESERVE) ? 
- max_t(int, socinfo_get_pcode(), SOCINFO_PCODE_UNKNOWN) : SOCINFO_PCODE_UNKNOWN; - - device->soc_code = FIELD_PREP(GENMASK(31, 16), pcode) | - FIELD_PREP(GENMASK(15, 0), feature_code); - for_each_child_of_node(node, child) { bool match = false; int tbl_size; @@ -936,6 +930,31 @@ static int adreno_read_speed_bin(struct platform_device *pdev) return val; } +static void adreno_read_soc_code(struct kgsl_device *device) +{ + int feature_code, pcode; + bool internal_sku; + + feature_code = max_t(int, socinfo_get_feature_code(), SOCINFO_FC_UNKNOWN); + internal_sku = (feature_code >= SOCINFO_FC_Y0) && (feature_code < SOCINFO_FC_INT_RESERVE); + + /* Pcode is significant only for internal SKUs */ + pcode = internal_sku ? max_t(int, socinfo_get_pcode(), SOCINFO_PCODE_UNKNOWN) : + SOCINFO_PCODE_UNKNOWN; + + device->soc_code = FIELD_PREP(GENMASK(31, 16), pcode) | + FIELD_PREP(GENMASK(15, 0), feature_code); + + /* Override soc_code and speed_bin for internal feature codes only */ + if (internal_sku) { + if (kgsl_gpu_sku_override != U32_MAX) + device->soc_code = kgsl_gpu_sku_override; + + if (kgsl_gpu_speed_bin_override != U32_MAX) + device->speed_bin = kgsl_gpu_speed_bin_override; + } +} + static int adreno_read_gpu_model_fuse(struct platform_device *pdev) { struct nvmem_cell *cell = nvmem_cell_get(&pdev->dev, "gpu_model"); @@ -1315,6 +1334,8 @@ int adreno_device_probe(struct platform_device *pdev, device->speed_bin = status; + adreno_read_soc_code(device); + status = adreno_of_get_power(adreno_dev, pdev); if (status) goto err; @@ -3924,6 +3945,12 @@ static void __exit kgsl_3d_exit(void) module_param_named(preempt_enable, adreno_preemption_enable, bool, 0600); MODULE_PARM_DESC(preempt_enable, "Enable GPU HW Preemption"); +module_param_named(gpu_sku_override, kgsl_gpu_sku_override, uint, 0600); +MODULE_PARM_DESC(gpu_sku_override, "Override SKU code identifier for GPU driver"); + +module_param_named(gpu_speed_bin_override, kgsl_gpu_speed_bin_override, uint, 0600); 
+MODULE_PARM_DESC(gpu_speed_bin_override, "Override GPU speed bin"); + module_init(kgsl_3d_init); module_exit(kgsl_3d_exit); From 1f36219ad2a5fd3d77e3f1ff1b5ddf668c07588f Mon Sep 17 00:00:00 2001 From: Rakesh Naidu Bhaviripudi Date: Tue, 14 May 2024 10:11:46 +0530 Subject: [PATCH 0871/1016] kgsl: Update power collapse sequence for A611 GPU A611 GPU requires us to write the RBBM_SW_RESET_CMD register to initiate the SW reset. Correct the power collapse sequence for this GPU. Forked from following commit commit c7713baf4c58 ("kgsl: Add support for A611 GPU with updated sequence"). Change-Id: I2a0970370d1f6e6a5268302d2ea878e93cce4d7b Signed-off-by: Rakesh Naidu Bhaviripudi --- adreno_a6xx.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/adreno_a6xx.c b/adreno_a6xx.c index 3e5b8269ba..420c72c3fa 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -1232,6 +1232,25 @@ static void a6xx_sptprac_disable(struct adreno_device *adreno_dev) a6xx_gmu_sptprac_disable(adreno_dev); } +/* + * a6xx_prepare_for_regulator_disable() - Prepare for regulator disable + * @adreno_dev: Pointer to Adreno device + */ +static void a6xx_prepare_for_regulator_disable(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* This sequence is only required for A611 */ + if (!adreno_is_a611(adreno_dev)) + return; + + kgsl_regwrite(device, A6XX_RBBM_SW_RESET_CMD, 0x1); + /* Make sure above writes are posted before turning off power resource.*/ + wmb(); + /*wait for 100usecs, to allow the software reset to complete*/ + udelay(100); +} + /* * a6xx_gpu_keepalive() - GMU reg write to request GPU stays on * @adreno_dev: Pointer to the adreno device that has the GMU @@ -2336,8 +2355,7 @@ const struct adreno_gpudev adreno_a6xx_gpudev = { .init = a6xx_nogmu_init, .irq_handler = a6xx_irq_handler, .rb_start = a6xx_rb_start, - .regulator_enable = a6xx_sptprac_enable, - .regulator_disable = a6xx_sptprac_disable, + 
.regulator_disable = a6xx_prepare_for_regulator_disable, .gpu_keepalive = a6xx_gpu_keepalive, .hw_isidle = a6xx_hw_isidle, .iommu_fault_block = a6xx_iommu_fault_block, From 335828c430b3d8456c662155cc4c12c79825c8ad Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 30 Jul 2024 23:08:15 +0530 Subject: [PATCH 0872/1016] kgsl: hwsched: Fix OOB access in adreno_hwsched_lookup_key_value Currently, num_values is decremented after each loop because do-while loop is used in adreno_hwsched_lookup_key_value(). Due to this, the loop runs for an additional iteration resulting in OOB access. Fix this by using while loop. Change-Id: Id08778a9abd5b1a41dd6f9197c975529a04694e9 Signed-off-by: Kamal Agrawal --- adreno_hwsched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 78511ad7a3..d74d3a3ea6 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -2449,10 +2449,10 @@ static void adreno_hwsched_lookup_key_value(struct adreno_device *adreno_dev, if ((payload->type == type) && (payload->data[i] == key)) { u32 j = 1; - do { + while (num_values--) { ptr[j - 1] = payload->data[i + j]; j++; - } while (num_values--); + } break; } From f8a30152da79cd33134e3e0ac5beeda48e842453 Mon Sep 17 00:00:00 2001 From: Ashok Gandla Date: Thu, 6 Jun 2024 12:41:19 +0530 Subject: [PATCH 0873/1016] kgsl: build: Add changes to compile graphics-kernel for Scuba Add changes to compile graphics-kernel for Scuba. 
Change-Id: I83d2c6cbb81b31bc4c94968feb121a87900eec95 Signed-off-by: Ashok Gandla --- Kbuild | 3 +++ config/gki_scuba.conf | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 config/gki_scuba.conf diff --git a/Kbuild b/Kbuild index 091d00e061..bd9051564e 100644 --- a/Kbuild +++ b/Kbuild @@ -70,6 +70,9 @@ endif ifeq ($(CONFIG_ARCH_PARROT), y) include $(KGSL_PATH)/config/gki_parrot.conf endif +ifeq ($(CONFIG_ARCH_SCUBA), y) + include $(KGSL_PATH)/config/gki_scuba.conf +endif ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERNEL_SRC)/drivers/devfreq diff --git a/config/gki_scuba.conf b/config/gki_scuba.conf new file mode 100644 index 0000000000..9692132550 --- /dev/null +++ b/config/gki_scuba.conf @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + +CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 +CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y +CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR = \"msm-adreno-tz\" +CONFIG_QCOM_KGSL_SORT_POOL = y + +ifneq ($(CONFIG_SHMEM),) + CONFIG_QCOM_KGSL_USE_SHMEM = y + CONFIG_QCOM_KGSL_PROCESS_RECLAIM = y +endif + +ccflags-y += -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ + -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ + -DCONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR=\"msm-adreno-tz\" \ + -DCONFIG_QCOM_KGSL_SORT_POOL=1 + +ifneq ($(CONFIG_SHMEM),) + ccflags-y += -DCONFIG_QCOM_KGSL_USE_SHMEM=1 \ + -DCONFIG_QCOM_KGSL_PROCESS_RECLAIM=1 +endif From 977eac35cf945edf9ed1af0dffbc634405d28349 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 1 Aug 2024 16:48:14 +0530 Subject: [PATCH 0874/1016] kgsl: snapshot: Remove type 0 and type 3 packets Type 0 and Type 3 are legacy packets that were supported on A4X and older targets. A4X and older targets are not supported in current code base. Thus, remove references for type0 and type3 packets. 
Change-Id: Ic67ddbc14bb1bdc80ecd691e493afc1a6c60102b Signed-off-by: Kamal Agrawal --- adreno_cp_parser.c | 518 +-------------------------------------------- adreno_cp_parser.h | 34 +-- adreno_pm4types.h | 101 +-------- adreno_snapshot.c | 41 +--- 4 files changed, 21 insertions(+), 673 deletions(-) diff --git a/adreno_cp_parser.c b/adreno_cp_parser.c index 98b21c6182..23fd9baff1 100644 --- a/adreno_cp_parser.c +++ b/adreno_cp_parser.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013-2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -26,21 +27,6 @@ struct ib_parser_variables { struct set_draw_state set_draw_groups[NUM_SET_DRAW_GROUPS]; }; -/* - * Used for locating shader objects. This array holds the unit size of shader - * objects based on type and block of shader. The type can be 0 or 1 hence there - * are 2 columns and block can be 0-7 hence 7 rows. - */ -static int load_state_unit_sizes[7][2] = { - { 2, 4 }, - { 0, 1 }, - { 2, 4 }, - { 0, 1 }, - { 8, 2 }, - { 8, 2 }, - { 8, 2 }, -}; - static int adreno_ib_find_objs(struct kgsl_device *device, struct kgsl_process_private *process, uint64_t gpuaddr, uint64_t dwords, @@ -48,12 +34,6 @@ static int adreno_ib_find_objs(struct kgsl_device *device, struct adreno_ib_object_list *ib_obj_list, int ib_level); -static int ib_parse_set_draw_state(struct kgsl_device *device, - unsigned int *ptr, - struct kgsl_process_private *process, - struct adreno_ib_object_list *ib_obj_list, - struct ib_parser_variables *ib_parse_vars); - static int ib_parse_type7_set_draw_state(struct kgsl_device *device, unsigned int *ptr, struct kgsl_process_private *process, @@ -158,166 +138,6 @@ static int adreno_ib_add(struct kgsl_process_private *process, return 0; } -/* - * ib_save_mip_addresses() - Find mip addresses - * @pkt: Pointer to the packet in IB - * @process: The process in which IB is mapped - * @ib_obj_list: List in 
which any objects found are added - * - * Returns 0 on success else error code - */ -static int ib_save_mip_addresses(unsigned int *pkt, - struct kgsl_process_private *process, - struct adreno_ib_object_list *ib_obj_list) -{ - int ret = 0; - int num_levels = (pkt[1] >> 22) & 0x03FF; - int i; - unsigned int *hostptr; - struct kgsl_mem_entry *ent; - unsigned int block, type; - int unitsize = 0; - - block = (pkt[1] >> 19) & 0x07; - type = pkt[2] & 0x03; - - if (type == 0) - unitsize = load_state_unit_sizes[block][0]; - else - unitsize = load_state_unit_sizes[block][1]; - - if (3 == block && 1 == type) { - uint64_t gpuaddr = pkt[2] & 0xFFFFFFFC; - uint64_t size = (num_levels * unitsize) << 2; - - ent = kgsl_sharedmem_find(process, gpuaddr); - if (ent == NULL) - return 0; - - if (!kgsl_gpuaddr_in_memdesc(&ent->memdesc, - gpuaddr, size)) { - kgsl_mem_entry_put(ent); - return 0; - } - - hostptr = kgsl_gpuaddr_to_vaddr(&ent->memdesc, gpuaddr); - if (hostptr != NULL) { - for (i = 0; i < num_levels; i++) { - ret = adreno_ib_add(process, hostptr[i], - SNAPSHOT_GPU_OBJECT_GENERIC, - ib_obj_list); - if (ret) - break; - } - } - - kgsl_memdesc_unmap(&ent->memdesc); - kgsl_mem_entry_put(ent); - } - return ret; -} - -/* - * ib_parse_load_state() - Parse load state packet - * @pkt: Pointer to the packet in IB - * @process: The pagetable in which the IB is mapped - * @ib_obj_list: List in which any objects found are added - * @ib_parse_vars: VAriable list that store temporary addressses - * - * Parse load state packet found in an IB and add any memory object found to - * a list - * Returns 0 on success else error code - */ -static int ib_parse_load_state(unsigned int *pkt, - struct kgsl_process_private *process, - struct adreno_ib_object_list *ib_obj_list, - struct ib_parser_variables *ib_parse_vars) -{ - int ret = 0; - int i; - - /* - * The object here is to find indirect shaders i.e - shaders loaded from - * GPU memory instead of directly in the command. 
These should be added - * to the list of memory objects to dump. So look at the load state - * if the block is indirect (source = 4). If so then add the memory - * address to the list. The size of the object differs depending on the - * type per the load_state_unit_sizes array above. - */ - - if (type3_pkt_size(pkt[0]) < 2) - return 0; - - /* - * Anything from 3rd ordinal onwards of packet can be a memory object, - * no need to be fancy about parsing it, just save it if it looks - * like memory - */ - for (i = 0; i <= (type3_pkt_size(pkt[0]) - 2); i++) { - ret |= adreno_ib_add(process, pkt[2 + i] & 0xFFFFFFFC, - SNAPSHOT_GPU_OBJECT_GENERIC, - ib_obj_list); - if (ret) - break; - } - /* get the mip addresses */ - if (!ret) - ret = ib_save_mip_addresses(pkt, process, ib_obj_list); - return ret; -} - -/* - * This opcode sets the base addresses for the visibilty stream buffer and the - * visiblity stream size buffer. - */ - -static int ib_parse_set_bin_data(unsigned int *pkt, - struct kgsl_process_private *process, - struct adreno_ib_object_list *ib_obj_list, - struct ib_parser_variables *ib_parse_vars) -{ - int ret = 0; - - if (type3_pkt_size(pkt[0]) < 2) - return 0; - - /* Visiblity stream buffer */ - ret = adreno_ib_add(process, pkt[1], - SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list); - if (ret) - return ret; - - /* visiblity stream size buffer (fixed size 8 dwords) */ - ret = adreno_ib_add(process, pkt[2], - SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list); - - return ret; -} - -/* - * This opcode writes to GPU memory - if the buffer is written to, there is a - * good chance that it would be valuable to capture in the snapshot, so mark all - * buffers that are written to as frozen - */ - -static int ib_parse_mem_write(unsigned int *pkt, - struct kgsl_process_private *process, - struct adreno_ib_object_list *ib_obj_list, - struct ib_parser_variables *ib_parse_vars) -{ - if (type3_pkt_size(pkt[0]) < 1) - return 0; - - /* - * The address is where the data in the rest of this 
packet is written - * to, but since that might be an offset into the larger buffer we need - * to get the whole thing. Pass a size of 0 tocapture the entire buffer. - */ - - return adreno_ib_add(process, pkt[1] & 0xFFFFFFFC, - SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list); -} - /* * ib_add_type0_entries() - Add memory objects to list * @device: The device on which the IB will execute @@ -390,94 +210,6 @@ static int ib_add_type0_entries(struct kgsl_device *device, } return ret; } -/* - * The DRAW_INDX opcode sends a draw initator which starts a draw operation in - * the GPU, so this is the point where all the registers and buffers become - * "valid". The DRAW_INDX may also have an index buffer pointer that should be - * frozen with the others - */ - -static int ib_parse_draw_indx(struct kgsl_device *device, unsigned int *pkt, - struct kgsl_process_private *process, - struct adreno_ib_object_list *ib_obj_list, - struct ib_parser_variables *ib_parse_vars) -{ - int ret = 0; - int i; - int opcode = cp_type3_opcode(pkt[0]); - - switch (opcode) { - case CP_DRAW_INDX: - if (type3_pkt_size(pkt[0]) > 3) { - ret = adreno_ib_add(process, - pkt[4], SNAPSHOT_GPU_OBJECT_GENERIC, - ib_obj_list); - } - break; - case CP_DRAW_INDX_OFFSET: - if (type3_pkt_size(pkt[0]) == 6) { - ret = adreno_ib_add(process, - pkt[5], SNAPSHOT_GPU_OBJECT_GENERIC, - ib_obj_list); - } - break; - case CP_DRAW_INDIRECT: - if (type3_pkt_size(pkt[0]) == 2) { - ret = adreno_ib_add(process, - pkt[2], SNAPSHOT_GPU_OBJECT_GENERIC, - ib_obj_list); - } - break; - case CP_DRAW_INDX_INDIRECT: - if (type3_pkt_size(pkt[0]) == 4) { - ret = adreno_ib_add(process, - pkt[2], SNAPSHOT_GPU_OBJECT_GENERIC, - ib_obj_list); - if (ret) - break; - ret = adreno_ib_add(process, - pkt[4], SNAPSHOT_GPU_OBJECT_GENERIC, - ib_obj_list); - } - break; - case CP_DRAW_AUTO: - if (type3_pkt_size(pkt[0]) == 6) { - ret = adreno_ib_add(process, - pkt[3], SNAPSHOT_GPU_OBJECT_GENERIC, - ib_obj_list); - if (ret) - break; - ret = adreno_ib_add(process, 
- pkt[4], SNAPSHOT_GPU_OBJECT_GENERIC, - ib_obj_list); - } - break; - } - - if (ret) - return ret; - /* - * All of the type0 writes are valid at a draw initiator, so freeze - * the various buffers that we are tracking - */ - ret = ib_add_type0_entries(device, process, ib_obj_list, - ib_parse_vars); - if (ret) - return ret; - /* Process set draw state command streams if any */ - for (i = 0; i < NUM_SET_DRAW_GROUPS; i++) { - if (!ib_parse_vars->set_draw_groups[i].cmd_stream_dwords) - continue; - ret = adreno_ib_find_objs(device, process, - ib_parse_vars->set_draw_groups[i].cmd_stream_addr, - ib_parse_vars->set_draw_groups[i].cmd_stream_dwords, - 0, SNAPSHOT_GPU_OBJECT_DRAW, - ib_obj_list, 2); - if (ret) - break; - } - return ret; -} /* * Parse all the type7 opcode packets that may contain important information, @@ -500,143 +232,6 @@ static int ib_parse_type7(struct kgsl_device *device, unsigned int *ptr, return 0; } -/* - * Parse all the type3 opcode packets that may contain important information, - * such as additional GPU buffers to grab or a draw initator - */ - -static int ib_parse_type3(struct kgsl_device *device, unsigned int *ptr, - struct kgsl_process_private *process, - struct adreno_ib_object_list *ib_obj_list, - struct ib_parser_variables *ib_parse_vars) -{ - int opcode = cp_type3_opcode(*ptr); - - switch (opcode) { - case CP_LOAD_STATE: - return ib_parse_load_state(ptr, process, ib_obj_list, - ib_parse_vars); - case CP_SET_BIN_DATA: - return ib_parse_set_bin_data(ptr, process, ib_obj_list, - ib_parse_vars); - case CP_MEM_WRITE: - return ib_parse_mem_write(ptr, process, ib_obj_list, - ib_parse_vars); - case CP_DRAW_INDX: - case CP_DRAW_INDX_OFFSET: - case CP_DRAW_INDIRECT: - case CP_DRAW_INDX_INDIRECT: - return ib_parse_draw_indx(device, ptr, process, ib_obj_list, - ib_parse_vars); - case CP_SET_DRAW_STATE: - return ib_parse_set_draw_state(device, ptr, process, - ib_obj_list, ib_parse_vars); - } - - return 0; -} - -/* - * Parse type0 packets found in the 
stream. Some of the registers that are - * written are clues for GPU buffers that we need to freeze. Register writes - * are considred valid when a draw initator is called, so just cache the values - * here and freeze them when a CP_DRAW_INDX is seen. This protects against - * needlessly caching buffers that won't be used during a draw call - */ - -static int ib_parse_type0(struct kgsl_device *device, unsigned int *ptr, - struct kgsl_process_private *process, - struct adreno_ib_object_list *ib_obj_list, - struct ib_parser_variables *ib_parse_vars) -{ - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - int size = type0_pkt_size(*ptr); - int offset = type0_pkt_offset(*ptr); - int i; - int reg_index; - int ret = 0; - - for (i = 0; i < size; i++, offset++) { - /* Visiblity stream buffer */ - if (offset >= adreno_cp_parser_getreg(adreno_dev, - ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0) && - offset <= adreno_cp_parser_getreg(adreno_dev, - ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7)) { - reg_index = adreno_cp_parser_regindex( - adreno_dev, offset, - ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0, - ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7); - if (reg_index >= 0) - ib_parse_vars->cp_addr_regs[reg_index] = - ptr[i + 1]; - continue; - } else if ((offset >= adreno_cp_parser_getreg(adreno_dev, - ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0)) && - (offset <= adreno_cp_parser_getreg(adreno_dev, - ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15))) { - reg_index = adreno_cp_parser_regindex(adreno_dev, - offset, - ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0, - ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15); - if (reg_index >= 0) - ib_parse_vars->cp_addr_regs[reg_index] = - ptr[i + 1]; - continue; - } else if ((offset >= adreno_cp_parser_getreg(adreno_dev, - ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_16)) && - (offset <= adreno_cp_parser_getreg(adreno_dev, - ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_31))) { - reg_index = adreno_cp_parser_regindex(adreno_dev, - offset, - ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_16, - ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_31); - if 
(reg_index >= 0) - ib_parse_vars->cp_addr_regs[reg_index] = - ptr[i + 1]; - continue; - } else { - if (offset == - adreno_cp_parser_getreg(adreno_dev, - ADRENO_CP_ADDR_VSC_SIZE_ADDRESS)) - ib_parse_vars->cp_addr_regs[ - ADRENO_CP_ADDR_VSC_SIZE_ADDRESS] = - ptr[i + 1]; - else if (offset == adreno_cp_parser_getreg(adreno_dev, - ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR)) - ib_parse_vars->cp_addr_regs[ - ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR] = - ptr[i + 1]; - else if (offset == adreno_cp_parser_getreg(adreno_dev, - ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR)) - ib_parse_vars->cp_addr_regs[ - ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR] = - ptr[i + 1]; - else if (offset == adreno_cp_parser_getreg(adreno_dev, - ADRENO_CP_ADDR_SP_VS_OBJ_START_REG)) - ib_parse_vars->cp_addr_regs[ - ADRENO_CP_ADDR_SP_VS_OBJ_START_REG] = - ptr[i + 1]; - else if (offset == adreno_cp_parser_getreg(adreno_dev, - ADRENO_CP_ADDR_SP_FS_OBJ_START_REG)) - ib_parse_vars->cp_addr_regs[ - ADRENO_CP_ADDR_SP_FS_OBJ_START_REG] = - ptr[i + 1]; - else if ((offset == adreno_cp_parser_getreg(adreno_dev, - ADRENO_CP_UCHE_INVALIDATE0)) || - (offset == adreno_cp_parser_getreg(adreno_dev, - ADRENO_CP_UCHE_INVALIDATE1))) { - ret = adreno_ib_add(process, - ptr[i + 1] & 0xFFFFFFC0, - SNAPSHOT_GPU_OBJECT_GENERIC, - ib_obj_list); - if (ret) - break; - } - } - } - return ret; -} - static int ib_parse_type7_set_draw_state(struct kgsl_device *device, unsigned int *ptr, struct kgsl_process_private *process, @@ -694,70 +289,6 @@ static int ib_parse_type7_set_draw_state(struct kgsl_device *device, return ret; } -static int ib_parse_set_draw_state(struct kgsl_device *device, - unsigned int *ptr, - struct kgsl_process_private *process, - struct adreno_ib_object_list *ib_obj_list, - struct ib_parser_variables *ib_parse_vars) -{ - int size = type0_pkt_size(*ptr); - int i; - int grp_id; - int ret = 0; - int flags; - struct set_draw_state *group; - - /* - * size is the size of the packet that does not include the DWORD - * for the packet header, we only want 
to loop here through the - * packet parameters from ptr[1] till ptr[size] where ptr[0] is the - * packet header. In each loop we look at 2 DWORDS hence increment - * loop counter by 2 always - */ - for (i = 1; i <= size; i += 2) { - grp_id = (ptr[i] & 0x1F000000) >> 24; - /* take action based on flags */ - flags = (ptr[i] & 0x000F0000) >> 16; - /* Disable all groups */ - if (flags & 0x4) { - int j; - - for (j = 0; j < NUM_SET_DRAW_GROUPS; j++) { - group = &(ib_parse_vars->set_draw_groups[j]); - group->cmd_stream_dwords = 0; - } - continue; - } - /* disable flag */ - if (flags & 0x2) { - group = &(ib_parse_vars->set_draw_groups[grp_id]); - group->cmd_stream_dwords = 0; - continue; - } - /* - * dirty flag or no flags both mean we need to load it for - * next draw. No flags is used when the group is activated - * or initialized for the first time in the IB - */ - if (flags & 0x1 || !flags) { - group = &(ib_parse_vars->set_draw_groups[grp_id]); - group->cmd_stream_dwords = ptr[i] & 0x0000FFFF; - group->cmd_stream_addr = ptr[i + 1]; - continue; - } - /* load immediate */ - if (flags & 0x8) { - ret = adreno_ib_find_objs(device, process, - ptr[i + 1], (ptr[i] & 0x0000FFFF), - 0, SNAPSHOT_GPU_OBJECT_IB, - ib_obj_list, 2); - if (ret) - break; - } - } - return ret; -} - /* * adreno_cp_parse_ib2() - Wrapper function around IB2 parsing * @device: Device pointer @@ -872,15 +403,8 @@ static int adreno_ib_find_objs(struct kgsl_device *device, for (i = 0; rem > 0; rem--, i++) { int pktsize; - if (pkt_is_type0(src[i])) - pktsize = type0_pkt_size(src[i]); - - else if (pkt_is_type3(src[i])) - pktsize = type3_pkt_size(src[i]); - - else if (pkt_is_type4(src[i])) + if (pkt_is_type4(src[i])) pktsize = type4_pkt_size(src[i]); - else if (pkt_is_type7(src[i])) pktsize = type7_pkt_size(src[i]); @@ -891,36 +415,7 @@ static int adreno_ib_find_objs(struct kgsl_device *device, else break; - if (((pkt_is_type0(src[i]) || pkt_is_type3(src[i])) && !pktsize) - || ((pktsize + 1) > rem)) - break; - - 
if (pkt_is_type3(src[i])) { - if (adreno_cmd_is_ib(adreno_dev, src[i])) { - uint64_t gpuaddrib2 = src[i + 1]; - uint64_t size = src[i + 2]; - - ret = adreno_cp_parse_ib2(device, process, - gpuaddrib2, size, ib2base, - ib_obj_list, ib_level); - if (ret) - goto done; - } else { - ret = ib_parse_type3(device, &src[i], process, - ib_obj_list, - &ib_parse_vars); - /* - * If the parse function failed (probably - * because of a bad decode) then bail out and - * just capture the binary IB data - */ - - if (ret) - goto done; - } - } - - else if (pkt_is_type7(src[i])) { + if (pkt_is_type7(src[i])) { if (adreno_cmd_is_ib(adreno_dev, src[i])) { uint64_t size = src[i + 3]; uint64_t gpuaddrib2 = src[i + 2]; @@ -947,13 +442,6 @@ static int adreno_ib_find_objs(struct kgsl_device *device, } } - else if (pkt_is_type0(src[i])) { - ret = ib_parse_type0(device, &src[i], process, - ib_obj_list, &ib_parse_vars); - if (ret) - goto done; - } - i += pktsize; rem -= pktsize; } diff --git a/adreno_cp_parser.h b/adreno_cp_parser.h index 52b8ddfe05..6cd08ce6a1 100644 --- a/adreno_cp_parser.h +++ b/adreno_cp_parser.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2013-2014, 2017, 2019, The Linux Foundation. All rights reserved. - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __ADRENO_IB_PARSER__ @@ -92,8 +92,6 @@ enum adreno_cp_addr_regs { ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR, ADRENO_CP_ADDR_SP_VS_OBJ_START_REG, ADRENO_CP_ADDR_SP_FS_OBJ_START_REG, - ADRENO_CP_UCHE_INVALIDATE0, - ADRENO_CP_UCHE_INVALIDATE1, ADRENO_CP_ADDR_MAX, }; @@ -117,36 +115,6 @@ static inline void adreno_ib_init_ib_obj(uint64_t gpuaddr, ib_obj->entry = entry; } -/* - * adreno_cp_parser_getreg() - Returns the value of register offset - * @adreno_dev: The adreno device being operated upon - * @reg_enum: Enum index of the register whose offset is returned - */ -static inline int adreno_cp_parser_getreg(struct adreno_device *adreno_dev, - enum adreno_cp_addr_regs reg_enum) -{ - return -EEXIST; -} - -/* - * adreno_cp_parser_regindex() - Returns enum index for a given register offset - * @adreno_dev: The adreno device being operated upon - * @offset: Register offset - * @start: The start index to search from - * @end: The last index to search - * - * Checks the list of registers defined for the device and returns the index - * whose offset value matches offset parameter. - */ -static inline int adreno_cp_parser_regindex(struct adreno_device *adreno_dev, - unsigned int offset, - enum adreno_cp_addr_regs start, - enum adreno_cp_addr_regs end) -{ - return -EEXIST; - -} - int adreno_ib_create_object_list( struct kgsl_device *device, struct kgsl_process_private *process, diff --git a/adreno_pm4types.h b/adreno_pm4types.h index 7e2dc23003..109f03603c 100644 --- a/adreno_pm4types.h +++ b/adreno_pm4types.h @@ -1,22 +1,18 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __ADRENO_PM4TYPES_H #define __ADRENO_PM4TYPES_H #include "adreno.h" -#define CP_TYPE0_PKT (0 << 30) -#define CP_TYPE3_PKT (3 << 30) #define CP_TYPE4_PKT (4 << 28) #define CP_TYPE7_PKT (7 << 28) #define PM4_TYPE4_PKT_SIZE_MAX 128 -/* type3 packets */ - /* Enable preemption flag */ #define CP_PREEMPT_ENABLE 0x1C /* Preemption token command on which preemption occurs */ @@ -81,24 +77,6 @@ /* generate an event that creates a write to memory when completed */ #define CP_EVENT_WRITE 0x46 -/* initiate fetch of index buffer and draw */ -#define CP_DRAW_INDX 0x22 - -/* New draw packets defined for A4XX */ -#define CP_DRAW_INDX_OFFSET 0x38 -#define CP_DRAW_INDIRECT 0x28 -#define CP_DRAW_INDX_INDIRECT 0x29 -#define CP_DRAW_AUTO 0x24 - -/* load constant into chip and to memory */ -#define CP_SET_CONSTANT 0x2d - -/* selective invalidation of state pointers */ -#define CP_INVALIDATE_STATE 0x3b - -/* generate interrupt from the command stream */ -#define CP_INTERRUPT 0x40 - /* A5XX Enable yield in RB only */ #define CP_YIELD_ENABLE 0x1C @@ -116,16 +94,6 @@ /* Inform CP about current render mode (needed for a5xx preemption) */ #define CP_SET_RENDER_MODE 0x6C -/* Write register, ignoring context state for context sensitive registers */ -#define CP_REG_WR_NO_CTXT 0x78 - -/* - * for A4xx - * Write to register with address that does not fit into type-0 pkt - */ -#define CP_WIDE_REG_WRITE 0x74 - - /* PFP waits until the FIFO between the PFP and the ME is empty */ #define CP_WAIT_FOR_ME 0x13 @@ -193,21 +161,6 @@ static inline uint pm4_calc_odd_parity_bit(uint val) ((val) >> 28)))) & 1; } -/* - * PM4 packet header functions - * For all the packet functions the passed in count should be the size of the - * payload excluding the header - */ -static inline uint cp_type0_packet(uint regindx, uint cnt) -{ - return CP_TYPE0_PKT | ((cnt-1) << 16) | ((regindx) & 0x7FFF); -} - -static inline uint cp_type3_packet(uint opcode, uint cnt) -{ - return CP_TYPE3_PKT | ((cnt-1) << 16) | 
(((opcode) & 0xFF) << 8); -} - static inline uint cp_type4_packet(uint opcode, uint cnt) { return CP_TYPE4_PKT | ((cnt) << 0) | @@ -225,23 +178,6 @@ static inline uint cp_type7_packet(uint opcode, uint cnt) } -#define pkt_is_type0(pkt) (((pkt) & 0XC0000000) == CP_TYPE0_PKT) - -#define type0_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1) -#define type0_pkt_offset(pkt) ((pkt) & 0x7FFF) - -/* - * Check both for the type3 opcode and make sure that the reserved bits [1:7] - * and 15 are 0 - */ - -#define pkt_is_type3(pkt) \ - ((((pkt) & 0xC0000000) == CP_TYPE3_PKT) && \ - (((pkt) & 0x80FE) == 0)) - -#define cp_type3_opcode(pkt) (((pkt) >> 8) & 0xFF) -#define type3_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1) - #define pkt_is_type4(pkt) \ ((((pkt) & 0xF0000000) == CP_TYPE4_PKT) && \ ((((pkt) >> 27) & 0x1) == \ @@ -269,9 +205,6 @@ static inline uint cp_type7_packet(uint opcode, uint cnt) /* gmem command buffer length */ #define CP_REG(reg) ((0x4 << 16) | (SUBBLOCK_OFFSET(reg))) -/* Return true if the hardware uses the legacy (A4XX and older) PM4 format */ -#define ADRENO_LEGACY_PM4(_d) (ADRENO_GPUREV(_d) < 500) - /** * cp_packet - Generic CP packet to support different opcodes on * different GPU cores. 
@@ -282,9 +215,6 @@ static inline uint cp_type7_packet(uint opcode, uint cnt) static inline uint cp_packet(struct adreno_device *adreno_dev, int opcode, uint size) { - if (ADRENO_LEGACY_PM4(adreno_dev)) - return cp_type3_packet(opcode, size); - return cp_type7_packet(opcode, size); } @@ -299,9 +229,6 @@ static inline uint cp_packet(struct adreno_device *adreno_dev, static inline uint cp_mem_packet(struct adreno_device *adreno_dev, int opcode, uint size, uint num_mem) { - if (ADRENO_LEGACY_PM4(adreno_dev)) - return cp_type3_packet(opcode, size); - return cp_type7_packet(opcode, size + num_mem); } @@ -331,12 +258,9 @@ static inline uint cp_gpuaddr(struct adreno_device *adreno_dev, { uint *start = cmds; - if (ADRENO_LEGACY_PM4(adreno_dev)) - *cmds++ = (uint)gpuaddr; - else { - *cmds++ = lower_32_bits(gpuaddr); - *cmds++ = upper_32_bits(gpuaddr); - } + *cmds++ = lower_32_bits(gpuaddr); + *cmds++ = upper_32_bits(gpuaddr); + return cmds - start; } @@ -349,9 +273,6 @@ static inline uint cp_gpuaddr(struct adreno_device *adreno_dev, static inline uint cp_register(struct adreno_device *adreno_dev, unsigned int reg, unsigned int size) { - if (ADRENO_LEGACY_PM4(adreno_dev)) - return cp_type0_packet(reg, size); - return cp_type4_packet(reg, size); } @@ -365,12 +286,7 @@ static inline uint cp_wait_for_me(struct adreno_device *adreno_dev, { uint *start = cmds; - if (ADRENO_LEGACY_PM4(adreno_dev)) { - *cmds++ = cp_type3_packet(CP_WAIT_FOR_ME, 1); - *cmds++ = 0; - } else - *cmds++ = cp_type7_packet(CP_WAIT_FOR_ME, 0); - + *cmds++ = cp_type7_packet(CP_WAIT_FOR_ME, 0); return cmds - start; } @@ -384,12 +300,7 @@ static inline uint cp_wait_for_idle(struct adreno_device *adreno_dev, { uint *start = cmds; - if (ADRENO_LEGACY_PM4(adreno_dev)) { - *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); - *cmds++ = 0; - } else - *cmds++ = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); - + *cmds++ = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); return cmds - start; } diff --git a/adreno_snapshot.c 
b/adreno_snapshot.c index d25ffeb9f2..8537cf6020 100644 --- a/adreno_snapshot.c +++ b/adreno_snapshot.c @@ -283,16 +283,10 @@ void adreno_snapshot_dump_all_ibs(struct kgsl_device *device, uint64_t ibaddr; uint64_t ibsize; - if (ADRENO_LEGACY_PM4(adreno_dev)) { - ibaddr = rbptr[(index + 1) % KGSL_RB_DWORDS]; - ibsize = rbptr[(index + 2) % KGSL_RB_DWORDS]; - index += 3; - } else { - ibaddr = rbptr[(index + 2) % KGSL_RB_DWORDS]; - ibaddr = ibaddr << 32 | rbptr[(index + 1) % KGSL_RB_DWORDS]; - ibsize = rbptr[(index + 3) % KGSL_RB_DWORDS]; - index += 4; - } + ibaddr = rbptr[(index + 2) % KGSL_RB_DWORDS]; + ibaddr = ibaddr << 32 | rbptr[(index + 1) % KGSL_RB_DWORDS]; + ibsize = rbptr[(index + 3) % KGSL_RB_DWORDS]; + index += 4; /* Don't parse known global IBs */ if (kgsl_gpuaddr_in_memdesc(iommu->setstate, @@ -356,10 +350,7 @@ static void snapshot_rb_ibs(struct kgsl_device *device, index--; if (index < 0) { - if (ADRENO_LEGACY_PM4(adreno_dev)) - index = KGSL_RB_DWORDS - 3; - else - index = KGSL_RB_DWORDS - 4; + index = KGSL_RB_DWORDS - 4; /* We wrapped without finding what we wanted */ if (index < rb->wptr) { @@ -372,14 +363,9 @@ static void snapshot_rb_ibs(struct kgsl_device *device, uint64_t ibaddr; uint64_t ibsize; - if (ADRENO_LEGACY_PM4(adreno_dev)) { - ibaddr = rbptr[index + 1]; - ibsize = rbptr[index + 2]; - } else { - ibaddr = rbptr[index + 2]; - ibaddr = ibaddr << 32 | rbptr[index + 1]; - ibsize = rbptr[index + 3]; - } + ibaddr = rbptr[index + 2]; + ibaddr = ibaddr << 32 | rbptr[index + 1]; + ibsize = rbptr[index + 3]; if (adreno_ib_addr_overlap(snapshot->ib1base, ibaddr, ibsize)) { @@ -467,14 +453,9 @@ static void snapshot_rb_ibs(struct kgsl_device *device, uint64_t ibaddr; uint64_t ibsize; - if (ADRENO_LEGACY_PM4(adreno_dev)) { - ibaddr = rbptr[index + 1]; - ibsize = rbptr[index + 2]; - } else { - ibaddr = rbptr[index + 2]; - ibaddr = ibaddr << 32 | rbptr[index + 1]; - ibsize = rbptr[index + 3]; - } + ibaddr = rbptr[index + 2]; + ibaddr = ibaddr << 32 | 
rbptr[index + 1]; + ibsize = rbptr[index + 3]; index = (index + 1) % KGSL_RB_DWORDS; From 292080dd7651ac69a2a69b23867e6d196744da22 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 1 Aug 2024 09:37:49 +0530 Subject: [PATCH 0875/1016] kgsl: gen8: Assert the static bit before dumping snapshot Currently, the static bit is not asserted for SLICE and UNSLICE blocks which might result in GPU activity during snapshot dump. Assert these bits before collecting the snapshot. Change-Id: Ia65cf6fac38e708abd27f08eef04be86f2aeb170 Signed-off-by: Kamal Agrawal --- adreno_gen8_snapshot.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 1e0fe73ae8..f8b497aadc 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -1621,7 +1621,7 @@ void gen8_snapshot(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - u32 i; + u32 i, slice_mask; const struct adreno_gen8_core *gpucore = to_gen8_core(ADRENO_DEVICE(device)); int is_current_rt; @@ -1676,8 +1676,14 @@ void gen8_snapshot(struct adreno_device *adreno_dev, /* Clear aperture register */ gen8_host_aperture_set(adreno_dev, 0, 0, 0); - /* Assert the isStatic bit before triggering snapshot */ - kgsl_regwrite(device, GEN8_RBBM_SNAPSHOT_STATUS, 0x1); + /* + * Assert the isStatic bit before triggering snapshot. + * BIT(0): GPU activity during snapshot dump + * BIT(1): GPU UNSLICE activity during snapshot dump + * Similarly, BIT(4) for slice-0, BIT(5) for slice-1 and so on. 
+ */ + slice_mask = ((1 << gen8_get_num_slices(adreno_dev)) - 1) << 4; + kgsl_regwrite(device, GEN8_RBBM_SNAPSHOT_STATUS, BIT(0) | BIT(1) | slice_mask); /* Dump the registers which get affected by crash dumper trigger */ for (i = 0; i < gen8_snapshot_block_list->num_pre_crashdumper_regs; i++) { From 32017c9fcb81bf77ef963489238c8b30e8efd1a0 Mon Sep 17 00:00:00 2001 From: Raviteja Narayanam Date: Tue, 16 Jul 2024 12:42:05 +0530 Subject: [PATCH 0876/1016] kgsl: hwsched: Use right key while parsing CP_BV_HW_FAULT For gen7 and gen8 targets, use right key while parsing CP_BV_HW_FAULT error in hwsched path. Change-Id: If85a5a83139a41108f618d23735a90fd8772e5ef Signed-off-by: Raviteja Narayanam --- adreno_gen7_hwsched_hfi.c | 4 ++-- adreno_gen8_hwsched_hfi.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 3b2d8fe349..b46e53b7de 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -449,7 +449,7 @@ static void log_gpu_fault_legacy(struct adreno_device *adreno_dev) dev_crit_ratelimited(dev, "CP BV | Ringbuffer HW fault | status=0x%8.8x\n", gen7_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, - KEY_CP_HW_FAULT)); + KEY_CP_BV_HW_FAULT)); break; case GMU_CP_BV_ILLEGAL_INST_ERROR: dev_crit_ratelimited(dev, "CP BV Illegal instruction error\n"); @@ -654,7 +654,7 @@ static bool log_gpu_fault(struct adreno_device *adreno_dev) dev_crit_ratelimited(dev, "CP BV | Ringbuffer HW fault | status=0x%8.8x\n", gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, - KEY_CP_HW_FAULT)); + KEY_CP_BV_HW_FAULT)); break; case GMU_CP_BV_ILLEGAL_INST_ERROR: dev_crit_ratelimited(dev, "CP BV Illegal instruction error\n"); diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index bcbdf3290a..4539559243 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -432,7 +432,7 @@ static bool log_gpu_fault(struct adreno_device *adreno_dev) 
dev_crit_ratelimited(dev, "CP BV | Ringbuffer HW fault | status=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, - KEY_CP_HW_FAULT)); + KEY_CP_BV_HW_FAULT)); break; case GMU_CP_BV_ILLEGAL_INST_ERROR: dev_crit_ratelimited(dev, "CP BV Illegal instruction error\n"); From 2e6c1131c1ca31df4d9600c558d3a1ba89ce3169 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Tue, 6 Aug 2024 16:02:59 -0700 Subject: [PATCH 0877/1016] kgsl: gen8: Fix the GBIF perfcounter enable procedure When enabling the GBIF perfcounters on Gen8, the register values were incorrectly calculated. Update the operation to use the correct values. Change-Id: I7b5f59fc6114a3808e713516442e9434bd70a2ee Signed-off-by: Lynus Vaz --- adreno_gen8_perfcounter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adreno_gen8_perfcounter.c b/adreno_gen8_perfcounter.c index 98f3750898..59d648b518 100644 --- a/adreno_gen8_perfcounter.c +++ b/adreno_gen8_perfcounter.c @@ -131,7 +131,7 @@ static int gen8_counter_gbif_enable(struct adreno_device *adreno_dev, { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct adreno_perfcount_register *reg = &group->regs[counter]; - u32 shift = counter << 3; + u32 shift = (counter % 4) << 3; u32 select = BIT(counter); if (countable > 0xff) @@ -160,7 +160,7 @@ static int gen8_counter_gbif_pwr_enable(struct adreno_device *adreno_dev, { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct adreno_perfcount_register *reg = &group->regs[counter]; - u32 shift = counter << 3; + u32 shift = (counter % 4) << 3; u32 select = BIT(16 + counter); if (countable > 0xff) From 43c70c6f2df92548fab989db6f937835e95b091d Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Fri, 9 Aug 2024 22:08:36 +0530 Subject: [PATCH 0878/1016] kgsl: Fix UBSAN warning in kgsl_get_page_size kgsl_shmem_alloc_pages() and _kgsl_alloc_pages() could call kgsl_get_page_size() with size as 0. This will be passed to rounddown_pow_of_two() and it's result is undefined for 0. 
To avoid this, return early from kgsl_get_page_size() when size is 0. Change-Id: Iac9f3fb88615ea9db86e38a8835648fd287fb380 Signed-off-by: Kamal Agrawal --- kgsl_pool.c | 3 +++ kgsl_pool.h | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/kgsl_pool.c b/kgsl_pool.c index c17bf2251b..5dc6c3d3d1 100644 --- a/kgsl_pool.c +++ b/kgsl_pool.c @@ -470,6 +470,9 @@ u32 kgsl_get_page_size(size_t size, unsigned int align) { u32 pool; + if (!size) + return 0; + for (pool = rounddown_pow_of_two(size); pool > PAGE_SIZE; pool >>= 1) if ((align >= ilog2(pool)) && (size >= pool) && kgsl_pool_available(pool)) diff --git a/kgsl_pool.h b/kgsl_pool.h index 0f73994131..fa28c25b2f 100644 --- a/kgsl_pool.h +++ b/kgsl_pool.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2016-2017,2019,2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef __KGSL_POOL_H #define __KGSL_POOL_H @@ -14,6 +14,9 @@ static inline u32 kgsl_get_page_size(size_t size, unsigned int align) { u32 page_size; + if (!size) + return 0; + for (page_size = rounddown_pow_of_two(size); page_size > PAGE_SIZE; page_size >>= 1) if ((align >= ilog2(page_size)) && (size >= page_size)) return page_size; From ee1bee1db54867ab258324b831ce05deee487faa Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 8 Aug 2024 09:54:09 +0530 Subject: [PATCH 0879/1016] kgsl: gen8: Update GPU recovery sequence Currently, GMU reset is performed after GBIF Halt. Update the sequence to perform GMU reset before GBIF Halt to ensure that the GBIF is in a clean state for the next boot-up. 
Change-Id: I1aabc2db1dfb11f5065d674532ba5d704b3bd43d Signed-off-by: Kamal Agrawal --- adreno_gen8_gmu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index dfde921bcd..1d4159cf25 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -1396,6 +1396,8 @@ static void gen8_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) /* Make sure above writes are committed before we proceed to recovery */ wmb(); + gmu_core_regwrite(device, GEN8_GMUCX_CM3_SYSRESET, 1); + /* Halt CX traffic */ _do_gbif_halt(device, GEN8_GBIF_HALT, GEN8_GBIF_HALT_ACK, GEN8_GBIF_ARB_HALT_MASK, "CX"); From ed75fb4d3dce632bb35407a85f4a8c0d67197ac3 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Mon, 29 Jul 2024 23:05:26 +0530 Subject: [PATCH 0880/1016] kgsl: Update format for always on counters Update format specifier for always on counters to display the values in hexadecimal notation. Change-Id: I6b221bdc458c79b454467c57afd406e0eb01efd9 Signed-off-by: Pankaj Gupta --- adreno_hwsched.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 6e3e17b86d..8ddcc25674 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -2324,13 +2324,13 @@ int adreno_hwsched_wait_ack_completion(struct adreno_device *adreno_dev, if (completion_done(&ack->complete)) { unprocessed++; if (__ratelimit(&_rs)) - dev_err(dev, "Ack unprocessed for id:%d sequence=%d count=%d/%d ticks=%llu/%llu\n", + dev_err(dev, "Ack unprocessed for id:%d sequence=%d count=%d/%d ticks=0x%llx/0x%llx\n", MSG_HDR_GET_ID(ack->sent_hdr), MSG_HDR_GET_SEQNUM(ack->sent_hdr), unprocessed, processed, start, end); return 0; } - dev_err(dev, "Ack timeout for id:%d sequence=%d ticks=%llu/%llu\n", + dev_err(dev, "Ack timeout for id:%d sequence=%d ticks=0x%llx/0x%llx\n", MSG_HDR_GET_ID(ack->sent_hdr), MSG_HDR_GET_SEQNUM(ack->sent_hdr), start, end); gmu_core_fault_snapshot(KGSL_DEVICE(adreno_dev), GMU_FAULT_WAIT_ACK_COMPLETION); return -ETIMEDOUT; @@ 
-2367,14 +2367,14 @@ int adreno_hwsched_ctxt_unregister_wait_completion( if (completion_done(&ack->complete)) { dev_err_ratelimited(dev, - "Ack unprocessed for context unregister seq: %d ctx: %u ts: %u ticks=%llu/%llu\n", + "Ack unprocessed for context unregister seq: %d ctx: %u ts: %u ticks=0x%llx/0x%llx\n", MSG_HDR_GET_SEQNUM(ack->sent_hdr), cmd->ctxt_id, cmd->ts, start, end); return 0; } dev_err_ratelimited(dev, - "Ack timeout for context unregister seq: %d ctx: %u ts: %u ticks=%llu/%llu\n", + "Ack timeout for context unregister seq: %d ctx: %u ts: %u ticks=0x%llx/0x%llx\n", MSG_HDR_GET_SEQNUM(ack->sent_hdr), cmd->ctxt_id, cmd->ts, start, end); return -ETIMEDOUT; } From 7d1afdbd263afa8b819593d63e4d0274dc1d0eb9 Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Tue, 23 Jul 2024 13:47:53 -0700 Subject: [PATCH 0881/1016] kgsl: gen8: Capture CX MISC with GMU snapshot CX MISC has some important registers needed for debug. Capture it first with GMU snapshot before we move on to any GX registers. Also move DTCM after CX MISC registers since DTCM might get skipped if we fail on one of the GMU GX registers. 
Change-Id: I1554117f1234974093fafc703b3f1236c4c9ac2e Signed-off-by: Urvashi Agrawal --- adreno_gen8_gmu_snapshot.c | 20 +++++++++++++++----- adreno_gen8_snapshot.c | 8 ++++---- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/adreno_gen8_gmu_snapshot.c b/adreno_gen8_gmu_snapshot.c index 7c3bd4e536..269707e3e2 100644 --- a/adreno_gen8_gmu_snapshot.c +++ b/adreno_gen8_gmu_snapshot.c @@ -189,6 +189,7 @@ static void gen8_gmu_device_snapshot(struct kgsl_device *device, const struct adreno_gen8_core *gpucore = to_gen8_core(adreno_dev); const struct gen8_snapshot_block_list *gen8_snapshot_block_list = gpucore->gen8_snapshot_block_list; + const u32 *regs_ptr = (const u32 *)gen8_snapshot_block_list->cx_misc_regs; u32 i, slice, j; struct gen8_reg_list_info info = {0}; @@ -202,6 +203,19 @@ static void gen8_gmu_device_snapshot(struct kgsl_device *device, kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot, gen8_snapshot_rscc_registers, (void *) gen8_snapshot_block_list->rscc_regs); + /* + * We want to capture these through AHB path here because we might skip them + * in the crashdumper path if GX is OFF, and these are needed for debug. 
+ */ + if (!kgsl_regmap_valid_offset(&device->regmap, regs_ptr[0])) + WARN_ONCE(1, "cx_misc registers are not defined in device tree"); + else + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, + snapshot, adreno_snapshot_registers_v2, (void *)regs_ptr); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY, + snapshot, gen8_gmu_snapshot_dtcm, gmu); + /* Capture GMU registers which are on CX domain and unsliced */ kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot, adreno_snapshot_registers_v2, @@ -209,7 +223,7 @@ static void gen8_gmu_device_snapshot(struct kgsl_device *device, if (!gen8_gmu_rpmh_pwr_state_is_active(device) || !gen8_gmu_gx_is_on(adreno_dev)) - goto dtcm; + return; /* Set fence to ALLOW mode so registers can be read */ kgsl_regwrite(device, GEN8_GMUAO_AHB_FENCE_CTRL, 0); @@ -226,10 +240,6 @@ static void gen8_gmu_device_snapshot(struct kgsl_device *device, gen8_legacy_snapshot_registers, &info); } } - -dtcm: - kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY, - snapshot, gen8_gmu_snapshot_dtcm, gmu); } void gen8_gmu_snapshot(struct adreno_device *adreno_dev, diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 1e0fe73ae8..cdb24a2f86 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -1628,11 +1628,11 @@ void gen8_snapshot(struct adreno_device *adreno_dev, gen8_crashdump_timedout = false; gen8_snapshot_block_list = gpucore->gen8_snapshot_block_list; - /* External registers are dumped in the beginning of gmu snapshot */ - if (!gmu_core_isenabled(device)) + /* External core and CX MISC regs are dumped in the beginning of gmu snapshot */ + if (!gmu_core_isenabled(device)) { gen8_snapshot_external_core_regs(device, snapshot); - - gen8_cx_misc_regs_snapshot(device, snapshot); + gen8_cx_misc_regs_snapshot(device, snapshot); + } gen8_snapshot_cx_debugbus(adreno_dev, snapshot); From 77fabc3d65dfaf3877293d027079dcb968867080 Mon Sep 17 00:00:00 2001 From: 
Kamal Agrawal Date: Thu, 15 Aug 2024 21:56:19 +0530 Subject: [PATCH 0882/1016] kgsl: iommu: Always use qcom_skip_tlb_management() qcom_skip_tlb_management() is available on all supported kernels. Thus, remove the code in KGSL that assumes this support is not available. Change-Id: Icc2f6af71f4bf53b19e6ae9640167a377ed2150d Signed-off-by: Kamal Agrawal --- kgsl_iommu.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/kgsl_iommu.c b/kgsl_iommu.c index d9ec0db889..f1fb362a48 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -303,25 +303,7 @@ static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) if (unmapped != size) return -EINVAL; - /* - * Skip below logic for 6.1 kernel version and above as - * qcom_skip_tlb_management() API takes care of avoiding - * TLB operations during slumber. - */ flush: - if (KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE) { - struct kgsl_device *device = KGSL_MMU_DEVICE(pt->base.mmu); - - /* Skip TLB Operations if GPU is in slumber */ - if (mutex_trylock(&device->mutex)) { - if (device->state == KGSL_STATE_SLUMBER) { - mutex_unlock(&device->mutex); - return 0; - } - mutex_unlock(&device->mutex); - } - } - kgsl_iommu_flush_tlb(pt->base.mmu); return 0; } @@ -370,7 +352,6 @@ static size_t _iopgtbl_map_sg(struct kgsl_iommu_pt *pt, u64 gpuaddr, static void kgsl_iommu_send_tlb_hint(struct kgsl_mmu *mmu, bool hint) { -#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) struct kgsl_iommu *iommu = &mmu->iommu; /* @@ -380,7 +361,6 @@ static void kgsl_iommu_send_tlb_hint(struct kgsl_mmu *mmu, bool hint) qcom_skip_tlb_management(&iommu->user_context.pdev->dev, hint); if (iommu->lpac_context.domain) qcom_skip_tlb_management(&iommu->lpac_context.pdev->dev, hint); -#endif /* * TLB operations are skipped during slumber. 
Incase CX doesn't From bfc26b62a53f5f0784c8ba4c4d3f3bb41ea96684 Mon Sep 17 00:00:00 2001 From: Himanshu Agrawal Date: Mon, 24 Jun 2024 11:13:41 +0530 Subject: [PATCH 0883/1016] kgsl: a6x: Add support for GEN6_3_26_0 GPU Add changes to support GEN6_3_26_0 GPU. Change-Id: I29f1ca6464a0e79d08e2a469319dfef698fe527d Signed-off-by: Himanshu Agrawal --- adreno-gpulist.h | 26 +++++++++++++++++++++++ adreno.h | 16 +++++++++++--- adreno_a6xx.c | 44 +++++++++++++++++++++++++-------------- adreno_a6xx_perfcounter.c | 2 +- kgsl_pwrctrl.c | 1 + kgsl_pwrctrl.h | 2 +- 6 files changed, 70 insertions(+), 21 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 1d36095fda..5d786c1338 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -1796,6 +1796,31 @@ static const struct adreno_a6xx_core adreno_gpu_core_a662 = { .ctxt_record_size = 2496 * 1024, }; +static const struct adreno_a6xx_core adreno_gpu_core_gen6_3_26_0 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_GEN6_3_26_0, ANY_ID, ANY_ID, ANY_ID, ANY_ID), + .compatible = "qcom,adreno-gpu-gen6-3-26-0", + .features = ADRENO_CONTENT_PROTECTION | ADRENO_IOCOHERENT | + ADRENO_PREEMPTION, + .gpudev = &adreno_a6xx_gpudev, + .perfcounters = &adreno_a6xx_legacy_perfcounters, + .uche_gmem_alignment = SZ_1M, + .gmem_size = (SZ_128K + SZ_4K), + .bus_width = 32, + .snapshot_size = SZ_2M, + }, + .prim_fifo_threshold = 0x00080000, + .sqefw_name = "a630_sqe.fw", + .zap_name = "gen6_3_26_0_zap.mdt", + .hwcg = a612_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a612_hwcg_regs), + .vbif = a640_vbif_regs, + .vbif_count = ARRAY_SIZE(a640_vbif_regs), + .hang_detect_cycles = 0x3fffff, + .protected_regs = a630_protected_regs, + .highest_bank_bit = 15, +}; + extern const struct gen7_snapshot_block_list gen7_0_0_snapshot_block_list; static const struct kgsl_regmap_list gen7_0_0_gbif_regs[] = { @@ -3184,6 +3209,7 @@ static const struct adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_a610.base, &adreno_gpu_core_a611.base, 
&adreno_gpu_core_a660_shima.base, + &adreno_gpu_core_gen6_3_26_0.base, &adreno_gpu_core_a702.base, &adreno_gpu_core_gen7_0_0.base, &adreno_gpu_core_gen7_0_1.base, diff --git a/adreno.h b/adreno.h index 697cd038cc..12a8aedfe1 100644 --- a/adreno.h +++ b/adreno.h @@ -232,11 +232,12 @@ enum adreno_gpurev { ADRENO_REV_A680 = 680, ADRENO_REV_A702 = 702, /* - * Gen7 and higher version numbers may exceed 1 digit + * Version numbers may exceed 1 digit * Bits 16-23: Major * Bits 8-15: Minor * Bits 0-7: Patch id */ + ADRENO_REV_GEN6_3_26_0 = ADRENO_GPUREV_VALUE(3, 26, 0), ADRENO_REV_GEN7_0_0 = ADRENO_GPUREV_VALUE(7, 0, 0), ADRENO_REV_GEN7_0_1 = ADRENO_GPUREV_VALUE(7, 0, 1), ADRENO_REV_GEN7_2_0 = ADRENO_GPUREV_VALUE(7, 2, 0), @@ -1192,8 +1193,9 @@ static inline int adreno_is_a505_or_a506(struct adreno_device *adreno_dev) static inline int adreno_is_a6xx(struct adreno_device *adreno_dev) { - return ADRENO_GPUREV(adreno_dev) >= 600 && - ADRENO_GPUREV(adreno_dev) <= 702; + return (ADRENO_GPUREV(adreno_dev) >= 600 && + ADRENO_GPUREV(adreno_dev) <= 702) || + ADRENO_GPUREV(adreno_dev) == ADRENO_REV_GEN6_3_26_0; } static inline int adreno_is_a660_shima(struct adreno_device *adreno_dev) @@ -1217,6 +1219,7 @@ ADRENO_TARGET(a640, ADRENO_REV_A640) ADRENO_TARGET(a650, ADRENO_REV_A650) ADRENO_TARGET(a663, ADRENO_REV_A663) ADRENO_TARGET(a680, ADRENO_REV_A680) +ADRENO_TARGET(gen6_3_26_0, ADRENO_REV_GEN6_3_26_0) ADRENO_TARGET(a702, ADRENO_REV_A702) /* A635 is derived from A660 and shares same logic */ @@ -1288,6 +1291,13 @@ static inline int adreno_is_a610_family(struct adreno_device *adreno_dev) return (rev == ADRENO_REV_A610 || rev == ADRENO_REV_A611); } +static inline int adreno_is_a612_family(struct adreno_device *adreno_dev) +{ + unsigned int rev = ADRENO_GPUREV(adreno_dev); + + return (rev == ADRENO_REV_A612 || rev == ADRENO_REV_GEN6_3_26_0); +} + static inline int adreno_is_a640v2(struct adreno_device *adreno_dev) { return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A640) && diff --git 
a/adreno_a6xx.c b/adreno_a6xx.c index 420c72c3fa..b2828fb52e 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -200,7 +200,8 @@ int a6xx_init(struct adreno_device *adreno_dev) if (of_fdt_get_ddrtype() == 0x7) { if (adreno_is_a660_shima(adreno_dev) || adreno_is_a635(adreno_dev) || - adreno_is_a662(adreno_dev)) + adreno_is_a662(adreno_dev) || + adreno_is_gen6_3_26_0(adreno_dev)) adreno_dev->highest_bank_bit = 14; else if ((adreno_is_a650(adreno_dev) || adreno_is_a660(adreno_dev))) @@ -275,7 +276,7 @@ __get_rbbm_clock_cntl_on(struct adreno_device *adreno_dev) { if (adreno_is_a630(adreno_dev)) return 0x8AA8AA02; - else if (adreno_is_a612(adreno_dev) || adreno_is_a610_family(adreno_dev)) + else if (adreno_is_a612_family(adreno_dev) || adreno_is_a610_family(adreno_dev)) return 0xAAA8AA82; else if (adreno_is_a702(adreno_dev)) return 0xAAAAAA82; @@ -365,6 +366,22 @@ static void a6xx_hwcg_set(struct adreno_device *adreno_dev, bool on) kgsl_regread(device, A6XX_RBBM_CLOCK_CNTL, &value); + /* + * GBIF L2 CGC control is not part of the UCHE and is enabled by + * default. Hence modify the register when CGC disabled is + * requested. + * Note: The below programming will need modification in case + * of change in the register reset value in future. + */ + if (!on) + kgsl_regrmw(device, A6XX_UCHE_GBIF_GX_CONFIG, GENMASK(18, 16), + FIELD_PREP(GENMASK(18, 16), 0)); + + /* Recommended to always disable GBIF_CX_CONFIG for gen6_3_26_0 */ + if (adreno_is_gen6_3_26_0(adreno_dev)) + kgsl_regrmw(device, A6XX_GBIF_CX_CONFIG, GENMASK(18, 16), + FIELD_PREP(GENMASK(18, 16), 0)); + if (value == __get_rbbm_clock_cntl_on(adreno_dev) && on) return; @@ -377,7 +394,7 @@ static void a6xx_hwcg_set(struct adreno_device *adreno_dev, bool on) * Hence skip GMU_GX registers for A12 and A610. 
*/ - if (gmu_core_isenabled(device) && !adreno_is_a612(adreno_dev) && + if (gmu_core_isenabled(device) && !adreno_is_a612_family(adreno_dev) && !adreno_is_a610_family(adreno_dev) && !adreno_is_a702(adreno_dev)) gmu_core_regrmw(device, A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0); @@ -388,16 +405,12 @@ static void a6xx_hwcg_set(struct adreno_device *adreno_dev, bool on) kgsl_regwrite(device, a6xx_core->hwcg[i].offset, on ? a6xx_core->hwcg[i].val : 0); - /* GBIF L2 CGC control is not part of the UCHE */ - kgsl_regrmw(device, A6XX_UCHE_GBIF_GX_CONFIG, 0x70000, - FIELD_PREP(GENMASK(18, 16), on ? 2 : 0)); - /* * Enable SP clock after programming HWCG registers. * A612 and A610 GPU is not having the GX power domain. * Hence skip GMU_GX registers for A612. */ - if (gmu_core_isenabled(device) && !adreno_is_a612(adreno_dev) && + if (gmu_core_isenabled(device) && !adreno_is_a612_family(adreno_dev) && !adreno_is_a610_family(adreno_dev) && !adreno_is_a702(adreno_dev)) gmu_core_regrmw(device, A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1); @@ -620,14 +633,13 @@ void a6xx_start(struct adreno_device *adreno_dev) kgsl_regwrite(device, A6XX_UCHE_CACHE_WAYS, 0x4); /* ROQ sizes are twice as big on a640/a680 than on a630 */ - if ((ADRENO_GPUREV(adreno_dev) >= ADRENO_REV_A640) && - !adreno_is_a702(adreno_dev)) { - kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140); - kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362C); - } else if (adreno_is_a612(adreno_dev) || adreno_is_a610_family(adreno_dev) || - adreno_is_a702(adreno_dev)) { + if (adreno_is_a612_family(adreno_dev) || adreno_is_a610_family(adreno_dev) || + adreno_is_a702(adreno_dev)) { kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_2, 0x00800060); kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_1, 0x40201b16); + } else if (ADRENO_GPUREV(adreno_dev) >= ADRENO_REV_A640) { + kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140); + kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362C); } else { 
kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_2, 0x010000C0); kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362C); @@ -642,8 +654,8 @@ void a6xx_start(struct adreno_device *adreno_dev) kgsl_regwrite(device, A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020); } - if (adreno_is_a612(adreno_dev) || adreno_is_a610_family(adreno_dev)) { - /* For A612 and A610 Mem pool size is reduced to 48 */ + if (adreno_is_a612_family(adreno_dev) || adreno_is_a610_family(adreno_dev)) { + /* For A612, gen6_3_26_0 and A610 Mem pool size is reduced to 48 */ kgsl_regwrite(device, A6XX_CP_MEM_POOL_SIZE, 48); kgsl_regwrite(device, A6XX_CP_MEM_POOL_DBG_ADDR, 47); } else if (adreno_is_a702(adreno_dev)) { diff --git a/adreno_a6xx_perfcounter.c b/adreno_a6xx_perfcounter.c index 5bf1bd6f05..b0eab70b50 100644 --- a/adreno_a6xx_perfcounter.c +++ b/adreno_a6xx_perfcounter.c @@ -985,7 +985,7 @@ static const struct adreno_perfcount_group a6xx_hwsched_perfcounter_groups NULL), }; -/* a610, a612, a616, a618 and a619 do not have the GMU registers. +/* a610, a612, gen6_3_26_0, a616, a618 and a619 do not have the GMU registers. 
* a605, a608, a615, a630, a640 and a680 don't have enough room in the * CP_PROTECT registers so the GMU counters are not accessible */ diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index dcff7d8b6a..5e73aff81d 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -50,6 +50,7 @@ static const char * const clocks[KGSL_MAX_CLKS] = { "ahb_clk", "smmu_vote", "apb_pclk", + "hub_cx_int_clk", }; static void kgsl_pwrctrl_clk(struct kgsl_device *device, bool state, diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index a567d0feca..4c59de95cc 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -12,7 +12,7 @@ /***************************************************************************** * power flags ****************************************************************************/ -#define KGSL_MAX_CLKS 18 +#define KGSL_MAX_CLKS 19 #define KGSL_MAX_PWRLEVELS 32 From 90e1c38c7cd1ca79a125dedeeec47a60953b195f Mon Sep 17 00:00:00 2001 From: Puranam V G Tejaswi Date: Wed, 1 Nov 2023 22:20:35 +0530 Subject: [PATCH 0884/1016] kgsl: gmu: Use upstream header file for regulator level macros RPMH_REGULATOR_LEVEL_LOW_SVS and RPMH_REGULATOR_LEVEL_SVS are also defined in upstream header dt-bindings/power/qcom-rpmpd.h. Use this file instead of a downstream header file. This helps to avoid compilation issues when kgsl is compiled with upstream kernel. Change-Id: I05d4cea6b8919d310e04b98d8133e978d8aeee8d Signed-off-by: Puranam V G Tejaswi --- adreno_a6xx_gmu.c | 2 +- adreno_gen7_gmu.c | 2 +- adreno_gen8_gmu.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index 436821424c..523cd77842 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -4,7 +4,7 @@ * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ -#include +#include #include #include #include diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index b47ca1028b..afd115802d 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -4,7 +4,7 @@ * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ -#include +#include #include #include #include diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index dfde921bcd..48ccfbb612 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -4,7 +4,7 @@ * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ -#include +#include #include #include #include From d7bcc0ee7a479137c4c1c95a0381e61b3ac869d0 Mon Sep 17 00:00:00 2001 From: Puranam V G Tejaswi Date: Wed, 11 Oct 2023 18:28:00 +0530 Subject: [PATCH 0885/1016] kgsl: Include minidump.h only when minidump config is enabled The header file minidump.h can be included only when the config CONFIG_QCOM_VA_MINIDUMP is enabled. Change-Id: Iaa723e3a96622135e295c449bffeffebd0b19a12 Signed-off-by: Puranam V G Tejaswi --- kgsl_util.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kgsl_util.c b/kgsl_util.c index 718171e7bc..5fedfbac7b 100644 --- a/kgsl_util.c +++ b/kgsl_util.c @@ -23,7 +23,6 @@ #include #include #include -#include #include "adreno.h" #include "kgsl_util.h" @@ -226,6 +225,8 @@ void kgsl_hwunlock(struct cpu_gpu_lock *lock) } #if IS_ENABLED(CONFIG_QCOM_VA_MINIDUMP) +#include + void kgsl_add_to_minidump(char *name, u64 virt_addr, u64 phy_addr, size_t size) { struct md_region md_entry = {0}; From d5188a1ea8381654d5d4944a163b1c4b3789ae52 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Mon, 25 Mar 2024 12:33:31 -0700 Subject: [PATCH 0886/1016] kgsl: gen8: Enable L1A preemption for gen8_0_x Enable L1A preemption on gen8_0_x GPU to run multiple software contexts concurrently. 
Change-Id: I8bda38ad8306eb613c2d1545f6e8522652aea1eb Signed-off-by: Hareesh Gundu Signed-off-by: Carter Cooper --- adreno-gpulist.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 1d36095fda..75e6570640 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2910,6 +2910,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4), .noc_timeout_us = 3410, /* 3.41 msec */ .ctxt_record_size = (13536 * SZ_1K), + .preempt_level = 1, }; static const struct adreno_gen8_core adreno_gpu_core_gen8_0_1 = { @@ -2949,6 +2950,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_1 = { .acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4), .noc_timeout_us = 3410, /* 3.41 msec */ .ctxt_record_size = (13536 * SZ_1K), + .preempt_level = 1, }; /* GEN8_4_0 noncontext register list */ From 78ce641bd4f127b9eb63b6d9be53e3f0c162df7d Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Fri, 23 Aug 2024 11:50:53 +0530 Subject: [PATCH 0887/1016] kgsl: pwrctrl: Remove QOS constraint update for Tj mitigation KGSL is directly registered as a thermal cooling device. Currently, it updates the QOS constraint for Tj mitigation. Since KGSL handles thermal requests directly, there is no need to update QOS constraint. Remove this to avoid duplicate tracking. 
Change-Id: Ia777ed0f827e4ddc1fd3d0e375957b9f0c32a4e0 Signed-off-by: Kamal Agrawal --- kgsl_pwrctrl.c | 12 ------------ kgsl_pwrctrl.h | 2 -- 2 files changed, 14 deletions(-) diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 8bea9980e9..98395ece72 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -1846,7 +1846,6 @@ static int kgsl_cooling_set_cur_state(struct thermal_cooling_device *cooling_dev mutex_unlock(&device->mutex); - queue_work(kgsl_driver.workqueue, &pwr->cooling_ws); return 0; } @@ -1856,15 +1855,6 @@ static const struct thermal_cooling_device_ops kgsl_cooling_ops = { .set_cur_state = kgsl_cooling_set_cur_state, }; -static void do_pmqos_update(struct work_struct *work) -{ - struct kgsl_pwrctrl *pwr = container_of(work, struct kgsl_pwrctrl, cooling_ws); - u32 thermal_pwrlevel = READ_ONCE(pwr->thermal_pwrlevel); - u32 freq = pwr->pwrlevels[thermal_pwrlevel].gpu_freq; - - dev_pm_qos_update_request(&pwr->pmqos_max_freq, DIV_ROUND_UP(freq, HZ_PER_KHZ)); -} - static int register_thermal_cooling_device(struct kgsl_device *device, struct device_node *np) { struct kgsl_pwrctrl *pwr = &device->pwrctrl; @@ -1876,8 +1866,6 @@ static int register_thermal_cooling_device(struct kgsl_device *device, struct de if (ret) goto err; - INIT_WORK(&pwr->cooling_ws, do_pmqos_update); - pwr->cooling_dev = thermal_of_cooling_device_register(np, name, device, &kgsl_cooling_ops); if (IS_ERR(pwr->cooling_dev)) { diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index a567d0feca..ba7fb8764f 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -178,8 +178,6 @@ struct kgsl_pwrctrl { struct thermal_cooling_device *cooling_dev; /* pmqos_max_freq: Handle to raise PMQOS MAX FREQUENCY request */ struct dev_pm_qos_request pmqos_max_freq; - /* cooling_ws: Work which updates PMQOS during thermal event */ - struct work_struct cooling_ws; /** @time_in_pwrlevel: Each pwrlevel active duration in usec */ u64 time_in_pwrlevel[KGSL_MAX_PWRLEVELS]; /** @last_stat_updated: The last time stats were updated 
*/ From cb24bdddaeb4a5cf07d8611bc63c004982012f29 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 22 Aug 2024 09:58:19 +0530 Subject: [PATCH 0888/1016] kgsl: pwrctrl: Aggregate PM QOS and thermal requests properly Consider a scenario where there is no thermal constraint (i.e., thermal_pwrlevel = 0), but there is a constraint imposed by sysfs through PM QOS. Once the PM QOS constraint is removed, KGSL ignores this request because the PM QOS max power level is same as the thermal constraint. Remove this condition and aggregate the constraints in existing _adjust_pwrlevel() function. Change-Id: I41a48586daf24e23240663d06617f1dac7e22d65 Signed-off-by: Kamal Agrawal --- kgsl_pwrctrl.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 98395ece72..b6fff703ab 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -1765,7 +1765,6 @@ static int pmqos_max_notifier_call(struct notifier_block *nb, unsigned long val, struct kgsl_device *device = container_of(pwr, struct kgsl_device, pwrctrl); u32 max_freq = val * 1000; int level; - u32 thermal_pwrlevel; if (!device->pwrscale.devfreq_enabled) return NOTIFY_DONE; @@ -1784,14 +1783,6 @@ static int pmqos_max_notifier_call(struct notifier_block *nb, unsigned long val, pwr->pmqos_max_pwrlevel = level; - /* - * Since thermal constraint is already applied prior to this, if qos constraint is same as - * thermal constraint, we can return early here. - */ - thermal_pwrlevel = READ_ONCE(pwr->thermal_pwrlevel); - if (pwr->pmqos_max_pwrlevel == thermal_pwrlevel) - return NOTIFY_OK; - trace_kgsl_thermal_constraint(max_freq); mutex_lock(&device->mutex); From 10b75cce131d55a5bd43185c5a1c6019aeff91d7 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 22 Aug 2024 15:41:01 +0530 Subject: [PATCH 0889/1016] kgsl: gmu: Add sanity checks in gmu_core_set_vrb_register Add sanity checks in the gmu_core_set_vrb_register() to ensure the validity of the vrb pointer and the index value. 
Change-Id: I9b4370faec0edd70bfe79238f6b8e3d501ffbc19 Signed-off-by: Kamal Agrawal --- adreno_a6xx_hwsched.c | 11 +++++++---- adreno_gen7_hwsched.c | 11 +++++++---- adreno_gen8_hwsched.c | 18 ++++++++++++------ kgsl_gmu_core.c | 17 ++++++++++++++--- kgsl_gmu_core.h | 6 ++++-- 5 files changed, 44 insertions(+), 19 deletions(-) diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index d1f566847c..1baddf4aca 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -438,6 +438,7 @@ static void hwsched_idle_timer(struct timer_list *t) static int a6xx_hwsched_gmu_memory_init(struct adreno_device *adreno_dev) { struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); + int ret; /* GMU Virtual register bank */ if (IS_ERR_OR_NULL(gmu->vrb)) { @@ -448,8 +449,9 @@ static int a6xx_hwsched_gmu_memory_init(struct adreno_device *adreno_dev) return PTR_ERR(gmu->vrb); /* Populate size of the virtual register bank */ - gmu_core_set_vrb_register(gmu->vrb->hostptr, VRB_SIZE_IDX, - gmu->vrb->size >> 2); + ret = gmu_core_set_vrb_register(gmu->vrb, VRB_SIZE_IDX, gmu->vrb->size >> 2); + if (ret) + return ret; } /* GMU trace log */ @@ -461,9 +463,10 @@ static int a6xx_hwsched_gmu_memory_init(struct adreno_device *adreno_dev) return PTR_ERR(gmu->trace.md); /* Pass trace buffer address to GMU through the VRB */ - gmu_core_set_vrb_register(gmu->vrb->hostptr, - VRB_TRACE_BUFFER_ADDR_IDX, + ret = gmu_core_set_vrb_register(gmu->vrb, VRB_TRACE_BUFFER_ADDR_IDX, gmu->trace.md->gmuaddr); + if (ret) + return ret; /* Initialize the GMU trace buffer header */ gmu_core_trace_header_init(&gmu->trace); diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 93d0a3b670..ff9065d9d2 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -595,6 +595,7 @@ static int gen7_gmu_warmboot_init(struct adreno_device *adreno_dev) static int gen7_hwsched_gmu_memory_init(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; /* GMU Virtual 
register bank */ if (IS_ERR_OR_NULL(gmu->vrb)) { @@ -605,8 +606,9 @@ static int gen7_hwsched_gmu_memory_init(struct adreno_device *adreno_dev) return PTR_ERR(gmu->vrb); /* Populate size of the virtual register bank */ - gmu_core_set_vrb_register(gmu->vrb->hostptr, VRB_SIZE_IDX, - gmu->vrb->size >> 2); + ret = gmu_core_set_vrb_register(gmu->vrb, VRB_SIZE_IDX, gmu->vrb->size >> 2); + if (ret) + return ret; } /* GMU trace log */ @@ -618,9 +620,10 @@ static int gen7_hwsched_gmu_memory_init(struct adreno_device *adreno_dev) return PTR_ERR(gmu->trace.md); /* Pass trace buffer address to GMU through the VRB */ - gmu_core_set_vrb_register(gmu->vrb->hostptr, - VRB_TRACE_BUFFER_ADDR_IDX, + ret = gmu_core_set_vrb_register(gmu->vrb, VRB_TRACE_BUFFER_ADDR_IDX, gmu->trace.md->gmuaddr); + if (ret) + return ret; /* Initialize the GMU trace buffer header */ gmu_core_trace_header_init(&gmu->trace); diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 1c78e36bf1..63f78b11a2 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -195,8 +195,11 @@ static void _get_hw_fence_entries(struct adreno_device *adreno_dev) of_node_put(node); - gmu_core_set_vrb_register(gmu->vrb->hostptr, VRB_HW_FENCE_SHADOW_NUM_ENTRIES, - shadow_num_entries); + /* + * The return value is ignored as it does not need to be returned to the caller. + * Any errors are logged within the VRB set API if a failure occurs. 
+ */ + gmu_core_set_vrb_register(gmu->vrb, VRB_HW_FENCE_SHADOW_NUM_ENTRIES, shadow_num_entries); } static void gen8_hwsched_soccp_vote_init(struct adreno_device *adreno_dev) @@ -680,6 +683,7 @@ static int gen8_gmu_warmboot_init(struct adreno_device *adreno_dev) static int gen8_hwsched_gmu_memory_init(struct adreno_device *adreno_dev) { struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + int ret; /* GMU Virtual register bank */ if (IS_ERR_OR_NULL(gmu->vrb)) { @@ -690,8 +694,9 @@ static int gen8_hwsched_gmu_memory_init(struct adreno_device *adreno_dev) return PTR_ERR(gmu->vrb); /* Populate size of the virtual register bank */ - gmu_core_set_vrb_register(gmu->vrb->hostptr, VRB_SIZE_IDX, - gmu->vrb->size >> 2); + ret = gmu_core_set_vrb_register(gmu->vrb, VRB_SIZE_IDX, gmu->vrb->size >> 2); + if (ret) + return ret; } /* GMU trace log */ @@ -703,9 +708,10 @@ static int gen8_hwsched_gmu_memory_init(struct adreno_device *adreno_dev) return PTR_ERR(gmu->trace.md); /* Pass trace buffer address to GMU through the VRB */ - gmu_core_set_vrb_register(gmu->vrb->hostptr, - VRB_TRACE_BUFFER_ADDR_IDX, + ret = gmu_core_set_vrb_register(gmu->vrb, VRB_TRACE_BUFFER_ADDR_IDX, gmu->trace.md->gmuaddr); + if (ret) + return ret; /* Initialize the GMU trace buffer header */ gmu_core_trace_header_init(&gmu->trace); diff --git a/kgsl_gmu_core.c b/kgsl_gmu_core.c index 64bb616616..2f7906de3d 100644 --- a/kgsl_gmu_core.c +++ b/kgsl_gmu_core.c @@ -220,14 +220,25 @@ void gmu_core_dev_force_first_boot(struct kgsl_device *device) return ops->force_first_boot(device); } -void gmu_core_set_vrb_register(void *ptr, u32 index, u32 val) +int gmu_core_set_vrb_register(struct kgsl_memdesc *vrb, u32 index, u32 val) { - u32 *vrb = ptr; + u32 *vrb_buf; - vrb[index] = val; + if (WARN_ON(IS_ERR_OR_NULL(vrb))) + return -ENODEV; + + if (WARN_ON(index >= (vrb->size >> 2))) { + pr_err("kgsl: Unable to set VRB register for index %u\n", index); + return -EINVAL; + } + + vrb_buf = vrb->hostptr; + vrb_buf[index] = 
val; /* Make sure the vrb write is posted before moving ahead */ wmb(); + + return 0; } static void stream_trace_data(struct gmu_trace_packet *pkt) diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h index 11cb4ffd0a..25bffb2de1 100644 --- a/kgsl_gmu_core.h +++ b/kgsl_gmu_core.h @@ -539,11 +539,13 @@ void gmu_core_dev_force_first_boot(struct kgsl_device *device); /** * gmu_core_set_vrb_register - set vrb register value at specified index - * @ptr: vrb host pointer + * @vrb: GMU virtual register bank memory * @index: vrb index to write the value * @val: value to be writen into vrb + * + * Return: Negative error on failure and zero on success. */ -void gmu_core_set_vrb_register(void *ptr, u32 index, u32 val); +int gmu_core_set_vrb_register(struct kgsl_memdesc *vrb, u32 index, u32 val); /** * gmu_core_process_trace_data - Process gmu trace buffer data writes to default linux trace buffer From 4defc08550e24b29ebace33884f4557fa5bc4b41 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Mon, 19 Aug 2024 19:43:25 +0530 Subject: [PATCH 0890/1016] kgsl: gen8: Read preemption count from VRB Currently, the preemption count is displayed as 0 for hardware scheduler targets when GPU is inactive. However, it displays proper value for software scheduler. Fix this by reading the preemption count from VRB. While at it, update VRB enum as well. 
Change-Id: Iae7cefb0458ffe1e7ff3ee1c239a0666820d7278 Signed-off-by: Kamal Agrawal --- adreno_gen8_hwsched_hfi.c | 8 +++++++- kgsl_gmu_core.c | 18 ++++++++++++++++++ kgsl_gmu_core.h | 14 ++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 3d01fd438b..f0cfa72d66 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -3682,11 +3682,17 @@ void gen8_hwsched_context_detach(struct adreno_context *drawctxt) u32 gen8_hwsched_preempt_count_get(struct adreno_device *adreno_dev) { + struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret, preempt_count = 0; - if (device->state != KGSL_STATE_ACTIVE) + ret = gmu_core_get_vrb_register(gmu->vrb, VRB_PREEMPT_COUNT_TOTAL, &preempt_count); + if (ret) return 0; + if ((preempt_count != 0) || (device->state != KGSL_STATE_ACTIVE)) + return preempt_count; + return gen8_hwsched_hfi_get_value(adreno_dev, HFI_VALUE_PREEMPT_COUNT); } diff --git a/kgsl_gmu_core.c b/kgsl_gmu_core.c index 2f7906de3d..3a5ae869ae 100644 --- a/kgsl_gmu_core.c +++ b/kgsl_gmu_core.c @@ -241,6 +241,24 @@ int gmu_core_set_vrb_register(struct kgsl_memdesc *vrb, u32 index, u32 val) return 0; } +int gmu_core_get_vrb_register(struct kgsl_memdesc *vrb, u32 index, u32 *val) +{ + u32 *vrb_buf; + + if (IS_ERR_OR_NULL(vrb)) + return -ENODEV; + + if (WARN_ON(index >= (vrb->size >> 2))) { + pr_err("kgsl: Unable to get VRB register for index %u\n", index); + return -EINVAL; + } + + vrb_buf = vrb->hostptr; + *val = vrb_buf[index]; + + return 0; +} + static void stream_trace_data(struct gmu_trace_packet *pkt) { switch (pkt->trace_id) { diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h index 25bffb2de1..d330cefa69 100644 --- a/kgsl_gmu_core.h +++ b/kgsl_gmu_core.h @@ -185,6 +185,10 @@ enum gmu_vrb_idx { VRB_TRACE_BUFFER_ADDR_IDX = 2, /* Contains the number of hw fence shadow table entries */ VRB_HW_FENCE_SHADOW_NUM_ENTRIES 
= 3, + /* Contains OpenCL no fault tolerance timeout in ms */ + VRB_CL_NO_FT_TIMEOUT = 4, + /* Contains the total number of GPU preemptions */ + VRB_PREEMPT_COUNT_TOTAL = 5, }; /* For GMU Trace */ @@ -547,6 +551,16 @@ void gmu_core_dev_force_first_boot(struct kgsl_device *device); */ int gmu_core_set_vrb_register(struct kgsl_memdesc *vrb, u32 index, u32 val); +/** + * gmu_core_get_vrb_register - get vrb register value at specified index + * @vrb: GMU virtual register bank memory + * @index: vrb index to write the value + * @val: Pointer to update the data after reading from vrb + * + * Return: Negative error on failure and zero on success. + */ +int gmu_core_get_vrb_register(struct kgsl_memdesc *vrb, u32 index, u32 *val); + /** * gmu_core_process_trace_data - Process gmu trace buffer data writes to default linux trace buffer * @device: Pointer to KGSL device From 949bc75ce60244f96dc789492784ca6b26f81342 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Wed, 28 Aug 2024 22:08:14 +0530 Subject: [PATCH 0891/1016] kgsl: Remove unnecessary mutex in ioctl to read context timestamp KGSL device mutex is not required while reading context timestamp in IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID ioctl. Remove this to reduce device mutex contention. 
Change-Id: I6c774654b31a1f0f91edb095ae71932ab2bae979 Signed-off-by: Kamal Agrawal --- kgsl.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kgsl.c b/kgsl.c index da8ae3ec85..3dfcca0826 100644 --- a/kgsl.c +++ b/kgsl.c @@ -2582,7 +2582,6 @@ long kgsl_ioctl_cmdstream_readtimestamp_ctxtid(struct kgsl_device_private struct kgsl_context *context; long result = -EINVAL; - mutex_lock(&device->mutex); context = kgsl_context_get_owner(dev_priv, param->context_id); if (context) { @@ -2594,7 +2593,6 @@ long kgsl_ioctl_cmdstream_readtimestamp_ctxtid(struct kgsl_device_private } kgsl_context_put(context); - mutex_unlock(&device->mutex); return result; } From f3e493983f36d6192b6bb7b0e3e3cfa4ef500569 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 29 Aug 2024 07:49:42 +0530 Subject: [PATCH 0892/1016] kgsl: coresight: Remove explicit pre-hardware access validation kgsl_reg* APIs handle pre-hardware access validation internally. Thus, remove explicit check from _adreno_coresight_get_and_clear(). Change-Id: I9f3fa58612983448c9b2539f5ee50062af076fdc Signed-off-by: Kamal Agrawal --- adreno_coresight.c | 1 - 1 file changed, 1 deletion(-) diff --git a/adreno_coresight.c b/adreno_coresight.c index 6740b44107..684e541d59 100644 --- a/adreno_coresight.c +++ b/adreno_coresight.c @@ -116,7 +116,6 @@ static void _adreno_coresight_get_and_clear(struct adreno_device *adreno_dev, if (IS_ERR_OR_NULL(adreno_csdev->dev) || !adreno_csdev->enabled) return; - kgsl_pre_hwaccess(device); /* * Save the current value of each coresight register * and then clear each register From c6efb6730171553b7855d6f299326e7413f8859b Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 29 Aug 2024 08:50:30 +0530 Subject: [PATCH 0893/1016] kgsl: Make default vote setup function generation agnostic The logic for setting up the default vote is independent of gpu generation. Thus, move it to generation agnostic file to reduce code duplication and improve maintainability. 
Change-Id: I786f73c5de69fb3e9dfe448af83223f0b1432df9 Signed-off-by: Kamal Agrawal --- adreno_a6xx_hfi.c | 8 +------- adreno_a6xx_hwsched_hfi.c | 10 +--------- adreno_a6xx_rgmu.c | 7 +------ adreno_gen7_hfi.c | 8 +------- adreno_gen7_hwsched_hfi.c | 18 ++---------------- adreno_gen8_hfi.c | 8 +------- adreno_gen8_hwsched_hfi.c | 18 ++---------------- kgsl_pwrctrl.c | 15 ++++++++++++++- kgsl_pwrctrl.h | 10 +++++++++- 9 files changed, 32 insertions(+), 70 deletions(-) diff --git a/adreno_a6xx_hfi.c b/adreno_a6xx_hfi.c index 37367a305f..b4e408b6c2 100644 --- a/adreno_a6xx_hfi.c +++ b/adreno_a6xx_hfi.c @@ -740,13 +740,7 @@ int a6xx_hfi_start(struct adreno_device *adreno_dev) set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags); - /* Request default DCVS level */ - result = kgsl_pwrctrl_set_default_gpu_pwrlevel(device); - if (result) - goto err; - - /* Request default BW vote */ - result = kgsl_pwrctrl_axi(device, true); + result = kgsl_pwrctrl_setup_default_votes(device); err: if (result) diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index 0a23cc2936..20e10503a3 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -1285,15 +1285,7 @@ int a6xx_hwsched_hfi_start(struct adreno_device *adreno_dev) set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags); - /* Request default DCVS level */ - ret = kgsl_pwrctrl_set_default_gpu_pwrlevel(device); - if (ret) - goto err; - - /* Request default BW vote */ - ret = kgsl_pwrctrl_axi(device, true); - if (ret) - goto err; + ret = kgsl_pwrctrl_setup_default_votes(device); err: if (ret) diff --git a/adreno_a6xx_rgmu.c b/adreno_a6xx_rgmu.c index f5f8a94471..b4c37ce3a0 100644 --- a/adreno_a6xx_rgmu.c +++ b/adreno_a6xx_rgmu.c @@ -796,12 +796,7 @@ static int a6xx_rgmu_boot(struct adreno_device *adreno_dev) if (ret) goto err; - /* Request default DCVS level */ - ret = kgsl_pwrctrl_set_default_gpu_pwrlevel(device); - if (ret) - goto err; - - ret = kgsl_pwrctrl_axi(device, true); + ret = 
kgsl_pwrctrl_setup_default_votes(device); if (ret) goto err; diff --git a/adreno_gen7_hfi.c b/adreno_gen7_hfi.c index 39adc35b65..b384456192 100644 --- a/adreno_gen7_hfi.c +++ b/adreno_gen7_hfi.c @@ -772,13 +772,7 @@ int gen7_hfi_start(struct adreno_device *adreno_dev) set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags); - /* Request default DCVS level */ - result = kgsl_pwrctrl_set_default_gpu_pwrlevel(device); - if (result) - goto err; - - /* Request default BW vote */ - result = kgsl_pwrctrl_axi(device, true); + result = kgsl_pwrctrl_setup_default_votes(device); err: if (result) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 015eeff51c..2cbfe5ed16 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -2300,20 +2300,6 @@ int gen7_hwsched_boot_gpu(struct adreno_device *adreno_dev) return gen7_hwsched_coldboot_gpu(adreno_dev); } -static int gen7_hwsched_setup_default_votes(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - int ret = 0; - - /* Request default DCVS level */ - ret = kgsl_pwrctrl_set_default_gpu_pwrlevel(device); - if (ret) - return ret; - - /* Request default BW vote */ - return kgsl_pwrctrl_axi(device, true); -} - int gen7_hwsched_warmboot_init_gmu(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); @@ -2329,7 +2315,7 @@ int gen7_hwsched_warmboot_init_gmu(struct adreno_device *adreno_dev) set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags); - ret = gen7_hwsched_setup_default_votes(adreno_dev); + ret = kgsl_pwrctrl_setup_default_votes(KGSL_DEVICE(adreno_dev)); err: if (ret) { @@ -2511,7 +2497,7 @@ int gen7_hwsched_hfi_start(struct adreno_device *adreno_dev) if (adreno_dev->warmboot_enabled) set_bit(GMU_PRIV_WARMBOOT_GMU_INIT_DONE, &gmu->flags); - ret = gen7_hwsched_setup_default_votes(adreno_dev); + ret = kgsl_pwrctrl_setup_default_votes(KGSL_DEVICE(adreno_dev)); err: if (ret) diff --git a/adreno_gen8_hfi.c b/adreno_gen8_hfi.c index 
1a79a9d1d3..80478f4ed4 100644 --- a/adreno_gen8_hfi.c +++ b/adreno_gen8_hfi.c @@ -782,13 +782,7 @@ int gen8_hfi_start(struct adreno_device *adreno_dev) set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags); - /* Request default DCVS level */ - result = kgsl_pwrctrl_set_default_gpu_pwrlevel(device); - if (result) - goto err; - - /* Request default BW vote */ - result = kgsl_pwrctrl_axi(device, true); + result = kgsl_pwrctrl_setup_default_votes(device); err: if (result) diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 3d01fd438b..d67be92e7d 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -2155,20 +2155,6 @@ int gen8_hwsched_boot_gpu(struct adreno_device *adreno_dev) return gen8_hwsched_coldboot_gpu(adreno_dev); } -static int gen8_hwsched_setup_default_votes(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - int ret = 0; - - /* Request default DCVS level */ - ret = kgsl_pwrctrl_set_default_gpu_pwrlevel(device); - if (ret) - return ret; - - /* Request default BW vote */ - return kgsl_pwrctrl_axi(device, true); -} - int gen8_hwsched_warmboot_init_gmu(struct adreno_device *adreno_dev) { struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); @@ -2184,7 +2170,7 @@ int gen8_hwsched_warmboot_init_gmu(struct adreno_device *adreno_dev) set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags); - ret = gen8_hwsched_setup_default_votes(adreno_dev); + ret = kgsl_pwrctrl_setup_default_votes(KGSL_DEVICE(adreno_dev)); err: if (ret) { @@ -2355,7 +2341,7 @@ int gen8_hwsched_hfi_start(struct adreno_device *adreno_dev) if (adreno_dev->warmboot_enabled) set_bit(GMU_PRIV_WARMBOOT_GMU_INIT_DONE, &gmu->flags); - ret = gen8_hwsched_setup_default_votes(adreno_dev); + ret = kgsl_pwrctrl_setup_default_votes(KGSL_DEVICE(adreno_dev)); err: if (ret) diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 8bea9980e9..d67e7a0a93 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -2535,7 +2535,7 @@ int kgsl_active_count_wait(struct 
kgsl_device *device, int count, * kgsl_pwrctrl_set_default_gpu_pwrlevel() - Set GPU to default power level * @device: Pointer to the kgsl_device struct */ -int kgsl_pwrctrl_set_default_gpu_pwrlevel(struct kgsl_device *device) +static int kgsl_pwrctrl_set_default_gpu_pwrlevel(struct kgsl_device *device) { struct kgsl_pwrctrl *pwr = &device->pwrctrl; unsigned int new_level = pwr->default_pwrlevel; @@ -2554,6 +2554,19 @@ int kgsl_pwrctrl_set_default_gpu_pwrlevel(struct kgsl_device *device) return device->ftbl->gpu_clock_set(device, pwr->active_pwrlevel); } +int kgsl_pwrctrl_setup_default_votes(struct kgsl_device *device) +{ + int ret; + + /* Request default DCVS level */ + ret = kgsl_pwrctrl_set_default_gpu_pwrlevel(device); + if (ret) + return ret; + + /* Request default BW vote */ + return kgsl_pwrctrl_axi(device, true); +} + u32 kgsl_pwrctrl_get_acv_perfmode_lvl(struct kgsl_device *device, u32 ddr_freq) { struct kgsl_pwrctrl *pwr = &device->pwrctrl; diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index a567d0feca..cd7632e66b 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -246,7 +246,6 @@ void kgsl_pwrctrl_busy_time(struct kgsl_device *device, u64 time, u64 busy); */ void kgsl_pwrctrl_set_constraint(struct kgsl_device *device, struct kgsl_pwr_constraint *pwrc, u32 id, u32 ts); -int kgsl_pwrctrl_set_default_gpu_pwrlevel(struct kgsl_device *device); /** * kgsl_pwrctrl_request_state - Request a specific power state @@ -345,4 +344,13 @@ int kgsl_pwrctrl_probe_gdscs(struct kgsl_device *device, struct platform_device * Return: DDR vote level from where GPU should vote for performance mode */ u32 kgsl_pwrctrl_get_acv_perfmode_lvl(struct kgsl_device *device, u32 ddr_freq); + +/** + * kgsl_pwrctrl_setup_default_votes - Set up default power level and bandwidth vote + * @device: Pointer to the kgsl device + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int kgsl_pwrctrl_setup_default_votes(struct kgsl_device *device); + #endif /* __KGSL_PWRCTRL_H */ From 9cdb975b2b88387b07a3a2d08d59da19743af02c Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 29 Aug 2024 09:28:54 +0530 Subject: [PATCH 0894/1016] kgsl: pwrscale: Make busmon and devfreq functions static Busmon and devfreq functions are used within kgsl_pwrscale.c file only. Therefore, make these functions static and remove their declarations from kgsl_pwrscale.h. Change-Id: I9548c7a36e42ff927140f35f0c1e73ed1585dab2 Signed-off-by: Kamal Agrawal --- kgsl_pwrscale.c | 12 ++++++------ kgsl_pwrscale.h | 12 +----------- 2 files changed, 7 insertions(+), 17 deletions(-) diff --git a/kgsl_pwrscale.c b/kgsl_pwrscale.c index 69e7f16c3e..f713357be5 100644 --- a/kgsl_pwrscale.c +++ b/kgsl_pwrscale.c @@ -230,7 +230,7 @@ void kgsl_pwrscale_enable(struct kgsl_device *device) * thereby not recommending anything above the constraint. * This function expects the device mutex to be unlocked. */ -int kgsl_devfreq_target(struct device *dev, unsigned long *freq, u32 flags) +static int kgsl_devfreq_target(struct device *dev, unsigned long *freq, u32 flags) { struct kgsl_device *device = dev_get_drvdata(dev); struct kgsl_pwrctrl *pwr; @@ -290,7 +290,7 @@ int kgsl_devfreq_target(struct device *dev, unsigned long *freq, u32 flags) * * This function expects the device mutex to be unlocked. */ -int kgsl_devfreq_get_dev_status(struct device *dev, +static int kgsl_devfreq_get_dev_status(struct device *dev, struct devfreq_dev_status *stat) { struct kgsl_device *device = dev_get_drvdata(dev); @@ -368,7 +368,7 @@ int kgsl_devfreq_get_dev_status(struct device *dev, * * This function expects the device mutex to be unlocked. 
*/ -int kgsl_devfreq_get_cur_freq(struct device *dev, unsigned long *freq) +static int kgsl_devfreq_get_cur_freq(struct device *dev, unsigned long *freq) { struct kgsl_device *device = dev_get_drvdata(dev); struct kgsl_pwrscale *pwrscale = &device->pwrscale; @@ -407,7 +407,7 @@ int kgsl_devfreq_get_cur_freq(struct device *dev, unsigned long *freq) * * This function expects the device mutex to be unlocked. */ -int kgsl_busmon_get_dev_status(struct device *dev, +static int kgsl_busmon_get_dev_status(struct device *dev, struct devfreq_dev_status *stat) { struct xstats *b; @@ -454,7 +454,7 @@ static int _read_hint(u32 flags) * * This function expects the device mutex to be unlocked. */ -int kgsl_busmon_target(struct device *dev, unsigned long *freq, u32 flags) +static int kgsl_busmon_target(struct device *dev, unsigned long *freq, u32 flags) { struct kgsl_device *device = dev_get_drvdata(dev); struct kgsl_pwrctrl *pwr; @@ -527,7 +527,7 @@ int kgsl_busmon_target(struct device *dev, unsigned long *freq, u32 flags) return 0; } -int kgsl_busmon_get_cur_freq(struct device *dev, unsigned long *freq) +static int kgsl_busmon_get_cur_freq(struct device *dev, unsigned long *freq) { return 0; } diff --git a/kgsl_pwrscale.h b/kgsl_pwrscale.h index 271511d6cc..834efbbce7 100644 --- a/kgsl_pwrscale.h +++ b/kgsl_pwrscale.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2010-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __KGSL_PWRSCALE_H @@ -92,16 +92,6 @@ void kgsl_pwrscale_wake(struct kgsl_device *device); void kgsl_pwrscale_enable(struct kgsl_device *device); void kgsl_pwrscale_disable(struct kgsl_device *device, bool turbo); -int kgsl_devfreq_target(struct device *dev, unsigned long *freq, u32 flags); -int kgsl_devfreq_get_dev_status(struct device *dev, - struct devfreq_dev_status *stat); -int kgsl_devfreq_get_cur_freq(struct device *dev, unsigned long *freq); - -int kgsl_busmon_target(struct device *dev, unsigned long *freq, u32 flags); -int kgsl_busmon_get_dev_status(struct device *dev, - struct devfreq_dev_status *stat); -int kgsl_busmon_get_cur_freq(struct device *dev, unsigned long *freq); - #if IS_ENABLED(CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ) static inline int msm_adreno_tz_init(void) { From 64e11b45324580e29194cee031483693b95fde19 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 29 Aug 2024 09:43:44 +0530 Subject: [PATCH 0895/1016] kgsl: Remove unused cp_marker_type enum and set_marker function The adreno_cp_marker_type enum and the set_marker functions in adreno_gen*.h are no longer used. Remove these unused definitions and functions to clean up the code and reduce clutter. 
Change-Id: Ic06e771e4a58971af7ca958a1a69c7be85df5bf3 Signed-off-by: Kamal Agrawal --- adreno.h | 7 ------- adreno_a6xx.h | 3 --- adreno_gen7.h | 3 --- adreno_gen8.h | 2 -- 4 files changed, 15 deletions(-) diff --git a/adreno.h b/adreno.h index 697cd038cc..fb7b8aabc0 100644 --- a/adreno.h +++ b/adreno.h @@ -934,13 +934,6 @@ struct adreno_debugbus_block { unsigned int dwords; }; -enum adreno_cp_marker_type { - IFPC_DISABLE, - IFPC_ENABLE, - IB1LIST_START, - IB1LIST_END, -}; - struct adreno_gpudev { /* * These registers are in a different location on different devices, diff --git a/adreno_a6xx.h b/adreno_a6xx.h index 650611ccb5..90b6941545 100644 --- a/adreno_a6xx.h +++ b/adreno_a6xx.h @@ -232,9 +232,6 @@ u32 a6xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, u32 *cmds); -unsigned int a6xx_set_marker(unsigned int *cmds, - enum adreno_cp_marker_type type); - void a6xx_preemption_callback(struct adreno_device *adreno_dev, int bit); void a6xx_preemption_context_destroy(struct kgsl_context *context); diff --git a/adreno_gen7.h b/adreno_gen7.h index 21c276af7f..2114fa3e17 100644 --- a/adreno_gen7.h +++ b/adreno_gen7.h @@ -264,9 +264,6 @@ u32 gen7_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, u32 *cmds); -unsigned int gen7_set_marker(unsigned int *cmds, - enum adreno_cp_marker_type type); - void gen7_preemption_callback(struct adreno_device *adreno_dev, int bit); void gen7_preemption_context_destroy(struct kgsl_context *context); diff --git a/adreno_gen8.h b/adreno_gen8.h index 706e9df884..306155d60d 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -300,8 +300,6 @@ u32 gen8_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, u32 *cmds); -u32 gen8_set_marker(u32 *cmds, enum adreno_cp_marker_type type); - void gen8_preemption_callback(struct adreno_device 
*adreno_dev, int bit); void gen8_preemption_context_destroy(struct kgsl_context *context); From f36ddd284d73099955df62cce9054b211aea52c8 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 29 Aug 2024 09:55:12 +0530 Subject: [PATCH 0896/1016] kgsl: pwrctrl: Make kgsl_pwrctrl_adjust_pwrlevel static kgsl_pwrctrl_adjust_pwrlevel() function is only used within kgsl_pwrctrl.c file. Therefore, make this function static and remove its declaration from kgsl_pwrctrl.h. Also, replace unsigned int with u32 in this function. Change-Id: I06b0bfa48642bf1b73091b9b836d1437cddde726 Signed-off-by: Kamal Agrawal --- kgsl_pwrctrl.c | 5 ++--- kgsl_pwrctrl.h | 3 --- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 8bea9980e9..dbe6357737 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -132,11 +132,10 @@ static void kgsl_pwrctrl_pwrlevel_change_settings(struct kgsl_device *device, * @device: Pointer to the kgsl_device struct * @new_level: Requested powerlevel, an index into the pwrlevel array */ -unsigned int kgsl_pwrctrl_adjust_pwrlevel(struct kgsl_device *device, - unsigned int new_level) +static u32 kgsl_pwrctrl_adjust_pwrlevel(struct kgsl_device *device, u32 new_level) { struct kgsl_pwrctrl *pwr = &device->pwrctrl; - unsigned int old_level = pwr->active_pwrlevel; + u32 old_level = pwr->active_pwrlevel; bool reset = false; /* If a pwr constraint is expired, remove it */ diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index a567d0feca..33b962412d 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -205,9 +205,6 @@ void kgsl_pwrctrl_pwrlevel_change(struct kgsl_device *device, int kgsl_pwrctrl_init_sysfs(struct kgsl_device *device); int kgsl_pwrctrl_change_state(struct kgsl_device *device, int state); -unsigned int kgsl_pwrctrl_adjust_pwrlevel(struct kgsl_device *device, - unsigned int new_level); - /* * kgsl_pwrctrl_active_freq - get currently configured frequency * @pwr: kgsl_pwrctrl structure for the device From 
cf59d41dca929b9739438581d7dd4c59dc181dec Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 29 Aug 2024 10:39:13 +0530 Subject: [PATCH 0897/1016] kgsl: Remove unused register definitions Several register macros in the adreno_regs enum are no longer used. Remove these macros and their definitions to clean up the code and reduce clutter. Change-Id: I2538343d9b8315f66bf5ad8c14f63eaf42a5b8e1 Signed-off-by: Kamal Agrawal --- adreno.h | 35 ----------------------------------- adreno_a5xx.c | 15 --------------- adreno_a6xx.c | 30 ------------------------------ 3 files changed, 80 deletions(-) diff --git a/adreno.h b/adreno.h index 697cd038cc..d2914f5146 100644 --- a/adreno.h +++ b/adreno.h @@ -857,13 +857,10 @@ struct adreno_submit_time { * and are indexed by the enumeration values defined in this enum */ enum adreno_regs { - ADRENO_REG_CP_ME_RAM_DATA, ADRENO_REG_CP_RB_BASE, ADRENO_REG_CP_RB_BASE_HI, ADRENO_REG_CP_LPAC_RB_BASE, ADRENO_REG_CP_LPAC_RB_BASE_HI, - ADRENO_REG_CP_RB_RPTR_ADDR_LO, - ADRENO_REG_CP_RB_RPTR_ADDR_HI, ADRENO_REG_CP_RB_RPTR, ADRENO_REG_CP_RB_WPTR, ADRENO_REG_CP_ME_CNTL, @@ -874,44 +871,12 @@ enum adreno_regs { ADRENO_REG_CP_IB2_BASE, ADRENO_REG_CP_IB2_BASE_HI, ADRENO_REG_CP_IB2_BUFSZ, - ADRENO_REG_CP_TIMESTAMP, - ADRENO_REG_CP_SCRATCH_REG6, - ADRENO_REG_CP_SCRATCH_REG7, - ADRENO_REG_CP_PROTECT_STATUS, - ADRENO_REG_CP_PREEMPT, - ADRENO_REG_CP_PREEMPT_DEBUG, - ADRENO_REG_CP_PREEMPT_DISABLE, - ADRENO_REG_CP_PROTECT_REG_0, - ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO, - ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI, - ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO, - ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI, - ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO, - ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI, - ADRENO_REG_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO, - ADRENO_REG_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI, - ADRENO_REG_CP_PREEMPT_LEVEL_STATUS, ADRENO_REG_RBBM_STATUS, ADRENO_REG_RBBM_STATUS3, 
- ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0, - ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1, - ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2, - ADRENO_REG_RBBM_PERFCTR_LOAD_CMD3, - ADRENO_REG_RBBM_PERFCTR_PWR_1_LO, ADRENO_REG_RBBM_INT_0_MASK, - ADRENO_REG_RBBM_PM_OVERRIDE2, ADRENO_REG_RBBM_SW_RESET_CMD, - ADRENO_REG_RBBM_CLOCK_CTL, - ADRENO_REG_PA_SC_AA_CONFIG, - ADRENO_REG_SQ_GPR_MANAGEMENT, - ADRENO_REG_SQ_INST_STORE_MANAGEMENT, - ADRENO_REG_TP0_CHICKEN, - ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO, - ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI, ADRENO_REG_GMU_AO_HOST_INTERRUPT_MASK, - ADRENO_REG_GMU_AHB_FENCE_STATUS, ADRENO_REG_GMU_GMU2HOST_INTR_MASK, - ADRENO_REG_GPMU_POWER_COUNTER_ENABLE, ADRENO_REG_REGISTER_MAX, }; diff --git a/adreno_a5xx.c b/adreno_a5xx.c index 7ab1d85bf1..0ef4313360 100644 --- a/adreno_a5xx.c +++ b/adreno_a5xx.c @@ -1939,10 +1939,6 @@ static int a5xx_microcode_read(struct adreno_device *adreno_dev) static unsigned int a5xx_register_offsets[ADRENO_REG_REGISTER_MAX] = { ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A5XX_CP_RB_BASE), ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, A5XX_CP_RB_BASE_HI), - ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR_ADDR_LO, - A5XX_CP_RB_RPTR_ADDR_LO), - ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR_ADDR_HI, - A5XX_CP_RB_RPTR_ADDR_HI), ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A5XX_CP_RB_RPTR), ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A5XX_CP_RB_WPTR), ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, A5XX_CP_ME_CNTL), @@ -1953,21 +1949,10 @@ static unsigned int a5xx_register_offsets[ADRENO_REG_REGISTER_MAX] = { ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, A5XX_CP_IB2_BASE), ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, A5XX_CP_IB2_BASE_HI), ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, A5XX_CP_IB2_BUFSZ), - ADRENO_REG_DEFINE(ADRENO_REG_CP_PROTECT_REG_0, A5XX_CP_PROTECT_REG_0), - ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT, A5XX_CP_CONTEXT_SWITCH_CNTL), - ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_DEBUG, ADRENO_REG_SKIP), - ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_DISABLE, ADRENO_REG_SKIP), - 
ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO, - A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO), - ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI, - A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI), ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, A5XX_RBBM_STATUS), ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS3, A5XX_RBBM_STATUS3), ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, A5XX_RBBM_INT_0_MASK), - ADRENO_REG_DEFINE(ADRENO_REG_RBBM_CLOCK_CTL, A5XX_RBBM_CLOCK_CNTL), ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, A5XX_RBBM_SW_RESET_CMD), - ADRENO_REG_DEFINE(ADRENO_REG_GPMU_POWER_COUNTER_ENABLE, - A5XX_GPMU_POWER_COUNTER_ENABLE), }; static void a5xx_cp_hw_err_callback(struct adreno_device *adreno_dev, int bit) diff --git a/adreno_a6xx.c b/adreno_a6xx.c index 420c72c3fa..a1cd15bc20 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -1998,10 +1998,6 @@ static unsigned int a6xx_register_offsets[ADRENO_REG_REGISTER_MAX] = { ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A6XX_CP_RB_BASE), ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, A6XX_CP_RB_BASE_HI), - ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR_ADDR_LO, - A6XX_CP_RB_RPTR_ADDR_LO), - ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR_ADDR_HI, - A6XX_CP_RB_RPTR_ADDR_HI), ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A6XX_CP_RB_RPTR), ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A6XX_CP_RB_WPTR), ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_CNTL, A6XX_CP_RB_CNTL), @@ -2012,38 +2008,12 @@ static unsigned int a6xx_register_offsets[ADRENO_REG_REGISTER_MAX] = { ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, A6XX_CP_IB2_BASE), ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, A6XX_CP_IB2_BASE_HI), ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, A6XX_CP_IB2_REM_SIZE), - ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT, A6XX_CP_CONTEXT_SWITCH_CNTL), - ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO, - A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO), - ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI, - A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI), - ADRENO_REG_DEFINE( - 
ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO, - A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO), - ADRENO_REG_DEFINE( - ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI, - A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI), - ADRENO_REG_DEFINE( - ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO, - A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO), - ADRENO_REG_DEFINE( - ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI, - A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI), - ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO, - A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO), - ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI, - A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI), - ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_LEVEL_STATUS, - A6XX_CP_CONTEXT_SWITCH_LEVEL_STATUS), ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, A6XX_RBBM_STATUS), ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS3, A6XX_RBBM_STATUS3), ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, A6XX_RBBM_INT_0_MASK), - ADRENO_REG_DEFINE(ADRENO_REG_RBBM_CLOCK_CTL, A6XX_RBBM_CLOCK_CNTL), ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, A6XX_RBBM_SW_RESET_CMD), ADRENO_REG_DEFINE(ADRENO_REG_GMU_AO_HOST_INTERRUPT_MASK, A6XX_GMU_AO_HOST_INTERRUPT_MASK), - ADRENO_REG_DEFINE(ADRENO_REG_GMU_AHB_FENCE_STATUS, - A6XX_GMU_AHB_FENCE_STATUS), ADRENO_REG_DEFINE(ADRENO_REG_GMU_GMU2HOST_INTR_MASK, A6XX_GMU_GMU2HOST_INTR_MASK), }; From 992af719da9fd4d490d61772d00b2e111cb401f2 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 29 Aug 2024 19:22:11 +0530 Subject: [PATCH 0898/1016] kgsl: Make AB vote quantization function generation agnostic gen*_bus_ab_quantize() logic is independent of gpu generation. Thus, move it to generation agnostic file to reduce code duplication. 
Change-Id: Ica61c9111d958b1458ebfb2a947027bb11aaf340 Signed-off-by: Kamal Agrawal --- adreno.c | 47 +++++++++++++++++++++++++++++++++++++++ adreno.h | 9 ++++++++ adreno_gen7_gmu.c | 51 ++----------------------------------------- adreno_gen7_gmu.h | 9 -------- adreno_gen7_hwsched.c | 4 ++-- adreno_gen8_gmu.c | 51 ++----------------------------------------- adreno_gen8_gmu.h | 9 -------- adreno_gen8_hwsched.c | 4 ++-- 8 files changed, 64 insertions(+), 120 deletions(-) diff --git a/adreno.c b/adreno.c index 0ba9b20d54..960caa3660 100644 --- a/adreno.c +++ b/adreno.c @@ -3457,6 +3457,53 @@ static int adreno_gpu_bus_set(struct kgsl_device *device, int level, u32 ab) return adreno_interconnect_bus_set(adreno_dev, level, ab); } +u32 adreno_gmu_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab) +{ + u16 vote = 0; + u32 max_bw, max_ab; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + if (!adreno_dev->gmu_ab || (ab == INVALID_AB_VALUE)) + return (FIELD_PREP(GENMASK(31, 16), INVALID_AB_VALUE)); + + /* + * max ddr bandwidth (kbps) = (Max bw in kbps per channel * number of channel) + * max ab (Mbps) = max ddr bandwidth (kbps) / 1000 + */ + max_bw = pwr->ddr_table[pwr->ddr_table_count - 1] * adreno_dev->gpucore->num_ddr_channels; + max_ab = max_bw / 1000; + + /* + * If requested AB is higher than theoretical max bandwidth, set AB vote as max + * allowable quantized AB value. + * + * Power FW supports a 16 bit AB BW level. We can quantize the entire vote-able BW + * range to a 16 bit space and the quantized value can be used to vote for AB though + * GMU. Quantization can be performed as below. + * + * quantized_vote = (ab vote (kbps) * 2^16) / max ddr bandwidth (kbps) + */ + if (ab >= max_ab) + vote = MAX_AB_VALUE; + else + vote = (u16)(((u64)ab * 1000 * (1 << 16)) / max_bw); + + /* + * Vote will be calculated as 0 for smaller AB values. + * Set a minimum non-zero vote in such cases. 
+ */ + if (ab && !vote) + vote = 0x1; + + /* + * Set ab enable mask and valid AB vote. req.bw is 32 bit value 0xABABENIB + * and with this return we want to set the upper 16 bits and EN field specifies + * if the AB vote is valid or not. + */ + return (FIELD_PREP(GENMASK(31, 16), vote) | FIELD_PREP(GENMASK(15, 8), 1)); +} + static void adreno_deassert_gbif_halt(struct kgsl_device *device) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); diff --git a/adreno.h b/adreno.h index 697cd038cc..cd296aed9e 100644 --- a/adreno.h +++ b/adreno.h @@ -1954,6 +1954,15 @@ void adreno_set_active_ctxs_null(struct adreno_device *adreno_dev); */ void adreno_get_bus_counters(struct adreno_device *adreno_dev); +/** + * adreno_gmu_bus_ab_quantize - Calculate the AB vote that needs to be sent to GMU + * @adreno_dev: Handle to the adreno device + * @ab: ab request that needs to be scaled in MBps + * + * Returns the AB value that needs to be prefixed to bandwidth vote in kbps + */ +u32 adreno_gmu_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab); + /** * adreno_suspend_context - Make sure device is idle * @device: Pointer to the kgsl device diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index b47ca1028b..7f2739e3d2 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -1572,7 +1572,7 @@ static int gen7_gmu_notify_slumber(struct adreno_device *adreno_dev) }; int ret; - req.bw |= gen7_bus_ab_quantize(adreno_dev, 0); + req.bw |= adreno_gmu_bus_ab_quantize(adreno_dev, 0); /* Disable the power counter so that the GMU is not busy */ gmu_core_regwrite(device, GEN7_GMU_CX_GMU_POWER_COUNTER_ENABLE, 0); @@ -1641,7 +1641,7 @@ static int gen7_gmu_dcvs_set(struct adreno_device *adreno_dev, if (bus_level < pwr->ddr_table_count && bus_level > 0) req.bw = bus_level; - req.bw |= gen7_bus_ab_quantize(adreno_dev, ab); + req.bw |= adreno_gmu_bus_ab_quantize(adreno_dev, ab); /* GMU will vote for slumber levels through the sleep sequence */ if ((req.freq == INVALID_DCVS_IDX) && (req.bw 
== INVALID_BW_VOTE)) @@ -2304,53 +2304,6 @@ static int gen7_gmu_bus_set(struct adreno_device *adreno_dev, int buslevel, return ret; } -u32 gen7_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab) -{ - u16 vote = 0; - u32 max_bw, max_ab; - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct kgsl_pwrctrl *pwr = &device->pwrctrl; - - if (!adreno_dev->gmu_ab || (ab == INVALID_AB_VALUE)) - return (FIELD_PREP(GENMASK(31, 16), INVALID_AB_VALUE)); - - /* - * max ddr bandwidth (kbps) = (Max bw in kbps per channel * number of channel) - * max ab (Mbps) = max ddr bandwidth (kbps) / 1000 - */ - max_bw = pwr->ddr_table[pwr->ddr_table_count - 1] * adreno_dev->gpucore->num_ddr_channels; - max_ab = max_bw / 1000; - - /* - * If requested AB is higher than theoretical max bandwidth, set AB vote as max - * allowable quantized AB value. - * - * Power FW supports a 16 bit AB BW level. We can quantize the entire vote-able BW - * range to a 16 bit space and the quantized value can be used to vote for AB though - * GMU. Quantization can be performed as below. - * - * quantized_vote = (ab vote (kbps) * 2^16) / max ddr bandwidth (kbps) - */ - if (ab >= max_ab) - vote = MAX_AB_VALUE; - else - vote = (u16)(((u64)ab * 1000 * (1 << 16)) / max_bw); - - /* - * Vote will be calculated as 0 for smaller AB values. - * Set a minimum non-zero vote in such cases. - */ - if (ab && !vote) - vote = 0x1; - - /* - * Set ab enable mask and valid AB vote. req.bw is 32 bit value 0xABABENIB - * and with this return we want to set the upper 16 bits and EN field specifies - * if the AB vote is valid or not. 
- */ - return (FIELD_PREP(GENMASK(31, 16), vote) | FIELD_PREP(GENMASK(15, 8), 1)); -} - static void gen7_free_gmu_globals(struct gen7_gmu_device *gmu) { int i; diff --git a/adreno_gen7_gmu.h b/adreno_gen7_gmu.h index b2930a9f70..4d5523bb85 100644 --- a/adreno_gen7_gmu.h +++ b/adreno_gen7_gmu.h @@ -484,15 +484,6 @@ void gen7_gmu_send_nmi(struct kgsl_device *device, bool force, */ int gen7_gmu_add_to_minidump(struct adreno_device *adreno_dev); -/** - * gen7_bus_ab_quantize - Calculate the AB vote that needs to be sent to GMU - * @adreno_dev: Handle to the adreno device - * @ab: ab request that needs to be scaled in MBps - * - * Returns the AB value that needs to be prefixed to bandwidth vote in kbps - */ -u32 gen7_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab); - /** * gen7_gmu_clock_set_rate - Set the gmu clock rate * @adreno_dev: Handle to the adreno device diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 93d0a3b670..3b51e99345 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -405,7 +405,7 @@ static int gen7_hwsched_notify_slumber(struct adreno_device *adreno_dev) req.freq = gmu->dcvs_table.gpu_level_num - pwr->default_pwrlevel - 1; req.bw = pwr->pwrlevels[pwr->default_pwrlevel].bus_freq; - req.bw |= gen7_bus_ab_quantize(adreno_dev, 0); + req.bw |= adreno_gmu_bus_ab_quantize(adreno_dev, 0); /* Disable the power counter so that the GMU is not busy */ gmu_core_regwrite(device, GEN7_GMU_CX_GMU_POWER_COUNTER_ENABLE, 0); @@ -1103,7 +1103,7 @@ static int gen7_hwsched_dcvs_set(struct adreno_device *adreno_dev, if (bus_level < pwr->ddr_table_count && bus_level > 0) req.bw = bus_level; - req.bw |= gen7_bus_ab_quantize(adreno_dev, ab); + req.bw |= adreno_gmu_bus_ab_quantize(adreno_dev, ab); /* GMU will vote for slumber levels through the sleep sequence */ if ((req.freq == INVALID_DCVS_IDX) && (req.bw == INVALID_BW_VOTE)) diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index 1d4159cf25..42ea7a163b 100644 --- a/adreno_gen8_gmu.c 
+++ b/adreno_gen8_gmu.c @@ -1439,7 +1439,7 @@ static int gen8_gmu_notify_slumber(struct adreno_device *adreno_dev) }; int ret; - req.bw |= gen8_bus_ab_quantize(adreno_dev, 0); + req.bw |= adreno_gmu_bus_ab_quantize(adreno_dev, 0); /* Disable the power counter so that the GMU is not busy */ gmu_core_regwrite(device, GEN8_GMUCX_POWER_COUNTER_ENABLE, 0); @@ -1508,7 +1508,7 @@ static int gen8_gmu_dcvs_set(struct adreno_device *adreno_dev, if (bus_level < pwr->ddr_table_count && bus_level > 0) req.bw = bus_level; - req.bw |= gen8_bus_ab_quantize(adreno_dev, ab); + req.bw |= adreno_gmu_bus_ab_quantize(adreno_dev, ab); /* GMU will vote for slumber levels through the sleep sequence */ if ((req.freq == INVALID_DCVS_IDX) && (req.bw == INVALID_BW_VOTE)) @@ -2121,53 +2121,6 @@ static int gen8_gmu_bus_set(struct adreno_device *adreno_dev, int buslevel, return ret; } -u32 gen8_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab) -{ - u16 vote = 0; - u32 max_bw, max_ab; - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct kgsl_pwrctrl *pwr = &device->pwrctrl; - - if (!adreno_dev->gmu_ab || (ab == INVALID_AB_VALUE)) - return (FIELD_PREP(GENMASK(31, 16), INVALID_AB_VALUE)); - - /* - * max ddr bandwidth (kbps) = (Max bw in kbps per channel * number of channel) - * max ab (Mbps) = max ddr bandwidth (kbps) / 1000 - */ - max_bw = pwr->ddr_table[pwr->ddr_table_count - 1] * adreno_dev->gpucore->num_ddr_channels; - max_ab = max_bw / 1000; - - /* - * If requested AB is higher than theoretical max bandwidth, set AB vote as max - * allowable quantized AB value. - * - * Power FW supports a 16 bit AB BW level. We can quantize the entire vote-able BW - * range to a 16 bit space and the quantized value can be used to vote for AB though - * GMU. Quantization can be performed as below. 
- * - * quantized_vote = (ab vote (kbps) * 2^16) / max ddr bandwidth (kbps) - */ - if (ab >= max_ab) - vote = MAX_AB_VALUE; - else - vote = (u16)(((u64)ab * 1000 * (1 << 16)) / max_bw); - - /* - * Vote will be calculated as 0 for smaller AB values. - * Set a minimum non-zero vote in such cases. - */ - if (ab && !vote) - vote = 0x1; - - /* - * Set ab enable mask and valid AB vote. req.bw is 32 bit value 0xABABENIB - * and with this return we want to set the upper 16 bits and EN field specifies - * if the AB vote is valid or not. - */ - return (FIELD_PREP(GENMASK(31, 16), vote) | FIELD_PREP(GENMASK(15, 8), 1)); -} - static void gen8_free_gmu_globals(struct gen8_gmu_device *gmu) { int i; diff --git a/adreno_gen8_gmu.h b/adreno_gen8_gmu.h index d4831f3c4d..469bca544e 100644 --- a/adreno_gen8_gmu.h +++ b/adreno_gen8_gmu.h @@ -470,15 +470,6 @@ void gen8_gmu_send_nmi(struct kgsl_device *device, bool force, */ int gen8_gmu_add_to_minidump(struct adreno_device *adreno_dev); -/** - * gen8_bus_ab_quantize - Calculate the AB vote that needs to be sent to GMU - * @adreno_dev: Handle to the adreno device - * @ab: ab request that needs to be scaled in MBps - * - * Returns the AB value that needs to be prefixed to bandwidth vote in kbps - */ -u32 gen8_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab); - /** * gen8_gmu_clock_set_rate - Set the gmu clock rate * @adreno_dev: Handle to the adreno device diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 1c78e36bf1..e0cecd2102 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -490,7 +490,7 @@ static int gen8_hwsched_notify_slumber(struct adreno_device *adreno_dev) req.freq = gmu->dcvs_table.gpu_level_num - pwr->default_pwrlevel - 1; req.bw = pwr->pwrlevels[pwr->default_pwrlevel].bus_freq; - req.bw |= gen8_bus_ab_quantize(adreno_dev, 0); + req.bw |= adreno_gmu_bus_ab_quantize(adreno_dev, 0); /* Disable the power counter so that the GMU is not busy */ gmu_core_regwrite(device, 
GEN8_GMUCX_POWER_COUNTER_ENABLE, 0); @@ -1170,7 +1170,7 @@ static int gen8_hwsched_dcvs_set(struct adreno_device *adreno_dev, if (bus_level < pwr->ddr_table_count && bus_level > 0) req.bw = bus_level; - req.bw |= gen8_bus_ab_quantize(adreno_dev, ab); + req.bw |= adreno_gmu_bus_ab_quantize(adreno_dev, ab); /* GMU will vote for slumber levels through the sleep sequence */ if ((req.freq == INVALID_DCVS_IDX) && (req.bw == INVALID_BW_VOTE)) From b3c8a965a1f9980940b823e2980cb6522eab558c Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 29 Aug 2024 22:28:48 +0530 Subject: [PATCH 0899/1016] kgsl: Make set property generation agnostic The logic for setting up the KGSL property is independent of gpu generation. Thus, move it to generation agnostic file to reduce code duplication and improve maintainability. Change-Id: Ic7cd3e86f7a0f83f92432e8440d7e69c0fc39129 Signed-off-by: Kamal Agrawal --- adreno.c | 47 ++++++++++++++++++++++++++++++++++++++++++++--- adreno.h | 2 -- adreno_a5xx.c | 32 -------------------------------- adreno_a6xx.c | 49 ------------------------------------------------- adreno_gen7.c | 39 --------------------------------------- adreno_gen8.c | 38 -------------------------------------- 6 files changed, 44 insertions(+), 163 deletions(-) diff --git a/adreno.c b/adreno.c index 0ba9b20d54..ad89d0c195 100644 --- a/adreno.c +++ b/adreno.c @@ -2595,6 +2595,49 @@ int adreno_set_constraint(struct kgsl_device *device, return status; } +static int adreno_default_setproperty(struct kgsl_device_private *dev_priv, + u32 type, void __user *value, u32 sizebytes) +{ + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + u32 enable; + + if (type != KGSL_PROP_PWRCTRL) + return -ENODEV; + + if (sizebytes != sizeof(enable)) + return -EINVAL; + + if (copy_from_user(&enable, value, sizeof(enable))) + return -EFAULT; + + mutex_lock(&device->mutex); + + if (enable) { + if (gmu_core_isenabled(device)) + 
clear_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags); + else + device->pwrctrl.ctrl_flags = 0; + + kgsl_pwrscale_enable(device); + } else { + if (gmu_core_isenabled(device)) { + set_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags); + + if (!adreno_active_count_get(adreno_dev)) + adreno_active_count_put(adreno_dev); + } else { + kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + device->pwrctrl.ctrl_flags = KGSL_PWR_ON; + } + kgsl_pwrscale_disable(device, true); + } + + mutex_unlock(&device->mutex); + + return 0; +} + static int adreno_setproperty(struct kgsl_device_private *dev_priv, unsigned int type, void __user *value, @@ -2602,8 +2645,6 @@ static int adreno_setproperty(struct kgsl_device_private *dev_priv, { int status = -EINVAL; struct kgsl_device *device = dev_priv->device; - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); switch (type) { case KGSL_PROP_PWR_CONSTRAINT: @@ -2633,7 +2674,7 @@ static int adreno_setproperty(struct kgsl_device_private *dev_priv, } break; default: - status = gpudev->setproperty(dev_priv, type, value, sizebytes); + status = adreno_default_setproperty(dev_priv, type, value, sizebytes); break; } diff --git a/adreno.h b/adreno.h index 697cd038cc..5a6ddab166 100644 --- a/adreno.h +++ b/adreno.h @@ -994,8 +994,6 @@ struct adreno_gpudev { */ void (*power_stats)(struct adreno_device *adreno_dev, struct kgsl_power_stats *stats); - int (*setproperty)(struct kgsl_device_private *priv, u32 type, - void __user *value, u32 sizebytes); int (*add_to_va_minidump)(struct adreno_device *adreno_dev); /** * @gx_is_on - Return true if both gfx clock and gxgdsc are enabled. 
diff --git a/adreno_a5xx.c b/adreno_a5xx.c index 7ab1d85bf1..2b068b60fd 100644 --- a/adreno_a5xx.c +++ b/adreno_a5xx.c @@ -2449,37 +2449,6 @@ static void a5xx_power_stats(struct adreno_device *adreno_dev, &busy->bif_starved_ram); } -static int a5xx_setproperty(struct kgsl_device_private *dev_priv, - u32 type, void __user *value, u32 sizebytes) -{ - struct kgsl_device *device = dev_priv->device; - u32 enable; - - if (type != KGSL_PROP_PWRCTRL) - return -ENODEV; - - if (sizebytes != sizeof(enable)) - return -EINVAL; - - if (copy_from_user(&enable, value, sizeof(enable))) - return -EFAULT; - - mutex_lock(&device->mutex); - - if (enable) { - device->pwrctrl.ctrl_flags = 0; - kgsl_pwrscale_enable(device); - } else { - kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); - device->pwrctrl.ctrl_flags = KGSL_PWR_ON; - kgsl_pwrscale_disable(device, true); - } - - mutex_unlock(&device->mutex); - - return 0; -} - const struct adreno_gpudev adreno_a5xx_gpudev = { .reg_offsets = a5xx_register_offsets, .probe = a5xx_probe, @@ -2500,5 +2469,4 @@ const struct adreno_gpudev adreno_a5xx_gpudev = { .ringbuffer_submitcmd = a5xx_ringbuffer_submitcmd, .is_hw_collapsible = a5xx_is_hw_collapsible, .power_stats = a5xx_power_stats, - .setproperty = a5xx_setproperty, }; diff --git a/adreno_a6xx.c b/adreno_a6xx.c index 420c72c3fa..3b98d01f52 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -2205,49 +2205,6 @@ static void a6xx_power_stats(struct adreno_device *adreno_dev, a6xx_read_bus_stats(device, stats, busy); } -static int a6xx_setproperty(struct kgsl_device_private *dev_priv, - u32 type, void __user *value, u32 sizebytes) -{ - struct kgsl_device *device = dev_priv->device; - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - u32 enable; - - if (type != KGSL_PROP_PWRCTRL) - return -ENODEV; - - if (sizebytes != sizeof(enable)) - return -EINVAL; - - if (copy_from_user(&enable, value, sizeof(enable))) - return -EFAULT; - - mutex_lock(&device->mutex); - - if (enable) { - if 
(gmu_core_isenabled(device)) - clear_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags); - else - device->pwrctrl.ctrl_flags = 0; - - kgsl_pwrscale_enable(device); - } else { - if (gmu_core_isenabled(device)) { - set_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags); - - if (!adreno_active_count_get(adreno_dev)) - adreno_active_count_put(adreno_dev); - } else { - kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); - device->pwrctrl.ctrl_flags = KGSL_PWR_ON; - } - kgsl_pwrscale_disable(device, true); - } - - mutex_unlock(&device->mutex); - - return 0; -} - static int a6xx_dev_add_to_minidump(struct adreno_device *adreno_dev) { return kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_ADRENO_DEVICE, @@ -2369,7 +2326,6 @@ const struct adreno_gpudev adreno_a6xx_gpudev = { .ringbuffer_submitcmd = a6xx_ringbuffer_submitcmd, .is_hw_collapsible = adreno_isidle, .power_stats = a6xx_power_stats, - .setproperty = a6xx_setproperty, .add_to_va_minidump = a6xx_dev_add_to_minidump, .gx_is_on = a6xx_gx_is_on, }; @@ -2386,7 +2342,6 @@ const struct a6xx_gpudev adreno_a6xx_hwsched_gpudev = { .reset = a6xx_hwsched_reset_replay, .power_ops = &a6xx_hwsched_power_ops, .power_stats = a6xx_power_stats, - .setproperty = a6xx_setproperty, .hw_isidle = a6xx_hw_isidle, .add_to_va_minidump = a6xx_hwsched_add_to_minidump, .gx_is_on = a6xx_gmu_gx_is_on, @@ -2417,7 +2372,6 @@ const struct a6xx_gpudev adreno_a6xx_gmu_gpudev = { .remove = a6xx_remove, .ringbuffer_submitcmd = a6xx_ringbuffer_submitcmd, .power_stats = a6xx_power_stats, - .setproperty = a6xx_setproperty, .add_to_va_minidump = a6xx_gmu_add_to_minidump, .gx_is_on = a6xx_gmu_gx_is_on, .set_isdb_breakpoint_registers = a6xx_set_isdb_breakpoint_registers, @@ -2444,7 +2398,6 @@ const struct adreno_gpudev adreno_a6xx_rgmu_gpudev = { .remove = a6xx_remove, .ringbuffer_submitcmd = a6xx_ringbuffer_submitcmd, .power_stats = a6xx_power_stats, - .setproperty = a6xx_setproperty, .add_to_va_minidump = a6xx_rgmu_add_to_minidump, .gx_is_on = 
a6xx_rgmu_gx_is_on, }; @@ -2473,7 +2426,6 @@ const struct adreno_gpudev adreno_a619_holi_gpudev = { .ringbuffer_submitcmd = a6xx_ringbuffer_submitcmd, .is_hw_collapsible = adreno_isidle, .power_stats = a6xx_power_stats, - .setproperty = a6xx_setproperty, .add_to_va_minidump = a6xx_dev_add_to_minidump, .gx_is_on = a619_holi_gx_is_on, }; @@ -2497,7 +2449,6 @@ const struct a6xx_gpudev adreno_a630_gpudev = { .remove = a6xx_remove, .ringbuffer_submitcmd = a6xx_ringbuffer_submitcmd, .power_stats = a6xx_power_stats, - .setproperty = a6xx_setproperty, .add_to_va_minidump = a6xx_gmu_add_to_minidump, .gx_is_on = a6xx_gmu_gx_is_on, }, diff --git a/adreno_gen7.c b/adreno_gen7.c index 5292e9a836..7a8d3aae74 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -2188,42 +2188,6 @@ static void gen7_power_stats(struct adreno_device *adreno_dev, } } -static int gen7_setproperty(struct kgsl_device_private *dev_priv, - u32 type, void __user *value, u32 sizebytes) -{ - struct kgsl_device *device = dev_priv->device; - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - u32 enable; - - if (type != KGSL_PROP_PWRCTRL) - return -ENODEV; - - if (sizebytes != sizeof(enable)) - return -EINVAL; - - if (copy_from_user(&enable, value, sizeof(enable))) - return -EFAULT; - - mutex_lock(&device->mutex); - - if (enable) { - clear_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags); - - kgsl_pwrscale_enable(device); - } else { - set_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags); - - if (!adreno_active_count_get(adreno_dev)) - adreno_active_count_put(adreno_dev); - - kgsl_pwrscale_disable(device, true); - } - - mutex_unlock(&device->mutex); - - return 0; -} - static void gen7_set_isdb_breakpoint_registers(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -2340,7 +2304,6 @@ const struct gen7_gpudev adreno_gen7_9_0_hwsched_gpudev = { .reset = gen7_hwsched_reset_replay, .power_ops = &gen7_hwsched_power_ops, .power_stats = gen7_power_stats, - .setproperty = 
gen7_setproperty, .hw_isidle = gen7_hw_isidle, .add_to_va_minidump = gen7_hwsched_add_to_minidump, .gx_is_on = gen7_gmu_gx_is_on, @@ -2369,7 +2332,6 @@ const struct gen7_gpudev adreno_gen7_hwsched_gpudev = { .reset = gen7_hwsched_reset_replay, .power_ops = &gen7_hwsched_power_ops, .power_stats = gen7_power_stats, - .setproperty = gen7_setproperty, .hw_isidle = gen7_hw_isidle, .add_to_va_minidump = gen7_hwsched_add_to_minidump, .gx_is_on = gen7_gmu_gx_is_on, @@ -2403,7 +2365,6 @@ const struct gen7_gpudev adreno_gen7_gmu_gpudev = { .remove = gen7_remove, .ringbuffer_submitcmd = gen7_ringbuffer_submitcmd, .power_stats = gen7_power_stats, - .setproperty = gen7_setproperty, .add_to_va_minidump = gen7_gmu_add_to_minidump, .gx_is_on = gen7_gmu_gx_is_on, .perfcounter_remove = gen7_perfcounter_remove, diff --git a/adreno_gen8.c b/adreno_gen8.c index 8ca41c7681..c8f9613cf3 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -2755,42 +2755,6 @@ static void gen8_power_stats(struct adreno_device *adreno_dev, } } -static int gen8_setproperty(struct kgsl_device_private *dev_priv, - u32 type, void __user *value, u32 sizebytes) -{ - struct kgsl_device *device = dev_priv->device; - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - u32 enable; - - if (type != KGSL_PROP_PWRCTRL) - return -ENODEV; - - if (sizebytes != sizeof(enable)) - return -EINVAL; - - if (copy_from_user(&enable, value, sizeof(enable))) - return -EFAULT; - - mutex_lock(&device->mutex); - - if (enable) { - clear_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags); - - kgsl_pwrscale_enable(device); - } else { - set_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags); - - if (!adreno_active_count_get(adreno_dev)) - adreno_active_count_put(adreno_dev); - - kgsl_pwrscale_disable(device, true); - } - - mutex_unlock(&device->mutex); - - return 0; -} - static void gen8_set_isdb_breakpoint_registers(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -2983,7 +2947,6 @@ const 
struct gen8_gpudev adreno_gen8_hwsched_gpudev = { .reset = gen8_hwsched_reset_replay, .power_ops = &gen8_hwsched_power_ops, .power_stats = gen8_power_stats, - .setproperty = gen8_setproperty, .hw_isidle = gen8_hw_isidle, .add_to_va_minidump = gen8_hwsched_add_to_minidump, .gx_is_on = gen8_gmu_gx_is_on, @@ -3018,7 +2981,6 @@ const struct gen8_gpudev adreno_gen8_gmu_gpudev = { .remove = gen8_remove, .ringbuffer_submitcmd = gen8_ringbuffer_submitcmd, .power_stats = gen8_power_stats, - .setproperty = gen8_setproperty, .add_to_va_minidump = gen8_gmu_add_to_minidump, .gx_is_on = gen8_gmu_gx_is_on, .perfcounter_remove = gen8_perfcounter_remove, From 3acec0802e86c8729004f4b75cf5429339858b47 Mon Sep 17 00:00:00 2001 From: Carter Cooper Date: Mon, 1 Jul 2024 15:48:08 -0600 Subject: [PATCH 0900/1016] kgsl: gen8: Update register protect list for gen8 targets Update the register protection list to the latest recommendations. Change-Id: Ia9d4e4ced6b29559ea7ebd02316c23014bcf0fcc Signed-off-by: Carter Cooper --- adreno-gpulist.h | 35 ++++++++++++++++++----------------- adreno_gen8.c | 2 ++ 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index d62f89180a..6d0c9b7025 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2846,23 +2846,24 @@ static const struct gen8_protected_regs gen8_0_0_protected_regs[] = { { GEN8_CP_PROTECT_REG_GLOBAL + 26, 0x0981a, 0x09aff, 0 }, { GEN8_CP_PROTECT_REG_GLOBAL + 27, 0x09e00, 0x09fff, 1 }, { GEN8_CP_PROTECT_REG_GLOBAL + 28, 0x0a600, 0x0a7ff, 1 }, - { GEN8_CP_PROTECT_REG_GLOBAL + 29, 0x0ae00, 0x0ae06, 1 }, - { GEN8_CP_PROTECT_REG_GLOBAL + 30, 0x0ae08, 0x0ae0e, 1 }, - { GEN8_CP_PROTECT_REG_GLOBAL + 31, 0x0ae10, 0x0b17f, 1 }, - { GEN8_CP_PROTECT_REG_GLOBAL + 32, 0x0b600, 0x0d5ff, 1 }, - { GEN8_CP_PROTECT_REG_GLOBAL + 33, 0x0dc00, 0x0fbff, 1 }, - { GEN8_CP_PROTECT_REG_GLOBAL + 34, 0x0fc00, 0x11bff, 0 }, - { GEN8_CP_PROTECT_REG_GLOBAL + 35, 0x18400, 0x1843f, 1 }, - { GEN8_CP_PROTECT_REG_GLOBAL + 36, 
0x18440, 0x1857f, 0 }, - { GEN8_CP_PROTECT_REG_GLOBAL + 37, 0x18580, 0x1a57f, 1 }, - { GEN8_CP_PROTECT_REG_GLOBAL + 38, 0x1b400, 0x1d3ff, 1 }, - { GEN8_CP_PROTECT_REG_GLOBAL + 39, 0x1f400, 0x1f877, 1 }, - { GEN8_CP_PROTECT_REG_GLOBAL + 40, 0x1f878, 0x1ffff, 0 }, - { GEN8_CP_PROTECT_REG_GLOBAL + 41, 0x1f930, 0x1fc59, 1 }, - { GEN8_CP_PROTECT_REG_GLOBAL + 42, 0x20000, 0x21fff, 1 }, - { GEN8_CP_PROTECT_REG_GLOBAL + 43, 0x27800, 0x2787f, 1 }, - { GEN8_CP_PROTECT_REG_GLOBAL + 44, 0x27880, 0x27c01, 0 }, - { GEN8_CP_PROTECT_REG_GLOBAL + 45, 0x27882, 0x27883, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 29, 0x0ae00, 0x0ae00, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 30, 0x0ae02, 0x0ae06, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 31, 0x0ae08, 0x0ae0e, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 32, 0x0ae10, 0x0b17f, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 33, 0x0b600, 0x0d5ff, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 34, 0x0dc00, 0x0fbff, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 35, 0x0fc00, 0x11bff, 0 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 36, 0x18400, 0x1843f, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 37, 0x18440, 0x1857f, 0 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 38, 0x18580, 0x1a57f, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 39, 0x1b400, 0x1d3ff, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 40, 0x1f400, 0x1f877, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 41, 0x1f878, 0x1ffff, 0 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 42, 0x1f930, 0x1fc59, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 43, 0x20000, 0x21fff, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 44, 0x27800, 0x2787f, 1 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 45, 0x27880, 0x27c01, 0 }, + { GEN8_CP_PROTECT_REG_GLOBAL + 46, 0x27882, 0x27883, 1 }, { GEN8_CP_PROTECT_REG_GLOBAL + 63, 0x27c02, 0x27c02, 1 }, { 0 }, }; diff --git a/adreno_gen8.c b/adreno_gen8.c index 8ca41c7681..a9ba6617d8 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -251,6 +251,7 @@ static const u32 gen8_ifpc_pwrup_reglist[] = { GEN8_CP_PROTECT_REG_GLOBAL + 43, GEN8_CP_PROTECT_REG_GLOBAL + 44, GEN8_CP_PROTECT_REG_GLOBAL + 45, + 
GEN8_CP_PROTECT_REG_GLOBAL + 46, GEN8_CP_PROTECT_REG_GLOBAL + 63, GEN8_CP_INTERRUPT_STATUS_MASK_GLOBAL, }; @@ -332,6 +333,7 @@ static const u32 gen8_3_0_ifpc_pwrup_reglist[] = { GEN8_CP_PROTECT_REG_GLOBAL + 43, GEN8_CP_PROTECT_REG_GLOBAL + 44, GEN8_CP_PROTECT_REG_GLOBAL + 45, + GEN8_CP_PROTECT_REG_GLOBAL + 46, GEN8_CP_PROTECT_REG_GLOBAL + 63, }; From b671594f618a0b033ca5dc9c5ab885c495349558 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 29 Aug 2024 21:17:40 +0530 Subject: [PATCH 0901/1016] kgsl: Introduce dedicated mutex for device open and close operations KGSL device mutex is currently heavily contended, leading to potential performance and stability bottlenecks. In the existing design, device mutex is required during the first open and last close operations. However, for intermediate open and close operations, device mutex can be bypassed. Therefore, introduce a new mutex to specifically synchronize device open and close operations. This will reduce the contention on primary device mutex. 
Change-Id: Iac9cbb3f83df4b308061f21efdc41a1e95be9220 Signed-off-by: Kamal Agrawal --- adreno.c | 1 + kgsl.c | 16 +++++++++++----- kgsl_device.h | 2 ++ 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/adreno.c b/adreno.c index 0ba9b20d54..c426361afd 100644 --- a/adreno.c +++ b/adreno.c @@ -1188,6 +1188,7 @@ static void adreno_setup_device(struct adreno_device *adreno_dev) idr_init(&adreno_dev->dev.context_idr); mutex_init(&adreno_dev->dev.mutex); + mutex_init(&adreno_dev->dev.file_mutex); mutex_init(&adreno_dev->fault_recovery_mutex); INIT_LIST_HEAD(&adreno_dev->dev.globals); diff --git a/kgsl.c b/kgsl.c index da8ae3ec85..961499869f 100644 --- a/kgsl.c +++ b/kgsl.c @@ -1366,9 +1366,12 @@ static int kgsl_close_device(struct kgsl_device *device) { int result = 0; - mutex_lock(&device->mutex); - if (device->open_count == 1) + mutex_lock(&device->file_mutex); + if (device->open_count == 1) { + mutex_lock(&device->mutex); result = device->ftbl->last_close(device); + mutex_unlock(&device->mutex); + } /* * We must decrement the open_count after last_close() has finished. @@ -1381,7 +1384,7 @@ static int kgsl_close_device(struct kgsl_device *device) * last_close(). 
*/ device->open_count--; - mutex_unlock(&device->mutex); + mutex_unlock(&device->file_mutex); return result; } @@ -1446,15 +1449,18 @@ static int kgsl_open_device(struct kgsl_device *device) { int result = 0; - mutex_lock(&device->mutex); + mutex_lock(&device->file_mutex); if (device->open_count == 0) { + mutex_lock(&device->mutex); result = device->ftbl->first_open(device); + mutex_unlock(&device->mutex); + if (result) goto out; } device->open_count++; out: - mutex_unlock(&device->mutex); + mutex_unlock(&device->file_mutex); return result; } diff --git a/kgsl_device.h b/kgsl_device.h index b7841e8466..33f86ca475 100644 --- a/kgsl_device.h +++ b/kgsl_device.h @@ -349,6 +349,8 @@ struct kgsl_device { * object */ u32 max_syncobj_hw_fence_count; + /** @file_mutex: Mutex to protect device open and close operations */ + struct mutex file_mutex; }; #define KGSL_MMU_DEVICE(_mmu) \ From 967f2ddd6233952d9ca467f35976d5f735490752 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Mon, 2 Sep 2024 20:04:39 +0530 Subject: [PATCH 0902/1016] kgsl: Schedule touch wake input work during slumber only Currently, touch wake input work is always scheduled for GMU based targets. The worker takes device mutex and then returns early without doing any operation when GPU is active. Therefore, schedule touch wake input work during slumber only to reduce device mutex contention. Same mechanism is followed for non-gmu targets. 
Change-Id: I020e65238a113319cae089fe0d2a19564f3eee17 Signed-off-by: Kamal Agrawal --- adreno.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno.c b/adreno.c index 0ba9b20d54..a4a86d4696 100644 --- a/adreno.c +++ b/adreno.c @@ -232,7 +232,7 @@ void adreno_touch_wake(struct kgsl_device *device) if (device->pwrctrl.wake_on_touch) return; - if (gmu_core_isenabled(device) || (device->state == KGSL_STATE_SLUMBER)) + if (device->state == KGSL_STATE_SLUMBER) schedule_work(&adreno_dev->input_work); } From d3cd5a8da856717d71e70db0cbdef2ecdd73aa49 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Mon, 2 Sep 2024 21:56:25 +0530 Subject: [PATCH 0903/1016] kgsl: Optimize mutex usage in gpuclk_store function GPU frequency table is static and can be accessed safely without device mutex. Refactor the gpuclk_store function to lock the device mutex only when necessary. This reduces unnecessary mutex contention by locking the mutex only when the power level is valid and needs to be changed. Change-Id: Ie0ec53b06315fd973e8f6a881a34bdab60b31a6c Signed-off-by: Kamal Agrawal --- kgsl_pwrctrl.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 1505c37c74..5d8db81485 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -539,12 +539,13 @@ static ssize_t gpuclk_store(struct device *dev, if (ret) return ret; - mutex_lock(&device->mutex); level = _get_nearest_pwrlevel(pwr, val); - if (level >= 0) + if (level >= 0) { + mutex_lock(&device->mutex); kgsl_pwrctrl_pwrlevel_change(device, (unsigned int) level); + mutex_unlock(&device->mutex); + } - mutex_unlock(&device->mutex); return count; } From d970d6cebb796cd19cfbfb8b959dec06ad917255 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 3 Sep 2024 12:43:23 +0530 Subject: [PATCH 0904/1016] kgsl: pwrctrl: Optimize mutex usage in min_pwrlevel_set min_render_pwrlevel is static and can be accessed safely without device mutex. 
Refactor the kgsl_pwrctrl_min_pwrlevel_set() function to lock the device mutex only when necessary. This adjustment helps in reducing device mutex contention. Change-Id: Ie1c1f43572a9a001e493c9ecf2edebeefa4f2f0c Signed-off-by: Kamal Agrawal --- kgsl_pwrctrl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 1505c37c74..0aabc6fb8f 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -419,11 +419,11 @@ static void kgsl_pwrctrl_min_pwrlevel_set(struct kgsl_device *device, { struct kgsl_pwrctrl *pwr = &device->pwrctrl; - mutex_lock(&device->mutex); - if (level > pwr->min_render_pwrlevel) level = pwr->min_render_pwrlevel; + mutex_lock(&device->mutex); + /* You can't set a minimum power level lower than the maximum */ if (level < pwr->max_pwrlevel) level = pwr->max_pwrlevel; From ef3f517e751feaea7b9f38f2c37e8ac9593d2a88 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 3 Sep 2024 16:43:31 +0530 Subject: [PATCH 0905/1016] kgsl: Remove unused structures and enums Clean up kgsl_compat.h and kgsl_gmu_core.h by removing unused structures and enums. Change-Id: Idcc41120e70f3d4a7e21918ca38b9787ba2b4aea Signed-off-by: Kamal Agrawal --- kgsl_compat.h | 10 +--------- kgsl_gmu_core.h | 9 --------- 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/kgsl_compat.h b/kgsl_compat.h index a8081dd08f..65043deefd 100644 --- a/kgsl_compat.h +++ b/kgsl_compat.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2013-2017,2019,2021 The Linux Foundation. All rights reserved. + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __KGSL_COMPAT_H #define __KGSL_COMPAT_H @@ -127,15 +128,6 @@ struct kgsl_gpumem_alloc_compat { #define IOCTL_KGSL_GPUMEM_ALLOC_COMPAT \ _IOWR(KGSL_IOC_TYPE, 0x2f, struct kgsl_gpumem_alloc_compat) -struct kgsl_cff_syncmem_compat { - compat_ulong_t gpuaddr; - compat_size_t len; - unsigned int __pad[2]; /* For future binary compatibility */ -}; - -#define IOCTL_KGSL_CFF_SYNCMEM_COMPAT \ - _IOW(KGSL_IOC_TYPE, 0x30, struct kgsl_cff_syncmem_compat) - struct kgsl_timestamp_event_compat { int type; /* Type of event (see list below) */ unsigned int timestamp; /* Timestamp to trigger event on */ diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h index 11cb4ffd0a..0ecebd78d6 100644 --- a/kgsl_gmu_core.h +++ b/kgsl_gmu_core.h @@ -68,15 +68,6 @@ enum oob_request { oob_max, }; -enum gmu_pwrctrl_mode { - GMU_FW_START, - GMU_FW_STOP, - GMU_SUSPEND, - GMU_DCVS_NOHFI, - GMU_NOTIFY_SLUMBER, - INVALID_POWER_CTRL -}; - #define GPU_HW_ACTIVE 0x00 #define GPU_HW_IFPC 0x03 #define GPU_HW_SLUMBER 0x0f From 9cf888d976041d36ed7a178f0e7cee7c1989ffa9 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sat, 31 Aug 2024 02:03:13 +0530 Subject: [PATCH 0906/1016] kgsl: pwrscale: Remove on_time field in pwrscale and related code on_time field in kgsl_pwrscale structure doesn't hold any useful information. Therefore, remove it and clean up the related code. Change-Id: I3f5e17e7232ad06d5667a6b500c2f44a3f328e5e Signed-off-by: Kamal Agrawal --- adreno_a5xx_ringbuffer.c | 6 ++---- adreno_a6xx_ringbuffer.c | 1 - adreno_gen7_ringbuffer.c | 1 - adreno_gen8_ringbuffer.c | 1 - kgsl_pwrscale.c | 16 ---------------- kgsl_pwrscale.h | 5 +---- 6 files changed, 3 insertions(+), 27 deletions(-) diff --git a/adreno_a5xx_ringbuffer.c b/adreno_a5xx_ringbuffer.c index feafeedcce..9c939d1d66 100644 --- a/adreno_a5xx_ringbuffer.c +++ b/adreno_a5xx_ringbuffer.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. 
- * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #include "adreno.h" @@ -84,10 +84,8 @@ int a5xx_ringbuffer_submit(struct adreno_ringbuffer *rb, spin_lock_irqsave(&rb->preempt_lock, flags); if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) { - if (adreno_dev->cur_rb == rb) { - kgsl_pwrscale_busy(device); + if (adreno_dev->cur_rb == rb) kgsl_regwrite(device, A5XX_CP_RB_WPTR, rb->_wptr); - } } rb->wptr = rb->_wptr; diff --git a/adreno_a6xx_ringbuffer.c b/adreno_a6xx_ringbuffer.c index 0bb78f1912..bbc6816355 100644 --- a/adreno_a6xx_ringbuffer.c +++ b/adreno_a6xx_ringbuffer.c @@ -148,7 +148,6 @@ int a6xx_ringbuffer_submit(struct adreno_ringbuffer *rb, spin_lock_irqsave(&rb->preempt_lock, flags); if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) { if (adreno_dev->cur_rb == rb) { - kgsl_pwrscale_busy(device); ret = a6xx_fenced_write(adreno_dev, A6XX_CP_RB_WPTR, rb->_wptr, FENCE_STATUS_WRITEDROPPED0_MASK); diff --git a/adreno_gen7_ringbuffer.c b/adreno_gen7_ringbuffer.c index 63daee9543..6a9b3ad8de 100644 --- a/adreno_gen7_ringbuffer.c +++ b/adreno_gen7_ringbuffer.c @@ -185,7 +185,6 @@ int gen7_ringbuffer_submit(struct adreno_ringbuffer *rb, spin_lock_irqsave(&rb->preempt_lock, flags); if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) { if (adreno_dev->cur_rb == rb) { - kgsl_pwrscale_busy(device); ret = gen7_fenced_write(adreno_dev, GEN7_CP_RB_WPTR, rb->_wptr, FENCE_STATUS_WRITEDROPPED0_MASK); diff --git a/adreno_gen8_ringbuffer.c b/adreno_gen8_ringbuffer.c index 748ef007dc..6d9477e5e8 100644 --- a/adreno_gen8_ringbuffer.c +++ b/adreno_gen8_ringbuffer.c @@ -187,7 +187,6 @@ int gen8_ringbuffer_submit(struct adreno_ringbuffer *rb, spin_lock_irqsave(&rb->preempt_lock, flags); if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) { if (adreno_dev->cur_rb == rb) { - kgsl_pwrscale_busy(device); ret = 
gen8_fenced_write(adreno_dev, GEN8_CP_RB_WPTR_GC, rb->_wptr, FENCE_STATUS_WRITEDROPPED0_MASK); diff --git a/kgsl_pwrscale.c b/kgsl_pwrscale.c index 69e7f16c3e..57da5d0619 100644 --- a/kgsl_pwrscale.c +++ b/kgsl_pwrscale.c @@ -53,7 +53,6 @@ void kgsl_pwrscale_sleep(struct kgsl_device *device) { if (!device->pwrscale.enabled) return; - device->pwrscale.on_time = 0; /* to call devfreq_suspend_device() from a kernel thread */ queue_work(device->pwrscale.devfreq_wq, @@ -89,21 +88,6 @@ void kgsl_pwrscale_wake(struct kgsl_device *device) queue_work(psc->devfreq_wq, &psc->devfreq_resume_ws); } -/* - * kgsl_pwrscale_busy - update pwrscale state for new work - * @device: The device - * - * Called when new work is submitted to the device. - * This function must be called with the device mutex locked. - */ -void kgsl_pwrscale_busy(struct kgsl_device *device) -{ - if (!device->pwrscale.enabled) - return; - if (device->pwrscale.on_time == 0) - device->pwrscale.on_time = ktime_to_us(ktime_get()); -} - /** * kgsl_pwrscale_update_stats() - update device busy statistics * @device: The device diff --git a/kgsl_pwrscale.h b/kgsl_pwrscale.h index 271511d6cc..c318313de6 100644 --- a/kgsl_pwrscale.h +++ b/kgsl_pwrscale.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2010-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef __KGSL_PWRSCALE_H @@ -33,7 +33,6 @@ struct kgsl_power_stats { * @accum_stats - Accumulated statistics for various frequency calculations * @enabled - Whether or not power scaling is enabled * @time - Last submitted sample timestamp - * @on_time - Timestamp when gpu busy begins * @devfreq_wq - Main devfreq workqueue * @devfreq_suspend_ws - Pass device suspension to devfreq * @devfreq_resume_ws - Pass device resume to devfreq @@ -54,7 +53,6 @@ struct kgsl_pwrscale { struct kgsl_power_stats accum_stats; bool enabled; ktime_t time; - s64 on_time; struct workqueue_struct *devfreq_wq; struct work_struct devfreq_suspend_ws; struct work_struct devfreq_resume_ws; @@ -85,7 +83,6 @@ void kgsl_pwrscale_close(struct kgsl_device *device); void kgsl_pwrscale_update(struct kgsl_device *device); void kgsl_pwrscale_update_stats(struct kgsl_device *device); -void kgsl_pwrscale_busy(struct kgsl_device *device); void kgsl_pwrscale_sleep(struct kgsl_device *device); void kgsl_pwrscale_wake(struct kgsl_device *device); From 3666b76be944625460afc3a202589a4532b4c943 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 3 Sep 2024 14:05:46 +0530 Subject: [PATCH 0907/1016] kgsl: Optimize L3 vote setup check The L3 vote setup is performed only once. Once it is set up, it is safe to access num_l3_pwrlevels without acquiring the device mutex. Therefore, add an early check without taking the mutex to reduce mutex contention. Change-Id: I75be6420b83dd815f218321660414a7b7a49702d Signed-off-by: Kamal Agrawal --- adreno.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/adreno.c b/adreno.c index 0ba9b20d54..3d69bf20d0 100644 --- a/adreno.c +++ b/adreno.c @@ -847,13 +847,21 @@ static int register_l3_voter(struct kgsl_device *device) { int ret = 0; + /* + * The L3 vote setup is performed only once. Once set up is done, it is + * safe to access num_l3_pwrlevels without acquiring the device mutex. 
+ * Therefore, an early check can be added without taking the mutex. + */ + if (READ_ONCE(device->num_l3_pwrlevels)) + return ret; + mutex_lock(&device->mutex); if (!device->l3_vote) goto done; - /* This indicates that we are already set up */ - if (device->num_l3_pwrlevels != 0) + /* Verify again if the L3 vote is set up to handle races */ + if (device->num_l3_pwrlevels) goto done; memset(device->l3_freq, 0x0, sizeof(device->l3_freq)); @@ -876,7 +884,7 @@ static int register_l3_voter(struct kgsl_device *device) goto done; } - device->num_l3_pwrlevels = 3; + WRITE_ONCE(device->num_l3_pwrlevels, 3); done: mutex_unlock(&device->mutex); From 76988c771e6d16676101c9c1a962290bc6b503c1 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 29 Aug 2024 23:18:00 +0530 Subject: [PATCH 0908/1016] kgsl: Make active count put generation agnostic The *_active_count_put() logic is independent of gpu generation. All implementations of active_count_put operations are identical. Thus, remove the redundant code and use adreno_active_count_put() universally to reduce code duplication and improve maintainability. 
Change-Id: I9d9fb91ebac34dfc26e9274f0db96b651bbf775c Signed-off-by: Kamal Agrawal --- adreno.c | 10 +--------- adreno.h | 7 +------ adreno_a6xx_gmu.c | 27 +-------------------------- adreno_a6xx_hwsched.c | 26 +------------------------- adreno_a6xx_hwsched.h | 11 +---------- adreno_a6xx_hwsched_hfi.c | 2 +- adreno_a6xx_rgmu.c | 26 +------------------------- adreno_gen7_gmu.c | 26 +------------------------- adreno_gen7_hwsched.c | 26 +------------------------- adreno_gen7_hwsched.h | 11 +---------- adreno_gen7_hwsched_hfi.c | 6 +++--- adreno_gen8_gmu.c | 26 +------------------------- adreno_gen8_hwsched.c | 26 +------------------------- adreno_gen8_hwsched.h | 9 --------- adreno_gen8_hwsched_hfi.c | 6 +++--- 15 files changed, 18 insertions(+), 227 deletions(-) diff --git a/adreno.c b/adreno.c index 0ba9b20d54..ba148a40f6 100644 --- a/adreno.c +++ b/adreno.c @@ -1909,7 +1909,7 @@ static int adreno_pwrctrl_active_count_get(struct adreno_device *adreno_dev) return ret; } -static void adreno_pwrctrl_active_count_put(struct adreno_device *adreno_dev) +void adreno_active_count_put(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -1940,13 +1940,6 @@ int adreno_active_count_get(struct adreno_device *adreno_dev) return ops->active_count_get(adreno_dev); } -void adreno_active_count_put(struct adreno_device *adreno_dev) -{ - const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev); - - ops->active_count_put(adreno_dev); -} - void adreno_get_bus_counters(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -3627,7 +3620,6 @@ const struct adreno_power_ops adreno_power_operations = { .first_open = adreno_open, .last_close = adreno_close, .active_count_get = adreno_pwrctrl_active_count_get, - .active_count_put = adreno_pwrctrl_active_count_put, .pm_suspend = adreno_suspend, .pm_resume = adreno_resume, .touch_wakeup = adreno_touch_wakeup, diff --git a/adreno.h b/adreno.h index 
697cd038cc..e6631e0408 100644 --- a/adreno.h +++ b/adreno.h @@ -447,11 +447,6 @@ struct adreno_power_ops { * collapsing */ int (*active_count_get)(struct adreno_device *adreno_dev); - /** - * @active_count_put: Target specific function to allow gpu to power - * collapse - */ - void (*active_count_put)(struct adreno_device *adreno_dev); /** @pm_suspend: Target specific function to suspend the driver */ int (*pm_suspend)(struct adreno_device *adreno_dev); /** @pm_resume: Target specific function to resume the driver */ @@ -1143,7 +1138,7 @@ u64 adreno_read_cx_timer(struct adreno_device *adreno_dev); int adreno_active_count_get(struct adreno_device *adreno_dev); /** - * adreno_active_count_put - Wrapper for target specific active count put + * adreno_active_count_put - Decrement the active count * @adreno_dev: pointer to the adreno device * * Decrease the active or the KGSL device and schedule the idle thread to diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index 436821424c..c925870994 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -3037,29 +3037,6 @@ error: return ret; } -static void a6xx_gmu_active_count_put(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - if (WARN_ON(!mutex_is_locked(&device->mutex))) - return; - - if (WARN(atomic_read(&device->active_cnt) == 0, - "Unbalanced get/put calls to KGSL active count\n")) - return; - - if (atomic_dec_and_test(&device->active_cnt)) { - kgsl_pwrscale_update_stats(device); - kgsl_pwrscale_update(device); - kgsl_start_idle_timer(device); - } - - trace_kgsl_active_count(device, - (unsigned long) __builtin_return_address(0)); - - wake_up(&device->active_cnt_wq); -} - int a6xx_halt_gbif(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -3545,7 +3522,7 @@ static int a6xx_gmu_first_open(struct adreno_device *adreno_dev) * check by incrementing the active count and immediately releasing it. 
*/ atomic_inc(&device->active_cnt); - a6xx_gmu_active_count_put(adreno_dev); + adreno_active_count_put(adreno_dev); return 0; } @@ -3693,7 +3670,6 @@ const struct adreno_power_ops a6xx_gmu_power_ops = { .first_open = a6xx_gmu_first_open, .last_close = a6xx_gmu_last_close, .active_count_get = a6xx_gmu_active_count_get, - .active_count_put = a6xx_gmu_active_count_put, .pm_suspend = a6xx_gmu_pm_suspend, .pm_resume = a6xx_gmu_pm_resume, .touch_wakeup = a6xx_gmu_touch_wakeup, @@ -3705,7 +3681,6 @@ const struct adreno_power_ops a630_gmu_power_ops = { .first_open = a6xx_gmu_first_open, .last_close = a6xx_gmu_last_close, .active_count_get = a6xx_gmu_active_count_get, - .active_count_put = a6xx_gmu_active_count_put, .pm_suspend = a6xx_gmu_pm_suspend, .pm_resume = a6xx_gmu_pm_resume, .touch_wakeup = a6xx_gmu_touch_wakeup, diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index d1f566847c..9d441d8962 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -265,29 +265,6 @@ gdsc_off: return ret; } -void a6xx_hwsched_active_count_put(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - if (WARN_ON(!mutex_is_locked(&device->mutex))) - return; - - if (WARN(atomic_read(&device->active_cnt) == 0, - "Unbalanced get/put calls to KGSL active count\n")) - return; - - if (atomic_dec_and_test(&device->active_cnt)) { - kgsl_pwrscale_update_stats(device); - kgsl_pwrscale_update(device); - kgsl_start_idle_timer(device); - } - - trace_kgsl_active_count(device, - (unsigned long) __builtin_return_address(0)); - - wake_up(&device->active_cnt_wq); -} - static int a6xx_hwsched_notify_slumber(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -749,7 +726,7 @@ static int a6xx_hwsched_first_open(struct adreno_device *adreno_dev) * check by incrementing the active count and immediately releasing it. 
*/ atomic_inc(&device->active_cnt); - a6xx_hwsched_active_count_put(adreno_dev); + adreno_active_count_put(adreno_dev); return 0; } @@ -1037,7 +1014,6 @@ const struct adreno_power_ops a6xx_hwsched_power_ops = { .first_open = a6xx_hwsched_first_open, .last_close = a6xx_hwsched_power_off, .active_count_get = a6xx_hwsched_active_count_get, - .active_count_put = a6xx_hwsched_active_count_put, .touch_wakeup = a6xx_hwsched_touch_wakeup, .pm_suspend = a6xx_hwsched_pm_suspend, .pm_resume = a6xx_hwsched_pm_resume, diff --git a/adreno_a6xx_hwsched.h b/adreno_a6xx_hwsched.h index 2b23c68d45..b1ba23dff4 100644 --- a/adreno_a6xx_hwsched.h +++ b/adreno_a6xx_hwsched.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _ADRENO_A6XX_HWSCHED_H_ @@ -69,15 +69,6 @@ void a6xx_hwsched_handle_watchdog(struct adreno_device *adreno_dev); */ int a6xx_hwsched_active_count_get(struct adreno_device *adreno_dev); -/** - * a6xx_hwsched_active_count_put - Put back the active count - * @adreno_dev: Pointer to the adreno device - * - * This function decrements the active count sets the idle - * timer if active count is zero. 
- */ -void a6xx_hwsched_active_count_put(struct adreno_device *adreno_dev); - /** * a6xx_hwsched_add_to_minidump - Register hwsched_device with va minidump * @adreno_dev: Pointer to the adreno device diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index 0a23cc2936..dfcea9438b 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -2021,7 +2021,7 @@ static int send_context_unregister_hfi(struct adreno_device *adreno_dev, ret = check_ack_failure(adreno_dev, &pending_ack); done: - a6xx_hwsched_active_count_put(adreno_dev); + adreno_active_count_put(adreno_dev); del_waiter(hfi, &pending_ack); return ret; diff --git a/adreno_a6xx_rgmu.c b/adreno_a6xx_rgmu.c index f5f8a94471..2033e2cee5 100644 --- a/adreno_a6xx_rgmu.c +++ b/adreno_a6xx_rgmu.c @@ -50,29 +50,6 @@ static struct a6xx_rgmu_device *to_a6xx_rgmu(struct adreno_device *adreno_dev) return &a6xx_dev->rgmu; } -static void a6xx_rgmu_active_count_put(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - if (WARN_ON(!mutex_is_locked(&device->mutex))) - return; - - if (WARN(atomic_read(&device->active_cnt) == 0, - "Unbalanced get/put calls to KGSL active count\n")) - return; - - if (atomic_dec_and_test(&device->active_cnt)) { - kgsl_pwrscale_update_stats(device); - kgsl_pwrscale_update(device); - kgsl_start_idle_timer(device); - } - - trace_kgsl_active_count(device, - (unsigned long) __builtin_return_address(0)); - - wake_up(&device->active_cnt_wq); -} - static irqreturn_t a6xx_rgmu_irq_handler(int irq, void *data) { struct kgsl_device *device = data; @@ -1024,7 +1001,7 @@ static int a6xx_rgmu_first_open(struct adreno_device *adreno_dev) * check by incrementing the active count and immediately releasing it. 
*/ atomic_inc(&device->active_cnt); - a6xx_rgmu_active_count_put(adreno_dev); + adreno_active_count_put(adreno_dev); return 0; } @@ -1273,7 +1250,6 @@ const struct adreno_power_ops a6xx_rgmu_power_ops = { .first_open = a6xx_rgmu_first_open, .last_close = a6xx_power_off, .active_count_get = a6xx_rgmu_active_count_get, - .active_count_put = a6xx_rgmu_active_count_put, .pm_suspend = a6xx_rgmu_pm_suspend, .pm_resume = a6xx_rgmu_pm_resume, .touch_wakeup = a6xx_rgmu_touch_wakeup, diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index b47ca1028b..4fa841e777 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -2729,29 +2729,6 @@ error: return ret; } -static void gen7_gmu_active_count_put(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - if (WARN_ON(!mutex_is_locked(&device->mutex))) - return; - - if (WARN(atomic_read(&device->active_cnt) == 0, - "Unbalanced get/put calls to KGSL active count\n")) - return; - - if (atomic_dec_and_test(&device->active_cnt)) { - kgsl_pwrscale_update_stats(device); - kgsl_pwrscale_update(device); - kgsl_start_idle_timer(device); - } - - trace_kgsl_active_count(device, - (unsigned long) __builtin_return_address(0)); - - wake_up(&device->active_cnt_wq); -} - int gen7_halt_gbif(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -3206,7 +3183,7 @@ static int gen7_gmu_first_open(struct adreno_device *adreno_dev) * check by incrementing the active count and immediately releasing it. 
*/ atomic_inc(&device->active_cnt); - gen7_gmu_active_count_put(adreno_dev); + adreno_active_count_put(adreno_dev); return 0; } @@ -3353,7 +3330,6 @@ const struct adreno_power_ops gen7_gmu_power_ops = { .first_open = gen7_gmu_first_open, .last_close = gen7_gmu_last_close, .active_count_get = gen7_gmu_active_count_get, - .active_count_put = gen7_gmu_active_count_put, .pm_suspend = gen7_gmu_pm_suspend, .pm_resume = gen7_gmu_pm_resume, .touch_wakeup = gen7_gmu_touch_wakeup, diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 93d0a3b670..16d2d5fcda 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -367,29 +367,6 @@ gdsc_off: return ret; } -void gen7_hwsched_active_count_put(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - if (WARN_ON(!mutex_is_locked(&device->mutex))) - return; - - if (WARN(atomic_read(&device->active_cnt) == 0, - "Unbalanced get/put calls to KGSL active count\n")) - return; - - if (atomic_dec_and_test(&device->active_cnt)) { - kgsl_pwrscale_update_stats(device); - kgsl_pwrscale_update(device); - kgsl_start_idle_timer(device); - } - - trace_kgsl_active_count(device, - (unsigned long) __builtin_return_address(0)); - - wake_up(&device->active_cnt_wq); -} - static int gen7_hwsched_notify_slumber(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -1043,7 +1020,7 @@ static int gen7_hwsched_first_open(struct adreno_device *adreno_dev) * check by incrementing the active count and immediately releasing it. 
*/ atomic_inc(&device->active_cnt); - gen7_hwsched_active_count_put(adreno_dev); + adreno_active_count_put(adreno_dev); return 0; } @@ -1560,7 +1537,6 @@ const struct adreno_power_ops gen7_hwsched_power_ops = { .first_open = gen7_hwsched_first_open, .last_close = gen7_hwsched_power_off, .active_count_get = gen7_hwsched_active_count_get, - .active_count_put = gen7_hwsched_active_count_put, .touch_wakeup = gen7_hwsched_touch_wakeup, .pm_suspend = gen7_hwsched_pm_suspend, .pm_resume = gen7_hwsched_pm_resume, diff --git a/adreno_gen7_hwsched.h b/adreno_gen7_hwsched.h index a3de4b0564..dd10a0a462 100644 --- a/adreno_gen7_hwsched.h +++ b/adreno_gen7_hwsched.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _ADRENO_GEN7_HWSCHED_H_ @@ -67,15 +67,6 @@ void gen7_hwsched_handle_watchdog(struct adreno_device *adreno_dev); */ int gen7_hwsched_active_count_get(struct adreno_device *adreno_dev); -/** - * gen7_hwsched_active_count_put - Put back the active count - * @adreno_dev: Pointer to the adreno device - * - * This function decrements the active count sets the idle - * timer if active count is zero. 
- */ -void gen7_hwsched_active_count_put(struct adreno_device *adreno_dev); - /** * gen7_hwsched_add_to_minidump - Register hwsched_device with va minidump * @adreno_dev: Pointer to the adreno device diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 015eeff51c..c646e78a4f 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -1175,7 +1175,7 @@ static void gen7_defer_hw_fence_work(struct kthread_work *work) */ kgsl_context_put(&drawctxt->base); - gen7_hwsched_active_count_put(adreno_dev); + adreno_active_count_put(adreno_dev); _disable_hw_fence_throttle(adreno_dev, false); @@ -3855,7 +3855,7 @@ static int send_context_unregister_hfi(struct adreno_device *adreno_dev, ret = check_ack_failure(adreno_dev, &pending_ack); done: - gen7_hwsched_active_count_put(adreno_dev); + adreno_active_count_put(adreno_dev); del_waiter(hfi, &pending_ack); return ret; @@ -4076,7 +4076,7 @@ int gen7_hwsched_disable_hw_fence_throttle(struct adreno_device *adreno_dev) ret = process_hw_fence_deferred_ctxt(adreno_dev, drawctxt, ts); kgsl_context_put(&drawctxt->base); - gen7_hwsched_active_count_put(adreno_dev); + adreno_active_count_put(adreno_dev); done: _disable_hw_fence_throttle(adreno_dev, true); diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index 1d4159cf25..a34fdd48cf 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -2538,29 +2538,6 @@ error: return ret; } -static void gen8_gmu_active_count_put(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - if (WARN_ON(!mutex_is_locked(&device->mutex))) - return; - - if (WARN(atomic_read(&device->active_cnt) == 0, - "Unbalanced get/put calls to KGSL active count\n")) - return; - - if (atomic_dec_and_test(&device->active_cnt)) { - kgsl_pwrscale_update_stats(device); - kgsl_pwrscale_update(device); - kgsl_start_idle_timer(device); - } - - trace_kgsl_active_count(device, - (unsigned long) __builtin_return_address(0)); - - 
wake_up(&device->active_cnt_wq); -} - int gen8_halt_gbif(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -2995,7 +2972,7 @@ static int gen8_gmu_first_open(struct adreno_device *adreno_dev) * check by incrementing the active count and immediately releasing it. */ atomic_inc(&device->active_cnt); - gen8_gmu_active_count_put(adreno_dev); + adreno_active_count_put(adreno_dev); return 0; } @@ -3142,7 +3119,6 @@ const struct adreno_power_ops gen8_gmu_power_ops = { .first_open = gen8_gmu_first_open, .last_close = gen8_gmu_last_close, .active_count_get = gen8_gmu_active_count_get, - .active_count_put = gen8_gmu_active_count_put, .pm_suspend = gen8_gmu_pm_suspend, .pm_resume = gen8_gmu_pm_resume, .touch_wakeup = gen8_gmu_touch_wakeup, diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 1c78e36bf1..d6f7642e6a 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -452,29 +452,6 @@ gdsc_off: return ret; } -void gen8_hwsched_active_count_put(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - if (WARN_ON(!mutex_is_locked(&device->mutex))) - return; - - if (WARN(atomic_read(&device->active_cnt) == 0, - "Unbalanced get/put calls to KGSL active count\n")) - return; - - if (atomic_dec_and_test(&device->active_cnt)) { - kgsl_pwrscale_update_stats(device); - kgsl_pwrscale_update(device); - kgsl_start_idle_timer(device); - } - - trace_kgsl_active_count(device, - (unsigned long) __builtin_return_address(0)); - - wake_up(&device->active_cnt_wq); -} - static int gen8_hwsched_notify_slumber(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -1110,7 +1087,7 @@ static int gen8_hwsched_first_open(struct adreno_device *adreno_dev) * check by incrementing the active count and immediately releasing it. 
*/ atomic_inc(&device->active_cnt); - gen8_hwsched_active_count_put(adreno_dev); + adreno_active_count_put(adreno_dev); return 0; } @@ -1623,7 +1600,6 @@ const struct adreno_power_ops gen8_hwsched_power_ops = { .first_open = gen8_hwsched_first_open, .last_close = gen8_hwsched_power_off, .active_count_get = gen8_hwsched_active_count_get, - .active_count_put = gen8_hwsched_active_count_put, .touch_wakeup = gen8_hwsched_touch_wakeup, .pm_suspend = gen8_hwsched_pm_suspend, .pm_resume = gen8_hwsched_pm_resume, diff --git a/adreno_gen8_hwsched.h b/adreno_gen8_hwsched.h index 88befbdaa8..c4a8e84d89 100644 --- a/adreno_gen8_hwsched.h +++ b/adreno_gen8_hwsched.h @@ -67,15 +67,6 @@ void gen8_hwsched_handle_watchdog(struct adreno_device *adreno_dev); */ int gen8_hwsched_active_count_get(struct adreno_device *adreno_dev); -/** - * gen8_hwsched_active_count_put - Put back the active count - * @adreno_dev: Pointer to the adreno device - * - * This function decrements the active count sets the idle - * timer if active count is zero. 
- */ -void gen8_hwsched_active_count_put(struct adreno_device *adreno_dev); - /** * gen8_hwsched_add_to_minidump - Register hwsched_device with va minidump * @adreno_dev: Pointer to the adreno device diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index 3d01fd438b..30a9ccdda6 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -1060,7 +1060,7 @@ static void gen8_defer_hw_fence_work(struct kthread_work *work) */ kgsl_context_put(&drawctxt->base); - gen8_hwsched_active_count_put(adreno_dev); + adreno_active_count_put(adreno_dev); _disable_hw_fence_throttle(adreno_dev, false); @@ -3645,7 +3645,7 @@ static int send_context_unregister_hfi(struct adreno_device *adreno_dev, ret = check_ack_failure(adreno_dev, &pending_ack); done: - gen8_hwsched_active_count_put(adreno_dev); + adreno_active_count_put(adreno_dev); del_waiter(hfi, &pending_ack); return ret; @@ -3723,7 +3723,7 @@ int gen8_hwsched_disable_hw_fence_throttle(struct adreno_device *adreno_dev) ret = process_hw_fence_deferred_ctxt(adreno_dev, drawctxt, ts); kgsl_context_put(&drawctxt->base); - gen8_hwsched_active_count_put(adreno_dev); + adreno_active_count_put(adreno_dev); done: _disable_hw_fence_throttle(adreno_dev, true); From b3192aa608b407f1d6740da614a55257878f9897 Mon Sep 17 00:00:00 2001 From: Rakesh Naidu Bhaviripudi Date: Tue, 17 Oct 2023 20:07:43 +0530 Subject: [PATCH 0909/1016] kgsl: Remove unused header file Header file qcom_dma_heap.h is not used anymore. Hence, remove it. 
Change-Id: Ic1a52bfa6501d0ac6bd3bba42fff3bda45bf527d Signed-off-by: Puranam V G Tejaswi Signed-off-by: Rakesh Naidu Bhaviripudi --- kgsl.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kgsl.c b/kgsl.c index 3dfcca0826..a600729a3c 100644 --- a/kgsl.c +++ b/kgsl.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include From 0d52cece82979d0e224959dfd004711a785f2ddf Mon Sep 17 00:00:00 2001 From: Puranam V G Tejaswi Date: Tue, 17 Oct 2023 18:58:54 +0530 Subject: [PATCH 0910/1016] kgsl: Fix compilation error when CONFIG_TRACING is not enabled When CONFIG_TRACING is not enabled, compilation fails because the included header file linux/trace.h uses macros EINVAL and NULL. The header files defining these macros are not included. Add the right headers to avoid compilation errors when CONFIG_TRACING is disabled. Change-Id: Ia2deb2d69e12b2055db20529a9e38df93f4182c5 Signed-off-by: Puranam V G Tejaswi --- adreno_trace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/adreno_trace.c b/adreno_trace.c index 4c27d2c451..aed7a83894 100644 --- a/adreno_trace.c +++ b/adreno_trace.c @@ -1,8 +1,11 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021 The Linux Foundation. All rights reserved. + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. */ +#include +#include #include #include "adreno.h" From fba839d519128c6ad5ebedd6fe7547dda9fb1bde Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Wed, 4 Sep 2024 17:46:29 +0530 Subject: [PATCH 0911/1016] kgsl: hwfence: Resolve compilation errors in stub functions A few stub functions in the kgsl_sync.h file were causing compilation issues. The code has been updated with the necessary fixes. 
Change-Id: I28d6c0db87702b574de3133155d436d2d1b1f6c6 Signed-off-by: SIVA MULLATI --- kgsl_sync.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kgsl_sync.h b/kgsl_sync.h index dc46c27bfe..23c5955880 100644 --- a/kgsl_sync.h +++ b/kgsl_sync.h @@ -163,7 +163,7 @@ static inline void kgsl_get_fence_info(struct kgsl_drawobj_sync_event *event) } static inline struct kgsl_sync_fence_cb *kgsl_sync_fence_async_wait(int fd, - bool (*func)(void *priv), void *priv); + bool (*func)(void *priv), void *priv) { return NULL; } @@ -214,7 +214,7 @@ static inline void kgsl_syncsource_process_release_syncsources( bool is_kgsl_fence(struct dma_fence *f) { - + return false; } void kgsl_sync_timeline_signal(struct kgsl_sync_timeline *ktimeline, From e7807d267026992d3e6c3bd6ae9e0957a62b9016 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Fri, 30 Aug 2024 15:32:19 -0700 Subject: [PATCH 0912/1016] kgsl: Clean up all shmem pages on incomplete allocation With the CONFIG_QCOM_KGSL_USE_SHMEM config option, kgsl can allocate memory through shmem. If the process receives a fatal signal while the allocation is in progress, it may result in unused shmem pages. Clean up the list of remaining shmem pages in case this happens. Also use the correct error code when propagating this failure. 
Change-Id: I30b950be1f974c03c9df74a6ff5f3d034e0fb0aa Signed-off-by: Lynus Vaz --- kgsl_sharedmem.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/kgsl_sharedmem.c b/kgsl_sharedmem.c index e1a676be88..6012f9a561 100644 --- a/kgsl_sharedmem.c +++ b/kgsl_sharedmem.c @@ -1029,7 +1029,7 @@ static int _kgsl_shmem_alloc_page(struct kgsl_memdesc *memdesc, u32 order) gfp_t gfp_mask = kgsl_gfp_mask(order); if (fatal_signal_pending(current)) - return -ENOMEM; + return -EINTR; /* Allocate non compound page to split 4K page chunks */ gfp_mask &= ~__GFP_COMP; @@ -1126,7 +1126,7 @@ static int kgsl_alloc_page(struct kgsl_memdesc *memdesc, int *page_size, return -EINVAL; if (fatal_signal_pending(current)) - return -ENOMEM; + return -EINTR; page = shmem_read_mapping_page_gfp(memdesc->shmem_filp->f_mapping, page_off, kgsl_gfp_mask(0)); @@ -1170,6 +1170,16 @@ static void kgsl_free_page(struct page *p) put_page(p); } +static void kgsl_memdesc_pagelist_cleanup(struct kgsl_memdesc *memdesc) +{ + while (!list_empty(&memdesc->shmem_page_list)) { + struct page *page = list_first_entry(&memdesc->shmem_page_list, struct page, lru); + + list_del(&page->lru); + kgsl_free_page(page); + } +} + static void _kgsl_free_pages(struct kgsl_memdesc *memdesc) { int i; @@ -1198,7 +1208,7 @@ static int kgsl_alloc_page(struct kgsl_memdesc *memdesc, int *page_size, unsigned int *align, unsigned int page_off) { if (fatal_signal_pending(current)) - return -ENOMEM; + return -EINTR; return kgsl_pool_alloc_page(page_size, pages, pages_len, align, memdesc->kgsl_dev); @@ -1209,6 +1219,10 @@ static int kgsl_memdesc_file_setup(struct kgsl_memdesc *memdesc) return 0; } +static void kgsl_memdesc_pagelist_cleanup(struct kgsl_memdesc *memdesc) +{ +} + static void kgsl_free_page(struct page *p) { kgsl_pool_free_page(p); @@ -1340,7 +1354,8 @@ static int _kgsl_alloc_pages(struct kgsl_memdesc *memdesc, if (memdesc->shmem_filp) fput(memdesc->shmem_filp); - return -ENOMEM; + count 
= -ENOMEM; + goto done; } count += ret; @@ -1353,6 +1368,8 @@ static int _kgsl_alloc_pages(struct kgsl_memdesc *memdesc, *pages = local; +done: + kgsl_memdesc_pagelist_cleanup(memdesc); return count; } From e5a0a6323117f05a43ce47ea357e9e204ce8d67a Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Wed, 4 Sep 2024 14:30:14 -0600 Subject: [PATCH 0913/1016] kgsl: hwfence: Destroy hardware fences when soccp vote fails Currently, when soccp vote fails, as part of disabling hardware fence feature, we de-register our synx handle. However, the hardware fences (that got created before failure) may be destroyed later when all of its refcounts are put back. And when that eventually happens, we end up calling synx_release() with an invalid handle which leads to a NULL pointer dereference. To fix this, make sure we destroy all existing hardware fences before we disable hardware fence feature. Change-Id: I0c1e8ecbb0b5d5db5663e622f3d26ce781922b23 Signed-off-by: Harshdeep Dhatt --- adreno_hwsched.c | 1 + 1 file changed, 1 insertion(+) diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 32b23d0e42..c703b3df8d 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -2412,6 +2412,7 @@ void adreno_hwsched_log_destroy_pending_hw_fences(struct adreno_device *adreno_d if (count < ARRAY_SIZE(entries)) memcpy(&entries[count], entry, sizeof(*entry)); count++; + kgsl_hw_fence_destroy(entry->kfence); adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); } From 8f47aead4e9b11be92e2229b7f284ff034fe5fca Mon Sep 17 00:00:00 2001 From: Carter Cooper Date: Thu, 23 May 2024 09:46:24 -0600 Subject: [PATCH 0914/1016] kgsl: gen8: Enable CLX for Gen8_0_x GPU CLX feature is required for higher GPU frequencies. Enable the feature for Gen8_0_x GPU. Update current budget for MxG as per latest recommendation. 
Change-Id: Ie178380eb4bb6c1c398971df301fe38ef084f50d Signed-off-by: Carter Cooper Signed-off-by: Kamal Agrawal --- adreno-gpulist.h | 4 ++-- adreno_gen8_hfi.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 3c760e772d..718d1f468a 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2907,7 +2907,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_BCL | ADRENO_IFPC | ADRENO_HW_FENCE | ADRENO_PREEMPTION | - ADRENO_ACD, + ADRENO_ACD | ADRENO_CLX, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_64M, @@ -2947,7 +2947,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_1 = { ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_BCL | ADRENO_IFPC | ADRENO_HW_FENCE | ADRENO_PREEMPTION | - ADRENO_ACD, + ADRENO_ACD | ADRENO_CLX, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_64M, diff --git a/adreno_gen8_hfi.c b/adreno_gen8_hfi.c index 80478f4ed4..f530a9a0fd 100644 --- a/adreno_gen8_hfi.c +++ b/adreno_gen8_hfi.c @@ -604,7 +604,7 @@ int gen8_hfi_send_clx_feature_ctrl(struct adreno_device *adreno_dev) cmd.domain[1].clxh = 0; cmd.domain[1].urgmode = 1; cmd.domain[1].lkgen = 0; - cmd.domain[1].currbudget = 100; + cmd.domain[1].currbudget = 50; return gen8_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd)); } From 00606ef75edd53105620bf0e0c9459a643028d70 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Fri, 30 Aug 2024 15:32:19 -0700 Subject: [PATCH 0915/1016] kgsl: Clean up all shmem pages on incomplete allocation With the CONFIG_QCOM_KGSL_USE_SHMEM config option, kgsl can allocate memory through shmem. 
If the process receives a fatal signal while the allocation is in progress, it may result in unused shmem pages. Clean up the list of remaining shmem pages in case this happens. Also use the correct error code when propagating this failure. Change-Id: I30b950be1f974c03c9df74a6ff5f3d034e0fb0aa Signed-off-by: Lynus Vaz --- kgsl_sharedmem.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/kgsl_sharedmem.c b/kgsl_sharedmem.c index e1a676be88..6012f9a561 100644 --- a/kgsl_sharedmem.c +++ b/kgsl_sharedmem.c @@ -1029,7 +1029,7 @@ static int _kgsl_shmem_alloc_page(struct kgsl_memdesc *memdesc, u32 order) gfp_t gfp_mask = kgsl_gfp_mask(order); if (fatal_signal_pending(current)) - return -ENOMEM; + return -EINTR; /* Allocate non compound page to split 4K page chunks */ gfp_mask &= ~__GFP_COMP; @@ -1126,7 +1126,7 @@ static int kgsl_alloc_page(struct kgsl_memdesc *memdesc, int *page_size, return -EINVAL; if (fatal_signal_pending(current)) - return -ENOMEM; + return -EINTR; page = shmem_read_mapping_page_gfp(memdesc->shmem_filp->f_mapping, page_off, kgsl_gfp_mask(0)); @@ -1170,6 +1170,16 @@ static void kgsl_free_page(struct page *p) put_page(p); } +static void kgsl_memdesc_pagelist_cleanup(struct kgsl_memdesc *memdesc) +{ + while (!list_empty(&memdesc->shmem_page_list)) { + struct page *page = list_first_entry(&memdesc->shmem_page_list, struct page, lru); + + list_del(&page->lru); + kgsl_free_page(page); + } +} + static void _kgsl_free_pages(struct kgsl_memdesc *memdesc) { int i; @@ -1198,7 +1208,7 @@ static int kgsl_alloc_page(struct kgsl_memdesc *memdesc, int *page_size, unsigned int *align, unsigned int page_off) { if (fatal_signal_pending(current)) - return -ENOMEM; + return -EINTR; return kgsl_pool_alloc_page(page_size, pages, pages_len, align, memdesc->kgsl_dev); @@ -1209,6 +1219,10 @@ static int kgsl_memdesc_file_setup(struct kgsl_memdesc *memdesc) return 0; } +static void kgsl_memdesc_pagelist_cleanup(struct kgsl_memdesc 
*memdesc) +{ +} + static void kgsl_free_page(struct page *p) { kgsl_pool_free_page(p); @@ -1340,7 +1354,8 @@ static int _kgsl_alloc_pages(struct kgsl_memdesc *memdesc, if (memdesc->shmem_filp) fput(memdesc->shmem_filp); - return -ENOMEM; + count = -ENOMEM; + goto done; } count += ret; @@ -1353,6 +1368,8 @@ static int _kgsl_alloc_pages(struct kgsl_memdesc *memdesc, *pages = local; +done: + kgsl_memdesc_pagelist_cleanup(memdesc); return count; } From 555332d44626058e7eef350e8052259d32771338 Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Wed, 24 Jul 2024 12:10:36 +0530 Subject: [PATCH 0916/1016] kgsl: gmu: Make gmu's pdev generation agnostic Currently gmu's platform device struct is present across all generations of gmu based target. Make it generation agnostic by moving out to gmu core struct. Change-Id: I40ded4fe0b4cc697eff27744c9cbdf52127290ba Signed-off-by: SIVA MULLATI --- adreno_a6xx_gmu.c | 211 +++++++++++++++++----------------- adreno_a6xx_gmu.h | 1 - adreno_a6xx_gmu_snapshot.c | 4 +- adreno_a6xx_hfi.c | 45 ++++---- adreno_a6xx_hwsched.c | 7 +- adreno_a6xx_hwsched_hfi.c | 110 +++++++++--------- adreno_a6xx_rpmh.c | 4 +- adreno_gen7_gmu.c | 193 +++++++++++++++++-------------- adreno_gen7_gmu.h | 1 - adreno_gen7_gmu_snapshot.c | 5 +- adreno_gen7_hfi.c | 42 ++++--- adreno_gen7_hwsched.c | 11 +- adreno_gen7_hwsched_hfi.c | 228 +++++++++++++++++++------------------ adreno_gen8_gmu.c | 184 ++++++++++++++++-------------- adreno_gen8_gmu.h | 1 - adreno_gen8_gmu_snapshot.c | 3 +- adreno_gen8_hfi.c | 41 ++++--- adreno_gen8_hwsched.c | 24 ++-- adreno_gen8_hwsched_hfi.c | 213 +++++++++++++++++----------------- kgsl_gmu_core.h | 8 ++ 20 files changed, 698 insertions(+), 638 deletions(-) diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index 523cd77842..62efbaf62e 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -342,6 +342,8 @@ void a6xx_load_rsc_ucode(struct adreno_device *adreno_dev) int a6xx_load_pdc_ucode(struct adreno_device *adreno_dev) { struct 
a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); + struct platform_device *gmu_pdev = GMU_PDEV(KGSL_DEVICE(adreno_dev)); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); struct resource *res_pdc, *res_cfg, *res_seq; unsigned int cfg_offset, seq_offset; void __iomem *cfg = NULL, *seq = NULL; @@ -352,19 +354,19 @@ int a6xx_load_pdc_ucode(struct adreno_device *adreno_dev) u32 mx_res_addr = cmd_db_read_addr("mx.lvl"); if (!xo_resource_addr) { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "Failed to get 'xo.lvl' addr from cmd_db\n"); return -ENOENT; } if (!cx_res_addr) { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "Failed to get 'cx.lvl' addr from cmd_db\n"); return -ENOENT; } if (!mx_res_addr) { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "Failed to get 'mx.lvl' addr from cmd_db\n"); return -ENOENT; } @@ -393,10 +395,10 @@ int a6xx_load_pdc_ucode(struct adreno_device *adreno_dev) } /* Get pointers to each of the possible PDC resources */ - res_pdc = platform_get_resource_byname(gmu->pdev, IORESOURCE_MEM, - "kgsl_gmu_pdc_reg"); - res_cfg = platform_get_resource_byname(gmu->pdev, IORESOURCE_MEM, - "kgsl_gmu_pdc_cfg"); + res_pdc = platform_get_resource_byname(gmu_pdev, + IORESOURCE_MEM, "kgsl_gmu_pdc_reg"); + res_cfg = platform_get_resource_byname(gmu_pdev, + IORESOURCE_MEM, "kgsl_gmu_pdc_cfg"); /* * Map the starting address for pdc_cfg programming. 
If the pdc_cfg @@ -404,14 +406,14 @@ int a6xx_load_pdc_ucode(struct adreno_device *adreno_dev) */ if (gmu->pdc_cfg_base == NULL) { if (res_cfg) - gmu->pdc_cfg_base = devm_ioremap(&gmu->pdev->dev, + gmu->pdc_cfg_base = devm_ioremap(gmu_pdev_dev, res_cfg->start, resource_size(res_cfg)); else if (res_pdc) - gmu->pdc_cfg_base = devm_ioremap(&gmu->pdev->dev, + gmu->pdc_cfg_base = devm_ioremap(gmu_pdev_dev, res_pdc->start + cfg_offset, 0x10000); if (!gmu->pdc_cfg_base) { - dev_err(&gmu->pdev->dev, "Failed to map PDC CFG\n"); + dev_err(gmu_pdev_dev, "Failed to map PDC CFG\n"); return -ENODEV; } } @@ -427,18 +429,18 @@ int a6xx_load_pdc_ucode(struct adreno_device *adreno_dev) * resource is not available use an offset from the base PDC resource. */ if (gmu->pdc_seq_base == NULL) { - res_seq = platform_get_resource_byname(gmu->pdev, IORESOURCE_MEM, - "kgsl_gmu_pdc_seq"); + res_seq = platform_get_resource_byname(gmu_pdev, + IORESOURCE_MEM, "kgsl_gmu_pdc_seq"); if (res_seq) - gmu->pdc_seq_base = devm_ioremap(&gmu->pdev->dev, + gmu->pdc_seq_base = devm_ioremap(gmu_pdev_dev, res_seq->start, resource_size(res_seq)); else if (res_pdc) - gmu->pdc_seq_base = devm_ioremap(&gmu->pdev->dev, + gmu->pdc_seq_base = devm_ioremap(gmu_pdev_dev, res_pdc->start + seq_offset, 0x10000); if (!gmu->pdc_seq_base) { - dev_err(&gmu->pdev->dev, "Failed to map PDC SEQ\n"); + dev_err(gmu_pdev_dev, "Failed to map PDC SEQ\n"); return -ENODEV; } } @@ -635,7 +637,7 @@ int a6xx_gmu_device_start(struct adreno_device *adreno_dev) A6XX_GMU_CM3_FW_INIT_RESULT, val, GMU_START_TIMEOUT, mask)) { - dev_err(&gmu->pdev->dev, "GMU doesn't boot\n"); + dev_err(GMU_PDEV_DEV(device), "GMU doesn't boot\n"); gmu_core_fault_snapshot(device, GMU_FAULT_DEVICE_START); return -ETIMEDOUT; } @@ -649,7 +651,6 @@ int a6xx_gmu_device_start(struct adreno_device *adreno_dev) */ int a6xx_gmu_hfi_start(struct adreno_device *adreno_dev) { - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct kgsl_device *device = 
KGSL_DEVICE(adreno_dev); gmu_core_regwrite(device, A6XX_GMU_HFI_CTRL_INIT, 1); @@ -659,7 +660,7 @@ int a6xx_gmu_hfi_start(struct adreno_device *adreno_dev) BIT(0), GMU_START_TIMEOUT, BIT(0))) { - dev_err(&gmu->pdev->dev, "GMU HFI init failed\n"); + dev_err(GMU_PDEV_DEV(device), "GMU HFI init failed\n"); gmu_core_fault_snapshot(device, GMU_FAULT_HFI_INIT); return -ETIMEDOUT; } @@ -671,7 +672,6 @@ int a6xx_rscc_wakeup_sequence(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); - struct device *dev = &gmu->pdev->dev; int val; /* Skip wakeup sequence if we didn't do the sleep sequence */ @@ -687,7 +687,7 @@ int a6xx_rscc_wakeup_sequence(struct adreno_device *adreno_dev) gmu_core_regread(device, A6XX_GPU_CC_GX_DOMAIN_MISC, &val); if (!(val & 0x1)) - dev_info_ratelimited(&gmu->pdev->dev, + dev_info_ratelimited(GMU_PDEV_DEV(device), "GMEM CLAMP IO not set while GFX rail off\n"); /* RSC wake sequence */ @@ -701,7 +701,7 @@ int a6xx_rscc_wakeup_sequence(struct adreno_device *adreno_dev) BIT(1), GPU_START_TIMEOUT, BIT(1))) { - dev_err(dev, "Failed to do GPU RSC power on\n"); + dev_err(GMU_PDEV_DEV(device), "Failed to do GPU RSC power on\n"); return -ETIMEDOUT; } @@ -719,7 +719,8 @@ int a6xx_rscc_wakeup_sequence(struct adreno_device *adreno_dev) return 0; error_rsc: - dev_err(dev, "GPU RSC sequence stuck in waking up GPU\n"); + dev_err(GMU_PDEV_DEV(device), + "GPU RSC sequence stuck in waking up GPU\n"); return -ETIMEDOUT; } @@ -753,7 +754,7 @@ int a6xx_rscc_sleep_sequence(struct adreno_device *adreno_dev) BIT(16)); if (ret) { - dev_err(&gmu->pdev->dev, "GPU RSC power off fail\n"); + dev_err(GMU_PDEV_DEV(device), "GPU RSC power off fail\n"); return -ETIMEDOUT; } @@ -829,6 +830,7 @@ static void load_tcm(struct adreno_device *adreno_dev, const u8 *src, int a6xx_gmu_load_fw(struct adreno_device *adreno_dev) { struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); + struct kgsl_device 
*device = KGSL_DEVICE(adreno_dev); const u8 *fw = (const u8 *)gmu->fw_image->data; if (adreno_is_a630(adreno_dev) || adreno_is_a615_family(adreno_dev)) @@ -848,7 +850,7 @@ int a6xx_gmu_load_fw(struct adreno_device *adreno_dev) id = find_vma_block(gmu, blk->addr, blk->size); if (id < 0) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Unknown block in GMU FW addr:0x%x size:0x%x\n", blk->addr, blk->size); return -EINVAL; @@ -867,7 +869,7 @@ int a6xx_gmu_load_fw(struct adreno_device *adreno_dev) find_gmu_memdesc(gmu, blk->addr, blk->size); if (!md) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "No backing memory for GMU FW block addr:0x%x size:0x%x\n", blk->addr, blk->size); return -EINVAL; @@ -931,8 +933,7 @@ int a6xx_gmu_oob_set(struct kgsl_device *device, * newer implementations */ if (req >= oob_boot_slumber) { - dev_err(&gmu->pdev->dev, - "Unsupported OOB request %s\n", + dev_err(GMU_PDEV_DEV(device), "Unsupported OOB request %s\n", oob_to_str(req)); return -EINVAL; } @@ -974,7 +975,7 @@ void a6xx_gmu_oob_clear(struct kgsl_device *device, } else { clear = BIT(31 - req * 2); if (req >= oob_boot_slumber) { - dev_err(&gmu->pdev->dev, "Unsupported OOB clear %s\n", + dev_err(GMU_PDEV_DEV(device), "Unsupported OOB clear %s\n", oob_to_str(req)); return; } @@ -1122,7 +1123,6 @@ static int a6xx_complete_rpmh_votes(struct adreno_device *adreno_dev, int a6xx_gmu_sptprac_enable(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); /* Only certain targets have sptprac */ if (!adreno_is_a630(adreno_dev) && !adreno_is_a615_family(adreno_dev)) @@ -1140,7 +1140,7 @@ int a6xx_gmu_sptprac_enable(struct adreno_device *adreno_dev) SPTPRAC_POWERON_STATUS_MASK, SPTPRAC_CTRL_TIMEOUT, SPTPRAC_POWERON_STATUS_MASK)) { - dev_err(&gmu->pdev->dev, "power on SPTPRAC fail\n"); + dev_err(GMU_PDEV_DEV(device), "power on SPTPRAC fail\n"); gmu_core_fault_snapshot(device, 
GMU_FAULT_PANIC_NONE); return -ETIMEDOUT; } @@ -1156,7 +1156,6 @@ int a6xx_gmu_sptprac_enable(struct adreno_device *adreno_dev) void a6xx_gmu_sptprac_disable(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); /* Only certain targets have sptprac */ if (!adreno_is_a630(adreno_dev) && !adreno_is_a615_family(adreno_dev)) @@ -1180,7 +1179,7 @@ void a6xx_gmu_sptprac_disable(struct adreno_device *adreno_dev) SPTPRAC_POWEROFF_STATUS_MASK, SPTPRAC_CTRL_TIMEOUT, SPTPRAC_POWEROFF_STATUS_MASK)) - dev_err(&gmu->pdev->dev, "power off SPTPRAC fail\n"); + dev_err(GMU_PDEV_DEV(device), "power off SPTPRAC fail\n"); } #define SPTPRAC_POWER_OFF BIT(2) @@ -1292,6 +1291,7 @@ static const char *idle_level_name(int level) int a6xx_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(device); struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); unsigned int reg, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8; unsigned long t; @@ -1329,29 +1329,33 @@ int a6xx_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) gmu_core_regread(device, A6XX_GMU_GMU_PWR_COL_KEEPALIVE, ®4); gmu_core_regread(device, A6XX_GMU_AO_SPARE_CNTL, ®5); - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "----------------------[ GMU error ]----------------------\n"); - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "Timeout waiting for lowest idle level %s\n", idle_level_name(gmu->idle_level)); - dev_err(&gmu->pdev->dev, "Start: %llx (absolute ticks)\n", ts1); - dev_err(&gmu->pdev->dev, "Poll: %llx (ticks relative to start)\n", + dev_err(gmu_pdev_dev, + "Start: %llx (absolute ticks)\n", ts1); + dev_err(gmu_pdev_dev, + "Poll: %llx (ticks relative to start)\n", ts2-ts1); - dev_err(&gmu->pdev->dev, "Retry: %llx (ticks relative to poll)\n", + dev_err(gmu_pdev_dev, + "Retry: %llx (ticks relative to poll)\n", 
ts3-ts2); - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "RPMH_POWER_STATE=%x SPTPRAC_PWR_CLK_STATUS=%x\n", reg, reg1); - dev_err(&gmu->pdev->dev, "CX_BUSY_STATUS=%x\n", reg2); - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "CX_BUSY_STATUS=%x\n", reg2); + dev_err(gmu_pdev_dev, "RBBM_INT_UNMASKED_STATUS=%x PWR_COL_KEEPALIVE=%x\n", reg3, reg4); - dev_err(&gmu->pdev->dev, "A6XX_GMU_AO_SPARE_CNTL=%x\n", reg5); + dev_err(gmu_pdev_dev, "A6XX_GMU_AO_SPARE_CNTL=%x\n", reg5); if (adreno_is_a660(adreno_dev)) { u32 val; gmu_core_regread(device, A6XX_GMU_PWR_COL_PREEMPT_KEEPALIVE, &val); - dev_err(&gmu->pdev->dev, "PWR_COL_PREEMPT_KEEPALIVE=%x\n", val); + dev_err(gmu_pdev_dev, + "PWR_COL_PREEMPT_KEEPALIVE=%x\n", val); } /* Access GX registers only when GX is ON */ @@ -1360,8 +1364,8 @@ int a6xx_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) kgsl_regread(device, A6XX_CP_CP2GMU_STATUS, ®7); kgsl_regread(device, A6XX_CP_CONTEXT_SWITCH_CNTL, ®8); - dev_err(&gmu->pdev->dev, "A6XX_CP_STATUS_1=%x\n", reg6); - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "A6XX_CP_STATUS_1=%x\n", reg6); + dev_err(gmu_pdev_dev, "CP2GMU_STATUS=%x CONTEXT_SWITCH_CNTL=%x\n", reg7, reg8); } @@ -1376,7 +1380,6 @@ int a6xx_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) int a6xx_gmu_wait_for_idle(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); unsigned int status2; uint64_t ts1; @@ -1385,7 +1388,7 @@ int a6xx_gmu_wait_for_idle(struct adreno_device *adreno_dev) 0, GMU_START_TIMEOUT, CXGXCPUBUSYIGNAHB)) { gmu_core_regread(device, A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS2, &status2); - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "GMU not idling: status2=0x%x %llx %llx\n", status2, ts1, a6xx_read_alwayson(ADRENO_DEVICE(device))); @@ -1603,7 +1606,7 @@ struct kgsl_memdesc *reserve_gmu_kernel_block(struct a6xx_gmu_device *gmu, ret = gmu_core_map_memdesc(gmu->domain, md, addr, 
IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV); if (ret) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n", addr, md->size, ret); kgsl_sharedmem_free(md); @@ -1635,7 +1638,7 @@ struct kgsl_memdesc *reserve_gmu_kernel_block_fixed(struct a6xx_gmu_device *gmu, md = &gmu->gmu_globals[gmu->global_entries]; - ret = kgsl_memdesc_init_fixed(device, gmu->pdev, resource, md); + ret = kgsl_memdesc_init_fixed(device, GMU_PDEV(device), resource, md); if (ret) return ERR_PTR(ret); @@ -1643,7 +1646,7 @@ struct kgsl_memdesc *reserve_gmu_kernel_block_fixed(struct a6xx_gmu_device *gmu, addr = ALIGN(vma->next_va, hfi_get_gmu_va_alignment(align)); if ((vma->next_va + aligned_size) > (vma->start + vma->size)) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "GMU mapping too big. available: %d required: %d\n", vma->next_va - vma->start, aligned_size); md = ERR_PTR(-ENOMEM); @@ -1652,7 +1655,7 @@ struct kgsl_memdesc *reserve_gmu_kernel_block_fixed(struct a6xx_gmu_device *gmu, ret = gmu_core_map_memdesc(gmu->domain, md, addr, attrs); if (ret) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n", addr, md->size, ret); md = ERR_PTR(-ENOMEM); @@ -1717,12 +1720,13 @@ static int a6xx_gmu_cache_finalize(struct adreno_device *adreno_dev) static int a6xx_gmu_process_prealloc(struct a6xx_gmu_device *gmu, struct gmu_block_header *blk) { + struct kgsl_device *device = KGSL_DEVICE(a6xx_gmu_to_adreno(gmu)); struct kgsl_memdesc *md; int id = find_vma_block(gmu, blk->addr, blk->value); if (id < 0) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Invalid prealloc block addr: 0x%x value:%d\n", blk->addr, blk->value); return id; @@ -1750,6 +1754,7 @@ int a6xx_gmu_parse_fw(struct adreno_device *adreno_dev) { struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev); + struct device 
*gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); struct gmu_block_header *blk; int ret, offset = 0; @@ -1759,10 +1764,10 @@ int a6xx_gmu_parse_fw(struct adreno_device *adreno_dev) if (a6xx_core->gmufw_name == NULL) return -EINVAL; - ret = request_firmware(&gmu->fw_image, a6xx_core->gmufw_name, - &gmu->pdev->dev); + ret = request_firmware(&gmu->fw_image, + a6xx_core->gmufw_name, gmu_pdev_dev); if (ret) { - dev_err(&gmu->pdev->dev, "request_firmware (%s) failed: %d\n", + dev_err(gmu_pdev_dev, "request_firmware (%s) failed: %d\n", a6xx_core->gmufw_name, ret); return ret; } @@ -1777,7 +1782,7 @@ int a6xx_gmu_parse_fw(struct adreno_device *adreno_dev) blk = (struct gmu_block_header *)&gmu->fw_image->data[offset]; if (offset + sizeof(*blk) > gmu->fw_image->size) { - dev_err(&gmu->pdev->dev, "Invalid FW Block\n"); + dev_err(gmu_pdev_dev, "Invalid FW Block\n"); return -EINVAL; } @@ -1844,7 +1849,6 @@ static int a6xx_gmu_init(struct adreno_device *adreno_dev) static void a6xx_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) { - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); /* If SPTP_RAC is on, turn off SPTP_RAC HS */ @@ -1933,7 +1937,7 @@ static void a6xx_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) gmu_core_regrmw(device, A6XX_GPU_CPR_FSM_CTL, 1, 1); if (a6xx_gmu_gx_is_on(adreno_dev)) - dev_err(&gmu->pdev->dev, "gx is stuck on\n"); + dev_err(GMU_PDEV_DEV(device), "gx is stuck on\n"); } /* @@ -1982,7 +1986,7 @@ static int a6xx_gmu_notify_slumber(struct adreno_device *adreno_dev) gmu_core_regread(device, A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE, &state); if (state != GPU_HW_SLUMBER) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Failed to prepare for slumber: 0x%x\n", state); ret = -ETIMEDOUT; @@ -2017,7 +2021,7 @@ void a6xx_gmu_suspend(struct adreno_device *adreno_dev) a6xx_rdpm_cx_freq_update(gmu, 0); - dev_err(&gmu->pdev->dev, "Suspended GMU\n"); + dev_err(GMU_PDEV_DEV(device), 
"Suspended GMU\n"); kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE); } @@ -2066,7 +2070,7 @@ static int a6xx_gmu_dcvs_set(struct adreno_device *adreno_dev, ret = a6xx_hfi_send_generic_req(adreno_dev, &req, sizeof(req)); if (ret) { - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "Failed to set GPU perf idx %u, bw idx %u\n", req.freq, req.bw); @@ -2136,7 +2140,7 @@ void a6xx_gmu_send_nmi(struct kgsl_device *device, bool force, * to save cm3 state to DDR. */ if (a6xx_gmu_gx_is_on(adreno_dev) && adreno_smmu_is_stalled(adreno_dev)) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Skipping NMI because SMMU is stalled\n"); goto done; } @@ -2185,13 +2189,11 @@ nmi: udelay(200); done: - KGSL_GMU_CORE_FORCE_PANIC(device->gmu_core.gf_panic, gmu->pdev, ticks, gf_policy); + KGSL_GMU_CORE_FORCE_PANIC(device->gmu_core.gf_panic, GMU_PDEV(device), ticks, gf_policy); } static void a6xx_gmu_cooperative_reset(struct kgsl_device *device) { - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); unsigned int result; gmu_core_regwrite(device, A6XX_GMU_CX_GMU_WDOG_CTRL, 0); @@ -2206,7 +2208,7 @@ static void a6xx_gmu_cooperative_reset(struct kgsl_device *device) return; gmu_core_regread(device, A6XX_GMU_CM3_FW_INIT_RESULT, &result); - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "GMU cooperative reset timed out 0x%x\n", result); /* * If we dont get a snapshot ready from GMU, trigger NMI @@ -2216,7 +2218,7 @@ static void a6xx_gmu_cooperative_reset(struct kgsl_device *device) gmu_core_regread(device, A6XX_GMU_CM3_FW_INIT_RESULT, &result); if ((result & 0x800) != 0x800) - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "GMU cooperative reset NMI timed out 0x%x\n", result); } @@ -2224,7 +2226,6 @@ static int a6xx_gmu_wait_for_active_transition( struct kgsl_device *device) { unsigned int reg; - struct a6xx_gmu_device *gmu = to_a6xx_gmu(ADRENO_DEVICE(device)); if 
(!gmu_core_isenabled(device)) return 0; @@ -2232,7 +2233,7 @@ static int a6xx_gmu_wait_for_active_transition( if (gmu_core_timed_poll_check(device, A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE, GPU_HW_ACTIVE, 100, GENMASK(3, 0))) { gmu_core_regread(device, A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE, ®); - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "GMU failed to move to ACTIVE state, Current state: 0x%x\n", reg); @@ -2251,7 +2252,6 @@ static bool a6xx_gmu_scales_bandwidth(struct kgsl_device *device) void a6xx_gmu_handle_watchdog(struct adreno_device *adreno_dev) { - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 mask; @@ -2263,7 +2263,7 @@ void a6xx_gmu_handle_watchdog(struct adreno_device *adreno_dev) a6xx_gmu_send_nmi(device, false, GMU_FAULT_PANIC_NONE); - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), "GMU watchdog expired interrupt received\n"); } @@ -2271,7 +2271,6 @@ static irqreturn_t a6xx_gmu_irq_handler(int irq, void *data) { struct kgsl_device *device = data; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); const struct a6xx_gpudev *a6xx_gpudev = to_a6xx_gpudev(ADRENO_GPU_DEVICE(adreno_dev)); unsigned int status = 0; @@ -2283,19 +2282,19 @@ static irqreturn_t a6xx_gmu_irq_handler(int irq, void *data) if (status & GMU_INT_WDOG_BITE) a6xx_gpudev->handle_watchdog(adreno_dev); if (status & GMU_INT_HOST_AHB_BUS_ERR) - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "AHB bus error interrupt received\n"); if (status & GMU_INT_FENCE_ERR) { unsigned int fence_status; gmu_core_regread(device, A6XX_GMU_AHB_FENCE_STATUS, &fence_status); - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "FENCE error interrupt received %x\n", fence_status); } if (status & ~GMU_AO_INT_MASK) - dev_err_ratelimited(&gmu->pdev->dev, + 
dev_err_ratelimited(GMU_PDEV_DEV(device), "Unhandled GMU interrupts 0x%lx\n", status & ~GMU_AO_INT_MASK); @@ -2320,6 +2319,7 @@ void a6xx_gmu_snapshot(struct adreno_device *adreno_dev, void a6xx_gmu_aop_send_acd_state(struct a6xx_gmu_device *gmu, bool flag) { + struct kgsl_device *device = KGSL_DEVICE(a6xx_gmu_to_adreno(gmu)); struct qmp_pkt msg; char msg_buf[36]; u32 size; @@ -2338,7 +2338,7 @@ void a6xx_gmu_aop_send_acd_state(struct a6xx_gmu_device *gmu, bool flag) ret = mbox_send_message(gmu->mailbox.channel, &msg); if (ret < 0) - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "AOP mbox send message failed: %d\n", ret); } @@ -2352,8 +2352,8 @@ int a6xx_gmu_clock_set_rate(struct adreno_device *adreno_dev, u32 req_freq) ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", req_freq); if (ret) { - dev_err(&gmu->pdev->dev, "GMU clock:%d set failed:%d\n", - req_freq, ret); + dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), + "GMU clock:%d set failed:%d\n", req_freq, ret); return ret; } @@ -2377,13 +2377,13 @@ int a6xx_gmu_enable_clks(struct adreno_device *adreno_dev, u32 level) ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "hub_clk", adreno_dev->gmu_hub_clk_freq); if (ret && ret != -ENODEV) { - dev_err(&gmu->pdev->dev, "Unable to set the HUB clock\n"); + dev_err(GMU_PDEV_DEV(device), "Unable to set the HUB clock\n"); return ret; } ret = clk_bulk_prepare_enable(gmu->num_clks, gmu->clks); if (ret) { - dev_err(&gmu->pdev->dev, "Cannot enable GMU clocks\n"); + dev_err(GMU_PDEV_DEV(device), "Cannot enable GMU clocks\n"); return ret; } @@ -2673,6 +2673,7 @@ static int a6xx_gmu_bus_set(struct adreno_device *adreno_dev, int buslevel, static void a6xx_free_gmu_globals(struct a6xx_gmu_device *gmu) { + struct kgsl_device *device = KGSL_DEVICE(a6xx_gmu_to_adreno(gmu)); int i; for (i = 0; i < gmu->global_entries && i < ARRAY_SIZE(gmu->gmu_globals); i++) { @@ -2690,7 +2691,7 @@ static void a6xx_free_gmu_globals(struct a6xx_gmu_device *gmu) } if (gmu->domain) { - 
iommu_detach_device(gmu->domain, &gmu->pdev->dev); + iommu_detach_device(gmu->domain, GMU_PDEV_DEV(device)); iommu_domain_free(gmu->domain); gmu->domain = NULL; } @@ -2703,7 +2704,7 @@ static int a6xx_gmu_aop_mailbox_init(struct adreno_device *adreno_dev, { struct kgsl_mailbox *mailbox = &gmu->mailbox; - mailbox->client.dev = &gmu->pdev->dev; + mailbox->client.dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); mailbox->client.tx_block = true; mailbox->client.tx_tout = 1000; mailbox->client.knows_txdone = false; @@ -2755,20 +2756,19 @@ static void a6xx_gmu_acd_probe(struct kgsl_device *device, ret = a6xx_gmu_aop_mailbox_init(adreno_dev, gmu); if (ret) - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "AOP mailbox init failed: %d\n", ret); } static int a6xx_gmu_reg_probe(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); int ret; - ret = kgsl_regmap_add_region(&device->regmap, gmu->pdev, + ret = kgsl_regmap_add_region(&device->regmap, GMU_PDEV(device), "kgsl_gmu_reg", NULL, NULL); if (ret) - dev_err(&gmu->pdev->dev, "Unable to map the GMU registers\n"); + dev_err(GMU_PDEV_DEV(device), "Unable to map the GMU registers\n"); return ret; } @@ -2776,12 +2776,13 @@ static int a6xx_gmu_reg_probe(struct adreno_device *adreno_dev) static int a6xx_gmu_clk_probe(struct adreno_device *adreno_dev) { struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret, i; int tbl_size; int num_freqs; int offset; - ret = devm_clk_bulk_get_all(&gmu->pdev->dev, &gmu->clks); + ret = devm_clk_bulk_get_all(GMU_PDEV_DEV(device), &gmu->clks); if (ret < 0) return ret; @@ -2803,7 +2804,7 @@ static int a6xx_gmu_clk_probe(struct adreno_device *adreno_dev) gmu->num_clks = ret; /* Read the optional list of GMU frequencies */ - if (of_get_property(gmu->pdev->dev.of_node, + if (of_get_property(GMU_PDEV(device)->dev.of_node, "qcom,gmu-freq-table", 
&tbl_size) == NULL) goto default_gmu_freq; @@ -2813,11 +2814,11 @@ static int a6xx_gmu_clk_probe(struct adreno_device *adreno_dev) for (i = 0; i < num_freqs; i++) { offset = i * 2; - ret = of_property_read_u32_index(gmu->pdev->dev.of_node, + ret = of_property_read_u32_index(GMU_PDEV(device)->dev.of_node, "qcom,gmu-freq-table", offset, &gmu->freqs[i]); if (ret) goto default_gmu_freq; - ret = of_property_read_u32_index(gmu->pdev->dev.of_node, + ret = of_property_read_u32_index(GMU_PDEV(device)->dev.of_node, "qcom,gmu-freq-table", offset + 1, &gmu->vlvls[i]); if (ret) goto default_gmu_freq; @@ -2900,11 +2901,13 @@ static int a6xx_gmu_iommu_fault_handler(struct iommu_domain *domain, static int a6xx_gmu_iommu_init(struct a6xx_gmu_device *gmu) { + struct kgsl_device *device = KGSL_DEVICE(a6xx_gmu_to_adreno(gmu)); int ret; gmu->domain = iommu_domain_alloc(&platform_bus_type); if (gmu->domain == NULL) { - dev_err(&gmu->pdev->dev, "Unable to allocate GMU IOMMU domain\n"); + dev_err(GMU_PDEV_DEV(device), + "Unable to allocate GMU IOMMU domain\n"); return -ENODEV; } @@ -2915,14 +2918,14 @@ static int a6xx_gmu_iommu_init(struct a6xx_gmu_device *gmu) */ qcom_iommu_set_fault_model(gmu->domain, QCOM_IOMMU_FAULT_MODEL_NO_STALL); - ret = iommu_attach_device(gmu->domain, &gmu->pdev->dev); + ret = iommu_attach_device(gmu->domain, GMU_PDEV_DEV(device)); if (!ret) { iommu_set_fault_handler(gmu->domain, a6xx_gmu_iommu_fault_handler, gmu); return 0; } - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Unable to attach GMU IOMMU domain: %d\n", ret); iommu_domain_free(gmu->domain); gmu->domain = NULL; @@ -2939,11 +2942,12 @@ int a6xx_gmu_probe(struct kgsl_device *device, struct resource *res; int ret; - gmu->pdev = pdev; + device->gmu_core.pdev = pdev; - dma_set_coherent_mask(&gmu->pdev->dev, DMA_BIT_MASK(64)); - gmu->pdev->dev.dma_mask = &gmu->pdev->dev.coherent_dma_mask; - set_dma_ops(&gmu->pdev->dev, NULL); + dma_set_coherent_mask(&device->gmu_core.pdev->dev, DMA_BIT_MASK(64)); 
+ device->gmu_core.pdev->dev.dma_mask = + &device->gmu_core.pdev->dev.coherent_dma_mask; + set_dma_ops(GMU_PDEV_DEV(device), NULL); res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, "rscc"); @@ -2951,7 +2955,7 @@ int a6xx_gmu_probe(struct kgsl_device *device, gmu->rscc_virt = devm_ioremap(&device->pdev->dev, res->start, resource_size(res)); if (gmu->rscc_virt == NULL) { - dev_err(&gmu->pdev->dev, "rscc ioremap failed\n"); + dev_err(GMU_PDEV_DEV(device), "rscc ioremap failed\n"); return -ENOMEM; } } @@ -3021,11 +3025,11 @@ int a6xx_gmu_probe(struct kgsl_device *device, (void) kobject_init_and_add(&gmu->log_kobj, &log_kobj_type, &dev->kobj, "log"); (void) kobject_init_and_add(&gmu->stats_kobj, &stats_kobj_type, &dev->kobj, "stats"); - of_property_read_u32(gmu->pdev->dev.of_node, "qcom,gmu-perf-ddr-bw", - &gmu->perf_ddr_bw); + of_property_read_u32(GMU_PDEV(device)->dev.of_node, + "qcom,gmu-perf-ddr-bw", &gmu->perf_ddr_bw); - gmu->irq = kgsl_request_irq(gmu->pdev, "kgsl_gmu_irq", - a6xx_gmu_irq_handler, device); + gmu->irq = kgsl_request_irq(GMU_PDEV(device), "kgsl_gmu_irq", + a6xx_gmu_irq_handler, device); if (gmu->irq >= 0) return 0; @@ -3773,9 +3777,10 @@ int a6xx_gmu_hfi_probe(struct adreno_device *adreno_dev) { struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct a6xx_hfi *hfi = &gmu->hfi; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - hfi->irq = kgsl_request_irq(gmu->pdev, "kgsl_hfi_irq", - a6xx_hfi_irq_handler, KGSL_DEVICE(adreno_dev)); + hfi->irq = kgsl_request_irq(GMU_PDEV(device), "kgsl_hfi_irq", + a6xx_hfi_irq_handler, device); return hfi->irq < 0 ? 
hfi->irq : 0; } diff --git a/adreno_a6xx_gmu.h b/adreno_a6xx_gmu.h index 94f4586c49..8dde9f435d 100644 --- a/adreno_a6xx_gmu.h +++ b/adreno_a6xx_gmu.h @@ -32,7 +32,6 @@ struct a6xx_gmu_device { u32 pwr_dev; u32 hfi; } ver; - struct platform_device *pdev; int irq; const struct firmware *fw_image; struct kgsl_memdesc *dump_mem; diff --git a/adreno_a6xx_gmu_snapshot.c b/adreno_a6xx_gmu_snapshot.c index 839c60d956..93e7be5afc 100644 --- a/adreno_a6xx_gmu_snapshot.c +++ b/adreno_a6xx_gmu_snapshot.c @@ -189,7 +189,7 @@ static size_t a6xx_gmu_snapshot_itcm(struct kgsl_device *device, struct a6xx_gmu_device *gmu = (struct a6xx_gmu_device *)priv; if (!gmu->itcm_shadow) { - dev_err(&gmu->pdev->dev, "ITCM not captured\n"); + dev_err(GMU_PDEV_DEV(device), "ITCM not captured\n"); return 0; } @@ -402,7 +402,7 @@ void a6xx_gmu_device_snapshot(struct kgsl_device *device, /* A stalled SMMU can lead to NoC timeouts when host accesses DTCM */ if (adreno_smmu_is_stalled(adreno_dev)) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Not dumping dtcm because SMMU is stalled\n"); return; } diff --git a/adreno_a6xx_hfi.c b/adreno_a6xx_hfi.c index b4e408b6c2..4b68fe6e00 100644 --- a/adreno_a6xx_hfi.c +++ b/adreno_a6xx_hfi.c @@ -31,6 +31,7 @@ struct a6xx_hfi *to_a6xx_hfi(struct adreno_device *adreno_dev) int a6xx_hfi_queue_read(struct a6xx_gmu_device *gmu, uint32_t queue_idx, unsigned int *output, unsigned int max_size) { + struct kgsl_device *device = KGSL_DEVICE(a6xx_gmu_to_adreno(gmu)); struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem; struct hfi_queue_table *tbl = mem_addr->hostptr; struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx]; @@ -54,7 +55,7 @@ int a6xx_hfi_queue_read(struct a6xx_gmu_device *gmu, uint32_t queue_idx, size = MSG_HDR_GET_SIZE(msg_hdr); if (size > (max_size >> 2)) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "HFI message too big: hdr:0x%x rd idx=%d\n", msg_hdr, hdr->read_index); result = -EMSGSIZE; @@ -71,7 +72,7 @@ int 
a6xx_hfi_queue_read(struct a6xx_gmu_device *gmu, uint32_t queue_idx, result = size; } else { /* In case FW messed up */ - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Read index %d greater than queue size %d\n", hdr->read_index, hdr->queue_size); result = -ENODATA; @@ -248,7 +249,7 @@ int a6xx_receive_ack_cmd(struct a6xx_gmu_device *gmu, void *rcvd, } /* Didn't find the sender, list the waiter */ - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "HFI ACK: Cannot find sender for 0x%8.8x Waiter: 0x%8.8x\n", req_hdr, ret_cmd->sent_hdr); @@ -281,7 +282,7 @@ static int a6xx_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev, HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT, HFI_IRQ_MSGQ_MASK); if (rc) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Timed out waiting on ack for 0x%8.8x (id %d, sequence %d)\n", cmd[0], MSG_HDR_GET_ID(*cmd), MSG_HDR_GET_SEQNUM(*cmd)); gmu_core_fault_snapshot(device, GMU_FAULT_SEND_CMD_WAIT_INLINE); @@ -309,10 +310,9 @@ int a6xx_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd, u32 s return rc; if (ret_cmd.results[2]) { - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), "HFI ACK failure: Req=0x%8.8X, Result=0x%8.8X\n", ret_cmd.results[1], ret_cmd.results[2]); @@ -344,7 +344,6 @@ static int a6xx_hfi_send_gmu_init(struct adreno_device *adreno_dev) static int a6xx_hfi_get_fw_version(struct adreno_device *adreno_dev, uint32_t expected_ver, uint32_t *ver) { - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct hfi_fw_version_cmd cmd = { .supported_ver = expected_ver, }; @@ -365,7 +364,7 @@ static int a6xx_hfi_get_fw_version(struct adreno_device *adreno_dev, if (!rc) *ver = ret_cmd.results[3]; else - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), "gmu get fw ver failed with error=%d\n", rc); return rc; @@ 
-388,7 +387,6 @@ int a6xx_hfi_send_core_fw_start(struct adreno_device *adreno_dev) int a6xx_hfi_send_feature_ctrl(struct adreno_device *adreno_dev, uint32_t feature, uint32_t enable, uint32_t data) { - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct hfi_feature_ctrl_cmd cmd = { .feature = feature, .enable = enable, @@ -402,7 +400,7 @@ int a6xx_hfi_send_feature_ctrl(struct adreno_device *adreno_dev, ret = a6xx_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd)); if (ret) - dev_err(&gmu->pdev->dev, "Unable to %s feature %s (%d)\n", + dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), "Unable to %s feature %s (%d)\n", enable ? "enable" : "disable", hfi_feature_to_string(feature), feature); return ret; } @@ -410,7 +408,6 @@ int a6xx_hfi_send_feature_ctrl(struct adreno_device *adreno_dev, int a6xx_hfi_send_set_value(struct adreno_device *adreno_dev, u32 type, u32 subtype, u32 data) { - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct hfi_set_value_cmd cmd = { .type = type, .subtype = subtype, @@ -424,7 +421,7 @@ int a6xx_hfi_send_set_value(struct adreno_device *adreno_dev, ret = a6xx_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd)); if (ret) - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), "Unable to set HFI Value %d, %d to %d, error = %d\n", type, subtype, data, ret); return ret; @@ -478,26 +475,28 @@ void adreno_a6xx_receive_err_req(struct a6xx_gmu_device *gmu, void *rcvd) u64 ticks = gpudev->read_alwayson(ADRENO_DEVICE(device)); struct hfi_err_cmd *cmd = rcvd; - dev_err(&gmu->pdev->dev, "HFI Error Received: %d %d %.16s\n", + dev_err(GMU_PDEV_DEV(device), "HFI Error Received: %d %d %.16s\n", ((cmd->error_code >> 16) & 0xFFFF), (cmd->error_code & 0xFFFF), (char *) cmd->data); KGSL_GMU_CORE_FORCE_PANIC(device->gmu_core.gf_panic, - gmu->pdev, ticks, GMU_FAULT_F2H_MSG_ERR); + GMU_PDEV(device), ticks, GMU_FAULT_F2H_MSG_ERR); } void adreno_a6xx_receive_debug_req(struct a6xx_gmu_device *gmu, void *rcvd) { + struct 
kgsl_device *device = KGSL_DEVICE(a6xx_gmu_to_adreno(gmu)); struct hfi_debug_cmd *cmd = rcvd; - dev_dbg(&gmu->pdev->dev, "HFI Debug Received: %d %d %d\n", + dev_dbg(GMU_PDEV_DEV(device), "HFI Debug Received: %d %d %d\n", cmd->type, cmd->timestamp, cmd->data); } static void a6xx_hfi_v1_receiver(struct a6xx_gmu_device *gmu, uint32_t *rcvd, struct pending_cmd *ret_cmd) { + struct kgsl_device *device = KGSL_DEVICE(a6xx_gmu_to_adreno(gmu)); /* V1 ACK Handler */ if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_V1_MSG_ACK) { a6xx_receive_ack_cmd(gmu, rcvd, ret_cmd); @@ -513,7 +512,7 @@ static void a6xx_hfi_v1_receiver(struct a6xx_gmu_device *gmu, uint32_t *rcvd, adreno_a6xx_receive_debug_req(gmu, rcvd); break; default: /* No Reply */ - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "HFI V1 request %d not supported\n", MSG_HDR_GET_ID(rcvd[0])); break; @@ -523,6 +522,7 @@ static void a6xx_hfi_v1_receiver(struct a6xx_gmu_device *gmu, uint32_t *rcvd, int a6xx_hfi_process_queue(struct a6xx_gmu_device *gmu, uint32_t queue_idx, struct pending_cmd *ret_cmd) { + struct kgsl_device *device = KGSL_DEVICE(a6xx_gmu_to_adreno(gmu)); uint32_t rcvd[MAX_RCVD_SIZE]; while (a6xx_hfi_queue_read(gmu, queue_idx, rcvd, sizeof(rcvd)) > 0) { @@ -550,7 +550,7 @@ int a6xx_hfi_process_queue(struct a6xx_gmu_device *gmu, adreno_a6xx_receive_debug_req(gmu, rcvd); break; default: /* No Reply */ - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "HFI request %d not supported\n", MSG_HDR_GET_ID(rcvd[0])); break; @@ -564,6 +564,7 @@ static int a6xx_hfi_verify_fw_version(struct adreno_device *adreno_dev) { struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); int result; unsigned int ver, major, minor; @@ -577,19 +578,19 @@ static int a6xx_hfi_verify_fw_version(struct adreno_device *adreno_dev) result = a6xx_hfi_get_fw_version(adreno_dev, GMU_VERSION(major, 
minor, 0), &ver); if (result) { - dev_err_once(&gmu->pdev->dev, + dev_err_once(gmu_pdev_dev, "Failed to get FW version via HFI\n"); return result; } /* For now, warn once. Could return error later if needed */ if (major != GMU_VER_MAJOR(ver)) - dev_err_once(&gmu->pdev->dev, + dev_err_once(gmu_pdev_dev, "FW Major Error: Wanted %d, got %d\n", major, GMU_VER_MAJOR(ver)); if (minor > GMU_VER_MINOR(ver)) - dev_err_once(&gmu->pdev->dev, + dev_err_once(gmu_pdev_dev, "FW Minor Error: Wanted < %d, got %d\n", GMU_VER_MINOR(ver), minor); @@ -773,7 +774,7 @@ irqreturn_t a6xx_hfi_irq_handler(int irq, void *data) if (status & HFI_IRQ_DBGQ_MASK) a6xx_hfi_process_queue(gmu, HFI_DBG_ID, NULL); if (status & HFI_IRQ_CM3_FAULT_MASK) { - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "GMU CM3 fault interrupt received\n"); atomic_set(&gmu->cm3_fault, 1); @@ -781,7 +782,7 @@ irqreturn_t a6xx_hfi_irq_handler(int irq, void *data) smp_wmb(); } if (status & ~HFI_IRQ_MASK) - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "Unhandled HFI interrupts 0x%lx\n", status & ~HFI_IRQ_MASK); diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index d1f566847c..f71985a6b5 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -798,7 +798,7 @@ static int a6xx_hwsched_dcvs_set(struct adreno_device *adreno_dev, /* Do not set to XO and lower GPU clock vote from GMU */ if ((gpu_pwrlevel != INVALID_DCVS_IDX) && (gpu_pwrlevel >= table->gpu_level_num - 1)) { - dev_err(&gmu->pdev->dev, "Invalid gpu dcvs request: %d\n", + dev_err(GMU_PDEV_DEV(device), "Invalid gpu dcvs request: %d\n", gpu_pwrlevel); return -EINVAL; } @@ -820,7 +820,7 @@ static int a6xx_hwsched_dcvs_set(struct adreno_device *adreno_dev, ret = a6xx_hfi_send_cmd_async(adreno_dev, &req, sizeof(req)); if (ret) { - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "Failed to set GPU perf idx %u, bw idx %u\n", req.freq, req.bw); @@ -945,7 
+945,6 @@ err: void a6xx_hwsched_handle_watchdog(struct adreno_device *adreno_dev) { - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 mask; @@ -957,7 +956,7 @@ void a6xx_hwsched_handle_watchdog(struct adreno_device *adreno_dev) a6xx_gmu_send_nmi(device, false, GMU_FAULT_PANIC_NONE); - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "GMU watchdog expired interrupt received\n"); adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT); diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index 20e10503a3..a5a82ee509 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -89,8 +89,8 @@ static void del_waiter(struct a6xx_hwsched_hfi *hfi, struct pending_cmd *ack) static void a6xx_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd) { - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); struct pending_cmd *cmd = NULL; u32 waiters[64], num_waiters = 0, i; u32 *ack = rcvd; @@ -99,7 +99,7 @@ static void a6xx_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd) u32 size_bytes = MSG_HDR_GET_SIZE(hdr) << 2; if (size_bytes > sizeof(cmd->results)) - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(gmu_pdev_dev, "Ack result too big: %d Truncating to: %ld\n", size_bytes, sizeof(cmd->results)); @@ -122,13 +122,13 @@ static void a6xx_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd) read_unlock(&hfi->msglock); /* Didn't find the sender, list the waiter */ - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(gmu_pdev_dev, "Unexpectedly got id %d seqnum %d. 
Total waiters: %d Top %d Waiters:\n", MSG_HDR_GET_ID(req_hdr), MSG_HDR_GET_SEQNUM(req_hdr), num_waiters, min_t(u32, num_waiters, 5)); for (i = 0; i < num_waiters && i < 5; i++) - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(gmu_pdev_dev, " id %d seqnum %d\n", MSG_HDR_GET_ID(waiters[i]), MSG_HDR_GET_SEQNUM(waiters[i])); @@ -232,20 +232,19 @@ static u32 get_payload_rb_key_legacy(struct adreno_device *adreno_dev, static void log_gpu_fault_legacy(struct adreno_device *adreno_dev) { - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); - struct device *dev = &gmu->pdev->dev; + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); struct hfi_context_bad_cmd_legacy *cmd = adreno_dev->hwsched.ctxt_bad; switch (cmd->error) { case GMU_GPU_HW_HANG: - dev_crit_ratelimited(dev, "MISC: GPU hang detected\n"); + dev_crit_ratelimited(gmu_pdev_dev, "MISC: GPU hang detected\n"); break; case GMU_GPU_SW_HANG: - dev_crit_ratelimited(dev, "gpu timeout ctx %d ts %u\n", + dev_crit_ratelimited(gmu_pdev_dev, "gpu timeout ctx %d ts %u\n", cmd->ctxt_id, cmd->ts); break; case GMU_CP_OPCODE_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP opcode error interrupt | opcode=0x%8.8x\n", a6xx_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_OPCODE_ERROR)); @@ -254,20 +253,20 @@ static void log_gpu_fault_legacy(struct adreno_device *adreno_dev) u32 status = a6xx_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_PROTECTED_ERROR); - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", status & (1 << 20) ? 
"READ" : "WRITE", status & 0x3FFFF, status); } break; case GMU_CP_ILLEGAL_INST_ERROR: - dev_crit_ratelimited(dev, "CP Illegal instruction error\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP Illegal instruction error\n"); break; case GMU_CP_UCODE_ERROR: - dev_crit_ratelimited(dev, "CP ucode error interrupt\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP ucode error interrupt\n"); break; case GMU_CP_HW_FAULT_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP | Ringbuffer HW fault | status=0x%8.8x\n", a6xx_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_HW_FAULT)); @@ -285,16 +284,16 @@ static void log_gpu_fault_legacy(struct adreno_device *adreno_dev) next_rptr = get_payload_rb_key_legacy(adreno_dev, next, KEY_RB_RPTR); next_wptr = get_payload_rb_key_legacy(adreno_dev, next, KEY_RB_WPTR); - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n", cur, cur_rptr, cur_wptr, next, next_rptr, next_wptr); } break; case GMU_CP_GPC_ERROR: - dev_crit_ratelimited(dev, "RBBM: GPC error\n"); + dev_crit_ratelimited(gmu_pdev_dev, "RBBM: GPC error\n"); break; default: - dev_crit_ratelimited(dev, "Unknown GPU fault: %u\n", + dev_crit_ratelimited(gmu_pdev_dev, "Unknown GPU fault: %u\n", cmd->error); break; } @@ -361,20 +360,19 @@ static u32 get_payload_rb_key(struct adreno_device *adreno_dev, static void log_gpu_fault(struct adreno_device *adreno_dev) { - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); - struct device *dev = &gmu->pdev->dev; + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; switch (cmd->error) { case GMU_GPU_HW_HANG: - dev_crit_ratelimited(dev, "MISC: GPU hang detected\n"); + dev_crit_ratelimited(gmu_pdev_dev, "MISC: GPU hang detected\n"); break; case GMU_GPU_SW_HANG: - dev_crit_ratelimited(dev, "gpu timeout ctx %d ts %d\n", + 
dev_crit_ratelimited(gmu_pdev_dev, "gpu timeout ctx %d ts %d\n", cmd->gc.ctxt_id, cmd->gc.ts); break; case GMU_CP_OPCODE_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP opcode error interrupt | opcode=0x%8.8x\n", a6xx_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_OPCODE_ERROR)); @@ -383,20 +381,20 @@ static void log_gpu_fault(struct adreno_device *adreno_dev) u32 status = a6xx_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_PROTECTED_ERROR); - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", status & (1 << 20) ? "READ" : "WRITE", status & 0x3FFFF, status); } break; case GMU_CP_ILLEGAL_INST_ERROR: - dev_crit_ratelimited(dev, "CP Illegal instruction error\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP Illegal instruction error\n"); break; case GMU_CP_UCODE_ERROR: - dev_crit_ratelimited(dev, "CP ucode error interrupt\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP ucode error interrupt\n"); break; case GMU_CP_HW_FAULT_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP | Ringbuffer HW fault | status=0x%8.8x\n", a6xx_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_HW_FAULT)); @@ -414,16 +412,16 @@ static void log_gpu_fault(struct adreno_device *adreno_dev) next_rptr = get_payload_rb_key(adreno_dev, next, KEY_RB_RPTR); next_wptr = get_payload_rb_key(adreno_dev, next, KEY_RB_WPTR); - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n", cur, cur_rptr, cur_wptr, next, next_rptr, next_wptr); } break; case GMU_CP_GPC_ERROR: - dev_crit_ratelimited(dev, "RBBM: GPC error\n"); + dev_crit_ratelimited(gmu_pdev_dev, "RBBM: GPC error\n"); break; default: - dev_crit_ratelimited(dev, "Unknown GPU fault: %u\n", + dev_crit_ratelimited(gmu_pdev_dev, "Unknown GPU fault: %u\n", cmd->error); break; } @@ -590,7 +588,7 @@ static 
irqreturn_t a6xx_hwsched_hfi_handler(int irq, void *data) /* make sure other CPUs see the update */ smp_wmb(); - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "GMU CM3 fault interrupt received\n"); adreno_scheduler_fault(adreno_dev, ADRENO_GMU_FAULT); @@ -600,7 +598,7 @@ static irqreturn_t a6xx_hwsched_hfi_handler(int irq, void *data) status &= GENMASK(31 - (oob_max - 1), 0); if (status & ~hfi->irq_mask) - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "Unhandled HFI interrupts 0x%x\n", status & ~hfi->irq_mask); @@ -612,20 +610,20 @@ static irqreturn_t a6xx_hwsched_hfi_handler(int irq, void *data) static int check_ack_failure(struct adreno_device *adreno_dev, struct pending_cmd *ack) { - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u64 ticks = gpudev->read_alwayson(adreno_dev); if (ack->results[2] != 0xffffffff) return 0; - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "ACK error: sender id %d seqnum %d\n", MSG_HDR_GET_ID(ack->sent_hdr), MSG_HDR_GET_SEQNUM(ack->sent_hdr)); - KGSL_GMU_CORE_FORCE_PANIC(KGSL_DEVICE(adreno_dev)->gmu_core.gf_panic, - gmu->pdev, ticks, GMU_FAULT_HFI_ACK); + KGSL_GMU_CORE_FORCE_PANIC(device->gmu_core.gf_panic, + GMU_PDEV(device), ticks, GMU_FAULT_HFI_ACK); return -EINVAL; } @@ -633,6 +631,7 @@ int a6xx_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 si { struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 *cmd = data; u32 seqnum; int rc; @@ -647,8 +646,8 @@ int a6xx_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 si if (rc) goto done; - rc = adreno_hwsched_wait_ack_completion(adreno_dev, &gmu->pdev->dev, &pending_ack, - a6xx_hwsched_process_msgq); + rc = 
adreno_hwsched_wait_ack_completion(adreno_dev, + GMU_PDEV_DEV(device), &pending_ack, a6xx_hwsched_process_msgq); if (rc) goto done; @@ -762,6 +761,7 @@ static int gmu_import_buffer(struct adreno_device *adreno_dev, struct hfi_mem_alloc_entry *entry) { struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct hfi_mem_alloc_desc *desc = &entry->desc; int attrs = get_attrs(desc->flags); struct gmu_vma_entry *vma = &gmu->vma[GMU_NONCACHED_KERNEL]; @@ -771,7 +771,7 @@ static int gmu_import_buffer(struct adreno_device *adreno_dev, vma = &gmu->vma[GMU_CACHE]; if ((vma->next_va + desc->size) > (vma->start + vma->size)) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "GMU mapping too big. available: %d required: %d\n", vma->next_va - vma->start, desc->size); return -ENOMEM; @@ -780,7 +780,7 @@ static int gmu_import_buffer(struct adreno_device *adreno_dev, ret = gmu_core_map_memdesc(gmu->domain, entry->md, vma->next_va, attrs); if (ret) { - dev_err(&gmu->pdev->dev, "gmu map err: 0x%08x, %x\n", + dev_err(GMU_PDEV_DEV(device), "gmu map err: 0x%08x, %x\n", vma->next_va, attrs); return ret; } @@ -811,6 +811,7 @@ static struct hfi_mem_alloc_entry *get_mem_alloc_entry( struct adreno_device *adreno_dev, struct hfi_mem_alloc_desc *desc) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(device); struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev); struct hfi_mem_alloc_entry *entry = lookup_mem_alloc_table(adreno_dev, desc); @@ -825,13 +826,13 @@ static struct hfi_mem_alloc_entry *get_mem_alloc_entry( return entry; if (desc->mem_kind >= HFI_MEMKIND_MAX) { - dev_err(&gmu->pdev->dev, "Invalid mem kind: %d\n", + dev_err(gmu_pdev_dev, "Invalid mem kind: %d\n", desc->mem_kind); return ERR_PTR(-EINVAL); } if (hfi->mem_alloc_entries == ARRAY_SIZE(hfi->mem_alloc_table)) { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "Reached max mem alloc entries\n"); 
return ERR_PTR(-ENOMEM); } @@ -895,7 +896,7 @@ static struct hfi_mem_alloc_entry *get_mem_alloc_entry( */ ret = gmu_import_buffer(adreno_dev, entry); if (ret) { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "gpuaddr: 0x%llx size: %lld bytes lost\n", entry->md->gpuaddr, entry->md->size); memset(entry, 0, sizeof(*entry)); @@ -987,6 +988,7 @@ static int send_start_msg(struct adreno_device *adreno_dev) { struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(device); u32 seqnum; int rc; struct hfi_start_cmd cmd; @@ -1010,7 +1012,7 @@ poll: rc = adreno_hwsched_poll_msg_queue_write_index(gmu->hfi.hfi_mem); if (rc) { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "Timed out processing MSG_START seqnum: %d\n", seqnum); gmu_core_fault_snapshot(device, GMU_FAULT_H2F_MSG_START); @@ -1019,7 +1021,7 @@ poll: rc = a6xx_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)); if (rc <= 0) { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "MSG_START: payload error: %d\n", rc); gmu_core_fault_snapshot(device, GMU_FAULT_H2F_MSG_START); @@ -1041,7 +1043,7 @@ poll: rc = check_ack_failure(adreno_dev, &pending_ack); goto done; } else { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "MSG_START: unexpected response id:%d, type:%d\n", MSG_HDR_GET_ID(rcvd[0]), MSG_HDR_GET_TYPE(rcvd[0])); @@ -1505,6 +1507,7 @@ static int hfi_f2h_main(void *arg) struct adreno_device *adreno_dev = arg; struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev); struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); while (!kthread_should_stop()) { wait_event_interruptible(hfi->f2h_wq, kthread_should_stop() || @@ -1520,8 +1523,8 @@ static int hfi_f2h_main(void *arg) break; a6xx_hwsched_process_msgq(adreno_dev); - gmu_core_process_trace_data(KGSL_DEVICE(adreno_dev), - &gmu->pdev->dev, &gmu->trace); + gmu_core_process_trace_data(device, + 
GMU_PDEV_DEV(device), &gmu->trace); a6xx_hwsched_process_dbgq(adreno_dev, true); } @@ -1532,8 +1535,9 @@ int a6xx_hwsched_hfi_probe(struct adreno_device *adreno_dev) { struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct a6xx_hwsched_hfi *hw_hfi = to_a6xx_hwsched_hfi(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - gmu->hfi.irq = kgsl_request_irq(gmu->pdev, "kgsl_hfi_irq", + gmu->hfi.irq = kgsl_request_irq(GMU_PDEV(device), "kgsl_hfi_irq", a6xx_hwsched_hfi_handler, adreno_dev); if (gmu->hfi.irq < 0) @@ -1632,7 +1636,6 @@ static int send_context_pointers(struct adreno_device *adreno_dev, static int hfi_context_register(struct adreno_device *adreno_dev, struct kgsl_context *context) { - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret; @@ -1641,7 +1644,7 @@ static int hfi_context_register(struct adreno_device *adreno_dev, ret = send_context_register(adreno_dev, context); if (ret) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Unable to register context %u: %d\n", context->id, ret); @@ -1653,7 +1656,7 @@ static int hfi_context_register(struct adreno_device *adreno_dev, ret = send_context_pointers(adreno_dev, context); if (ret) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Unable to register context %u pointers: %d\n", context->id, ret); @@ -1964,6 +1967,7 @@ static int send_context_unregister_hfi(struct adreno_device *adreno_dev, { struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct pending_cmd pending_ack; struct hfi_unregister_ctxt_cmd cmd; u32 seqnum; @@ -2004,7 +2008,7 @@ static int send_context_unregister_hfi(struct adreno_device *adreno_dev, } ret = adreno_hwsched_ctxt_unregister_wait_completion(adreno_dev, - &gmu->pdev->dev, &pending_ack, a6xx_hwsched_process_msgq, &cmd); + GMU_PDEV_DEV(device), 
&pending_ack, a6xx_hwsched_process_msgq, &cmd); if (ret) { trigger_context_unregister_fault(adreno_dev, context); goto done; @@ -2076,8 +2080,8 @@ u32 a6xx_hwsched_preempt_count_get(struct adreno_device *adreno_dev) if (rc) goto done; - rc = adreno_hwsched_wait_ack_completion(adreno_dev, &gmu->pdev->dev, &pending_ack, - a6xx_hwsched_process_msgq); + rc = adreno_hwsched_wait_ack_completion(adreno_dev, + GMU_PDEV_DEV(device), &pending_ack, a6xx_hwsched_process_msgq); if (rc) goto done; diff --git a/adreno_a6xx_rpmh.c b/adreno_a6xx_rpmh.c index 5703967c8a..d32d86816e 100644 --- a/adreno_a6xx_rpmh.c +++ b/adreno_a6xx_rpmh.c @@ -85,7 +85,7 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, if (table->gpu_level_num > pri_rail->num || table->gpu_level_num > ARRAY_SIZE(vlvl_tbl)) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Defined more GPU DCVS levels than RPMh can support\n"); return -ERANGE; } @@ -108,7 +108,7 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, ret = adreno_rpmh_to_cx_hlvl(cx_rail, cx_vlvl, &table->gx_votes[index].dep_vote); if (ret) { - dev_err(&gmu->pdev->dev, "Unsupported cx corner: %u\n", + dev_err(GMU_PDEV_DEV(device), "Unsupported cx corner: %u\n", cx_vlvl); return ret; } diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index afd115802d..65bdf5f821 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -311,19 +311,21 @@ void gen7_load_rsc_ucode(struct adreno_device *adreno_dev) int gen7_load_pdc_ucode(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct platform_device *gmu_pdev = GMU_PDEV(KGSL_DEVICE(adreno_dev)); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); if (gmu->pdc_cfg_base == NULL) { struct resource *res_cfg; - res_cfg = platform_get_resource_byname(gmu->pdev, IORESOURCE_MEM, - "gmu_pdc"); + res_cfg = platform_get_resource_byname(gmu_pdev, + IORESOURCE_MEM, "gmu_pdc"); if (res_cfg) - gmu->pdc_cfg_base = 
devm_ioremap(&gmu->pdev->dev, + gmu->pdc_cfg_base = devm_ioremap(gmu_pdev_dev, res_cfg->start, resource_size(res_cfg)); if (!gmu->pdc_cfg_base) { - dev_err(&gmu->pdev->dev, "Failed to map PDC CFG\n"); + dev_err(gmu_pdev_dev, "Failed to map PDC CFG\n"); return -ENODEV; } } @@ -334,7 +336,7 @@ int gen7_load_pdc_ucode(struct adreno_device *adreno_dev) GEN7_PDC_ENABLE_REG_VALUE); if (!IS_ENABLED(CONFIG_QCOM_KGSL_HIBERNATION)) { - devm_iounmap(&gmu->pdev->dev, gmu->pdc_cfg_base); + devm_iounmap(gmu_pdev_dev, gmu->pdc_cfg_base); gmu->pdc_cfg_base = NULL; } @@ -391,7 +393,8 @@ int gen7_gmu_device_start(struct adreno_device *adreno_dev) if (gmu_core_timed_poll_check(device, GEN7_GMU_CM3_FW_INIT_RESULT, BIT(8), 100, GENMASK(8, 0))) { - dev_err(&gmu->pdev->dev, "GMU failed to come out of reset\n"); + dev_err(GMU_PDEV_DEV(device), + "GMU failed to come out of reset\n"); gmu_core_fault_snapshot(device, GMU_FAULT_DEVICE_START); return -ETIMEDOUT; } @@ -405,14 +408,13 @@ int gen7_gmu_device_start(struct adreno_device *adreno_dev) */ int gen7_gmu_hfi_start(struct adreno_device *adreno_dev) { - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); gmu_core_regwrite(device, GEN7_GMU_HFI_CTRL_INIT, 1); if (gmu_core_timed_poll_check(device, GEN7_GMU_HFI_CTRL_STATUS, BIT(0), 100, BIT(0))) { - dev_err(&gmu->pdev->dev, "GMU HFI init failed\n"); + dev_err(GMU_PDEV_DEV(device), "GMU HFI init failed\n"); gmu_core_fault_snapshot(device, GMU_FAULT_HFI_INIT); return -ETIMEDOUT; } @@ -424,7 +426,6 @@ int gen7_rscc_wakeup_sequence(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - struct device *dev = &gmu->pdev->dev; /* Skip wakeup sequence if we didn't do the sleep sequence */ if (!test_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags)) @@ -438,13 +439,14 @@ int gen7_rscc_wakeup_sequence(struct adreno_device *adreno_dev) if 
(gmu_core_timed_poll_check(device, GEN7_GMU_RSCC_CONTROL_ACK, BIT(1), 100, BIT(1))) { - dev_err(dev, "Failed to do GPU RSC power on\n"); + dev_err(GMU_PDEV_DEV(device), "Failed to do GPU RSC power on\n"); return -ETIMEDOUT; } if (gen7_timed_poll_check_rscc(gmu, GEN7_RSCC_SEQ_BUSY_DRV0, 0x0, 100, UINT_MAX)) { - dev_err(dev, "GPU RSC sequence stuck in waking up GPU\n"); + dev_err(GMU_PDEV_DEV(device), + "GPU RSC sequence stuck in waking up GPU\n"); return -ETIMEDOUT; } @@ -480,7 +482,7 @@ int gen7_rscc_sleep_sequence(struct adreno_device *adreno_dev) ret = gen7_timed_poll_check_rscc(gmu, GEN7_GPU_RSCC_RSC_STATUS0_DRV0, BIT(16), 100, BIT(16)); if (ret) { - dev_err(&gmu->pdev->dev, "GPU RSC power off fail\n"); + dev_err(GMU_PDEV_DEV(device), "GPU RSC power off fail\n"); return -ETIMEDOUT; } @@ -534,6 +536,7 @@ static void load_tcm(struct adreno_device *adreno_dev, const u8 *src, int gen7_gmu_load_fw(struct adreno_device *adreno_dev) { + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); const u8 *fw = (const u8 *)gmu->fw_image->data; @@ -551,7 +554,7 @@ int gen7_gmu_load_fw(struct adreno_device *adreno_dev) id = find_vma_block(gmu, blk->addr, blk->size); if (id < 0) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Unknown block in GMU FW addr:0x%x size:0x%x\n", blk->addr, blk->size); return -EINVAL; @@ -572,7 +575,7 @@ int gen7_gmu_load_fw(struct adreno_device *adreno_dev) find_gmu_memdesc(gmu, blk->addr, blk->size); if (!md) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "No backing memory for GMU FW block addr:0x%x size:0x%x\n", blk->addr, blk->size); return -EINVAL; @@ -632,7 +635,7 @@ int gen7_gmu_oob_set(struct kgsl_device *device, return 0; if (req >= oob_boot_slumber) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Unsupported OOB request %s\n", oob_to_str(req)); return -EINVAL; @@ -670,7 +673,7 @@ void gen7_gmu_oob_clear(struct kgsl_device *device, return; if (req 
>= oob_boot_slumber) { - dev_err(&gmu->pdev->dev, "Unsupported OOB clear %s\n", + dev_err(GMU_PDEV_DEV(device), "Unsupported OOB clear %s\n", oob_to_str(req)); return; } @@ -774,7 +777,8 @@ static int gen7_complete_rpmh_votes(struct gen7_gmu_device *gmu, } if (ret) - dev_err(&gmu->pdev->dev, "RPMH votes timedout: %d\n", ret); + dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), + "RPMH votes timedout: %d\n", ret); return ret; } @@ -813,6 +817,7 @@ static const char *idle_level_name(int level) int gen7_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(device); struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); u32 reg, reg1, reg2, reg3, reg4; @@ -858,15 +863,19 @@ int gen7_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) gmu_core_regread(device, GEN7_GMU_RBBM_INT_UNMASKED_STATUS, ®3); gmu_core_regread(device, GEN7_GMU_GMU_PWR_COL_KEEPALIVE, ®4); - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "----------------------[ GMU error ]----------------------\n"); - dev_err(&gmu->pdev->dev, "Timeout waiting for lowest idle level %s\n", + dev_err(gmu_pdev_dev, + "Timeout waiting for lowest idle level %s\n", idle_level_name(gmu->idle_level)); - dev_err(&gmu->pdev->dev, "Start: %llx (absolute ticks)\n", ts1); - dev_err(&gmu->pdev->dev, "Poll: %llx (ticks relative to start)\n", ts2-ts1); - dev_err(&gmu->pdev->dev, "RPMH_POWER_STATE=%x GFX_PWR_CLK_STATUS=%x\n", reg, reg1); - dev_err(&gmu->pdev->dev, "CX_BUSY_STATUS=%x\n", reg2); - dev_err(&gmu->pdev->dev, "RBBM_INT_UNMASKED_STATUS=%x PWR_COL_KEEPALIVE=%x\n", reg3, reg4); + dev_err(gmu_pdev_dev, "Start: %llx (absolute ticks)\n", ts1); + dev_err(gmu_pdev_dev, + "Poll: %llx (ticks relative to start)\n", ts2-ts1); + dev_err(gmu_pdev_dev, + "RPMH_POWER_STATE=%x GFX_PWR_CLK_STATUS=%x\n", reg, reg1); + dev_err(gmu_pdev_dev, "CX_BUSY_STATUS=%x\n", 
reg2); + dev_err(gmu_pdev_dev, + "RBBM_INT_UNMASKED_STATUS=%x PWR_COL_KEEPALIVE=%x\n", reg3, reg4); /* Access GX registers only when GX is ON */ if (is_on(reg1)) { @@ -874,8 +883,9 @@ int gen7_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) kgsl_regread(device, GEN7_CP_CP2GMU_STATUS, ®3); kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_CNTL, ®4); - dev_err(&gmu->pdev->dev, "GEN7_CP_STATUS_1=%x\n", reg2); - dev_err(&gmu->pdev->dev, "CP2GMU_STATUS=%x CONTEXT_SWITCH_CNTL=%x\n", reg3, reg4); + dev_err(gmu_pdev_dev, "GEN7_CP_STATUS_1=%x\n", reg2); + dev_err(gmu_pdev_dev, + "CP2GMU_STATUS=%x CONTEXT_SWITCH_CNTL=%x\n", reg3, reg4); } WARN_ON(1); @@ -888,7 +898,6 @@ int gen7_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) int gen7_gmu_wait_for_idle(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); u32 status2; u64 ts1; @@ -898,7 +907,7 @@ int gen7_gmu_wait_for_idle(struct adreno_device *adreno_dev) 0, 100, CXGXCPUBUSYIGNAHB)) { gmu_core_regread(device, GEN7_GPU_GMU_AO_GPU_CX_BUSY_STATUS2, &status2); - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "GMU not idling: status2=0x%x %llx %llx\n", status2, ts1, gpudev->read_alwayson(adreno_dev)); @@ -929,7 +938,7 @@ int gen7_gmu_version_info(struct adreno_device *adreno_dev) /* Check if gmu fw version on device is compatible with kgsl driver */ if (gmu->ver.core < gen7_core->gmu_fw_version) { - dev_err_once(&gmu->pdev->dev, + dev_err_once(GMU_PDEV_DEV(device), "GMU FW version 0x%x error (expected 0x%x)\n", gmu->ver.core, gen7_core->gmu_fw_version); return -EINVAL; @@ -1123,9 +1132,11 @@ static int _map_gmu_dynamic(struct gen7_gmu_device *gmu, struct kgsl_memdesc *md, u32 addr, u32 vma_id, int attrs, u32 align) { - int ret; + struct kgsl_device *device = KGSL_DEVICE(gen7_gmu_to_adreno(gmu)); + struct device *gmu_pdev_dev = 
GMU_PDEV_DEV(device); struct gmu_vma_entry *vma = &gmu->vma[vma_id]; struct gmu_vma_node *vma_node = NULL; + int ret; u32 size = ALIGN(md->size, hfi_get_gmu_sz_alignment(align)); spin_lock(&vma->lock); @@ -1137,7 +1148,7 @@ static int _map_gmu_dynamic(struct gen7_gmu_device *gmu, addr = find_unmapped_va(vma, size, hfi_get_gmu_va_alignment(align)); if (addr == 0) { spin_unlock(&vma->lock); - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "Insufficient VA space size: %x\n", size); return -ENOMEM; } @@ -1146,7 +1157,7 @@ static int _map_gmu_dynamic(struct gen7_gmu_device *gmu, ret = insert_va(vma, addr, size); spin_unlock(&vma->lock); if (ret < 0) { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "Could not insert va: %x size %x\n", addr, size); return ret; } @@ -1158,7 +1169,7 @@ static int _map_gmu_dynamic(struct gen7_gmu_device *gmu, } /* Failed to map to GMU */ - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n", addr, md->size, ret); @@ -1176,8 +1187,9 @@ static int _map_gmu_static(struct gen7_gmu_device *gmu, struct kgsl_memdesc *md, u32 addr, u32 vma_id, int attrs, u32 align) { - int ret; + struct kgsl_device *device = KGSL_DEVICE(gen7_gmu_to_adreno(gmu)); struct gmu_vma_entry *vma = &gmu->vma[vma_id]; + int ret; u32 size = ALIGN(md->size, hfi_get_gmu_sz_alignment(align)); if (!addr) @@ -1185,7 +1197,7 @@ static int _map_gmu_static(struct gen7_gmu_device *gmu, ret = gmu_core_map_memdesc(gmu->domain, md, addr, attrs); if (ret) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n", addr, md->size, ret); return ret; @@ -1257,7 +1269,7 @@ struct kgsl_memdesc *gen7_reserve_gmu_kernel_block_fixed(struct gen7_gmu_device md = &gmu->gmu_globals[gmu->global_entries]; - ret = kgsl_memdesc_init_fixed(device, gmu->pdev, resource, md); + ret = kgsl_memdesc_init_fixed(device, GMU_PDEV(device), resource, md); if (ret) return ERR_PTR(ret); 
@@ -1270,7 +1282,7 @@ struct kgsl_memdesc *gen7_reserve_gmu_kernel_block_fixed(struct gen7_gmu_device if (!ret) gmu->global_entries++; else { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n", addr, md->size, ret); memset(md, 0x0, sizeof(*md)); @@ -1327,12 +1339,12 @@ free: static int gen7_gmu_process_prealloc(struct gen7_gmu_device *gmu, struct gmu_block_header *blk) { + struct kgsl_device *device = KGSL_DEVICE(gen7_gmu_to_adreno(gmu)); struct kgsl_memdesc *md; - int id = find_vma_block(gmu, blk->addr, blk->value); if (id < 0) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Invalid prealloc block addr: 0x%x value:%d\n", blk->addr, blk->value); return id; @@ -1354,6 +1366,7 @@ static int gen7_gmu_process_prealloc(struct gen7_gmu_device *gmu, int gen7_gmu_parse_fw(struct adreno_device *adreno_dev) { + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev); struct gmu_block_header *blk; @@ -1373,16 +1386,16 @@ int gen7_gmu_parse_fw(struct adreno_device *adreno_dev) return -EINVAL; ret = request_firmware(&gmu->fw_image, gmufw_name, - &gmu->pdev->dev); + gmu_pdev_dev); if (ret) { if (gen7_core->gmufw_bak_name) { gmufw_name = gen7_core->gmufw_bak_name; ret = request_firmware(&gmu->fw_image, gmufw_name, - &gmu->pdev->dev); + gmu_pdev_dev); } if (ret) { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "request_firmware (%s) failed: %d\n", gmufw_name, ret); @@ -1400,7 +1413,7 @@ int gen7_gmu_parse_fw(struct adreno_device *adreno_dev) blk = (struct gmu_block_header *)&gmu->fw_image->data[offset]; if (offset + sizeof(*blk) > gmu->fw_image->size) { - dev_err(&gmu->pdev->dev, "Invalid FW Block\n"); + dev_err(gmu_pdev_dev, "Invalid FW Block\n"); return -EINVAL; } @@ -1551,7 +1564,7 @@ static void gen7_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) 
kgsl_pwrctrl_disable_gx_gdsc(device); if (gen7_gmu_gx_is_on(adreno_dev)) - dev_err(&gmu->pdev->dev, "gx is stuck on\n"); + dev_err(GMU_PDEV_DEV(device), "gx is stuck on\n"); } /* @@ -1608,7 +1621,7 @@ void gen7_gmu_suspend(struct adreno_device *adreno_dev) gen7_rdpm_cx_freq_update(gmu, 0); - dev_err(&gmu->pdev->dev, "Suspended GMU\n"); + dev_err(GMU_PDEV_DEV(device), "Suspended GMU\n"); kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE); } @@ -1653,7 +1666,7 @@ static int gen7_gmu_dcvs_set(struct adreno_device *adreno_dev, ret = gen7_hfi_send_generic_req(adreno_dev, &req, sizeof(req)); if (ret) { - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "Failed to set GPU perf idx %u, bw idx %u\n", req.freq, req.bw); @@ -1723,7 +1736,7 @@ void gen7_gmu_send_nmi(struct kgsl_device *device, bool force, * to save cm3 state to DDR. */ if (gen7_gmu_gx_is_on(adreno_dev) && adreno_smmu_is_stalled(adreno_dev)) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Skipping NMI because SMMU is stalled\n"); goto done; } @@ -1766,13 +1779,11 @@ nmi: /* Wait for the NMI to be handled */ udelay(200); done: - KGSL_GMU_CORE_FORCE_PANIC(device->gmu_core.gf_panic, gmu->pdev, ticks, gf_policy); + KGSL_GMU_CORE_FORCE_PANIC(device->gmu_core.gf_panic, GMU_PDEV(device), ticks, gf_policy); } static void gen7_gmu_cooperative_reset(struct kgsl_device *device) { - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); unsigned int result; gmu_core_regwrite(device, GEN7_GMU_CX_GMU_WDOG_CTRL, 0); @@ -1787,7 +1798,7 @@ static void gen7_gmu_cooperative_reset(struct kgsl_device *device) return; gmu_core_regread(device, GEN7_GMU_CM3_FW_INIT_RESULT, &result); - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "GMU cooperative reset timed out 0x%x\n", result); /* * If we dont get a snapshot ready from GMU, trigger NMI @@ -1797,19 +1808,18 @@ static void gen7_gmu_cooperative_reset(struct kgsl_device 
*device) gmu_core_regread(device, GEN7_GMU_CM3_FW_INIT_RESULT, &result); if ((result & 0x800) != 0x800) - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "GMU cooperative reset NMI timed out 0x%x\n", result); } static int gen7_gmu_wait_for_active_transition(struct kgsl_device *device) { unsigned int reg; - struct gen7_gmu_device *gmu = to_gen7_gmu(ADRENO_DEVICE(device)); if (gmu_core_timed_poll_check(device, GEN7_GPU_GMU_CX_GMU_RPMH_POWER_STATE, GPU_HW_ACTIVE, 100, GENMASK(3, 0))) { gmu_core_regread(device, GEN7_GPU_GMU_CX_GMU_RPMH_POWER_STATE, ®); - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "GMU failed to move to ACTIVE state, Current state: 0x%x\n", reg); @@ -1826,7 +1836,6 @@ static bool gen7_gmu_scales_bandwidth(struct kgsl_device *device) void gen7_gmu_handle_watchdog(struct adreno_device *adreno_dev) { - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 mask; @@ -1837,15 +1846,15 @@ void gen7_gmu_handle_watchdog(struct adreno_device *adreno_dev) gen7_gmu_send_nmi(device, false, GMU_FAULT_PANIC_NONE); - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "GMU watchdog expired interrupt received\n"); } static irqreturn_t gen7_gmu_irq_handler(int irq, void *data) { struct kgsl_device *device = data; + struct device *gmu_pdev_dev = GMU_PDEV_DEV(device); struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); const struct gen7_gpudev *gen7_gpudev = to_gen7_gpudev(ADRENO_GPU_DEVICE(adreno_dev)); unsigned int status = 0; @@ -1854,7 +1863,7 @@ static irqreturn_t gen7_gmu_irq_handler(int irq, void *data) gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_CLR, status); if (status & GMU_INT_HOST_AHB_BUS_ERR) - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(gmu_pdev_dev, "AHB bus error interrupt received\n"); if (status & GMU_INT_WDOG_BITE) @@ -1865,12 +1874,12 @@ static irqreturn_t 
gen7_gmu_irq_handler(int irq, void *data) gmu_core_regread(device, GEN7_GMU_AHB_FENCE_STATUS, &fence_status); - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(gmu_pdev_dev, "FENCE error interrupt received %x\n", fence_status); } if (status & ~GMU_AO_INT_MASK) - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(gmu_pdev_dev, "Unhandled GMU interrupts 0x%lx\n", status & ~GMU_AO_INT_MASK); @@ -1879,6 +1888,7 @@ static irqreturn_t gen7_gmu_irq_handler(int irq, void *data) void gen7_gmu_aop_send_acd_state(struct gen7_gmu_device *gmu, bool flag) { + struct kgsl_device *device = KGSL_DEVICE(gen7_gmu_to_adreno(gmu)); struct qmp_pkt msg; char msg_buf[36]; u32 size; @@ -1897,7 +1907,7 @@ void gen7_gmu_aop_send_acd_state(struct gen7_gmu_device *gmu, bool flag) ret = mbox_send_message(gmu->mailbox.channel, &msg); if (ret < 0) - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "AOP mbox send message failed: %d\n", ret); } @@ -1911,8 +1921,8 @@ int gen7_gmu_clock_set_rate(struct adreno_device *adreno_dev, u32 req_freq) ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", req_freq); if (ret) { - dev_err(&gmu->pdev->dev, "GMU clock:%d set failed:%d\n", - req_freq, ret); + dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), + "GMU clock:%d set failed:%d\n", req_freq, ret); return ret; } @@ -1936,13 +1946,13 @@ int gen7_gmu_enable_clks(struct adreno_device *adreno_dev, u32 level) ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "hub_clk", adreno_dev->gmu_hub_clk_freq); if (ret && ret != -ENODEV) { - dev_err(&gmu->pdev->dev, "Unable to set the HUB clock\n"); + dev_err(GMU_PDEV_DEV(device), "Unable to set the HUB clock\n"); return ret; } ret = clk_bulk_prepare_enable(gmu->num_clks, gmu->clks); if (ret) { - dev_err(&gmu->pdev->dev, "Cannot enable GMU clocks\n"); + dev_err(GMU_PDEV_DEV(device), "Cannot enable GMU clocks\n"); return ret; } @@ -2353,6 +2363,7 @@ u32 gen7_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab) static void 
gen7_free_gmu_globals(struct gen7_gmu_device *gmu) { + struct kgsl_device *device = KGSL_DEVICE(gen7_gmu_to_adreno(gmu)); int i; for (i = 0; i < gmu->global_entries && i < ARRAY_SIZE(gmu->gmu_globals); i++) { @@ -2370,7 +2381,7 @@ static void gen7_free_gmu_globals(struct gen7_gmu_device *gmu) } if (gmu->domain) { - iommu_detach_device(gmu->domain, &gmu->pdev->dev); + iommu_detach_device(gmu->domain, GMU_PDEV_DEV(device)); iommu_domain_free(gmu->domain); gmu->domain = NULL; } @@ -2383,7 +2394,7 @@ static int gen7_gmu_aop_mailbox_init(struct adreno_device *adreno_dev, { struct kgsl_mailbox *mailbox = &gmu->mailbox; - mailbox->client.dev = &gmu->pdev->dev; + mailbox->client.dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); mailbox->client.tx_block = true; mailbox->client.tx_tout = 1000; mailbox->client.knows_txdone = false; @@ -2435,38 +2446,42 @@ static void gen7_gmu_acd_probe(struct kgsl_device *device, ret = gen7_gmu_aop_mailbox_init(adreno_dev, gmu); if (ret) - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "AOP mailbox init failed: %d\n", ret); } static int gen7_gmu_reg_probe(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); int ret; - ret = kgsl_regmap_add_region(&device->regmap, gmu->pdev, "gmu", NULL, NULL); + ret = kgsl_regmap_add_region(&device->regmap, + GMU_PDEV(device), "gmu", NULL, NULL); if (ret) - dev_err(&gmu->pdev->dev, "Unable to map the GMU registers\n"); + dev_err(GMU_PDEV_DEV(device), + "Unable to map the GMU registers\n"); /* * gmu_ao_blk_dec1 and gmu_ao_blk_dec2 are contiguous and contained within the gmu region * mapped above. gmu_ao_blk_dec0 is not within the gmu region and is mapped separately. 
*/ - kgsl_regmap_add_region(&device->regmap, gmu->pdev, "gmu_ao_blk_dec0", NULL, NULL); + kgsl_regmap_add_region(&device->regmap, GMU_PDEV(device), + "gmu_ao_blk_dec0", NULL, NULL); return ret; } static int gen7_gmu_clk_probe(struct adreno_device *adreno_dev) { + struct platform_device *gmu_pdev = GMU_PDEV(KGSL_DEVICE(adreno_dev)); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); int ret, i; int tbl_size; int num_freqs; int offset; - ret = devm_clk_bulk_get_all(&gmu->pdev->dev, &gmu->clks); + ret = devm_clk_bulk_get_all(gmu_pdev_dev, &gmu->clks); if (ret < 0) return ret; @@ -2488,7 +2503,7 @@ static int gen7_gmu_clk_probe(struct adreno_device *adreno_dev) gmu->num_clks = ret; /* Read the optional list of GMU frequencies */ - if (of_get_property(gmu->pdev->dev.of_node, + if (of_get_property(gmu_pdev->dev.of_node, "qcom,gmu-freq-table", &tbl_size) == NULL) goto default_gmu_freq; @@ -2498,11 +2513,11 @@ static int gen7_gmu_clk_probe(struct adreno_device *adreno_dev) for (i = 0; i < num_freqs; i++) { offset = i * 2; - ret = of_property_read_u32_index(gmu->pdev->dev.of_node, + ret = of_property_read_u32_index(gmu_pdev->dev.of_node, "qcom,gmu-freq-table", offset, &gmu->freqs[i]); if (ret) goto default_gmu_freq; - ret = of_property_read_u32_index(gmu->pdev->dev.of_node, + ret = of_property_read_u32_index(gmu_pdev->dev.of_node, "qcom,gmu-freq-table", offset + 1, &gmu->vlvls[i]); if (ret) goto default_gmu_freq; @@ -2581,11 +2596,13 @@ static int gen7_gmu_iommu_fault_handler(struct iommu_domain *domain, static int gen7_gmu_iommu_init(struct gen7_gmu_device *gmu) { + struct kgsl_device *device = KGSL_DEVICE(gen7_gmu_to_adreno(gmu)); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(device); int ret; gmu->domain = iommu_domain_alloc(&platform_bus_type); if (gmu->domain == NULL) { - dev_err(&gmu->pdev->dev, "Unable to allocate GMU IOMMU domain\n"); + dev_err(gmu_pdev_dev, "Unable to allocate GMU IOMMU 
domain\n"); return -ENODEV; } @@ -2596,14 +2613,14 @@ static int gen7_gmu_iommu_init(struct gen7_gmu_device *gmu) */ qcom_iommu_set_fault_model(gmu->domain, QCOM_IOMMU_FAULT_MODEL_NO_STALL); - ret = iommu_attach_device(gmu->domain, &gmu->pdev->dev); + ret = iommu_attach_device(gmu->domain, gmu_pdev_dev); if (!ret) { iommu_set_fault_handler(gmu->domain, gen7_gmu_iommu_fault_handler, gmu); return 0; } - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "Unable to attach GMU IOMMU domain: %d\n", ret); iommu_domain_free(gmu->domain); gmu->domain = NULL; @@ -2626,11 +2643,12 @@ int gen7_gmu_probe(struct kgsl_device *device, struct resource *res; int ret, i; - gmu->pdev = pdev; + device->gmu_core.pdev = pdev; - dma_set_coherent_mask(&gmu->pdev->dev, DMA_BIT_MASK(64)); - gmu->pdev->dev.dma_mask = &gmu->pdev->dev.coherent_dma_mask; - set_dma_ops(&gmu->pdev->dev, NULL); + dma_set_coherent_mask(&device->gmu_core.pdev->dev, DMA_BIT_MASK(64)); + device->gmu_core.pdev->dev.dma_mask = + &device->gmu_core.pdev->dev.coherent_dma_mask; + set_dma_ops(GMU_PDEV_DEV(device), NULL); res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, "rscc"); @@ -2638,7 +2656,7 @@ int gen7_gmu_probe(struct kgsl_device *device, gmu->rscc_virt = devm_ioremap(&device->pdev->dev, res->start, resource_size(res)); if (!gmu->rscc_virt) { - dev_err(&gmu->pdev->dev, "rscc ioremap failed\n"); + dev_err(GMU_PDEV_DEV(device), "rscc ioremap failed\n"); return -ENOMEM; } } @@ -2711,12 +2729,12 @@ int gen7_gmu_probe(struct kgsl_device *device, (void) kobject_init_and_add(&gmu->log_kobj, &log_kobj_type, &dev->kobj, "log"); (void) kobject_init_and_add(&gmu->stats_kobj, &stats_kobj_type, &dev->kobj, "stats"); - of_property_read_u32(gmu->pdev->dev.of_node, "qcom,gmu-perf-ddr-bw", + of_property_read_u32(GMU_PDEV(device)->dev.of_node, "qcom,gmu-perf-ddr-bw", &gmu->perf_ddr_bw); spin_lock_init(&gmu->hfi.cmdq_lock); - gmu->irq = kgsl_request_irq(gmu->pdev, "gmu", + gmu->irq = kgsl_request_irq(GMU_PDEV(device), 
"gmu", gen7_gmu_irq_handler, device); if (gmu->irq >= 0) @@ -3427,10 +3445,11 @@ int gen7_gmu_reset(struct adreno_device *adreno_dev) int gen7_gmu_hfi_probe(struct adreno_device *adreno_dev) { + struct platform_device *gmu_pdev = GMU_PDEV(KGSL_DEVICE(adreno_dev)); struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct gen7_hfi *hfi = &gmu->hfi; - hfi->irq = kgsl_request_irq(gmu->pdev, "hfi", + hfi->irq = kgsl_request_irq(gmu_pdev, "hfi", gen7_hfi_irq_handler, KGSL_DEVICE(adreno_dev)); return hfi->irq < 0 ? hfi->irq : 0; diff --git a/adreno_gen7_gmu.h b/adreno_gen7_gmu.h index b2930a9f70..7a8021c2e8 100644 --- a/adreno_gen7_gmu.h +++ b/adreno_gen7_gmu.h @@ -39,7 +39,6 @@ struct gen7_gmu_device { u32 pwr_dev; u32 hfi; } ver; - struct platform_device *pdev; int irq; const struct firmware *fw_image; struct kgsl_memdesc *dump_mem; diff --git a/adreno_gen7_gmu_snapshot.c b/adreno_gen7_gmu_snapshot.c index c4f32a8c58..003542792e 100644 --- a/adreno_gen7_gmu_snapshot.c +++ b/adreno_gen7_gmu_snapshot.c @@ -65,7 +65,8 @@ static size_t gen7_gmu_snapshot_itcm(struct kgsl_device *device, struct gen7_gmu_device *gmu = (struct gen7_gmu_device *)priv; if (!gmu->itcm_shadow) { - dev_err(&gmu->pdev->dev, "No memory allocated for ITCM shadow capture\n"); + dev_err(GMU_PDEV_DEV(device), + "No memory allocated for ITCM shadow capture\n"); return 0; } @@ -229,7 +230,7 @@ static void gen7_gmu_device_snapshot(struct kgsl_device *device, * DTCM can be read through side-band DBGC interface on gen7_2_x family. 
*/ if (adreno_smmu_is_stalled(adreno_dev) && !adreno_is_gen7_2_x_family(adreno_dev)) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Not dumping dtcm because SMMU is stalled\n"); return; } diff --git a/adreno_gen7_hfi.c b/adreno_gen7_hfi.c index b384456192..3244627e37 100644 --- a/adreno_gen7_hfi.c +++ b/adreno_gen7_hfi.c @@ -35,6 +35,7 @@ struct gen7_hfi *to_gen7_hfi(struct adreno_device *adreno_dev) int gen7_hfi_queue_read(struct gen7_gmu_device *gmu, u32 queue_idx, unsigned int *output, unsigned int max_size) { + struct kgsl_device *device = KGSL_DEVICE(gen7_gmu_to_adreno(gmu)); struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem; struct hfi_queue_table *tbl = mem_addr->hostptr; struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx]; @@ -58,7 +59,7 @@ int gen7_hfi_queue_read(struct gen7_gmu_device *gmu, u32 queue_idx, size = MSG_HDR_GET_SIZE(msg_hdr); if (size > (max_size >> 2)) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "HFI message too big: hdr:0x%x rd idx=%d\n", msg_hdr, hdr->read_index); result = -EMSGSIZE; @@ -75,7 +76,7 @@ int gen7_hfi_queue_read(struct gen7_gmu_device *gmu, u32 queue_idx, result = size; } else { /* In case FW messed up */ - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Read index %d greater than queue size %d\n", hdr->read_index, hdr->queue_size); result = -ENODATA; @@ -251,7 +252,7 @@ int gen7_receive_ack_cmd(struct gen7_gmu_device *gmu, void *rcvd, } /* Didn't find the sender, list the waiter */ - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "HFI ACK: Cannot find sender for 0x%8.8x Waiter: 0x%8.8x\n", req_hdr, ret_cmd->sent_hdr); @@ -264,9 +265,8 @@ static int poll_gmu_reg(struct adreno_device *adreno_dev, u32 offsetdwords, unsigned int expected_val, unsigned int mask, unsigned int timeout_ms) { - unsigned int val; - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int val; unsigned long 
timeout = jiffies + msecs_to_jiffies(timeout_ms); bool nmi = false; @@ -297,7 +297,7 @@ static int poll_gmu_reg(struct adreno_device *adreno_dev, if ((val & mask) == expected_val) return 0; - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Reg poll %s: offset 0x%x, want 0x%x, got 0x%x\n", nmi ? "abort" : "timeout", offsetdwords, expected_val, val & mask); @@ -330,7 +330,7 @@ static int gen7_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev, HFI_IRQ_MSGQ_MASK, HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT); if (rc) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Timed out waiting on ack for 0x%8.8x (id %d, sequence %d)\n", cmd[0], MSG_HDR_GET_ID(*cmd), MSG_HDR_GET_SEQNUM(*cmd)); gmu_core_fault_snapshot(device, GMU_FAULT_SEND_CMD_WAIT_INLINE); @@ -358,10 +358,9 @@ int gen7_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd, u32 s return rc; if (ret_cmd.results[2]) { - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "HFI ACK failure: Req=0x%8.8X, Result=0x%8.8X\n", ret_cmd.results[1], ret_cmd.results[2]); @@ -412,7 +411,7 @@ int gen7_hfi_send_generic_req_v5(struct adreno_device *adreno_dev, void *cmd, rc = -EINVAL; break; default: - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), "HFI ACK: Req=0x%8.8X, Result=0x%8.8X Error:0x%8.8X\n", ret_cmd->results[1], ret_cmd->results[2], ret_cmd->results[3]); rc = -EINVAL; @@ -426,7 +425,6 @@ int gen7_hfi_send_generic_req_v5(struct adreno_device *adreno_dev, void *cmd, int gen7_hfi_send_feature_ctrl(struct adreno_device *adreno_dev, u32 feature, u32 enable, u32 data) { - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct pending_cmd ret_cmd = {0}; struct hfi_feature_ctrl_cmd cmd = { .feature = feature, @@ -441,14 +439,13 @@ int gen7_hfi_send_feature_ctrl(struct adreno_device *adreno_dev, ret = gen7_hfi_send_generic_req_v5(adreno_dev, &cmd, 
&ret_cmd, sizeof(cmd)); if (ret < 0) - dev_err(&gmu->pdev->dev, "Unable to %s feature %s (%d)\n", + dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), "Unable to %s feature %s (%d)\n", enable ? "enable" : "disable", hfi_feature_to_string(feature), feature); return ret; } int gen7_hfi_send_get_value(struct adreno_device *adreno_dev, u32 type, u32 subtype) { - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct pending_cmd ret_cmd = {0}; struct hfi_get_value_cmd cmd = { .type = type, @@ -462,7 +459,7 @@ int gen7_hfi_send_get_value(struct adreno_device *adreno_dev, u32 type, u32 subt ret = gen7_hfi_send_generic_req_v5(adreno_dev, &cmd, &ret_cmd, sizeof(cmd)); if (ret < 0) - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), "Unable to get HFI Value type: %d, subtype: %d, error = %d\n", type, subtype, ret); @@ -472,7 +469,6 @@ int gen7_hfi_send_get_value(struct adreno_device *adreno_dev, u32 type, u32 subt int gen7_hfi_send_set_value(struct adreno_device *adreno_dev, u32 type, u32 subtype, u32 data) { - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct pending_cmd ret_cmd = {0}; struct hfi_set_value_cmd cmd = { .type = type, @@ -487,7 +483,7 @@ int gen7_hfi_send_set_value(struct adreno_device *adreno_dev, ret = gen7_hfi_send_generic_req_v5(adreno_dev, &cmd, &ret_cmd, sizeof(cmd)); if (ret < 0) - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), "Unable to set HFI Value %d, %d to %d, error = %d\n", type, subtype, data, ret); return ret; @@ -500,26 +496,28 @@ void adreno_gen7_receive_err_req(struct gen7_gmu_device *gmu, void *rcvd) u64 ticks = gpudev->read_alwayson(ADRENO_DEVICE(device)); struct hfi_err_cmd *cmd = rcvd; - dev_err(&gmu->pdev->dev, "HFI Error Received: %d %d %.16s\n", + dev_err(GMU_PDEV_DEV(device), "HFI Error Received: %d %d %.16s\n", ((cmd->error_code >> 16) & 0xffff), (cmd->error_code & 0xffff), (char *) cmd->data); KGSL_GMU_CORE_FORCE_PANIC(device->gmu_core.gf_panic, - gmu->pdev, ticks, 
GMU_FAULT_F2H_MSG_ERR); + GMU_PDEV(device), ticks, GMU_FAULT_F2H_MSG_ERR); } void adreno_gen7_receive_debug_req(struct gen7_gmu_device *gmu, void *rcvd) { + struct kgsl_device *device = KGSL_DEVICE(gen7_gmu_to_adreno(gmu)); struct hfi_debug_cmd *cmd = rcvd; - dev_dbg(&gmu->pdev->dev, "HFI Debug Received: %d %d %d\n", + dev_dbg(GMU_PDEV_DEV(device), "HFI Debug Received: %d %d %d\n", cmd->type, cmd->timestamp, cmd->data); } int gen7_hfi_process_queue(struct gen7_gmu_device *gmu, u32 queue_idx, struct pending_cmd *ret_cmd) { + struct kgsl_device *device = KGSL_DEVICE(gen7_gmu_to_adreno(gmu)); u32 rcvd[MAX_RCVD_SIZE]; while (gen7_hfi_queue_read(gmu, queue_idx, rcvd, sizeof(rcvd)) > 0) { @@ -541,7 +539,7 @@ int gen7_hfi_process_queue(struct gen7_gmu_device *gmu, adreno_gen7_receive_debug_req(gmu, rcvd); break; default: /* No Reply */ - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "HFI request %d not supported\n", MSG_HDR_GET_ID(rcvd[0])); break; @@ -805,7 +803,7 @@ irqreturn_t gen7_hfi_irq_handler(int irq, void *data) if (status & HFI_IRQ_DBGQ_MASK) gen7_hfi_process_queue(gmu, HFI_DBG_ID, NULL); if (status & HFI_IRQ_CM3_FAULT_MASK) { - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "GMU CM3 fault interrupt received\n"); atomic_set(&gmu->cm3_fault, 1); @@ -813,7 +811,7 @@ irqreturn_t gen7_hfi_irq_handler(int irq, void *data) smp_wmb(); } if (status & ~HFI_IRQ_MASK) - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "Unhandled HFI interrupts 0x%lx\n", status & ~HFI_IRQ_MASK); diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 93d0a3b670..a64d7b19ca 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -963,7 +963,6 @@ static void check_hw_fence_unack_count(struct adreno_device *adreno_dev) { struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); u32 
unack_count; if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags)) @@ -978,7 +977,8 @@ static void check_hw_fence_unack_count(struct adreno_device *adreno_dev) if (!unack_count) return; - dev_err(&gmu->pdev->dev, "hardware fence unack_count(%d) isn't zero before SLUMBER\n", + dev_err(GMU_PDEV_DEV(device), + "hardware fence unack_count(%d) isn't zero before SLUMBER\n", unack_count); gmu_core_fault_snapshot(device, GMU_FAULT_HW_FENCE); } @@ -1092,7 +1092,7 @@ static int gen7_hwsched_dcvs_set(struct adreno_device *adreno_dev, /* Do not set to XO and lower GPU clock vote from GMU */ if ((gpu_pwrlevel != INVALID_DCVS_IDX) && (gpu_pwrlevel >= table->gpu_level_num - 1)) { - dev_err(&gmu->pdev->dev, "Invalid gpu dcvs request: %d\n", + dev_err(GMU_PDEV_DEV(device), "Invalid gpu dcvs request: %d\n", gpu_pwrlevel); return -EINVAL; } @@ -1116,7 +1116,7 @@ static int gen7_hwsched_dcvs_set(struct adreno_device *adreno_dev, ret = gen7_hfi_send_cmd_async(adreno_dev, &req, sizeof(req)); if (ret) { - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "Failed to set GPU perf idx %u, bw idx %u\n", req.freq, req.bw); @@ -1266,7 +1266,6 @@ static void gen7_hwsched_pm_resume(struct adreno_device *adreno_dev) void gen7_hwsched_handle_watchdog(struct adreno_device *adreno_dev) { - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 mask; @@ -1278,7 +1277,7 @@ void gen7_hwsched_handle_watchdog(struct adreno_device *adreno_dev) gen7_gmu_send_nmi(device, false, GMU_FAULT_PANIC_NONE); - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "GMU watchdog expired interrupt received\n"); gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 2cbfe5ed16..f8fca021bc 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -100,7 +100,7 @@ static void del_waiter(struct gen7_hwsched_hfi 
*hfi, struct pending_cmd *ack) static void gen7_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd) { - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); struct pending_cmd *cmd = NULL; u32 waiters[64], num_waiters = 0, i; @@ -110,7 +110,7 @@ static void gen7_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd) u32 size_bytes = MSG_HDR_GET_SIZE(hdr) << 2; if (size_bytes > sizeof(cmd->results)) - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(gmu_pdev_dev, "Ack result too big: %d Truncating to: %ld\n", size_bytes, sizeof(cmd->results)); @@ -133,13 +133,13 @@ static void gen7_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd) read_unlock(&hfi->msglock); /* Didn't find the sender, list the waiter */ - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(gmu_pdev_dev, "Unexpectedly got id %d seqnum %d. 
Total waiters: %d Top %d Waiters:\n", MSG_HDR_GET_ID(req_hdr), MSG_HDR_GET_SEQNUM(req_hdr), num_waiters, min_t(u32, num_waiters, 5)); for (i = 0; i < num_waiters && i < 5; i++) - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(gmu_pdev_dev, " id %d seqnum %d\n", MSG_HDR_GET_ID(waiters[i]), MSG_HDR_GET_SEQNUM(waiters[i])); @@ -300,6 +300,7 @@ static void _get_syncobj_string(char *str, u32 max_size, struct hfi_syncobj_lega static void log_syncobj(struct gen7_gmu_device *gmu, struct adreno_context *drawctxt, struct hfi_submit_syncobj *cmd, u32 syncobj_read_idx) { + struct kgsl_device *device = KGSL_DEVICE(gen7_gmu_to_adreno(gmu)); struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; struct hfi_syncobj_legacy syncobj; char str[128]; @@ -311,7 +312,7 @@ static void log_syncobj(struct gen7_gmu_device *gmu, struct adreno_context *draw break; _get_syncobj_string(str, sizeof(str), &syncobj, i); - dev_err(&gmu->pdev->dev, "%s\n", str); + dev_err(GMU_PDEV_DEV(device), "%s\n", str); syncobj_read_idx = (syncobj_read_idx + (sizeof(syncobj) >> 2)) % hdr->queue_size; } } @@ -359,7 +360,7 @@ static void find_timeout_syncobj(struct adreno_device *adreno_dev, u32 ctxt_id, } if (i == hdr->write_index) - dev_err(&gmu->pdev->dev, "Couldn't find unsignaled syncobj ctx:%d ts:%d\n", + dev_err(GMU_PDEV_DEV(device), "Couldn't find unsignaled syncobj ctx:%d ts:%d\n", ctxt_id, ts); kgsl_context_put(context); @@ -367,20 +368,19 @@ static void find_timeout_syncobj(struct adreno_device *adreno_dev, u32 ctxt_id, static void log_gpu_fault_legacy(struct adreno_device *adreno_dev) { - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - struct device *dev = &gmu->pdev->dev; + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); struct hfi_context_bad_cmd_legacy *cmd = adreno_dev->hwsched.ctxt_bad; switch (cmd->error) { case GMU_GPU_HW_HANG: - dev_crit_ratelimited(dev, "MISC: GPU hang detected\n"); + dev_crit_ratelimited(gmu_pdev_dev, "MISC: GPU 
hang detected\n"); break; case GMU_GPU_SW_HANG: - dev_crit_ratelimited(dev, "gpu timeout ctx %d ts %u\n", + dev_crit_ratelimited(gmu_pdev_dev, "gpu timeout ctx %d ts %u\n", cmd->ctxt_id, cmd->ts); break; case GMU_CP_OPCODE_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP opcode error interrupt | opcode=0x%8.8x\n", gen7_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_OPCODE_ERROR)); @@ -389,20 +389,20 @@ static void log_gpu_fault_legacy(struct adreno_device *adreno_dev) u32 status = gen7_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_PROTECTED_ERROR); - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", status & (1 << 20) ? "READ" : "WRITE", status & 0x3FFFF, status); } break; case GMU_CP_ILLEGAL_INST_ERROR: - dev_crit_ratelimited(dev, "CP Illegal instruction error\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP Illegal instruction error\n"); break; case GMU_CP_UCODE_ERROR: - dev_crit_ratelimited(dev, "CP ucode error interrupt\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP ucode error interrupt\n"); break; case GMU_CP_HW_FAULT_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP | Ringbuffer HW fault | status=0x%8.8x\n", gen7_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_HW_FAULT)); @@ -420,16 +420,16 @@ static void log_gpu_fault_legacy(struct adreno_device *adreno_dev) next_rptr = get_payload_rb_key_legacy(adreno_dev, next, KEY_RB_RPTR); next_wptr = get_payload_rb_key_legacy(adreno_dev, next, KEY_RB_WPTR); - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n", cur, cur_rptr, cur_wptr, next, next_rptr, next_wptr); } break; case GMU_CP_GPC_ERROR: - dev_crit_ratelimited(dev, "RBBM: GPC error\n"); + dev_crit_ratelimited(gmu_pdev_dev, "RBBM: GPC error\n"); break; case GMU_CP_BV_OPCODE_ERROR: - 
dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP BV opcode error | opcode=0x%8.8x\n", gen7_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_BV_OPCODE_ERROR)); @@ -438,70 +438,70 @@ static void log_gpu_fault_legacy(struct adreno_device *adreno_dev) u32 status = gen7_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_BV_PROTECTED_ERROR); - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP BV | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", status & (1 << 20) ? "READ" : "WRITE", status & 0x3FFFF, status); } break; case GMU_CP_BV_HW_FAULT_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP BV | Ringbuffer HW fault | status=0x%8.8x\n", gen7_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_BV_HW_FAULT)); break; case GMU_CP_BV_ILLEGAL_INST_ERROR: - dev_crit_ratelimited(dev, "CP BV Illegal instruction error\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP BV Illegal instruction error\n"); break; case GMU_CP_BV_UCODE_ERROR: - dev_crit_ratelimited(dev, "CP BV ucode error interrupt\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP BV ucode error interrupt\n"); break; case GMU_GPU_SW_FUSE_VIOLATION: - dev_crit_ratelimited(dev, "RBBM: SW Feature Fuse violation status=0x%8.8x\n", + dev_crit_ratelimited(gmu_pdev_dev, "RBBM: SW Feature Fuse violation status=0x%8.8x\n", gen7_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, KEY_SWFUSE_VIOLATION_FAULT)); break; case GMU_GPU_AQE0_OPCODE_ERROR: - dev_crit_ratelimited(dev, "AQE0 opcode error | opcode=0x%8.8x\n", + dev_crit_ratelimited(gmu_pdev_dev, "AQE0 opcode error | opcode=0x%8.8x\n", gen7_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, KEY_AQE0_OPCODE_ERROR)); break; case GMU_GPU_AQE0_UCODE_ERROR: - dev_crit_ratelimited(dev, "AQE0 ucode error interrupt\n"); + dev_crit_ratelimited(gmu_pdev_dev, "AQE0 ucode error interrupt\n"); break; case GMU_GPU_AQE0_HW_FAULT_ERROR: - 
dev_crit_ratelimited(dev, "AQE0 HW fault | status=0x%8.8x\n", + dev_crit_ratelimited(gmu_pdev_dev, "AQE0 HW fault | status=0x%8.8x\n", gen7_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, KEY_AQE0_HW_FAULT)); break; case GMU_GPU_AQE0_ILLEGAL_INST_ERROR: - dev_crit_ratelimited(dev, "AQE0 Illegal instruction error\n"); + dev_crit_ratelimited(gmu_pdev_dev, "AQE0 Illegal instruction error\n"); break; case GMU_GPU_AQE1_OPCODE_ERROR: - dev_crit_ratelimited(dev, "AQE1 opcode error | opcode=0x%8.8x\n", + dev_crit_ratelimited(gmu_pdev_dev, "AQE1 opcode error | opcode=0x%8.8x\n", gen7_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, KEY_AQE1_OPCODE_ERROR)); break; case GMU_GPU_AQE1_UCODE_ERROR: - dev_crit_ratelimited(dev, "AQE1 ucode error interrupt\n"); + dev_crit_ratelimited(gmu_pdev_dev, "AQE1 ucode error interrupt\n"); break; case GMU_GPU_AQE1_HW_FAULT_ERROR: - dev_crit_ratelimited(dev, "AQE1 HW fault | status=0x%8.8x\n", + dev_crit_ratelimited(gmu_pdev_dev, "AQE1 HW fault | status=0x%8.8x\n", gen7_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS, KEY_AQE1_HW_FAULT)); break; case GMU_GPU_AQE1_ILLEGAL_INST_ERROR: - dev_crit_ratelimited(dev, "AQE1 Illegal instruction error\n"); + dev_crit_ratelimited(gmu_pdev_dev, "AQE1 Illegal instruction error\n"); break; case GMU_SYNCOBJ_TIMEOUT_ERROR: - dev_crit_ratelimited(dev, "syncobj timeout ctx %d ts %u\n", + dev_crit_ratelimited(gmu_pdev_dev, "syncobj timeout ctx %d ts %u\n", cmd->ctxt_id, cmd->ts); find_timeout_syncobj(adreno_dev, cmd->ctxt_id, cmd->ts); break; case GMU_CP_UNKNOWN_ERROR: fallthrough; default: - dev_crit_ratelimited(dev, "Unknown GPU fault: %u\n", + dev_crit_ratelimited(gmu_pdev_dev, "Unknown GPU fault: %u\n", cmd->error); break; } @@ -568,24 +568,23 @@ static u32 get_payload_rb_key(struct adreno_device *adreno_dev, static bool log_gpu_fault(struct adreno_device *adreno_dev) { - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - struct device *dev = 
&gmu->pdev->dev; + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; /* Return false for non fatal errors */ - if (adreno_hwsched_log_nonfatal_gpu_fault(adreno_dev, dev, cmd->error)) + if (adreno_hwsched_log_nonfatal_gpu_fault(adreno_dev, gmu_pdev_dev, cmd->error)) return false; switch (cmd->error) { case GMU_GPU_HW_HANG: - dev_crit_ratelimited(dev, "MISC: GPU hang detected\n"); + dev_crit_ratelimited(gmu_pdev_dev, "MISC: GPU hang detected\n"); break; case GMU_GPU_SW_HANG: - dev_crit_ratelimited(dev, "gpu timeout ctx %d ts %d\n", + dev_crit_ratelimited(gmu_pdev_dev, "gpu timeout ctx %d ts %d\n", cmd->gc.ctxt_id, cmd->gc.ts); break; case GMU_CP_OPCODE_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP opcode error interrupt | opcode=0x%8.8x\n", gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_OPCODE_ERROR)); @@ -594,20 +593,20 @@ static bool log_gpu_fault(struct adreno_device *adreno_dev) u32 status = gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_PROTECTED_ERROR); - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", status & (1 << 20) ? 
"READ" : "WRITE", status & 0x3FFFF, status); } break; case GMU_CP_ILLEGAL_INST_ERROR: - dev_crit_ratelimited(dev, "CP Illegal instruction error\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP Illegal instruction error\n"); break; case GMU_CP_UCODE_ERROR: - dev_crit_ratelimited(dev, "CP ucode error interrupt\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP ucode error interrupt\n"); break; case GMU_CP_HW_FAULT_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP | Ringbuffer HW fault | status=0x%8.8x\n", gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_HW_FAULT)); @@ -625,16 +624,16 @@ static bool log_gpu_fault(struct adreno_device *adreno_dev) next_rptr = get_payload_rb_key(adreno_dev, next, KEY_RB_RPTR); next_wptr = get_payload_rb_key(adreno_dev, next, KEY_RB_WPTR); - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n", cur, cur_rptr, cur_wptr, next, next_rptr, next_wptr); } break; case GMU_CP_GPC_ERROR: - dev_crit_ratelimited(dev, "RBBM: GPC error\n"); + dev_crit_ratelimited(gmu_pdev_dev, "RBBM: GPC error\n"); break; case GMU_CP_BV_OPCODE_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP BV opcode error | opcode=0x%8.8x\n", gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_BV_OPCODE_ERROR)); @@ -643,26 +642,26 @@ static bool log_gpu_fault(struct adreno_device *adreno_dev) u32 status = gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_BV_PROTECTED_ERROR); - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP BV | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", status & (1 << 20) ? 
"READ" : "WRITE", status & 0x3FFFF, status); } break; case GMU_CP_BV_HW_FAULT_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP BV | Ringbuffer HW fault | status=0x%8.8x\n", gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_BV_HW_FAULT)); break; case GMU_CP_BV_ILLEGAL_INST_ERROR: - dev_crit_ratelimited(dev, "CP BV Illegal instruction error\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP BV Illegal instruction error\n"); break; case GMU_CP_BV_UCODE_ERROR: - dev_crit_ratelimited(dev, "CP BV ucode error interrupt\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP BV ucode error interrupt\n"); break; case GMU_CP_LPAC_OPCODE_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP LPAC opcode error | opcode=0x%8.8x\n", gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_LPAC_OPCODE_ERROR)); @@ -671,74 +670,74 @@ static bool log_gpu_fault(struct adreno_device *adreno_dev) u32 status = gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_LPAC_PROTECTED_ERROR); - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP LPAC | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", status & (1 << 20) ? 
"READ" : "WRITE", status & 0x3FFFF, status); } break; case GMU_CP_LPAC_HW_FAULT_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP LPAC | Ringbuffer HW fault | status=0x%8.8x\n", gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_LPAC_HW_FAULT)); break; case GMU_CP_LPAC_ILLEGAL_INST_ERROR: - dev_crit_ratelimited(dev, "CP LPAC Illegal instruction error\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP LPAC Illegal instruction error\n"); break; case GMU_CP_LPAC_UCODE_ERROR: - dev_crit_ratelimited(dev, "CP LPAC ucode error interrupt\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP LPAC ucode error interrupt\n"); break; case GMU_GPU_LPAC_SW_HANG: - dev_crit_ratelimited(dev, "LPAC: gpu timeout ctx %d ts %d\n", + dev_crit_ratelimited(gmu_pdev_dev, "LPAC: gpu timeout ctx %d ts %d\n", cmd->lpac.ctxt_id, cmd->lpac.ts); break; case GMU_GPU_SW_FUSE_VIOLATION: - dev_crit_ratelimited(dev, "RBBM: SW Feature Fuse violation status=0x%8.8x\n", + dev_crit_ratelimited(gmu_pdev_dev, "RBBM: SW Feature Fuse violation status=0x%8.8x\n", gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_SWFUSE_VIOLATION_FAULT)); break; case GMU_GPU_AQE0_OPCODE_ERROR: - dev_crit_ratelimited(dev, "AQE0 opcode error | opcode=0x%8.8x\n", + dev_crit_ratelimited(gmu_pdev_dev, "AQE0 opcode error | opcode=0x%8.8x\n", gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_AQE0_OPCODE_ERROR)); break; case GMU_GPU_AQE0_UCODE_ERROR: - dev_crit_ratelimited(dev, "AQE0 ucode error interrupt\n"); + dev_crit_ratelimited(gmu_pdev_dev, "AQE0 ucode error interrupt\n"); break; case GMU_GPU_AQE0_HW_FAULT_ERROR: - dev_crit_ratelimited(dev, "AQE0 HW fault | status=0x%8.8x\n", + dev_crit_ratelimited(gmu_pdev_dev, "AQE0 HW fault | status=0x%8.8x\n", gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_AQE0_HW_FAULT)); break; case GMU_GPU_AQE0_ILLEGAL_INST_ERROR: - dev_crit_ratelimited(dev, "AQE0 Illegal instruction error\n"); + 
dev_crit_ratelimited(gmu_pdev_dev, "AQE0 Illegal instruction error\n"); break; case GMU_GPU_AQE1_OPCODE_ERROR: - dev_crit_ratelimited(dev, "AQE1 opcode error | opcode=0x%8.8x\n", + dev_crit_ratelimited(gmu_pdev_dev, "AQE1 opcode error | opcode=0x%8.8x\n", gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_AQE1_OPCODE_ERROR)); break; case GMU_GPU_AQE1_UCODE_ERROR: - dev_crit_ratelimited(dev, "AQE1 ucode error interrupt\n"); + dev_crit_ratelimited(gmu_pdev_dev, "AQE1 ucode error interrupt\n"); break; case GMU_GPU_AQE1_HW_FAULT_ERROR: - dev_crit_ratelimited(dev, "AQE1 HW fault | status=0x%8.8x\n", + dev_crit_ratelimited(gmu_pdev_dev, "AQE1 HW fault | status=0x%8.8x\n", gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_AQE1_HW_FAULT)); break; case GMU_GPU_AQE1_ILLEGAL_INST_ERROR: - dev_crit_ratelimited(dev, "AQE1 Illegal instruction error\n"); + dev_crit_ratelimited(gmu_pdev_dev, "AQE1 Illegal instruction error\n"); break; case GMU_SYNCOBJ_TIMEOUT_ERROR: - dev_crit_ratelimited(dev, "syncobj timeout ctx %d ts %u\n", + dev_crit_ratelimited(gmu_pdev_dev, "syncobj timeout ctx %d ts %u\n", cmd->gc.ctxt_id, cmd->gc.ts); find_timeout_syncobj(adreno_dev, cmd->gc.ctxt_id, cmd->gc.ts); break; case GMU_CP_UNKNOWN_ERROR: fallthrough; default: - dev_crit_ratelimited(dev, "Unknown GPU fault: %u\n", + dev_crit_ratelimited(gmu_pdev_dev, "Unknown GPU fault: %u\n", cmd->error); break; } @@ -818,9 +817,9 @@ static bool fence_is_queried(struct hfi_syncobj_query_cmd *cmd, u32 fence_index) static void set_fence_signal_bit(struct adreno_device *adreno_dev, struct hfi_syncobj_query_cmd *reply, struct dma_fence *fence, u32 fence_index) { + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); u32 index = GET_QUERIED_FENCE_INDEX(fence_index); u32 bit = GET_QUERIED_FENCE_BIT(fence_index); - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); u64 flags = ADRENO_HW_FENCE_SW_STATUS_PENDING; char name[KGSL_FENCE_NAME_LEN]; char value[32] = "unknown"; 
@@ -829,7 +828,7 @@ static void set_fence_signal_bit(struct adreno_device *adreno_dev, fence->ops->timeline_value_str(fence, value, sizeof(value)); if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "GMU is waiting for signaled fence(ctx:%llu seqno:%llu value:%s)\n", fence->context, fence->seqno, value); reply->queries[index].query_bitmask |= BIT(bit); @@ -907,7 +906,6 @@ static void gen7_process_syncobj_query_work(struct kthread_work *work) } if (missing) { - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct adreno_context *drawctxt = ADRENO_CONTEXT(context); struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; @@ -917,7 +915,8 @@ static void gen7_process_syncobj_query_work(struct kthread_work *work) * we have a problem. */ if (timestamp_cmp(cmd->sync_obj_ts, hdr->sync_obj_ts) > 0) { - dev_err(&gmu->pdev->dev, "Missing sync object ctx:%d ts:%d retired:%d\n", + dev_err(GMU_PDEV_DEV(device), + "Missing sync object ctx:%d ts:%d retired:%d\n", context->id, cmd->sync_obj_ts, hdr->sync_obj_ts); gmu_core_fault_snapshot(device, GMU_FAULT_HW_FENCE); gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); @@ -1188,7 +1187,6 @@ static void process_hw_fence_ack(struct adreno_device *adreno_dev, u32 received_ { struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); struct adreno_context *drawctxt = NULL; - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); spin_lock(&hfi->hw_fence.lock); @@ -1203,7 +1201,8 @@ static void process_hw_fence_ack(struct adreno_device *adreno_dev, u32 received_ /* The unack count should never be greater than MAX_HW_FENCE_UNACK_COUNT */ if (hfi->hw_fence.unack_count > MAX_HW_FENCE_UNACK_COUNT) - dev_err(&gmu->pdev->dev, "unexpected hardware fence unack count:%d\n", + dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), + "unexpected hardware fence unack count:%d\n", hfi->hw_fence.unack_count); if (!test_bit(GEN7_HWSCHED_HW_FENCE_MAX_BIT, 
&hfi->hw_fence.flags) || @@ -1379,7 +1378,7 @@ static irqreturn_t gen7_hwsched_hfi_handler(int irq, void *data) /* make sure other CPUs see the update */ smp_wmb(); - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "GMU CM3 fault interrupt received\n"); gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); @@ -1389,7 +1388,7 @@ static irqreturn_t gen7_hwsched_hfi_handler(int irq, void *data) status &= GENMASK(31 - (oob_max - 1), 0); if (status & ~hfi->irq_mask) - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "Unhandled HFI interrupts 0x%x\n", status & ~hfi->irq_mask); @@ -1401,25 +1400,26 @@ static irqreturn_t gen7_hwsched_hfi_handler(int irq, void *data) static int check_ack_failure(struct adreno_device *adreno_dev, struct pending_cmd *ack) { - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u64 ticks = gpudev->read_alwayson(adreno_dev); if (ack->results[2] != 0xffffffff) return 0; - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "ACK error: sender id %d seqnum %d\n", MSG_HDR_GET_ID(ack->sent_hdr), MSG_HDR_GET_SEQNUM(ack->sent_hdr)); - KGSL_GMU_CORE_FORCE_PANIC(KGSL_DEVICE(adreno_dev)->gmu_core.gf_panic, - gmu->pdev, ticks, GMU_FAULT_HFI_ACK); + KGSL_GMU_CORE_FORCE_PANIC(device->gmu_core.gf_panic, + GMU_PDEV(device), ticks, GMU_FAULT_HFI_ACK); return -EINVAL; } int gen7_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 size_bytes) { + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); u32 *cmd = data; @@ -1436,8 +1436,8 @@ int gen7_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 si if (rc) goto done; - rc = adreno_hwsched_wait_ack_completion(adreno_dev, &gmu->pdev->dev, &pending_ack, - 
gen7_hwsched_process_msgq); + rc = adreno_hwsched_wait_ack_completion(adreno_dev, + GMU_PDEV_DEV(device), &pending_ack, gen7_hwsched_process_msgq); if (rc) goto done; @@ -1583,6 +1583,7 @@ static struct hfi_mem_alloc_entry *get_mem_alloc_entry( struct adreno_device *adreno_dev, struct hfi_mem_alloc_desc *desc) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(device); struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); struct hfi_mem_alloc_entry *entry = lookup_mem_alloc_table(adreno_dev, desc); @@ -1597,13 +1598,13 @@ static struct hfi_mem_alloc_entry *get_mem_alloc_entry( return entry; if (desc->mem_kind >= HFI_MEMKIND_MAX) { - dev_err(&gmu->pdev->dev, "Invalid mem kind: %d\n", + dev_err(gmu_pdev_dev, "Invalid mem kind: %d\n", desc->mem_kind); return ERR_PTR(-EINVAL); } if (hfi->mem_alloc_entries == ARRAY_SIZE(hfi->mem_alloc_table)) { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "Reached max mem alloc entries\n"); return ERR_PTR(-ENOMEM); } @@ -1686,7 +1687,7 @@ static struct hfi_mem_alloc_entry *get_mem_alloc_entry( */ ret = gmu_import_buffer(adreno_dev, entry); if (ret) { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "gpuaddr: 0x%llx size: %lld bytes lost\n", entry->md->gpuaddr, entry->md->size); memset(entry, 0, sizeof(*entry)); @@ -1797,6 +1798,7 @@ static int send_start_msg(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(device); u32 seqnum; int ret, rc = 0; struct hfi_start_cmd cmd; @@ -1820,7 +1822,7 @@ poll: rc = adreno_hwsched_poll_msg_queue_write_index(gmu->hfi.hfi_mem); if (rc) { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "Timed out processing MSG_START seqnum: %d\n", seqnum); gmu_core_fault_snapshot(device, GMU_FAULT_H2F_MSG_START); @@ -1829,7 +1831,7 @@ poll: rc = gen7_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)); if (rc <= 
0) { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "MSG_START: payload error: %d\n", rc); gmu_core_fault_snapshot(device, GMU_FAULT_H2F_MSG_START); @@ -1851,7 +1853,7 @@ poll: rc = check_ack_failure(adreno_dev, &pending_ack); goto done; } else { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "MSG_START: unexpected response id:%d, type:%d\n", MSG_HDR_GET_ID(rcvd[0]), MSG_HDR_GET_TYPE(rcvd[0])); @@ -2035,6 +2037,7 @@ static int gen7_hfi_send_perfcounter_feature_ctrl(struct adreno_device *adreno_d u32 gen7_hwsched_hfi_get_value(struct adreno_device *adreno_dev, u32 prop) { + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); struct hfi_get_value_cmd cmd; struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); @@ -2057,7 +2060,7 @@ u32 gen7_hwsched_hfi_get_value(struct adreno_device *adreno_dev, u32 prop) if (rc) goto done; - rc = adreno_hwsched_wait_ack_completion(adreno_dev, &gmu->pdev->dev, &pending_ack, + rc = adreno_hwsched_wait_ack_completion(adreno_dev, gmu_pdev_dev, &pending_ack, gen7_hwsched_process_msgq); done: @@ -2083,7 +2086,6 @@ static void _context_queue_hw_fence_enable(struct adreno_device *adreno_dev) static int gen7_hfi_send_hw_fence_feature_ctrl(struct adreno_device *adreno_dev) { - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct adreno_hwsched *hwsched = &adreno_dev->hwsched; int ret; @@ -2092,7 +2094,8 @@ static int gen7_hfi_send_hw_fence_feature_ctrl(struct adreno_device *adreno_dev) ret = gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_HW_FENCE, 1, 0); if (ret && (ret == -ENOENT)) { - dev_err(&gmu->pdev->dev, "GMU doesn't support HW_FENCE feature\n"); + dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), + "GMU doesn't support HW_FENCE feature\n"); adreno_hwsched_deregister_hw_fence(adreno_dev); return 0; } @@ -2102,7 +2105,6 @@ static int gen7_hfi_send_hw_fence_feature_ctrl(struct adreno_device *adreno_dev) static int 
gen7_hfi_send_dms_feature_ctrl(struct adreno_device *adreno_dev) { - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); int ret; if (!test_bit(ADRENO_DEVICE_DMS, &adreno_dev->priv)) @@ -2110,7 +2112,8 @@ static int gen7_hfi_send_dms_feature_ctrl(struct adreno_device *adreno_dev) ret = gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_DMS, 1, 0); if (ret == -ENOENT) { - dev_err(&gmu->pdev->dev, "GMU doesn't support DMS feature\n"); + dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), + "GMU doesn't support DMS feature\n"); clear_bit(ADRENO_DEVICE_DMS, &adreno_dev->priv); adreno_dev->dms_enabled = false; return 0; @@ -2184,6 +2187,7 @@ static int gen7_hwsched_hfi_send_warmboot_cmd(struct adreno_device *adreno_dev, static int gen7_hwsched_hfi_warmboot_gpu_cmd(struct adreno_device *adreno_dev, struct pending_cmd *ret_cmd) { + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); struct hfi_warmboot_scratch_cmd cmd = { @@ -2210,8 +2214,8 @@ static int gen7_hwsched_hfi_warmboot_gpu_cmd(struct adreno_device *adreno_dev, if (ret) goto err; - ret = adreno_hwsched_wait_ack_completion(adreno_dev, &gmu->pdev->dev, ret_cmd, - gen7_hwsched_process_msgq); + ret = adreno_hwsched_wait_ack_completion(adreno_dev, + GMU_PDEV_DEV(device), ret_cmd, gen7_hwsched_process_msgq); err: del_waiter(hfi, ret_cmd); @@ -2619,6 +2623,7 @@ int gen7_hwsched_lpac_cp_init(struct adreno_device *adreno_dev) static int hfi_f2h_main(void *arg) { struct adreno_device *adreno_dev = arg; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); @@ -2637,7 +2642,7 @@ static int hfi_f2h_main(void *arg) gen7_hwsched_process_msgq(adreno_dev); gmu_core_process_trace_data(KGSL_DEVICE(adreno_dev), - &gmu->pdev->dev, &gmu->trace); + GMU_PDEV_DEV(device), &gmu->trace); 
gen7_hwsched_process_dbgq(adreno_dev, true); } @@ -2650,7 +2655,7 @@ static void gen7_hwsched_hw_fence_timeout(struct work_struct *work) struct gen7_hwsched_device *gen7_hw_dev = container_of(hfi, struct gen7_hwsched_device, hwsched_hfi); struct adreno_device *adreno_dev = &gen7_hw_dev->gen7_dev.adreno_dev; - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 unack_count, ts; struct adreno_context *drawctxt = NULL; bool fault; @@ -2674,12 +2679,13 @@ static void gen7_hwsched_hw_fence_timeout(struct work_struct *work) if (!fault) return; - dev_err(&gmu->pdev->dev, "Hardware fence unack(%d) timeout\n", unack_count); + dev_err(GMU_PDEV_DEV(device), + "Hardware fence unack(%d) timeout\n", unack_count); if (drawctxt) { struct kgsl_process_private *proc_priv = drawctxt->base.proc_priv; - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Hardware fence got deferred for ctx:%d ts:%d pid:%d proc:%s\n", drawctxt->base.id, ts, pid_nr(proc_priv->pid), proc_priv->comm); } @@ -2695,10 +2701,11 @@ static void gen7_hwsched_hw_fence_timer(struct timer_list *t) int gen7_hwsched_hfi_probe(struct adreno_device *adreno_dev) { + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev); - gmu->hfi.irq = kgsl_request_irq(gmu->pdev, "hfi", + gmu->hfi.irq = kgsl_request_irq(GMU_PDEV(device), "hfi", gen7_hwsched_hfi_handler, adreno_dev); if (gmu->hfi.irq < 0) @@ -2862,7 +2869,6 @@ static int send_context_pointers(struct adreno_device *adreno_dev, static int hfi_context_register(struct adreno_device *adreno_dev, struct kgsl_context *context) { - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret; @@ -2871,7 +2877,7 @@ static int hfi_context_register(struct adreno_device *adreno_dev, ret = send_context_register(adreno_dev, context); if (ret) 
{ - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Unable to register context %u: %d\n", context->id, ret); @@ -2883,7 +2889,7 @@ static int hfi_context_register(struct adreno_device *adreno_dev, ret = send_context_pointers(adreno_dev, context); if (ret) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Unable to register context %u pointers: %d\n", context->id, ret); @@ -3048,7 +3054,6 @@ int gen7_hwsched_check_context_inflight_hw_fences(struct adreno_device *adreno_d struct adreno_context *drawctxt) { struct adreno_hw_fence_entry *entry, *tmp; - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); int ret = 0; spin_lock(&drawctxt->lock); @@ -3056,7 +3061,7 @@ int gen7_hwsched_check_context_inflight_hw_fences(struct adreno_device *adreno_d struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; if (timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), "detached ctx:%d has unsignaled fence ts:%d retired:%d\n", drawctxt->base.id, (u32)entry->cmd.ts, hdr->out_fence_ts); ret = -EINVAL; @@ -3113,7 +3118,6 @@ static int check_detached_context_hardware_fences(struct adreno_device *adreno_d { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct adreno_hw_fence_entry *entry, *tmp; - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); int ret = 0; /* We don't need the drawctxt lock because this context has been detached */ @@ -3121,7 +3125,7 @@ static int check_detached_context_hardware_fences(struct adreno_device *adreno_d struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; if ((timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0)) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "detached ctx:%d has unsignaled fence ts:%d retired:%d\n", drawctxt->base.id, (u32)entry->cmd.ts, hdr->out_fence_ts); ret = -EINVAL; @@ -3182,7 +3186,6 @@ static inline int setup_hw_fence_info_cmd(struct adreno_device 
*adreno_dev, int gen7_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev, struct adreno_hw_fence_entry *entry, u64 flags) { - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 seqnum; @@ -3211,8 +3214,9 @@ int gen7_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev, spin_unlock(&hfi->hw_fence.lock); if (!ret) - ret = adreno_hwsched_wait_ack_completion(adreno_dev, &gmu->pdev->dev, &hw_fence_ack, - gen7_hwsched_process_msgq); + ret = adreno_hwsched_wait_ack_completion(adreno_dev, + GMU_PDEV_DEV(device), &hw_fence_ack, + gen7_hwsched_process_msgq); memset(&hw_fence_ack, 0x0, sizeof(hw_fence_ack)); return ret; @@ -3352,7 +3356,6 @@ void gen7_hwsched_create_hw_fence(struct adreno_device *adreno_dev, /* Only allow a single log in a second */ static DEFINE_RATELIMIT_STATE(_rs, HZ, 1); struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev); - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); u32 retired = 0; int ret = 0; @@ -3407,7 +3410,7 @@ void gen7_hwsched_create_hw_fence(struct adreno_device *adreno_dev, ret = _send_hw_fence_no_ack(adreno_dev, entry); if (ret) { if (__ratelimit(&_rs)) - dev_err(&gmu->pdev->dev, "hw fence for ctx:%d ts:%d ret:%d may not be destroyed\n", + dev_err(GMU_PDEV_DEV(device), "hw fence for ctx:%d ts:%d ret:%d may not be destroyed\n", kfence->context_id, kfence->timestamp, ret); adreno_hwsched_remove_hw_fence_entry(adreno_dev, entry); kgsl_hw_fence_destroy(kfence); @@ -3785,6 +3788,7 @@ static void trigger_context_unregister_fault(struct adreno_device *adreno_dev, static int send_context_unregister_hfi(struct adreno_device *adreno_dev, struct kgsl_context *context, u32 ts) { + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); struct adreno_context *drawctxt = 
ADRENO_CONTEXT(context); @@ -3830,7 +3834,7 @@ static int send_context_unregister_hfi(struct adreno_device *adreno_dev, } ret = adreno_hwsched_ctxt_unregister_wait_completion(adreno_dev, - &gmu->pdev->dev, &pending_ack, gen7_hwsched_process_msgq, &cmd); + GMU_PDEV_DEV(device), &pending_ack, gen7_hwsched_process_msgq, &cmd); if (ret) { trigger_context_unregister_fault(adreno_dev, drawctxt); goto done; diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index a13ee31f07..f8b080a2bf 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -289,7 +289,8 @@ int gen8_gmu_device_start(struct adreno_device *adreno_dev) if (gmu_core_timed_poll_check(device, GEN8_GMUCX_CM3_FW_INIT_RESULT, BIT(8), 100, GENMASK(8, 0))) { - dev_err(&gmu->pdev->dev, "GMU failed to come out of reset\n"); + dev_err(GMU_PDEV_DEV(device), + "GMU failed to come out of reset\n"); gmu_core_fault_snapshot(device, GMU_FAULT_DEVICE_START); return -ETIMEDOUT; } @@ -303,14 +304,13 @@ int gen8_gmu_device_start(struct adreno_device *adreno_dev) */ int gen8_gmu_hfi_start(struct adreno_device *adreno_dev) { - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); gmu_core_regwrite(device, GEN8_GMUCX_HFI_CTRL_INIT, 1); if (gmu_core_timed_poll_check(device, GEN8_GMUCX_HFI_CTRL_STATUS, BIT(0), 100, BIT(0))) { - dev_err(&gmu->pdev->dev, "GMU HFI init failed\n"); + dev_err(GMU_PDEV_DEV(device), "GMU HFI init failed\n"); gmu_core_fault_snapshot(device, GMU_FAULT_HFI_INIT); return -ETIMEDOUT; } @@ -322,7 +322,6 @@ int gen8_rscc_wakeup_sequence(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - struct device *dev = &gmu->pdev->dev; /* Skip wakeup sequence if we didn't do the sleep sequence */ if (!test_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags)) @@ -336,13 +335,14 @@ int gen8_rscc_wakeup_sequence(struct adreno_device *adreno_dev) if (gmu_core_timed_poll_check(device, 
GEN8_GMUAO_RSCC_CONTROL_ACK, BIT(1), 100, BIT(1))) { - dev_err(dev, "Failed to do GPU RSC power on\n"); + dev_err(GMU_PDEV_DEV(device), "Failed to do GPU RSC power on\n"); return -ETIMEDOUT; } if (gen8_timed_poll_check_rscc(gmu, GEN8_RSCC_SEQ_BUSY_DRV0, 0x0, 100, UINT_MAX)) { - dev_err(dev, "GPU RSC sequence stuck in waking up GPU\n"); + dev_err(GMU_PDEV_DEV(device), + "GPU RSC sequence stuck in waking up GPU\n"); return -ETIMEDOUT; } @@ -378,7 +378,7 @@ int gen8_rscc_sleep_sequence(struct adreno_device *adreno_dev) ret = gen8_timed_poll_check_rscc(gmu, GEN8_GPU_RSCC_RSC_STATUS0_DRV0, BIT(16), 100, BIT(16)); if (ret) { - dev_err(&gmu->pdev->dev, "GPU RSC power off fail\n"); + dev_err(GMU_PDEV_DEV(device), "GPU RSC power off fail\n"); return -ETIMEDOUT; } @@ -433,6 +433,7 @@ static void load_tcm(struct adreno_device *adreno_dev, const u8 *src, int gen8_gmu_load_fw(struct adreno_device *adreno_dev) { struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); const u8 *fw = (const u8 *)gmu->fw_image->data; while (fw < gmu->fw_image->data + gmu->fw_image->size) { @@ -449,7 +450,7 @@ int gen8_gmu_load_fw(struct adreno_device *adreno_dev) id = find_vma_block(gmu, blk->addr, blk->size); if (id < 0) { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "Unknown block in GMU FW addr:0x%x size:0x%x\n", blk->addr, blk->size); return -EINVAL; @@ -468,7 +469,7 @@ int gen8_gmu_load_fw(struct adreno_device *adreno_dev) find_gmu_memdesc(gmu, blk->addr, blk->size); if (!md) { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "No backing memory for GMU FW block addr:0x%x size:0x%x\n", blk->addr, blk->size); return -EINVAL; @@ -527,7 +528,7 @@ int gen8_gmu_oob_set(struct kgsl_device *device, return 0; if (req >= oob_boot_slumber) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Unsupported OOB request %s\n", oob_to_str(req)); return -EINVAL; @@ -565,7 +566,7 @@ void gen8_gmu_oob_clear(struct kgsl_device 
*device, return; if (req >= oob_boot_slumber) { - dev_err(&gmu->pdev->dev, "Unsupported OOB clear %s\n", + dev_err(GMU_PDEV_DEV(device), "Unsupported OOB clear %s\n", oob_to_str(req)); return; } @@ -656,6 +657,7 @@ static u32 gen8_rscc_tcsm_drv0_status_reglist[] = { static int gen8_complete_rpmh_votes(struct gen8_gmu_device *gmu, u32 timeout) { + struct kgsl_device *device = KGSL_DEVICE(gen8_gmu_to_adreno(gmu)); int i, ret = 0; for (i = 0; i < ARRAY_SIZE(gen8_rscc_tcsm_drv0_status_reglist); i++) @@ -664,7 +666,7 @@ static int gen8_complete_rpmh_votes(struct gen8_gmu_device *gmu, BIT(0)); if (ret) - dev_err(&gmu->pdev->dev, "RPMH votes timedout: %d\n", ret); + dev_err(GMU_PDEV_DEV(device), "RPMH votes timedout: %d\n", ret); return ret; } @@ -703,6 +705,7 @@ static const char *idle_level_name(int level) int gen8_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(device); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); u32 reg, reg1, reg2, reg3, reg4; @@ -748,15 +751,19 @@ int gen8_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) gmu_core_regread(device, GEN8_GMUAO_RBBM_INT_UNMASKED_STATUS_SHADOW, ®3); gmu_core_regread(device, GEN8_GMUCX_PWR_COL_KEEPALIVE, ®4); - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "----------------------[ GMU error ]----------------------\n"); - dev_err(&gmu->pdev->dev, "Timeout waiting for lowest idle level %s\n", + dev_err(gmu_pdev_dev, + "Timeout waiting for lowest idle level %s\n", idle_level_name(gmu->idle_level)); - dev_err(&gmu->pdev->dev, "Start: %llx (absolute ticks)\n", ts1); - dev_err(&gmu->pdev->dev, "Poll: %llx (ticks relative to start)\n", ts2-ts1); - dev_err(&gmu->pdev->dev, "RPMH_POWER_STATE=%x GFX_PWR_CLK_STATUS=%x\n", reg, reg1); - dev_err(&gmu->pdev->dev, "CX_BUSY_STATUS=%x\n", reg2); - dev_err(&gmu->pdev->dev, 
"RBBM_INT_UNMASKED_STATUS=%x PWR_COL_KEEPALIVE=%x\n", reg3, reg4); + dev_err(gmu_pdev_dev, "Start: %llx (absolute ticks)\n", ts1); + dev_err(gmu_pdev_dev, + "Poll: %llx (ticks relative to start)\n", ts2-ts1); + dev_err(gmu_pdev_dev, + "RPMH_POWER_STATE=%x GFX_PWR_CLK_STATUS=%x\n", reg, reg1); + dev_err(gmu_pdev_dev, "CX_BUSY_STATUS=%x\n", reg2); + dev_err(gmu_pdev_dev, + "RBBM_INT_UNMASKED_STATUS=%x PWR_COL_KEEPALIVE=%x\n", reg3, reg4); /* Access GX registers only when GX is ON */ if (is_on(reg1)) { @@ -767,8 +774,10 @@ int gen8_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) kgsl_regread(device, GEN8_CP_CP2GMU_STATUS, ®2); kgsl_regread(device, GEN8_CP_CONTEXT_SWITCH_CNTL, ®3); - dev_err(&gmu->pdev->dev, "GEN8_CP_PIPE_STATUS_PIPE BV:%x BR:%x\n", reg, reg1); - dev_err(&gmu->pdev->dev, "CP2GMU_STATUS=%x CONTEXT_SWITCH_CNTL=%x\n", reg2, reg3); + dev_err(gmu_pdev_dev, + "GEN8_CP_PIPE_STATUS_PIPE BV:%x BR:%x\n", reg, reg1); + dev_err(gmu_pdev_dev, + "CP2GMU_STATUS=%x CONTEXT_SWITCH_CNTL=%x\n", reg2, reg3); } WARN_ON(1); @@ -781,7 +790,6 @@ int gen8_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) int gen8_gmu_wait_for_idle(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); u32 status2; u64 ts1; @@ -791,7 +799,7 @@ int gen8_gmu_wait_for_idle(struct adreno_device *adreno_dev) 0, 100, CXGXCPUBUSYIGNAHB)) { gmu_core_regread(device, GEN8_GMUAO_GPU_CX_BUSY_STATUS2, &status2); - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "GMU not idling: status2=0x%x %llx %llx\n", status2, ts1, gpudev->read_alwayson(adreno_dev)); @@ -822,7 +830,7 @@ int gen8_gmu_version_info(struct adreno_device *adreno_dev) /* Check if gmu fw version on device is compatible with kgsl driver */ if (gmu->ver.core < gen8_core->gmu_fw_version) { - dev_err_once(&gmu->pdev->dev, + dev_err_once(GMU_PDEV_DEV(device), "GMU 
FW version 0x%x error (expected 0x%x)\n", gmu->ver.core, gen8_core->gmu_fw_version); return -EINVAL; @@ -1023,9 +1031,11 @@ static int _map_gmu_dynamic(struct gen8_gmu_device *gmu, struct kgsl_memdesc *md, u32 addr, u32 vma_id, int attrs, u32 align) { - int ret; + struct kgsl_device *device = KGSL_DEVICE(gen8_gmu_to_adreno(gmu)); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(device); struct gmu_vma_entry *vma = &gmu->vma[vma_id]; struct gmu_vma_node *vma_node = NULL; + int ret; u32 size = ALIGN(md->size, hfi_get_gmu_sz_alignment(align)); spin_lock(&vma->lock); @@ -1037,7 +1047,7 @@ static int _map_gmu_dynamic(struct gen8_gmu_device *gmu, addr = find_unmapped_va(vma, size, hfi_get_gmu_va_alignment(align)); if (addr == 0) { spin_unlock(&vma->lock); - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "Insufficient VA space size: %x\n", size); return -ENOMEM; } @@ -1046,7 +1056,7 @@ static int _map_gmu_dynamic(struct gen8_gmu_device *gmu, ret = insert_va(vma, addr, size); spin_unlock(&vma->lock); if (ret < 0) { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "Could not insert va: %x size %x\n", addr, size); return ret; } @@ -1058,7 +1068,7 @@ static int _map_gmu_dynamic(struct gen8_gmu_device *gmu, } /* Failed to map to GMU */ - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n", addr, md->size, ret); @@ -1076,8 +1086,9 @@ static int _map_gmu_static(struct gen8_gmu_device *gmu, struct kgsl_memdesc *md, u32 addr, u32 vma_id, int attrs, u32 align) { - int ret; + struct kgsl_device *device = KGSL_DEVICE(gen8_gmu_to_adreno(gmu)); struct gmu_vma_entry *vma = &gmu->vma[vma_id]; + int ret; u32 size = ALIGN(md->size, hfi_get_gmu_sz_alignment(align)); if (!addr) @@ -1085,7 +1096,7 @@ static int _map_gmu_static(struct gen8_gmu_device *gmu, ret = gmu_core_map_memdesc(gmu->domain, md, addr, attrs); if (ret) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Unable to map GMU kernel block: addr:0x%08x 
size:0x%llx :%d\n", addr, md->size, ret); return ret; @@ -1157,7 +1168,7 @@ struct kgsl_memdesc *gen8_reserve_gmu_kernel_block_fixed(struct gen8_gmu_device md = &gmu->gmu_globals[gmu->global_entries]; - ret = kgsl_memdesc_init_fixed(device, gmu->pdev, resource, md); + ret = kgsl_memdesc_init_fixed(device, GMU_PDEV(device), resource, md); if (ret) return ERR_PTR(ret); @@ -1170,7 +1181,7 @@ struct kgsl_memdesc *gen8_reserve_gmu_kernel_block_fixed(struct gen8_gmu_device if (!ret) gmu->global_entries++; else { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n", addr, md->size, ret); memset(md, 0x0, sizeof(*md)); @@ -1227,12 +1238,12 @@ free: static int gen8_gmu_process_prealloc(struct gen8_gmu_device *gmu, struct gmu_block_header *blk) { + struct kgsl_device *device = KGSL_DEVICE(gen8_gmu_to_adreno(gmu)); struct kgsl_memdesc *md; - int id = find_vma_block(gmu, blk->addr, blk->value); if (id < 0) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Invalid prealloc block addr: 0x%x value:%d\n", blk->addr, blk->value); return id; @@ -1254,6 +1265,7 @@ static int gen8_gmu_process_prealloc(struct gen8_gmu_device *gmu, int gen8_gmu_parse_fw(struct adreno_device *adreno_dev) { + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); struct gmu_block_header *blk; @@ -1273,10 +1285,10 @@ int gen8_gmu_parse_fw(struct adreno_device *adreno_dev) return -EINVAL; ret = request_firmware(&gmu->fw_image, gmufw_name, - &gmu->pdev->dev); + gmu_pdev_dev); if (ret) { - dev_err(&gmu->pdev->dev, "request_firmware (%s) failed: %d\n", - gmufw_name, ret); + dev_err(gmu_pdev_dev, + "request_firmware (%s) failed: %d\n", gmufw_name, ret); return ret; } } @@ -1290,7 +1302,7 @@ int gen8_gmu_parse_fw(struct adreno_device *adreno_dev) blk = (struct gmu_block_header 
*)&gmu->fw_image->data[offset]; if (offset + sizeof(*blk) > gmu->fw_image->size) { - dev_err(&gmu->pdev->dev, "Invalid FW Block\n"); + dev_err(gmu_pdev_dev, "Invalid FW Block\n"); return -EINVAL; } @@ -1418,7 +1430,7 @@ static void gen8_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) gen8_rscc_sleep_sequence(adreno_dev); if (gen8_gmu_gx_is_on(adreno_dev)) - dev_err(&gmu->pdev->dev, "gx is stuck on\n"); + dev_err(GMU_PDEV_DEV(device), "gx is stuck on\n"); } /* @@ -1475,7 +1487,7 @@ void gen8_gmu_suspend(struct adreno_device *adreno_dev) gen8_rdpm_cx_freq_update(gmu, 0); - dev_err(&gmu->pdev->dev, "Suspended GMU\n"); + dev_err(GMU_PDEV_DEV(device), "Suspended GMU\n"); kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE); } @@ -1520,7 +1532,7 @@ static int gen8_gmu_dcvs_set(struct adreno_device *adreno_dev, ret = gen8_hfi_send_generic_req(adreno_dev, &req, sizeof(req)); if (ret) { - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "Failed to set GPU perf idx %u, bw idx %u\n", req.freq, req.bw); @@ -1590,7 +1602,7 @@ void gen8_gmu_send_nmi(struct kgsl_device *device, bool force, * to save cm3 state to DDR. 
*/ if (gen8_gmu_gx_is_on(adreno_dev) && adreno_smmu_is_stalled(adreno_dev)) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Skipping NMI because SMMU is stalled\n"); goto done; } @@ -1634,13 +1646,12 @@ nmi: udelay(200); done: - KGSL_GMU_CORE_FORCE_PANIC(device->gmu_core.gf_panic, gmu->pdev, ticks, gf_policy); + KGSL_GMU_CORE_FORCE_PANIC(device->gmu_core.gf_panic, + GMU_PDEV(device), ticks, gf_policy); } static void gen8_gmu_cooperative_reset(struct kgsl_device *device) { - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); u32 result; gmu_core_regwrite(device, GEN8_GMUCX_WDOG_CTRL, 0); @@ -1655,7 +1666,7 @@ static void gen8_gmu_cooperative_reset(struct kgsl_device *device) return; gmu_core_regread(device, GEN8_GMUCX_CM3_FW_INIT_RESULT, &result); - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "GMU cooperative reset timed out 0x%x\n", result); /* * If we dont get a snapshot ready from GMU, trigger NMI @@ -1665,19 +1676,18 @@ static void gen8_gmu_cooperative_reset(struct kgsl_device *device) gmu_core_regread(device, GEN8_GMUCX_CM3_FW_INIT_RESULT, &result); if ((result & 0x800) != 0x800) - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "GMU cooperative reset NMI timed out 0x%x\n", result); } static int gen8_gmu_wait_for_active_transition(struct kgsl_device *device) { u32 reg; - struct gen8_gmu_device *gmu = to_gen8_gmu(ADRENO_DEVICE(device)); if (gmu_core_timed_poll_check(device, GEN8_GMUCX_RPMH_POWER_STATE, GPU_HW_ACTIVE, 100, GENMASK(3, 0))) { gmu_core_regread(device, GEN8_GMUCX_RPMH_POWER_STATE, ®); - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "GMU failed to move to ACTIVE state, Current state: 0x%x\n", reg); @@ -1694,7 +1704,6 @@ static bool gen8_gmu_scales_bandwidth(struct kgsl_device *device) void gen8_gmu_handle_watchdog(struct adreno_device *adreno_dev) { - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct kgsl_device *device = 
KGSL_DEVICE(adreno_dev); u32 mask; @@ -1705,7 +1714,7 @@ void gen8_gmu_handle_watchdog(struct adreno_device *adreno_dev) gen8_gmu_send_nmi(device, false, GMU_FAULT_PANIC_NONE); - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "GMU watchdog expired interrupt received\n"); } @@ -1713,7 +1722,6 @@ static irqreturn_t gen8_gmu_irq_handler(int irq, void *data) { struct kgsl_device *device = data; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); const struct gen8_gpudev *gen8_gpudev = to_gen8_gpudev(ADRENO_GPU_DEVICE(adreno_dev)); u32 status = 0; @@ -1722,7 +1730,7 @@ static irqreturn_t gen8_gmu_irq_handler(int irq, void *data) gmu_core_regwrite(device, GEN8_GMUAO_AO_HOST_INTERRUPT_CLR, status); if (status & GMU_INT_HOST_AHB_BUS_ERR) - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "AHB bus error interrupt received\n"); if (status & GMU_INT_WDOG_BITE) @@ -1733,12 +1741,12 @@ static irqreturn_t gen8_gmu_irq_handler(int irq, void *data) gmu_core_regread(device, GEN8_GMUAO_AHB_FENCE_STATUS, &fence_status); - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "FENCE error interrupt received %x\n", fence_status); } if (status & ~GMU_AO_INT_MASK) - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "Unhandled GMU interrupts 0x%lx\n", status & ~GMU_AO_INT_MASK); @@ -1747,6 +1755,7 @@ static irqreturn_t gen8_gmu_irq_handler(int irq, void *data) void gen8_gmu_aop_send_acd_state(struct gen8_gmu_device *gmu, bool flag) { + struct kgsl_device *device = KGSL_DEVICE(gen8_gmu_to_adreno(gmu)); char msg_buf[36]; u32 size; int ret; @@ -1759,7 +1768,7 @@ void gen8_gmu_aop_send_acd_state(struct gen8_gmu_device *gmu, bool flag) ret = qmp_send(gmu->qmp, msg_buf, ALIGN((size + 1), SZ_4)); if (ret < 0) - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "AOP qmp send message failed: %d\n", ret); } 
@@ -1773,8 +1782,8 @@ int gen8_gmu_clock_set_rate(struct adreno_device *adreno_dev, u32 req_freq) ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", req_freq); if (ret) { - dev_err(&gmu->pdev->dev, "GMU clock:%d set failed:%d\n", - req_freq, ret); + dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), + "GMU clock:%d set failed:%d\n", req_freq, ret); return ret; } @@ -1798,13 +1807,13 @@ int gen8_gmu_enable_clks(struct adreno_device *adreno_dev, u32 level) ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "hub_clk", adreno_dev->gmu_hub_clk_freq); if (ret && ret != -ENODEV) { - dev_err(&gmu->pdev->dev, "Unable to set the HUB clock\n"); + dev_err(GMU_PDEV_DEV(device), "Unable to set the HUB clock\n"); return ret; } ret = clk_bulk_prepare_enable(gmu->num_clks, gmu->clks); if (ret) { - dev_err(&gmu->pdev->dev, "Cannot enable GMU clocks\n"); + dev_err(GMU_PDEV_DEV(device), "Cannot enable GMU clocks\n"); return ret; } @@ -2170,6 +2179,7 @@ u32 gen8_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab) static void gen8_free_gmu_globals(struct gen8_gmu_device *gmu) { + struct kgsl_device *device = KGSL_DEVICE(gen8_gmu_to_adreno(gmu)); int i; for (i = 0; i < gmu->global_entries && i < ARRAY_SIZE(gmu->gmu_globals); i++) { @@ -2187,7 +2197,7 @@ static void gen8_free_gmu_globals(struct gen8_gmu_device *gmu) } if (gmu->domain) { - iommu_detach_device(gmu->domain, &gmu->pdev->dev); + iommu_detach_device(gmu->domain, GMU_PDEV_DEV(device)); iommu_domain_free(gmu->domain); gmu->domain = NULL; } @@ -2198,7 +2208,9 @@ static void gen8_free_gmu_globals(struct gen8_gmu_device *gmu) static int gen8_gmu_qmp_aoss_init(struct adreno_device *adreno_dev, struct gen8_gmu_device *gmu) { - gmu->qmp = qmp_get(&gmu->pdev->dev); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); + + gmu->qmp = qmp_get(gmu_pdev_dev); if (IS_ERR(gmu->qmp)) return PTR_ERR(gmu->qmp); @@ -2245,38 +2257,40 @@ static void gen8_gmu_acd_probe(struct kgsl_device *device, ret = 
gen8_gmu_qmp_aoss_init(adreno_dev, gmu); if (ret) - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "AOP qmp init failed: %d\n", ret); } static int gen8_gmu_reg_probe(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); int ret; - ret = kgsl_regmap_add_region(&device->regmap, gmu->pdev, "gmu", NULL, NULL); + ret = kgsl_regmap_add_region(&device->regmap, + GMU_PDEV(device), "gmu", NULL, NULL); if (ret) - dev_err(&gmu->pdev->dev, "Unable to map the GMU registers\n"); + dev_err(GMU_PDEV_DEV(device), "Unable to map the GMU registers\n"); /* * gmu_ao_blk_dec1 and gmu_ao_blk_dec2 are contiguous and contained within the gmu region * mapped above. gmu_ao_blk_dec0 is not within the gmu region and is mapped separately. */ - kgsl_regmap_add_region(&device->regmap, gmu->pdev, "gmu_ao_blk_dec0", NULL, NULL); + kgsl_regmap_add_region(&device->regmap, GMU_PDEV(device), "gmu_ao_blk_dec0", NULL, NULL); return ret; } static int gen8_gmu_clk_probe(struct adreno_device *adreno_dev) { + struct platform_device *gmu_pdev = GMU_PDEV(KGSL_DEVICE(adreno_dev)); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); int ret, i; int tbl_size; int num_freqs; int offset; - ret = devm_clk_bulk_get_all(&gmu->pdev->dev, &gmu->clks); + ret = devm_clk_bulk_get_all(gmu_pdev_dev, &gmu->clks); if (ret < 0) return ret; @@ -2298,7 +2312,7 @@ static int gen8_gmu_clk_probe(struct adreno_device *adreno_dev) gmu->num_clks = ret; /* Read the optional list of GMU frequencies */ - if (of_get_property(gmu->pdev->dev.of_node, + if (of_get_property(gmu_pdev->dev.of_node, "qcom,gmu-freq-table", &tbl_size) == NULL) goto default_gmu_freq; @@ -2308,11 +2322,11 @@ static int gen8_gmu_clk_probe(struct adreno_device *adreno_dev) for (i = 0; i < num_freqs; i++) { offset = i * 2; - ret = of_property_read_u32_index(gmu->pdev->dev.of_node, + ret = 
of_property_read_u32_index(gmu_pdev->dev.of_node, "qcom,gmu-freq-table", offset, &gmu->freqs[i]); if (ret) goto default_gmu_freq; - ret = of_property_read_u32_index(gmu->pdev->dev.of_node, + ret = of_property_read_u32_index(gmu_pdev->dev.of_node, "qcom,gmu-freq-table", offset + 1, &gmu->vlvls[i]); if (ret) goto default_gmu_freq; @@ -2389,11 +2403,13 @@ static int gen8_gmu_iommu_fault_handler(struct iommu_domain *domain, static int gen8_gmu_iommu_init(struct gen8_gmu_device *gmu) { + struct kgsl_device *device = KGSL_DEVICE(gen8_gmu_to_adreno(gmu)); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(device); int ret; gmu->domain = iommu_domain_alloc(&platform_bus_type); if (gmu->domain == NULL) { - dev_err(&gmu->pdev->dev, "Unable to allocate GMU IOMMU domain\n"); + dev_err(gmu_pdev_dev, "Unable to allocate GMU IOMMU domain\n"); return -ENODEV; } @@ -2404,14 +2420,14 @@ static int gen8_gmu_iommu_init(struct gen8_gmu_device *gmu) */ qcom_iommu_set_fault_model(gmu->domain, QCOM_IOMMU_FAULT_MODEL_NO_STALL); - ret = iommu_attach_device(gmu->domain, &gmu->pdev->dev); + ret = iommu_attach_device(gmu->domain, gmu_pdev_dev); if (!ret) { iommu_set_fault_handler(gmu->domain, gen8_gmu_iommu_fault_handler, gmu); return 0; } - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "Unable to attach GMU IOMMU domain: %d\n", ret); iommu_domain_free(gmu->domain); gmu->domain = NULL; @@ -2434,11 +2450,12 @@ int gen8_gmu_probe(struct kgsl_device *device, struct resource *res; int ret, i; - gmu->pdev = pdev; + device->gmu_core.pdev = pdev; - dma_set_coherent_mask(&gmu->pdev->dev, DMA_BIT_MASK(64)); - gmu->pdev->dev.dma_mask = &gmu->pdev->dev.coherent_dma_mask; - set_dma_ops(&gmu->pdev->dev, NULL); + dma_set_coherent_mask(&device->gmu_core.pdev->dev, DMA_BIT_MASK(64)); + device->gmu_core.pdev->dev.dma_mask = + &device->gmu_core.pdev->dev.coherent_dma_mask; + set_dma_ops(GMU_PDEV_DEV(device), NULL); res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, "rscc"); @@ -2446,7 +2463,7 @@ int 
gen8_gmu_probe(struct kgsl_device *device, gmu->rscc_virt = devm_ioremap(&device->pdev->dev, res->start, resource_size(res)); if (!gmu->rscc_virt) { - dev_err(&gmu->pdev->dev, "rscc ioremap failed\n"); + dev_err(GMU_PDEV_DEV(device), "rscc ioremap failed\n"); return -ENOMEM; } } @@ -2520,12 +2537,12 @@ int gen8_gmu_probe(struct kgsl_device *device, (void) kobject_init_and_add(&gmu->log_kobj, &log_kobj_type, &dev->kobj, "log"); (void) kobject_init_and_add(&gmu->stats_kobj, &stats_kobj_type, &dev->kobj, "stats"); - of_property_read_u32(gmu->pdev->dev.of_node, "qcom,gmu-perf-ddr-bw", - &gmu->perf_ddr_bw); + of_property_read_u32(GMU_PDEV(device)->dev.of_node, + "qcom,gmu-perf-ddr-bw", &gmu->perf_ddr_bw); spin_lock_init(&gmu->hfi.cmdq_lock); - gmu->irq = kgsl_request_irq(gmu->pdev, "gmu", + gmu->irq = kgsl_request_irq(GMU_PDEV(device), "gmu", gen8_gmu_irq_handler, device); if (gmu->irq >= 0) @@ -3209,10 +3226,11 @@ int gen8_gmu_reset(struct adreno_device *adreno_dev) int gen8_gmu_hfi_probe(struct adreno_device *adreno_dev) { + struct platform_device *gmu_pdev = GMU_PDEV(KGSL_DEVICE(adreno_dev)); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct gen8_hfi *hfi = &gmu->hfi; - hfi->irq = kgsl_request_irq(gmu->pdev, "hfi", + hfi->irq = kgsl_request_irq(gmu_pdev, "hfi", gen8_hfi_irq_handler, KGSL_DEVICE(adreno_dev)); return hfi->irq < 0 ? 
hfi->irq : 0; diff --git a/adreno_gen8_gmu.h b/adreno_gen8_gmu.h index d4831f3c4d..d44b805ff1 100644 --- a/adreno_gen8_gmu.h +++ b/adreno_gen8_gmu.h @@ -39,7 +39,6 @@ struct gen8_gmu_device { u32 pwr_dev; u32 hfi; } ver; - struct platform_device *pdev; int irq; const struct firmware *fw_image; struct kgsl_memdesc *dump_mem; diff --git a/adreno_gen8_gmu_snapshot.c b/adreno_gen8_gmu_snapshot.c index 7c3bd4e536..d5dfeb5b64 100644 --- a/adreno_gen8_gmu_snapshot.c +++ b/adreno_gen8_gmu_snapshot.c @@ -55,7 +55,8 @@ static size_t gen8_gmu_snapshot_itcm(struct kgsl_device *device, struct gen8_gmu_device *gmu = (struct gen8_gmu_device *)priv; if (!gmu->itcm_shadow) { - dev_err(&gmu->pdev->dev, "No memory allocated for ITCM shadow capture\n"); + dev_err(GMU_PDEV_DEV(device), + "No memory allocated for ITCM shadow capture\n"); return 0; } diff --git a/adreno_gen8_hfi.c b/adreno_gen8_hfi.c index 80478f4ed4..456a28fcd9 100644 --- a/adreno_gen8_hfi.c +++ b/adreno_gen8_hfi.c @@ -35,6 +35,7 @@ struct gen8_hfi *to_gen8_hfi(struct adreno_device *adreno_dev) int gen8_hfi_queue_read(struct gen8_gmu_device *gmu, u32 queue_idx, u32 *output, u32 max_size) { + struct kgsl_device *device = KGSL_DEVICE(gen8_gmu_to_adreno(gmu)); struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem; struct hfi_queue_table *tbl = mem_addr->hostptr; struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx]; @@ -58,7 +59,7 @@ int gen8_hfi_queue_read(struct gen8_gmu_device *gmu, u32 queue_idx, size = MSG_HDR_GET_SIZE(msg_hdr); if (size > (max_size >> 2)) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "HFI message too big: hdr:0x%x rd idx=%d\n", msg_hdr, hdr->read_index); result = -EMSGSIZE; @@ -75,7 +76,7 @@ int gen8_hfi_queue_read(struct gen8_gmu_device *gmu, u32 queue_idx, result = size; } else { /* In case FW messed up */ - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Read index %d greater than queue size %d\n", hdr->read_index, hdr->queue_size); result = -ENODATA; @@ -251,7 +252,7 @@ int 
gen8_receive_ack_cmd(struct gen8_gmu_device *gmu, void *rcvd, } /* Didn't find the sender, list the waiter */ - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "HFI ACK: Cannot find sender for 0x%8.8x Waiter: 0x%8.8x\n", req_hdr, ret_cmd->sent_hdr); gmu_core_fault_snapshot(device, GMU_FAULT_HFI_RECIVE_ACK); @@ -264,7 +265,6 @@ static int poll_gmu_reg(struct adreno_device *adreno_dev, u32 mask, u32 timeout_ms) { u32 val; - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms); bool nmi = false; @@ -296,7 +296,7 @@ static int poll_gmu_reg(struct adreno_device *adreno_dev, if ((val & mask) == expected_val) return 0; - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Reg poll %s: offset 0x%x, want 0x%x, got 0x%x\n", nmi ? "abort" : "timeout", offsetdwords, expected_val, val & mask); @@ -328,7 +328,7 @@ static int gen8_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev, HFI_IRQ_MSGQ_MASK, HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT); if (rc) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Timed out waiting on ack for 0x%8.8x (id %d, sequence %d)\n", cmd[0], MSG_HDR_GET_ID(*cmd), MSG_HDR_GET_SEQNUM(*cmd)); gmu_core_fault_snapshot(device, GMU_FAULT_SEND_CMD_WAIT_INLINE); @@ -356,10 +356,9 @@ int gen8_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd, u32 s return rc; if (ret_cmd.results[2]) { - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "HFI ACK failure: Req=0x%8.8X, Result=0x%8.8X\n", ret_cmd.results[1], ret_cmd.results[2]); @@ -410,7 +409,7 @@ int gen8_hfi_send_generic_req_v5(struct adreno_device *adreno_dev, void *cmd, rc = -EINVAL; break; default: - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), "HFI ACK: Req=0x%8.8X, Result=0x%8.8X 
Error:0x%8.8X\n", ret_cmd->results[1], ret_cmd->results[2], ret_cmd->results[3]); rc = -EINVAL; @@ -424,7 +423,6 @@ int gen8_hfi_send_generic_req_v5(struct adreno_device *adreno_dev, void *cmd, int gen8_hfi_send_feature_ctrl(struct adreno_device *adreno_dev, u32 feature, u32 enable, u32 data) { - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct pending_cmd ret_cmd = {0}; struct hfi_feature_ctrl_cmd cmd = { .feature = feature, @@ -439,14 +437,14 @@ int gen8_hfi_send_feature_ctrl(struct adreno_device *adreno_dev, ret = gen8_hfi_send_generic_req_v5(adreno_dev, &cmd, &ret_cmd, sizeof(cmd)); if (ret < 0) - dev_err(&gmu->pdev->dev, "Unable to %s feature %s (%d)\n", + dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), + "Unable to %s feature %s (%d)\n", enable ? "enable" : "disable", hfi_feature_to_string(feature), feature); return ret; } int gen8_hfi_send_get_value(struct adreno_device *adreno_dev, u32 type, u32 subtype) { - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct pending_cmd ret_cmd = {0}; struct hfi_get_value_cmd cmd = { .type = type, @@ -460,7 +458,7 @@ int gen8_hfi_send_get_value(struct adreno_device *adreno_dev, u32 type, u32 subt ret = gen8_hfi_send_generic_req_v5(adreno_dev, &cmd, &ret_cmd, sizeof(cmd)); if (ret < 0) - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), "Unable to get HFI Value type: %d, subtype: %d, error = %d\n", type, subtype, ret); @@ -470,7 +468,6 @@ int gen8_hfi_send_get_value(struct adreno_device *adreno_dev, u32 type, u32 subt int gen8_hfi_send_set_value(struct adreno_device *adreno_dev, u32 type, u32 subtype, u32 data) { - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct pending_cmd ret_cmd = {0}; struct hfi_set_value_cmd cmd = { .type = type, @@ -485,7 +482,7 @@ int gen8_hfi_send_set_value(struct adreno_device *adreno_dev, ret = gen8_hfi_send_generic_req_v5(adreno_dev, &cmd, &ret_cmd, sizeof(cmd)); if (ret < 0) - dev_err(&gmu->pdev->dev, + 
dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), "Unable to set HFI Value %d, %d to %d, error = %d\n", type, subtype, data, ret); return ret; @@ -498,26 +495,28 @@ void adreno_gen8_receive_err_req(struct gen8_gmu_device *gmu, void *rcvd) u64 ticks = gpudev->read_alwayson(ADRENO_DEVICE(device)); struct hfi_err_cmd *cmd = rcvd; - dev_err(&gmu->pdev->dev, "HFI Error Received: %d %d %.16s\n", + dev_err(GMU_PDEV_DEV(device), "HFI Error Received: %d %d %.16s\n", ((cmd->error_code >> 16) & 0xffff), (cmd->error_code & 0xffff), (char *) cmd->data); KGSL_GMU_CORE_FORCE_PANIC(device->gmu_core.gf_panic, - gmu->pdev, ticks, GMU_FAULT_F2H_MSG_ERR); + GMU_PDEV(device), ticks, GMU_FAULT_F2H_MSG_ERR); } void adreno_gen8_receive_debug_req(struct gen8_gmu_device *gmu, void *rcvd) { + struct kgsl_device *device = KGSL_DEVICE(gen8_gmu_to_adreno(gmu)); struct hfi_debug_cmd *cmd = rcvd; - dev_dbg(&gmu->pdev->dev, "HFI Debug Received: %d %d %d\n", + dev_dbg(GMU_PDEV_DEV(device), "HFI Debug Received: %d %d %d\n", cmd->type, cmd->timestamp, cmd->data); } int gen8_hfi_process_queue(struct gen8_gmu_device *gmu, u32 queue_idx, struct pending_cmd *ret_cmd) { + struct kgsl_device *device = KGSL_DEVICE(gen8_gmu_to_adreno(gmu)); u32 rcvd[MAX_RCVD_SIZE]; while (gen8_hfi_queue_read(gmu, queue_idx, rcvd, sizeof(rcvd)) > 0) { @@ -539,7 +538,7 @@ int gen8_hfi_process_queue(struct gen8_gmu_device *gmu, adreno_gen8_receive_debug_req(gmu, rcvd); break; default: /* No Reply */ - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "HFI request %d not supported\n", MSG_HDR_GET_ID(rcvd[0])); break; @@ -815,7 +814,7 @@ irqreturn_t gen8_hfi_irq_handler(int irq, void *data) if (status & HFI_IRQ_DBGQ_MASK) gen8_hfi_process_queue(gmu, HFI_DBG_ID, NULL); if (status & HFI_IRQ_CM3_FAULT_MASK) { - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "GMU CM3 fault interrupt received\n"); atomic_set(&gmu->cm3_fault, 1); @@ -823,7 +822,7 @@ irqreturn_t gen8_hfi_irq_handler(int irq, void 
*data) smp_wmb(); } if (status & ~HFI_IRQ_MASK) - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "Unhandled HFI interrupts 0x%lx\n", status & ~HFI_IRQ_MASK); diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 1c78e36bf1..b3ff9adeea 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -189,7 +189,8 @@ static void _get_hw_fence_entries(struct adreno_device *adreno_dev) if (of_property_read_u32(node, "qcom,hw-fence-table-entries", &shadow_num_entries)) { - dev_err(&gmu->pdev->dev, "qcom,hw-fence-table-entries property not found\n"); + dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), + "qcom,hw-fence-table-entries property not found\n"); shadow_num_entries = 8192; } @@ -201,7 +202,7 @@ static void _get_hw_fence_entries(struct adreno_device *adreno_dev) static void gen8_hwsched_soccp_vote_init(struct adreno_device *adreno_dev) { - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev); if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags)) @@ -210,7 +211,7 @@ static void gen8_hwsched_soccp_vote_init(struct adreno_device *adreno_dev) if (hw_hfi->hw_fence.soccp_rproc) return; - hw_hfi->hw_fence.soccp_rproc = gmu_core_soccp_vote_init(&gmu->pdev->dev); + hw_hfi->hw_fence.soccp_rproc = gmu_core_soccp_vote_init(gmu_pdev_dev); if (!IS_ERR(hw_hfi->hw_fence.soccp_rproc)) return; @@ -220,13 +221,14 @@ static void gen8_hwsched_soccp_vote_init(struct adreno_device *adreno_dev) void gen8_hwsched_soccp_vote(struct adreno_device *adreno_dev, bool pwr_on) { + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev); if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags)) return; - if (!gmu_core_soccp_vote(&gmu->pdev->dev, &gmu->flags, 
hw_hfi->hw_fence.soccp_rproc, + if (!gmu_core_soccp_vote(gmu_pdev_dev, &gmu->flags, hw_hfi->hw_fence.soccp_rproc, pwr_on)) return; @@ -240,7 +242,8 @@ void gen8_hwsched_soccp_vote(struct adreno_device *adreno_dev, bool pwr_on) * soccp power vote failed, these hardware fences may never be signaled. Hence, log them * for debug purposes. */ - adreno_hwsched_log_destroy_pending_hw_fences(adreno_dev, &gmu->pdev->dev); + adreno_hwsched_log_destroy_pending_hw_fences(adreno_dev, + gmu_pdev_dev); adreno_mark_for_coldboot(adreno_dev); adreno_hwsched_deregister_hw_fence(adreno_dev); @@ -1030,7 +1033,6 @@ static void check_hw_fence_unack_count(struct adreno_device *adreno_dev) { struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); u32 unack_count; if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags)) @@ -1045,7 +1047,8 @@ static void check_hw_fence_unack_count(struct adreno_device *adreno_dev) if (!unack_count) return; - dev_err(&gmu->pdev->dev, "hardware fence unack_count(%d) isn't zero before SLUMBER\n", + dev_err(GMU_PDEV_DEV(device), + "hardware fence unack_count(%d) isn't zero before SLUMBER\n", unack_count); gmu_core_fault_snapshot(device, GMU_FAULT_HW_FENCE); } @@ -1159,7 +1162,7 @@ static int gen8_hwsched_dcvs_set(struct adreno_device *adreno_dev, /* Do not set to XO and lower GPU clock vote from GMU */ if ((gpu_pwrlevel != INVALID_DCVS_IDX) && (gpu_pwrlevel >= table->gpu_level_num - 1)) { - dev_err(&gmu->pdev->dev, "Invalid gpu dcvs request: %d\n", + dev_err(GMU_PDEV_DEV(device), "Invalid gpu dcvs request: %d\n", gpu_pwrlevel); return -EINVAL; } @@ -1183,7 +1186,7 @@ static int gen8_hwsched_dcvs_set(struct adreno_device *adreno_dev, ret = gen8_hfi_send_cmd_async(adreno_dev, &req, sizeof(req)); if (ret) { - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "Failed to set GPU perf idx %u, bw idx %u\n", 
req.freq, req.bw); @@ -1329,7 +1332,6 @@ static void gen8_hwsched_pm_resume(struct adreno_device *adreno_dev) void gen8_hwsched_handle_watchdog(struct adreno_device *adreno_dev) { - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 mask; @@ -1341,7 +1343,7 @@ void gen8_hwsched_handle_watchdog(struct adreno_device *adreno_dev) gen8_gmu_send_nmi(device, false, GMU_FAULT_PANIC_NONE); - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "GMU watchdog expired interrupt received\n"); gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index d67be92e7d..586190eb9d 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -79,7 +79,7 @@ static void del_waiter(struct gen8_hwsched_hfi *hfi, struct pending_cmd *ack) static void gen8_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd) { - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); struct pending_cmd *cmd = NULL; u32 waiters[64], num_waiters = 0, i; @@ -89,7 +89,7 @@ static void gen8_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd) u32 size_bytes = MSG_HDR_GET_SIZE(hdr) << 2; if (size_bytes > sizeof(cmd->results)) - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(gmu_pdev_dev, "Ack result too big: %d Truncating to: %ld\n", size_bytes, sizeof(cmd->results)); @@ -112,13 +112,13 @@ static void gen8_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd) read_unlock(&hfi->msglock); /* Didn't find the sender, list the waiter */ - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(gmu_pdev_dev, "Unexpectedly got id %d seqnum %d. 
Total waiters: %d Top %d Waiters:\n", MSG_HDR_GET_ID(req_hdr), MSG_HDR_GET_SEQNUM(req_hdr), num_waiters, min_t(u32, num_waiters, 5)); for (i = 0; i < num_waiters && i < 5; i++) - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(gmu_pdev_dev, " id %d seqnum %d\n", MSG_HDR_GET_ID(waiters[i]), MSG_HDR_GET_SEQNUM(waiters[i])); @@ -219,6 +219,7 @@ static void _get_syncobj_string(char *str, u32 max_size, struct hfi_syncobj *syn static void log_syncobj(struct gen8_gmu_device *gmu, struct adreno_context *drawctxt, struct hfi_submit_syncobj *cmd, u32 syncobj_read_idx) { + struct kgsl_device *device = KGSL_DEVICE(gen8_gmu_to_adreno(gmu)); struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; struct hfi_syncobj syncobj; char str[128]; @@ -230,7 +231,7 @@ static void log_syncobj(struct gen8_gmu_device *gmu, struct adreno_context *draw break; _get_syncobj_string(str, sizeof(str), &syncobj, i); - dev_err(&gmu->pdev->dev, "%s\n", str); + dev_err(GMU_PDEV_DEV(device), "%s\n", str); syncobj_read_idx = (syncobj_read_idx + (sizeof(syncobj) >> 2)) % hdr->queue_size; } } @@ -278,7 +279,7 @@ static void find_timeout_syncobj(struct adreno_device *adreno_dev, u32 ctxt_id, } if (i == hdr->write_index) - dev_err(&gmu->pdev->dev, "Couldn't find unsignaled syncobj ctx:%d ts:%d\n", + dev_err(GMU_PDEV_DEV(device), "Couldn't find unsignaled syncobj ctx:%d ts:%d\n", ctxt_id, ts); kgsl_context_put(context); @@ -345,24 +346,23 @@ static u32 get_payload_rb_key(struct adreno_device *adreno_dev, static bool log_gpu_fault(struct adreno_device *adreno_dev) { - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); - struct device *dev = &gmu->pdev->dev; + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; /* Return false for non fatal errors */ - if (adreno_hwsched_log_nonfatal_gpu_fault(adreno_dev, dev, cmd->error)) + if (adreno_hwsched_log_nonfatal_gpu_fault(adreno_dev, gmu_pdev_dev, 
cmd->error)) return false; switch (cmd->error) { case GMU_GPU_HW_HANG: - dev_crit_ratelimited(dev, "MISC: GPU hang detected\n"); + dev_crit_ratelimited(gmu_pdev_dev, "MISC: GPU hang detected\n"); break; case GMU_GPU_SW_HANG: - dev_crit_ratelimited(dev, "gpu timeout ctx %d ts %d\n", + dev_crit_ratelimited(gmu_pdev_dev, "gpu timeout ctx %d ts %d\n", cmd->gc.ctxt_id, cmd->gc.ts); break; case GMU_CP_OPCODE_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP opcode error interrupt | opcode=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_OPCODE_ERROR)); @@ -371,20 +371,20 @@ static bool log_gpu_fault(struct adreno_device *adreno_dev) u32 status = gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_PROTECTED_ERROR); - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", status & (1 << 20) ? "READ" : "WRITE", status & 0x3FFFF, status); } break; case GMU_CP_ILLEGAL_INST_ERROR: - dev_crit_ratelimited(dev, "CP Illegal instruction error\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP Illegal instruction error\n"); break; case GMU_CP_UCODE_ERROR: - dev_crit_ratelimited(dev, "CP ucode error interrupt\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP ucode error interrupt\n"); break; case GMU_CP_HW_FAULT_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP | Ringbuffer HW fault | status=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_HW_FAULT)); @@ -402,16 +402,16 @@ static bool log_gpu_fault(struct adreno_device *adreno_dev) next_rptr = get_payload_rb_key(adreno_dev, next, KEY_RB_RPTR); next_wptr = get_payload_rb_key(adreno_dev, next, KEY_RB_WPTR); - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n", cur, cur_rptr, cur_wptr, next, next_rptr, next_wptr); } break; case GMU_CP_GPC_ERROR: - 
dev_crit_ratelimited(dev, "RBBM: GPC error\n"); + dev_crit_ratelimited(gmu_pdev_dev, "RBBM: GPC error\n"); break; case GMU_CP_BV_OPCODE_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP BV opcode error | opcode=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_BV_OPCODE_ERROR)); @@ -420,26 +420,26 @@ static bool log_gpu_fault(struct adreno_device *adreno_dev) u32 status = gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_BV_PROTECTED_ERROR); - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP BV | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", status & (1 << 20) ? "READ" : "WRITE", status & 0x3FFFF, status); } break; case GMU_CP_BV_HW_FAULT_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP BV | Ringbuffer HW fault | status=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_BV_HW_FAULT)); break; case GMU_CP_BV_ILLEGAL_INST_ERROR: - dev_crit_ratelimited(dev, "CP BV Illegal instruction error\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP BV Illegal instruction error\n"); break; case GMU_CP_BV_UCODE_ERROR: - dev_crit_ratelimited(dev, "CP BV ucode error interrupt\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP BV ucode error interrupt\n"); break; case GMU_CP_LPAC_OPCODE_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP LPAC opcode error | opcode=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_LPAC_OPCODE_ERROR)); @@ -448,152 +448,152 @@ static bool log_gpu_fault(struct adreno_device *adreno_dev) u32 status = gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_LPAC_PROTECTED_ERROR); - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP LPAC | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", status & (1 << 20) ? 
"READ" : "WRITE", status & 0x3FFFF, status); } break; case GMU_CP_LPAC_HW_FAULT_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP LPAC | Ringbuffer HW fault | status=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_LPAC_HW_FAULT)); break; case GMU_CP_LPAC_ILLEGAL_INST_ERROR: - dev_crit_ratelimited(dev, "CP LPAC Illegal instruction error\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP LPAC Illegal instruction error\n"); break; case GMU_CP_LPAC_UCODE_ERROR: - dev_crit_ratelimited(dev, "CP LPAC ucode error interrupt\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP LPAC ucode error interrupt\n"); break; case GMU_GPU_LPAC_SW_HANG: - dev_crit_ratelimited(dev, "LPAC: gpu timeout ctx %d ts %d\n", + dev_crit_ratelimited(gmu_pdev_dev, "LPAC: gpu timeout ctx %d ts %d\n", cmd->lpac.ctxt_id, cmd->lpac.ts); break; case GMU_GPU_SW_FUSE_VIOLATION: - dev_crit_ratelimited(dev, "RBBM: SW Feature Fuse violation status=0x%8.8x\n", + dev_crit_ratelimited(gmu_pdev_dev, "RBBM: SW Feature Fuse violation status=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_SWFUSE_VIOLATION_FAULT)); break; case GMU_GPU_AQE0_OPCODE_ERROR: - dev_crit_ratelimited(dev, "AQE0 opcode error | opcode=0x%8.8x\n", + dev_crit_ratelimited(gmu_pdev_dev, "AQE0 opcode error | opcode=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_AQE0_OPCODE_ERROR)); break; case GMU_GPU_AQE0_UCODE_ERROR: - dev_crit_ratelimited(dev, "AQE0 ucode error interrupt\n"); + dev_crit_ratelimited(gmu_pdev_dev, "AQE0 ucode error interrupt\n"); break; case GMU_GPU_AQE0_HW_FAULT_ERROR: - dev_crit_ratelimited(dev, "AQE0 HW fault | status=0x%8.8x\n", + dev_crit_ratelimited(gmu_pdev_dev, "AQE0 HW fault | status=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_AQE0_HW_FAULT)); break; case GMU_GPU_AQE0_ILLEGAL_INST_ERROR: - dev_crit_ratelimited(dev, "AQE0 Illegal instruction error\n"); + 
dev_crit_ratelimited(gmu_pdev_dev, "AQE0 Illegal instruction error\n"); break; case GMU_GPU_AQE1_OPCODE_ERROR: - dev_crit_ratelimited(dev, "AQE1 opcode error | opcode=0x%8.8x\n", + dev_crit_ratelimited(gmu_pdev_dev, "AQE1 opcode error | opcode=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_AQE1_OPCODE_ERROR)); break; case GMU_GPU_AQE1_UCODE_ERROR: - dev_crit_ratelimited(dev, "AQE1 ucode error interrupt\n"); + dev_crit_ratelimited(gmu_pdev_dev, "AQE1 ucode error interrupt\n"); break; case GMU_GPU_AQE1_HW_FAULT_ERROR: - dev_crit_ratelimited(dev, "AQE1 HW fault | status=0x%8.8x\n", + dev_crit_ratelimited(gmu_pdev_dev, "AQE1 HW fault | status=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_AQE1_HW_FAULT)); break; case GMU_GPU_AQE1_ILLEGAL_INST_ERROR: - dev_crit_ratelimited(dev, "AQE1 Illegal instruction error\n"); + dev_crit_ratelimited(gmu_pdev_dev, "AQE1 Illegal instruction error\n"); break; case GMU_SYNCOBJ_TIMEOUT_ERROR: - dev_crit_ratelimited(dev, "syncobj timeout ctx %d ts %u\n", + dev_crit_ratelimited(gmu_pdev_dev, "syncobj timeout ctx %d ts %u\n", cmd->gc.ctxt_id, cmd->gc.ts); find_timeout_syncobj(adreno_dev, cmd->gc.ctxt_id, cmd->gc.ts); break; case GMU_CP_DDEBR_HW_FAULT_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP DDE BR | Ringbuffer HW fault | status=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_DDEBR_HW_FAULT)); break; case GMU_CP_DDEBR_OPCODE_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP DDE BR opcode error | opcode=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_DDEBR_OPCODE_ERROR)); break; case GMU_CP_DDEBR_UCODE_ERROR: - dev_crit_ratelimited(dev, "CP DDE BR ucode error\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP DDE BR ucode error\n"); break; case GMU_CP_DDEBR_PROTECTED_ERROR: { u32 status = gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, 
KEY_CP_DDEBR_PROTECTED_ERROR); - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP DDE BR | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", status & (1 << 20) ? "READ" : "WRITE", status & 0x3FFFF, status); } break; case GMU_CP_DDEBR_ILLEGAL_INST_ERROR: - dev_crit_ratelimited(dev, "CP DDEBR Illegal instruction error\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP DDEBR Illegal instruction error\n"); break; case GMU_CP_DDEBV_HW_FAULT_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP DDE BV | Ringbuffer HW fault | status=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_DDEBV_HW_FAULT)); break; case GMU_CP_DDEBV_OPCODE_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP DDE BV opcode error | opcode=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_DDEBV_OPCODE_ERROR)); break; case GMU_CP_DDEBV_UCODE_ERROR: - dev_crit_ratelimited(dev, "CP DDE BV ucode error\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP DDE BV ucode error\n"); break; case GMU_CP_DDEBV_PROTECTED_ERROR: { u32 status = gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_DDEBV_PROTECTED_ERROR); - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP DDE BV | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", status & (1 << 20) ? 
"READ" : "WRITE", status & 0x3FFFF, status); } break; case GMU_CP_DDEBV_ILLEGAL_INST_ERROR: - dev_crit_ratelimited(dev, "CP DDE BV Illegal instruction error\n"); + dev_crit_ratelimited(gmu_pdev_dev, "CP DDE BV Illegal instruction error\n"); break; case GMU_CP_BR_SW_FAULT_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP BR | SW fault | status=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_BR_SW_FAULT)); break; case GMU_CP_BV_SW_FAULT_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP BV | SW fault | status=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_BV_SW_FAULT)); break; case GMU_CP_LPAC_SW_FAULT_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP LPAC | SW fault | status=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_LPAC_SW_FAULT)); break; case GMU_CP_AQE0_SW_FAULT_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP AQE0 | SW fault | status=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_AQE0_SW_FAULT)); break; case GMU_CP_AQE1_SW_FAULT_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP AQE1 | SW fault | status=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_AQE1_SW_FAULT)); @@ -602,7 +602,7 @@ static bool log_gpu_fault(struct adreno_device *adreno_dev) u32 status = gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_AQE0_PROTECTED_ERROR); - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP AQE0 | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", status & (1 << 20) ? 
"READ" : "WRITE", status & 0x3FFFF, status); @@ -612,20 +612,20 @@ static bool log_gpu_fault(struct adreno_device *adreno_dev) u32 status = gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_AQE1_PROTECTED_ERROR); - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP AQE1 | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", status & (1 << 20) ? "READ" : "WRITE", status & 0x3FFFF, status); } break; case GMU_CP_DDEBR_SW_FAULT_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP DDE BR | SW fault | status=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_DDEBR_SW_FAULT)); break; case GMU_CP_DDEBV_SW_FAULT_ERROR: - dev_crit_ratelimited(dev, + dev_crit_ratelimited(gmu_pdev_dev, "CP DDE BV | SW fault | status=0x%8.8x\n", gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, KEY_CP_DDEBV_SW_FAULT)); @@ -633,7 +633,7 @@ static bool log_gpu_fault(struct adreno_device *adreno_dev) case GMU_CP_UNKNOWN_ERROR: fallthrough; default: - dev_crit_ratelimited(dev, "Unknown GPU fault: %u\n", + dev_crit_ratelimited(gmu_pdev_dev, "Unknown GPU fault: %u\n", cmd->error); break; } @@ -707,7 +707,6 @@ static void set_fence_signal_bit(struct adreno_device *adreno_dev, { u32 index = GET_QUERIED_FENCE_INDEX(fence_index); u32 bit = GET_QUERIED_FENCE_BIT(fence_index); - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); u64 flags = ADRENO_HW_FENCE_SW_STATUS_PENDING; char name[KGSL_FENCE_NAME_LEN]; char value[32] = "unknown"; @@ -716,7 +715,7 @@ static void set_fence_signal_bit(struct adreno_device *adreno_dev, fence->ops->timeline_value_str(fence, value, sizeof(value)); if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), "GMU is waiting for signaled fence(ctx:%llu seqno:%llu value:%s)\n", fence->context, fence->seqno, value); reply->queries[index].query_bitmask |= BIT(bit); @@ -793,7 +792,6 @@ static 
void gen8_process_syncobj_query_work(struct kthread_work *work) } if (missing) { - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct adreno_context *drawctxt = ADRENO_CONTEXT(context); struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; @@ -803,7 +801,8 @@ static void gen8_process_syncobj_query_work(struct kthread_work *work) * we have a problem. */ if (timestamp_cmp(cmd->sync_obj_ts, hdr->sync_obj_ts) > 0) { - dev_err(&gmu->pdev->dev, "Missing sync object ctx:%d ts:%d retired:%d\n", + dev_err(GMU_PDEV_DEV(device), + "Missing sync object ctx:%d ts:%d retired:%d\n", context->id, cmd->sync_obj_ts, hdr->sync_obj_ts); gmu_core_fault_snapshot(device, GMU_FAULT_HW_FENCE); gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); @@ -1073,7 +1072,6 @@ static void process_hw_fence_ack(struct adreno_device *adreno_dev, u32 received_ { struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); struct adreno_context *drawctxt = NULL; - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); spin_lock(&hfi->hw_fence.lock); @@ -1089,7 +1087,8 @@ static void process_hw_fence_ack(struct adreno_device *adreno_dev, u32 received_ /* The unack count should never be greater than MAX_HW_FENCE_UNACK_COUNT */ if (hfi->hw_fence.unack_count > MAX_HW_FENCE_UNACK_COUNT) - dev_err(&gmu->pdev->dev, "unexpected hardware fence unack count:%d\n", + dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), + "unexpected hardware fence unack count:%d\n", hfi->hw_fence.unack_count); if (!test_bit(GEN8_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags) || @@ -1265,7 +1264,7 @@ static irqreturn_t gen8_hwsched_hfi_handler(int irq, void *data) /* make sure other CPUs see the update */ smp_wmb(); - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "GMU CM3 fault interrupt received\n"); gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); @@ -1275,7 +1274,7 @@ static irqreturn_t gen8_hwsched_hfi_handler(int irq, void *data) status &= GENMASK(31 - (oob_max - 
1), 0); if (status & ~hfi->irq_mask) - dev_err_ratelimited(&gmu->pdev->dev, + dev_err_ratelimited(GMU_PDEV_DEV(device), "Unhandled HFI interrupts 0x%x\n", status & ~hfi->irq_mask); @@ -1287,25 +1286,26 @@ static irqreturn_t gen8_hwsched_hfi_handler(int irq, void *data) static int check_ack_failure(struct adreno_device *adreno_dev, struct pending_cmd *ack) { - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u64 ticks = gpudev->read_alwayson(adreno_dev); if (ack->results[2] != 0xffffffff) return 0; - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "ACK error: sender id %d seqnum %d\n", MSG_HDR_GET_ID(ack->sent_hdr), MSG_HDR_GET_SEQNUM(ack->sent_hdr)); - KGSL_GMU_CORE_FORCE_PANIC(KGSL_DEVICE(adreno_dev)->gmu_core.gf_panic, - gmu->pdev, ticks, GMU_FAULT_HFI_ACK); + KGSL_GMU_CORE_FORCE_PANIC(device->gmu_core.gf_panic, + GMU_PDEV(device), ticks, GMU_FAULT_HFI_ACK); return -EINVAL; } int gen8_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 size_bytes) { + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); u32 *cmd = data; @@ -1322,8 +1322,8 @@ int gen8_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 si if (rc) goto done; - rc = adreno_hwsched_wait_ack_completion(adreno_dev, &gmu->pdev->dev, &pending_ack, - gen8_hwsched_process_msgq); + rc = adreno_hwsched_wait_ack_completion(adreno_dev, + GMU_PDEV_DEV(device), &pending_ack, gen8_hwsched_process_msgq); if (rc) goto done; @@ -1450,6 +1450,7 @@ static struct hfi_mem_alloc_entry *get_mem_alloc_entry( struct adreno_device *adreno_dev, struct hfi_mem_alloc_desc *desc) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(device); struct gen8_hwsched_hfi *hfi = 
to_gen8_hwsched_hfi(adreno_dev); struct hfi_mem_alloc_entry *entry = lookup_mem_alloc_table(adreno_dev, desc); @@ -1464,13 +1465,13 @@ static struct hfi_mem_alloc_entry *get_mem_alloc_entry( return entry; if (desc->mem_kind >= HFI_MEMKIND_MAX) { - dev_err(&gmu->pdev->dev, "Invalid mem kind: %d\n", + dev_err(gmu_pdev_dev, "Invalid mem kind: %d\n", desc->mem_kind); return ERR_PTR(-EINVAL); } if (hfi->mem_alloc_entries == ARRAY_SIZE(hfi->mem_alloc_table)) { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "Reached max mem alloc entries\n"); return ERR_PTR(-ENOMEM); } @@ -1553,7 +1554,7 @@ static struct hfi_mem_alloc_entry *get_mem_alloc_entry( */ ret = gmu_import_buffer(adreno_dev, entry); if (ret) { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "gpuaddr: 0x%llx size: %lld bytes lost\n", entry->md->gpuaddr, entry->md->size); memset(entry, 0, sizeof(*entry)); @@ -1664,6 +1665,7 @@ static int send_start_msg(struct adreno_device *adreno_dev) { struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(device); int ret, rc = 0; struct hfi_start_cmd cmd; u32 seqnum, rcvd[MAX_RCVD_SIZE]; @@ -1686,7 +1688,7 @@ poll: rc = adreno_hwsched_poll_msg_queue_write_index(gmu->hfi.hfi_mem); if (rc) { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "Timed out processing MSG_START seqnum: %d\n", seqnum); gmu_core_fault_snapshot(device, GMU_FAULT_H2F_MSG_START); @@ -1695,7 +1697,7 @@ poll: rc = gen8_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)); if (rc <= 0) { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "MSG_START: payload error: %d\n", rc); gmu_core_fault_snapshot(device, GMU_FAULT_H2F_MSG_START); @@ -1717,7 +1719,7 @@ poll: rc = check_ack_failure(adreno_dev, &pending_ack); goto done; } else { - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "MSG_START: unexpected response id:%d, type:%d\n", MSG_HDR_GET_ID(rcvd[0]), MSG_HDR_GET_TYPE(rcvd[0])); @@ -1902,6 +1904,7 @@ 
static int gen8_hfi_send_perfcounter_feature_ctrl(struct adreno_device *adreno_d u32 gen8_hwsched_hfi_get_value(struct adreno_device *adreno_dev, u32 prop) { struct hfi_get_value_cmd cmd; + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); struct pending_cmd pending_ack; @@ -1923,8 +1926,8 @@ u32 gen8_hwsched_hfi_get_value(struct adreno_device *adreno_dev, u32 prop) if (rc) goto done; - rc = adreno_hwsched_wait_ack_completion(adreno_dev, &gmu->pdev->dev, &pending_ack, - gen8_hwsched_process_msgq); + rc = adreno_hwsched_wait_ack_completion(adreno_dev, + gmu_pdev_dev, &pending_ack, gen8_hwsched_process_msgq); done: del_waiter(hfi, &pending_ack); @@ -1937,8 +1940,8 @@ done: static int gen8_hfi_send_hw_fence_feature_ctrl(struct adreno_device *adreno_dev) { - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); int ret; if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &hwsched->flags)) @@ -1946,7 +1949,8 @@ static int gen8_hfi_send_hw_fence_feature_ctrl(struct adreno_device *adreno_dev) ret = gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_HW_FENCE, 1, 0); if (ret && (ret == -ENOENT)) { - dev_err(&gmu->pdev->dev, "GMU doesn't support HW_FENCE feature\n"); + dev_err(gmu_pdev_dev, + "GMU doesn't support HW_FENCE feature\n"); adreno_hwsched_deregister_hw_fence(adreno_dev); return 0; } @@ -2016,6 +2020,7 @@ static int gen8_hwsched_hfi_send_warmboot_cmd(struct adreno_device *adreno_dev, static int gen8_hwsched_hfi_warmboot_gpu_cmd(struct adreno_device *adreno_dev, struct pending_cmd *ret_cmd) { + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); struct hfi_warmboot_scratch_cmd cmd = { 
@@ -2041,8 +2046,8 @@ static int gen8_hwsched_hfi_warmboot_gpu_cmd(struct adreno_device *adreno_dev, if (ret) goto err; - ret = adreno_hwsched_wait_ack_completion(adreno_dev, &gmu->pdev->dev, ret_cmd, - gen8_hwsched_process_msgq); + ret = adreno_hwsched_wait_ack_completion(adreno_dev, + gmu_pdev_dev, ret_cmd, gen8_hwsched_process_msgq); err: del_waiter(hfi, ret_cmd); @@ -2058,6 +2063,7 @@ static void print_warmboot_gpu_error(struct device *dev, struct pending_cmd *ret static int gen8_hwsched_warmboot_gpu(struct adreno_device *adreno_dev) { + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct pending_cmd ret_cmd = {0}; int ret = 0; @@ -2091,13 +2097,13 @@ static int gen8_hwsched_warmboot_gpu(struct adreno_device *adreno_dev) gen8_spin_idle_debug_lpac(adreno_dev, "LPAC CP initialization failed to idle\n"); } else { - print_warmboot_gpu_error(&gmu->pdev->dev, &ret_cmd); + print_warmboot_gpu_error(gmu_pdev_dev, &ret_cmd); } ret = -EINVAL; break; } default: - print_warmboot_gpu_error(&gmu->pdev->dev, &ret_cmd); + print_warmboot_gpu_error(gmu_pdev_dev, &ret_cmd); ret = -EINVAL; break; } @@ -2463,6 +2469,7 @@ int gen8_hwsched_lpac_cp_init(struct adreno_device *adreno_dev) static int hfi_f2h_main(void *arg) { struct adreno_device *adreno_dev = arg; + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); @@ -2481,7 +2488,7 @@ static int hfi_f2h_main(void *arg) gen8_hwsched_process_msgq(adreno_dev); gmu_core_process_trace_data(KGSL_DEVICE(adreno_dev), - &gmu->pdev->dev, &gmu->trace); + gmu_pdev_dev, &gmu->trace); gen8_hwsched_process_dbgq(adreno_dev, true); } @@ -2494,7 +2501,7 @@ static void gen8_hwsched_hw_fence_timeout(struct work_struct *work) struct gen8_hwsched_device *gen8_hw_dev = container_of(hfi, struct gen8_hwsched_device, hwsched_hfi); struct 
adreno_device *adreno_dev = &gen8_hw_dev->gen8_dev.adreno_dev; - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); u32 unack_count, ts; struct adreno_context *drawctxt = NULL; bool fault; @@ -2518,12 +2525,12 @@ static void gen8_hwsched_hw_fence_timeout(struct work_struct *work) if (!fault) return; - dev_err(&gmu->pdev->dev, "Hardware fence unack(%d) timeout\n", unack_count); + dev_err(gmu_pdev_dev, "Hardware fence unack(%d) timeout\n", unack_count); if (drawctxt) { struct kgsl_process_private *proc_priv = drawctxt->base.proc_priv; - dev_err(&gmu->pdev->dev, + dev_err(gmu_pdev_dev, "Hardware fence got deferred for ctx:%d ts:%d pid:%d proc:%s\n", drawctxt->base.id, ts, pid_nr(proc_priv->pid), proc_priv->comm); } @@ -2540,9 +2547,10 @@ static void gen8_hwsched_hw_fence_timer(struct timer_list *t) int gen8_hwsched_hfi_probe(struct adreno_device *adreno_dev) { struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); + struct platform_device *gmu_pdev = GMU_PDEV(KGSL_DEVICE(adreno_dev)); struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev); - gmu->hfi.irq = kgsl_request_irq(gmu->pdev, "hfi", + gmu->hfi.irq = kgsl_request_irq(gmu_pdev, "hfi", gen8_hwsched_hfi_handler, adreno_dev); if (gmu->hfi.irq < 0) @@ -2684,7 +2692,6 @@ static int send_context_pointers(struct adreno_device *adreno_dev, static int hfi_context_register(struct adreno_device *adreno_dev, struct kgsl_context *context) { - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret; @@ -2693,7 +2700,7 @@ static int hfi_context_register(struct adreno_device *adreno_dev, ret = send_context_register(adreno_dev, context); if (ret) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Unable to register context %u: %d\n", context->id, ret); @@ -2705,7 +2712,7 @@ static int hfi_context_register(struct adreno_device *adreno_dev, ret = 
send_context_pointers(adreno_dev, context); if (ret) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "Unable to register context %u pointers: %d\n", context->id, ret); @@ -2866,7 +2873,6 @@ int gen8_hwsched_check_context_inflight_hw_fences(struct adreno_device *adreno_d struct adreno_context *drawctxt) { struct adreno_hw_fence_entry *entry, *tmp; - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); int ret = 0; spin_lock(&drawctxt->lock); @@ -2874,7 +2880,7 @@ int gen8_hwsched_check_context_inflight_hw_fences(struct adreno_device *adreno_d struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; if (timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)), "detached ctx:%d has unsignaled fence ts:%d retired:%d\n", drawctxt->base.id, (u32)entry->cmd.ts, hdr->out_fence_ts); ret = -EINVAL; @@ -2956,7 +2962,6 @@ static int check_detached_context_hardware_fences(struct adreno_device *adreno_d { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct adreno_hw_fence_entry *entry, *tmp; - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); int ret = 0; /* We don't need the drawctxt lock because this context has been detached */ @@ -2964,7 +2969,7 @@ static int check_detached_context_hardware_fences(struct adreno_device *adreno_d struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr; if ((timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0)) { - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "detached ctx:%d has unsignaled fence ts:%d retired:%d\n", drawctxt->base.id, (u32)entry->cmd.ts, hdr->out_fence_ts); ret = -EINVAL; @@ -3015,7 +3020,6 @@ static inline int setup_hw_fence_info_cmd(struct adreno_device *adreno_dev, int gen8_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev, struct adreno_hw_fence_entry *entry, u64 flags) { - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct gen8_hwsched_hfi 
*hfi = to_gen8_hwsched_hfi(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret = 0; @@ -3045,7 +3049,7 @@ int gen8_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev, if (!ret) ret = adreno_hwsched_wait_ack_completion(adreno_dev, - &gmu->pdev->dev, &gen8_hw_fence_ack, + GMU_PDEV_DEV(device), &gen8_hw_fence_ack, gen8_hwsched_process_msgq); memset(&gen8_hw_fence_ack, 0x0, sizeof(gen8_hw_fence_ack)); @@ -3186,7 +3190,6 @@ void gen8_hwsched_create_hw_fence(struct adreno_device *adreno_dev, /* Only allow a single log in a second */ static DEFINE_RATELIMIT_STATE(_rs, HZ, 1); struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev); - struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); u32 retired = 0; int ret = 0; @@ -3241,7 +3244,7 @@ void gen8_hwsched_create_hw_fence(struct adreno_device *adreno_dev, ret = _send_hw_fence_no_ack(adreno_dev, entry); if (ret) { if (__ratelimit(&_rs)) - dev_err(&gmu->pdev->dev, + dev_err(GMU_PDEV_DEV(device), "hw fence for ctx:%d ts:%d ret:%d may not be destroyed\n", kfence->context_id, kfence->timestamp, ret); kgsl_hw_fence_destroy(kfence); @@ -3575,6 +3578,7 @@ static void trigger_context_unregister_fault(struct adreno_device *adreno_dev, static int send_context_unregister_hfi(struct adreno_device *adreno_dev, struct kgsl_context *context, u32 ts) { + struct device *gmu_pdev_dev = GMU_PDEV_DEV(KGSL_DEVICE(adreno_dev)); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev); struct adreno_context *drawctxt = ADRENO_CONTEXT(context); @@ -3620,7 +3624,8 @@ static int send_context_unregister_hfi(struct adreno_device *adreno_dev, } ret = adreno_hwsched_ctxt_unregister_wait_completion(adreno_dev, - &gmu->pdev->dev, &pending_ack, gen8_hwsched_process_msgq, &cmd); + gmu_pdev_dev, &pending_ack, + gen8_hwsched_process_msgq, &cmd); if (ret) { trigger_context_unregister_fault(adreno_dev, drawctxt); goto done; diff --git a/kgsl_gmu_core.h 
b/kgsl_gmu_core.h index 11cb4ffd0a..9395873edf 100644 --- a/kgsl_gmu_core.h +++ b/kgsl_gmu_core.h @@ -15,6 +15,12 @@ /* GMU_DEVICE - Given an KGSL device return the GMU specific struct */ #define GMU_DEVICE_OPS(_a) ((_a)->gmu_core.dev_ops) +/* GMU_PDEV - Given a KGSL device return the GMU platform device struct */ +#define GMU_PDEV(device) ((device)->gmu_core.pdev) + +/* GMU_PDEV_DEV - Given a KGSL device return pointer to struct dev for GMU platform device */ +#define GMU_PDEV_DEV(device) (&((GMU_PDEV(device))->dev)) + #define MAX_GX_LEVELS 32 #define MAX_GX_LEVELS_LEGACY 16 #define MAX_CX_LEVELS 4 @@ -444,6 +450,8 @@ struct gmu_core_device { unsigned long flags; /** @gf_panic: GMU fault panic policy */ enum gmu_fault_panic_policy gf_panic; + /** @pdev: platform device for the gmu */ + struct platform_device *pdev; }; extern struct platform_driver a6xx_gmu_driver; From afed8d700028a01f416692c7a85d166e7dd9e3a4 Mon Sep 17 00:00:00 2001 From: Kaushal Sanadhya Date: Wed, 21 Aug 2024 23:40:41 +0530 Subject: [PATCH 0917/1016] kgsl: gen8: Disable RGBA FP16 compression for UBWC 4.0 FP16 is a special compression technique introduced for RGBA16 format from UBWC 4.3 onwards. Hence, disable it to correctly configure UBWC 4.0. 
Change-Id: I742a29bdddbb93dd9da4ef7b0ff05fc4b020a229 Signed-off-by: Kaushal Sanadhya --- adreno_gen8.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/adreno_gen8.c b/adreno_gen8.c index 8ca41c7681..17488b59bc 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -1305,7 +1305,7 @@ int gen8_start(struct adreno_device *adreno_dev) u32 hbb_lo = 1, hbb_hi = 0, hbb = 1; struct cpu_gpu_lock *pwrup_lock = adreno_dev->pwrup_reglist->hostptr; u64 uche_trap_base = gen8_get_uche_trap_base(); - u32 rgba8888_lossless = 0; + u32 rgba8888_lossless = 0, fp16compoptdis = 0; int is_current_rt = rt_task(current); /* Reset aperture fields to go through first aperture write check */ @@ -1390,6 +1390,7 @@ int gen8_start(struct adreno_device *adreno_dev) case KGSL_UBWC_4_0: amsbc = 1; rgb565_predicator = 1; + fp16compoptdis = 1; rgba8888_lossless = 1; mode2 = 2; break; @@ -1420,6 +1421,7 @@ int gen8_start(struct adreno_device *adreno_dev) gen8_regwrite_aperture(device, GEN8_RB_CMP_NC_MODE_CNTL, FIELD_PREP(GENMASK(17, 15), mode2) | FIELD_PREP(GENMASK(4, 4), rgba8888_lossless) | + FIELD_PREP(GENMASK(3, 3), fp16compoptdis) | FIELD_PREP(GENMASK(2, 2), rgb565_predicator) | FIELD_PREP(GENMASK(1, 1), amsbc) | FIELD_PREP(GENMASK(0, 0), mal), From 08fd3099f629bbb9771a4997cc8ec2bc90c20d9d Mon Sep 17 00:00:00 2001 From: Piyush Mehta Date: Thu, 18 Jul 2024 14:05:51 +0530 Subject: [PATCH 0918/1016] kgsl: Update power-related features and hub clk for a621 Enable ACD and BCL feature and gmu_hub_clk_freq support in a621 GPU. 
Change-Id: I6b5b740cc420685fa061e9c8bc8b949da28376e8 Signed-off-by: Piyush Mehta --- adreno-gpulist.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 3c760e772d..7a5f9cb0f5 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -1076,7 +1076,7 @@ static const struct adreno_a6xx_core adreno_gpu_core_a621 = { .compatible = "qcom,adreno-gpu-a621", .features = ADRENO_CONTENT_PROTECTION | ADRENO_IOCOHERENT | ADRENO_APRIV | ADRENO_LSR | ADRENO_PREEMPTION | - ADRENO_IFPC, + ADRENO_IFPC | ADRENO_ACD | ADRENO_BCL, .gpudev = &adreno_a6xx_hwsched_gpudev.base, .perfcounters = &adreno_a6xx_hwsched_perfcounters, .uche_gmem_alignment = 0, @@ -1100,6 +1100,7 @@ static const struct adreno_a6xx_core adreno_gpu_core_a621 = { .protected_regs = a620_protected_regs, .disable_tseskip = true, .highest_bank_bit = 13, + .gmu_hub_clk_freq = 200000000, }; static const struct kgsl_regmap_list a640_hwcg_regs[] = { From 105ce1185a0577c2993e086928b4569444d9d31e Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 3 Sep 2024 14:59:52 +0530 Subject: [PATCH 0919/1016] kgsl: pwrctrl: Use atomic64 for interval_timeout Remove device mutex for interval_timeout and use atomic64 for thread-safe operations. This helps to remove the device mutex, thereby reducing contention on the primary device mutex. 
Change-Id: I8bf35a8f0fc350dc6a0c09df832567f26d928e67 Signed-off-by: Kamal Agrawal --- adreno.c | 2 +- adreno_snapshot.c | 2 +- kgsl_device.h | 3 ++- kgsl_pwrctrl.c | 13 ++++++------- kgsl_pwrctrl.h | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/adreno.c b/adreno.c index 0ba9b20d54..5b9a8f6577 100644 --- a/adreno.c +++ b/adreno.c @@ -894,7 +894,7 @@ static int adreno_of_get_power(struct adreno_device *adreno_dev, if (ret) return ret; - device->pwrctrl.interval_timeout = CONFIG_QCOM_KGSL_IDLE_TIMEOUT; + atomic64_set(&device->pwrctrl.interval_timeout, CONFIG_QCOM_KGSL_IDLE_TIMEOUT); /* Set default bus control to true on all targets */ device->pwrctrl.bus_control = true; diff --git a/adreno_snapshot.c b/adreno_snapshot.c index 8537cf6020..6c0bd60e84 100644 --- a/adreno_snapshot.c +++ b/adreno_snapshot.c @@ -970,7 +970,7 @@ static void adreno_snapshot_os(struct kgsl_device *device, header->seconds = ktime_get_real_seconds(); header->power_flags = device->pwrctrl.power_flags; header->power_level = device->pwrctrl.active_pwrlevel; - header->power_interval_timeout = device->pwrctrl.interval_timeout; + header->power_interval_timeout = atomic64_read(&device->pwrctrl.interval_timeout); header->grpclk = clk_get_rate(device->pwrctrl.grp_clks[0]); /* Get the current PT base */ diff --git a/kgsl_device.h b/kgsl_device.h index b7841e8466..168fff49fc 100644 --- a/kgsl_device.h +++ b/kgsl_device.h @@ -686,7 +686,8 @@ static inline bool kgsl_state_is_awake(struct kgsl_device *device) */ static inline void kgsl_start_idle_timer(struct kgsl_device *device) { - device->idle_jiffies = jiffies + msecs_to_jiffies(device->pwrctrl.interval_timeout); + device->idle_jiffies = jiffies + + msecs_to_jiffies(atomic64_read(&device->pwrctrl.interval_timeout)); mod_timer(&device->idle_timer, device->idle_jiffies); } diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 145a5f15e2..13fb010b18 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -169,7 +169,7 @@ unsigned int 
kgsl_pwrctrl_adjust_pwrlevel(struct kgsl_device *device, * the timestamp retires */ pwr->constraint.expires = jiffies + - msecs_to_jiffies(device->pwrctrl.interval_timeout); + msecs_to_jiffies(atomic64_read(&device->pwrctrl.interval_timeout)); kgsl_context_put(context); } @@ -308,7 +308,7 @@ void kgsl_pwrctrl_set_constraint(struct kgsl_device *device, pwrc_old->sub_type = pwrc->sub_type; pwrc_old->owner_id = id; pwrc_old->expires = jiffies + - msecs_to_jiffies(device->pwrctrl.interval_timeout); + msecs_to_jiffies(atomic64_read(&device->pwrctrl.interval_timeout)); pwrc_old->owner_timestamp = ts; kgsl_pwrctrl_pwrlevel_change(device, constraint); /* Trace the constraint being set by the driver */ @@ -317,7 +317,7 @@ void kgsl_pwrctrl_set_constraint(struct kgsl_device *device, pwrc_old->owner_id = id; pwrc_old->owner_timestamp = ts; pwrc_old->expires = jiffies + - msecs_to_jiffies(device->pwrctrl.interval_timeout); + msecs_to_jiffies(atomic64_read(&device->pwrctrl.interval_timeout)); } } @@ -578,9 +578,7 @@ static ssize_t idle_timer_store(struct device *dev, struct device_attribute *att if (val > jiffies_to_usecs(MAX_JIFFY_OFFSET)) return -EINVAL; - mutex_lock(&device->mutex); - device->pwrctrl.interval_timeout = val; - mutex_unlock(&device->mutex); + atomic64_set(&device->pwrctrl.interval_timeout, val); return count; } @@ -590,7 +588,8 @@ static ssize_t idle_timer_show(struct device *dev, { struct kgsl_device *device = dev_get_drvdata(dev); - return scnprintf(buf, PAGE_SIZE, "%u\n", device->pwrctrl.interval_timeout); + return scnprintf(buf, PAGE_SIZE, "%llu\n", + atomic64_read(&device->pwrctrl.interval_timeout)); } static ssize_t minbw_timer_store(struct device *dev, diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index 4efba1b2c7..88b4e3a6bc 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -144,7 +144,7 @@ struct kgsl_pwrctrl { unsigned int min_render_pwrlevel; unsigned int num_pwrlevels; unsigned int throttle_mask; - u32 interval_timeout; + atomic64_t 
interval_timeout; u64 clock_times[KGSL_MAX_PWRLEVELS]; /** @thermal_time: Time in usecs the GPU is limited by thermal constraints */ u64 thermal_time; From 39ebf89280d8415d8314a2e19b80b2917c7c70ef Mon Sep 17 00:00:00 2001 From: Carter Cooper Date: Thu, 19 Sep 2024 13:28:37 -0600 Subject: [PATCH 0920/1016] kgsl: gen8: Update gen8_0_0 GEN8_UCHE_MODE_CNTL register programming Set disableSetHash=0 in GEN8_UCHE_MODE_CNTL register. Change-Id: I2f5df20014eba31fea0ba09e9ede4d9b83c2c843 Signed-off-by: Carter Cooper --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index f696bb61d3..fdfec6553f 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2821,7 +2821,7 @@ static const struct gen8_nonctxt_regs gen8_0_0_nonctxt_regs[] = { /* Enable cubemap small miplevel optimization settings */ { GEN8_TPL1_DBG_ECO_CNTL1, 0x00000724, BIT(PIPE_NONE) }, /* Disable tag bank id hashing */ - { GEN8_UCHE_MODE_CNTL, 0x000a0000, BIT(PIPE_NONE) }, + { GEN8_UCHE_MODE_CNTL, 0x00080000, BIT(PIPE_NONE) }, { GEN8_UCHE_CCHE_MODE_CNTL, 0x00001000, BIT(PIPE_NONE) }, /* Limit gmem number of ways for GMEM requests in each set */ { GEN8_UCHE_CCHE_CACHE_WAYS, 0x00000800, BIT(PIPE_NONE)}, From b2a53b0d7a804596c3d4a2a4baeb19d3e495f2f7 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Wed, 4 Sep 2024 19:42:09 +0530 Subject: [PATCH 0921/1016] kgsl: pwrctrl: Use dedicated GMU CX GenPD instance for voting Currently, there is a race condition in GenPD framework where GPU CX GDSC can remain ON if both GMU and KGSL SMMU devices are suspending in parallel and are voting on the same power domain. Use dedicated GMU CX GenPD instance for CX GDSC voting to avoid the race. Since GMU and SMMU votes are consolidate in CX GenPD instance, use it for registering the notifier. 
Change-Id: I220dfd94bb7d5144fc4989047083c70b3560f3c2 Signed-off-by: Kamal Agrawal --- adreno.c | 10 +++++----- adreno_a6xx_gmu.c | 2 +- kgsl_pwrctrl.c | 29 ++++++++++++++++++++++------- kgsl_pwrctrl.h | 9 ++++++++- 4 files changed, 36 insertions(+), 14 deletions(-) diff --git a/adreno.c b/adreno.c index 22e68d3f32..973fb430c2 100644 --- a/adreno.c +++ b/adreno.c @@ -1263,8 +1263,8 @@ static int adreno_pm_notifier(struct notifier_block *nb, unsigned long event, vo } } - if (pwr->cx_pd) { - pd = container_of(pwr->cx_pd->pm_domain, struct generic_pm_domain, domain); + if (pwr->gmu_cx_pd) { + pd = container_of(pwr->gmu_cx_pd->pm_domain, struct generic_pm_domain, domain); if (pd->prepared_count) { dev_err_ratelimited(device->dev, @@ -1501,7 +1501,7 @@ int adreno_device_probe(struct platform_device *pdev, * notifications when system has come out of suspend completely, so that we can perform * fault recovery. */ - if (device->pwrctrl.gx_pd || device->pwrctrl.cx_pd) { + if (device->pwrctrl.gx_pd || device->pwrctrl.gmu_cx_pd) { adreno_dev->pm_nb.notifier_call = adreno_pm_notifier; register_pm_notifier(&adreno_dev->pm_nb); } @@ -1789,8 +1789,8 @@ static bool gdscs_left_on(struct kgsl_device *device) if (pwr->gx_regulator) return regulator_is_enabled(pwr->gx_regulator); - if (pwr->cx_pd) - return kgsl_genpd_is_enabled(pwr->cx_pd); + if (pwr->gmu_cx_pd) + return kgsl_genpd_is_enabled(pwr->gmu_cx_pd); if (pwr->gx_pd) return kgsl_genpd_is_enabled(pwr->gx_pd); diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index 28a3ef449a..bdcc6d0be3 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -598,7 +598,7 @@ void a6xx_gmu_disable_gdsc(struct adreno_device *adreno_dev) struct kgsl_pwrctrl *pwr = &device->pwrctrl; /* ADRENO_QUIRK_CX_GDSC quirk is not supported for genpd */ - WARN_ON_ONCE(pwr->cx_pd && ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)); + WARN_ON_ONCE(pwr->gmu_cx_pd && ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)); if (pwr->cx_regulator && 
ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)) regulator_set_mode(pwr->cx_regulator, REGULATOR_MODE_IDLE); diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 261131e722..1116757347 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -1364,7 +1364,7 @@ int kgsl_pwrctrl_enable_cx_gdsc(struct kgsl_device *device) struct kgsl_pwrctrl *pwr = &device->pwrctrl; int ret; - if (!pwr->cx_regulator && !pwr->cx_pd) + if (!pwr->cx_regulator && !pwr->gmu_cx_pd) return 0; ret = wait_for_completion_timeout(&pwr->cx_gdsc_gate, msecs_to_jiffies(5000)); @@ -1381,7 +1381,7 @@ int kgsl_pwrctrl_enable_cx_gdsc(struct kgsl_device *device) if (pwr->cx_regulator) ret = regulator_enable(pwr->cx_regulator); else - ret = pm_runtime_resume_and_get(pwr->cx_pd); + ret = pm_runtime_resume_and_get(pwr->gmu_cx_pd); if (ret) dev_err(device->dev, "Failed to enable CX gdsc, error %d\n", ret); @@ -1414,7 +1414,7 @@ void kgsl_pwrctrl_disable_cx_gdsc(struct kgsl_device *device) { struct kgsl_pwrctrl *pwr = &device->pwrctrl; - if (!pwr->cx_regulator && !pwr->cx_pd) + if (!pwr->cx_regulator && !pwr->gmu_cx_pd) return; kgsl_mmu_send_tlb_hint(&device->mmu, true); @@ -1424,7 +1424,7 @@ void kgsl_pwrctrl_disable_cx_gdsc(struct kgsl_device *device) if (pwr->cx_regulator) regulator_disable(pwr->cx_regulator); else - pm_runtime_put_sync(pwr->cx_pd); + pm_runtime_put_sync(pwr->gmu_cx_pd); } void kgsl_pwrctrl_disable_gx_gdsc(struct kgsl_device *device) @@ -1477,9 +1477,20 @@ static int kgsl_pwrctrl_probe_cx_gdsc(struct kgsl_device *device, struct platfor if (of_property_read_bool(pdev->dev.of_node, "power-domains")) { /* Get virtual device handle for CX GDSC to control it */ - struct device *cx_pd = dev_pm_domain_attach_by_name(&pdev->dev, "cx"); + struct device *cx_pd, *gmu_cx_pd; + gmu_cx_pd = dev_pm_domain_attach_by_name(&pdev->dev, "gmu_cx"); + if (IS_ERR_OR_NULL(gmu_cx_pd)) { + dev_err_probe(&pdev->dev, PTR_ERR(gmu_cx_pd), + "Failed to attach GMU cx power domain\n"); + return IS_ERR(gmu_cx_pd) ? 
PTR_ERR(gmu_cx_pd) : -EINVAL; + } + pwr->gmu_cx_pd = gmu_cx_pd; + + cx_pd = dev_pm_domain_attach_by_name(&pdev->dev, "cx"); if (IS_ERR_OR_NULL(cx_pd)) { + dev_pm_domain_detach(gmu_cx_pd, false); + pwr->gmu_cx_pd = NULL; dev_err_probe(&pdev->dev, PTR_ERR(cx_pd), "Failed to attach cx power domain\n"); return IS_ERR(cx_pd) ? PTR_ERR(cx_pd) : -EINVAL; @@ -1538,9 +1549,11 @@ int kgsl_pwrctrl_probe_gdscs(struct kgsl_device *device, struct platform_device return ret; ret = kgsl_pwrctrl_probe_gx_gdsc(device, pdev); - if (ret && pwr->cx_pd) { + if (ret && pwr->gmu_cx_pd) { dev_pm_domain_detach(pwr->cx_pd, false); + dev_pm_domain_detach(pwr->gmu_cx_pd, false); pwr->cx_pd = NULL; + pwr->gmu_cx_pd = NULL; } return ret; @@ -2013,10 +2026,12 @@ void kgsl_pwrctrl_close(struct kgsl_device *device) pm_runtime_disable(&device->pdev->dev); - if (pwr->cx_pd) { + if (pwr->gmu_cx_pd) { dev_pm_genpd_remove_notifier(pwr->cx_pd); dev_pm_domain_detach(pwr->cx_pd, false); + dev_pm_domain_detach(pwr->gmu_cx_pd, false); pwr->cx_pd = NULL; + pwr->gmu_cx_pd = NULL; } if (pwr->gx_pd) { diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index a75cc8dc95..b83e5a6543 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -114,8 +114,15 @@ struct kgsl_pwrctrl { struct regulator *cx_regulator; /** @gx_regulator: Pointer to the GX domain regulator if applicable */ struct regulator *gx_regulator; - /** @cx_pd: Power domain for controlling CX GDSC */ + /** + * @cx_pd: Power domain for registering CX GDSC notifier + * + * Only GMU device votes for GMU_CX_PD. Other client votes are consolidated + * in the CX GenPD instance, so use this for registering the notifier. 
+ */ struct device *cx_pd; + /** @gmu_cx_pd: Power domain for controlling GMU CX GDSC instance */ + struct device *gmu_cx_pd; /** @gx_pd: Power domain for controlling GX GDSC */ struct device *gx_pd; /** @gx_regulator_parent: Pointer to the GX domain parent supply */ From 7b02b370cf125bb6957a1a611c44534a6186a5a1 Mon Sep 17 00:00:00 2001 From: Carter Cooper Date: Mon, 1 Jul 2024 14:58:34 -0600 Subject: [PATCH 0922/1016] kgsl: gen8: Add support to restrict CL time in the GPU Previously CL contexts could request infinite time on the GPU by setting NO_FT flag at context creation. Going forward, allow each target to define a max timeout for CL context when the NO_FT flag is set. If no value is explicitly set for the target then the legacy infinite timeout will be honored. Change-Id: I5f3d0771d5f9fb6d2a77dcdc54f6d0d7f1e104e5 Signed-off-by: Carter Cooper --- adreno-gpulist.h | 3 +++ adreno_gen8.h | 2 ++ adreno_gen8_hwsched.c | 7 +++++++ 3 files changed, 12 insertions(+) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index fdfec6553f..39d194bca1 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2938,6 +2938,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .noc_timeout_us = 3410, /* 3.41 msec */ .ctxt_record_size = (13536 * SZ_1K), .preempt_level = 1, + .cl_no_ft_timeout_ms = 8000, }; static const struct adreno_gen8_core adreno_gpu_core_gen8_0_1 = { @@ -2978,6 +2979,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_1 = { .noc_timeout_us = 3410, /* 3.41 msec */ .ctxt_record_size = (13536 * SZ_1K), .preempt_level = 1, + .cl_no_ft_timeout_ms = 8000, }; /* GEN8_4_0 noncontext register list */ @@ -3086,6 +3088,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_4_0 = { .bcl_data = 1, .ctxt_record_size = (13536 * SZ_1K), .noc_timeout_us = 3410, /* 3.41 msec */ + .cl_no_ft_timeout_ms = 8000, }; extern const struct gen8_snapshot_block_list gen8_3_0_snapshot_block_list; diff --git a/adreno_gen8.h b/adreno_gen8.h index 
306155d60d..78b511691f 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -171,6 +171,8 @@ struct adreno_gen8_core { bool fast_bus_hint; /** @noc_timeout_us: GPU config NOC port timeout in usec */ u32 noc_timeout_us; + /** @cl_no_ft_timeout_ms: Use this timeout for CL NO_FT instead of infinite */ + u32 cl_no_ft_timeout_ms; }; /** diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 879a50f290..98d213cbd7 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -664,6 +664,7 @@ static int gen8_hwsched_gmu_memory_init(struct adreno_device *adreno_dev) { struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); int ret; + const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); /* GMU Virtual register bank */ if (IS_ERR_OR_NULL(gmu->vrb)) { @@ -697,6 +698,12 @@ static int gen8_hwsched_gmu_memory_init(struct adreno_device *adreno_dev) gmu_core_trace_header_init(&gmu->trace); } + /* Set the CL infinite timeout VRB override (if declared in gpulist) */ + if (gen8_core->cl_no_ft_timeout_ms) + gmu_core_set_vrb_register(gmu->vrb->hostptr, + VRB_CL_NO_FT_TIMEOUT, + gen8_core->cl_no_ft_timeout_ms); + return 0; } From 278c2e1a80964b0429ecf9b139015f9f1f801ba9 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Wed, 2 Oct 2024 13:02:38 -0700 Subject: [PATCH 0923/1016] kgsl: hwsched: gen8: Add GMU thermal support Add support to allow the GMU to throttle when GPU temp gets above thresholds. 
Change-Id: Icad35b6831a0068a8e6190f26698f3cafa4553db Signed-off-by: Carter Cooper Signed-off-by: Hareesh Gundu --- adreno.h | 2 ++ adreno_gen8.h | 3 +++ adreno_gen8_hwsched.c | 3 +++ adreno_gen8_hwsched_hfi.c | 31 +++++++++++++++++++++++ adreno_gen8_hwsched_hfi.h | 52 +++++++++++++++++++++++++++++++++++++++ adreno_hfi.h | 2 ++ kgsl_gmu_core.h | 1 + 7 files changed, 94 insertions(+) diff --git a/adreno.h b/adreno.h index dff0f293f6..f5a04432c3 100644 --- a/adreno.h +++ b/adreno.h @@ -140,6 +140,8 @@ #define ADRENO_GMU_WARMBOOT BIT(19) /* The GPU supports CLX */ #define ADRENO_CLX BIT(20) +/* Enable GMU support for GMU based thermal mitigation */ +#define ADRENO_GMU_THERMAL_MITIGATION BIT(21) /* * Adreno GPU quirks - control bits for various workarounds diff --git a/adreno_gen8.h b/adreno_gen8.h index 78b511691f..ffac83d5e3 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -10,6 +10,7 @@ #include #include "adreno_gen8_gmu.h" +#include "adreno_gen8_hwsched_hfi.h" #include "gen8_reg.h" #define GEN8_0_0_NUM_PHYSICAL_SLICES 3 @@ -173,6 +174,8 @@ struct adreno_gen8_core { u32 noc_timeout_us; /** @cl_no_ft_timeout_ms: Use this timeout for CL NO_FT instead of infinite */ u32 cl_no_ft_timeout_ms; + /** @therm_profile: GMU thermal mitigation profile */ + const struct hfi_therm_profile_ctrl *therm_profile; }; /** diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 98d213cbd7..3924342e46 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -711,6 +711,9 @@ static int gen8_hwsched_gmu_init(struct adreno_device *adreno_dev) { int ret; + if (ADRENO_FEATURE(adreno_dev, ADRENO_GMU_THERMAL_MITIGATION)) + set_bit(GMU_THERMAL_MITIGATION, &KGSL_DEVICE(adreno_dev)->gmu_core.flags); + ret = gen8_gmu_parse_fw(adreno_dev); if (ret) return ret; diff --git a/adreno_gen8_hwsched_hfi.c b/adreno_gen8_hwsched_hfi.c index a9e33c1373..e79031a730 100644 --- a/adreno_gen8_hwsched_hfi.c +++ b/adreno_gen8_hwsched_hfi.c @@ -2206,6 +2206,33 @@ static void 
warmboot_init_message_record_bitmask(struct adreno_device *adreno_de clear_bit(H2F_MSG_GX_BW_PERF_VOTE, hfi->wb_set_record_bitmask); } +static int gen8_hfi_send_thermal_feature_ctrl(struct adreno_device *adreno_dev) +{ + const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); + const struct hfi_therm_profile_ctrl *therm = gen8_core->therm_profile; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + static struct hfi_thermaltable_cmd cmd = {0}; + int ret; + + if (!test_bit(GMU_THERMAL_MITIGATION, &device->gmu_core.flags) || !therm) + return 0; + + ret = gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_THERMAL, 1, 0); + if (ret) + return ret; + + if (cmd.version == 0) { + ret = CMD_MSG_HDR(cmd, H2F_MSG_THERM_TBL); + if (ret) + return ret; + + cmd.version = 1; + memcpy(&cmd.ctrl, therm, sizeof(*therm)); + } + + return gen8_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd)); +} + int gen8_hwsched_hfi_start(struct adreno_device *adreno_dev) { struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); @@ -2262,6 +2289,10 @@ int gen8_hwsched_hfi_start(struct adreno_device *adreno_dev) if (ret) goto err; + ret = gen8_hfi_send_thermal_feature_ctrl(adreno_dev); + if (ret) + goto err; + ret = gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_HWSCHED, 1, 0); if (ret) goto err; diff --git a/adreno_gen8_hwsched_hfi.h b/adreno_gen8_hwsched_hfi.h index 9c37cc6340..a6775f1a85 100644 --- a/adreno_gen8_hwsched_hfi.h +++ b/adreno_gen8_hwsched_hfi.h @@ -39,6 +39,58 @@ */ #define GEN8_HWSCHED_HW_FENCE_ABORT_BIT 0x2 +#define MAX_THROTTLE_LVLS 3 +struct hfi_tsens_cfg { + /** @limit_u: deci-C value for upper trigger point */ + u32 limit_u; + /** @limit_l: deci-C value for lower trigger point */ + u32 limit_l; + /** @margin_u: deci-C value for upper trigger intercept margin */ + u32 margin_u; + /** @margin_l: deci-C value for lower trigger intercept margin */ + u32 margin_l; +} __packed; + +struct hfi_tsens_throttle_param { + /** @throttle_hyst: Microsecond wait between 
each throttle level */ + u32 throttle_hyst; + /** @num_throttle_cnt: Number of entries in throttle_levels */ + u32 num_throttle_cnt; + /** @throttle_lvls: Percent of original clock to throttle per level */ + u32 throttle_lvls[MAX_THROTTLE_LVLS]; +} __packed; + +struct hfi_therm_profile_ctrl { + /** @feature_en: Feature enable status */ + u16 feature_en; + /** @feature_rev: Feature revision */ + u16 feature_rev; + /** @tsens_en: tsens sensor enable status */ + u32 tsens_en; + /** @tj_limit: deci-C value for tj limit */ + u32 tj_limit; + /** @tskin_addr: unused */ + u32 tskin_addr; + /** @tskin_limit: unused */ + u32 tskin_limit; + /** @tsens_cfg_cnt: Count of tsens configuration structs */ + u32 tsens_cfg_cnt; + /** @tsens_cfg: Struct of tsens configurations */ + struct hfi_tsens_cfg tsens_cfg; + /** @throttle_cfg: Struct of throttle configurations */ + struct hfi_tsens_throttle_param throttle_cfg; +} __packed; + +/* H2F */ +struct hfi_thermaltable_cmd { + /** @hdr: HFI header message */ + u32 hdr; + /** @version: Version identifier for the format used for domains */ + u32 version; + /** @ctrl: Thermal profile control information */ + struct hfi_therm_profile_ctrl ctrl; +} __packed; + struct gen8_hwsched_hfi { struct hfi_mem_alloc_entry mem_alloc_table[32]; u32 mem_alloc_entries; diff --git a/adreno_hfi.h b/adreno_hfi.h index b2f9ded168..3ba15ffcb8 100644 --- a/adreno_hfi.h +++ b/adreno_hfi.h @@ -77,6 +77,7 @@ #define HFI_FEATURE_HW_FENCE 25 #define HFI_FEATURE_PERF_NORETAIN 26 #define HFI_FEATURE_DMS 27 +#define HFI_FEATURE_THERMAL 28 #define HFI_FEATURE_AQE 29 /* Types to be used with H2F_MSG_TABLE */ @@ -459,6 +460,7 @@ enum hfi_msg_type { H2F_MSG_TEST = 5, H2F_MSG_ACD_TBL = 7, H2F_MSG_CLX_TBL = 8, + H2F_MSG_THERM_TBL = 9, H2F_MSG_START = 10, H2F_MSG_FEATURE_CTRL = 11, H2F_MSG_GET_VALUE = 12, diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h index 6914671d21..8877f2272c 100644 --- a/kgsl_gmu_core.h +++ b/kgsl_gmu_core.h @@ -57,6 +57,7 @@ enum gmu_core_flags { 
GMU_ENABLED, GMU_RSCC_SLEEP_SEQ_DONE, GMU_DISABLE_SLUMBER, + GMU_THERMAL_MITIGATION, }; /* From c10e45b5d47f23624c44e719fb3a3029cca82e9f Mon Sep 17 00:00:00 2001 From: Carter Cooper Date: Thu, 5 Sep 2024 13:14:58 -0700 Subject: [PATCH 0924/1016] kgsl: gen8: Enable thermal gmu mitigation for gen8_0_x Enable GMU thermal mitigation for gen8_0_x targets to support throttle when GPU temp exceed thresholds. Change-Id: I8570d6b4148e157562851800022f9e768c5e6e9e Signed-off-by: Carter Cooper Signed-off-by: Hareesh Gundu --- adreno-gpulist.h | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 39d194bca1..571c44b606 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2900,6 +2900,25 @@ static const struct kgsl_regmap_list gen8_ao_hwcg_regs[] = { { GEN8_GMUAO_CGC_HYST_CNTL, 0x00005555 }, }; +static const struct hfi_therm_profile_ctrl therm_profile_8_0_0 = { + .feature_en = 1, + .feature_rev = 1, + .tsens_en = 0xFF, + .tj_limit = 900, + .tskin_addr = 0, + .tskin_limit = 0, + .tsens_cfg_cnt = 1, + .tsens_cfg.limit_u = 1130, + .tsens_cfg.limit_l = 10, + .tsens_cfg.margin_u = 30, + .tsens_cfg.margin_l = 20, + .throttle_cfg.throttle_hyst = 250, + .throttle_cfg.num_throttle_cnt = 3, + .throttle_cfg.throttle_lvls[0] = 50, + .throttle_cfg.throttle_lvls[1] = 37, + .throttle_cfg.throttle_lvls[2] = 20, +}; + static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .base = { DEFINE_ADRENO_REV(ADRENO_REV_GEN8_0_0, @@ -2909,7 +2928,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_BCL | ADRENO_IFPC | ADRENO_HW_FENCE | ADRENO_PREEMPTION | - ADRENO_ACD | ADRENO_CLX, + ADRENO_ACD | ADRENO_CLX | ADRENO_GMU_THERMAL_MITIGATION, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_64M, @@ -2939,6 +2958,7 @@ static const 
struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .ctxt_record_size = (13536 * SZ_1K), .preempt_level = 1, .cl_no_ft_timeout_ms = 8000, + .therm_profile = &therm_profile_8_0_0, }; static const struct adreno_gen8_core adreno_gpu_core_gen8_0_1 = { @@ -2950,7 +2970,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_1 = { ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_BCL | ADRENO_IFPC | ADRENO_HW_FENCE | ADRENO_PREEMPTION | - ADRENO_ACD | ADRENO_CLX, + ADRENO_ACD | ADRENO_CLX | ADRENO_GMU_THERMAL_MITIGATION, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_64M, @@ -2980,6 +3000,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_1 = { .ctxt_record_size = (13536 * SZ_1K), .preempt_level = 1, .cl_no_ft_timeout_ms = 8000, + .therm_profile = &therm_profile_8_0_0, }; /* GEN8_4_0 noncontext register list */ From 8f723bc710cfdea087029eda58b39d4f1223167b Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Wed, 4 Sep 2024 19:42:09 +0530 Subject: [PATCH 0925/1016] kgsl: pwrctrl: Use dedicated GMU CX GenPD instance for voting Currently, there is a race condition in GenPD framework where GPU CX GDSC can remain ON if both GMU and KGSL SMMU devices are suspending in parallel and are voting on the same power domain. Use dedicated GMU CX GenPD instance for CX GDSC voting to avoid the race. Since GMU and SMMU votes are consolidate in CX GenPD instance, use it for registering the notifier. 
Change-Id: I220dfd94bb7d5144fc4989047083c70b3560f3c2 Signed-off-by: Kamal Agrawal --- adreno.c | 10 +++++----- adreno_a6xx_gmu.c | 2 +- kgsl_pwrctrl.c | 42 +++++++++++++++++++++++++++++++----------- kgsl_pwrctrl.h | 9 ++++++++- 4 files changed, 45 insertions(+), 18 deletions(-) diff --git a/adreno.c b/adreno.c index 66f3e3e33f..1a714d4004 100644 --- a/adreno.c +++ b/adreno.c @@ -1271,8 +1271,8 @@ static int adreno_pm_notifier(struct notifier_block *nb, unsigned long event, vo } } - if (pwr->cx_pd) { - pd = container_of(pwr->cx_pd->pm_domain, struct generic_pm_domain, domain); + if (pwr->gmu_cx_pd) { + pd = container_of(pwr->gmu_cx_pd->pm_domain, struct generic_pm_domain, domain); if (pd->prepared_count) { dev_err_ratelimited(device->dev, @@ -1509,7 +1509,7 @@ int adreno_device_probe(struct platform_device *pdev, * notifications when system has come out of suspend completely, so that we can perform * fault recovery. */ - if (device->pwrctrl.gx_pd || device->pwrctrl.cx_pd) { + if (device->pwrctrl.gx_pd || device->pwrctrl.gmu_cx_pd) { adreno_dev->pm_nb.notifier_call = adreno_pm_notifier; register_pm_notifier(&adreno_dev->pm_nb); } @@ -1797,8 +1797,8 @@ static bool gdscs_left_on(struct kgsl_device *device) if (pwr->gx_regulator) return regulator_is_enabled(pwr->gx_regulator); - if (pwr->cx_pd) - return kgsl_genpd_is_enabled(pwr->cx_pd); + if (pwr->gmu_cx_pd) + return kgsl_genpd_is_enabled(pwr->gmu_cx_pd); if (pwr->gx_pd) return kgsl_genpd_is_enabled(pwr->gx_pd); diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index 24256f179e..dc7b3edad6 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -600,7 +600,7 @@ void a6xx_gmu_disable_gdsc(struct adreno_device *adreno_dev) struct kgsl_pwrctrl *pwr = &device->pwrctrl; /* ADRENO_QUIRK_CX_GDSC quirk is not supported for genpd */ - WARN_ON_ONCE(pwr->cx_pd && ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)); + WARN_ON_ONCE(pwr->gmu_cx_pd && ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)); if (pwr->cx_regulator && 
ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)) regulator_set_mode(pwr->cx_regulator, REGULATOR_MODE_IDLE); diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 0dca3b83c2..2ac79015ef 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -1363,7 +1363,7 @@ int kgsl_pwrctrl_enable_cx_gdsc(struct kgsl_device *device) struct kgsl_pwrctrl *pwr = &device->pwrctrl; int ret; - if (!pwr->cx_regulator && !pwr->cx_pd) + if (!pwr->cx_regulator && !pwr->gmu_cx_pd) return 0; ret = wait_for_completion_timeout(&pwr->cx_gdsc_gate, msecs_to_jiffies(5000)); @@ -1380,7 +1380,7 @@ int kgsl_pwrctrl_enable_cx_gdsc(struct kgsl_device *device) if (pwr->cx_regulator) ret = regulator_enable(pwr->cx_regulator); else - ret = pm_runtime_resume_and_get(pwr->cx_pd); + ret = pm_runtime_resume_and_get(pwr->gmu_cx_pd); if (ret) dev_err(device->dev, "Failed to enable CX gdsc, error %d\n", ret); @@ -1413,7 +1413,7 @@ void kgsl_pwrctrl_disable_cx_gdsc(struct kgsl_device *device) { struct kgsl_pwrctrl *pwr = &device->pwrctrl; - if (!pwr->cx_regulator && !pwr->cx_pd) + if (!pwr->cx_regulator && !pwr->gmu_cx_pd) return; kgsl_mmu_send_tlb_hint(&device->mmu, true); @@ -1423,7 +1423,7 @@ void kgsl_pwrctrl_disable_cx_gdsc(struct kgsl_device *device) if (pwr->cx_regulator) regulator_disable(pwr->cx_regulator); else - pm_runtime_put_sync(pwr->cx_pd); + pm_runtime_put_sync(pwr->gmu_cx_pd); } void kgsl_pwrctrl_disable_gx_gdsc(struct kgsl_device *device) @@ -1484,6 +1484,14 @@ static int kgsl_pwrctrl_probe_cx_gdsc(struct kgsl_device *device, struct platfor return IS_ERR(cx_pd) ? 
PTR_ERR(cx_pd) : -EINVAL; } pwr->cx_pd = cx_pd; + + pwr->gmu_cx_pd = dev_pm_domain_attach_by_name(&pdev->dev, "gmu_cx"); + if (IS_ERR_OR_NULL(pwr->gmu_cx_pd)) { + dev_err(device->dev, + "Failed to attach GMU cx power domain, falling back to cx pd\n"); + /* Fallback to cx pd voting if gmu_cx pd is unavailable */ + pwr->gmu_cx_pd = cx_pd; + } } else { struct regulator *cx_regulator = devm_regulator_get(&pdev->dev, "vddcx"); @@ -1537,10 +1545,18 @@ int kgsl_pwrctrl_probe_gdscs(struct kgsl_device *device, struct platform_device return ret; ret = kgsl_pwrctrl_probe_gx_gdsc(device, pdev); - if (ret && pwr->cx_pd) { + if (!ret) + return ret; + + /* Detach pm domains during failure */ + if (pwr->gmu_cx_pd && (pwr->gmu_cx_pd != pwr->cx_pd)) + dev_pm_domain_detach(pwr->gmu_cx_pd, false); + + if (pwr->cx_pd) dev_pm_domain_detach(pwr->cx_pd, false); - pwr->cx_pd = NULL; - } + + pwr->gmu_cx_pd = NULL; + pwr->cx_pd = NULL; return ret; } @@ -2012,16 +2028,20 @@ void kgsl_pwrctrl_close(struct kgsl_device *device) pm_runtime_disable(&device->pdev->dev); + if (pwr->gmu_cx_pd && (pwr->gmu_cx_pd != pwr->cx_pd)) + dev_pm_domain_detach(pwr->gmu_cx_pd, false); + if (pwr->cx_pd) { dev_pm_genpd_remove_notifier(pwr->cx_pd); dev_pm_domain_detach(pwr->cx_pd, false); - pwr->cx_pd = NULL; } - if (pwr->gx_pd) { + if (pwr->gx_pd) dev_pm_domain_detach(pwr->gx_pd, false); - pwr->gx_pd = NULL; - } + + pwr->gmu_cx_pd = NULL; + pwr->cx_pd = NULL; + pwr->gx_pd = NULL; } void kgsl_idle_check(struct work_struct *work) diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index 15bd0f3309..48e114fe0d 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -114,8 +114,15 @@ struct kgsl_pwrctrl { struct regulator *cx_regulator; /** @gx_regulator: Pointer to the GX domain regulator if applicable */ struct regulator *gx_regulator; - /** @cx_pd: Power domain for controlling CX GDSC */ + /** + * @cx_pd: Power domain for registering CX GDSC notifier + * + * Only GMU device votes for GMU_CX_PD. 
Other client votes are consolidated + * in the CX GenPD instance, so use this for registering the notifier. + */ struct device *cx_pd; + /** @gmu_cx_pd: Power domain for controlling GMU CX GDSC instance */ + struct device *gmu_cx_pd; /** @gx_pd: Power domain for controlling GX GDSC */ struct device *gx_pd; /** @gx_regulator_parent: Pointer to the GX domain parent supply */ From 60f93fb9c798285a6dd6a90e3f55344e3bd74354 Mon Sep 17 00:00:00 2001 From: "V S Ganga VaraPrasad (VARA) Adabala" Date: Sat, 26 Oct 2024 21:59:44 +0530 Subject: [PATCH 0926/1016] Revert "kgsl: pwrctrl: Use dedicated GMU CX GenPD instance for voting" This reverts commit b2a53b0d7a804596c3d4a2a4baeb19d3e495f2f7. Change-Id: I559b04d745a0db7c568d48daec581923511abe24 Signed-off-by: V S Ganga VaraPrasad (VARA) Adabala --- adreno.c | 10 +++++----- adreno_a6xx_gmu.c | 2 +- kgsl_pwrctrl.c | 29 +++++++---------------------- kgsl_pwrctrl.h | 9 +-------- 4 files changed, 14 insertions(+), 36 deletions(-) diff --git a/adreno.c b/adreno.c index 1a714d4004..66f3e3e33f 100644 --- a/adreno.c +++ b/adreno.c @@ -1271,8 +1271,8 @@ static int adreno_pm_notifier(struct notifier_block *nb, unsigned long event, vo } } - if (pwr->gmu_cx_pd) { - pd = container_of(pwr->gmu_cx_pd->pm_domain, struct generic_pm_domain, domain); + if (pwr->cx_pd) { + pd = container_of(pwr->cx_pd->pm_domain, struct generic_pm_domain, domain); if (pd->prepared_count) { dev_err_ratelimited(device->dev, @@ -1509,7 +1509,7 @@ int adreno_device_probe(struct platform_device *pdev, * notifications when system has come out of suspend completely, so that we can perform * fault recovery. 
*/ - if (device->pwrctrl.gx_pd || device->pwrctrl.gmu_cx_pd) { + if (device->pwrctrl.gx_pd || device->pwrctrl.cx_pd) { adreno_dev->pm_nb.notifier_call = adreno_pm_notifier; register_pm_notifier(&adreno_dev->pm_nb); } @@ -1797,8 +1797,8 @@ static bool gdscs_left_on(struct kgsl_device *device) if (pwr->gx_regulator) return regulator_is_enabled(pwr->gx_regulator); - if (pwr->gmu_cx_pd) - return kgsl_genpd_is_enabled(pwr->gmu_cx_pd); + if (pwr->cx_pd) + return kgsl_genpd_is_enabled(pwr->cx_pd); if (pwr->gx_pd) return kgsl_genpd_is_enabled(pwr->gx_pd); diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index dc7b3edad6..24256f179e 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -600,7 +600,7 @@ void a6xx_gmu_disable_gdsc(struct adreno_device *adreno_dev) struct kgsl_pwrctrl *pwr = &device->pwrctrl; /* ADRENO_QUIRK_CX_GDSC quirk is not supported for genpd */ - WARN_ON_ONCE(pwr->gmu_cx_pd && ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)); + WARN_ON_ONCE(pwr->cx_pd && ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)); if (pwr->cx_regulator && ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)) regulator_set_mode(pwr->cx_regulator, REGULATOR_MODE_IDLE); diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 65b78c32de..0dca3b83c2 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -1363,7 +1363,7 @@ int kgsl_pwrctrl_enable_cx_gdsc(struct kgsl_device *device) struct kgsl_pwrctrl *pwr = &device->pwrctrl; int ret; - if (!pwr->cx_regulator && !pwr->gmu_cx_pd) + if (!pwr->cx_regulator && !pwr->cx_pd) return 0; ret = wait_for_completion_timeout(&pwr->cx_gdsc_gate, msecs_to_jiffies(5000)); @@ -1380,7 +1380,7 @@ int kgsl_pwrctrl_enable_cx_gdsc(struct kgsl_device *device) if (pwr->cx_regulator) ret = regulator_enable(pwr->cx_regulator); else - ret = pm_runtime_resume_and_get(pwr->gmu_cx_pd); + ret = pm_runtime_resume_and_get(pwr->cx_pd); if (ret) dev_err(device->dev, "Failed to enable CX gdsc, error %d\n", ret); @@ -1413,7 +1413,7 @@ void kgsl_pwrctrl_disable_cx_gdsc(struct 
kgsl_device *device) { struct kgsl_pwrctrl *pwr = &device->pwrctrl; - if (!pwr->cx_regulator && !pwr->gmu_cx_pd) + if (!pwr->cx_regulator && !pwr->cx_pd) return; kgsl_mmu_send_tlb_hint(&device->mmu, true); @@ -1423,7 +1423,7 @@ void kgsl_pwrctrl_disable_cx_gdsc(struct kgsl_device *device) if (pwr->cx_regulator) regulator_disable(pwr->cx_regulator); else - pm_runtime_put_sync(pwr->gmu_cx_pd); + pm_runtime_put_sync(pwr->cx_pd); } void kgsl_pwrctrl_disable_gx_gdsc(struct kgsl_device *device) @@ -1476,20 +1476,9 @@ static int kgsl_pwrctrl_probe_cx_gdsc(struct kgsl_device *device, struct platfor if (of_property_read_bool(pdev->dev.of_node, "power-domains")) { /* Get virtual device handle for CX GDSC to control it */ - struct device *cx_pd, *gmu_cx_pd; + struct device *cx_pd = dev_pm_domain_attach_by_name(&pdev->dev, "cx"); - gmu_cx_pd = dev_pm_domain_attach_by_name(&pdev->dev, "gmu_cx"); - if (IS_ERR_OR_NULL(gmu_cx_pd)) { - dev_err_probe(&pdev->dev, PTR_ERR(gmu_cx_pd), - "Failed to attach GMU cx power domain\n"); - return IS_ERR(gmu_cx_pd) ? PTR_ERR(gmu_cx_pd) : -EINVAL; - } - pwr->gmu_cx_pd = gmu_cx_pd; - - cx_pd = dev_pm_domain_attach_by_name(&pdev->dev, "cx"); if (IS_ERR_OR_NULL(cx_pd)) { - dev_pm_domain_detach(gmu_cx_pd, false); - pwr->gmu_cx_pd = NULL; dev_err_probe(&pdev->dev, PTR_ERR(cx_pd), "Failed to attach cx power domain\n"); return IS_ERR(cx_pd) ? 
PTR_ERR(cx_pd) : -EINVAL; @@ -1548,11 +1537,9 @@ int kgsl_pwrctrl_probe_gdscs(struct kgsl_device *device, struct platform_device return ret; ret = kgsl_pwrctrl_probe_gx_gdsc(device, pdev); - if (ret && pwr->gmu_cx_pd) { + if (ret && pwr->cx_pd) { dev_pm_domain_detach(pwr->cx_pd, false); - dev_pm_domain_detach(pwr->gmu_cx_pd, false); pwr->cx_pd = NULL; - pwr->gmu_cx_pd = NULL; } return ret; @@ -2025,12 +2012,10 @@ void kgsl_pwrctrl_close(struct kgsl_device *device) pm_runtime_disable(&device->pdev->dev); - if (pwr->gmu_cx_pd) { + if (pwr->cx_pd) { dev_pm_genpd_remove_notifier(pwr->cx_pd); dev_pm_domain_detach(pwr->cx_pd, false); - dev_pm_domain_detach(pwr->gmu_cx_pd, false); pwr->cx_pd = NULL; - pwr->gmu_cx_pd = NULL; } if (pwr->gx_pd) { diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index 48e114fe0d..15bd0f3309 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -114,15 +114,8 @@ struct kgsl_pwrctrl { struct regulator *cx_regulator; /** @gx_regulator: Pointer to the GX domain regulator if applicable */ struct regulator *gx_regulator; - /** - * @cx_pd: Power domain for registering CX GDSC notifier - * - * Only GMU device votes for GMU_CX_PD. Other client votes are consolidated - * in the CX GenPD instance, so use this for registering the notifier. - */ + /** @cx_pd: Power domain for controlling CX GDSC */ struct device *cx_pd; - /** @gmu_cx_pd: Power domain for controlling GMU CX GDSC instance */ - struct device *gmu_cx_pd; /** @gx_pd: Power domain for controlling GX GDSC */ struct device *gx_pd; /** @gx_regulator_parent: Pointer to the GX domain parent supply */ From b79a96cae153011dcad0d4d4351e72942dc9ffc6 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Wed, 4 Sep 2024 19:42:09 +0530 Subject: [PATCH 0927/1016] kgsl: pwrctrl: Use dedicated GMU CX GenPD instance for voting Currently, there is a race condition in GenPD framework where GPU CX GDSC can remain ON if both GMU and KGSL SMMU devices are suspending in parallel and are voting on the same power domain. 
Use dedicated GMU CX GenPD instance for CX GDSC voting to avoid the race. Since GMU and SMMU votes are consolidate in CX GenPD instance, use it for registering the notifier. Change-Id: I220dfd94bb7d5155fc4989047083c70b3560f3c2 Signed-off-by: Kamal Agrawal --- adreno.c | 10 +++++----- adreno_a6xx_gmu.c | 2 +- kgsl_pwrctrl.c | 42 +++++++++++++++++++++++++++++++----------- kgsl_pwrctrl.h | 9 ++++++++- 4 files changed, 45 insertions(+), 18 deletions(-) diff --git a/adreno.c b/adreno.c index 66f3e3e33f..1a714d4004 100644 --- a/adreno.c +++ b/adreno.c @@ -1271,8 +1271,8 @@ static int adreno_pm_notifier(struct notifier_block *nb, unsigned long event, vo } } - if (pwr->cx_pd) { - pd = container_of(pwr->cx_pd->pm_domain, struct generic_pm_domain, domain); + if (pwr->gmu_cx_pd) { + pd = container_of(pwr->gmu_cx_pd->pm_domain, struct generic_pm_domain, domain); if (pd->prepared_count) { dev_err_ratelimited(device->dev, @@ -1509,7 +1509,7 @@ int adreno_device_probe(struct platform_device *pdev, * notifications when system has come out of suspend completely, so that we can perform * fault recovery. 
*/ - if (device->pwrctrl.gx_pd || device->pwrctrl.cx_pd) { + if (device->pwrctrl.gx_pd || device->pwrctrl.gmu_cx_pd) { adreno_dev->pm_nb.notifier_call = adreno_pm_notifier; register_pm_notifier(&adreno_dev->pm_nb); } @@ -1797,8 +1797,8 @@ static bool gdscs_left_on(struct kgsl_device *device) if (pwr->gx_regulator) return regulator_is_enabled(pwr->gx_regulator); - if (pwr->cx_pd) - return kgsl_genpd_is_enabled(pwr->cx_pd); + if (pwr->gmu_cx_pd) + return kgsl_genpd_is_enabled(pwr->gmu_cx_pd); if (pwr->gx_pd) return kgsl_genpd_is_enabled(pwr->gx_pd); diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index 24256f179e..dc7b3edad6 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -600,7 +600,7 @@ void a6xx_gmu_disable_gdsc(struct adreno_device *adreno_dev) struct kgsl_pwrctrl *pwr = &device->pwrctrl; /* ADRENO_QUIRK_CX_GDSC quirk is not supported for genpd */ - WARN_ON_ONCE(pwr->cx_pd && ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)); + WARN_ON_ONCE(pwr->gmu_cx_pd && ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)); if (pwr->cx_regulator && ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)) regulator_set_mode(pwr->cx_regulator, REGULATOR_MODE_IDLE); diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 0dca3b83c2..2ac79015ef 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -1363,7 +1363,7 @@ int kgsl_pwrctrl_enable_cx_gdsc(struct kgsl_device *device) struct kgsl_pwrctrl *pwr = &device->pwrctrl; int ret; - if (!pwr->cx_regulator && !pwr->cx_pd) + if (!pwr->cx_regulator && !pwr->gmu_cx_pd) return 0; ret = wait_for_completion_timeout(&pwr->cx_gdsc_gate, msecs_to_jiffies(5000)); @@ -1380,7 +1380,7 @@ int kgsl_pwrctrl_enable_cx_gdsc(struct kgsl_device *device) if (pwr->cx_regulator) ret = regulator_enable(pwr->cx_regulator); else - ret = pm_runtime_resume_and_get(pwr->cx_pd); + ret = pm_runtime_resume_and_get(pwr->gmu_cx_pd); if (ret) dev_err(device->dev, "Failed to enable CX gdsc, error %d\n", ret); @@ -1413,7 +1413,7 @@ void kgsl_pwrctrl_disable_cx_gdsc(struct 
kgsl_device *device) { struct kgsl_pwrctrl *pwr = &device->pwrctrl; - if (!pwr->cx_regulator && !pwr->cx_pd) + if (!pwr->cx_regulator && !pwr->gmu_cx_pd) return; kgsl_mmu_send_tlb_hint(&device->mmu, true); @@ -1423,7 +1423,7 @@ void kgsl_pwrctrl_disable_cx_gdsc(struct kgsl_device *device) if (pwr->cx_regulator) regulator_disable(pwr->cx_regulator); else - pm_runtime_put_sync(pwr->cx_pd); + pm_runtime_put_sync(pwr->gmu_cx_pd); } void kgsl_pwrctrl_disable_gx_gdsc(struct kgsl_device *device) @@ -1484,6 +1484,14 @@ static int kgsl_pwrctrl_probe_cx_gdsc(struct kgsl_device *device, struct platfor return IS_ERR(cx_pd) ? PTR_ERR(cx_pd) : -EINVAL; } pwr->cx_pd = cx_pd; + + pwr->gmu_cx_pd = dev_pm_domain_attach_by_name(&pdev->dev, "gmu_cx"); + if (IS_ERR_OR_NULL(pwr->gmu_cx_pd)) { + dev_err(device->dev, + "Failed to attach GMU cx power domain, falling back to cx pd\n"); + /* Fallback to cx pd voting if gmu_cx pd is unavailable */ + pwr->gmu_cx_pd = cx_pd; + } } else { struct regulator *cx_regulator = devm_regulator_get(&pdev->dev, "vddcx"); @@ -1537,10 +1545,18 @@ int kgsl_pwrctrl_probe_gdscs(struct kgsl_device *device, struct platform_device return ret; ret = kgsl_pwrctrl_probe_gx_gdsc(device, pdev); - if (ret && pwr->cx_pd) { + if (!ret) + return ret; + + /* Detach pm domains during failure */ + if (pwr->gmu_cx_pd && (pwr->gmu_cx_pd != pwr->cx_pd)) + dev_pm_domain_detach(pwr->gmu_cx_pd, false); + + if (pwr->cx_pd) dev_pm_domain_detach(pwr->cx_pd, false); - pwr->cx_pd = NULL; - } + + pwr->gmu_cx_pd = NULL; + pwr->cx_pd = NULL; return ret; } @@ -2012,16 +2028,20 @@ void kgsl_pwrctrl_close(struct kgsl_device *device) pm_runtime_disable(&device->pdev->dev); + if (pwr->gmu_cx_pd && (pwr->gmu_cx_pd != pwr->cx_pd)) + dev_pm_domain_detach(pwr->gmu_cx_pd, false); + if (pwr->cx_pd) { dev_pm_genpd_remove_notifier(pwr->cx_pd); dev_pm_domain_detach(pwr->cx_pd, false); - pwr->cx_pd = NULL; } - if (pwr->gx_pd) { + if (pwr->gx_pd) dev_pm_domain_detach(pwr->gx_pd, false); - pwr->gx_pd = 
NULL; - } + + pwr->gmu_cx_pd = NULL; + pwr->cx_pd = NULL; + pwr->gx_pd = NULL; } void kgsl_idle_check(struct work_struct *work) diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index 15bd0f3309..48e114fe0d 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -114,8 +114,15 @@ struct kgsl_pwrctrl { struct regulator *cx_regulator; /** @gx_regulator: Pointer to the GX domain regulator if applicable */ struct regulator *gx_regulator; - /** @cx_pd: Power domain for controlling CX GDSC */ + /** + * @cx_pd: Power domain for registering CX GDSC notifier + * + * Only GMU device votes for GMU_CX_PD. Other client votes are consolidated + * in the CX GenPD instance, so use this for registering the notifier. + */ struct device *cx_pd; + /** @gmu_cx_pd: Power domain for controlling GMU CX GDSC instance */ + struct device *gmu_cx_pd; /** @gx_pd: Power domain for controlling GX GDSC */ struct device *gx_pd; /** @gx_regulator_parent: Pointer to the GX domain parent supply */ From 29c7955acab4a308ae5fe3c1b8722a88bd08c82f Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Tue, 8 Oct 2024 09:33:34 -0700 Subject: [PATCH 0928/1016] gen8: kgsl: Do not use UINT_MAX for slice_id and statetype If region is unsliced we should use slice ID 0, using a value like UINT_MAX causes NoC errors. Similarly we do not need to initialize the SP_READ_SEL to 0xff. It will cause NoC errors. Remove the register from the non-context list. 
Change-Id: I9a12c06b120d2a357c43d6654be6c2478a0d53b5 Signed-off-by: Urvashi Agrawal --- adreno-gpulist.h | 3 --- adreno_gen8_snapshot.h | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 571c44b606..ae34582bb5 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2816,7 +2816,6 @@ static const struct gen8_nonctxt_regs gen8_0_0_nonctxt_regs[] = { { GEN8_SP_CHICKEN_BITS_2, BIT(22) | BIT(23), BIT(PIPE_NONE) }, { GEN8_SP_CHICKEN_BITS_3, 0x00300000, BIT(PIPE_NONE) }, { GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, 0x00000080, BIT(PIPE_NONE) }, - { GEN8_SP_READ_SEL, 0x0001ff00, BIT(PIPE_NONE) }, { GEN8_TPL1_DBG_ECO_CNTL, 0x10000000, BIT(PIPE_NONE) }, /* Enable cubemap small miplevel optimization settings */ { GEN8_TPL1_DBG_ECO_CNTL1, 0x00000724, BIT(PIPE_NONE) }, @@ -3044,7 +3043,6 @@ static const struct gen8_nonctxt_regs gen8_4_0_nonctxt_regs[] = { { GEN8_SP_CHICKEN_BITS_2, BIT(22) | BIT(23), BIT(PIPE_NONE) }, { GEN8_SP_CHICKEN_BITS_3, 0x00300000, BIT(PIPE_NONE) }, { GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, 0x00000080, BIT(PIPE_NONE) }, - { GEN8_SP_READ_SEL, 0x0001ff00, BIT(PIPE_NONE) }, { GEN8_TPL1_DBG_ECO_CNTL, 0x10000000, BIT(PIPE_NONE) }, /* Enable cubemap small miplevel optimization settings */ { GEN8_TPL1_DBG_ECO_CNTL1, 0x00000724, BIT(PIPE_NONE) }, @@ -3147,7 +3145,6 @@ static const struct gen8_nonctxt_regs gen8_3_0_nonctxt_regs[] = { { GEN8_SP_CHICKEN_BITS_3, 0x00300000, BIT(PIPE_NONE) }, { GEN8_SP_PERFCTR_SHADER_MASK, 0x0000003f, BIT(PIPE_NONE) }, { GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, 0x00000080, BIT(PIPE_NONE) }, - { GEN8_SP_READ_SEL, 0x0001ff00, BIT(PIPE_NONE) }, { GEN8_TPL1_DBG_ECO_CNTL, 0x10000000, BIT(PIPE_NONE) }, { GEN8_TPL1_DBG_ECO_CNTL1, 0x00000724, BIT(PIPE_NONE) }, { GEN8_UCHE_MODE_CNTL, 0x00020000, BIT(PIPE_NONE) }, diff --git a/adreno_gen8_snapshot.h b/adreno_gen8_snapshot.h index 1ea6ba2550..36cea69298 100644 --- a/adreno_gen8_snapshot.h +++ b/adreno_gen8_snapshot.h @@ -42,7 +42,7 @@ enum location_id { 
#define NUMBER_OF_SLICES(region, adreno_dev) \ ((region == SLICE) ? gen8_get_num_slices(adreno_dev) : 1) -#define SLICE_ID(region, j) ((region == SLICE) ? j : UINT_MAX) +#define SLICE_ID(region, j) ((region == SLICE) ? j : 0) #define GEN8_DEBUGBUS_BLOCK_SIZE 0x100 From 5483c441207da7a720fcd1c5a5e421b4e096fa67 Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Tue, 17 Sep 2024 20:52:15 -0700 Subject: [PATCH 0929/1016] kgsl: gen8: Exit snapshot when crashdumper fails In cases when crashdumper fails to run and times out on any section there is a very good chance that GPU is not in a good state and AHB bus will likely result in a NOC error. Skip following sections if crashdumper times out on any section. Change-Id: Ibc41d59c5079542acfe84dbd9282b2c9dcad4798 Signed-off-by: Urvashi Agrawal --- adreno_gen8_snapshot.c | 128 +++++++++++++++++++++++------------------ 1 file changed, 73 insertions(+), 55 deletions(-) diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 6f72e546b6..b4f09802cc 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -619,7 +619,7 @@ err_clk_put: clk_put(clk); } -static void gen8_snapshot_shader(struct kgsl_device *device, +static bool gen8_snapshot_shader(struct kgsl_device *device, struct kgsl_snapshot *snapshot) { struct gen8_shader_block_info info = {0}; @@ -628,8 +628,6 @@ static void gen8_snapshot_shader(struct kgsl_device *device, struct gen8_shader_block *shader_blocks = gen8_snapshot_block_list->shader_blocks; size_t num_shader_blocks = gen8_snapshot_block_list->num_shader_blocks; u32 i, sp, usptp, slice; - size_t (*func)(struct kgsl_device *device, u8 *buf, size_t remain, - void *priv) = gen8_legacy_snapshot_shader; if (CD_SCRIPT_CHECK(device)) { for (i = 0; i < num_shader_blocks; i++) { @@ -649,13 +647,14 @@ static void gen8_snapshot_shader(struct kgsl_device *device, /* Shader working/shadow memory */ kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_SHADER_V3, - snapshot, func, &info); + snapshot, 
gen8_legacy_snapshot_shader, + &info); } } } } - return; + return true; } for (i = 0; i < num_shader_blocks; i++) { @@ -684,10 +683,9 @@ static void gen8_snapshot_shader(struct kgsl_device *device, /* Marker for end of script */ CD_FINISH(ptr, offset); - /* Try to run the crash dumper */ - func = gen8_legacy_snapshot_shader; - if (_gen8_do_crashdump(device)) - func = gen8_snapshot_shader_memory; + /* Try to run the crash dumper and bail if it times out */ + if (!_gen8_do_crashdump(device)) + return false; offset = 0; for (slice = 0; slice < slices; slice++) { @@ -702,11 +700,13 @@ static void gen8_snapshot_shader(struct kgsl_device *device, /* Shader working/shadow memory */ kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_SHADER_V3, snapshot, func, &info); + KGSL_SNAPSHOT_SECTION_SHADER_V3, snapshot, + gen8_snapshot_shader_memory, &info); } } } } + return true; } static void gen8_rmw_aperture(struct kgsl_device *device, @@ -860,7 +860,7 @@ static size_t gen8_snapshot_cluster_dbgahb(struct kgsl_device *device, u8 *buf, return (size + sizeof(*header)); } -static void gen8_snapshot_dbgahb_regs(struct kgsl_device *device, +static bool gen8_snapshot_dbgahb_regs(struct kgsl_device *device, struct kgsl_snapshot *snapshot) { u32 i, j, sp, usptp, count, slice; @@ -868,8 +868,6 @@ static void gen8_snapshot_dbgahb_regs(struct kgsl_device *device, struct gen8_sptp_cluster_registers_info info = {0}; struct gen8_sptp_cluster_registers *sptp_clusters = gen8_snapshot_block_list->sptp_clusters; size_t num_sptp_clusters = gen8_snapshot_block_list->num_sptp_clusters; - size_t (*func)(struct kgsl_device *device, u8 *buf, size_t remain, - void *priv) = gen8_legacy_snapshot_cluster_dbgahb; if (CD_SCRIPT_CHECK(device)) { for (i = 0; i < num_sptp_clusters; i++) { @@ -890,12 +888,12 @@ static void gen8_snapshot_dbgahb_regs(struct kgsl_device *device, info.context_id = cluster->context_id; kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, - func, &info); + 
gen8_legacy_snapshot_cluster_dbgahb, &info); } } } } - return; + return true; } for (i = 0; i < num_sptp_clusters; i++) { @@ -941,18 +939,18 @@ static void gen8_snapshot_dbgahb_regs(struct kgsl_device *device, /* Marker for end of script */ CD_FINISH(ptr, offset); - func = gen8_legacy_snapshot_cluster_dbgahb; - /* Try to run the crash dumper */ - if (_gen8_do_crashdump(device)) - func = gen8_snapshot_cluster_dbgahb; + /* Try to run the crash dumper and bail if it times out */ + if (!_gen8_do_crashdump(device)) + return false; kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, - func, &info); + gen8_snapshot_cluster_dbgahb, &info); } } } } + return true; } static size_t gen8_legacy_snapshot_mvc(struct kgsl_device *device, u8 *buf, @@ -1054,7 +1052,7 @@ static size_t gen8_snapshot_mvc(struct kgsl_device *device, u8 *buf, return (size + sizeof(*header)); } -static void gen8_snapshot_mvc_regs(struct kgsl_device *device, +static bool gen8_snapshot_mvc_regs(struct kgsl_device *device, struct kgsl_snapshot *snapshot, struct gen8_cluster_registers *clusters, size_t num_cluster) @@ -1063,8 +1061,6 @@ static void gen8_snapshot_mvc_regs(struct kgsl_device *device, u64 *ptr, offset = 0; u32 count, slice; struct gen8_cluster_registers_info info = {0}; - size_t (*func)(struct kgsl_device *device, u8 *buf, - size_t remain, void *priv) = gen8_legacy_snapshot_mvc; if (CD_SCRIPT_CHECK(device)) { for (i = 0; i < num_cluster; i++) { @@ -1078,10 +1074,11 @@ static void gen8_snapshot_mvc_regs(struct kgsl_device *device, info.context_id = cluster->context_id; info.slice_id = SLICE_ID(cluster->slice_region, j); kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, func, &info); + KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, + gen8_legacy_snapshot_mvc, &info); } } - return; + return true; } for (i = 0; i < num_cluster; i++) { @@ -1121,15 +1118,15 @@ static void gen8_snapshot_mvc_regs(struct kgsl_device *device, /* Marker for end of script */ 
CD_FINISH(ptr, offset); - func = gen8_legacy_snapshot_mvc; - /* Try to run the crash dumper */ - if (_gen8_do_crashdump(device)) - func = gen8_snapshot_mvc; + /* Try to run the crash dumper and bail if it times out */ + if (!_gen8_do_crashdump(device)) + return false; kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, func, &info); + KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, gen8_snapshot_mvc, &info); } } + return true; } /* gen8_dbgc_debug_bus_read() - Read data from trace bus */ @@ -1449,14 +1446,12 @@ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, } } -static void gen8_reglist_snapshot(struct kgsl_device *device, +static bool gen8_reglist_snapshot(struct kgsl_device *device, struct kgsl_snapshot *snapshot) { u64 *ptr, offset = 0; u32 i, j, r, slices; struct gen8_reg_list *reg_list = gen8_snapshot_block_list->reg_list; - size_t (*func)(struct kgsl_device *device, u8 *buf, size_t remain, - void *priv) = gen8_legacy_snapshot_registers; struct gen8_reg_list_info info = {0}; if (CD_SCRIPT_CHECK(device)) { @@ -1468,10 +1463,10 @@ static void gen8_reglist_snapshot(struct kgsl_device *device, info.regs = regs; info.slice_id = SLICE_ID(regs->slice_region, j); kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3, - snapshot, func, &info); + snapshot, gen8_legacy_snapshot_registers, &info); } } - return; + return true; } for (i = 0; reg_list[i].regs; i++) { @@ -1505,15 +1500,16 @@ static void gen8_reglist_snapshot(struct kgsl_device *device, /* Marker for end of script */ CD_FINISH(ptr, offset); - func = gen8_legacy_snapshot_registers; - /* Try to run the crash dumper */ - if (_gen8_do_crashdump(device)) - func = gen8_snapshot_registers; + /* Try to run the crash dumper and bail if it times out */ + if (!_gen8_do_crashdump(device)) + return false; kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3, - snapshot, func, &info); + snapshot, gen8_snapshot_registers, &info); } } + + return true; } static size_t 
gen8_snapshot_cx_misc_registers(struct kgsl_device *device, u8 *buf, @@ -1548,7 +1544,7 @@ static size_t gen8_snapshot_cx_misc_registers(struct kgsl_device *device, u8 *bu return size; } -static void gen8_cx_misc_regs_snapshot(struct kgsl_device *device, +static bool gen8_cx_misc_regs_snapshot(struct kgsl_device *device, struct kgsl_snapshot *snapshot) { u64 *ptr, offset = 0; @@ -1572,24 +1568,27 @@ static void gen8_cx_misc_regs_snapshot(struct kgsl_device *device, /* Marker for end of script */ CD_FINISH(ptr, offset); - /* Try to run the crash dumper */ + /* Try to run the crash dumper if it fails return */ if (_gen8_do_crashdump(device)) { kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot, gen8_snapshot_cx_misc_registers, (void *)gen8_snapshot_block_list->cx_misc_regs); - return; - } + return true; + } else + return false; legacy_snapshot: regs_ptr = (const u32 *)gen8_snapshot_block_list->cx_misc_regs; if (!kgsl_regmap_valid_offset(&device->regmap, regs_ptr[0])) { WARN_ONCE(1, "cx_misc registers are not defined in device tree"); - return; + return true; } kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot, adreno_snapshot_registers_v2, (void *)regs_ptr); + + return true; } void gen8_snapshot_external_core_regs(struct kgsl_device *device, @@ -1624,14 +1623,19 @@ void gen8_snapshot(struct adreno_device *adreno_dev, u32 i, slice_mask; const struct adreno_gen8_core *gpucore = to_gen8_core(ADRENO_DEVICE(device)); int is_current_rt; - gen8_crashdump_timedout = false; gen8_snapshot_block_list = gpucore->gen8_snapshot_block_list; /* External core and CX MISC regs are dumped in the beginning of gmu snapshot */ if (!gmu_core_isenabled(device)) { gen8_snapshot_external_core_regs(device, snapshot); - gen8_cx_misc_regs_snapshot(device, snapshot); + + /* + * If crashdumper timed out while dumping this section skip everything + * since even AHB accesses to the GPU might cause NoC errors. 
+ */ + if (!gen8_cx_misc_regs_snapshot(device, snapshot)) + return; } gen8_snapshot_cx_debugbus(adreno_dev, snapshot); @@ -1701,7 +1705,12 @@ void gen8_snapshot(struct adreno_device *adreno_dev, } } - gen8_reglist_snapshot(device, snapshot); + /* + * If crashdumper timed out while dumping this section skip everything + * since even AHB acceses to the GPU might cause NoC errors. + */ + if (!gen8_reglist_snapshot(device, snapshot)) + goto err; for (i = 0; i < gen8_snapshot_block_list->index_registers_len; i++) { kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL @@ -1717,22 +1726,31 @@ void gen8_snapshot(struct adreno_device *adreno_dev, /* Mempool debug data */ gen8_snapshot_mempool(device, snapshot); - /* CP MVC register section */ - gen8_snapshot_mvc_regs(device, snapshot, - gen8_snapshot_block_list->cp_clusters, gen8_snapshot_block_list->num_cp_clusters); + /* + * CP MVC register section + * If crashdumper timed out while dumping any section below skip everything + * since even AHB acceses to the GPU might cause NoC errors. 
+ */ + if (!gen8_snapshot_mvc_regs(device, snapshot, + gen8_snapshot_block_list->cp_clusters, gen8_snapshot_block_list->num_cp_clusters)) + goto err; /* MVC register section */ - gen8_snapshot_mvc_regs(device, snapshot, - gen8_snapshot_block_list->clusters, gen8_snapshot_block_list->num_clusters); + if (!gen8_snapshot_mvc_regs(device, snapshot, + gen8_snapshot_block_list->clusters, gen8_snapshot_block_list->num_clusters)) + goto err; /* registers dumped through DBG AHB */ - gen8_snapshot_dbgahb_regs(device, snapshot); + if (!gen8_snapshot_dbgahb_regs(device, snapshot)) + goto err; /* Shader memory */ - gen8_snapshot_shader(device, snapshot); + if (!gen8_snapshot_shader(device, snapshot)) + goto err; kgsl_regwrite(device, GEN8_RBBM_SNAPSHOT_STATUS, 0x0); +err: /* Preemption record */ adreno_snapshot_preemption_record(device, snapshot); From 2ff0ecb92ddc3a1cf134603b19d91386b08cac42 Mon Sep 17 00:00:00 2001 From: Kaushal Sanadhya Date: Wed, 3 Jul 2024 18:06:40 +0530 Subject: [PATCH 0930/1016] kgsl: gen8: Fix DBG AHB register collection in snapshot SP_READ_SEL register is incorrectly programmed during debug AHB register collection. Fix it by correctly programming the SP and uSPTP id in this register. 
Change-Id: I332749e223c0f8b018e5a445b834b43ee1a66540 Signed-off-by: Kaushal Sanadhya --- adreno_gen8_snapshot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index b4f09802cc..57d2b9b3e5 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -790,7 +790,7 @@ static size_t gen8_legacy_snapshot_cluster_dbgahb(struct kgsl_device *device, header->slice_id = info->slice_id; read_sel = GEN8_SP_READ_SEL_VAL(info->slice_id, info->location_id, - info->pipe_id, info->statetype_id, info->sp_id, info->usptp_id); + info->pipe_id, info->statetype_id, info->usptp_id, info->sp_id); kgsl_regwrite(device, GEN8_SP_READ_SEL, read_sel); @@ -925,7 +925,7 @@ static bool gen8_snapshot_dbgahb_regs(struct kgsl_device *device, /* Program the aperture */ ptr += CD_WRITE(ptr, GEN8_SP_READ_SEL, GEN8_SP_READ_SEL_VAL (j, cluster->location_id, cluster->pipe_id, - cluster->statetype, sp, usptp)); + cluster->statetype, usptp, sp)); for (; regs[0] != UINT_MAX; regs += 2) { count = REG_COUNT(regs); From 06a1dcf85165b51b7535381082241ed07e3f8886 Mon Sep 17 00:00:00 2001 From: Kaushal Sanadhya Date: Wed, 3 Jul 2024 18:06:40 +0530 Subject: [PATCH 0931/1016] kgsl: gen8: Fix DBG AHB register collection in snapshot SP_READ_SEL register is incorrectly programmed during debug AHB register collection. Fix it by correctly programming the SP and uSPTP id in this register. 
Change-Id: I332749e223c0f8b018e5a445b834b43ee1a66540 Signed-off-by: Kaushal Sanadhya Signed-off-by: Pankaj Gupta --- adreno_gen8_snapshot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 6f72e546b6..3c9ac3bd22 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -790,7 +790,7 @@ static size_t gen8_legacy_snapshot_cluster_dbgahb(struct kgsl_device *device, header->slice_id = info->slice_id; read_sel = GEN8_SP_READ_SEL_VAL(info->slice_id, info->location_id, - info->pipe_id, info->statetype_id, info->sp_id, info->usptp_id); + info->pipe_id, info->statetype_id, info->usptp_id, info->sp_id); kgsl_regwrite(device, GEN8_SP_READ_SEL, read_sel); @@ -927,7 +927,7 @@ static void gen8_snapshot_dbgahb_regs(struct kgsl_device *device, /* Program the aperture */ ptr += CD_WRITE(ptr, GEN8_SP_READ_SEL, GEN8_SP_READ_SEL_VAL (j, cluster->location_id, cluster->pipe_id, - cluster->statetype, sp, usptp)); + cluster->statetype, usptp, sp)); for (; regs[0] != UINT_MAX; regs += 2) { count = REG_COUNT(regs); From d51d7923c40277637309868f5bd5cc13e2ffe430 Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Tue, 8 Oct 2024 09:33:34 -0700 Subject: [PATCH 0932/1016] gen8: kgsl: Do not use UINT_MAX for slice_id and statetype If region is unsliced we should use slice ID 0, using a value like UINT_MAX causes NoC errors. Similarly we do not need to initialize the SP_READ_SEL to 0xff. It will cause NoC errors. Remove the register from the non-context list. 
Change-Id: I9a12c06b120d2a357c43d6654be6c2478a0d53b5 Signed-off-by: Urvashi Agrawal Signed-off-by: Pankaj Gupta --- adreno-gpulist.h | 3 --- adreno_gen8_snapshot.h | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 571c44b606..ae34582bb5 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2816,7 +2816,6 @@ static const struct gen8_nonctxt_regs gen8_0_0_nonctxt_regs[] = { { GEN8_SP_CHICKEN_BITS_2, BIT(22) | BIT(23), BIT(PIPE_NONE) }, { GEN8_SP_CHICKEN_BITS_3, 0x00300000, BIT(PIPE_NONE) }, { GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, 0x00000080, BIT(PIPE_NONE) }, - { GEN8_SP_READ_SEL, 0x0001ff00, BIT(PIPE_NONE) }, { GEN8_TPL1_DBG_ECO_CNTL, 0x10000000, BIT(PIPE_NONE) }, /* Enable cubemap small miplevel optimization settings */ { GEN8_TPL1_DBG_ECO_CNTL1, 0x00000724, BIT(PIPE_NONE) }, @@ -3044,7 +3043,6 @@ static const struct gen8_nonctxt_regs gen8_4_0_nonctxt_regs[] = { { GEN8_SP_CHICKEN_BITS_2, BIT(22) | BIT(23), BIT(PIPE_NONE) }, { GEN8_SP_CHICKEN_BITS_3, 0x00300000, BIT(PIPE_NONE) }, { GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, 0x00000080, BIT(PIPE_NONE) }, - { GEN8_SP_READ_SEL, 0x0001ff00, BIT(PIPE_NONE) }, { GEN8_TPL1_DBG_ECO_CNTL, 0x10000000, BIT(PIPE_NONE) }, /* Enable cubemap small miplevel optimization settings */ { GEN8_TPL1_DBG_ECO_CNTL1, 0x00000724, BIT(PIPE_NONE) }, @@ -3147,7 +3145,6 @@ static const struct gen8_nonctxt_regs gen8_3_0_nonctxt_regs[] = { { GEN8_SP_CHICKEN_BITS_3, 0x00300000, BIT(PIPE_NONE) }, { GEN8_SP_PERFCTR_SHADER_MASK, 0x0000003f, BIT(PIPE_NONE) }, { GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, 0x00000080, BIT(PIPE_NONE) }, - { GEN8_SP_READ_SEL, 0x0001ff00, BIT(PIPE_NONE) }, { GEN8_TPL1_DBG_ECO_CNTL, 0x10000000, BIT(PIPE_NONE) }, { GEN8_TPL1_DBG_ECO_CNTL1, 0x00000724, BIT(PIPE_NONE) }, { GEN8_UCHE_MODE_CNTL, 0x00020000, BIT(PIPE_NONE) }, diff --git a/adreno_gen8_snapshot.h b/adreno_gen8_snapshot.h index 1ea6ba2550..36cea69298 100644 --- a/adreno_gen8_snapshot.h +++ b/adreno_gen8_snapshot.h @@ -42,7 
+42,7 @@ enum location_id { #define NUMBER_OF_SLICES(region, adreno_dev) \ ((region == SLICE) ? gen8_get_num_slices(adreno_dev) : 1) -#define SLICE_ID(region, j) ((region == SLICE) ? j : UINT_MAX) +#define SLICE_ID(region, j) ((region == SLICE) ? j : 0) #define GEN8_DEBUGBUS_BLOCK_SIZE 0x100 From 291d65463c4bf99c815f4d1ad7c843f2f35fef28 Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Tue, 17 Sep 2024 20:52:15 -0700 Subject: [PATCH 0933/1016] kgsl: gen8: Exit snapshot when crashdumper fails In cases when crashdumper fails to run and times out on any section there is a very good chance that GPU is not in a good state and AHB bus will likely result in a NOC error. Skip following sections if crashdumper times out on any section. Change-Id: Ibc41d59c5079542acfe84dbd9282b2c9dcad4798 Signed-off-by: Urvashi Agrawal Signed-off-by: Pankaj Gupta --- adreno_gen8_snapshot.c | 128 +++++++++++++++++++++++------------------ 1 file changed, 73 insertions(+), 55 deletions(-) diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 3c9ac3bd22..57d2b9b3e5 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -619,7 +619,7 @@ err_clk_put: clk_put(clk); } -static void gen8_snapshot_shader(struct kgsl_device *device, +static bool gen8_snapshot_shader(struct kgsl_device *device, struct kgsl_snapshot *snapshot) { struct gen8_shader_block_info info = {0}; @@ -628,8 +628,6 @@ static void gen8_snapshot_shader(struct kgsl_device *device, struct gen8_shader_block *shader_blocks = gen8_snapshot_block_list->shader_blocks; size_t num_shader_blocks = gen8_snapshot_block_list->num_shader_blocks; u32 i, sp, usptp, slice; - size_t (*func)(struct kgsl_device *device, u8 *buf, size_t remain, - void *priv) = gen8_legacy_snapshot_shader; if (CD_SCRIPT_CHECK(device)) { for (i = 0; i < num_shader_blocks; i++) { @@ -649,13 +647,14 @@ static void gen8_snapshot_shader(struct kgsl_device *device, /* Shader working/shadow memory */ kgsl_snapshot_add_section(device, 
KGSL_SNAPSHOT_SECTION_SHADER_V3, - snapshot, func, &info); + snapshot, gen8_legacy_snapshot_shader, + &info); } } } } - return; + return true; } for (i = 0; i < num_shader_blocks; i++) { @@ -684,10 +683,9 @@ static void gen8_snapshot_shader(struct kgsl_device *device, /* Marker for end of script */ CD_FINISH(ptr, offset); - /* Try to run the crash dumper */ - func = gen8_legacy_snapshot_shader; - if (_gen8_do_crashdump(device)) - func = gen8_snapshot_shader_memory; + /* Try to run the crash dumper and bail if it times out */ + if (!_gen8_do_crashdump(device)) + return false; offset = 0; for (slice = 0; slice < slices; slice++) { @@ -702,11 +700,13 @@ static void gen8_snapshot_shader(struct kgsl_device *device, /* Shader working/shadow memory */ kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_SHADER_V3, snapshot, func, &info); + KGSL_SNAPSHOT_SECTION_SHADER_V3, snapshot, + gen8_snapshot_shader_memory, &info); } } } } + return true; } static void gen8_rmw_aperture(struct kgsl_device *device, @@ -860,7 +860,7 @@ static size_t gen8_snapshot_cluster_dbgahb(struct kgsl_device *device, u8 *buf, return (size + sizeof(*header)); } -static void gen8_snapshot_dbgahb_regs(struct kgsl_device *device, +static bool gen8_snapshot_dbgahb_regs(struct kgsl_device *device, struct kgsl_snapshot *snapshot) { u32 i, j, sp, usptp, count, slice; @@ -868,8 +868,6 @@ static void gen8_snapshot_dbgahb_regs(struct kgsl_device *device, struct gen8_sptp_cluster_registers_info info = {0}; struct gen8_sptp_cluster_registers *sptp_clusters = gen8_snapshot_block_list->sptp_clusters; size_t num_sptp_clusters = gen8_snapshot_block_list->num_sptp_clusters; - size_t (*func)(struct kgsl_device *device, u8 *buf, size_t remain, - void *priv) = gen8_legacy_snapshot_cluster_dbgahb; if (CD_SCRIPT_CHECK(device)) { for (i = 0; i < num_sptp_clusters; i++) { @@ -890,12 +888,12 @@ static void gen8_snapshot_dbgahb_regs(struct kgsl_device *device, info.context_id = cluster->context_id; 
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, - func, &info); + gen8_legacy_snapshot_cluster_dbgahb, &info); } } } } - return; + return true; } for (i = 0; i < num_sptp_clusters; i++) { @@ -941,18 +939,18 @@ static void gen8_snapshot_dbgahb_regs(struct kgsl_device *device, /* Marker for end of script */ CD_FINISH(ptr, offset); - func = gen8_legacy_snapshot_cluster_dbgahb; - /* Try to run the crash dumper */ - if (_gen8_do_crashdump(device)) - func = gen8_snapshot_cluster_dbgahb; + /* Try to run the crash dumper and bail if it times out */ + if (!_gen8_do_crashdump(device)) + return false; kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, - func, &info); + gen8_snapshot_cluster_dbgahb, &info); } } } } + return true; } static size_t gen8_legacy_snapshot_mvc(struct kgsl_device *device, u8 *buf, @@ -1054,7 +1052,7 @@ static size_t gen8_snapshot_mvc(struct kgsl_device *device, u8 *buf, return (size + sizeof(*header)); } -static void gen8_snapshot_mvc_regs(struct kgsl_device *device, +static bool gen8_snapshot_mvc_regs(struct kgsl_device *device, struct kgsl_snapshot *snapshot, struct gen8_cluster_registers *clusters, size_t num_cluster) @@ -1063,8 +1061,6 @@ static void gen8_snapshot_mvc_regs(struct kgsl_device *device, u64 *ptr, offset = 0; u32 count, slice; struct gen8_cluster_registers_info info = {0}; - size_t (*func)(struct kgsl_device *device, u8 *buf, - size_t remain, void *priv) = gen8_legacy_snapshot_mvc; if (CD_SCRIPT_CHECK(device)) { for (i = 0; i < num_cluster; i++) { @@ -1078,10 +1074,11 @@ static void gen8_snapshot_mvc_regs(struct kgsl_device *device, info.context_id = cluster->context_id; info.slice_id = SLICE_ID(cluster->slice_region, j); kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, func, &info); + KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, + gen8_legacy_snapshot_mvc, &info); } } - return; + return true; } for (i = 0; i < num_cluster; i++) { @@ -1121,15 +1118,15 @@ static 
void gen8_snapshot_mvc_regs(struct kgsl_device *device, /* Marker for end of script */ CD_FINISH(ptr, offset); - func = gen8_legacy_snapshot_mvc; - /* Try to run the crash dumper */ - if (_gen8_do_crashdump(device)) - func = gen8_snapshot_mvc; + /* Try to run the crash dumper and bail if it times out */ + if (!_gen8_do_crashdump(device)) + return false; kgsl_snapshot_add_section(device, - KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, func, &info); + KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, gen8_snapshot_mvc, &info); } } + return true; } /* gen8_dbgc_debug_bus_read() - Read data from trace bus */ @@ -1449,14 +1446,12 @@ static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev, } } -static void gen8_reglist_snapshot(struct kgsl_device *device, +static bool gen8_reglist_snapshot(struct kgsl_device *device, struct kgsl_snapshot *snapshot) { u64 *ptr, offset = 0; u32 i, j, r, slices; struct gen8_reg_list *reg_list = gen8_snapshot_block_list->reg_list; - size_t (*func)(struct kgsl_device *device, u8 *buf, size_t remain, - void *priv) = gen8_legacy_snapshot_registers; struct gen8_reg_list_info info = {0}; if (CD_SCRIPT_CHECK(device)) { @@ -1468,10 +1463,10 @@ static void gen8_reglist_snapshot(struct kgsl_device *device, info.regs = regs; info.slice_id = SLICE_ID(regs->slice_region, j); kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3, - snapshot, func, &info); + snapshot, gen8_legacy_snapshot_registers, &info); } } - return; + return true; } for (i = 0; reg_list[i].regs; i++) { @@ -1505,15 +1500,16 @@ static void gen8_reglist_snapshot(struct kgsl_device *device, /* Marker for end of script */ CD_FINISH(ptr, offset); - func = gen8_legacy_snapshot_registers; - /* Try to run the crash dumper */ - if (_gen8_do_crashdump(device)) - func = gen8_snapshot_registers; + /* Try to run the crash dumper and bail if it times out */ + if (!_gen8_do_crashdump(device)) + return false; kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3, - snapshot, func, 
&info); + snapshot, gen8_snapshot_registers, &info); } } + + return true; } static size_t gen8_snapshot_cx_misc_registers(struct kgsl_device *device, u8 *buf, @@ -1548,7 +1544,7 @@ static size_t gen8_snapshot_cx_misc_registers(struct kgsl_device *device, u8 *bu return size; } -static void gen8_cx_misc_regs_snapshot(struct kgsl_device *device, +static bool gen8_cx_misc_regs_snapshot(struct kgsl_device *device, struct kgsl_snapshot *snapshot) { u64 *ptr, offset = 0; @@ -1572,24 +1568,27 @@ static void gen8_cx_misc_regs_snapshot(struct kgsl_device *device, /* Marker for end of script */ CD_FINISH(ptr, offset); - /* Try to run the crash dumper */ + /* Try to run the crash dumper if it fails return */ if (_gen8_do_crashdump(device)) { kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot, gen8_snapshot_cx_misc_registers, (void *)gen8_snapshot_block_list->cx_misc_regs); - return; - } + return true; + } else + return false; legacy_snapshot: regs_ptr = (const u32 *)gen8_snapshot_block_list->cx_misc_regs; if (!kgsl_regmap_valid_offset(&device->regmap, regs_ptr[0])) { WARN_ONCE(1, "cx_misc registers are not defined in device tree"); - return; + return true; } kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot, adreno_snapshot_registers_v2, (void *)regs_ptr); + + return true; } void gen8_snapshot_external_core_regs(struct kgsl_device *device, @@ -1624,14 +1623,19 @@ void gen8_snapshot(struct adreno_device *adreno_dev, u32 i, slice_mask; const struct adreno_gen8_core *gpucore = to_gen8_core(ADRENO_DEVICE(device)); int is_current_rt; - gen8_crashdump_timedout = false; gen8_snapshot_block_list = gpucore->gen8_snapshot_block_list; /* External core and CX MISC regs are dumped in the beginning of gmu snapshot */ if (!gmu_core_isenabled(device)) { gen8_snapshot_external_core_regs(device, snapshot); - gen8_cx_misc_regs_snapshot(device, snapshot); + + /* + * If crashdumper timed out while dumping this section skip everything + * since even 
AHB accesses to the GPU might cause NoC errors. + */ + if (!gen8_cx_misc_regs_snapshot(device, snapshot)) + return; } gen8_snapshot_cx_debugbus(adreno_dev, snapshot); @@ -1701,7 +1705,12 @@ void gen8_snapshot(struct adreno_device *adreno_dev, } } - gen8_reglist_snapshot(device, snapshot); + /* + * If crashdumper timed out while dumping this section skip everything + * since even AHB acceses to the GPU might cause NoC errors. + */ + if (!gen8_reglist_snapshot(device, snapshot)) + goto err; for (i = 0; i < gen8_snapshot_block_list->index_registers_len; i++) { kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL @@ -1717,22 +1726,31 @@ void gen8_snapshot(struct adreno_device *adreno_dev, /* Mempool debug data */ gen8_snapshot_mempool(device, snapshot); - /* CP MVC register section */ - gen8_snapshot_mvc_regs(device, snapshot, - gen8_snapshot_block_list->cp_clusters, gen8_snapshot_block_list->num_cp_clusters); + /* + * CP MVC register section + * If crashdumper timed out while dumping any section below skip everything + * since even AHB acceses to the GPU might cause NoC errors. 
+ */ + if (!gen8_snapshot_mvc_regs(device, snapshot, + gen8_snapshot_block_list->cp_clusters, gen8_snapshot_block_list->num_cp_clusters)) + goto err; /* MVC register section */ - gen8_snapshot_mvc_regs(device, snapshot, - gen8_snapshot_block_list->clusters, gen8_snapshot_block_list->num_clusters); + if (!gen8_snapshot_mvc_regs(device, snapshot, + gen8_snapshot_block_list->clusters, gen8_snapshot_block_list->num_clusters)) + goto err; /* registers dumped through DBG AHB */ - gen8_snapshot_dbgahb_regs(device, snapshot); + if (!gen8_snapshot_dbgahb_regs(device, snapshot)) + goto err; /* Shader memory */ - gen8_snapshot_shader(device, snapshot); + if (!gen8_snapshot_shader(device, snapshot)) + goto err; kgsl_regwrite(device, GEN8_RBBM_SNAPSHOT_STATUS, 0x0); +err: /* Preemption record */ adreno_snapshot_preemption_record(device, snapshot); From 6ed0072e64aa2b2bc87d6fd6feb07eb882d707f3 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Fri, 27 Sep 2024 12:40:17 -0600 Subject: [PATCH 0934/1016] kgsl: hwsched: Fix pagefault recovery in SMMU TERMINATE model In TERMINATE model, we are currently relying on the smmu fault handler to clear FSR to resume SMMU transactions. Which means if smmu fault handler takes a while to finish, the SMMU will keep terminating transactions. Say at the same time kgsl attempts recovery, then recovery would fail during CP INIT as the GPU SMMU is still terminating transactions. To address this, first make sure the smmu handler doesn't clear FSR in the TERMINATE model. Secondly, add means to clear the FSR in kgsl, before attempting CP INIT. Also make sure that we don't run crashdumper if we detect that GC SMMU is in TERMINATE mode in presence of an outstanding pagefault. 
Change-Id: I8b7a96492760ea4114dfdc73ef2b6d6228b36e7d Signed-off-by: Harshdeep Dhatt Signed-off-by: Pankaj Gupta --- adreno.c | 4 +-- adreno.h | 2 +- adreno_dispatch.c | 4 +-- adreno_gen8_snapshot.c | 1 + adreno_hwsched.c | 6 ++-- kgsl_iommu.c | 81 +++++++++++++++++++++++++++++++++++------- kgsl_iommu.h | 1 + kgsl_mmu.h | 9 +++++ 8 files changed, 88 insertions(+), 20 deletions(-) diff --git a/adreno.c b/adreno.c index 1a714d4004..109e9c5a29 100644 --- a/adreno.c +++ b/adreno.c @@ -2225,7 +2225,7 @@ int adreno_reset(struct kgsl_device *device, int fault) * the IOMMU hardware needs a reset too) */ - if (!(fault & ADRENO_IOMMU_PAGE_FAULT)) + if (!(fault & ADRENO_IOMMU_STALL_ON_PAGE_FAULT)) ret = adreno_soft_reset(device); if (ret) { @@ -3389,7 +3389,7 @@ bool adreno_smmu_is_stalled(struct adreno_device *adreno_dev) fault = adreno_gpu_fault(adreno_dev); - return ((fault & ADRENO_IOMMU_PAGE_FAULT) && + return ((fault & ADRENO_IOMMU_STALL_ON_PAGE_FAULT) && test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &mmu->pfpolicy)) ? true : false; } diff --git a/adreno.h b/adreno.h index f5a04432c3..7814e2a709 100644 --- a/adreno.h +++ b/adreno.h @@ -260,7 +260,7 @@ enum adreno_gpurev { #define ADRENO_SOFT_FAULT BIT(0) #define ADRENO_HARD_FAULT BIT(1) #define ADRENO_TIMEOUT_FAULT BIT(2) -#define ADRENO_IOMMU_PAGE_FAULT BIT(3) +#define ADRENO_IOMMU_STALL_ON_PAGE_FAULT BIT(3) #define ADRENO_PREEMPT_FAULT BIT(4) #define ADRENO_GMU_FAULT BIT(5) #define ADRENO_CTX_DETATCH_TIMEOUT_FAULT BIT(6) diff --git a/adreno_dispatch.c b/adreno_dispatch.c index be515083d4..816dc6b548 100644 --- a/adreno_dispatch.c +++ b/adreno_dispatch.c @@ -1941,7 +1941,7 @@ static int dispatcher_do_fault(struct adreno_device *adreno_dev) * proceed if the fault handler has already run in the IRQ thread, * else return early to give the fault handler a chance to run. 
*/ - if (!(fault & ADRENO_IOMMU_PAGE_FAULT) && gx_on) { + if (!(fault & ADRENO_IOMMU_STALL_ON_PAGE_FAULT) && gx_on) { if (adreno_smmu_is_stalled(adreno_dev)) { mutex_unlock(&device->mutex); mutex_unlock(&adreno_dev->fault_recovery_mutex); @@ -2019,7 +2019,7 @@ static int dispatcher_do_fault(struct adreno_device *adreno_dev) gpudev->gpu_keepalive(adreno_dev, false); /* Terminate the stalled transaction and resume the IOMMU */ - if (fault & ADRENO_IOMMU_PAGE_FAULT) + if (fault & ADRENO_IOMMU_STALL_ON_PAGE_FAULT) kgsl_mmu_pagefault_resume(&device->mmu, true); /* Reset the dispatcher queue */ diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 57d2b9b3e5..794a5405fc 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -129,6 +129,7 @@ static void CD_FINISH(u64 *ptr, u32 offset) static bool CD_SCRIPT_CHECK(struct kgsl_device *device) { return (adreno_smmu_is_stalled(ADRENO_DEVICE(device)) || + (kgsl_mmu_ctx_terminated_on_fault(&device->mmu)) || (!device->snapshot_crashdumper) || IS_ERR_OR_NULL(gen8_capturescript) || IS_ERR_OR_NULL(gen8_crashdump_registers) || diff --git a/adreno_hwsched.c b/adreno_hwsched.c index c703b3df8d..e7cc7dbf4c 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -1697,7 +1697,7 @@ static void adreno_hwsched_reset_and_snapshot_legacy(struct adreno_device *adren * faulted. 
*/ obj = get_fault_cmdobj(adreno_dev, cmd->ctxt_id, cmd->ts); - if (!obj && (fault & ADRENO_IOMMU_PAGE_FAULT)) + if (!obj && (fault & ADRENO_IOMMU_STALL_ON_PAGE_FAULT)) obj = get_active_cmdobj(adreno_dev); if (obj) { @@ -1782,7 +1782,7 @@ static void adreno_hwsched_reset_and_snapshot(struct adreno_device *adreno_dev, obj = get_fault_cmdobj(adreno_dev, cmd->gc.ctxt_id, cmd->gc.ts); obj_lpac = get_fault_cmdobj(adreno_dev, cmd->lpac.ctxt_id, cmd->lpac.ts); - if (!obj && (fault & ADRENO_IOMMU_PAGE_FAULT)) + if (!obj && (fault & ADRENO_IOMMU_STALL_ON_PAGE_FAULT)) obj = get_active_cmdobj(adreno_dev); if (obj) { @@ -1798,7 +1798,7 @@ static void adreno_hwsched_reset_and_snapshot(struct adreno_device *adreno_dev, do_fault_header(adreno_dev, drawobj, fault); - if (!obj_lpac && (fault & ADRENO_IOMMU_PAGE_FAULT)) + if (!obj_lpac && (fault & ADRENO_IOMMU_STALL_ON_PAGE_FAULT)) obj_lpac = get_active_cmdobj_lpac(adreno_dev); if (!obj && !obj_lpac) { diff --git a/kgsl_iommu.c b/kgsl_iommu.c index f1fb362a48..6d2aba5be2 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -1200,7 +1200,7 @@ static int kgsl_iommu_fault_handler(struct kgsl_mmu *mmu, struct kgsl_device *device = KGSL_MMU_DEVICE(mmu); u64 ptbase; u32 contextidr; - bool stall; + bool stall, terminate; struct kgsl_process_private *private; struct kgsl_context *context; @@ -1211,13 +1211,14 @@ static int kgsl_iommu_fault_handler(struct kgsl_mmu *mmu, context = kgsl_context_get(device, contextidr); stall = kgsl_iommu_check_stall_on_fault(ctx, mmu, flags); + terminate = test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &mmu->pfpolicy) && + test_bit(KGSL_MMU_PAGEFAULT_TERMINATE, &mmu->features); kgsl_iommu_print_fault(mmu, ctx, addr, ptbase, contextidr, flags, private, context); kgsl_iommu_add_fault_info(context, addr, flags); - if (stall) { - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + if (stall || terminate) { u32 sctlr; /* @@ -1228,11 +1229,18 @@ static int kgsl_iommu_fault_handler(struct kgsl_mmu *mmu, sctlr &= ~(0x1 
<< KGSL_IOMMU_SCTLR_CFIE_SHIFT); KGSL_IOMMU_SET_CTX_REG(ctx, KGSL_IOMMU_CTX_SCTLR, sctlr); - /* This is used by reset/recovery path */ - ctx->stalled_on_fault = true; + /* Make sure the above write goes through before we return */ + wmb(); - /* Go ahead with recovery*/ - adreno_scheduler_fault(adreno_dev, ADRENO_IOMMU_PAGE_FAULT); + /* This is used by reset/recovery path */ + if (stall) { + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + ctx->stalled_on_fault = true; + + /* Go ahead with recovery*/ + adreno_scheduler_fault(adreno_dev, ADRENO_IOMMU_STALL_ON_PAGE_FAULT); + } } kgsl_context_put(context); @@ -1242,11 +1250,11 @@ static int kgsl_iommu_fault_handler(struct kgsl_mmu *mmu, * Fallback to smmu fault handler during globals faults to print useful * debug information. */ - if (!stall && kgsl_iommu_addr_is_global(mmu, addr)) + if ((!(stall || terminate)) && kgsl_iommu_addr_is_global(mmu, addr)) return -ENOSYS; - /* Return -EBUSY to keep the IOMMU driver from resuming on a stall */ - return stall ? -EBUSY : 0; + /* Return -EBUSY to keep the IOMMU driver from resuming on a stall or terminate */ + return (stall || terminate) ? 
-EBUSY : 0; } static int kgsl_iommu_default_fault_handler(struct iommu_domain *domain, @@ -1786,6 +1794,48 @@ static void kgsl_iommu_configure_gpu_sctlr(struct kgsl_mmu *mmu, KGSL_IOMMU_SET_CTX_REG(ctx, KGSL_IOMMU_CTX_SCTLR, sctlr_val); } +static bool _ctx_terminated_on_fault(struct kgsl_mmu *mmu, struct kgsl_iommu_context *ctx) +{ + u32 fsr; + + /* + * We only need this if SMMU is configured to be in TERMINATE mode in the presence of an + * outstanding fault + */ + if (!test_bit(KGSL_MMU_PAGEFAULT_TERMINATE, &mmu->features) || + !test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &mmu->pfpolicy)) + return false; + + kgsl_iommu_enable_clk(mmu); + + fsr = KGSL_IOMMU_GET_CTX_REG(ctx, KGSL_IOMMU_CTX_FSR); + + /* Make sure the above read finishes before we compare it */ + rmb(); + + kgsl_iommu_disable_clk(mmu); + + /* See if there is an outstanding fault */ + if (fsr & ~KGSL_IOMMU_FSR_TRANSLATION_FORMAT_MASK) + return true; + + return false; +} + +/* + * kgsl_iommu_ctx_terminated_on_fault - Detect if GC SMMU is terminating transactions in the + * presense of an outstanding fault. 
+ */ +static bool kgsl_iommu_ctx_terminated_on_fault(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu *iommu = &mmu->iommu; + + if (_ctx_terminated_on_fault(mmu, &iommu->user_context)) + return true; + + return false; +} + static int kgsl_iommu_start(struct kgsl_mmu *mmu) { struct kgsl_iommu *iommu = &mmu->iommu; @@ -1823,8 +1873,13 @@ static void kgsl_iommu_context_clear_fsr(struct kgsl_mmu *mmu, struct kgsl_iommu { unsigned int sctlr_val; - if (ctx->stalled_on_fault) { + if (ctx->stalled_on_fault || _ctx_terminated_on_fault(mmu, ctx)) { + struct kgsl_device *device = KGSL_MMU_DEVICE(mmu); + kgsl_iommu_enable_clk(mmu); + + dev_err_ratelimited(device->dev, "Clearing pagefault bits in FSR\n"); + KGSL_IOMMU_SET_CTX_REG(ctx, KGSL_IOMMU_CTX_FSR, 0xffffffff); /* * Re-enable context fault interrupts after clearing @@ -1840,7 +1895,8 @@ static void kgsl_iommu_context_clear_fsr(struct kgsl_mmu *mmu, struct kgsl_iommu */ wmb(); kgsl_iommu_disable_clk(mmu); - ctx->stalled_on_fault = false; + if (ctx->stalled_on_fault) + ctx->stalled_on_fault = false; } } @@ -2937,6 +2993,7 @@ static void kgsl_iommu_sysfs_init(struct kgsl_mmu *mmu) static const struct kgsl_mmu_ops kgsl_iommu_ops = { .mmu_close = kgsl_iommu_close, .mmu_start = kgsl_iommu_start, + .mmu_ctx_terminated_on_fault = kgsl_iommu_ctx_terminated_on_fault, .mmu_clear_fsr = kgsl_iommu_clear_fsr, .mmu_get_current_ttbr0 = kgsl_iommu_get_current_ttbr0, .mmu_enable_clk = kgsl_iommu_enable_clk, diff --git a/kgsl_iommu.h b/kgsl_iommu.h index 4db2a55751..2aec6e9723 100644 --- a/kgsl_iommu.h +++ b/kgsl_iommu.h @@ -108,6 +108,7 @@ /* FSR fields */ #define KGSL_IOMMU_FSR_SS_SHIFT 30 +#define KGSL_IOMMU_FSR_TRANSLATION_FORMAT_MASK GENMASK(10, 9) /* ASID field in TTBR register */ #define KGSL_IOMMU_ASID_START_BIT 48 diff --git a/kgsl_mmu.h b/kgsl_mmu.h index cbdd102134..c6d4dc91f4 100644 --- a/kgsl_mmu.h +++ b/kgsl_mmu.h @@ -104,6 +104,7 @@ struct kgsl_mmu; struct kgsl_mmu_ops { void (*mmu_close)(struct kgsl_mmu *mmu); int 
(*mmu_start)(struct kgsl_mmu *mmu); + bool (*mmu_ctx_terminated_on_fault)(struct kgsl_mmu *mmu); uint64_t (*mmu_get_current_ttbr0)(struct kgsl_mmu *mmu, struct kgsl_context *context); void (*mmu_pagefault_resume)(struct kgsl_mmu *mmu, bool terminate); void (*mmu_clear_fsr)(struct kgsl_mmu *mmu); @@ -292,6 +293,14 @@ static inline void kgsl_mmu_put_gpuaddr(struct kgsl_pagetable *pagetable, pagetable->pt_ops->put_gpuaddr(memdesc); } +static inline bool kgsl_mmu_ctx_terminated_on_fault(struct kgsl_mmu *mmu) +{ + if (MMU_OP_VALID(mmu, mmu_ctx_terminated_on_fault)) + return mmu->mmu_ops->mmu_ctx_terminated_on_fault(mmu); + + return false; +} + static inline u64 kgsl_mmu_get_current_ttbr0(struct kgsl_mmu *mmu, struct kgsl_context *context) { if (MMU_OP_VALID(mmu, mmu_get_current_ttbr0)) From 1a2db2188d2697c22f242148deaceb1ffd2b4bbf Mon Sep 17 00:00:00 2001 From: Carter Cooper Date: Thu, 24 Oct 2024 10:38:57 -0600 Subject: [PATCH 0935/1016] kgsl: gen8: Fix VRB parameters for VRB_CL_NO_FT_TIMEOUT Commit 10b75cce131d ("kgsl: gmu: Add sanity checks in gmu_core_set_vrb_register") was created and merged before the implementation for VRB_CL_NO_FT_TIMEOUT was merged resulting in the function parameters to now be incorrect. Correct the parameters being passed to the helper function. 
Change-Id: I76b943b7caea376a6bdb528b76d1a0ef05f4c53c Signed-off-by: Carter Cooper Signed-off-by: Pankaj Gupta --- adreno_gen8_hwsched.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/adreno_gen8_hwsched.c b/adreno_gen8_hwsched.c index 3924342e46..c0639e5ff1 100644 --- a/adreno_gen8_hwsched.c +++ b/adreno_gen8_hwsched.c @@ -700,8 +700,7 @@ static int gen8_hwsched_gmu_memory_init(struct adreno_device *adreno_dev) /* Set the CL infinite timeout VRB override (if declared in gpulist) */ if (gen8_core->cl_no_ft_timeout_ms) - gmu_core_set_vrb_register(gmu->vrb->hostptr, - VRB_CL_NO_FT_TIMEOUT, + gmu_core_set_vrb_register(gmu->vrb, VRB_CL_NO_FT_TIMEOUT, gen8_core->cl_no_ft_timeout_ms); return 0; From 1c5da73f846d871a0e1cf4e28619d46962716b7b Mon Sep 17 00:00:00 2001 From: Carter Cooper Date: Thu, 10 Oct 2024 14:14:13 -0600 Subject: [PATCH 0936/1016] kgsl: gen8: Update CL Fault timeout Update the CL fault timeout for all gen8 targets to 6.5s from 8s. Change-Id: I6f19283698e57e9798ed0c09859fec1ea5d772bd Signed-off-by: Carter Cooper Signed-off-by: Pankaj Gupta --- adreno-gpulist.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index ae34582bb5..67409e8b17 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2956,7 +2956,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_0 = { .noc_timeout_us = 3410, /* 3.41 msec */ .ctxt_record_size = (13536 * SZ_1K), .preempt_level = 1, - .cl_no_ft_timeout_ms = 8000, + .cl_no_ft_timeout_ms = 6500, .therm_profile = &therm_profile_8_0_0, }; @@ -2998,7 +2998,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_0_1 = { .noc_timeout_us = 3410, /* 3.41 msec */ .ctxt_record_size = (13536 * SZ_1K), .preempt_level = 1, - .cl_no_ft_timeout_ms = 8000, + .cl_no_ft_timeout_ms = 6500, .therm_profile = &therm_profile_8_0_0, }; @@ -3107,7 +3107,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_4_0 = { .bcl_data = 1, .ctxt_record_size = 
(13536 * SZ_1K), .noc_timeout_us = 3410, /* 3.41 msec */ - .cl_no_ft_timeout_ms = 8000, + .cl_no_ft_timeout_ms = 6500, }; extern const struct gen8_snapshot_block_list gen8_3_0_snapshot_block_list; From 83334cf5e0de35fa20504bca2a4243de8dad1b81 Mon Sep 17 00:00:00 2001 From: Archana Sriram Date: Thu, 17 Oct 2024 15:38:26 +0530 Subject: [PATCH 0937/1016] kgsl: gmu: Update AB correctly for rpmh vote table creation Currently both AB and IB values are set same for creating the rpmh vote table. For GPU cores that do not support voting AB via GMU, AB should be set to 0. Change-Id: I47f7ded8f83a066329711623662982f089dc41d7 Signed-off-by: Archana Sriram --- adreno_a6xx_rpmh.c | 6 ++++-- adreno_gen7_rpmh.c | 6 ++++-- adreno_gen8_rpmh.c | 7 +++++-- adreno_rpmh.c | 8 +++++--- adreno_rpmh.h | 3 ++- 5 files changed, 20 insertions(+), 10 deletions(-) diff --git a/adreno_a6xx_rpmh.c b/adreno_a6xx_rpmh.c index d32d86816e..5e52d1329f 100644 --- a/adreno_a6xx_rpmh.c +++ b/adreno_a6xx_rpmh.c @@ -170,7 +170,8 @@ static int build_bw_table(struct adreno_device *adreno_dev) int ret; ddr = adreno_rpmh_build_bw_votes(adreno_ddr_bcms, ARRAY_SIZE(adreno_ddr_bcms), - pwr->ddr_table, pwr->ddr_table_count, ACV_GPU_PERFMODE_VOTE, 0); + pwr->ddr_table, pwr->ddr_table_count, ACV_GPU_PERFMODE_VOTE, 0, + adreno_dev->gmu_ab); if (IS_ERR(ddr)) return PTR_ERR(ddr); @@ -179,7 +180,8 @@ static int build_bw_table(struct adreno_device *adreno_dev) if (count > 0) cnoc = adreno_rpmh_build_bw_votes(adreno_cnoc_bcms, - ARRAY_SIZE(adreno_cnoc_bcms), cnoc_table, count, 0, 0); + ARRAY_SIZE(adreno_cnoc_bcms), cnoc_table, count, 0, 0, + adreno_dev->gmu_ab); kfree(cnoc_table); diff --git a/adreno_gen7_rpmh.c b/adreno_gen7_rpmh.c index 568ed9f1f2..0b70f2ab7b 100644 --- a/adreno_gen7_rpmh.c +++ b/adreno_gen7_rpmh.c @@ -164,7 +164,8 @@ static int build_bw_table(struct adreno_device *adreno_dev) perfmode_vote = BIT(3); ddr = adreno_rpmh_build_bw_votes(adreno_ddr_bcms, ARRAY_SIZE(adreno_ddr_bcms), - pwr->ddr_table, 
pwr->ddr_table_count, perfmode_vote, perfmode_lvl); + pwr->ddr_table, pwr->ddr_table_count, perfmode_vote, perfmode_lvl, + adreno_dev->gmu_ab); if (IS_ERR(ddr)) return PTR_ERR(ddr); @@ -173,7 +174,8 @@ static int build_bw_table(struct adreno_device *adreno_dev) if (count > 0) cnoc = adreno_rpmh_build_bw_votes(adreno_cnoc_bcms, - ARRAY_SIZE(adreno_cnoc_bcms), cnoc_table, count, 0, 0); + ARRAY_SIZE(adreno_cnoc_bcms), cnoc_table, count, 0, 0, + adreno_dev->gmu_ab); kfree(cnoc_table); diff --git a/adreno_gen8_rpmh.c b/adreno_gen8_rpmh.c index 6392799aa2..18fa0f9cc0 100644 --- a/adreno_gen8_rpmh.c +++ b/adreno_gen8_rpmh.c @@ -238,7 +238,9 @@ static int build_bw_table(struct adreno_device *adreno_dev) int ret; ddr = adreno_rpmh_build_bw_votes(adreno_ddr_bcms, ARRAY_SIZE(adreno_ddr_bcms), - pwr->ddr_table, pwr->ddr_table_count, ACV_GPU_PERFMODE_VOTE, perfmode_lvl); + pwr->ddr_table, pwr->ddr_table_count, ACV_GPU_PERFMODE_VOTE, perfmode_lvl, + adreno_dev->gmu_ab); + if (IS_ERR(ddr)) return PTR_ERR(ddr); @@ -247,7 +249,8 @@ static int build_bw_table(struct adreno_device *adreno_dev) if (count > 0) cnoc = adreno_rpmh_build_bw_votes(adreno_cnoc_bcms, - ARRAY_SIZE(adreno_cnoc_bcms), cnoc_table, count, 0, 0); + ARRAY_SIZE(adreno_cnoc_bcms), cnoc_table, count, 0, 0, + adreno_dev->gmu_ab); kfree(cnoc_table); diff --git a/adreno_rpmh.c b/adreno_rpmh.c index 3c1bcfc002..775b6f2117 100644 --- a/adreno_rpmh.c +++ b/adreno_rpmh.c @@ -155,7 +155,8 @@ void adreno_rpmh_free_bw_votes(struct rpmh_bw_votes *votes) } struct rpmh_bw_votes *adreno_rpmh_build_bw_votes(struct bcm *bcms, int bcm_count, - u32 *levels, int levels_count, u32 perfmode_vote, u32 perfmode_lvl) + u32 *levels, int levels_count, u32 perfmode_vote, u32 perfmode_lvl, + bool gmu_ab) { struct rpmh_bw_votes *votes; int i; @@ -205,8 +206,9 @@ struct rpmh_bw_votes *adreno_rpmh_build_bw_votes(struct bcm *bcms, int bcm_count return ERR_PTR(-ENOMEM); } - tcs_cmd_data(bcms, bcm_count, levels[i], levels[i], votes->cmds[i], - (i >= 
perfmode_lvl) ? perfmode_vote : 0x0); + tcs_cmd_data(bcms, bcm_count, gmu_ab ? levels[i] : 0x0, + levels[i], votes->cmds[i], + (i >= perfmode_lvl) ? perfmode_vote : 0x0); } return votes; diff --git a/adreno_rpmh.h b/adreno_rpmh.h index 7bdc13b75a..35a7ae8e89 100644 --- a/adreno_rpmh.h +++ b/adreno_rpmh.h @@ -94,11 +94,12 @@ void adreno_rpmh_free_bw_votes(struct rpmh_bw_votes *votes); * @levels_count: Number of performance levels * @perfmode_vote: Performance mode vote * @perfmode_lvl: Initial performance level for performance mode vote + * @gmu_ab: Indicate if GMU supports AB vote * * Return: Pointer to the rpmh_bw_votes structure on success, or an error pointer failure */ struct rpmh_bw_votes *adreno_rpmh_build_bw_votes(struct bcm *bcms, int bcm_count, - u32 *levels, int levels_count, u32 perfmode_vote, u32 perfmode_lvl); + u32 *levels, int levels_count, u32 perfmode_vote, u32 perfmode_lvl, bool gmu_ab); /** * adreno_rpmh_to_cx_hlvl - Convert RPMh VLVL to CX HLVL level From 256946ed097a70e6f3b8428b1fec2a816c18a939 Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Sun, 1 Sep 2024 22:59:27 +0530 Subject: [PATCH 0938/1016] kgsl: snapshot: Add support to parse IB3 data Currently, the recursive calls for handling IB objects were limited to IB1 and IB2. To fix this added required support to dump IB3 data. 
Change-Id: I92fdbc27a349c552f4728b1daec831a6ed5c26ab Signed-off-by: Sanjay Yadav --- adreno_cp_parser.c | 83 ++++++++++++++++++++++++++++-------------- adreno_gen7_snapshot.c | 25 +++++-------- adreno_gen8_snapshot.c | 5 +++ adreno_snapshot.c | 17 +-------- adreno_snapshot.h | 17 +++++++++ gen7_reg.h | 3 ++ kgsl_device.h | 12 ++++-- 7 files changed, 99 insertions(+), 63 deletions(-) diff --git a/adreno_cp_parser.c b/adreno_cp_parser.c index 23fd9baff1..32891165bf 100644 --- a/adreno_cp_parser.c +++ b/adreno_cp_parser.c @@ -13,6 +13,11 @@ #define MAX_IB_OBJS 1000 #define NUM_SET_DRAW_GROUPS 32 +#define IB_LEVEL_1 1 +#define IB_LEVEL_2 2 +#define IB_LEVEL_3 3 + +struct snapshot_ib_meta metadata; struct set_draw_state { uint64_t cmd_stream_addr; @@ -268,7 +273,7 @@ static int ib_parse_type7_set_draw_state(struct kgsl_device *device, ret = adreno_ib_find_objs(device, process, cmd_stream_addr, cmd_stream_dwords, 0, SNAPSHOT_GPU_OBJECT_DRAW, - ib_obj_list, 2); + ib_obj_list, IB_LEVEL_2); if (ret) break; continue; @@ -281,7 +286,7 @@ static int ib_parse_type7_set_draw_state(struct kgsl_device *device, ret = adreno_ib_find_objs(device, process, gpuaddr, (ptr[i] & 0x0000FFFF), 0, SNAPSHOT_GPU_OBJECT_IB, - ib_obj_list, 2); + ib_obj_list, IB_LEVEL_2); if (ret) break; } @@ -290,38 +295,38 @@ static int ib_parse_type7_set_draw_state(struct kgsl_device *device, } /* - * adreno_cp_parse_ib2() - Wrapper function around IB2 parsing + * adreno_cp_parse_ibn() - Wrapper function around IBn parsing * @device: Device pointer * @process: Process in which the IB is allocated - * @gpuaddr: IB2 gpuaddr - * @dwords: IB2 size in dwords - * @ib2base: Base address of active IB2 + * @gpuaddr: IB gpuaddr + * @dwords: IB size in dwords + * @ibbase: Base address of active IB * @ib_obj_list: List of objects found in IB - * @ib_level: The level from which function is called, either from IB1 or IB2 + * @ib_level: The level from which function is called from IBn-1 * - * Function does some checks to 
ensure that IB2 parsing is called from IB1 - * and then calls the function to find objects in IB2. + * Function does some checks to ensure that IBn parsing is called from IBn-1 + * and then calls the function to find objects in IBn. */ -static int adreno_cp_parse_ib2(struct kgsl_device *device, +static int adreno_cp_parse_ibn(struct kgsl_device *device, struct kgsl_process_private *process, - uint64_t gpuaddr, uint64_t dwords, uint64_t ib2base, + u64 gpuaddr, u64 dwords, u64 ibbase, struct adreno_ib_object_list *ib_obj_list, int ib_level) { int i; /* - * We can only expect an IB2 in IB1, if we are - * already processing an IB2 then return error + * We can only expect an IBn in IBn-1, if we are + * already processing an IBn then return error */ - if (ib_level == 2) + if (ib_level >= IB_LEVEL_3) return -EINVAL; - /* Save current IB2 statically */ - if (ib2base == gpuaddr) + /* Save current IBn statically */ + if (ibbase == gpuaddr) kgsl_snapshot_push_object(device, process, gpuaddr, dwords); /* - * only try to find sub objects iff this IB has + * Only try to find sub objects if this IBn has * not been processed already */ for (i = 0; i < ib_obj_list->num_objs; i++) { @@ -334,8 +339,30 @@ static int adreno_cp_parse_ib2(struct kgsl_device *device, return 0; } - return adreno_ib_find_objs(device, process, gpuaddr, dwords, ib2base, - SNAPSHOT_GPU_OBJECT_IB, ib_obj_list, 2); + return adreno_ib_find_objs(device, process, gpuaddr, dwords, ibbase, + SNAPSHOT_GPU_OBJECT_IB, ib_obj_list, ib_level + 1); +} + +static s64 get_ib_base(struct adreno_device *adreno_dev, int ib_level) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + s64 ibbase = 0; + + switch (ib_level) { + case IB_LEVEL_2: + ibbase = metadata.ib2base; + break; + case IB_LEVEL_3: + if (!adreno_is_a5xx(adreno_dev) && !adreno_is_a6xx(adreno_dev)) + ibbase = metadata.ib3base; + break; + default: + /* Invalid IB level */ + dev_err(device->dev, "Invalid IB level %d\n", ib_level); + return -EINVAL; + } + + 
return ibbase; } /* @@ -350,7 +377,7 @@ static int adreno_cp_parse_ib2(struct kgsl_device *device, * @ib_level: Indicates if IB1 or IB2 is being processed * * Finds all IB objects in a given IB and puts then in a list. Can be called - * recursively for the IB2's in the IB1's + * recursively for the IBn in the IBn-1 * Returns 0 on success else error code */ static int adreno_ib_find_objs(struct kgsl_device *device, @@ -417,14 +444,14 @@ static int adreno_ib_find_objs(struct kgsl_device *device, if (pkt_is_type7(src[i])) { if (adreno_cmd_is_ib(adreno_dev, src[i])) { - uint64_t size = src[i + 3]; - uint64_t gpuaddrib2 = src[i + 2]; + u64 size = src[i + 3]; + u64 gpuaddribn = ((u64)(src[i + 2]) << 32) | src[i + 1]; + s64 next_ibbase = get_ib_base(adreno_dev, ib_level + IB_LEVEL_1); - gpuaddrib2 = gpuaddrib2 << 32 | src[i + 1]; - - ret = adreno_cp_parse_ib2(device, process, - gpuaddrib2, size, ib2base, - ib_obj_list, ib_level); + if (next_ibbase == -EINVAL) + goto done; + ret = adreno_cp_parse_ibn(device, process, + gpuaddribn, size, (u64)next_ibbase, ib_obj_list, ib_level); if (ret) goto done; } else { @@ -504,7 +531,7 @@ int adreno_ib_create_object_list(struct kgsl_device *device, } ret = adreno_ib_find_objs(device, process, gpuaddr, dwords, ib2base, - SNAPSHOT_GPU_OBJECT_IB, ib_obj_list, 1); + SNAPSHOT_GPU_OBJECT_IB, ib_obj_list, IB_LEVEL_1); /* Even if there was an error return the remaining objects found */ if (ib_obj_list->num_objs) diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c index 1d4b63bd85..a08be93b30 100644 --- a/adreno_gen7_snapshot.c +++ b/adreno_gen7_snapshot.c @@ -1642,7 +1642,7 @@ void gen7_snapshot(struct adreno_device *adreno_dev, { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); unsigned int i; - u32 hi, lo, cgc = 0, cgc1 = 0, cgc2 = 0; + u32 cgc = 0, cgc1 = 0, cgc2 = 0; const struct adreno_gen7_core *gpucore = to_gen7_core(ADRENO_DEVICE(device)); int is_current_rt; @@ -1691,28 +1691,21 @@ void gen7_snapshot(struct adreno_device 
*adreno_dev, if (is_current_rt) sched_set_normal(current, 0); - kgsl_regread(device, GEN7_CP_IB1_BASE, &lo); - kgsl_regread(device, GEN7_CP_IB1_BASE_HI, &hi); + kgsl_regread64(device, GEN7_CP_IB1_BASE_HI, GEN7_CP_IB1_BASE, &snapshot->ib1base); - snapshot->ib1base = (((u64) hi) << 32) | lo; + kgsl_regread64(device, GEN7_CP_IB2_BASE_HI, GEN7_CP_IB2_BASE, &snapshot->ib2base); - kgsl_regread(device, GEN7_CP_IB2_BASE, &lo); - kgsl_regread(device, GEN7_CP_IB2_BASE_HI, &hi); - - snapshot->ib2base = (((u64) hi) << 32) | lo; + kgsl_regread64(device, GEN7_CP_IB3_BASE_HI, GEN7_CP_IB3_BASE, &snapshot->ib3base); kgsl_regread(device, GEN7_CP_IB1_REM_SIZE, &snapshot->ib1size); kgsl_regread(device, GEN7_CP_IB2_REM_SIZE, &snapshot->ib2size); + kgsl_regread(device, GEN7_CP_IB3_REM_SIZE, &snapshot->ib3size); - kgsl_regread(device, GEN7_CP_LPAC_IB1_BASE, &lo); - kgsl_regread(device, GEN7_CP_LPAC_IB1_BASE_HI, &hi); + kgsl_regread64(device, GEN7_CP_LPAC_IB1_BASE_HI, + GEN7_CP_LPAC_IB1_BASE, &snapshot->ib1base_lpac); - snapshot->ib1base_lpac = (((u64) hi) << 32) | lo; - - kgsl_regread(device, GEN7_CP_LPAC_IB2_BASE, &lo); - kgsl_regread(device, GEN7_CP_LPAC_IB2_BASE_HI, &hi); - - snapshot->ib2base_lpac = (((u64) hi) << 32) | lo; + kgsl_regread64(device, GEN7_CP_LPAC_IB2_BASE_HI, + GEN7_CP_LPAC_IB2_BASE, &snapshot->ib2base_lpac); kgsl_regread(device, GEN7_CP_LPAC_IB1_REM_SIZE, &snapshot->ib1size_lpac); kgsl_regread(device, GEN7_CP_LPAC_IB2_REM_SIZE, &snapshot->ib2size_lpac); diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 6f72e546b6..bfb5bc86f1 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -1655,10 +1655,15 @@ void gen8_snapshot(struct adreno_device *adreno_dev, gen8_regread64_aperture(device, GEN8_CP_IB2_BASE_LO_PIPE, GEN8_CP_IB2_BASE_HI_PIPE, &snapshot->ib2base, PIPE_BR, 0, 0); + gen8_regread64_aperture(device, GEN8_CP_IB3_BASE_LO_PIPE, + GEN8_CP_IB3_BASE_HI_PIPE, &snapshot->ib3base, PIPE_BR, 0, 0); + gen8_regread_aperture(device, 
GEN8_CP_IB1_REM_SIZE_PIPE, &snapshot->ib1size, PIPE_BR, 0, 0); gen8_regread_aperture(device, GEN8_CP_IB2_REM_SIZE_PIPE, &snapshot->ib2size, PIPE_BR, 0, 0); + gen8_regread_aperture(device, GEN8_CP_IB3_REM_SIZE_PIPE, + &snapshot->ib3size, PIPE_BR, 0, 0); if (ADRENO_FEATURE(adreno_dev, ADRENO_LPAC)) { gen8_regread64_aperture(device, GEN8_CP_IB1_BASE_LO_PIPE, diff --git a/adreno_snapshot.c b/adreno_snapshot.c index 6c0bd60e84..ad56f05b62 100644 --- a/adreno_snapshot.c +++ b/adreno_snapshot.c @@ -583,19 +583,6 @@ out: return ret; } -struct snapshot_ib_meta { - struct kgsl_snapshot *snapshot; - struct kgsl_snapshot_object *obj; - uint64_t ib1base; - uint64_t ib1size; - uint64_t ib2base; - uint64_t ib2size; - u64 ib1base_lpac; - u64 ib1size_lpac; - u64 ib2base_lpac; - u64 ib2size_lpac; -}; - static void kgsl_snapshot_add_active_ib_obj_list(struct kgsl_device *device, struct kgsl_snapshot *snapshot) { @@ -795,14 +782,14 @@ static size_t snapshot_ib(struct kgsl_device *device, u8 *buf, static void dump_object(struct kgsl_device *device, int obj, struct kgsl_snapshot *snapshot) { - struct snapshot_ib_meta metadata; - metadata.snapshot = snapshot; metadata.obj = &objbuf[obj]; metadata.ib1base = snapshot->ib1base; metadata.ib1size = snapshot->ib1size; metadata.ib2base = snapshot->ib2base; metadata.ib2size = snapshot->ib2size; + metadata.ib3base = snapshot->ib3base; + metadata.ib3size = snapshot->ib3size; metadata.ib1base_lpac = snapshot->ib1base_lpac; metadata.ib1size_lpac = snapshot->ib1size_lpac; metadata.ib2base_lpac = snapshot->ib2base_lpac; diff --git a/adreno_snapshot.h b/adreno_snapshot.h index 6d330624a2..88422b0cfb 100644 --- a/adreno_snapshot.h +++ b/adreno_snapshot.h @@ -24,6 +24,23 @@ #define REG_COUNT(_ptr) ((_ptr[1] - _ptr[0]) + 1) +struct snapshot_ib_meta { + struct kgsl_snapshot *snapshot; + struct kgsl_snapshot_object *obj; + u64 ib1base; + u64 ib1size; + u64 ib2base; + u64 ib2size; + u64 ib3base; + u64 ib3size; + u64 ib1base_lpac; + u64 ib1size_lpac; + u64 
ib2base_lpac; + u64 ib2size_lpac; +}; + +extern struct snapshot_ib_meta metadata; + void adreno_snapshot_registers(struct kgsl_device *device, struct kgsl_snapshot *snapshot, const unsigned int *regs, unsigned int count); diff --git a/gen7_reg.h b/gen7_reg.h index dc74e33c0d..b86ee81339 100644 --- a/gen7_reg.h +++ b/gen7_reg.h @@ -108,6 +108,9 @@ #define GEN7_CP_IB2_BASE 0x92b #define GEN7_CP_IB2_BASE_HI 0x92c #define GEN7_CP_IB2_REM_SIZE 0x92d +#define GEN7_CP_IB3_BASE 0xa67 +#define GEN7_CP_IB3_BASE_HI 0xa68 +#define GEN7_CP_IB3_REM_SIZE 0xa69 #define GEN7_CP_ALWAYS_ON_COUNTER_LO 0x980 #define GEN7_CP_ALWAYS_ON_COUNTER_HI 0x981 #define GEN7_CP_ALWAYS_ON_CONTEXT_LO 0x982 diff --git a/kgsl_device.h b/kgsl_device.h index 3028a68188..8b19db0272 100644 --- a/kgsl_device.h +++ b/kgsl_device.h @@ -569,8 +569,10 @@ struct kgsl_device_private { * struct kgsl_snapshot - details for a specific snapshot instance * @ib1base: Active IB1 base address at the time of fault * @ib2base: Active IB2 base address at the time of fault + * @ib3base: Active IB3 base address at the time of fault * @ib1size: Number of DWORDS pending in IB1 at the time of fault * @ib2size: Number of DWORDS pending in IB2 at the time of fault + * @ib3size: Number of DWORDS pending in IB3 at the time of fault * @ib1dumped: Active IB1 dump status to sansphot binary * @ib2dumped: Active IB2 dump status to sansphot binary * @start: Pointer to the start of the static snapshot region @@ -590,10 +592,12 @@ struct kgsl_device_private { * @recovered: True if GPU was recovered after previous snapshot */ struct kgsl_snapshot { - uint64_t ib1base; - uint64_t ib2base; - unsigned int ib1size; - unsigned int ib2size; + u64 ib1base; + u64 ib2base; + u64 ib3base; + u32 ib1size; + u32 ib2size; + u32 ib3size; bool ib1dumped; bool ib2dumped; u64 ib1base_lpac; From 28a13081018e792b32d65be158990d50d6ee3aea Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Fri, 30 Aug 2024 16:50:21 +0530 Subject: [PATCH 0939/1016] kgsl: gen8: Add 
support for Gen8_6_0 GPU Add support for Gen8_6_0 GPU. Change-Id: If76b7cb03555e62017a172e17c124fe120b41894 Signed-off-by: SIVA MULLATI --- adreno-gpulist.h | 37 +++ adreno.h | 4 +- adreno_gen8.h | 6 +- adreno_gen8_6_0_snapshot.h | 478 +++++++++++++++++++++++++++++++++++++ adreno_gen8_snapshot.c | 32 +++ 5 files changed, 555 insertions(+), 2 deletions(-) create mode 100644 adreno_gen8_6_0_snapshot.h diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 67409e8b17..ca8368b6ff 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -3195,6 +3195,42 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_3_0 = { .ctxt_record_size = (4558 * SZ_1K), }; +extern const struct gen8_snapshot_block_list gen8_6_0_snapshot_block_list; + +static const struct adreno_gen8_core adreno_gpu_core_gen8_6_0 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_GEN8_6_0, + UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), + .compatible = "qcom,adreno-gpu-gen8-6-0", + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | + ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | + ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE, + .gpudev = &adreno_gen8_hwsched_gpudev.base, + .perfcounters = &adreno_gen8_perfcounters, + .uche_gmem_alignment = SZ_64M, + .gmem_size = (SZ_2M + SZ_256K + SZ_128K), + .bus_width = 32, + .snapshot_size = SZ_8M, + .num_ddr_channels = 4, + }, + .aqefw_name = "gen80000_aqe.fw", + .sqefw_name = "gen80000_sqe.fw", + .gmufw_name = "gen80000_gmu.bin", + .zap_name = "gen80000_zap.mbn", + .ao_hwcg = gen8_ao_hwcg_regs, + .ao_hwcg_count = ARRAY_SIZE(gen8_ao_hwcg_regs), + .gbif = gen8_3_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen8_3_0_gbif_cx_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = gen8_0_0_protected_regs, + .nonctxt_regs = gen8_0_0_nonctxt_regs, + .highest_bank_bit = 16, + .gmu_hub_clk_freq = 200000000, + .gen8_snapshot_block_list = &gen8_6_0_snapshot_block_list, + .noc_timeout_us = 3410, /* 3.41 msec */ + .ctxt_record_size = (3372 * SZ_1K), +}; + static const struct 
adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_a306, /* Deprecated */ &adreno_gpu_core_a306a, /* Deprecated */ @@ -3252,4 +3288,5 @@ static const struct adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_gen8_0_1.base, &adreno_gpu_core_gen8_3_0.base, &adreno_gpu_core_gen8_4_0.base, + &adreno_gpu_core_gen8_6_0.base, }; diff --git a/adreno.h b/adreno.h index 7814e2a709..169b16b8d7 100644 --- a/adreno.h +++ b/adreno.h @@ -255,6 +255,7 @@ enum adreno_gpurev { ADRENO_REV_GEN8_0_1 = ADRENO_GPUREV_VALUE(8, 0, 1), ADRENO_REV_GEN8_3_0 = ADRENO_GPUREV_VALUE(8, 3, 0), ADRENO_REV_GEN8_4_0 = ADRENO_GPUREV_VALUE(8, 4, 0), + ADRENO_REV_GEN8_6_0 = ADRENO_GPUREV_VALUE(8, 6, 0), }; #define ADRENO_SOFT_FAULT BIT(0) @@ -1284,6 +1285,7 @@ ADRENO_TARGET(gen8_0_0, ADRENO_REV_GEN8_0_0) ADRENO_TARGET(gen8_0_1, ADRENO_REV_GEN8_0_1) ADRENO_TARGET(gen8_3_0, ADRENO_REV_GEN8_3_0) ADRENO_TARGET(gen8_4_0, ADRENO_REV_GEN8_4_0) +ADRENO_TARGET(gen8_6_0, ADRENO_REV_GEN8_6_0) static inline int adreno_is_gen7_9_x(struct adreno_device *adreno_dev) { @@ -1306,7 +1308,7 @@ static inline int adreno_is_gen7_2_x_family(struct adreno_device *adreno_dev) static inline int adreno_is_gen8_0_x_family(struct adreno_device *adreno_dev) { return adreno_is_gen8_0_0(adreno_dev) || adreno_is_gen8_0_1(adreno_dev) || - adreno_is_gen8_4_0(adreno_dev); + adreno_is_gen8_4_0(adreno_dev) || adreno_is_gen8_6_0(adreno_dev); } /* Gen7 targets which does not support concurrent binning */ diff --git a/adreno_gen8.h b/adreno_gen8.h index ffac83d5e3..fa8df15ed4 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -15,6 +15,7 @@ #define GEN8_0_0_NUM_PHYSICAL_SLICES 3 #define GEN8_3_0_NUM_PHYSICAL_SLICES 1 +#define GEN8_6_0_NUM_PHYSICAL_SLICES 2 /* Forward struct declaration */ struct gen8_snapshot_block_list; @@ -631,6 +632,9 @@ static inline u32 gen8_get_num_slices(struct adreno_device *adreno_dev) { if (adreno_is_gen8_3_0(adreno_dev)) return GEN8_3_0_NUM_PHYSICAL_SLICES; - return GEN8_0_0_NUM_PHYSICAL_SLICES; + else if 
(adreno_is_gen8_6_0(adreno_dev)) + return GEN8_6_0_NUM_PHYSICAL_SLICES; + else + return GEN8_0_0_NUM_PHYSICAL_SLICES; } #endif diff --git a/adreno_gen8_6_0_snapshot.h b/adreno_gen8_6_0_snapshot.h new file mode 100644 index 0000000000..be85fe01df --- /dev/null +++ b/adreno_gen8_6_0_snapshot.h @@ -0,0 +1,478 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + */ +#ifndef __ADRENO_GEN8_6_0_SNAPSHOT_H +#define __ADRENO_GEN8_6_0_SNAPSHOT_H + +#include "adreno_gen8_snapshot.h" +#include "adreno_gen8_0_0_snapshot.h" + +static const u32 gen8_6_0_debugbus_blocks[] = { + DEBUGBUS_GBIF_CX_GC_US_I_0, + DEBUGBUS_GMU_CX_GC_US_I_0, + DEBUGBUS_CX_GC_US_I_0, + DEBUGBUS_GBIF_GX_GC_US_I_0, + DEBUGBUS_GMU_GX_GC_US_I_0, + DEBUGBUS_DBGC_GC_US_I_0, + DEBUGBUS_RBBM_GC_US_I_0, + DEBUGBUS_LARC_GC_US_I_0, + DEBUGBUS_COM_GC_US_I_0, + DEBUGBUS_HLSQ_GC_US_I_0, + DEBUGBUS_CGC_GC_US_I_0, + DEBUGBUS_VSC_GC_US_I_0_0, + DEBUGBUS_VSC_GC_US_I_0_1, + DEBUGBUS_UFC_GC_US_I_0, + DEBUGBUS_UFC_GC_US_I_1, + DEBUGBUS_CP_GC_US_I_0_0, + DEBUGBUS_CP_GC_US_I_0_1, + DEBUGBUS_CP_GC_US_I_0_2, + DEBUGBUS_PC_BR_US_I_0, + DEBUGBUS_PC_BV_US_I_0, + DEBUGBUS_GPC_BR_US_I_0, + DEBUGBUS_GPC_BV_US_I_0, + DEBUGBUS_VPC_BR_US_I_0, + DEBUGBUS_VPC_BV_US_I_0, + DEBUGBUS_UCHE_WRAPPER_GC_US_I_0, + DEBUGBUS_UCHE_GC_US_I_0, + DEBUGBUS_UCHE_GC_US_I_1, + DEBUGBUS_CP_GC_S_0_I_0, + DEBUGBUS_PC_BR_S_0_I_0, + DEBUGBUS_PC_BV_S_0_I_0, + DEBUGBUS_TESS_GC_S_0_I_0, + DEBUGBUS_TSEFE_GC_S_0_I_0, + DEBUGBUS_TSEBE_GC_S_0_I_0, + DEBUGBUS_RAS_GC_S_0_I_0, + DEBUGBUS_LRZ_BR_S_0_I_0, + DEBUGBUS_LRZ_BV_S_0_I_0, + DEBUGBUS_VFDP_GC_S_0_I_0, + DEBUGBUS_GPC_BR_S_0_I_0, + DEBUGBUS_GPC_BV_S_0_I_0, + DEBUGBUS_VPCFE_BR_S_0_I_0, + DEBUGBUS_VPCFE_BV_S_0_I_0, + DEBUGBUS_VPCBE_BR_S_0_I_0, + DEBUGBUS_VPCBE_BV_S_0_I_0, + DEBUGBUS_CCHE_GC_S_0_I_0, + DEBUGBUS_DBGC_GC_S_0_I_0, + DEBUGBUS_LARC_GC_S_0_I_0, + DEBUGBUS_RBBM_GC_S_0_I_0, + DEBUGBUS_CCRE_GC_S_0_I_0, + DEBUGBUS_CGC_GC_S_0_I_0, + 
DEBUGBUS_GMU_GC_S_0_I_0, + DEBUGBUS_SLICE_GC_S_0_I_0, + DEBUGBUS_HLSQ_SPTP_STAR_GC_S_0_I_0, + DEBUGBUS_USP_GC_S_0_I_0, + DEBUGBUS_USP_GC_S_0_I_1, + DEBUGBUS_USPTP_GC_S_0_I_0, + DEBUGBUS_USPTP_GC_S_0_I_1, + DEBUGBUS_USPTP_GC_S_0_I_2, + DEBUGBUS_USPTP_GC_S_0_I_3, + DEBUGBUS_TP_GC_S_0_I_0, + DEBUGBUS_TP_GC_S_0_I_1, + DEBUGBUS_TP_GC_S_0_I_2, + DEBUGBUS_TP_GC_S_0_I_3, + DEBUGBUS_RB_GC_S_0_I_0, + DEBUGBUS_RB_GC_S_0_I_1, + DEBUGBUS_CCU_GC_S_0_I_0, + DEBUGBUS_CCU_GC_S_0_I_1, + DEBUGBUS_HLSQ_GC_S_0_I_0, + DEBUGBUS_HLSQ_GC_S_0_I_1, + DEBUGBUS_VFD_GC_S_0_I_0, + DEBUGBUS_VFD_GC_S_0_I_1, + DEBUGBUS_CP_GC_S_1_I_0, + DEBUGBUS_PC_BR_S_1_I_0, + DEBUGBUS_PC_BV_S_1_I_0, + DEBUGBUS_TESS_GC_S_1_I_0, + DEBUGBUS_TSEFE_GC_S_1_I_0, + DEBUGBUS_TSEBE_GC_S_1_I_0, + DEBUGBUS_RAS_GC_S_1_I_0, + DEBUGBUS_LRZ_BR_S_1_I_0, + DEBUGBUS_LRZ_BV_S_1_I_0, + DEBUGBUS_VFDP_GC_S_1_I_0, + DEBUGBUS_GPC_BR_S_1_I_0, + DEBUGBUS_GPC_BV_S_1_I_0, + DEBUGBUS_VPCFE_BR_S_1_I_0, + DEBUGBUS_VPCFE_BV_S_1_I_0, + DEBUGBUS_VPCBE_BR_S_1_I_0, + DEBUGBUS_VPCBE_BV_S_1_I_0, + DEBUGBUS_CCHE_GC_S_1_I_0, + DEBUGBUS_DBGC_GC_S_1_I_0, + DEBUGBUS_LARC_GC_S_1_I_0, + DEBUGBUS_RBBM_GC_S_1_I_0, + DEBUGBUS_CCRE_GC_S_1_I_0, + DEBUGBUS_CGC_GC_S_1_I_0, + DEBUGBUS_GMU_GC_S_1_I_0, + DEBUGBUS_SLICE_GC_S_1_I_0, + DEBUGBUS_HLSQ_SPTP_STAR_GC_S_1_I_0, + DEBUGBUS_USP_GC_S_1_I_0, + DEBUGBUS_USP_GC_S_1_I_1, + DEBUGBUS_USPTP_GC_S_1_I_0, + DEBUGBUS_USPTP_GC_S_1_I_1, + DEBUGBUS_USPTP_GC_S_1_I_2, + DEBUGBUS_USPTP_GC_S_1_I_3, + DEBUGBUS_TP_GC_S_1_I_0, + DEBUGBUS_TP_GC_S_1_I_1, + DEBUGBUS_TP_GC_S_1_I_2, + DEBUGBUS_TP_GC_S_1_I_3, + DEBUGBUS_RB_GC_S_1_I_0, + DEBUGBUS_RB_GC_S_1_I_1, + DEBUGBUS_CCU_GC_S_1_I_0, + DEBUGBUS_CCU_GC_S_1_I_1, + DEBUGBUS_HLSQ_GC_S_1_I_0, + DEBUGBUS_HLSQ_GC_S_1_I_1, + DEBUGBUS_VFD_GC_S_1_I_0, + DEBUGBUS_VFD_GC_S_1_I_1, +}; + +/* + * Block : ['BROADCAST', 'GRAS', 'PC'] + * Block : ['RBBM', 'RDVM', 'UCHE'] + * Block : ['VFD', 'VPC', 'VSC'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 121 (Regs:1162) + */ +static const u32 
gen8_6_0_gpu_registers[] = { + 0x00000, 0x00000, 0x00002, 0x00002, 0x00008, 0x0000d, 0x00010, 0x00013, + 0x00015, 0x00016, 0x00018, 0x00018, 0x0001a, 0x0001a, 0x0001c, 0x0001c, + 0x0001e, 0x0001e, 0x00028, 0x0002b, 0x0002d, 0x00039, 0x00040, 0x00053, + 0x00062, 0x00066, 0x00069, 0x0006e, 0x00071, 0x00072, 0x00074, 0x00074, + 0x00076, 0x0007c, 0x0007f, 0x0009a, 0x0009d, 0x000af, 0x000b2, 0x000d4, + 0x000d7, 0x000e2, 0x000e5, 0x000e6, 0x000e9, 0x000f1, 0x000f4, 0x000f6, + 0x000f9, 0x00108, 0x0010b, 0x0010e, 0x00111, 0x00111, 0x00114, 0x0011c, + 0x0011f, 0x00121, 0x00125, 0x00125, 0x00127, 0x00127, 0x00129, 0x00129, + 0x0012b, 0x00131, 0x00134, 0x00138, 0x0013a, 0x0013a, 0x0013c, 0x0013f, + 0x00142, 0x00150, 0x00153, 0x00155, 0x00158, 0x00159, 0x0015c, 0x0015c, + 0x00166, 0x00179, 0x0019e, 0x001a3, 0x001b0, 0x002c9, 0x002e2, 0x0036b, + 0x00380, 0x0039b, 0x003a4, 0x003ab, 0x003b4, 0x003c5, 0x003ce, 0x003cf, + 0x003e0, 0x003e0, 0x003f0, 0x003f0, 0x00440, 0x00444, 0x00460, 0x00460, + 0x00c02, 0x00c04, 0x00c06, 0x00c06, 0x00c10, 0x00cd9, 0x00ce0, 0x00d0c, + 0x00df0, 0x00df4, 0x00e01, 0x00e04, 0x00e06, 0x00e09, 0x00e0e, 0x00e13, + 0x00e15, 0x00e16, 0x00e20, 0x00e37, 0x0ec00, 0x0ec01, 0x0ec05, 0x0ec05, + 0x0ec07, 0x0ec07, 0x0ec0a, 0x0ec0a, 0x0ec12, 0x0ec12, 0x0ec26, 0x0ec28, + 0x0ec2b, 0x0ec2d, 0x0ec2f, 0x0ec2f, 0x0ec40, 0x0ec41, 0x0ec45, 0x0ec45, + 0x0ec47, 0x0ec47, 0x0ec4a, 0x0ec4a, 0x0ec52, 0x0ec52, 0x0ec66, 0x0ec68, + 0x0ec6b, 0x0ec6d, 0x0ec6f, 0x0ec6f, 0x0ec80, 0x0ec81, 0x0ec85, 0x0ec85, + 0x0ec87, 0x0ec87, 0x0ec8a, 0x0ec8a, 0x0ec92, 0x0ec92, 0x0eca6, 0x0eca8, + 0x0ecab, 0x0ecad, 0x0ecaf, 0x0ecaf, 0x0ecc0, 0x0ecc1, 0x0ecc5, 0x0ecc5, + 0x0ecc7, 0x0ecc7, 0x0ecca, 0x0ecca, 0x0ecd2, 0x0ecd2, 0x0ece6, 0x0ece8, + 0x0eceb, 0x0eced, 0x0ecef, 0x0ecef, 0x0ed00, 0x0ed01, 0x0ed05, 0x0ed05, + 0x0ed07, 0x0ed07, 0x0ed0a, 0x0ed0a, 0x0ed12, 0x0ed12, 0x0ed26, 0x0ed28, + 0x0ed2b, 0x0ed2d, 0x0ed2f, 0x0ed2f, 0x0ed40, 0x0ed41, 0x0ed45, 0x0ed45, + 0x0ed47, 0x0ed47, 0x0ed4a, 0x0ed4a, 0x0ed52, 
0x0ed52, 0x0ed66, 0x0ed68, + 0x0ed6b, 0x0ed6d, 0x0ed6f, 0x0ed6f, 0x0ed80, 0x0ed81, 0x0ed85, 0x0ed85, + 0x0ed87, 0x0ed87, 0x0ed8a, 0x0ed8a, 0x0ed92, 0x0ed92, 0x0eda6, 0x0eda8, + 0x0edab, 0x0edad, 0x0edaf, 0x0edaf, 0x0f400, 0x0f400, 0x0f800, 0x0f803, + 0x0fc00, 0x0fc01, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_6_0_gpu_registers), 8)); + +/* + * Block : ['GMUAO', 'GMUCX', 'GMUCX_RAM'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 160 (Regs:616) + */ +static const u32 gen8_6_0_gmu_registers[] = { + 0x10001, 0x10001, 0x10003, 0x10003, 0x10401, 0x10401, 0x10403, 0x10403, + 0x10801, 0x10801, 0x10803, 0x10803, 0x10c01, 0x10c01, 0x10c03, 0x10c03, + 0x11001, 0x11001, 0x11003, 0x11003, 0x11401, 0x11401, 0x11403, 0x11403, + 0x11801, 0x11801, 0x11803, 0x11803, 0x11c01, 0x11c01, 0x11c03, 0x11c03, + 0x1f400, 0x1f40b, 0x1f40f, 0x1f411, 0x1f500, 0x1f500, 0x1f507, 0x1f507, + 0x1f509, 0x1f50b, 0x1f700, 0x1f701, 0x1f704, 0x1f706, 0x1f708, 0x1f709, + 0x1f70c, 0x1f70d, 0x1f710, 0x1f711, 0x1f713, 0x1f716, 0x1f718, 0x1f71d, + 0x1f720, 0x1f725, 0x1f729, 0x1f729, 0x1f730, 0x1f747, 0x1f750, 0x1f75a, + 0x1f75c, 0x1f75c, 0x1f780, 0x1f781, 0x1f784, 0x1f78b, 0x1f790, 0x1f797, + 0x1f7a0, 0x1f7a7, 0x1f7b0, 0x1f7b7, 0x1f7e0, 0x1f7e1, 0x1f7e4, 0x1f7e5, + 0x1f7e8, 0x1f7e9, 0x1f7ec, 0x1f7ed, 0x1f800, 0x1f804, 0x1f807, 0x1f808, + 0x1f80b, 0x1f80c, 0x1f80f, 0x1f80f, 0x1f811, 0x1f811, 0x1f813, 0x1f817, + 0x1f819, 0x1f81c, 0x1f824, 0x1f830, 0x1f840, 0x1f842, 0x1f848, 0x1f848, + 0x1f84c, 0x1f84c, 0x1f850, 0x1f850, 0x1f858, 0x1f859, 0x1f868, 0x1f869, + 0x1f878, 0x1f883, 0x1f930, 0x1f931, 0x1f934, 0x1f935, 0x1f938, 0x1f939, + 0x1f93c, 0x1f93d, 0x1f940, 0x1f941, 0x1f943, 0x1f943, 0x1f948, 0x1f94a, + 0x1f94f, 0x1f951, 0x1f954, 0x1f955, 0x1f95d, 0x1f95d, 0x1f962, 0x1f96b, + 0x1f970, 0x1f970, 0x1f97c, 0x1f97e, 0x1f980, 0x1f981, 0x1f984, 0x1f986, + 0x1f992, 0x1f993, 0x1f996, 0x1f99e, 0x1f9c0, 0x1f9cf, 0x1f9f0, 0x1f9f1, + 0x1f9f8, 0x1f9fa, 0x1f9fc, 0x1f9fc, 0x1fa00, 0x1fa03, 
0x1fc00, 0x1fc01, + 0x1fc04, 0x1fc07, 0x1fc10, 0x1fc10, 0x1fc14, 0x1fc14, 0x1fc18, 0x1fc19, + 0x1fc20, 0x1fc20, 0x1fc24, 0x1fc26, 0x1fc30, 0x1fc33, 0x1fc38, 0x1fc3b, + 0x1fc40, 0x1fc49, 0x1fc50, 0x1fc59, 0x1fc60, 0x1fc7f, 0x1fca0, 0x1fcef, + 0x20000, 0x20007, 0x20010, 0x20015, 0x20018, 0x2001a, 0x2001c, 0x2001d, + 0x20020, 0x20021, 0x20024, 0x20025, 0x2002a, 0x2002c, 0x20030, 0x20031, + 0x20034, 0x20036, 0x20080, 0x20087, 0x20300, 0x20301, 0x20304, 0x20305, + 0x20308, 0x2030c, 0x20310, 0x20314, 0x20318, 0x2031a, 0x20320, 0x20322, + 0x20324, 0x20326, 0x20328, 0x2032a, 0x20330, 0x20333, 0x20338, 0x20338, + 0x20340, 0x20350, 0x20354, 0x2035b, 0x20360, 0x20367, 0x20370, 0x20377, + 0x23801, 0x23801, 0x23803, 0x23803, 0x23805, 0x23805, 0x23807, 0x23807, + 0x23809, 0x23809, 0x2380b, 0x2380b, 0x2380d, 0x2380d, 0x2380f, 0x2380f, + 0x23811, 0x23811, 0x23813, 0x23813, 0x23815, 0x23815, 0x23817, 0x23817, + 0x23819, 0x23819, 0x2381b, 0x2381b, 0x2381d, 0x2381d, 0x2381f, 0x23820, + 0x23822, 0x23822, 0x23824, 0x23824, 0x23826, 0x23826, 0x23828, 0x23828, + 0x2382a, 0x2382a, 0x2382c, 0x2382c, 0x2382e, 0x2382e, 0x23830, 0x23830, + 0x23832, 0x23832, 0x23834, 0x23834, 0x23836, 0x23836, 0x23838, 0x23838, + 0x2383a, 0x2383a, 0x2383c, 0x2383c, 0x2383e, 0x2383e, 0x23840, 0x23847, + 0x23b00, 0x23b01, 0x23b03, 0x23b03, 0x23b05, 0x23b0e, 0x23b10, 0x23b13, + 0x23b15, 0x23b16, 0x23b28, 0x23b28, 0x23b30, 0x23b30, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_6_0_gmu_registers), 8)); + +/* + * Block : ['BROADCAST', 'CX_DBGC', 'CX_MISC', 'DBGC', 'GBIF', 'GMUAO'] + * Block : ['GMUCX', 'GMUGX', 'GRAS', 'PC', 'RBBM'] + * Block : ['RDVM', 'UCHE', 'VFD', 'VPC', 'VSC'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_NONE + * pairs : 21 (Regs:301) + */ +static const u32 gen8_6_0_non_context_slice_pipe_br_registers[] = { + 0x08600, 0x08602, 0x08610, 0x08613, 0x08700, 0x08704, 0x08710, 0x08713, + 0x08720, 0x08723, 0x08730, 0x08733, 0x08740, 0x08744, 0x09680, 0x09681, + 
0x09690, 0x0969b, 0x09740, 0x09745, 0x09750, 0x0975b, 0x09770, 0x097ef, + 0x09f00, 0x09f0f, 0x09f20, 0x09f23, 0x09f30, 0x09f31, 0x0a600, 0x0a600, + 0x0a603, 0x0a603, 0x0a610, 0x0a61f, 0x0a630, 0x0a632, 0x0a638, 0x0a63c, + 0x0a640, 0x0a67f, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_6_0_non_context_slice_pipe_br_registers), 8)); + +/* + * Block : ['BROADCAST', 'CX_DBGC', 'CX_MISC', 'DBGC', 'GBIF', 'GMUAO'] + * Block : ['GMUCX', 'GMUGX', 'GRAS', 'PC', 'RBBM'] + * Block : ['RDVM', 'UCHE', 'VFD', 'VPC', 'VSC'] + * REGION : SLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_NONE + * pairs : 21 (Regs:301) + */ +static const u32 gen8_6_0_non_context_slice_pipe_bv_registers[] = { + 0x08600, 0x08602, 0x08610, 0x08613, 0x08700, 0x08704, 0x08710, 0x08713, + 0x08720, 0x08723, 0x08730, 0x08733, 0x08740, 0x08744, 0x09680, 0x09681, + 0x09690, 0x0969b, 0x09740, 0x09745, 0x09750, 0x0975b, 0x09770, 0x097ef, + 0x09f00, 0x09f0f, 0x09f20, 0x09f23, 0x09f30, 0x09f31, 0x0a600, 0x0a600, + 0x0a603, 0x0a603, 0x0a610, 0x0a61f, 0x0a630, 0x0a632, 0x0a638, 0x0a63c, + 0x0a640, 0x0a67f, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_6_0_non_context_slice_pipe_bv_registers), 8)); + +static struct gen8_cluster_registers gen8_6_0_mvc_clusters[] = { + { CLUSTER_NONE, UNSLICE, PIPE_BR, STATE_NON_CONTEXT, + gen8_0_0_non_context_pipe_br_registers, }, + { CLUSTER_NONE, SLICE, PIPE_BR, STATE_NON_CONTEXT, + gen8_6_0_non_context_slice_pipe_br_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_BV, STATE_NON_CONTEXT, + gen8_0_0_non_context_pipe_bv_registers, }, + { CLUSTER_NONE, SLICE, PIPE_BV, STATE_NON_CONTEXT, + gen8_6_0_non_context_slice_pipe_bv_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_LPAC, STATE_NON_CONTEXT, + gen8_0_0_non_context_pipe_lpac_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_BR, STATE_NON_CONTEXT, + gen8_0_0_non_context_rb_pipe_br_rbp_registers, &gen8_0_0_rb_rbp_sel, }, + { CLUSTER_NONE, SLICE, PIPE_BR, STATE_NON_CONTEXT, + 
gen8_0_0_non_context_rb_slice_pipe_br_rac_registers, &gen8_0_0_rb_rac_sel, }, + { CLUSTER_NONE, SLICE, PIPE_BR, STATE_NON_CONTEXT, + gen8_0_0_non_context_rb_slice_pipe_br_rbp_registers, &gen8_0_0_rb_rbp_sel, }, + { CLUSTER_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_rb_slice_pipe_br_cluster_ps_rac_registers, &gen8_0_0_rb_rac_sel, }, + { CLUSTER_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_rb_slice_pipe_br_cluster_ps_rac_registers, &gen8_0_0_rb_rac_sel, }, + { CLUSTER_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_rb_slice_pipe_br_cluster_ps_rbp_registers, &gen8_0_0_rb_rbp_sel, }, + { CLUSTER_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_rb_slice_pipe_br_cluster_ps_rbp_registers, &gen8_0_0_rb_rbp_sel, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_gras_slice_pipe_br_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_gras_slice_pipe_br_cluster_vpc_vs_registers, }, + { CLUSTER_GRAS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_gras_slice_pipe_br_cluster_gras_registers, }, + { CLUSTER_GRAS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_gras_slice_pipe_br_cluster_gras_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_gras_slice_pipe_bv_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_gras_slice_pipe_bv_cluster_vpc_vs_registers, }, + { CLUSTER_GRAS, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_gras_slice_pipe_bv_cluster_gras_registers, }, + { CLUSTER_GRAS, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_gras_slice_pipe_bv_cluster_gras_registers, }, + { CLUSTER_FE_US, UNSLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_pc_pipe_br_cluster_fe_us_registers, }, + { CLUSTER_FE_US, UNSLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_pc_pipe_br_cluster_fe_us_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_pc_slice_pipe_br_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BR, 
STATE_FORCE_CTXT_1, + gen8_0_0_pc_slice_pipe_br_cluster_fe_s_registers, }, + { CLUSTER_FE_US, UNSLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_pc_pipe_bv_cluster_fe_us_registers, }, + { CLUSTER_FE_US, UNSLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_pc_pipe_bv_cluster_fe_us_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_pc_slice_pipe_bv_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_pc_slice_pipe_bv_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_vfd_slice_pipe_br_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_vfd_slice_pipe_br_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_vfd_slice_pipe_bv_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_vfd_slice_pipe_bv_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_br_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_br_cluster_fe_s_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_br_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_br_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_US, UNSLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_pipe_br_cluster_vpc_us_registers, }, + { CLUSTER_VPC_US, UNSLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_pipe_br_cluster_vpc_us_registers, }, + { CLUSTER_VPC_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_br_cluster_vpc_ps_registers, }, + { CLUSTER_VPC_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_br_cluster_vpc_ps_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_bv_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BV, 
STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_bv_cluster_fe_s_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_bv_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_bv_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_US, UNSLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_pipe_bv_cluster_vpc_us_registers, }, + { CLUSTER_VPC_US, UNSLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_pipe_bv_cluster_vpc_us_registers, }, + { CLUSTER_VPC_PS, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_bv_cluster_vpc_ps_registers, }, + { CLUSTER_VPC_PS, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_bv_cluster_vpc_ps_registers, }, +}; + +static struct gen8_reg_list gen8_6_0_reg_list[] = { + { UNSLICE, gen8_6_0_gpu_registers }, + { SLICE, gen8_0_0_gpu_slice_registers }, + { UNSLICE, gen8_0_0_dbgc_registers }, + { SLICE, gen8_0_0_dbgc_slice_registers }, + { UNSLICE, gen8_0_0_cx_dbgc_registers }, + { UNSLICE, NULL}, +}; + +static struct gen8_reg_list gen8_6_0_ahb_registers[] = { + { UNSLICE, gen8_0_0_gbif_registers }, +}; + +/* + * Block : ['GPU_CC_GPU_CC_REG'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 30 (Regs:128) + */ +static const u32 gen8_6_0_gpu_cc_gpu_cc_reg_registers[] = { + 0x25400, 0x25404, 0x25800, 0x25804, 0x25c00, 0x25c04, 0x26000, 0x26004, + 0x26400, 0x26406, 0x26415, 0x26418, 0x2641c, 0x2641d, 0x2641f, 0x26437, + 0x26439, 0x2643a, 0x2643c, 0x2643f, 0x26443, 0x26444, 0x26478, 0x2647a, + 0x26489, 0x2648a, 0x2649c, 0x2649e, 0x264a0, 0x264a1, 0x264c5, 0x264c7, + 0x264e8, 0x264ea, 0x264f9, 0x264fc, 0x2650b, 0x2650b, 0x2651c, 0x2651e, + 0x26540, 0x2654b, 0x26554, 0x26556, 0x26558, 0x2655c, 0x2655e, 0x2655f, + 0x26563, 0x26563, 0x2656d, 0x26573, 0x26576, 0x26576, 0x26578, 0x2657a, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_6_0_gpu_cc_gpu_cc_reg_registers), 8)); + +/* + * Block : ['GPU_CC_PLL0_CM_PLL_LUCID_OLE'] + 
* REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 1 (Regs:16) + */ +static const u32 gen8_6_0_gpu_cc_pll0_cm_pll_lucid_ole_registers[] = { + 0x24000, 0x2400f, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_6_0_gpu_cc_pll0_cm_pll_lucid_ole_registers), 8)); + +/* + * Block : ['ACD_ACD'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 10 (Regs:53) + */ +static const u32 gen8_6_0_acd_acd_registers[] = { + 0x1a400, 0x1a416, 0x1a420, 0x1a42d, 0x1a430, 0x1a431, 0x1a435, 0x1a435, + 0x1a437, 0x1a437, 0x1a43a, 0x1a43a, 0x1a442, 0x1a442, 0x1a456, 0x1a458, + 0x1a45b, 0x1a45d, 0x1a45f, 0x1a462, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_6_0_acd_acd_registers), 8)); + +/* + * Block : ['GX_CLKCTL_GX_CLKCTL_REG'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 12 (Regs:85) + */ +static const u32 gen8_6_0_gx_clkctl_gx_clkctl_reg_registers[] = { + 0x1a000, 0x1a004, 0x1a008, 0x1a012, 0x1a014, 0x1a014, 0x1a017, 0x1a017, + 0x1a019, 0x1a01c, 0x1a022, 0x1a022, 0x1a024, 0x1a029, 0x1a03f, 0x1a05d, + 0x1a060, 0x1a063, 0x1a065, 0x1a066, 0x1a068, 0x1a076, 0x1a078, 0x1a07b, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_6_0_gx_clkctl_gx_clkctl_reg_registers), 8)); + +/* + * Block : ['GX_CLKCTL_PLL0_CM_PLL_LUCID_OLE'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 1 (Regs:16) + */ +static const u32 gen8_6_0_gx_clkctl_pll0_cm_pll_lucid_ole_registers[] = { + 0x19000, 0x1900f, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_6_0_gx_clkctl_pll0_cm_pll_lucid_ole_registers), 8)); + +/* + * Block : ['CPR'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 22 (Regs:504) + */ +static const u32 gen8_6_0_cpr_registers[] = { + 0x26800, 0x26805, 0x26808, 0x2680d, 0x26814, 0x26815, 0x2681c, 0x2681c, + 0x26820, 0x26839, 0x26840, 0x26841, 0x26848, 0x26849, 0x26850, 0x26851, + 0x26880, 0x268a2, 0x26980, 0x269b0, 0x269c0, 0x269c2, 0x269c6, 0x269c8, + 0x269e0, 0x269ee, 0x269fb, 0x269ff, 0x26a02, 0x26a07, 
0x26a09, 0x26a0b, + 0x26a10, 0x26b0f, 0x27440, 0x27441, 0x27444, 0x27444, 0x27480, 0x274a2, + 0x274ac, 0x274c4, 0x274c8, 0x274da, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_6_0_cpr_registers), 8)); + +static const u32 *gen8_6_0_external_core_regs[] = { + gen8_0_0_gdpm_lkg_registers, + gen8_0_0_gpu_cc_ahb2phy_broadcast_swman_registers, + gen8_0_0_gpu_cc_ahb2phy_swman_registers, + gen8_6_0_gpu_cc_gpu_cc_reg_registers, + gen8_6_0_gpu_cc_pll0_cm_pll_lucid_ole_registers, + gen8_6_0_cpr_registers, +}; + +static struct gen8_reg_list gen8_6_0_gmu_gx_registers[] = { + { UNSLICE, gen8_0_0_gmugx_registers }, + { UNSLICE, gen8_0_0_gx_clkctl_ahb2phy_broadcast_swman_registers }, + { UNSLICE, gen8_0_0_gx_clkctl_ahb2phy_swman_registers }, + { UNSLICE, gen8_6_0_gx_clkctl_pll0_cm_pll_lucid_ole_registers }, + { UNSLICE, gen8_6_0_gx_clkctl_gx_clkctl_reg_registers }, + { UNSLICE, gen8_6_0_acd_acd_registers }, + { SLICE, gen8_0_0_gmugx_slice_registers }, +}; +#endif /*_ADRENO_GEN8_6_0_SNAPSHOT_H */ diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 794a5405fc..acc4ffbf7f 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -7,6 +7,7 @@ #include "adreno.h" #include "adreno_gen8_0_0_snapshot.h" #include "adreno_gen8_3_0_snapshot.h" +#include "adreno_gen8_6_0_snapshot.h" #include "adreno_snapshot.h" static struct kgsl_memdesc *gen8_capturescript; @@ -80,6 +81,37 @@ const struct gen8_snapshot_block_list gen8_3_0_snapshot_block_list = { .mempool_index_registers_len = ARRAY_SIZE(gen8_0_0_cp_mempool_reg_list), }; +const struct gen8_snapshot_block_list gen8_6_0_snapshot_block_list = { + .pre_crashdumper_regs = gen8_6_0_ahb_registers, + .num_pre_crashdumper_regs = ARRAY_SIZE(gen8_6_0_ahb_registers), + .debugbus_blocks = gen8_6_0_debugbus_blocks, + .debugbus_blocks_len = ARRAY_SIZE(gen8_6_0_debugbus_blocks), + .gbif_debugbus_blocks = gen8_gbif_debugbus_blocks, + .gbif_debugbus_blocks_len = ARRAY_SIZE(gen8_gbif_debugbus_blocks), + 
.cx_debugbus_blocks = gen8_cx_debugbus_blocks, + .cx_debugbus_blocks_len = ARRAY_SIZE(gen8_cx_debugbus_blocks), + .external_core_regs = gen8_6_0_external_core_regs, + .num_external_core_regs = ARRAY_SIZE(gen8_6_0_external_core_regs), + .gmu_cx_unsliced_regs = gen8_6_0_gmu_registers, + .gmu_gx_regs = gen8_6_0_gmu_gx_registers, + .num_gmu_gx_regs = ARRAY_SIZE(gen8_6_0_gmu_gx_registers), + .rscc_regs = gen8_0_0_rscc_rsc_registers, + .reg_list = gen8_6_0_reg_list, + .cx_misc_regs = gen8_0_0_cx_misc_registers, + .shader_blocks = gen8_0_0_shader_blocks, + .num_shader_blocks = ARRAY_SIZE(gen8_0_0_shader_blocks), + .cp_clusters = gen8_0_0_cp_clusters, + .num_cp_clusters = ARRAY_SIZE(gen8_0_0_cp_clusters), + .clusters = gen8_6_0_mvc_clusters, + .num_clusters = ARRAY_SIZE(gen8_6_0_mvc_clusters), + .sptp_clusters = gen8_0_0_sptp_clusters, + .num_sptp_clusters = ARRAY_SIZE(gen8_0_0_sptp_clusters), + .index_registers = gen8_0_0_cp_indexed_reg_list, + .index_registers_len = ARRAY_SIZE(gen8_0_0_cp_indexed_reg_list), + .mempool_index_registers = gen8_0_0_cp_mempool_reg_list, + .mempool_index_registers_len = ARRAY_SIZE(gen8_0_0_cp_mempool_reg_list), +}; + #define GEN8_SP_READ_SEL_VAL(_sliceid, _location, _pipe, _statetype, _usptp, _sptp) \ (FIELD_PREP(GENMASK(25, 21), _sliceid) | \ FIELD_PREP(GENMASK(20, 18), _location) | \ From d4783ff9dc3c0f1827fef5a0585b2b3965497ce4 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Fri, 20 Sep 2024 02:18:05 +0530 Subject: [PATCH 0940/1016] kgsl: coresight: Add GX registers to IFPC power up register list Currently, GX coresight registers are not retained across IFPC. So, DBGC tracing is not functional when IFPC feature is enabled. Add these registers to the IFPC power up register list to address this limitation. 
Change-Id: I9797b5ec49fe37962ed234c02bbc60a051b546c7 Signed-off-by: Kamal Agrawal (cherry picked from commit 8b4bd3251c1f7c664b4d50547bccf26d766a1592) --- adreno_a6xx.c | 9 +++++++++ adreno_coresight.c | 33 +++++++++++++++++++++++---------- adreno_coresight.h | 16 +++++++++++++++- adreno_gen7.c | 7 +++++++ adreno_gen8.c | 7 +++++++ 5 files changed, 61 insertions(+), 11 deletions(-) diff --git a/adreno_a6xx.c b/adreno_a6xx.c index ce98a9c2e7..7682712065 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -470,6 +470,15 @@ static void a6xx_patch_pwrup_reglist(struct adreno_device *adreno_dev) } lock->list_length += reglist[i].count * 2; + + if ((r == a6xx_ifpc_pwrup_reglist) || (r == a650_ifpc_pwrup_reglist)) { + u32 cs_len = adreno_coresight_patch_pwrup_reglist(adreno_dev, dest); + + cs_len = cs_len * 2; + lock->list_length += cs_len; + list_offset += cs_len; + dest += cs_len; + } } if (adreno_is_a630(adreno_dev)) { diff --git a/adreno_coresight.c b/adreno_coresight.c index 684e541d59..8bda0d4b8a 100644 --- a/adreno_coresight.c +++ b/adreno_coresight.c @@ -57,19 +57,15 @@ ssize_t adreno_coresight_store_register(struct device *dev, return ret; mutex_lock(&device->mutex); - /* Ignore writes while coresight is off */ - if (!adreno_csdev->enabled) - goto out; - - cattr->reg->value = val; - if (!adreno_active_count_get(adreno_dev)) { - kgsl_regwrite(device, cattr->reg->offset, cattr->reg->value); - adreno_active_count_put(adreno_dev); + if (!adreno_csdev->enabled) { + mutex_unlock(&device->mutex); + return size; } - -out: + adreno_dev->patch_reglist = false; mutex_unlock(&device->mutex); + + adreno_power_cycle_u32(adreno_dev, &cattr->reg->value, val); return size; } @@ -142,6 +138,23 @@ static void _adreno_coresight_set(struct adreno_device *adreno_dev, coresight->registers[i].value); } +u32 adreno_coresight_patch_pwrup_reglist(struct adreno_device *adreno_dev, u32 *dest) +{ + struct adreno_coresight_device *adreno_csdev = &adreno_dev->gx_coresight; + const struct 
adreno_coresight *coresight = adreno_csdev->coresight; + int i; + + if (IS_ERR_OR_NULL(adreno_csdev->dev) || !adreno_csdev->enabled) + return 0; + + for (i = 0; i < coresight->count; i++) { + *dest++ = coresight->registers[i].offset; + *dest++ = coresight->registers[i].value; + } + + return coresight->count; +} + /* Generic function to enable coresight debug bus on adreno devices */ static int _adreno_coresight_enable(struct coresight_device *csdev, struct perf_event *event, u32 mode) diff --git a/adreno_coresight.h b/adreno_coresight.h index 7a4203f436..626387eb51 100644 --- a/adreno_coresight.h +++ b/adreno_coresight.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2019, The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _ADRENO_CORESIGHT_H_ @@ -149,6 +149,16 @@ void adreno_coresight_stop(struct adreno_device *adreno_dev); * Destroy any active coresight devices. */ void adreno_coresight_remove(struct adreno_device *adreno_dev); + +/** + * adreno_coresight_patch_pwrup_reglist - Patch power up register list with coresight registers + * @adreno_dev: An Adreno GPU device handle + * @dest: Pointer to power up register list buffer + * + * Patch IFPC power up register list with GX coresight registers. 
+ * Return: Number of GX coresight registers added to power up list + */ +u32 adreno_coresight_patch_pwrup_reglist(struct adreno_device *adreno_dev, u32 *dest); #else static inline void adreno_coresight_add_device(struct kgsl_device *device, const char *name, @@ -160,5 +170,9 @@ static inline void adreno_coresight_add_device(struct kgsl_device *device, static inline void adreno_coresight_start(struct adreno_device *adreno_dev) { } static inline void adreno_coresight_stop(struct adreno_device *adreno_dev) { } static inline void adreno_coresight_remove(struct adreno_device *adreno_dev) { } +static inline u32 adreno_coresight_patch_pwrup_reglist(struct adreno_device *adreno_dev) +{ + return 0; +} #endif #endif diff --git a/adreno_gen7.c b/adreno_gen7.c index 7a8d3aae74..098fa012b6 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -627,6 +627,13 @@ static void gen7_patch_pwrup_reglist(struct adreno_device *adreno_dev) *dest++ = r[j]; kgsl_regread(device, r[j], dest++); } + + if ((r == gen7_ifpc_pwrup_reglist) || (r == gen7_0_0_ifpc_pwrup_reglist)) { + u32 cs_len = adreno_coresight_patch_pwrup_reglist(adreno_dev, dest); + + lock->ifpc_list_len += cs_len; + dest += (cs_len * 2); + } } /* diff --git a/adreno_gen8.c b/adreno_gen8.c index d09c25e626..91704d7004 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -1074,6 +1074,13 @@ static void gen8_patch_pwrup_reglist(struct adreno_device *adreno_dev) kgsl_regread(device, r[j], dest++); } + if ((r == gen8_ifpc_pwrup_reglist) || (r == gen8_3_0_ifpc_pwrup_reglist)) { + u32 cs_len = adreno_coresight_patch_pwrup_reglist(adreno_dev, dest); + + lock->ifpc_list_len += cs_len; + dest += (cs_len * 2); + } + mutex_lock(&gen8_dev->nc_mutex); for (j = 0; j < nc_overrides[j].offset; j++) { unsigned long pipe = (unsigned long)nc_overrides[j].pipelines; From d23d2c7e9e84df9a6e54104f57e43e7506a20c81 Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Fri, 30 Aug 2024 16:50:21 +0530 Subject: [PATCH 0941/1016] kgsl: gen8: Add support for 
Gen8_6_0 GPU Add support for Gen8_6_0 GPU. Change-Id: If76b7cb03555e62017a172e17c124fe120b41894 Signed-off-by: SIVA MULLATI (cherry picked from commit 28a13081018e792b32d65be158990d50d6ee3aea) --- adreno-gpulist.h | 37 +++ adreno.h | 4 +- adreno_gen8.h | 6 +- adreno_gen8_6_0_snapshot.h | 478 +++++++++++++++++++++++++++++++++++++ adreno_gen8_snapshot.c | 32 +++ 5 files changed, 555 insertions(+), 2 deletions(-) create mode 100644 adreno_gen8_6_0_snapshot.h diff --git a/adreno-gpulist.h b/adreno-gpulist.h index ae34582bb5..1947baff1d 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -3195,6 +3195,42 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_3_0 = { .ctxt_record_size = (4558 * SZ_1K), }; +extern const struct gen8_snapshot_block_list gen8_6_0_snapshot_block_list; + +static const struct adreno_gen8_core adreno_gpu_core_gen8_6_0 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_GEN8_6_0, + UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), + .compatible = "qcom,adreno-gpu-gen8-6-0", + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | + ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | + ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE, + .gpudev = &adreno_gen8_hwsched_gpudev.base, + .perfcounters = &adreno_gen8_perfcounters, + .uche_gmem_alignment = SZ_64M, + .gmem_size = (SZ_2M + SZ_256K + SZ_128K), + .bus_width = 32, + .snapshot_size = SZ_8M, + .num_ddr_channels = 4, + }, + .aqefw_name = "gen80000_aqe.fw", + .sqefw_name = "gen80000_sqe.fw", + .gmufw_name = "gen80000_gmu.bin", + .zap_name = "gen80000_zap.mbn", + .ao_hwcg = gen8_ao_hwcg_regs, + .ao_hwcg_count = ARRAY_SIZE(gen8_ao_hwcg_regs), + .gbif = gen8_3_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen8_3_0_gbif_cx_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = gen8_0_0_protected_regs, + .nonctxt_regs = gen8_0_0_nonctxt_regs, + .highest_bank_bit = 16, + .gmu_hub_clk_freq = 200000000, + .gen8_snapshot_block_list = &gen8_6_0_snapshot_block_list, + .noc_timeout_us = 3410, /* 3.41 msec */ + .ctxt_record_size 
= (3372 * SZ_1K), +}; + static const struct adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_a306, /* Deprecated */ &adreno_gpu_core_a306a, /* Deprecated */ @@ -3252,4 +3288,5 @@ static const struct adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_gen8_0_1.base, &adreno_gpu_core_gen8_3_0.base, &adreno_gpu_core_gen8_4_0.base, + &adreno_gpu_core_gen8_6_0.base, }; diff --git a/adreno.h b/adreno.h index f5a04432c3..e0da437819 100644 --- a/adreno.h +++ b/adreno.h @@ -255,6 +255,7 @@ enum adreno_gpurev { ADRENO_REV_GEN8_0_1 = ADRENO_GPUREV_VALUE(8, 0, 1), ADRENO_REV_GEN8_3_0 = ADRENO_GPUREV_VALUE(8, 3, 0), ADRENO_REV_GEN8_4_0 = ADRENO_GPUREV_VALUE(8, 4, 0), + ADRENO_REV_GEN8_6_0 = ADRENO_GPUREV_VALUE(8, 6, 0), }; #define ADRENO_SOFT_FAULT BIT(0) @@ -1284,6 +1285,7 @@ ADRENO_TARGET(gen8_0_0, ADRENO_REV_GEN8_0_0) ADRENO_TARGET(gen8_0_1, ADRENO_REV_GEN8_0_1) ADRENO_TARGET(gen8_3_0, ADRENO_REV_GEN8_3_0) ADRENO_TARGET(gen8_4_0, ADRENO_REV_GEN8_4_0) +ADRENO_TARGET(gen8_6_0, ADRENO_REV_GEN8_6_0) static inline int adreno_is_gen7_9_x(struct adreno_device *adreno_dev) { @@ -1306,7 +1308,7 @@ static inline int adreno_is_gen7_2_x_family(struct adreno_device *adreno_dev) static inline int adreno_is_gen8_0_x_family(struct adreno_device *adreno_dev) { return adreno_is_gen8_0_0(adreno_dev) || adreno_is_gen8_0_1(adreno_dev) || - adreno_is_gen8_4_0(adreno_dev); + adreno_is_gen8_4_0(adreno_dev) || adreno_is_gen8_6_0(adreno_dev); } /* Gen7 targets which does not support concurrent binning */ diff --git a/adreno_gen8.h b/adreno_gen8.h index ffac83d5e3..fa8df15ed4 100644 --- a/adreno_gen8.h +++ b/adreno_gen8.h @@ -15,6 +15,7 @@ #define GEN8_0_0_NUM_PHYSICAL_SLICES 3 #define GEN8_3_0_NUM_PHYSICAL_SLICES 1 +#define GEN8_6_0_NUM_PHYSICAL_SLICES 2 /* Forward struct declaration */ struct gen8_snapshot_block_list; @@ -631,6 +632,9 @@ static inline u32 gen8_get_num_slices(struct adreno_device *adreno_dev) { if (adreno_is_gen8_3_0(adreno_dev)) return GEN8_3_0_NUM_PHYSICAL_SLICES; - return 
GEN8_0_0_NUM_PHYSICAL_SLICES; + else if (adreno_is_gen8_6_0(adreno_dev)) + return GEN8_6_0_NUM_PHYSICAL_SLICES; + else + return GEN8_0_0_NUM_PHYSICAL_SLICES; } #endif diff --git a/adreno_gen8_6_0_snapshot.h b/adreno_gen8_6_0_snapshot.h new file mode 100644 index 0000000000..be85fe01df --- /dev/null +++ b/adreno_gen8_6_0_snapshot.h @@ -0,0 +1,478 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + */ +#ifndef __ADRENO_GEN8_6_0_SNAPSHOT_H +#define __ADRENO_GEN8_6_0_SNAPSHOT_H + +#include "adreno_gen8_snapshot.h" +#include "adreno_gen8_0_0_snapshot.h" + +static const u32 gen8_6_0_debugbus_blocks[] = { + DEBUGBUS_GBIF_CX_GC_US_I_0, + DEBUGBUS_GMU_CX_GC_US_I_0, + DEBUGBUS_CX_GC_US_I_0, + DEBUGBUS_GBIF_GX_GC_US_I_0, + DEBUGBUS_GMU_GX_GC_US_I_0, + DEBUGBUS_DBGC_GC_US_I_0, + DEBUGBUS_RBBM_GC_US_I_0, + DEBUGBUS_LARC_GC_US_I_0, + DEBUGBUS_COM_GC_US_I_0, + DEBUGBUS_HLSQ_GC_US_I_0, + DEBUGBUS_CGC_GC_US_I_0, + DEBUGBUS_VSC_GC_US_I_0_0, + DEBUGBUS_VSC_GC_US_I_0_1, + DEBUGBUS_UFC_GC_US_I_0, + DEBUGBUS_UFC_GC_US_I_1, + DEBUGBUS_CP_GC_US_I_0_0, + DEBUGBUS_CP_GC_US_I_0_1, + DEBUGBUS_CP_GC_US_I_0_2, + DEBUGBUS_PC_BR_US_I_0, + DEBUGBUS_PC_BV_US_I_0, + DEBUGBUS_GPC_BR_US_I_0, + DEBUGBUS_GPC_BV_US_I_0, + DEBUGBUS_VPC_BR_US_I_0, + DEBUGBUS_VPC_BV_US_I_0, + DEBUGBUS_UCHE_WRAPPER_GC_US_I_0, + DEBUGBUS_UCHE_GC_US_I_0, + DEBUGBUS_UCHE_GC_US_I_1, + DEBUGBUS_CP_GC_S_0_I_0, + DEBUGBUS_PC_BR_S_0_I_0, + DEBUGBUS_PC_BV_S_0_I_0, + DEBUGBUS_TESS_GC_S_0_I_0, + DEBUGBUS_TSEFE_GC_S_0_I_0, + DEBUGBUS_TSEBE_GC_S_0_I_0, + DEBUGBUS_RAS_GC_S_0_I_0, + DEBUGBUS_LRZ_BR_S_0_I_0, + DEBUGBUS_LRZ_BV_S_0_I_0, + DEBUGBUS_VFDP_GC_S_0_I_0, + DEBUGBUS_GPC_BR_S_0_I_0, + DEBUGBUS_GPC_BV_S_0_I_0, + DEBUGBUS_VPCFE_BR_S_0_I_0, + DEBUGBUS_VPCFE_BV_S_0_I_0, + DEBUGBUS_VPCBE_BR_S_0_I_0, + DEBUGBUS_VPCBE_BV_S_0_I_0, + DEBUGBUS_CCHE_GC_S_0_I_0, + DEBUGBUS_DBGC_GC_S_0_I_0, + DEBUGBUS_LARC_GC_S_0_I_0, + DEBUGBUS_RBBM_GC_S_0_I_0, + 
DEBUGBUS_CCRE_GC_S_0_I_0, + DEBUGBUS_CGC_GC_S_0_I_0, + DEBUGBUS_GMU_GC_S_0_I_0, + DEBUGBUS_SLICE_GC_S_0_I_0, + DEBUGBUS_HLSQ_SPTP_STAR_GC_S_0_I_0, + DEBUGBUS_USP_GC_S_0_I_0, + DEBUGBUS_USP_GC_S_0_I_1, + DEBUGBUS_USPTP_GC_S_0_I_0, + DEBUGBUS_USPTP_GC_S_0_I_1, + DEBUGBUS_USPTP_GC_S_0_I_2, + DEBUGBUS_USPTP_GC_S_0_I_3, + DEBUGBUS_TP_GC_S_0_I_0, + DEBUGBUS_TP_GC_S_0_I_1, + DEBUGBUS_TP_GC_S_0_I_2, + DEBUGBUS_TP_GC_S_0_I_3, + DEBUGBUS_RB_GC_S_0_I_0, + DEBUGBUS_RB_GC_S_0_I_1, + DEBUGBUS_CCU_GC_S_0_I_0, + DEBUGBUS_CCU_GC_S_0_I_1, + DEBUGBUS_HLSQ_GC_S_0_I_0, + DEBUGBUS_HLSQ_GC_S_0_I_1, + DEBUGBUS_VFD_GC_S_0_I_0, + DEBUGBUS_VFD_GC_S_0_I_1, + DEBUGBUS_CP_GC_S_1_I_0, + DEBUGBUS_PC_BR_S_1_I_0, + DEBUGBUS_PC_BV_S_1_I_0, + DEBUGBUS_TESS_GC_S_1_I_0, + DEBUGBUS_TSEFE_GC_S_1_I_0, + DEBUGBUS_TSEBE_GC_S_1_I_0, + DEBUGBUS_RAS_GC_S_1_I_0, + DEBUGBUS_LRZ_BR_S_1_I_0, + DEBUGBUS_LRZ_BV_S_1_I_0, + DEBUGBUS_VFDP_GC_S_1_I_0, + DEBUGBUS_GPC_BR_S_1_I_0, + DEBUGBUS_GPC_BV_S_1_I_0, + DEBUGBUS_VPCFE_BR_S_1_I_0, + DEBUGBUS_VPCFE_BV_S_1_I_0, + DEBUGBUS_VPCBE_BR_S_1_I_0, + DEBUGBUS_VPCBE_BV_S_1_I_0, + DEBUGBUS_CCHE_GC_S_1_I_0, + DEBUGBUS_DBGC_GC_S_1_I_0, + DEBUGBUS_LARC_GC_S_1_I_0, + DEBUGBUS_RBBM_GC_S_1_I_0, + DEBUGBUS_CCRE_GC_S_1_I_0, + DEBUGBUS_CGC_GC_S_1_I_0, + DEBUGBUS_GMU_GC_S_1_I_0, + DEBUGBUS_SLICE_GC_S_1_I_0, + DEBUGBUS_HLSQ_SPTP_STAR_GC_S_1_I_0, + DEBUGBUS_USP_GC_S_1_I_0, + DEBUGBUS_USP_GC_S_1_I_1, + DEBUGBUS_USPTP_GC_S_1_I_0, + DEBUGBUS_USPTP_GC_S_1_I_1, + DEBUGBUS_USPTP_GC_S_1_I_2, + DEBUGBUS_USPTP_GC_S_1_I_3, + DEBUGBUS_TP_GC_S_1_I_0, + DEBUGBUS_TP_GC_S_1_I_1, + DEBUGBUS_TP_GC_S_1_I_2, + DEBUGBUS_TP_GC_S_1_I_3, + DEBUGBUS_RB_GC_S_1_I_0, + DEBUGBUS_RB_GC_S_1_I_1, + DEBUGBUS_CCU_GC_S_1_I_0, + DEBUGBUS_CCU_GC_S_1_I_1, + DEBUGBUS_HLSQ_GC_S_1_I_0, + DEBUGBUS_HLSQ_GC_S_1_I_1, + DEBUGBUS_VFD_GC_S_1_I_0, + DEBUGBUS_VFD_GC_S_1_I_1, +}; + +/* + * Block : ['BROADCAST', 'GRAS', 'PC'] + * Block : ['RBBM', 'RDVM', 'UCHE'] + * Block : ['VFD', 'VPC', 'VSC'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + 
* pairs : 121 (Regs:1162) + */ +static const u32 gen8_6_0_gpu_registers[] = { + 0x00000, 0x00000, 0x00002, 0x00002, 0x00008, 0x0000d, 0x00010, 0x00013, + 0x00015, 0x00016, 0x00018, 0x00018, 0x0001a, 0x0001a, 0x0001c, 0x0001c, + 0x0001e, 0x0001e, 0x00028, 0x0002b, 0x0002d, 0x00039, 0x00040, 0x00053, + 0x00062, 0x00066, 0x00069, 0x0006e, 0x00071, 0x00072, 0x00074, 0x00074, + 0x00076, 0x0007c, 0x0007f, 0x0009a, 0x0009d, 0x000af, 0x000b2, 0x000d4, + 0x000d7, 0x000e2, 0x000e5, 0x000e6, 0x000e9, 0x000f1, 0x000f4, 0x000f6, + 0x000f9, 0x00108, 0x0010b, 0x0010e, 0x00111, 0x00111, 0x00114, 0x0011c, + 0x0011f, 0x00121, 0x00125, 0x00125, 0x00127, 0x00127, 0x00129, 0x00129, + 0x0012b, 0x00131, 0x00134, 0x00138, 0x0013a, 0x0013a, 0x0013c, 0x0013f, + 0x00142, 0x00150, 0x00153, 0x00155, 0x00158, 0x00159, 0x0015c, 0x0015c, + 0x00166, 0x00179, 0x0019e, 0x001a3, 0x001b0, 0x002c9, 0x002e2, 0x0036b, + 0x00380, 0x0039b, 0x003a4, 0x003ab, 0x003b4, 0x003c5, 0x003ce, 0x003cf, + 0x003e0, 0x003e0, 0x003f0, 0x003f0, 0x00440, 0x00444, 0x00460, 0x00460, + 0x00c02, 0x00c04, 0x00c06, 0x00c06, 0x00c10, 0x00cd9, 0x00ce0, 0x00d0c, + 0x00df0, 0x00df4, 0x00e01, 0x00e04, 0x00e06, 0x00e09, 0x00e0e, 0x00e13, + 0x00e15, 0x00e16, 0x00e20, 0x00e37, 0x0ec00, 0x0ec01, 0x0ec05, 0x0ec05, + 0x0ec07, 0x0ec07, 0x0ec0a, 0x0ec0a, 0x0ec12, 0x0ec12, 0x0ec26, 0x0ec28, + 0x0ec2b, 0x0ec2d, 0x0ec2f, 0x0ec2f, 0x0ec40, 0x0ec41, 0x0ec45, 0x0ec45, + 0x0ec47, 0x0ec47, 0x0ec4a, 0x0ec4a, 0x0ec52, 0x0ec52, 0x0ec66, 0x0ec68, + 0x0ec6b, 0x0ec6d, 0x0ec6f, 0x0ec6f, 0x0ec80, 0x0ec81, 0x0ec85, 0x0ec85, + 0x0ec87, 0x0ec87, 0x0ec8a, 0x0ec8a, 0x0ec92, 0x0ec92, 0x0eca6, 0x0eca8, + 0x0ecab, 0x0ecad, 0x0ecaf, 0x0ecaf, 0x0ecc0, 0x0ecc1, 0x0ecc5, 0x0ecc5, + 0x0ecc7, 0x0ecc7, 0x0ecca, 0x0ecca, 0x0ecd2, 0x0ecd2, 0x0ece6, 0x0ece8, + 0x0eceb, 0x0eced, 0x0ecef, 0x0ecef, 0x0ed00, 0x0ed01, 0x0ed05, 0x0ed05, + 0x0ed07, 0x0ed07, 0x0ed0a, 0x0ed0a, 0x0ed12, 0x0ed12, 0x0ed26, 0x0ed28, + 0x0ed2b, 0x0ed2d, 0x0ed2f, 0x0ed2f, 0x0ed40, 0x0ed41, 0x0ed45, 
0x0ed45, + 0x0ed47, 0x0ed47, 0x0ed4a, 0x0ed4a, 0x0ed52, 0x0ed52, 0x0ed66, 0x0ed68, + 0x0ed6b, 0x0ed6d, 0x0ed6f, 0x0ed6f, 0x0ed80, 0x0ed81, 0x0ed85, 0x0ed85, + 0x0ed87, 0x0ed87, 0x0ed8a, 0x0ed8a, 0x0ed92, 0x0ed92, 0x0eda6, 0x0eda8, + 0x0edab, 0x0edad, 0x0edaf, 0x0edaf, 0x0f400, 0x0f400, 0x0f800, 0x0f803, + 0x0fc00, 0x0fc01, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_6_0_gpu_registers), 8)); + +/* + * Block : ['GMUAO', 'GMUCX', 'GMUCX_RAM'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 160 (Regs:616) + */ +static const u32 gen8_6_0_gmu_registers[] = { + 0x10001, 0x10001, 0x10003, 0x10003, 0x10401, 0x10401, 0x10403, 0x10403, + 0x10801, 0x10801, 0x10803, 0x10803, 0x10c01, 0x10c01, 0x10c03, 0x10c03, + 0x11001, 0x11001, 0x11003, 0x11003, 0x11401, 0x11401, 0x11403, 0x11403, + 0x11801, 0x11801, 0x11803, 0x11803, 0x11c01, 0x11c01, 0x11c03, 0x11c03, + 0x1f400, 0x1f40b, 0x1f40f, 0x1f411, 0x1f500, 0x1f500, 0x1f507, 0x1f507, + 0x1f509, 0x1f50b, 0x1f700, 0x1f701, 0x1f704, 0x1f706, 0x1f708, 0x1f709, + 0x1f70c, 0x1f70d, 0x1f710, 0x1f711, 0x1f713, 0x1f716, 0x1f718, 0x1f71d, + 0x1f720, 0x1f725, 0x1f729, 0x1f729, 0x1f730, 0x1f747, 0x1f750, 0x1f75a, + 0x1f75c, 0x1f75c, 0x1f780, 0x1f781, 0x1f784, 0x1f78b, 0x1f790, 0x1f797, + 0x1f7a0, 0x1f7a7, 0x1f7b0, 0x1f7b7, 0x1f7e0, 0x1f7e1, 0x1f7e4, 0x1f7e5, + 0x1f7e8, 0x1f7e9, 0x1f7ec, 0x1f7ed, 0x1f800, 0x1f804, 0x1f807, 0x1f808, + 0x1f80b, 0x1f80c, 0x1f80f, 0x1f80f, 0x1f811, 0x1f811, 0x1f813, 0x1f817, + 0x1f819, 0x1f81c, 0x1f824, 0x1f830, 0x1f840, 0x1f842, 0x1f848, 0x1f848, + 0x1f84c, 0x1f84c, 0x1f850, 0x1f850, 0x1f858, 0x1f859, 0x1f868, 0x1f869, + 0x1f878, 0x1f883, 0x1f930, 0x1f931, 0x1f934, 0x1f935, 0x1f938, 0x1f939, + 0x1f93c, 0x1f93d, 0x1f940, 0x1f941, 0x1f943, 0x1f943, 0x1f948, 0x1f94a, + 0x1f94f, 0x1f951, 0x1f954, 0x1f955, 0x1f95d, 0x1f95d, 0x1f962, 0x1f96b, + 0x1f970, 0x1f970, 0x1f97c, 0x1f97e, 0x1f980, 0x1f981, 0x1f984, 0x1f986, + 0x1f992, 0x1f993, 0x1f996, 0x1f99e, 0x1f9c0, 0x1f9cf, 0x1f9f0, 0x1f9f1, + 
0x1f9f8, 0x1f9fa, 0x1f9fc, 0x1f9fc, 0x1fa00, 0x1fa03, 0x1fc00, 0x1fc01, + 0x1fc04, 0x1fc07, 0x1fc10, 0x1fc10, 0x1fc14, 0x1fc14, 0x1fc18, 0x1fc19, + 0x1fc20, 0x1fc20, 0x1fc24, 0x1fc26, 0x1fc30, 0x1fc33, 0x1fc38, 0x1fc3b, + 0x1fc40, 0x1fc49, 0x1fc50, 0x1fc59, 0x1fc60, 0x1fc7f, 0x1fca0, 0x1fcef, + 0x20000, 0x20007, 0x20010, 0x20015, 0x20018, 0x2001a, 0x2001c, 0x2001d, + 0x20020, 0x20021, 0x20024, 0x20025, 0x2002a, 0x2002c, 0x20030, 0x20031, + 0x20034, 0x20036, 0x20080, 0x20087, 0x20300, 0x20301, 0x20304, 0x20305, + 0x20308, 0x2030c, 0x20310, 0x20314, 0x20318, 0x2031a, 0x20320, 0x20322, + 0x20324, 0x20326, 0x20328, 0x2032a, 0x20330, 0x20333, 0x20338, 0x20338, + 0x20340, 0x20350, 0x20354, 0x2035b, 0x20360, 0x20367, 0x20370, 0x20377, + 0x23801, 0x23801, 0x23803, 0x23803, 0x23805, 0x23805, 0x23807, 0x23807, + 0x23809, 0x23809, 0x2380b, 0x2380b, 0x2380d, 0x2380d, 0x2380f, 0x2380f, + 0x23811, 0x23811, 0x23813, 0x23813, 0x23815, 0x23815, 0x23817, 0x23817, + 0x23819, 0x23819, 0x2381b, 0x2381b, 0x2381d, 0x2381d, 0x2381f, 0x23820, + 0x23822, 0x23822, 0x23824, 0x23824, 0x23826, 0x23826, 0x23828, 0x23828, + 0x2382a, 0x2382a, 0x2382c, 0x2382c, 0x2382e, 0x2382e, 0x23830, 0x23830, + 0x23832, 0x23832, 0x23834, 0x23834, 0x23836, 0x23836, 0x23838, 0x23838, + 0x2383a, 0x2383a, 0x2383c, 0x2383c, 0x2383e, 0x2383e, 0x23840, 0x23847, + 0x23b00, 0x23b01, 0x23b03, 0x23b03, 0x23b05, 0x23b0e, 0x23b10, 0x23b13, + 0x23b15, 0x23b16, 0x23b28, 0x23b28, 0x23b30, 0x23b30, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_6_0_gmu_registers), 8)); + +/* + * Block : ['BROADCAST', 'CX_DBGC', 'CX_MISC', 'DBGC', 'GBIF', 'GMUAO'] + * Block : ['GMUCX', 'GMUGX', 'GRAS', 'PC', 'RBBM'] + * Block : ['RDVM', 'UCHE', 'VFD', 'VPC', 'VSC'] + * REGION : SLICE + * Pipeline: PIPE_BR + * Cluster : CLUSTER_NONE + * pairs : 21 (Regs:301) + */ +static const u32 gen8_6_0_non_context_slice_pipe_br_registers[] = { + 0x08600, 0x08602, 0x08610, 0x08613, 0x08700, 0x08704, 0x08710, 0x08713, + 0x08720, 0x08723, 
0x08730, 0x08733, 0x08740, 0x08744, 0x09680, 0x09681, + 0x09690, 0x0969b, 0x09740, 0x09745, 0x09750, 0x0975b, 0x09770, 0x097ef, + 0x09f00, 0x09f0f, 0x09f20, 0x09f23, 0x09f30, 0x09f31, 0x0a600, 0x0a600, + 0x0a603, 0x0a603, 0x0a610, 0x0a61f, 0x0a630, 0x0a632, 0x0a638, 0x0a63c, + 0x0a640, 0x0a67f, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_6_0_non_context_slice_pipe_br_registers), 8)); + +/* + * Block : ['BROADCAST', 'CX_DBGC', 'CX_MISC', 'DBGC', 'GBIF', 'GMUAO'] + * Block : ['GMUCX', 'GMUGX', 'GRAS', 'PC', 'RBBM'] + * Block : ['RDVM', 'UCHE', 'VFD', 'VPC', 'VSC'] + * REGION : SLICE + * Pipeline: PIPE_BV + * Cluster : CLUSTER_NONE + * pairs : 21 (Regs:301) + */ +static const u32 gen8_6_0_non_context_slice_pipe_bv_registers[] = { + 0x08600, 0x08602, 0x08610, 0x08613, 0x08700, 0x08704, 0x08710, 0x08713, + 0x08720, 0x08723, 0x08730, 0x08733, 0x08740, 0x08744, 0x09680, 0x09681, + 0x09690, 0x0969b, 0x09740, 0x09745, 0x09750, 0x0975b, 0x09770, 0x097ef, + 0x09f00, 0x09f0f, 0x09f20, 0x09f23, 0x09f30, 0x09f31, 0x0a600, 0x0a600, + 0x0a603, 0x0a603, 0x0a610, 0x0a61f, 0x0a630, 0x0a632, 0x0a638, 0x0a63c, + 0x0a640, 0x0a67f, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_6_0_non_context_slice_pipe_bv_registers), 8)); + +static struct gen8_cluster_registers gen8_6_0_mvc_clusters[] = { + { CLUSTER_NONE, UNSLICE, PIPE_BR, STATE_NON_CONTEXT, + gen8_0_0_non_context_pipe_br_registers, }, + { CLUSTER_NONE, SLICE, PIPE_BR, STATE_NON_CONTEXT, + gen8_6_0_non_context_slice_pipe_br_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_BV, STATE_NON_CONTEXT, + gen8_0_0_non_context_pipe_bv_registers, }, + { CLUSTER_NONE, SLICE, PIPE_BV, STATE_NON_CONTEXT, + gen8_6_0_non_context_slice_pipe_bv_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_LPAC, STATE_NON_CONTEXT, + gen8_0_0_non_context_pipe_lpac_registers, }, + { CLUSTER_NONE, UNSLICE, PIPE_BR, STATE_NON_CONTEXT, + gen8_0_0_non_context_rb_pipe_br_rbp_registers, &gen8_0_0_rb_rbp_sel, }, + { CLUSTER_NONE, SLICE, 
PIPE_BR, STATE_NON_CONTEXT, + gen8_0_0_non_context_rb_slice_pipe_br_rac_registers, &gen8_0_0_rb_rac_sel, }, + { CLUSTER_NONE, SLICE, PIPE_BR, STATE_NON_CONTEXT, + gen8_0_0_non_context_rb_slice_pipe_br_rbp_registers, &gen8_0_0_rb_rbp_sel, }, + { CLUSTER_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_rb_slice_pipe_br_cluster_ps_rac_registers, &gen8_0_0_rb_rac_sel, }, + { CLUSTER_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_rb_slice_pipe_br_cluster_ps_rac_registers, &gen8_0_0_rb_rac_sel, }, + { CLUSTER_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_rb_slice_pipe_br_cluster_ps_rbp_registers, &gen8_0_0_rb_rbp_sel, }, + { CLUSTER_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_rb_slice_pipe_br_cluster_ps_rbp_registers, &gen8_0_0_rb_rbp_sel, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_gras_slice_pipe_br_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_gras_slice_pipe_br_cluster_vpc_vs_registers, }, + { CLUSTER_GRAS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_gras_slice_pipe_br_cluster_gras_registers, }, + { CLUSTER_GRAS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_gras_slice_pipe_br_cluster_gras_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_gras_slice_pipe_bv_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_gras_slice_pipe_bv_cluster_vpc_vs_registers, }, + { CLUSTER_GRAS, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_gras_slice_pipe_bv_cluster_gras_registers, }, + { CLUSTER_GRAS, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_gras_slice_pipe_bv_cluster_gras_registers, }, + { CLUSTER_FE_US, UNSLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_pc_pipe_br_cluster_fe_us_registers, }, + { CLUSTER_FE_US, UNSLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_pc_pipe_br_cluster_fe_us_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_pc_slice_pipe_br_cluster_fe_s_registers, }, + { 
CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_pc_slice_pipe_br_cluster_fe_s_registers, }, + { CLUSTER_FE_US, UNSLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_pc_pipe_bv_cluster_fe_us_registers, }, + { CLUSTER_FE_US, UNSLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_pc_pipe_bv_cluster_fe_us_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_pc_slice_pipe_bv_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_pc_slice_pipe_bv_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_vfd_slice_pipe_br_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_vfd_slice_pipe_br_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_vfd_slice_pipe_bv_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_vfd_slice_pipe_bv_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_br_cluster_fe_s_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_br_cluster_fe_s_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_br_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_br_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_US, UNSLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_pipe_br_cluster_vpc_us_registers, }, + { CLUSTER_VPC_US, UNSLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_pipe_br_cluster_vpc_us_registers, }, + { CLUSTER_VPC_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_br_cluster_vpc_ps_registers, }, + { CLUSTER_VPC_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_br_cluster_vpc_ps_registers, }, + { CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_bv_cluster_fe_s_registers, }, + { 
CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_bv_cluster_fe_s_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_bv_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_VS, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_bv_cluster_vpc_vs_registers, }, + { CLUSTER_VPC_US, UNSLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_pipe_bv_cluster_vpc_us_registers, }, + { CLUSTER_VPC_US, UNSLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_pipe_bv_cluster_vpc_us_registers, }, + { CLUSTER_VPC_PS, SLICE, PIPE_BV, STATE_FORCE_CTXT_0, + gen8_0_0_vpc_slice_pipe_bv_cluster_vpc_ps_registers, }, + { CLUSTER_VPC_PS, SLICE, PIPE_BV, STATE_FORCE_CTXT_1, + gen8_0_0_vpc_slice_pipe_bv_cluster_vpc_ps_registers, }, +}; + +static struct gen8_reg_list gen8_6_0_reg_list[] = { + { UNSLICE, gen8_6_0_gpu_registers }, + { SLICE, gen8_0_0_gpu_slice_registers }, + { UNSLICE, gen8_0_0_dbgc_registers }, + { SLICE, gen8_0_0_dbgc_slice_registers }, + { UNSLICE, gen8_0_0_cx_dbgc_registers }, + { UNSLICE, NULL}, +}; + +static struct gen8_reg_list gen8_6_0_ahb_registers[] = { + { UNSLICE, gen8_0_0_gbif_registers }, +}; + +/* + * Block : ['GPU_CC_GPU_CC_REG'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 30 (Regs:128) + */ +static const u32 gen8_6_0_gpu_cc_gpu_cc_reg_registers[] = { + 0x25400, 0x25404, 0x25800, 0x25804, 0x25c00, 0x25c04, 0x26000, 0x26004, + 0x26400, 0x26406, 0x26415, 0x26418, 0x2641c, 0x2641d, 0x2641f, 0x26437, + 0x26439, 0x2643a, 0x2643c, 0x2643f, 0x26443, 0x26444, 0x26478, 0x2647a, + 0x26489, 0x2648a, 0x2649c, 0x2649e, 0x264a0, 0x264a1, 0x264c5, 0x264c7, + 0x264e8, 0x264ea, 0x264f9, 0x264fc, 0x2650b, 0x2650b, 0x2651c, 0x2651e, + 0x26540, 0x2654b, 0x26554, 0x26556, 0x26558, 0x2655c, 0x2655e, 0x2655f, + 0x26563, 0x26563, 0x2656d, 0x26573, 0x26576, 0x26576, 0x26578, 0x2657a, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_6_0_gpu_cc_gpu_cc_reg_registers), 8)); + +/* + * Block : 
['GPU_CC_PLL0_CM_PLL_LUCID_OLE'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 1 (Regs:16) + */ +static const u32 gen8_6_0_gpu_cc_pll0_cm_pll_lucid_ole_registers[] = { + 0x24000, 0x2400f, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_6_0_gpu_cc_pll0_cm_pll_lucid_ole_registers), 8)); + +/* + * Block : ['ACD_ACD'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 10 (Regs:53) + */ +static const u32 gen8_6_0_acd_acd_registers[] = { + 0x1a400, 0x1a416, 0x1a420, 0x1a42d, 0x1a430, 0x1a431, 0x1a435, 0x1a435, + 0x1a437, 0x1a437, 0x1a43a, 0x1a43a, 0x1a442, 0x1a442, 0x1a456, 0x1a458, + 0x1a45b, 0x1a45d, 0x1a45f, 0x1a462, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_6_0_acd_acd_registers), 8)); + +/* + * Block : ['GX_CLKCTL_GX_CLKCTL_REG'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 12 (Regs:85) + */ +static const u32 gen8_6_0_gx_clkctl_gx_clkctl_reg_registers[] = { + 0x1a000, 0x1a004, 0x1a008, 0x1a012, 0x1a014, 0x1a014, 0x1a017, 0x1a017, + 0x1a019, 0x1a01c, 0x1a022, 0x1a022, 0x1a024, 0x1a029, 0x1a03f, 0x1a05d, + 0x1a060, 0x1a063, 0x1a065, 0x1a066, 0x1a068, 0x1a076, 0x1a078, 0x1a07b, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_6_0_gx_clkctl_gx_clkctl_reg_registers), 8)); + +/* + * Block : ['GX_CLKCTL_PLL0_CM_PLL_LUCID_OLE'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 1 (Regs:16) + */ +static const u32 gen8_6_0_gx_clkctl_pll0_cm_pll_lucid_ole_registers[] = { + 0x19000, 0x1900f, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_6_0_gx_clkctl_pll0_cm_pll_lucid_ole_registers), 8)); + +/* + * Block : ['CPR'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 22 (Regs:504) + */ +static const u32 gen8_6_0_cpr_registers[] = { + 0x26800, 0x26805, 0x26808, 0x2680d, 0x26814, 0x26815, 0x2681c, 0x2681c, + 0x26820, 0x26839, 0x26840, 0x26841, 0x26848, 0x26849, 0x26850, 0x26851, + 0x26880, 0x268a2, 0x26980, 0x269b0, 0x269c0, 0x269c2, 0x269c6, 0x269c8, + 0x269e0, 0x269ee, 
0x269fb, 0x269ff, 0x26a02, 0x26a07, 0x26a09, 0x26a0b, + 0x26a10, 0x26b0f, 0x27440, 0x27441, 0x27444, 0x27444, 0x27480, 0x274a2, + 0x274ac, 0x274c4, 0x274c8, 0x274da, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_6_0_cpr_registers), 8)); + +static const u32 *gen8_6_0_external_core_regs[] = { + gen8_0_0_gdpm_lkg_registers, + gen8_0_0_gpu_cc_ahb2phy_broadcast_swman_registers, + gen8_0_0_gpu_cc_ahb2phy_swman_registers, + gen8_6_0_gpu_cc_gpu_cc_reg_registers, + gen8_6_0_gpu_cc_pll0_cm_pll_lucid_ole_registers, + gen8_6_0_cpr_registers, +}; + +static struct gen8_reg_list gen8_6_0_gmu_gx_registers[] = { + { UNSLICE, gen8_0_0_gmugx_registers }, + { UNSLICE, gen8_0_0_gx_clkctl_ahb2phy_broadcast_swman_registers }, + { UNSLICE, gen8_0_0_gx_clkctl_ahb2phy_swman_registers }, + { UNSLICE, gen8_6_0_gx_clkctl_pll0_cm_pll_lucid_ole_registers }, + { UNSLICE, gen8_6_0_gx_clkctl_gx_clkctl_reg_registers }, + { UNSLICE, gen8_6_0_acd_acd_registers }, + { SLICE, gen8_0_0_gmugx_slice_registers }, +}; +#endif /*_ADRENO_GEN8_6_0_SNAPSHOT_H */ diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 57d2b9b3e5..503335f7cc 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -7,6 +7,7 @@ #include "adreno.h" #include "adreno_gen8_0_0_snapshot.h" #include "adreno_gen8_3_0_snapshot.h" +#include "adreno_gen8_6_0_snapshot.h" #include "adreno_snapshot.h" static struct kgsl_memdesc *gen8_capturescript; @@ -80,6 +81,37 @@ const struct gen8_snapshot_block_list gen8_3_0_snapshot_block_list = { .mempool_index_registers_len = ARRAY_SIZE(gen8_0_0_cp_mempool_reg_list), }; +const struct gen8_snapshot_block_list gen8_6_0_snapshot_block_list = { + .pre_crashdumper_regs = gen8_6_0_ahb_registers, + .num_pre_crashdumper_regs = ARRAY_SIZE(gen8_6_0_ahb_registers), + .debugbus_blocks = gen8_6_0_debugbus_blocks, + .debugbus_blocks_len = ARRAY_SIZE(gen8_6_0_debugbus_blocks), + .gbif_debugbus_blocks = gen8_gbif_debugbus_blocks, + .gbif_debugbus_blocks_len = 
ARRAY_SIZE(gen8_gbif_debugbus_blocks), + .cx_debugbus_blocks = gen8_cx_debugbus_blocks, + .cx_debugbus_blocks_len = ARRAY_SIZE(gen8_cx_debugbus_blocks), + .external_core_regs = gen8_6_0_external_core_regs, + .num_external_core_regs = ARRAY_SIZE(gen8_6_0_external_core_regs), + .gmu_cx_unsliced_regs = gen8_6_0_gmu_registers, + .gmu_gx_regs = gen8_6_0_gmu_gx_registers, + .num_gmu_gx_regs = ARRAY_SIZE(gen8_6_0_gmu_gx_registers), + .rscc_regs = gen8_0_0_rscc_rsc_registers, + .reg_list = gen8_6_0_reg_list, + .cx_misc_regs = gen8_0_0_cx_misc_registers, + .shader_blocks = gen8_0_0_shader_blocks, + .num_shader_blocks = ARRAY_SIZE(gen8_0_0_shader_blocks), + .cp_clusters = gen8_0_0_cp_clusters, + .num_cp_clusters = ARRAY_SIZE(gen8_0_0_cp_clusters), + .clusters = gen8_6_0_mvc_clusters, + .num_clusters = ARRAY_SIZE(gen8_6_0_mvc_clusters), + .sptp_clusters = gen8_0_0_sptp_clusters, + .num_sptp_clusters = ARRAY_SIZE(gen8_0_0_sptp_clusters), + .index_registers = gen8_0_0_cp_indexed_reg_list, + .index_registers_len = ARRAY_SIZE(gen8_0_0_cp_indexed_reg_list), + .mempool_index_registers = gen8_0_0_cp_mempool_reg_list, + .mempool_index_registers_len = ARRAY_SIZE(gen8_0_0_cp_mempool_reg_list), +}; + #define GEN8_SP_READ_SEL_VAL(_sliceid, _location, _pipe, _statetype, _usptp, _sptp) \ (FIELD_PREP(GENMASK(25, 21), _sliceid) | \ FIELD_PREP(GENMASK(20, 18), _location) | \ From 1462309ce60f460d74f59a52cae8759f0fa8fa91 Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Tue, 19 Nov 2024 22:01:36 +0530 Subject: [PATCH 0942/1016] kgsl: snapshot: Fix integer underflow in adreno_ib_find_objs() Add a check to ensure rem variable in adreno_ib_find_objs() is not less than pktsize before decrementing. This prevents underflow and ensures the loop operates correctly. 
Change-Id: I7cdd069b737f87fb9284f5c559e72844c7033605 Signed-off-by: Sanjay Yadav --- adreno_cp_parser.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/adreno_cp_parser.c b/adreno_cp_parser.c index 23fd9baff1..a9d1b1c017 100644 --- a/adreno_cp_parser.c +++ b/adreno_cp_parser.c @@ -415,6 +415,10 @@ static int adreno_ib_find_objs(struct kgsl_device *device, else break; + /* Check if rem is less than pktsize before decrementing */ + if (rem < pktsize) + break; + if (pkt_is_type7(src[i])) { if (adreno_cmd_is_ib(adreno_dev, src[i])) { uint64_t size = src[i + 3]; From 36a1759bf8a7bd70975ffcf9209af5e590c5ebcf Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Tue, 26 Nov 2024 22:19:43 +0530 Subject: [PATCH 0943/1016] msm: kgsl: Enable IFPC feature for gen_8_6_0 Enable IFPC feature for GPU power saving. Change-Id: I5d80b2aa9d12b1770d92801ec9167e8083efca25 Signed-off-by: SIVA MULLATI --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index ca8368b6ff..b7e2e068b5 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -3204,7 +3204,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_6_0 = { .compatible = "qcom,adreno-gpu-gen8-6-0", .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | - ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE, + ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_IFPC, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_64M, From ab2b578c0d0acae1617d80fdd910a08a36c8a679 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Thu, 4 Jan 2024 18:48:17 +0530 Subject: [PATCH 0944/1016] kgsl: Skip cache maintenance for io-coherent buffers during page allocation When io-coherency is enabled, DMA cache maintenance can be skipped for io-coherent buffers. 
Change-Id: I3ec5fd79227b679c1d1e4866cd9da2a9cc8e6ffc Signed-off-by: Pankaj Gupta Signed-off-by: Kamal Agrawal --- kgsl.h | 2 -- kgsl_sharedmem.c | 7 +++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/kgsl.h b/kgsl.h index a275225203..10dcec39ee 100644 --- a/kgsl.h +++ b/kgsl.h @@ -301,8 +301,6 @@ struct kgsl_memdesc { struct mutex ranges_lock; /** @gmuaddr: GMU VA if this is mapped in GMU */ u32 gmuaddr; - /*@kgsl_dev: kgsl device dev instance */ - struct device *kgsl_dev; /*@shmem_page_list: shmem pages list */ struct list_head shmem_page_list; }; diff --git a/kgsl_sharedmem.c b/kgsl_sharedmem.c index 6012f9a561..5288c012f4 100644 --- a/kgsl_sharedmem.c +++ b/kgsl_sharedmem.c @@ -823,7 +823,6 @@ void kgsl_memdesc_init(struct kgsl_device *device, memdesc->priv |= KGSL_MEMDESC_SECURE; memdesc->flags = flags; - memdesc->kgsl_dev = device->dev; /* * For io-coherent buffers don't set memdesc->dev, so that we skip DMA @@ -1138,7 +1137,7 @@ static int kgsl_alloc_page(struct kgsl_memdesc *memdesc, int *page_size, (list_empty(&memdesc->shmem_page_list) && (pcount > 1))) clear_highpage(page); - kgsl_page_sync(memdesc->kgsl_dev, page, PAGE_SIZE, DMA_TO_DEVICE); + kgsl_page_sync(memdesc->dev, page, PAGE_SIZE, DMA_TO_DEVICE); *page_size = PAGE_SIZE; *pages = page; @@ -1211,7 +1210,7 @@ static int kgsl_alloc_page(struct kgsl_memdesc *memdesc, int *page_size, return -EINTR; return kgsl_pool_alloc_page(page_size, pages, - pages_len, align, memdesc->kgsl_dev); + pages_len, align, memdesc->dev); } static int kgsl_memdesc_file_setup(struct kgsl_memdesc *memdesc) @@ -1654,7 +1653,7 @@ static int kgsl_system_alloc_pages(struct kgsl_memdesc *memdesc, struct page *** } /* Make sure the cache is clean */ - kgsl_page_sync(memdesc->kgsl_dev, local[i], PAGE_SIZE, DMA_TO_DEVICE); + kgsl_page_sync(memdesc->dev, local[i], PAGE_SIZE, DMA_TO_DEVICE); } *pages = local; From a24bf9d1fc579946e044a52274dc66ce95a7b8e1 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Fri, 15 Nov 
2024 12:27:50 -0700 Subject: [PATCH 0945/1016] kgsl: coresight: Fix adreno_coresight_patch_pwrup_reglist declaration Fix adreno_coresight_patch_pwrup_reglist function declaration when coresight is disabled. Change-Id: I7e5da6a3ff1c13af361fc239da805778123a2b2c Signed-off-by: Carter Cooper Signed-off-by: Kamal Agrawal --- adreno_coresight.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno_coresight.h b/adreno_coresight.h index 626387eb51..b776f85292 100644 --- a/adreno_coresight.h +++ b/adreno_coresight.h @@ -170,7 +170,7 @@ static inline void adreno_coresight_add_device(struct kgsl_device *device, static inline void adreno_coresight_start(struct adreno_device *adreno_dev) { } static inline void adreno_coresight_stop(struct adreno_device *adreno_dev) { } static inline void adreno_coresight_remove(struct adreno_device *adreno_dev) { } -static inline u32 adreno_coresight_patch_pwrup_reglist(struct adreno_device *adreno_dev) +static inline u32 adreno_coresight_patch_pwrup_reglist(struct adreno_device *adreno_dev, u32 *dest) { return 0; } From bf0f513fd57d0b866448e844347638039babb978 Mon Sep 17 00:00:00 2001 From: Amit Kushwaha Date: Wed, 23 Oct 2024 09:30:17 +0530 Subject: [PATCH 0946/1016] kgsl: build: Remove GPU devfreq governor configs for monaco Compile source files for governor "msm_adreno_tz" and "gpubw_mon" in kgsl as kernel doesn't include these configs option. Change-Id: I0675ba0bb8299743230a0ce02b131701fc771a1c Signed-off-by: Amit Kushwaha --- config/monaco_consolidate_gpuconf | 4 +--- config/monaco_gki_gpuconf | 4 +--- config/monaco_perf_gpuconf | 11 +---------- 3 files changed, 3 insertions(+), 16 deletions(-) diff --git a/config/monaco_consolidate_gpuconf b/config/monaco_consolidate_gpuconf index 8319734c76..86725058e4 100644 --- a/config/monaco_consolidate_gpuconf +++ b/config/monaco_consolidate_gpuconf @@ -1,9 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only -# Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. 
+# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. CONFIG_QCOM_KGSL=m -CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=y -CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=y CONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 CONFIG_QCOM_KGSL_SORT_POOL=y CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y diff --git a/config/monaco_gki_gpuconf b/config/monaco_gki_gpuconf index 8319734c76..86725058e4 100644 --- a/config/monaco_gki_gpuconf +++ b/config/monaco_gki_gpuconf @@ -1,9 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only -# Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. CONFIG_QCOM_KGSL=m -CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=y -CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=y CONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 CONFIG_QCOM_KGSL_SORT_POOL=y CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y diff --git a/config/monaco_perf_gpuconf b/config/monaco_perf_gpuconf index 22f5e75c5b..1e435f7700 100644 --- a/config/monaco_perf_gpuconf +++ b/config/monaco_perf_gpuconf @@ -1,10 +1 @@ -# SPDX-License-Identifier: GPL-2.0-only -# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. - -CONFIG_QCOM_KGSL=m -CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=y -CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=y -CONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 -CONFIG_QCOM_KGSL_SORT_POOL=y -CONFIG_QCOM_KGSL_CONTEXT_DEBUG=y -CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR="msm-adreno-tz" +monaco_consolidate_gpuconf From dab5da0c031b1903c704c14492fcfaf0faf1b2d6 Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Fri, 6 Sep 2024 16:08:50 +0530 Subject: [PATCH 0947/1016] kgsl: gmu: Make gmu's iommu-domain generation agnostic Currently gmu's iommu domain is present across all generations of gmu based target. Make it generation agnostic by moving out to gmu core struct. 
Change-Id: I36b3ebff37fa0b214adfc08ef017fbe9d34da16a Signed-off-by: SIVA MULLATI --- adreno_a6xx_gmu.c | 73 ++++--------------------------------- adreno_a6xx_gmu.h | 2 - adreno_a6xx_hwsched_hfi.c | 4 +- adreno_gen7_gmu.c | 76 +++++--------------------------------- adreno_gen7_gmu.h | 2 - adreno_gen8_gmu.c | 77 ++++++--------------------------------- adreno_gen8_gmu.h | 2 - kgsl_gmu_core.c | 53 +++++++++++++++++++++++++++ kgsl_gmu_core.h | 11 ++++++ 9 files changed, 95 insertions(+), 205 deletions(-) diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index dc7b3edad6..95b4c8c8cb 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -1603,7 +1602,7 @@ struct kgsl_memdesc *reserve_gmu_kernel_block(struct a6xx_gmu_device *gmu, if (!addr) addr = ALIGN(vma->next_va, hfi_get_gmu_va_alignment(align)); - ret = gmu_core_map_memdesc(gmu->domain, md, addr, + ret = gmu_core_map_memdesc(device->gmu_core.domain, md, addr, IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV); if (ret) { dev_err(GMU_PDEV_DEV(device), @@ -1653,7 +1652,7 @@ struct kgsl_memdesc *reserve_gmu_kernel_block_fixed(struct a6xx_gmu_device *gmu, goto done; } - ret = gmu_core_map_memdesc(gmu->domain, md, addr, attrs); + ret = gmu_core_map_memdesc(device->gmu_core.domain, md, addr, attrs); if (ret) { dev_err(GMU_PDEV_DEV(device), "Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n", @@ -2682,7 +2681,7 @@ static void a6xx_free_gmu_globals(struct a6xx_gmu_device *gmu) if (!md->gmuaddr) continue; - iommu_unmap(gmu->domain, md->gmuaddr, md->size); + iommu_unmap(device->gmu_core.domain, md->gmuaddr, md->size); if (md->priv & KGSL_MEMDESC_SYSMEM) kgsl_sharedmem_free(md); @@ -2690,10 +2689,10 @@ static void a6xx_free_gmu_globals(struct a6xx_gmu_device *gmu) memset(md, 0, sizeof(*md)); } - if (gmu->domain) { - iommu_detach_device(gmu->domain, GMU_PDEV_DEV(device)); - iommu_domain_free(gmu->domain); - gmu->domain = NULL; + if 
(device->gmu_core.domain) { + iommu_detach_device(device->gmu_core.domain, GMU_PDEV_DEV(device)); + iommu_domain_free(device->gmu_core.domain); + device->gmu_core.domain = NULL; } gmu->global_entries = 0; @@ -2877,62 +2876,6 @@ void a6xx_gmu_remove(struct kgsl_device *device) kobject_put(&gmu->stats_kobj); } -static int a6xx_gmu_iommu_fault_handler(struct iommu_domain *domain, - struct device *dev, unsigned long addr, int flags, void *token) -{ - char *fault_type = "unknown"; - - if (flags & IOMMU_FAULT_TRANSLATION) - fault_type = "translation"; - else if (flags & IOMMU_FAULT_PERMISSION) - fault_type = "permission"; - else if (flags & IOMMU_FAULT_EXTERNAL) - fault_type = "external"; - else if (flags & IOMMU_FAULT_TRANSACTION_STALLED) - fault_type = "transaction stalled"; - - dev_err(dev, "GMU fault addr = %lX, context=kernel (%s %s fault)\n", - addr, - (flags & IOMMU_FAULT_WRITE) ? "write" : "read", - fault_type); - - return 0; -} - -static int a6xx_gmu_iommu_init(struct a6xx_gmu_device *gmu) -{ - struct kgsl_device *device = KGSL_DEVICE(a6xx_gmu_to_adreno(gmu)); - int ret; - - gmu->domain = iommu_domain_alloc(&platform_bus_type); - if (gmu->domain == NULL) { - dev_err(GMU_PDEV_DEV(device), - "Unable to allocate GMU IOMMU domain\n"); - return -ENODEV; - } - - /* - * Disable stall on fault for the GMU context bank. - * This sets SCTLR.CFCFG = 0. - * Also note that, the smmu driver sets SCTLR.HUPCF = 0 by default. 
- */ - qcom_iommu_set_fault_model(gmu->domain, QCOM_IOMMU_FAULT_MODEL_NO_STALL); - - ret = iommu_attach_device(gmu->domain, GMU_PDEV_DEV(device)); - if (!ret) { - iommu_set_fault_handler(gmu->domain, - a6xx_gmu_iommu_fault_handler, gmu); - return 0; - } - - dev_err(GMU_PDEV_DEV(device), - "Unable to attach GMU IOMMU domain: %d\n", ret); - iommu_domain_free(gmu->domain); - gmu->domain = NULL; - - return ret; -} - int a6xx_gmu_probe(struct kgsl_device *device, struct platform_device *pdev) { @@ -2973,7 +2916,7 @@ int a6xx_gmu_probe(struct kgsl_device *device, return ret; /* Set up GMU IOMMU and shared memory with GMU */ - ret = a6xx_gmu_iommu_init(gmu); + ret = gmu_core_iommu_init(device); if (ret) goto error; diff --git a/adreno_a6xx_gmu.h b/adreno_a6xx_gmu.h index 8dde9f435d..f22e5d50fe 100644 --- a/adreno_a6xx_gmu.h +++ b/adreno_a6xx_gmu.h @@ -68,8 +68,6 @@ struct a6xx_gmu_device { unsigned long flags; /** @rscc_virt: Pointer where RSCC block is mapped */ void __iomem *rscc_virt; - /** @domain: IOMMU domain for the kernel context */ - struct iommu_domain *domain; /** @rdpm_cx_virt: Pointer where the RDPM CX block is mapped */ void __iomem *rdpm_cx_virt; /** @rdpm_mx_virt: Pointer where the RDPM MX block is mapped */ diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index 66a7ead1cb..a88a42a80c 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -777,8 +777,8 @@ static int gmu_import_buffer(struct adreno_device *adreno_dev, return -ENOMEM; } - - ret = gmu_core_map_memdesc(gmu->domain, entry->md, vma->next_va, attrs); + ret = gmu_core_map_memdesc(device->gmu_core.domain, + entry->md, vma->next_va, attrs); if (ret) { dev_err(GMU_PDEV_DEV(device), "gmu map err: 0x%08x, %x\n", vma->next_va, attrs); diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 6d76312ce8..6bffb5cce4 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -1162,7 +1161,7 
@@ static int _map_gmu_dynamic(struct gen7_gmu_device *gmu, return ret; } - ret = gmu_core_map_memdesc(gmu->domain, md, addr, attrs); + ret = gmu_core_map_memdesc(device->gmu_core.domain, md, addr, attrs); if (!ret) { md->gmuaddr = addr; return 0; @@ -1195,7 +1194,7 @@ static int _map_gmu_static(struct gen7_gmu_device *gmu, if (!addr) addr = ALIGN(vma->next_va, hfi_get_gmu_va_alignment(align)); - ret = gmu_core_map_memdesc(gmu->domain, md, addr, attrs); + ret = gmu_core_map_memdesc(device->gmu_core.domain, md, addr, attrs); if (ret) { dev_err(GMU_PDEV_DEV(device), "Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n", @@ -1313,6 +1312,7 @@ void gen7_free_gmu_block(struct gen7_gmu_device *gmu, struct kgsl_memdesc *md) int vma_id = find_vma_block(gmu, md->gmuaddr, md->size); struct gmu_vma_entry *vma; struct gmu_vma_node *vma_node; + struct kgsl_device *device = KGSL_DEVICE(gen7_gmu_to_adreno(gmu)); if ((vma_id < 0) || !vma_is_dynamic(vma_id)) return; @@ -1323,7 +1323,7 @@ void gen7_free_gmu_block(struct gen7_gmu_device *gmu, struct kgsl_memdesc *md) * Do not remove the vma node if we failed to unmap the entire buffer. This is because the * iommu driver considers remapping an already mapped iova as fatal. 
*/ - if (md->size != iommu_unmap(gmu->domain, md->gmuaddr, md->size)) + if (md->size != iommu_unmap(device->gmu_core.domain, md->gmuaddr, md->size)) goto free; spin_lock(&vma->lock); @@ -2325,7 +2325,7 @@ static void gen7_free_gmu_globals(struct gen7_gmu_device *gmu) if (!md->gmuaddr) continue; - iommu_unmap(gmu->domain, md->gmuaddr, md->size); + iommu_unmap(device->gmu_core.domain, md->gmuaddr, md->size); if (md->priv & KGSL_MEMDESC_SYSMEM) kgsl_sharedmem_free(md); @@ -2333,10 +2333,10 @@ static void gen7_free_gmu_globals(struct gen7_gmu_device *gmu) memset(md, 0, sizeof(*md)); } - if (gmu->domain) { - iommu_detach_device(gmu->domain, GMU_PDEV_DEV(device)); - iommu_domain_free(gmu->domain); - gmu->domain = NULL; + if (device->gmu_core.domain) { + iommu_detach_device(device->gmu_core.domain, GMU_PDEV_DEV(device)); + iommu_domain_free(device->gmu_core.domain); + device->gmu_core.domain = NULL; } gmu->global_entries = 0; @@ -2525,62 +2525,6 @@ void gen7_gmu_remove(struct kgsl_device *device) kobject_put(&gmu->stats_kobj); } -static int gen7_gmu_iommu_fault_handler(struct iommu_domain *domain, - struct device *dev, unsigned long addr, int flags, void *token) -{ - char *fault_type = "unknown"; - - if (flags & IOMMU_FAULT_TRANSLATION) - fault_type = "translation"; - else if (flags & IOMMU_FAULT_PERMISSION) - fault_type = "permission"; - else if (flags & IOMMU_FAULT_EXTERNAL) - fault_type = "external"; - else if (flags & IOMMU_FAULT_TRANSACTION_STALLED) - fault_type = "transaction stalled"; - - dev_err(dev, "GMU fault addr = %lX, context=kernel (%s %s fault)\n", - addr, - (flags & IOMMU_FAULT_WRITE) ? 
"write" : "read", - fault_type); - - return 0; -} - -static int gen7_gmu_iommu_init(struct gen7_gmu_device *gmu) -{ - struct kgsl_device *device = KGSL_DEVICE(gen7_gmu_to_adreno(gmu)); - struct device *gmu_pdev_dev = GMU_PDEV_DEV(device); - int ret; - - gmu->domain = iommu_domain_alloc(&platform_bus_type); - if (gmu->domain == NULL) { - dev_err(gmu_pdev_dev, "Unable to allocate GMU IOMMU domain\n"); - return -ENODEV; - } - - /* - * Disable stall on fault for the GMU context bank. - * This sets SCTLR.CFCFG = 0. - * Also note that, the smmu driver sets SCTLR.HUPCF = 0 by default. - */ - qcom_iommu_set_fault_model(gmu->domain, QCOM_IOMMU_FAULT_MODEL_NO_STALL); - - ret = iommu_attach_device(gmu->domain, gmu_pdev_dev); - if (!ret) { - iommu_set_fault_handler(gmu->domain, - gen7_gmu_iommu_fault_handler, gmu); - return 0; - } - - dev_err(gmu_pdev_dev, - "Unable to attach GMU IOMMU domain: %d\n", ret); - iommu_domain_free(gmu->domain); - gmu->domain = NULL; - - return ret; -} - /* Default IFPC timer (300usec) value */ #define GEN7_GMU_LONG_IFPC_HYST FIELD_PREP(GENMASK(15, 0), 0x1680) @@ -2627,7 +2571,7 @@ int gen7_gmu_probe(struct kgsl_device *device, return ret; /* Set up GMU IOMMU and shared memory with GMU */ - ret = gen7_gmu_iommu_init(gmu); + ret = gmu_core_iommu_init(device); if (ret) goto error; diff --git a/adreno_gen7_gmu.h b/adreno_gen7_gmu.h index 106057d8d5..3c251eebf4 100644 --- a/adreno_gen7_gmu.h +++ b/adreno_gen7_gmu.h @@ -78,8 +78,6 @@ struct gen7_gmu_device { unsigned long flags; /** @rscc_virt: Pointer where RSCC block is mapped */ void __iomem *rscc_virt; - /** @domain: IOMMU domain for the kernel context */ - struct iommu_domain *domain; /** @log_stream_enable: GMU log streaming enable. 
Disabled by default */ bool log_stream_enable; /** @log_group_mask: Allows overriding default GMU log group mask */ diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index 2b999b0bcd..e132eb0d28 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -1061,7 +1060,7 @@ static int _map_gmu_dynamic(struct gen8_gmu_device *gmu, return ret; } - ret = gmu_core_map_memdesc(gmu->domain, md, addr, attrs); + ret = gmu_core_map_memdesc(device->gmu_core.domain, md, addr, attrs); if (!ret) { md->gmuaddr = addr; return 0; @@ -1094,7 +1093,7 @@ static int _map_gmu_static(struct gen8_gmu_device *gmu, if (!addr) addr = ALIGN(vma->next_va, hfi_get_gmu_va_alignment(align)); - ret = gmu_core_map_memdesc(gmu->domain, md, addr, attrs); + ret = gmu_core_map_memdesc(device->gmu_core.domain, md, addr, attrs); if (ret) { dev_err(GMU_PDEV_DEV(device), "Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n", @@ -1212,6 +1211,7 @@ void gen8_free_gmu_block(struct gen8_gmu_device *gmu, struct kgsl_memdesc *md) int vma_id = find_vma_block(gmu, md->gmuaddr, md->size); struct gmu_vma_entry *vma; struct gmu_vma_node *vma_node; + struct kgsl_device *device = KGSL_DEVICE(gen8_gmu_to_adreno(gmu)); if ((vma_id < 0) || !vma_is_dynamic(vma_id)) return; @@ -1222,7 +1222,8 @@ void gen8_free_gmu_block(struct gen8_gmu_device *gmu, struct kgsl_memdesc *md) * Do not remove the vma node if we failed to unmap the entire buffer. This is because the * iommu driver considers remapping an already mapped iova as fatal. 
*/ - if (md->size != iommu_unmap(gmu->domain, md->gmuaddr, md->size)) + if (md->size != + iommu_unmap(device->gmu_core.domain, md->gmuaddr, md->size)) goto free; spin_lock(&vma->lock); @@ -2141,7 +2142,7 @@ static void gen8_free_gmu_globals(struct gen8_gmu_device *gmu) if (!md->gmuaddr) continue; - iommu_unmap(gmu->domain, md->gmuaddr, md->size); + iommu_unmap(device->gmu_core.domain, md->gmuaddr, md->size); if (md->priv & KGSL_MEMDESC_SYSMEM) kgsl_sharedmem_free(md); @@ -2149,10 +2150,10 @@ static void gen8_free_gmu_globals(struct gen8_gmu_device *gmu) memset(md, 0, sizeof(*md)); } - if (gmu->domain) { - iommu_detach_device(gmu->domain, GMU_PDEV_DEV(device)); - iommu_domain_free(gmu->domain); - gmu->domain = NULL; + if (device->gmu_core.domain) { + iommu_detach_device(device->gmu_core.domain, GMU_PDEV_DEV(device)); + iommu_domain_free(device->gmu_core.domain); + device->gmu_core.domain = NULL; } gmu->global_entries = 0; @@ -2332,62 +2333,6 @@ void gen8_gmu_remove(struct kgsl_device *device) kobject_put(&gmu->stats_kobj); } -static int gen8_gmu_iommu_fault_handler(struct iommu_domain *domain, - struct device *dev, unsigned long addr, int flags, void *token) -{ - char *fault_type = "unknown"; - - if (flags & IOMMU_FAULT_TRANSLATION) - fault_type = "translation"; - else if (flags & IOMMU_FAULT_PERMISSION) - fault_type = "permission"; - else if (flags & IOMMU_FAULT_EXTERNAL) - fault_type = "external"; - else if (flags & IOMMU_FAULT_TRANSACTION_STALLED) - fault_type = "transaction stalled"; - - dev_err(dev, "GMU fault addr = %lX, context=kernel (%s %s fault)\n", - addr, - (flags & IOMMU_FAULT_WRITE) ? 
"write" : "read", - fault_type); - - return 0; -} - -static int gen8_gmu_iommu_init(struct gen8_gmu_device *gmu) -{ - struct kgsl_device *device = KGSL_DEVICE(gen8_gmu_to_adreno(gmu)); - struct device *gmu_pdev_dev = GMU_PDEV_DEV(device); - int ret; - - gmu->domain = iommu_domain_alloc(&platform_bus_type); - if (gmu->domain == NULL) { - dev_err(gmu_pdev_dev, "Unable to allocate GMU IOMMU domain\n"); - return -ENODEV; - } - - /* - * Disable stall on fault for the GMU context bank. - * This sets SCTLR.CFCFG = 0. - * Also note that, the smmu driver sets SCTLR.HUPCF = 0 by default. - */ - qcom_iommu_set_fault_model(gmu->domain, QCOM_IOMMU_FAULT_MODEL_NO_STALL); - - ret = iommu_attach_device(gmu->domain, gmu_pdev_dev); - if (!ret) { - iommu_set_fault_handler(gmu->domain, - gen8_gmu_iommu_fault_handler, gmu); - return 0; - } - - dev_err(gmu_pdev_dev, - "Unable to attach GMU IOMMU domain: %d\n", ret); - iommu_domain_free(gmu->domain); - gmu->domain = NULL; - - return ret; -} - /* Default IFPC timer (300usec) value */ #define GEN8_GMU_LONG_IFPC_HYST FIELD_PREP(GENMASK(15, 0), 0x1680) @@ -2434,7 +2379,7 @@ int gen8_gmu_probe(struct kgsl_device *device, return ret; /* Set up GMU IOMMU and shared memory with GMU */ - ret = gen8_gmu_iommu_init(gmu); + ret = gmu_core_iommu_init(device); if (ret) goto error; diff --git a/adreno_gen8_gmu.h b/adreno_gen8_gmu.h index f3a1f7fe5c..34cf22d2cb 100644 --- a/adreno_gen8_gmu.h +++ b/adreno_gen8_gmu.h @@ -80,8 +80,6 @@ struct gen8_gmu_device { unsigned long flags; /** @rscc_virt: Pointer where RSCC block is mapped */ void __iomem *rscc_virt; - /** @domain: IOMMU domain for the kernel context */ - struct iommu_domain *domain; /** @log_stream_enable: GMU log streaming enable. 
Disabled by default */ bool log_stream_enable; /** @log_group_mask: Allows overriding default GMU log group mask */ diff --git a/kgsl_gmu_core.c b/kgsl_gmu_core.c index 3a5ae869ae..8c950cd784 100644 --- a/kgsl_gmu_core.c +++ b/kgsl_gmu_core.c @@ -212,6 +212,59 @@ int gmu_core_map_memdesc(struct iommu_domain *domain, struct kgsl_memdesc *memde return mapped == 0 ? -ENOMEM : 0; } +static int gmu_core_iommu_fault_handler(struct iommu_domain *domain, + struct device *dev, unsigned long addr, int flags, void *token) +{ + char *fault_type = "unknown"; + + if (flags & IOMMU_FAULT_TRANSLATION) + fault_type = "translation"; + else if (flags & IOMMU_FAULT_PERMISSION) + fault_type = "permission"; + else if (flags & IOMMU_FAULT_EXTERNAL) + fault_type = "external"; + else if (flags & IOMMU_FAULT_TRANSACTION_STALLED) + fault_type = "transaction stalled"; + + dev_err(dev, "GMU fault addr = %lX, context=kernel (%s %s fault)\n", + addr, (flags & IOMMU_FAULT_WRITE) ? "write" : "read", fault_type); + + return 0; +} + +int gmu_core_iommu_init(struct kgsl_device *device) +{ + struct device *gmu_pdev_dev = GMU_PDEV_DEV(device); + int ret; + + device->gmu_core.domain = iommu_domain_alloc(&platform_bus_type); + if (!device->gmu_core.domain) { + dev_err(gmu_pdev_dev, "Unable to allocate GMU IOMMU domain\n"); + return -ENODEV; + } + + /* + * Disable stall on fault for the GMU context bank. + * This sets SCTLR.CFCFG = 0. + * Also note that, the smmu driver sets SCTLR.HUPCF = 0 by default. 
+ */ + qcom_iommu_set_fault_model(device->gmu_core.domain, + QCOM_IOMMU_FAULT_MODEL_NO_STALL); + + ret = iommu_attach_device(device->gmu_core.domain, gmu_pdev_dev); + if (!ret) { + iommu_set_fault_handler(device->gmu_core.domain, + gmu_core_iommu_fault_handler, device); + return 0; + } + + dev_err(gmu_pdev_dev, "Unable to attach GMU IOMMU domain: %d\n", ret); + iommu_domain_free(device->gmu_core.domain); + device->gmu_core.domain = NULL; + + return ret; +} + void gmu_core_dev_force_first_boot(struct kgsl_device *device) { const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device); diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h index 8877f2272c..ff2190ed84 100644 --- a/kgsl_gmu_core.h +++ b/kgsl_gmu_core.h @@ -448,6 +448,8 @@ struct gmu_core_device { enum gmu_fault_panic_policy gf_panic; /** @pdev: platform device for the gmu */ struct platform_device *pdev; + /** @domain: IOMMU domain for the gmu context */ + struct iommu_domain *domain; }; extern struct platform_driver a6xx_gmu_driver; @@ -539,6 +541,15 @@ struct gmu_mem_type_desc { */ int gmu_core_map_memdesc(struct iommu_domain *domain, struct kgsl_memdesc *memdesc, u64 gmuaddr, int attrs); + +/** + * gmu_core_iommu_init - Set up GMU IOMMU and shared memory with GMU + * @device: Pointer to KGSL device + * + * Return: 0 on success or error value on failure + */ +int gmu_core_iommu_init(struct kgsl_device *device); + void gmu_core_dev_force_first_boot(struct kgsl_device *device); /** From 986c1d292cca4f888f8feeaee8dd34c70e871454 Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Fri, 6 Sep 2024 18:21:51 +0530 Subject: [PATCH 0948/1016] kgsl: iommu: Send skip tlb hint for gmu domain Currently kgsl is not sending skip tlb hint to smmu for the gmu domain during slumber sequence. This may cause smmu to have vote on cx gdsc in slumber if any map/unmaps and results to cx wait timeouts. 
Change-Id: I612f7dc2de59befe09833b67900693d9a304eabd Signed-off-by: SIVA MULLATI --- kgsl_iommu.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 6d2aba5be2..e52a2b77b8 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -276,6 +276,7 @@ static int _iopgtbl_unmap_pages(struct kgsl_iommu_pt *pt, u64 gpuaddr, static void kgsl_iommu_flush_tlb(struct kgsl_mmu *mmu) { + struct kgsl_device *device = KGSL_MMU_DEVICE(mmu); struct kgsl_iommu *iommu = &mmu->iommu; iommu_flush_iotlb_all(to_iommu_domain(&iommu->user_context)); @@ -283,6 +284,10 @@ static void kgsl_iommu_flush_tlb(struct kgsl_mmu *mmu) /* As LPAC is optional, check LPAC domain is present before flush */ if (iommu->lpac_context.domain) iommu_flush_iotlb_all(to_iommu_domain(&iommu->lpac_context)); + + /* Flush iotlb for GMU domain */ + if (device->gmu_core.domain) + iommu_flush_iotlb_all(device->gmu_core.domain); } static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) @@ -352,16 +357,19 @@ static size_t _iopgtbl_map_sg(struct kgsl_iommu_pt *pt, u64 gpuaddr, static void kgsl_iommu_send_tlb_hint(struct kgsl_mmu *mmu, bool hint) { + struct kgsl_device *device = KGSL_MMU_DEVICE(mmu); struct kgsl_iommu *iommu = &mmu->iommu; /* - * Send hint to SMMU driver for skipping TLB operations during slumber. - * This will help to avoid unnecessary cx gdsc toggling. + * Send skip TLB hints for user context, LPAC context, and GMU domains + * to the SMMU driver to skip TLB operations during slumber. This will + * help avoid unnecessary CX GDSC toggling. */ qcom_skip_tlb_management(&iommu->user_context.pdev->dev, hint); if (iommu->lpac_context.domain) qcom_skip_tlb_management(&iommu->lpac_context.pdev->dev, hint); - + if (device->gmu_core.domain) + qcom_skip_tlb_management(&device->gmu_core.pdev->dev, hint); /* * TLB operations are skipped during slumber. 
Incase CX doesn't * go down, it can result in incorrect translations due to stale From 5dfc9ff995ca1f99a63d9147aa24055960879425 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 21 Nov 2024 22:04:49 +0530 Subject: [PATCH 0949/1016] kgsl: Remove __GFP_NORETRY flag from kgsl_system_alloc_pages System memory allocation in kgsl_system_alloc_pages is primarily for internal purposes and occurs during specific scenarios: 1. GPU initialization and first boot (once). 2. Context creation - Context queue & hardware fence queue (runtime). During runtime, context creation may fail due to memory unavailability, negatively impacting user experience. Removing the __GFP_NORETRY flag can potentially improve the success rate of memory allocations. Given that system memory is allocated during limited scenarios and the requested size is smaller during runtime (context creation), removing the __GFP_NORETRY flag for system allocations can enhance user experience by potentially improving the success rate of memory allocations. 
Change-Id: Ic74ef0a51bf839aef989d3097cb99d5716f97a48 Signed-off-by: Kamal Agrawal --- kgsl_sharedmem.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kgsl_sharedmem.c b/kgsl_sharedmem.c index 6012f9a561..e7f55670c4 100644 --- a/kgsl_sharedmem.c +++ b/kgsl_sharedmem.c @@ -1633,13 +1633,12 @@ static int kgsl_system_alloc_pages(struct kgsl_memdesc *memdesc, struct page *** struct page **local; int i, npages = memdesc->size >> PAGE_SHIFT; - local = kvcalloc(npages, sizeof(*pages), GFP_KERNEL | __GFP_NORETRY); + local = kvcalloc(npages, sizeof(*pages), GFP_KERNEL); if (!local) return -ENOMEM; for (i = 0; i < npages; i++) { - gfp_t gfp = __GFP_ZERO | __GFP_HIGHMEM | - GFP_KERNEL | __GFP_NORETRY; + gfp_t gfp = __GFP_ZERO | __GFP_HIGHMEM | GFP_KERNEL; if (!fatal_signal_pending(current)) local[i] = alloc_pages(gfp, get_order(PAGE_SIZE)); From 4eea9dd5380c03e44d32ca678413fca6fd4689ce Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Mon, 9 Dec 2024 23:23:54 +0530 Subject: [PATCH 0950/1016] kgsl: gmu: Handle dummy gMxC resource entries from cmd_db For targets where gMxC is not supported but RM still has a hardware instance, AOP will update this as a dummy entry in cmd_db. This means cmd_db will have an entry for gMxC with all-zero values (no supported operating levels). To handle this in KGSL, add a check for dummy gMxC resource entries from cmd_db. If such an entry is found, return an error code (-ENODATA) instead of attempting to process it, which would lead to an invalid MxC vote. 
Change-Id: I2efa3924bf7e0da1edf615ef0523ced883ad98b5 Signed-off-by: SIVA MULLATI --- adreno_gen8_rpmh.c | 9 ++++++--- adreno_rpmh.c | 8 ++++++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/adreno_gen8_rpmh.c b/adreno_gen8_rpmh.c index 6392799aa2..6bc24a808c 100644 --- a/adreno_gen8_rpmh.c +++ b/adreno_gen8_rpmh.c @@ -211,10 +211,13 @@ static int build_dcvs_table(struct adreno_device *adreno_dev) /* If the target supports dedicated MxC rail, read the same */ if (cmd_db_read_addr("gmxc.lvl")) { ret = adreno_rpmh_arc_cmds(&gmxc_arc, "gmxc.lvl"); - if (ret) - return ret; - ret = setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, &gmxc_arc, &cx_arc); + /* Dummy gMxC resource, treat as if no dedicated MxC */ + if (ret == -ENODATA) + ret = setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, NULL, &cx_arc); + else + ret = setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, &gmxc_arc, &cx_arc); } else { + /* No gMxC resource entry, treat as if no dedicated MxC */ ret = setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, NULL, &cx_arc); } diff --git a/adreno_rpmh.c b/adreno_rpmh.c index 3c1bcfc002..9d6f87211b 100644 --- a/adreno_rpmh.c +++ b/adreno_rpmh.c @@ -14,6 +14,7 @@ int adreno_rpmh_arc_cmds(struct rpmh_arc_vals *arc, const char *res_id) { size_t len = 0; + bool dummy_res = true; arc->val = cmd_db_read_aux_data(res_id, &len); @@ -25,10 +26,17 @@ int adreno_rpmh_arc_cmds(struct rpmh_arc_vals *arc, const char *res_id) * zero padding. 
*/ for (arc->num = 1; arc->num < (len >> 1); arc->num++) { + if (arc->val[arc->num] != 0) + dummy_res = false; + if (arc->val[arc->num - 1] != 0 && arc->val[arc->num] == 0) break; } + /* Dummy resource entry in cmd_db with all zeros */ + if (dummy_res) + return -ENODATA; + return 0; } From b5853882c9e119778ad331fa5411908b90adc0a3 Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Fri, 6 Sep 2024 16:08:50 +0530 Subject: [PATCH 0951/1016] kgsl: gmu: Make gmu's iommu-domain generation agnostic Currently gmu's iommu domain is present across all generations of gmu based target. Make it generation agnostic by moving out to gmu core struct. Change-Id: I36b3ebff37fa0b214adfc08ef017fbe9d34da16a Signed-off-by: SIVA MULLATI --- adreno_a6xx_gmu.c | 73 ++++--------------------------------- adreno_a6xx_gmu.h | 2 - adreno_a6xx_hwsched_hfi.c | 4 +- adreno_gen7_gmu.c | 76 +++++--------------------------------- adreno_gen7_gmu.h | 2 - adreno_gen8_gmu.c | 77 ++++++--------------------------------- adreno_gen8_gmu.h | 2 - kgsl_gmu_core.c | 53 +++++++++++++++++++++++++++ kgsl_gmu_core.h | 11 ++++++ 9 files changed, 95 insertions(+), 205 deletions(-) diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index dc7b3edad6..95b4c8c8cb 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -1603,7 +1602,7 @@ struct kgsl_memdesc *reserve_gmu_kernel_block(struct a6xx_gmu_device *gmu, if (!addr) addr = ALIGN(vma->next_va, hfi_get_gmu_va_alignment(align)); - ret = gmu_core_map_memdesc(gmu->domain, md, addr, + ret = gmu_core_map_memdesc(device->gmu_core.domain, md, addr, IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV); if (ret) { dev_err(GMU_PDEV_DEV(device), @@ -1653,7 +1652,7 @@ struct kgsl_memdesc *reserve_gmu_kernel_block_fixed(struct a6xx_gmu_device *gmu, goto done; } - ret = gmu_core_map_memdesc(gmu->domain, md, addr, attrs); + ret = gmu_core_map_memdesc(device->gmu_core.domain, md, addr, attrs); if (ret) { 
dev_err(GMU_PDEV_DEV(device), "Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n", @@ -2682,7 +2681,7 @@ static void a6xx_free_gmu_globals(struct a6xx_gmu_device *gmu) if (!md->gmuaddr) continue; - iommu_unmap(gmu->domain, md->gmuaddr, md->size); + iommu_unmap(device->gmu_core.domain, md->gmuaddr, md->size); if (md->priv & KGSL_MEMDESC_SYSMEM) kgsl_sharedmem_free(md); @@ -2690,10 +2689,10 @@ static void a6xx_free_gmu_globals(struct a6xx_gmu_device *gmu) memset(md, 0, sizeof(*md)); } - if (gmu->domain) { - iommu_detach_device(gmu->domain, GMU_PDEV_DEV(device)); - iommu_domain_free(gmu->domain); - gmu->domain = NULL; + if (device->gmu_core.domain) { + iommu_detach_device(device->gmu_core.domain, GMU_PDEV_DEV(device)); + iommu_domain_free(device->gmu_core.domain); + device->gmu_core.domain = NULL; } gmu->global_entries = 0; @@ -2877,62 +2876,6 @@ void a6xx_gmu_remove(struct kgsl_device *device) kobject_put(&gmu->stats_kobj); } -static int a6xx_gmu_iommu_fault_handler(struct iommu_domain *domain, - struct device *dev, unsigned long addr, int flags, void *token) -{ - char *fault_type = "unknown"; - - if (flags & IOMMU_FAULT_TRANSLATION) - fault_type = "translation"; - else if (flags & IOMMU_FAULT_PERMISSION) - fault_type = "permission"; - else if (flags & IOMMU_FAULT_EXTERNAL) - fault_type = "external"; - else if (flags & IOMMU_FAULT_TRANSACTION_STALLED) - fault_type = "transaction stalled"; - - dev_err(dev, "GMU fault addr = %lX, context=kernel (%s %s fault)\n", - addr, - (flags & IOMMU_FAULT_WRITE) ? "write" : "read", - fault_type); - - return 0; -} - -static int a6xx_gmu_iommu_init(struct a6xx_gmu_device *gmu) -{ - struct kgsl_device *device = KGSL_DEVICE(a6xx_gmu_to_adreno(gmu)); - int ret; - - gmu->domain = iommu_domain_alloc(&platform_bus_type); - if (gmu->domain == NULL) { - dev_err(GMU_PDEV_DEV(device), - "Unable to allocate GMU IOMMU domain\n"); - return -ENODEV; - } - - /* - * Disable stall on fault for the GMU context bank. 
- * This sets SCTLR.CFCFG = 0. - * Also note that, the smmu driver sets SCTLR.HUPCF = 0 by default. - */ - qcom_iommu_set_fault_model(gmu->domain, QCOM_IOMMU_FAULT_MODEL_NO_STALL); - - ret = iommu_attach_device(gmu->domain, GMU_PDEV_DEV(device)); - if (!ret) { - iommu_set_fault_handler(gmu->domain, - a6xx_gmu_iommu_fault_handler, gmu); - return 0; - } - - dev_err(GMU_PDEV_DEV(device), - "Unable to attach GMU IOMMU domain: %d\n", ret); - iommu_domain_free(gmu->domain); - gmu->domain = NULL; - - return ret; -} - int a6xx_gmu_probe(struct kgsl_device *device, struct platform_device *pdev) { @@ -2973,7 +2916,7 @@ int a6xx_gmu_probe(struct kgsl_device *device, return ret; /* Set up GMU IOMMU and shared memory with GMU */ - ret = a6xx_gmu_iommu_init(gmu); + ret = gmu_core_iommu_init(device); if (ret) goto error; diff --git a/adreno_a6xx_gmu.h b/adreno_a6xx_gmu.h index 8dde9f435d..f22e5d50fe 100644 --- a/adreno_a6xx_gmu.h +++ b/adreno_a6xx_gmu.h @@ -68,8 +68,6 @@ struct a6xx_gmu_device { unsigned long flags; /** @rscc_virt: Pointer where RSCC block is mapped */ void __iomem *rscc_virt; - /** @domain: IOMMU domain for the kernel context */ - struct iommu_domain *domain; /** @rdpm_cx_virt: Pointer where the RDPM CX block is mapped */ void __iomem *rdpm_cx_virt; /** @rdpm_mx_virt: Pointer where the RDPM MX block is mapped */ diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index 66a7ead1cb..a88a42a80c 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -777,8 +777,8 @@ static int gmu_import_buffer(struct adreno_device *adreno_dev, return -ENOMEM; } - - ret = gmu_core_map_memdesc(gmu->domain, entry->md, vma->next_va, attrs); + ret = gmu_core_map_memdesc(device->gmu_core.domain, + entry->md, vma->next_va, attrs); if (ret) { dev_err(GMU_PDEV_DEV(device), "gmu map err: 0x%08x, %x\n", vma->next_va, attrs); diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 6d76312ce8..6bffb5cce4 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c 
@@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -1162,7 +1161,7 @@ static int _map_gmu_dynamic(struct gen7_gmu_device *gmu, return ret; } - ret = gmu_core_map_memdesc(gmu->domain, md, addr, attrs); + ret = gmu_core_map_memdesc(device->gmu_core.domain, md, addr, attrs); if (!ret) { md->gmuaddr = addr; return 0; @@ -1195,7 +1194,7 @@ static int _map_gmu_static(struct gen7_gmu_device *gmu, if (!addr) addr = ALIGN(vma->next_va, hfi_get_gmu_va_alignment(align)); - ret = gmu_core_map_memdesc(gmu->domain, md, addr, attrs); + ret = gmu_core_map_memdesc(device->gmu_core.domain, md, addr, attrs); if (ret) { dev_err(GMU_PDEV_DEV(device), "Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n", @@ -1313,6 +1312,7 @@ void gen7_free_gmu_block(struct gen7_gmu_device *gmu, struct kgsl_memdesc *md) int vma_id = find_vma_block(gmu, md->gmuaddr, md->size); struct gmu_vma_entry *vma; struct gmu_vma_node *vma_node; + struct kgsl_device *device = KGSL_DEVICE(gen7_gmu_to_adreno(gmu)); if ((vma_id < 0) || !vma_is_dynamic(vma_id)) return; @@ -1323,7 +1323,7 @@ void gen7_free_gmu_block(struct gen7_gmu_device *gmu, struct kgsl_memdesc *md) * Do not remove the vma node if we failed to unmap the entire buffer. This is because the * iommu driver considers remapping an already mapped iova as fatal. 
*/ - if (md->size != iommu_unmap(gmu->domain, md->gmuaddr, md->size)) + if (md->size != iommu_unmap(device->gmu_core.domain, md->gmuaddr, md->size)) goto free; spin_lock(&vma->lock); @@ -2325,7 +2325,7 @@ static void gen7_free_gmu_globals(struct gen7_gmu_device *gmu) if (!md->gmuaddr) continue; - iommu_unmap(gmu->domain, md->gmuaddr, md->size); + iommu_unmap(device->gmu_core.domain, md->gmuaddr, md->size); if (md->priv & KGSL_MEMDESC_SYSMEM) kgsl_sharedmem_free(md); @@ -2333,10 +2333,10 @@ static void gen7_free_gmu_globals(struct gen7_gmu_device *gmu) memset(md, 0, sizeof(*md)); } - if (gmu->domain) { - iommu_detach_device(gmu->domain, GMU_PDEV_DEV(device)); - iommu_domain_free(gmu->domain); - gmu->domain = NULL; + if (device->gmu_core.domain) { + iommu_detach_device(device->gmu_core.domain, GMU_PDEV_DEV(device)); + iommu_domain_free(device->gmu_core.domain); + device->gmu_core.domain = NULL; } gmu->global_entries = 0; @@ -2525,62 +2525,6 @@ void gen7_gmu_remove(struct kgsl_device *device) kobject_put(&gmu->stats_kobj); } -static int gen7_gmu_iommu_fault_handler(struct iommu_domain *domain, - struct device *dev, unsigned long addr, int flags, void *token) -{ - char *fault_type = "unknown"; - - if (flags & IOMMU_FAULT_TRANSLATION) - fault_type = "translation"; - else if (flags & IOMMU_FAULT_PERMISSION) - fault_type = "permission"; - else if (flags & IOMMU_FAULT_EXTERNAL) - fault_type = "external"; - else if (flags & IOMMU_FAULT_TRANSACTION_STALLED) - fault_type = "transaction stalled"; - - dev_err(dev, "GMU fault addr = %lX, context=kernel (%s %s fault)\n", - addr, - (flags & IOMMU_FAULT_WRITE) ? 
"write" : "read", - fault_type); - - return 0; -} - -static int gen7_gmu_iommu_init(struct gen7_gmu_device *gmu) -{ - struct kgsl_device *device = KGSL_DEVICE(gen7_gmu_to_adreno(gmu)); - struct device *gmu_pdev_dev = GMU_PDEV_DEV(device); - int ret; - - gmu->domain = iommu_domain_alloc(&platform_bus_type); - if (gmu->domain == NULL) { - dev_err(gmu_pdev_dev, "Unable to allocate GMU IOMMU domain\n"); - return -ENODEV; - } - - /* - * Disable stall on fault for the GMU context bank. - * This sets SCTLR.CFCFG = 0. - * Also note that, the smmu driver sets SCTLR.HUPCF = 0 by default. - */ - qcom_iommu_set_fault_model(gmu->domain, QCOM_IOMMU_FAULT_MODEL_NO_STALL); - - ret = iommu_attach_device(gmu->domain, gmu_pdev_dev); - if (!ret) { - iommu_set_fault_handler(gmu->domain, - gen7_gmu_iommu_fault_handler, gmu); - return 0; - } - - dev_err(gmu_pdev_dev, - "Unable to attach GMU IOMMU domain: %d\n", ret); - iommu_domain_free(gmu->domain); - gmu->domain = NULL; - - return ret; -} - /* Default IFPC timer (300usec) value */ #define GEN7_GMU_LONG_IFPC_HYST FIELD_PREP(GENMASK(15, 0), 0x1680) @@ -2627,7 +2571,7 @@ int gen7_gmu_probe(struct kgsl_device *device, return ret; /* Set up GMU IOMMU and shared memory with GMU */ - ret = gen7_gmu_iommu_init(gmu); + ret = gmu_core_iommu_init(device); if (ret) goto error; diff --git a/adreno_gen7_gmu.h b/adreno_gen7_gmu.h index 106057d8d5..3c251eebf4 100644 --- a/adreno_gen7_gmu.h +++ b/adreno_gen7_gmu.h @@ -78,8 +78,6 @@ struct gen7_gmu_device { unsigned long flags; /** @rscc_virt: Pointer where RSCC block is mapped */ void __iomem *rscc_virt; - /** @domain: IOMMU domain for the kernel context */ - struct iommu_domain *domain; /** @log_stream_enable: GMU log streaming enable. 
Disabled by default */ bool log_stream_enable; /** @log_group_mask: Allows overriding default GMU log group mask */ diff --git a/adreno_gen8_gmu.c b/adreno_gen8_gmu.c index 2b999b0bcd..e132eb0d28 100644 --- a/adreno_gen8_gmu.c +++ b/adreno_gen8_gmu.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -1061,7 +1060,7 @@ static int _map_gmu_dynamic(struct gen8_gmu_device *gmu, return ret; } - ret = gmu_core_map_memdesc(gmu->domain, md, addr, attrs); + ret = gmu_core_map_memdesc(device->gmu_core.domain, md, addr, attrs); if (!ret) { md->gmuaddr = addr; return 0; @@ -1094,7 +1093,7 @@ static int _map_gmu_static(struct gen8_gmu_device *gmu, if (!addr) addr = ALIGN(vma->next_va, hfi_get_gmu_va_alignment(align)); - ret = gmu_core_map_memdesc(gmu->domain, md, addr, attrs); + ret = gmu_core_map_memdesc(device->gmu_core.domain, md, addr, attrs); if (ret) { dev_err(GMU_PDEV_DEV(device), "Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n", @@ -1212,6 +1211,7 @@ void gen8_free_gmu_block(struct gen8_gmu_device *gmu, struct kgsl_memdesc *md) int vma_id = find_vma_block(gmu, md->gmuaddr, md->size); struct gmu_vma_entry *vma; struct gmu_vma_node *vma_node; + struct kgsl_device *device = KGSL_DEVICE(gen8_gmu_to_adreno(gmu)); if ((vma_id < 0) || !vma_is_dynamic(vma_id)) return; @@ -1222,7 +1222,8 @@ void gen8_free_gmu_block(struct gen8_gmu_device *gmu, struct kgsl_memdesc *md) * Do not remove the vma node if we failed to unmap the entire buffer. This is because the * iommu driver considers remapping an already mapped iova as fatal. 
*/ - if (md->size != iommu_unmap(gmu->domain, md->gmuaddr, md->size)) + if (md->size != + iommu_unmap(device->gmu_core.domain, md->gmuaddr, md->size)) goto free; spin_lock(&vma->lock); @@ -2141,7 +2142,7 @@ static void gen8_free_gmu_globals(struct gen8_gmu_device *gmu) if (!md->gmuaddr) continue; - iommu_unmap(gmu->domain, md->gmuaddr, md->size); + iommu_unmap(device->gmu_core.domain, md->gmuaddr, md->size); if (md->priv & KGSL_MEMDESC_SYSMEM) kgsl_sharedmem_free(md); @@ -2149,10 +2150,10 @@ static void gen8_free_gmu_globals(struct gen8_gmu_device *gmu) memset(md, 0, sizeof(*md)); } - if (gmu->domain) { - iommu_detach_device(gmu->domain, GMU_PDEV_DEV(device)); - iommu_domain_free(gmu->domain); - gmu->domain = NULL; + if (device->gmu_core.domain) { + iommu_detach_device(device->gmu_core.domain, GMU_PDEV_DEV(device)); + iommu_domain_free(device->gmu_core.domain); + device->gmu_core.domain = NULL; } gmu->global_entries = 0; @@ -2332,62 +2333,6 @@ void gen8_gmu_remove(struct kgsl_device *device) kobject_put(&gmu->stats_kobj); } -static int gen8_gmu_iommu_fault_handler(struct iommu_domain *domain, - struct device *dev, unsigned long addr, int flags, void *token) -{ - char *fault_type = "unknown"; - - if (flags & IOMMU_FAULT_TRANSLATION) - fault_type = "translation"; - else if (flags & IOMMU_FAULT_PERMISSION) - fault_type = "permission"; - else if (flags & IOMMU_FAULT_EXTERNAL) - fault_type = "external"; - else if (flags & IOMMU_FAULT_TRANSACTION_STALLED) - fault_type = "transaction stalled"; - - dev_err(dev, "GMU fault addr = %lX, context=kernel (%s %s fault)\n", - addr, - (flags & IOMMU_FAULT_WRITE) ? 
"write" : "read", - fault_type); - - return 0; -} - -static int gen8_gmu_iommu_init(struct gen8_gmu_device *gmu) -{ - struct kgsl_device *device = KGSL_DEVICE(gen8_gmu_to_adreno(gmu)); - struct device *gmu_pdev_dev = GMU_PDEV_DEV(device); - int ret; - - gmu->domain = iommu_domain_alloc(&platform_bus_type); - if (gmu->domain == NULL) { - dev_err(gmu_pdev_dev, "Unable to allocate GMU IOMMU domain\n"); - return -ENODEV; - } - - /* - * Disable stall on fault for the GMU context bank. - * This sets SCTLR.CFCFG = 0. - * Also note that, the smmu driver sets SCTLR.HUPCF = 0 by default. - */ - qcom_iommu_set_fault_model(gmu->domain, QCOM_IOMMU_FAULT_MODEL_NO_STALL); - - ret = iommu_attach_device(gmu->domain, gmu_pdev_dev); - if (!ret) { - iommu_set_fault_handler(gmu->domain, - gen8_gmu_iommu_fault_handler, gmu); - return 0; - } - - dev_err(gmu_pdev_dev, - "Unable to attach GMU IOMMU domain: %d\n", ret); - iommu_domain_free(gmu->domain); - gmu->domain = NULL; - - return ret; -} - /* Default IFPC timer (300usec) value */ #define GEN8_GMU_LONG_IFPC_HYST FIELD_PREP(GENMASK(15, 0), 0x1680) @@ -2434,7 +2379,7 @@ int gen8_gmu_probe(struct kgsl_device *device, return ret; /* Set up GMU IOMMU and shared memory with GMU */ - ret = gen8_gmu_iommu_init(gmu); + ret = gmu_core_iommu_init(device); if (ret) goto error; diff --git a/adreno_gen8_gmu.h b/adreno_gen8_gmu.h index f3a1f7fe5c..34cf22d2cb 100644 --- a/adreno_gen8_gmu.h +++ b/adreno_gen8_gmu.h @@ -80,8 +80,6 @@ struct gen8_gmu_device { unsigned long flags; /** @rscc_virt: Pointer where RSCC block is mapped */ void __iomem *rscc_virt; - /** @domain: IOMMU domain for the kernel context */ - struct iommu_domain *domain; /** @log_stream_enable: GMU log streaming enable. 
Disabled by default */ bool log_stream_enable; /** @log_group_mask: Allows overriding default GMU log group mask */ diff --git a/kgsl_gmu_core.c b/kgsl_gmu_core.c index 3a5ae869ae..8c950cd784 100644 --- a/kgsl_gmu_core.c +++ b/kgsl_gmu_core.c @@ -212,6 +212,59 @@ int gmu_core_map_memdesc(struct iommu_domain *domain, struct kgsl_memdesc *memde return mapped == 0 ? -ENOMEM : 0; } +static int gmu_core_iommu_fault_handler(struct iommu_domain *domain, + struct device *dev, unsigned long addr, int flags, void *token) +{ + char *fault_type = "unknown"; + + if (flags & IOMMU_FAULT_TRANSLATION) + fault_type = "translation"; + else if (flags & IOMMU_FAULT_PERMISSION) + fault_type = "permission"; + else if (flags & IOMMU_FAULT_EXTERNAL) + fault_type = "external"; + else if (flags & IOMMU_FAULT_TRANSACTION_STALLED) + fault_type = "transaction stalled"; + + dev_err(dev, "GMU fault addr = %lX, context=kernel (%s %s fault)\n", + addr, (flags & IOMMU_FAULT_WRITE) ? "write" : "read", fault_type); + + return 0; +} + +int gmu_core_iommu_init(struct kgsl_device *device) +{ + struct device *gmu_pdev_dev = GMU_PDEV_DEV(device); + int ret; + + device->gmu_core.domain = iommu_domain_alloc(&platform_bus_type); + if (!device->gmu_core.domain) { + dev_err(gmu_pdev_dev, "Unable to allocate GMU IOMMU domain\n"); + return -ENODEV; + } + + /* + * Disable stall on fault for the GMU context bank. + * This sets SCTLR.CFCFG = 0. + * Also note that, the smmu driver sets SCTLR.HUPCF = 0 by default. 
+ */ + qcom_iommu_set_fault_model(device->gmu_core.domain, + QCOM_IOMMU_FAULT_MODEL_NO_STALL); + + ret = iommu_attach_device(device->gmu_core.domain, gmu_pdev_dev); + if (!ret) { + iommu_set_fault_handler(device->gmu_core.domain, + gmu_core_iommu_fault_handler, device); + return 0; + } + + dev_err(gmu_pdev_dev, "Unable to attach GMU IOMMU domain: %d\n", ret); + iommu_domain_free(device->gmu_core.domain); + device->gmu_core.domain = NULL; + + return ret; +} + void gmu_core_dev_force_first_boot(struct kgsl_device *device) { const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device); diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h index 8877f2272c..ff2190ed84 100644 --- a/kgsl_gmu_core.h +++ b/kgsl_gmu_core.h @@ -448,6 +448,8 @@ struct gmu_core_device { enum gmu_fault_panic_policy gf_panic; /** @pdev: platform device for the gmu */ struct platform_device *pdev; + /** @domain: IOMMU domain for the gmu context */ + struct iommu_domain *domain; }; extern struct platform_driver a6xx_gmu_driver; @@ -539,6 +541,15 @@ struct gmu_mem_type_desc { */ int gmu_core_map_memdesc(struct iommu_domain *domain, struct kgsl_memdesc *memdesc, u64 gmuaddr, int attrs); + +/** + * gmu_core_iommu_init - Set up GMU IOMMU and shared memory with GMU + * @device: Pointer to KGSL device + * + * Return: 0 on success or error value on failure + */ +int gmu_core_iommu_init(struct kgsl_device *device); + void gmu_core_dev_force_first_boot(struct kgsl_device *device); /** From e0fbd48dde50c8b415902fbd7231e5e401a86012 Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Fri, 6 Sep 2024 18:21:51 +0530 Subject: [PATCH 0952/1016] kgsl: iommu: Send skip tlb hint for gmu domain Currently kgsl is not sending skip tlb hint to smmu for the gmu domain during slumber sequence. This may cause smmu to have vote on cx gdsc in slumber if any map/unmaps and results to cx wait timeouts. 
Change-Id: I612f7dc2de59befe09833b67900693d9a304eabd Signed-off-by: SIVA MULLATI --- kgsl_iommu.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/kgsl_iommu.c b/kgsl_iommu.c index f1fb362a48..801d7bb4db 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -276,6 +276,7 @@ static int _iopgtbl_unmap_pages(struct kgsl_iommu_pt *pt, u64 gpuaddr, static void kgsl_iommu_flush_tlb(struct kgsl_mmu *mmu) { + struct kgsl_device *device = KGSL_MMU_DEVICE(mmu); struct kgsl_iommu *iommu = &mmu->iommu; iommu_flush_iotlb_all(to_iommu_domain(&iommu->user_context)); @@ -283,6 +284,10 @@ static void kgsl_iommu_flush_tlb(struct kgsl_mmu *mmu) /* As LPAC is optional, check LPAC domain is present before flush */ if (iommu->lpac_context.domain) iommu_flush_iotlb_all(to_iommu_domain(&iommu->lpac_context)); + + /* Flush iotlb for GMU domain */ + if (device->gmu_core.domain) + iommu_flush_iotlb_all(device->gmu_core.domain); } static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) @@ -352,16 +357,19 @@ static size_t _iopgtbl_map_sg(struct kgsl_iommu_pt *pt, u64 gpuaddr, static void kgsl_iommu_send_tlb_hint(struct kgsl_mmu *mmu, bool hint) { + struct kgsl_device *device = KGSL_MMU_DEVICE(mmu); struct kgsl_iommu *iommu = &mmu->iommu; /* - * Send hint to SMMU driver for skipping TLB operations during slumber. - * This will help to avoid unnecessary cx gdsc toggling. + * Send skip TLB hints for user context, LPAC context, and GMU domains + * to the SMMU driver to skip TLB operations during slumber. This will + * help avoid unnecessary CX GDSC toggling. */ qcom_skip_tlb_management(&iommu->user_context.pdev->dev, hint); if (iommu->lpac_context.domain) qcom_skip_tlb_management(&iommu->lpac_context.pdev->dev, hint); - + if (device->gmu_core.domain) + qcom_skip_tlb_management(&device->gmu_core.pdev->dev, hint); /* * TLB operations are skipped during slumber. 
Incase CX doesn't * go down, it can result in incorrect translations due to stale From b593297bdf84a2c18c2768fcb40d3891c1e49805 Mon Sep 17 00:00:00 2001 From: Carter Cooper Date: Wed, 21 Aug 2024 16:03:15 -0700 Subject: [PATCH 0953/1016] kgsl: adreno: DCVS Tuning nodes Add sysfs nodes to allow for dynamic DCVS tuning. Change-Id: Ic1c9eec76001962fc8a4c51b96197bdc2b7b979e Signed-off-by: Carter Cooper --- adreno.c | 1 + adreno.h | 8 +++ adreno_sysfs.c | 122 +++++++++++++++++++++++++++++++++++++++ adreno_trace.h | 19 ++++++ governor_msm_adreno_tz.c | 2 + 5 files changed, 152 insertions(+) diff --git a/adreno.c b/adreno.c index 1a714d4004..c47ba6c136 100644 --- a/adreno.c +++ b/adreno.c @@ -1198,6 +1198,7 @@ static void adreno_setup_device(struct adreno_device *adreno_dev) mutex_init(&adreno_dev->dev.mutex); mutex_init(&adreno_dev->dev.file_mutex); mutex_init(&adreno_dev->fault_recovery_mutex); + mutex_init(&adreno_dev->dcvs_tuning_mutex); INIT_LIST_HEAD(&adreno_dev->dev.globals); /* Set the fault tolerance policy to replay, skip, throttle */ diff --git a/adreno.h b/adreno.h index e0da437819..494443e8c1 100644 --- a/adreno.h +++ b/adreno.h @@ -766,6 +766,14 @@ struct adreno_device { struct kthread_work scheduler_work; /** @scheduler_fault: Atomic to trigger scheduler based fault recovery */ atomic_t scheduler_fault; + /** @dcvs_tuning_mutex: Mutex taken during dcvs tuning */ + struct mutex dcvs_tuning_mutex; + /** @dcvs_tuning_mingap_lvl: Current DCVS tuning level for mingap */ + u32 dcvs_tuning_mingap_lvl; + /** @dcvs_tuning_penalty_lvl: Current DCVS tuning level for penalty */ + u32 dcvs_tuning_penalty_lvl; + /** @dcvs_tuning_numbusy_lvl: Current DCVS tuning level for numbusy */ + u32 dcvs_tuning_numbusy_lvl; }; /* Time to wait for suspend recovery gate to complete */ diff --git a/adreno_sysfs.c b/adreno_sysfs.c index 9fab257c9a..0c1024aae6 100644 --- a/adreno_sysfs.c +++ b/adreno_sysfs.c @@ -5,9 +5,16 @@ */ #include +#include +#if (KERNEL_VERSION(6, 3, 0) <= 
LINUX_VERSION_CODE) +#include +#else +#include +#endif #include "adreno.h" #include "adreno_sysfs.h" +#include "adreno_trace.h" #include "kgsl_sysfs.h" static ssize_t _gpu_model_show(struct kgsl_device *device, char *buf) @@ -99,6 +106,114 @@ static u32 _rt_bus_hint_show(struct adreno_device *adreno_dev) return device->pwrctrl.rt_bus_hint; } +/* Tuning values can be set to 0/1/2/3 */ +#define DCVS_TUNING_MAX 3 +#define DCVS_TUNING_EN_BIT BIT(5) + +/* + * GPU DCVS Tuning allows for small adjustments to the DCVS + * algorithm. The default value for each tunable is 0. Setting + * a higher tunable value will increase the aggressiveness + * of the DCVS algorithm. Currently 0-3 are supported values + * for each tunable, 3 being most aggressive. + */ + +/* Mingap is the count of consecutive low requests before moving to lower DCVS levels. */ +#define DCVS_TUNING_MINGAP 0 +/* Penalty is the busy threshold for moving between levels. */ +#define DCVS_TUNING_PENALTY 1 +/* Numbusy is the backoff from mingap to transition power level more quickly. 
*/ +#define DCVS_TUNING_NUMBUSY 2 + +#if (KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE) +static int __dcvs_tuning_scm_entry(struct adreno_device *adreno_dev, + u32 param, u32 val) +{ + int ret; + u32 mingap = 0, penalty = 0, numbusy = 0; + u32 *save; + + switch (param) { + case DCVS_TUNING_MINGAP: + mingap = DCVS_TUNING_EN_BIT | FIELD_PREP(GENMASK(4, 0), val); + save = &adreno_dev->dcvs_tuning_mingap_lvl; + break; + case DCVS_TUNING_PENALTY: + penalty = DCVS_TUNING_EN_BIT | FIELD_PREP(GENMASK(4, 0), val); + save = &adreno_dev->dcvs_tuning_penalty_lvl; + break; + case DCVS_TUNING_NUMBUSY: + numbusy = DCVS_TUNING_EN_BIT | FIELD_PREP(GENMASK(4, 0), val); + save = &adreno_dev->dcvs_tuning_numbusy_lvl; + break; + default: + return -EINVAL; + } + + if (!mutex_trylock(&adreno_dev->dcvs_tuning_mutex)) + return -EDEADLK; + + ret = qcom_scm_kgsl_dcvs_tuning(mingap, penalty, numbusy); + if (ret == 0) { + *save = val; + trace_adreno_dcvs_tuning(param, + adreno_dev->dcvs_tuning_mingap_lvl, + adreno_dev->dcvs_tuning_penalty_lvl, + adreno_dev->dcvs_tuning_numbusy_lvl); + } + mutex_unlock(&adreno_dev->dcvs_tuning_mutex); + + return ret; +} +#else +static int __dcvs_tuning_scm_entry(struct adreno_device *adreno_dev, u32 param, u32 val) +{ + return -EOPNOTSUPP; +} +#endif + +static int _dcvs_tuning_mingap_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + if (val > DCVS_TUNING_MAX) + return -EINVAL; + + return __dcvs_tuning_scm_entry(adreno_dev, DCVS_TUNING_MINGAP, val); +} + +static u32 _dcvs_tuning_mingap_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->dcvs_tuning_mingap_lvl; +} + +static int _dcvs_tuning_penalty_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + if (val > DCVS_TUNING_MAX) + return -EINVAL; + + return __dcvs_tuning_scm_entry(adreno_dev, DCVS_TUNING_PENALTY, val); +} + +static u32 _dcvs_tuning_penalty_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->dcvs_tuning_penalty_lvl; +} + +static int 
_dcvs_tuning_numbusy_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + if (val > DCVS_TUNING_MAX) + return -EINVAL; + + return __dcvs_tuning_scm_entry(adreno_dev, DCVS_TUNING_NUMBUSY, val); +} + +static u32 _dcvs_tuning_numbusy_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->dcvs_tuning_numbusy_lvl; +} + static int _gpu_llc_slice_enable_store(struct adreno_device *adreno_dev, bool val) { @@ -450,6 +565,10 @@ static DEVICE_ATTR_RO(gpu_model); static DEVICE_ATTR_RO(gpufaults); static DEVICE_ATTR_RO(gpufault_procs); +static ADRENO_SYSFS_U32(dcvs_tuning_mingap); +static ADRENO_SYSFS_U32(dcvs_tuning_penalty); +static ADRENO_SYSFS_U32(dcvs_tuning_numbusy); + static const struct attribute *_attr_list[] = { &adreno_attr_ft_policy.attr.attr, &adreno_attr_ft_pagefault_policy.attr.attr, @@ -477,6 +596,9 @@ static const struct attribute *_attr_list[] = { &adreno_attr_clx.attr.attr, &dev_attr_gpufaults.attr, &dev_attr_gpufault_procs.attr, + &adreno_attr_dcvs_tuning_mingap.attr.attr, + &adreno_attr_dcvs_tuning_penalty.attr.attr, + &adreno_attr_dcvs_tuning_numbusy.attr.attr, NULL, }; diff --git a/adreno_trace.h b/adreno_trace.h index 16974a83f7..71e98c575f 100644 --- a/adreno_trace.h +++ b/adreno_trace.h @@ -985,6 +985,25 @@ TRACE_EVENT(adreno_ifpc_count, TP_printk("total times GMU entered IFPC = %d", __entry->ifpc_count) ); +TRACE_EVENT(adreno_dcvs_tuning, + TP_PROTO(u32 param, u32 mingap, u32 penalty, u32 numbusy), + TP_ARGS(param, mingap, penalty, numbusy), + TP_STRUCT__entry( + __field(u32, param) + __field(u32, mingap) + __field(u32, penalty) + __field(u32, numbusy) + ), + TP_fast_assign( + __entry->param = param; + __entry->mingap = mingap; + __entry->penalty = penalty; + __entry->numbusy = numbusy; + ), + TP_printk("param=%u mingap=%u penalty=%u numbusy=%u", + __entry->param, __entry->mingap, __entry->penalty, __entry->numbusy) +); + #endif /* _ADRENO_TRACE_H */ /* This part must be outside protection */ diff --git a/governor_msm_adreno_tz.c 
b/governor_msm_adreno_tz.c index 4f52ae0ecd..c9cd1db943 100644 --- a/governor_msm_adreno_tz.c +++ b/governor_msm_adreno_tz.c @@ -57,6 +57,8 @@ static DEFINE_SPINLOCK(suspend_lock); #define TZ_V2_INIT_CA_ID_64 0xC #define TZ_V2_UPDATE_WITH_CA_ID_64 0xD +#define TZ_DCVS_TUNING_ID 0xE + #define TAG "msm_adreno_tz: " static u64 suspend_time; From 35c592db9043ed6b8ccb934220ca60040da363e5 Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Tue, 26 Nov 2024 22:19:43 +0530 Subject: [PATCH 0954/1016] msm: kgsl: Enable IFPC feature for gen_8_6_0 Enable IFPC feature for GPU power saving. Change-Id: I5d80b2aa9d12b1770d92801ec9167e8083efca25 Signed-off-by: SIVA MULLATI --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 1947baff1d..b263d7402d 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -3204,7 +3204,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_6_0 = { .compatible = "qcom,adreno-gpu-gen8-6-0", .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | - ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE, + ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_IFPC, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_64M, From b41c20fc6e88ba9cd578d5ea0e449350ba3d8093 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Thu, 7 Nov 2024 11:34:00 -0800 Subject: [PATCH 0955/1016] kgsl: gen7: Fix GBIF CX part of register programming Currently, the GBIF CX part of the register is programmed after GMU initialization. This commit fixes the issue by ensuring all GBIF CX registers are programmed before the GMU boot, allowing for successful GMU initialization. 
Change-Id: I4c5b129d817fa176134375e13c384b0cad5363b4 Signed-off-by: Hareesh Gundu --- adreno-gpulist.h | 53 ++++++++++++++++++++++------------------------- adreno_gen7.c | 14 +++---------- adreno_gen7_gmu.c | 12 +++++++++++ 3 files changed, 40 insertions(+), 39 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index b7e2e068b5..aa1bab3d1a 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -1824,12 +1824,11 @@ static const struct adreno_a6xx_core adreno_gpu_core_gen6_3_26_0 = { extern const struct gen7_snapshot_block_list gen7_0_0_snapshot_block_list; -static const struct kgsl_regmap_list gen7_0_0_gbif_regs[] = { +static const struct kgsl_regmap_list gen7_0_0_gbif_cx_regs[] = { { GEN7_GBIF_QSB_SIDE0, 0x00071620 }, { GEN7_GBIF_QSB_SIDE1, 0x00071620 }, { GEN7_GBIF_QSB_SIDE2, 0x00071620 }, { GEN7_GBIF_QSB_SIDE3, 0x00071620 }, - { GEN7_RBBM_GBIF_CLIENT_QOS_CNTL, 0x2120212 }, }; static const struct kgsl_regmap_list a702_hwcg_regs[] = { @@ -2051,8 +2050,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_0_0 = { .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), .ao_hwcg = gen7_0_0_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen7_0_0_ao_hwcg_regs), - .gbif = gen7_0_0_gbif_regs, - .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), + .gbif = gen7_0_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = gen7_0_0_protected_regs, .highest_bank_bit = 16, @@ -2087,8 +2086,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_0_1 = { .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), .ao_hwcg = gen7_0_0_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen7_0_0_ao_hwcg_regs), - .gbif = gen7_0_0_gbif_regs, - .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), + .gbif = gen7_0_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = gen7_0_0_protected_regs, .highest_bank_bit = 16, @@ -2101,23 +2100,21 @@ static const struct adreno_gen7_core 
adreno_gpu_core_gen7_0_1 = { extern const struct gen7_snapshot_block_list gen7_2_0_snapshot_block_list; extern const struct gen7_snapshot_block_list gen7_6_0_snapshot_block_list; -static const struct kgsl_regmap_list gen7_2_0_gbif_regs[] = { +static const struct kgsl_regmap_list gen7_2_0_gbif_cx_regs[] = { { GEN7_GBIF_QSB_SIDE0, 0x00071620 }, { GEN7_GBIF_QSB_SIDE1, 0x00071620 }, { GEN7_GBIF_QSB_SIDE2, 0x00071620 }, { GEN7_GBIF_QSB_SIDE3, 0x00071620 }, - { GEN7_RBBM_GBIF_CLIENT_QOS_CNTL, 0x2120212 }, { GEN7_GMU_CX_MRC_GBIF_QOS_CTRL, 0x33 }, }; extern const struct gen7_snapshot_block_list gen7_3_0_snapshot_block_list; -static const struct kgsl_regmap_list gen7_3_0_gbif_regs[] = { +static const struct kgsl_regmap_list gen7_3_0_gbif_cx_regs[] = { { GEN7_GBIF_QSB_SIDE0, 0x00071620 }, { GEN7_GBIF_QSB_SIDE1, 0x00071620 }, { GEN7_GBIF_QSB_SIDE2, 0x00071620 }, { GEN7_GBIF_QSB_SIDE3, 0x00071620 }, - { GEN7_RBBM_GBIF_CLIENT_QOS_CNTL, 0x00000003 }, }; static const u32 gen7_6_0_gbif_client_qos_values[KGSL_PRIORITY_MAX_RB_LEVELS] = { @@ -2263,8 +2260,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_2_0 = { .hwcg_count = ARRAY_SIZE(gen7_2_0_hwcg_regs), .ao_hwcg = gen7_2_0_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen7_2_0_ao_hwcg_regs), - .gbif = gen7_2_0_gbif_regs, - .gbif_count = ARRAY_SIZE(gen7_2_0_gbif_regs), + .gbif = gen7_2_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen7_2_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = gen7_0_0_protected_regs, .highest_bank_bit = 16, @@ -2300,8 +2297,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_2_1 = { .hwcg_count = ARRAY_SIZE(gen7_2_0_hwcg_regs), .ao_hwcg = gen7_2_0_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen7_2_0_ao_hwcg_regs), - .gbif = gen7_2_0_gbif_regs, - .gbif_count = ARRAY_SIZE(gen7_2_0_gbif_regs), + .gbif = gen7_2_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen7_2_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = gen7_0_0_protected_regs, .highest_bank_bit = 16, 
@@ -2336,8 +2333,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_3_0 = { .hwcg_count = ARRAY_SIZE(gen7_3_0_hwcg_regs), .ao_hwcg = gen7_0_0_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen7_0_0_ao_hwcg_regs), - .gbif = gen7_3_0_gbif_regs, - .gbif_count = ARRAY_SIZE(gen7_3_0_gbif_regs), + .gbif = gen7_3_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen7_3_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = gen7_0_0_protected_regs, .highest_bank_bit = 15, @@ -2371,8 +2368,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_4_0 = { .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), .ao_hwcg = gen7_0_0_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen7_0_0_ao_hwcg_regs), - .gbif = gen7_0_0_gbif_regs, - .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), + .gbif = gen7_0_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = gen7_0_0_protected_regs, .highest_bank_bit = 16, @@ -2406,8 +2403,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_6_0 = { .hwcg_count = ARRAY_SIZE(gen7_2_0_hwcg_regs), .ao_hwcg = gen7_2_0_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen7_2_0_ao_hwcg_regs), - .gbif = gen7_2_0_gbif_regs, - .gbif_count = ARRAY_SIZE(gen7_2_0_gbif_regs), + .gbif = gen7_2_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen7_2_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = gen7_0_0_protected_regs, .highest_bank_bit = 16, @@ -2498,8 +2495,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_9_0 = { .zap_name = "gen70900_zap.mbn", .ao_hwcg = gen7_2_0_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen7_2_0_ao_hwcg_regs), - .gbif = gen7_2_0_gbif_regs, - .gbif_count = ARRAY_SIZE(gen7_2_0_gbif_regs), + .gbif = gen7_2_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen7_2_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = gen7_9_0_protected_regs, .highest_bank_bit = 16, @@ -2537,8 +2534,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_9_1 = { 
.zap_name = "gen70900_zap.mbn", .ao_hwcg = gen7_2_0_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen7_2_0_ao_hwcg_regs), - .gbif = gen7_0_0_gbif_regs, - .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), + .gbif = gen7_0_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = gen7_9_0_protected_regs, .highest_bank_bit = 16, @@ -2575,8 +2572,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_14_0 = { .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), .ao_hwcg = gen7_0_0_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen7_0_0_ao_hwcg_regs), - .gbif = gen7_0_0_gbif_regs, - .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), + .gbif = gen7_0_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = gen7_0_0_protected_regs, .highest_bank_bit = 15, @@ -2611,8 +2608,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_11_0 = { .hwcg_count = ARRAY_SIZE(gen7_2_0_hwcg_regs), .ao_hwcg = gen7_2_0_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen7_2_0_ao_hwcg_regs), - .gbif = gen7_2_0_gbif_regs, - .gbif_count = ARRAY_SIZE(gen7_2_0_gbif_regs), + .gbif = gen7_2_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen7_2_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = gen7_0_0_protected_regs, .highest_bank_bit = 16, diff --git a/adreno_gen7.c b/adreno_gen7.c index 098fa012b6..57b8571ecd 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -792,10 +792,9 @@ int gen7_start(struct adreno_device *adreno_dev) struct cpu_gpu_lock *pwrup_lock = adreno_dev->pwrup_reglist->hostptr; u64 uche_trap_base = gen7_get_uche_trap_base(); - /* Set up GBIF registers from the GPU core definition */ - kgsl_regmap_multi_write(&device->regmap, gen7_core->gbif, - gen7_core->gbif_count); - + /* Set up GX GBIF registers */ + kgsl_regwrite(device, GEN7_RBBM_GBIF_CLIENT_QOS_CNTL, + (adreno_is_gen7_3_0(adreno_dev)) ? 
0x00000003 : 0x2120212); kgsl_regwrite(device, GEN7_UCHE_GBIF_GX_CONFIG, 0x10240e0); /* Make all blocks contribute to the GPU BUSY perf counter */ @@ -962,13 +961,6 @@ int gen7_start(struct adreno_device *adreno_dev) if (ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) kgsl_regwrite(device, GEN7_CP_AQE_APRIV_CNTL, BIT(0)); - if (adreno_is_gen7_9_x(adreno_dev)) - kgsl_regrmw(device, GEN7_GBIF_CX_CONFIG, GENMASK(31, 29), - FIELD_PREP(GENMASK(31, 29), 1)); - else if (adreno_is_gen7_14_0(adreno_dev)) - kgsl_regrmw(device, GEN7_GBIF_CX_CONFIG, GENMASK(31, 29), - FIELD_PREP(GENMASK(31, 29), 2)); - /* * CP Icache prefetch brings no benefit on few gen7 variants because of * the prefetch granularity size. diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 6bffb5cce4..5ac7026f33 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -970,6 +970,7 @@ void gen7_gmu_register_config(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev); u32 val; /* Clear any previously set cm3 fault */ @@ -998,6 +999,17 @@ void gen7_gmu_register_config(struct adreno_device *adreno_dev) */ gmu_core_regwrite(device, GEN7_GMU_CM3_CFG, 0x4052); + /* Set up GBIF registers from the GPU core definition */ + kgsl_regmap_multi_write(&device->regmap, gen7_core->gbif, + gen7_core->gbif_count); + + if (adreno_is_gen7_9_x(adreno_dev)) + kgsl_regrmw(device, GEN7_GBIF_CX_CONFIG, GENMASK(31, 29), + FIELD_PREP(GENMASK(31, 29), 1)); + else if (adreno_is_gen7_14_0(adreno_dev)) + kgsl_regrmw(device, GEN7_GBIF_CX_CONFIG, GENMASK(31, 29), + FIELD_PREP(GENMASK(31, 29), 2)); + /** * We may have asserted gbif halt as part of reset sequence which may * not get cleared if the gdsc was not reset. 
So clear it before From 351aac0866d3dae5f51af9c4fa2bc5c07f79dbe3 Mon Sep 17 00:00:00 2001 From: Harshitha Sai Neelati Date: Thu, 7 Nov 2024 17:23:40 +0530 Subject: [PATCH 0956/1016] kgsl: gen7: Add support for Gen7_17_0 GPU Add support for Gen7_17_0 GPU. Change-Id: I8863e9ff5494b5ef12c87297b0242bb0d9720016 Signed-off-by: Harshitha Sai Neelati --- adreno-gpulist.h | 30 ++++++++++++++++++++++++++++++ adreno.c | 4 ++-- adreno.h | 11 +++++++++-- adreno_gen7_gmu.c | 2 +- adreno_gen7_snapshot.c | 6 +++--- 5 files changed, 45 insertions(+), 8 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index aa1bab3d1a..3611b87c71 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2619,6 +2619,35 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_11_0 = { .fast_bus_hint = true, }; +static const struct adreno_gen7_core adreno_gpu_core_gen7_17_0 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_GEN7_17_0, + UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), + .compatible = "qcom,adreno-gpu-gen7-17-0", + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION, + .gpudev = &adreno_gen7_gmu_gpudev.base, + .perfcounters = &adreno_gen7_no_cb_perfcounters, + .uche_gmem_alignment = SZ_16M, + .gmem_size = SZ_1M, + .bus_width = 32, + .snapshot_size = SZ_4M, + }, + .sqefw_name = "gen70e00_sqe.fw", + .gmufw_name = "gmu_gen70e00.bin", + .zap_name = "gen70e00_zap.mbn", + .hwcg = gen7_0_0_hwcg_regs, + .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), + .ao_hwcg = gen7_0_0_ao_hwcg_regs, + .ao_hwcg_count = ARRAY_SIZE(gen7_0_0_ao_hwcg_regs), + .gbif = gen7_0_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_cx_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = gen7_0_0_protected_regs, + .highest_bank_bit = 15, + .gen7_snapshot_block_list = &gen7_14_0_snapshot_block_list, + .ctxt_record_size = 1536 * 1024 +}; + static const struct kgsl_regmap_list a663_hwcg_regs[] = { {A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, {A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, @@ -3281,6 
+3310,7 @@ static const struct adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_gen7_9_1.base, &adreno_gpu_core_gen7_14_0.base, &adreno_gpu_core_gen7_11_0.base, + &adreno_gpu_core_gen7_17_0.base, &adreno_gpu_core_gen8_0_0.base, &adreno_gpu_core_gen8_0_1.base, &adreno_gpu_core_gen8_3_0.base, diff --git a/adreno.c b/adreno.c index 109e9c5a29..5cdf0cd00b 100644 --- a/adreno.c +++ b/adreno.c @@ -1405,12 +1405,12 @@ int adreno_device_probe(struct platform_device *pdev, /* * Force no write allocate for A5x, A6x and all gen7 targets - * except gen_7_9_x and gen_7_14_0. gen_7_9_x and gen_7_14_0 + * except gen_7_9_x and gen_7_14_0_family. gen_7_9_x and gen_7_14_0_family * use write allocate. */ if (adreno_is_a5xx(adreno_dev) || adreno_is_a6xx(adreno_dev) || (adreno_is_gen7(adreno_dev) && !adreno_is_gen7_9_x(adreno_dev) && - !adreno_is_gen7_14_0(adreno_dev))) + !adreno_is_gen7_14_0_family(adreno_dev))) kgsl_mmu_set_feature(device, KGSL_MMU_FORCE_LLCC_NWA); /* Bind the components before doing the KGSL platform probe. 
*/ diff --git a/adreno.h b/adreno.h index 169b16b8d7..6cd9661643 100644 --- a/adreno.h +++ b/adreno.h @@ -251,6 +251,7 @@ enum adreno_gpurev { ADRENO_REV_GEN7_9_1 = ADRENO_GPUREV_VALUE(7, 9, 1), ADRENO_REV_GEN7_14_0 = ADRENO_GPUREV_VALUE(7, 14, 0), ADRENO_REV_GEN7_11_0 = ADRENO_GPUREV_VALUE(7, 11, 0), + ADRENO_REV_GEN7_17_0 = ADRENO_GPUREV_VALUE(7, 17, 0), ADRENO_REV_GEN8_0_0 = ADRENO_GPUREV_VALUE(8, 0, 0), ADRENO_REV_GEN8_0_1 = ADRENO_GPUREV_VALUE(8, 0, 1), ADRENO_REV_GEN8_3_0 = ADRENO_GPUREV_VALUE(8, 3, 0), @@ -1281,6 +1282,7 @@ ADRENO_TARGET(gen7_9_0, ADRENO_REV_GEN7_9_0) ADRENO_TARGET(gen7_9_1, ADRENO_REV_GEN7_9_1) ADRENO_TARGET(gen7_14_0, ADRENO_REV_GEN7_14_0) ADRENO_TARGET(gen7_11_0, ADRENO_REV_GEN7_11_0) +ADRENO_TARGET(gen7_17_0, ADRENO_REV_GEN7_17_0) ADRENO_TARGET(gen8_0_0, ADRENO_REV_GEN8_0_0) ADRENO_TARGET(gen8_0_1, ADRENO_REV_GEN8_0_1) ADRENO_TARGET(gen8_3_0, ADRENO_REV_GEN8_3_0) @@ -1298,11 +1300,16 @@ static inline int adreno_is_gen7_0_x_family(struct adreno_device *adreno_dev) adreno_is_gen7_4_0(adreno_dev) || adreno_is_gen7_3_0(adreno_dev); } +static inline int adreno_is_gen7_14_0_family(struct adreno_device *adreno_dev) +{ + return adreno_is_gen7_14_0(adreno_dev) || adreno_is_gen7_17_0(adreno_dev); +} + static inline int adreno_is_gen7_2_x_family(struct adreno_device *adreno_dev) { return adreno_is_gen7_2_0(adreno_dev) || adreno_is_gen7_2_1(adreno_dev) || adreno_is_gen7_6_0(adreno_dev) || adreno_is_gen7_9_x(adreno_dev) || - adreno_is_gen7_14_0(adreno_dev) || adreno_is_gen7_11_0(adreno_dev); + adreno_is_gen7_14_0_family(adreno_dev) || adreno_is_gen7_11_0(adreno_dev); } static inline int adreno_is_gen8_0_x_family(struct adreno_device *adreno_dev) @@ -1314,7 +1321,7 @@ static inline int adreno_is_gen8_0_x_family(struct adreno_device *adreno_dev) /* Gen7 targets which does not support concurrent binning */ static inline int adreno_is_gen7_no_cb_family(struct adreno_device *adreno_dev) { - return adreno_is_gen7_14_0(adreno_dev) || 
adreno_is_gen7_3_0(adreno_dev); + return adreno_is_gen7_14_0_family(adreno_dev) || adreno_is_gen7_3_0(adreno_dev); } /* diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 5ac7026f33..76e2469adc 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -1006,7 +1006,7 @@ void gen7_gmu_register_config(struct adreno_device *adreno_dev) if (adreno_is_gen7_9_x(adreno_dev)) kgsl_regrmw(device, GEN7_GBIF_CX_CONFIG, GENMASK(31, 29), FIELD_PREP(GENMASK(31, 29), 1)); - else if (adreno_is_gen7_14_0(adreno_dev)) + else if (adreno_is_gen7_14_0_family(adreno_dev)) kgsl_regrmw(device, GEN7_GBIF_CX_CONFIG, GENMASK(31, 29), FIELD_PREP(GENMASK(31, 29), 2)); diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c index 1d4b63bd85..2f2123f255 100644 --- a/adreno_gen7_snapshot.c +++ b/adreno_gen7_snapshot.c @@ -380,7 +380,7 @@ static size_t gen7_legacy_snapshot_shader(struct kgsl_device *device, * AHB path might fail. Hence, skip SP_INST_TAG and SP_INST_DATA* * state types during snapshot dump in legacy flow. 
*/ - if (adreno_is_gen7_0_x_family(adreno_dev) || adreno_is_gen7_14_0(adreno_dev)) { + if (adreno_is_gen7_0_x_family(adreno_dev) || adreno_is_gen7_14_0_family(adreno_dev)) { if (block->statetype == SP_INST_TAG || block->statetype == SP_INST_DATA || block->statetype == SP_INST_DATA_1 || @@ -716,7 +716,7 @@ static void gen7_snapshot_shader(struct kgsl_device *device, void *priv) = gen7_legacy_snapshot_shader; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - if (adreno_is_gen7_0_x_family(adreno_dev)) + if (adreno_is_gen7_0_x_family(adreno_dev) || adreno_is_gen7_14_0_family(adreno_dev)) kgsl_regrmw(device, GEN7_SP_DBG_CNTL, GENMASK(1, 0), 3); if (CD_SCRIPT_CHECK(device)) { @@ -786,7 +786,7 @@ static void gen7_snapshot_shader(struct kgsl_device *device, } done: - if (adreno_is_gen7_0_x_family(adreno_dev)) + if (adreno_is_gen7_0_x_family(adreno_dev) || adreno_is_gen7_14_0_family(adreno_dev)) kgsl_regrmw(device, GEN7_SP_DBG_CNTL, GENMASK(1, 0), 0x0); } From 7e868adcd9e8bbaf5115e6c64345cb6dcbfffc1c Mon Sep 17 00:00:00 2001 From: Kaushal Sanadhya Date: Tue, 17 Dec 2024 14:36:36 +0530 Subject: [PATCH 0957/1016] kgsl: gen7: Update GPU_CC_CX_CFG_GDSCR offset for Gen7_17_0 Update the dword offset of GPU_CC_CX_CFG_GDSCR register for Gen7_17_0 GPU. 
Change-Id: I96229ae07ab4c7ddd0db8566aa48bee0e2cb0f57 Signed-off-by: Kaushal Sanadhya --- adreno_gen7.c | 12 +++++++++--- gen7_reg.h | 1 + 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/adreno_gen7.c b/adreno_gen7.c index 57b8571ecd..1d4318a437 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -1763,6 +1763,7 @@ int gen7_probe_common(struct platform_device *pdev, { const struct adreno_gpudev *gpudev = gpucore->gpudev; struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; const struct adreno_gen7_core *gen7_core = container_of(gpucore, struct adreno_gen7_core, base); int ret; @@ -1777,9 +1778,14 @@ int gen7_probe_common(struct platform_device *pdev, kgsl_pwrscale_fast_bus_hint(gen7_core->fast_bus_hint); - device->pwrctrl.rt_bus_hint = gen7_core->rt_bus_hint; - device->pwrctrl.cx_cfg_gdsc_offset = adreno_is_gen7_11_0(adreno_dev) ? - GEN7_11_0_GPU_CC_CX_CFG_GDSCR : GEN7_GPU_CC_CX_CFG_GDSCR; + pwr->rt_bus_hint = gen7_core->rt_bus_hint; + + if (adreno_is_gen7_11_0(adreno_dev)) + pwr->cx_cfg_gdsc_offset = GEN7_11_0_GPU_CC_CX_CFG_GDSCR; + else if (adreno_is_gen7_17_0(adreno_dev)) + pwr->cx_cfg_gdsc_offset = GEN7_17_0_GPU_CC_CX_CFG_GDSCR; + else + pwr->cx_cfg_gdsc_offset = GEN7_GPU_CC_CX_CFG_GDSCR; ret = adreno_device_probe(pdev, adreno_dev); if (ret) diff --git a/gen7_reg.h b/gen7_reg.h index dc74e33c0d..ddd9506d9c 100644 --- a/gen7_reg.h +++ b/gen7_reg.h @@ -1251,6 +1251,7 @@ /* GPUCC registers */ #define GEN7_11_0_GPU_CC_CX_CFG_GDSCR 0x26424 #define GEN7_GPU_CC_CX_CFG_GDSCR 0x26443 +#define GEN7_17_0_GPU_CC_CX_CFG_GDSCR 0x26445 #define GEN7_GPU_CC_GX_DOMAIN_MISC3 0x26541 /* GPU RSC sequencer registers */ From 7ad3d19bf7e5c5af7b44190ca1562ef76a94f769 Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Thu, 19 Dec 2024 11:44:55 +0530 Subject: [PATCH 0958/1016] kgsl: gen8: Update snapshot headers for gen8_6_0 Update shader block for gen8_6_0 in snapshot section and dump few registers through AHB path. 
Change-Id: If6b39b538dca5d8c2dae1b2c0b0209bf50d1f513 Signed-off-by: SIVA MULLATI Signed-off-by: Gayathri Veeragandam --- adreno_gen8_6_0_snapshot.h | 137 +++++++++++++++++++++++++++++++++---- adreno_gen8_snapshot.c | 6 +- 2 files changed, 126 insertions(+), 17 deletions(-) diff --git a/adreno_gen8_6_0_snapshot.h b/adreno_gen8_6_0_snapshot.h index be85fe01df..be719ea5e8 100644 --- a/adreno_gen8_6_0_snapshot.h +++ b/adreno_gen8_6_0_snapshot.h @@ -9,10 +9,6 @@ #include "adreno_gen8_0_0_snapshot.h" static const u32 gen8_6_0_debugbus_blocks[] = { - DEBUGBUS_GBIF_CX_GC_US_I_0, - DEBUGBUS_GMU_CX_GC_US_I_0, - DEBUGBUS_CX_GC_US_I_0, - DEBUGBUS_GBIF_GX_GC_US_I_0, DEBUGBUS_GMU_GX_GC_US_I_0, DEBUGBUS_DBGC_GC_US_I_0, DEBUGBUS_RBBM_GC_US_I_0, @@ -124,16 +120,118 @@ static const u32 gen8_6_0_debugbus_blocks[] = { DEBUGBUS_VFD_GC_S_1_I_1, }; +static struct gen8_shader_block gen8_6_0_shader_blocks[] = { + { TP0_TMO_DATA, 0x0200, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { TP0_SMO_DATA, 0x0080, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { TP0_MIPMAP_BASE_DATA, 0x0080, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_INST_DATA_3, 0x0800, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_INST_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_INST_DATA_1, 0x0800, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_0_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_1_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_2_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_3_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_4_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_5_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_6_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_7_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_CB_RAM, 0x0390, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_13_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_14_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_INST_TAG, 0x0100, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_INST_DATA_2, 0x0800, 
2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_TMO_TAG, 0x0080, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_SMO_TAG, 0x0080, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_STATE_DATA, 0x0040, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_HWAVE_RAM, 0x0200, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_L0_INST_BUF, 0x0080, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_8_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_9_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_10_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_11_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { SP_LB_12_DATA, 0x0800, 2, 2, PIPE_BR, USPTP, SLICE, 1}, + { HLSQ_DATAPATH_DSTR_META, 0x0600, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_DATAPATH_DSTR_META, 0x0020, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_DESC_REMAP_META, 0x00A0, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_DESC_REMAP_META, 0x000C, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_DESC_REMAP_META, 0x0008, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_SLICE_TOP_META, 0x0048, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_SLICE_TOP_META, 0x0048, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_SLICE_TOP_META, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_L2STC_TAG_RAM, 0x0200, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_L2STC_INFO_CMD, 0x0474, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0100, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0100, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CPS_BE_CTXT_BUF_RAM_TAG, 0x0100, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CPS_BE_CTXT_BUF_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BV, HLSQ_STATE, 
UNSLICE, 1}, + { HLSQ_CHUNK_CPS_RAM, 0x0300, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CHUNK_CPS_RAM, 0x0180, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CHUNK_CVS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CHUNK_CVS_RAM_TAG, 0x0040, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CHUNK_CPS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CHUNK_CPS_RAM_TAG, 0x0040, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_ICB_CVS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_ICB_CVS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_ICB_CPS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_ICB_CPS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CVS_MISC_RAM, 0x0540, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CVS_MISC_RAM, 0x0540, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CPS_MISC_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CPS_MISC_RAM, 0x00B0, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CPS_MISC_RAM_1, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INST_RAM, 0x0200, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CPS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CPS_CONST_RAM, 0x0800, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CVS_MISC_RAM_TAG, 0x0060, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CVS_MISC_RAM_TAG, 0x0060, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CPS_MISC_RAM_TAG, 0x0600, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_CPS_MISC_RAM_TAG, 0x0012, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INST_RAM_TAG, 0x0040, 1, 1, PIPE_BR, 
HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INST_RAM_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INST_RAM_TAG, 0x0004, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0060, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM, 0x03C0, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM, 0x0280, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM, 0x0050, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM_TAG, 0x0014, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_GFX_LOCAL_MISC_RAM_TAG, 0x000C, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INST_RAM_1, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_STPROC_META, 0x0400, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_SLICE_BACKEND_META, 0x0188, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_SLICE_BACKEND_META, 0x0188, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_SLICE_BACKEND_META, 0x00C0, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INST_RAM_2, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_DATAPATH_META, 0x0020, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_INDIRECT_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_BACKEND_META, 0x0034, 1, 1, PIPE_BR, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_BACKEND_META, 0x0034, 1, 1, PIPE_BV, HLSQ_STATE, UNSLICE, 1}, + { HLSQ_BACKEND_META, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, UNSLICE, 1}, +}; + /* * Block : ['BROADCAST', 'GRAS', 'PC'] * Block : ['RBBM', 'RDVM', 'UCHE'] * Block : 
['VFD', 'VPC', 'VSC'] * REGION : UNSLICE * Pipeline: PIPE_NONE - * pairs : 121 (Regs:1162) + * pairs : 118 (Regs:1153) */ static const u32 gen8_6_0_gpu_registers[] = { - 0x00000, 0x00000, 0x00002, 0x00002, 0x00008, 0x0000d, 0x00010, 0x00013, + 0x00000, 0x00000, 0x00002, 0x00002, 0x00008, 0x0000d, 0x00010, 0x00011, 0x00015, 0x00016, 0x00018, 0x00018, 0x0001a, 0x0001a, 0x0001c, 0x0001c, 0x0001e, 0x0001e, 0x00028, 0x0002b, 0x0002d, 0x00039, 0x00040, 0x00053, 0x00062, 0x00066, 0x00069, 0x0006e, 0x00071, 0x00072, 0x00074, 0x00074, @@ -162,14 +260,29 @@ static const u32 gen8_6_0_gpu_registers[] = { 0x0ed47, 0x0ed47, 0x0ed4a, 0x0ed4a, 0x0ed52, 0x0ed52, 0x0ed66, 0x0ed68, 0x0ed6b, 0x0ed6d, 0x0ed6f, 0x0ed6f, 0x0ed80, 0x0ed81, 0x0ed85, 0x0ed85, 0x0ed87, 0x0ed87, 0x0ed8a, 0x0ed8a, 0x0ed92, 0x0ed92, 0x0eda6, 0x0eda8, - 0x0edab, 0x0edad, 0x0edaf, 0x0edaf, 0x0f400, 0x0f400, 0x0f800, 0x0f803, - 0x0fc00, 0x0fc01, + 0x0edab, 0x0edad, 0x0edaf, 0x0edaf, UINT_MAX, UINT_MAX, }; static_assert(IS_ALIGNED(sizeof(gen8_6_0_gpu_registers), 8)); /* - * Block : ['GMUAO', 'GMUCX', 'GMUCX_RAM'] + * Block : ['BROADCAST', 'GRAS', 'PC'] + * Block : ['RBBM', 'RDVM', 'UCHE'] + * Block : ['VFD', 'VPC', 'VSC'] + * REGION : SLICE + * Pipeline: PIPE_NONE + * pairs : 12 (Regs:86) + */ +static const u32 gen8_6_0_gpu_slice_registers[] = { + 0x00500, 0x00500, 0x00583, 0x00584, 0x00586, 0x0058b, 0x0058f, 0x00599, + 0x005a0, 0x005b3, 0x005c0, 0x005c0, 0x005c2, 0x005c6, 0x005e0, 0x005e3, + 0x005ec, 0x005ec, 0x00f01, 0x00f02, 0x00f04, 0x00f0c, 0x00f20, 0x00f37, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen8_6_0_gpu_slice_registers), 8)); + +/* + * Block : ['GMUGX'] * REGION : UNSLICE * Pipeline: PIPE_NONE * pairs : 160 (Regs:616) @@ -359,17 +472,13 @@ static struct gen8_cluster_registers gen8_6_0_mvc_clusters[] = { static struct gen8_reg_list gen8_6_0_reg_list[] = { { UNSLICE, gen8_6_0_gpu_registers }, - { SLICE, gen8_0_0_gpu_slice_registers }, + { SLICE, gen8_6_0_gpu_slice_registers }, { 
UNSLICE, gen8_0_0_dbgc_registers }, { SLICE, gen8_0_0_dbgc_slice_registers }, { UNSLICE, gen8_0_0_cx_dbgc_registers }, { UNSLICE, NULL}, }; -static struct gen8_reg_list gen8_6_0_ahb_registers[] = { - { UNSLICE, gen8_0_0_gbif_registers }, -}; - /* * Block : ['GPU_CC_GPU_CC_REG'] * REGION : UNSLICE diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index acc4ffbf7f..19edbb11c1 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -82,8 +82,8 @@ const struct gen8_snapshot_block_list gen8_3_0_snapshot_block_list = { }; const struct gen8_snapshot_block_list gen8_6_0_snapshot_block_list = { - .pre_crashdumper_regs = gen8_6_0_ahb_registers, - .num_pre_crashdumper_regs = ARRAY_SIZE(gen8_6_0_ahb_registers), + .pre_crashdumper_regs = gen8_0_0_ahb_registers, + .num_pre_crashdumper_regs = ARRAY_SIZE(gen8_0_0_ahb_registers), .debugbus_blocks = gen8_6_0_debugbus_blocks, .debugbus_blocks_len = ARRAY_SIZE(gen8_6_0_debugbus_blocks), .gbif_debugbus_blocks = gen8_gbif_debugbus_blocks, @@ -98,7 +98,7 @@ const struct gen8_snapshot_block_list gen8_6_0_snapshot_block_list = { .rscc_regs = gen8_0_0_rscc_rsc_registers, .reg_list = gen8_6_0_reg_list, .cx_misc_regs = gen8_0_0_cx_misc_registers, - .shader_blocks = gen8_0_0_shader_blocks, + .shader_blocks = gen8_6_0_shader_blocks, .num_shader_blocks = ARRAY_SIZE(gen8_0_0_shader_blocks), .cp_clusters = gen8_0_0_cp_clusters, .num_cp_clusters = ARRAY_SIZE(gen8_0_0_cp_clusters), From 83e9bb03bb538f84f59bc5e85d2e45b31497ffde Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Tue, 17 Dec 2024 07:39:39 +0530 Subject: [PATCH 0959/1016] kgsl: gen8: Update noncontext register list for gen8_6_0 Update noncontext register list for gen8_6_0 as per the latest recommendation. 
Change-Id: I237d026d27c74869c23d9ca042965e7659635f91 Signed-off-by: SIVA MULLATI Signed-off-by: Gayathri Veeragandam --- adreno-gpulist.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index b7e2e068b5..af33306e31 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -3195,6 +3195,69 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_3_0 = { .ctxt_record_size = (4558 * SZ_1K), }; +/* GEN8_6_0 noncontext register list */ +static const struct gen8_nonctxt_regs gen8_6_0_nonctxt_regs[] = { + { GEN8_CP_SMMU_STREAM_ID_LPAC, 0x00000101, BIT(PIPE_NONE) }, + { GEN8_GRAS_DBG_ECO_CNTL, 0x00000800, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_PC_AUTO_VERTEX_STRIDE, 0x00000001, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_PC_VIS_STREAM_CNTL, 0x10010000, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_PC_CONTEXT_SWITCH_STABILIZE_CNTL_1, 0x00000002, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_PC_CHICKEN_BITS_1, 0x00000003, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_PC_CHICKEN_BITS_2, 0x00000200, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_PC_CHICKEN_BITS_3, 0x00500000, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_PC_CHICKEN_BITS_4, 0x00500050, BIT(PIPE_BR) | BIT(PIPE_BV) }, + /* Configure GBIF GX registers */ + { GEN8_UCHE_GBIF_GX_CONFIG, 0x010240e0, BIT(PIPE_NONE) }, + { GEN8_RBBM_GBIF_CLIENT_QOS_CNTL, 0x22122212, BIT(PIPE_NONE) }, + /* Enable full concurrent resolve and unresolves */ + { GEN8_RB_CCU_CNTL, 0x00000068, BIT(PIPE_BR) }, + { GEN8_RB_GC_GMEM_PROTECT, 0x02600000, BIT(PIPE_BR) }, + /* Configure number of outstanding transactions to 32 */ + { GEN8_RB_RESOLVE_PREFETCH_CNTL, 0x00000007, BIT(PIPE_BR) }, + /* Configure UCHE request time out to 16 cycles for CCU/UCHE arbitration */ + { GEN8_RB_CMP_DBG_ECO_CNTL, 0x00004000, BIT(PIPE_BR) }, + { GEN8_RBBM_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, + { GEN8_RBBM_SLICE_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, + { GEN8_RBBM_WAIT_IDLE_CLOCKS_CNTL, 
0x00000030, BIT(PIPE_NONE) }, + { GEN8_RBBM_WAIT_IDLE_CLOCKS_CNTL2, 0x00000030, BIT(PIPE_NONE) }, + { GEN8_RBBM_CGC_P2S_CNTL, 0x00000040, BIT(PIPE_NONE) }, + /* Enable contribution of all shader stages to SP perfcounters */ + { GEN8_SP_PERFCTR_SHADER_MASK, 0x0000003f, BIT(PIPE_NONE) }, + /* + * BIT(26): Limit the number of wave-slots for Eviction buffer to 1 per ALU GRP + * BIT(30): Disable LPAC auto-promotion + */ + { GEN8_SP_CHICKEN_BITS_1, BIT(26) | BIT(30), BIT(PIPE_NONE) }, + /* + * BIT(22): Disable PS out of order retire + * BIT(23): Enable half wave mode and MM instruction src&dst is half precision + */ + { GEN8_SP_CHICKEN_BITS_2, BIT(22) | BIT(23), BIT(PIPE_NONE) }, + { GEN8_SP_CHICKEN_BITS_3, 0x00300000, BIT(PIPE_NONE) }, + { GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, 0x00000080, BIT(PIPE_NONE) }, + { GEN8_TPL1_DBG_ECO_CNTL, 0x10000000, BIT(PIPE_NONE) }, + /* Enable cubemap small miplevel optimization settings */ + { GEN8_TPL1_DBG_ECO_CNTL1, 0x00000724, BIT(PIPE_NONE) }, + /* Disable tag bank id hashing */ + { GEN8_UCHE_MODE_CNTL, 0x00080000, BIT(PIPE_NONE) }, + { GEN8_UCHE_CCHE_MODE_CNTL, 0x00001000, BIT(PIPE_NONE) }, + /* Limit gmem number of ways for GMEM requests in each set */ + { GEN8_UCHE_CCHE_CACHE_WAYS, 0x00000800, BIT(PIPE_NONE)}, + /* Disable write slow pointer in data phase queue */ + { GEN8_UCHE_HW_DBG_CNTL, BIT(8), BIT(PIPE_NONE) }, + /* Configure UCHE to CCU switchthreshold timeout cycles */ + { GEN8_UCHE_VARB_IDLE_TIMEOUT, 0x00000020, BIT(PIPE_NONE) }, + { GEN8_VFD_DBG_ECO_CNTL, 0x00008000, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_VFD_CB_BV_THRESHOLD, 0x00500050, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_VFD_CB_BR_THRESHOLD, 0x00600060, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_VFD_CB_BUSY_REQ_CNT, 0x00200020, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_VFD_CB_LP_REQ_CNT, 0x00000020, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { GEN8_VPC_FLATSHADE_MODE_CNTL, 0x00000001, BIT(PIPE_BR) | BIT(PIPE_BV) }, + /* Disable redundant tile data optimization */ + { 
GEN8_VSC_KMD_DBG_ECO_CNTL, BIT(11), BIT(PIPE_NONE)}, + { 0 }, +}; + extern const struct gen8_snapshot_block_list gen8_6_0_snapshot_block_list; static const struct adreno_gen8_core adreno_gpu_core_gen8_6_0 = { @@ -3223,7 +3286,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_6_0 = { .gbif_count = ARRAY_SIZE(gen8_3_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = gen8_0_0_protected_regs, - .nonctxt_regs = gen8_0_0_nonctxt_regs, + .nonctxt_regs = gen8_6_0_nonctxt_regs, .highest_bank_bit = 16, .gmu_hub_clk_freq = 200000000, .gen8_snapshot_block_list = &gen8_6_0_snapshot_block_list, From b4cd88aa52a9c4bb59e319ebc0c19cf9206b62bb Mon Sep 17 00:00:00 2001 From: Siva Srinivas Venigalla Date: Wed, 11 Dec 2024 20:51:27 +0530 Subject: [PATCH 0960/1016] kgsl: gen8: Fix MX vote in setup_dependency_domain_tbl When the available voltage corner exceeds the highest possible MX voltage corner, mx_vote may remain uninitialized, resulting in incorrect voting. This fix ensures that MX vote is set to the highest possible MX voltage corner in such cases. Change-Id: I1dbe34ea752c610b6b5233b5779dea23dc66fa7f Signed-off-by: Siva Srinivas Venigalla --- adreno_gen8_rpmh.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/adreno_gen8_rpmh.c b/adreno_gen8_rpmh.c index 6392799aa2..a56f60b7a5 100644 --- a/adreno_gen8_rpmh.c +++ b/adreno_gen8_rpmh.c @@ -63,7 +63,7 @@ static int setup_dependency_domain_tbl(u32 *votes, struct rpmh_arc_vals *dep_rail, struct rpmh_arc_vals *cx_rail, u16 *vlvl, u32 *cx_vlvl, u32 num_entries) { - u32 cx_vote, mx_vote; + u32 cx_vote; int i, j; for (i = 1; i < num_entries; i++) { @@ -90,25 +90,16 @@ static int setup_dependency_domain_tbl(u32 *votes, } /* - * Set Mx dependency domain votes for Gx level. Look for indexes + * Set MX dependency domain votes for GX level. 
Look for indexes * whose vlvl value is greater than or equal to the vlvl value * of the corresponding index of dependency rail */ for (j = 0; j < dep_rail->num; j++) { - if (dep_rail->val[j] >= vlvl[i]) { - mx_vote = j; - found_match = true; + if (dep_rail->val[j] >= vlvl[i] || j+1 == dep_rail->num) break; - } } - /* If we did not find a matching VLVL level then abort */ - if (!found_match) { - pr_err("kgsl: Unsupported mx corner: %u\n", vlvl[i]); - return -EINVAL; - } - - votes[i] = GEN8_DEP_VOTE_SET(cx_vote, mx_vote); + votes[i] = GEN8_DEP_VOTE_SET(cx_vote, j); } return 0; From 700e5dd7151eddbdcfc8a40fac53b1b45b4d17ee Mon Sep 17 00:00:00 2001 From: Harshitha Sai Neelati Date: Fri, 20 Dec 2024 01:26:45 +0530 Subject: [PATCH 0961/1016] kgsl: gen7: Update GMU FW binary for Gen7_17_0 GPU Update the GMU firmware binary for Gen7_17_0 GPU. Change-Id: I1be94919871cefa2232d2b3505f05f822887ea7d Signed-off-by: Harshitha Sai Neelati --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 3611b87c71..391986b157 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2633,7 +2633,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_17_0 = { .snapshot_size = SZ_4M, }, .sqefw_name = "gen70e00_sqe.fw", - .gmufw_name = "gmu_gen70e00.bin", + .gmufw_name = "gen71700_gmu.bin", .zap_name = "gen70e00_zap.mbn", .hwcg = gen7_0_0_hwcg_regs, .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), From e0a570950e28a978695321f989a995f101c4de9f Mon Sep 17 00:00:00 2001 From: Kaushal Sanadhya Date: Tue, 17 Dec 2024 14:36:36 +0530 Subject: [PATCH 0962/1016] kgsl: gen7: Update GPU_CC_CX_CFG_GDSCR offset for Gen7_17_0 Update the dword offset of GPU_CC_CX_CFG_GDSCR register for Gen7_17_0 GPU. 
Change-Id: I96229ae07ab4c7ddd0db8566aa48bee0e2cb0f57 Signed-off-by: Kaushal Sanadhya --- adreno_gen7.c | 12 +++++++++--- gen7_reg.h | 1 + 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/adreno_gen7.c b/adreno_gen7.c index 7a8d3aae74..3811cd4f92 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -1764,6 +1764,7 @@ int gen7_probe_common(struct platform_device *pdev, { const struct adreno_gpudev *gpudev = gpucore->gpudev; struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; const struct adreno_gen7_core *gen7_core = container_of(gpucore, struct adreno_gen7_core, base); int ret; @@ -1778,9 +1779,14 @@ int gen7_probe_common(struct platform_device *pdev, kgsl_pwrscale_fast_bus_hint(gen7_core->fast_bus_hint); - device->pwrctrl.rt_bus_hint = gen7_core->rt_bus_hint; - device->pwrctrl.cx_cfg_gdsc_offset = adreno_is_gen7_11_0(adreno_dev) ? - GEN7_11_0_GPU_CC_CX_CFG_GDSCR : GEN7_GPU_CC_CX_CFG_GDSCR; + pwr->rt_bus_hint = gen7_core->rt_bus_hint; + + if (adreno_is_gen7_11_0(adreno_dev)) + pwr->cx_cfg_gdsc_offset = GEN7_11_0_GPU_CC_CX_CFG_GDSCR; + else if (adreno_is_gen7_17_0(adreno_dev)) + pwr->cx_cfg_gdsc_offset = GEN7_17_0_GPU_CC_CX_CFG_GDSCR; + else + pwr->cx_cfg_gdsc_offset = GEN7_GPU_CC_CX_CFG_GDSCR; ret = adreno_device_probe(pdev, adreno_dev); if (ret) diff --git a/gen7_reg.h b/gen7_reg.h index dc74e33c0d..ddd9506d9c 100644 --- a/gen7_reg.h +++ b/gen7_reg.h @@ -1251,6 +1251,7 @@ /* GPUCC registers */ #define GEN7_11_0_GPU_CC_CX_CFG_GDSCR 0x26424 #define GEN7_GPU_CC_CX_CFG_GDSCR 0x26443 +#define GEN7_17_0_GPU_CC_CX_CFG_GDSCR 0x26445 #define GEN7_GPU_CC_GX_DOMAIN_MISC3 0x26541 /* GPU RSC sequencer registers */ From 958f08dc090a5ee05189e9ef70af40dcba28519a Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Thu, 7 Nov 2024 11:34:00 -0800 Subject: [PATCH 0963/1016] kgsl: gen7: Fix GBIF CX part of register programming Currently, the GBIF CX part of the register is programmed after GMU initialization. 
This commit fixes the issue by ensuring all GBIF CX registers are programmed before the GMU boot, allowing for successful GMU initialization. Change-Id: I4c5b129d817fa176134375e13c384b0cad5363b4 Signed-off-by: Hareesh Gundu --- adreno-gpulist.h | 53 ++++++++++++++++++++++------------------------- adreno_gen7.c | 14 +++---------- adreno_gen7_gmu.c | 12 +++++++++++ 3 files changed, 40 insertions(+), 39 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index b263d7402d..4f8ba48cc1 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -1824,12 +1824,11 @@ static const struct adreno_a6xx_core adreno_gpu_core_gen6_3_26_0 = { extern const struct gen7_snapshot_block_list gen7_0_0_snapshot_block_list; -static const struct kgsl_regmap_list gen7_0_0_gbif_regs[] = { +static const struct kgsl_regmap_list gen7_0_0_gbif_cx_regs[] = { { GEN7_GBIF_QSB_SIDE0, 0x00071620 }, { GEN7_GBIF_QSB_SIDE1, 0x00071620 }, { GEN7_GBIF_QSB_SIDE2, 0x00071620 }, { GEN7_GBIF_QSB_SIDE3, 0x00071620 }, - { GEN7_RBBM_GBIF_CLIENT_QOS_CNTL, 0x2120212 }, }; static const struct kgsl_regmap_list a702_hwcg_regs[] = { @@ -2051,8 +2050,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_0_0 = { .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), .ao_hwcg = gen7_0_0_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen7_0_0_ao_hwcg_regs), - .gbif = gen7_0_0_gbif_regs, - .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), + .gbif = gen7_0_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = gen7_0_0_protected_regs, .highest_bank_bit = 16, @@ -2087,8 +2086,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_0_1 = { .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), .ao_hwcg = gen7_0_0_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen7_0_0_ao_hwcg_regs), - .gbif = gen7_0_0_gbif_regs, - .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), + .gbif = gen7_0_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, 
.protected_regs = gen7_0_0_protected_regs, .highest_bank_bit = 16, @@ -2101,23 +2100,21 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_0_1 = { extern const struct gen7_snapshot_block_list gen7_2_0_snapshot_block_list; extern const struct gen7_snapshot_block_list gen7_6_0_snapshot_block_list; -static const struct kgsl_regmap_list gen7_2_0_gbif_regs[] = { +static const struct kgsl_regmap_list gen7_2_0_gbif_cx_regs[] = { { GEN7_GBIF_QSB_SIDE0, 0x00071620 }, { GEN7_GBIF_QSB_SIDE1, 0x00071620 }, { GEN7_GBIF_QSB_SIDE2, 0x00071620 }, { GEN7_GBIF_QSB_SIDE3, 0x00071620 }, - { GEN7_RBBM_GBIF_CLIENT_QOS_CNTL, 0x2120212 }, { GEN7_GMU_CX_MRC_GBIF_QOS_CTRL, 0x33 }, }; extern const struct gen7_snapshot_block_list gen7_3_0_snapshot_block_list; -static const struct kgsl_regmap_list gen7_3_0_gbif_regs[] = { +static const struct kgsl_regmap_list gen7_3_0_gbif_cx_regs[] = { { GEN7_GBIF_QSB_SIDE0, 0x00071620 }, { GEN7_GBIF_QSB_SIDE1, 0x00071620 }, { GEN7_GBIF_QSB_SIDE2, 0x00071620 }, { GEN7_GBIF_QSB_SIDE3, 0x00071620 }, - { GEN7_RBBM_GBIF_CLIENT_QOS_CNTL, 0x00000003 }, }; static const u32 gen7_6_0_gbif_client_qos_values[KGSL_PRIORITY_MAX_RB_LEVELS] = { @@ -2263,8 +2260,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_2_0 = { .hwcg_count = ARRAY_SIZE(gen7_2_0_hwcg_regs), .ao_hwcg = gen7_2_0_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen7_2_0_ao_hwcg_regs), - .gbif = gen7_2_0_gbif_regs, - .gbif_count = ARRAY_SIZE(gen7_2_0_gbif_regs), + .gbif = gen7_2_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen7_2_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = gen7_0_0_protected_regs, .highest_bank_bit = 16, @@ -2300,8 +2297,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_2_1 = { .hwcg_count = ARRAY_SIZE(gen7_2_0_hwcg_regs), .ao_hwcg = gen7_2_0_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen7_2_0_ao_hwcg_regs), - .gbif = gen7_2_0_gbif_regs, - .gbif_count = ARRAY_SIZE(gen7_2_0_gbif_regs), + .gbif = gen7_2_0_gbif_cx_regs, + .gbif_count = 
ARRAY_SIZE(gen7_2_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = gen7_0_0_protected_regs, .highest_bank_bit = 16, @@ -2336,8 +2333,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_3_0 = { .hwcg_count = ARRAY_SIZE(gen7_3_0_hwcg_regs), .ao_hwcg = gen7_0_0_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen7_0_0_ao_hwcg_regs), - .gbif = gen7_3_0_gbif_regs, - .gbif_count = ARRAY_SIZE(gen7_3_0_gbif_regs), + .gbif = gen7_3_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen7_3_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = gen7_0_0_protected_regs, .highest_bank_bit = 15, @@ -2371,8 +2368,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_4_0 = { .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), .ao_hwcg = gen7_0_0_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen7_0_0_ao_hwcg_regs), - .gbif = gen7_0_0_gbif_regs, - .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), + .gbif = gen7_0_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = gen7_0_0_protected_regs, .highest_bank_bit = 16, @@ -2406,8 +2403,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_6_0 = { .hwcg_count = ARRAY_SIZE(gen7_2_0_hwcg_regs), .ao_hwcg = gen7_2_0_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen7_2_0_ao_hwcg_regs), - .gbif = gen7_2_0_gbif_regs, - .gbif_count = ARRAY_SIZE(gen7_2_0_gbif_regs), + .gbif = gen7_2_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen7_2_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = gen7_0_0_protected_regs, .highest_bank_bit = 16, @@ -2498,8 +2495,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_9_0 = { .zap_name = "gen70900_zap.mbn", .ao_hwcg = gen7_2_0_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen7_2_0_ao_hwcg_regs), - .gbif = gen7_2_0_gbif_regs, - .gbif_count = ARRAY_SIZE(gen7_2_0_gbif_regs), + .gbif = gen7_2_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen7_2_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = 
gen7_9_0_protected_regs, .highest_bank_bit = 16, @@ -2537,8 +2534,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_9_1 = { .zap_name = "gen70900_zap.mbn", .ao_hwcg = gen7_2_0_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen7_2_0_ao_hwcg_regs), - .gbif = gen7_0_0_gbif_regs, - .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), + .gbif = gen7_0_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = gen7_9_0_protected_regs, .highest_bank_bit = 16, @@ -2575,8 +2572,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_14_0 = { .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), .ao_hwcg = gen7_0_0_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen7_0_0_ao_hwcg_regs), - .gbif = gen7_0_0_gbif_regs, - .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), + .gbif = gen7_0_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = gen7_0_0_protected_regs, .highest_bank_bit = 15, @@ -2611,8 +2608,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_11_0 = { .hwcg_count = ARRAY_SIZE(gen7_2_0_hwcg_regs), .ao_hwcg = gen7_2_0_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen7_2_0_ao_hwcg_regs), - .gbif = gen7_2_0_gbif_regs, - .gbif_count = ARRAY_SIZE(gen7_2_0_gbif_regs), + .gbif = gen7_2_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen7_2_0_gbif_cx_regs), .hang_detect_cycles = 0xcfffff, .protected_regs = gen7_0_0_protected_regs, .highest_bank_bit = 16, diff --git a/adreno_gen7.c b/adreno_gen7.c index 3811cd4f92..90a589b97c 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -785,10 +785,9 @@ int gen7_start(struct adreno_device *adreno_dev) struct cpu_gpu_lock *pwrup_lock = adreno_dev->pwrup_reglist->hostptr; u64 uche_trap_base = gen7_get_uche_trap_base(); - /* Set up GBIF registers from the GPU core definition */ - kgsl_regmap_multi_write(&device->regmap, gen7_core->gbif, - gen7_core->gbif_count); - + /* Set up GX GBIF registers */ + kgsl_regwrite(device, 
GEN7_RBBM_GBIF_CLIENT_QOS_CNTL, + (adreno_is_gen7_3_0(adreno_dev)) ? 0x00000003 : 0x2120212); kgsl_regwrite(device, GEN7_UCHE_GBIF_GX_CONFIG, 0x10240e0); /* Make all blocks contribute to the GPU BUSY perf counter */ @@ -955,13 +954,6 @@ int gen7_start(struct adreno_device *adreno_dev) if (ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) kgsl_regwrite(device, GEN7_CP_AQE_APRIV_CNTL, BIT(0)); - if (adreno_is_gen7_9_x(adreno_dev)) - kgsl_regrmw(device, GEN7_GBIF_CX_CONFIG, GENMASK(31, 29), - FIELD_PREP(GENMASK(31, 29), 1)); - else if (adreno_is_gen7_14_0(adreno_dev)) - kgsl_regrmw(device, GEN7_GBIF_CX_CONFIG, GENMASK(31, 29), - FIELD_PREP(GENMASK(31, 29), 2)); - /* * CP Icache prefetch brings no benefit on few gen7 variants because of * the prefetch granularity size. diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 6bffb5cce4..5ac7026f33 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -970,6 +970,7 @@ void gen7_gmu_register_config(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev); u32 val; /* Clear any previously set cm3 fault */ @@ -998,6 +999,17 @@ void gen7_gmu_register_config(struct adreno_device *adreno_dev) */ gmu_core_regwrite(device, GEN7_GMU_CM3_CFG, 0x4052); + /* Set up GBIF registers from the GPU core definition */ + kgsl_regmap_multi_write(&device->regmap, gen7_core->gbif, + gen7_core->gbif_count); + + if (adreno_is_gen7_9_x(adreno_dev)) + kgsl_regrmw(device, GEN7_GBIF_CX_CONFIG, GENMASK(31, 29), + FIELD_PREP(GENMASK(31, 29), 1)); + else if (adreno_is_gen7_14_0(adreno_dev)) + kgsl_regrmw(device, GEN7_GBIF_CX_CONFIG, GENMASK(31, 29), + FIELD_PREP(GENMASK(31, 29), 2)); + /** * We may have asserted gbif halt as part of reset sequence which may * not get cleared if the gdsc was not reset. 
So clear it before From bfc5182b1930231451373ed1d385fa9c536b23a7 Mon Sep 17 00:00:00 2001 From: Harshitha Sai Neelati Date: Thu, 7 Nov 2024 17:23:40 +0530 Subject: [PATCH 0964/1016] kgsl: gen7: Add support for Gen7_17_0 GPU Add support for Gen7_17_0 GPU. Change-Id: I8863e9ff5494b5ef12c87297b0242bb0d9720016 Signed-off-by: Harshitha Sai Neelati --- adreno-gpulist.h | 30 ++++++++++++++++++++++++++++++ adreno.c | 4 ++-- adreno.h | 11 +++++++++-- adreno_gen7_gmu.c | 2 +- adreno_gen7_snapshot.c | 6 +++--- 5 files changed, 45 insertions(+), 8 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 4f8ba48cc1..cb6166a2e7 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2619,6 +2619,35 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_11_0 = { .fast_bus_hint = true, }; +static const struct adreno_gen7_core adreno_gpu_core_gen7_17_0 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_GEN7_17_0, + UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), + .compatible = "qcom,adreno-gpu-gen7-17-0", + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION, + .gpudev = &adreno_gen7_gmu_gpudev.base, + .perfcounters = &adreno_gen7_no_cb_perfcounters, + .uche_gmem_alignment = SZ_16M, + .gmem_size = SZ_1M, + .bus_width = 32, + .snapshot_size = SZ_4M, + }, + .sqefw_name = "gen70e00_sqe.fw", + .gmufw_name = "gmu_gen70e00.bin", + .zap_name = "gen70e00_zap.mbn", + .hwcg = gen7_0_0_hwcg_regs, + .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), + .ao_hwcg = gen7_0_0_ao_hwcg_regs, + .ao_hwcg_count = ARRAY_SIZE(gen7_0_0_ao_hwcg_regs), + .gbif = gen7_0_0_gbif_cx_regs, + .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_cx_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = gen7_0_0_protected_regs, + .highest_bank_bit = 15, + .gen7_snapshot_block_list = &gen7_14_0_snapshot_block_list, + .ctxt_record_size = 1536 * 1024 +}; + static const struct kgsl_regmap_list a663_hwcg_regs[] = { {A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, {A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, @@ -3281,6 
+3310,7 @@ static const struct adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_gen7_9_1.base, &adreno_gpu_core_gen7_14_0.base, &adreno_gpu_core_gen7_11_0.base, + &adreno_gpu_core_gen7_17_0.base, &adreno_gpu_core_gen8_0_0.base, &adreno_gpu_core_gen8_0_1.base, &adreno_gpu_core_gen8_3_0.base, diff --git a/adreno.c b/adreno.c index c47ba6c136..3126503b5b 100644 --- a/adreno.c +++ b/adreno.c @@ -1406,12 +1406,12 @@ int adreno_device_probe(struct platform_device *pdev, /* * Force no write allocate for A5x, A6x and all gen7 targets - * except gen_7_9_x and gen_7_14_0. gen_7_9_x and gen_7_14_0 + * except gen_7_9_x and gen_7_14_0_family. gen_7_9_x and gen_7_14_0_family * use write allocate. */ if (adreno_is_a5xx(adreno_dev) || adreno_is_a6xx(adreno_dev) || (adreno_is_gen7(adreno_dev) && !adreno_is_gen7_9_x(adreno_dev) && - !adreno_is_gen7_14_0(adreno_dev))) + !adreno_is_gen7_14_0_family(adreno_dev))) kgsl_mmu_set_feature(device, KGSL_MMU_FORCE_LLCC_NWA); /* Bind the components before doing the KGSL platform probe. 
*/ diff --git a/adreno.h b/adreno.h index 494443e8c1..99e47569fd 100644 --- a/adreno.h +++ b/adreno.h @@ -251,6 +251,7 @@ enum adreno_gpurev { ADRENO_REV_GEN7_9_1 = ADRENO_GPUREV_VALUE(7, 9, 1), ADRENO_REV_GEN7_14_0 = ADRENO_GPUREV_VALUE(7, 14, 0), ADRENO_REV_GEN7_11_0 = ADRENO_GPUREV_VALUE(7, 11, 0), + ADRENO_REV_GEN7_17_0 = ADRENO_GPUREV_VALUE(7, 17, 0), ADRENO_REV_GEN8_0_0 = ADRENO_GPUREV_VALUE(8, 0, 0), ADRENO_REV_GEN8_0_1 = ADRENO_GPUREV_VALUE(8, 0, 1), ADRENO_REV_GEN8_3_0 = ADRENO_GPUREV_VALUE(8, 3, 0), @@ -1289,6 +1290,7 @@ ADRENO_TARGET(gen7_9_0, ADRENO_REV_GEN7_9_0) ADRENO_TARGET(gen7_9_1, ADRENO_REV_GEN7_9_1) ADRENO_TARGET(gen7_14_0, ADRENO_REV_GEN7_14_0) ADRENO_TARGET(gen7_11_0, ADRENO_REV_GEN7_11_0) +ADRENO_TARGET(gen7_17_0, ADRENO_REV_GEN7_17_0) ADRENO_TARGET(gen8_0_0, ADRENO_REV_GEN8_0_0) ADRENO_TARGET(gen8_0_1, ADRENO_REV_GEN8_0_1) ADRENO_TARGET(gen8_3_0, ADRENO_REV_GEN8_3_0) @@ -1306,11 +1308,16 @@ static inline int adreno_is_gen7_0_x_family(struct adreno_device *adreno_dev) adreno_is_gen7_4_0(adreno_dev) || adreno_is_gen7_3_0(adreno_dev); } +static inline int adreno_is_gen7_14_0_family(struct adreno_device *adreno_dev) +{ + return adreno_is_gen7_14_0(adreno_dev) || adreno_is_gen7_17_0(adreno_dev); +} + static inline int adreno_is_gen7_2_x_family(struct adreno_device *adreno_dev) { return adreno_is_gen7_2_0(adreno_dev) || adreno_is_gen7_2_1(adreno_dev) || adreno_is_gen7_6_0(adreno_dev) || adreno_is_gen7_9_x(adreno_dev) || - adreno_is_gen7_14_0(adreno_dev) || adreno_is_gen7_11_0(adreno_dev); + adreno_is_gen7_14_0_family(adreno_dev) || adreno_is_gen7_11_0(adreno_dev); } static inline int adreno_is_gen8_0_x_family(struct adreno_device *adreno_dev) @@ -1322,7 +1329,7 @@ static inline int adreno_is_gen8_0_x_family(struct adreno_device *adreno_dev) /* Gen7 targets which does not support concurrent binning */ static inline int adreno_is_gen7_no_cb_family(struct adreno_device *adreno_dev) { - return adreno_is_gen7_14_0(adreno_dev) || 
adreno_is_gen7_3_0(adreno_dev); + return adreno_is_gen7_14_0_family(adreno_dev) || adreno_is_gen7_3_0(adreno_dev); } /* diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 5ac7026f33..76e2469adc 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -1006,7 +1006,7 @@ void gen7_gmu_register_config(struct adreno_device *adreno_dev) if (adreno_is_gen7_9_x(adreno_dev)) kgsl_regrmw(device, GEN7_GBIF_CX_CONFIG, GENMASK(31, 29), FIELD_PREP(GENMASK(31, 29), 1)); - else if (adreno_is_gen7_14_0(adreno_dev)) + else if (adreno_is_gen7_14_0_family(adreno_dev)) kgsl_regrmw(device, GEN7_GBIF_CX_CONFIG, GENMASK(31, 29), FIELD_PREP(GENMASK(31, 29), 2)); diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c index 1d4b63bd85..2f2123f255 100644 --- a/adreno_gen7_snapshot.c +++ b/adreno_gen7_snapshot.c @@ -380,7 +380,7 @@ static size_t gen7_legacy_snapshot_shader(struct kgsl_device *device, * AHB path might fail. Hence, skip SP_INST_TAG and SP_INST_DATA* * state types during snapshot dump in legacy flow. 
*/ - if (adreno_is_gen7_0_x_family(adreno_dev) || adreno_is_gen7_14_0(adreno_dev)) { + if (adreno_is_gen7_0_x_family(adreno_dev) || adreno_is_gen7_14_0_family(adreno_dev)) { if (block->statetype == SP_INST_TAG || block->statetype == SP_INST_DATA || block->statetype == SP_INST_DATA_1 || @@ -716,7 +716,7 @@ static void gen7_snapshot_shader(struct kgsl_device *device, void *priv) = gen7_legacy_snapshot_shader; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - if (adreno_is_gen7_0_x_family(adreno_dev)) + if (adreno_is_gen7_0_x_family(adreno_dev) || adreno_is_gen7_14_0_family(adreno_dev)) kgsl_regrmw(device, GEN7_SP_DBG_CNTL, GENMASK(1, 0), 3); if (CD_SCRIPT_CHECK(device)) { @@ -786,7 +786,7 @@ static void gen7_snapshot_shader(struct kgsl_device *device, } done: - if (adreno_is_gen7_0_x_family(adreno_dev)) + if (adreno_is_gen7_0_x_family(adreno_dev) || adreno_is_gen7_14_0_family(adreno_dev)) kgsl_regrmw(device, GEN7_SP_DBG_CNTL, GENMASK(1, 0), 0x0); } From 032006f47a78c600a970879ea48fe926807d20cf Mon Sep 17 00:00:00 2001 From: Kaushal Sanadhya Date: Tue, 17 Dec 2024 16:04:19 +0530 Subject: [PATCH 0965/1016] kgsl: gen7: Update external core registers for Gen7_17_0 snapshot Update external cores (RSCC, GPUCC, and CPR) register ranges for Gen7_17_0 snapshot. 
Change-Id: I13c5cc5569010a5894a398430517b5de361e1b95 Signed-off-by: Kaushal Sanadhya --- adreno-gpulist.h | 4 +- adreno_gen7_17_0_snapshot.h | 91 +++++++++++++++++++++++++++++++++++++ adreno_gen7_snapshot.c | 29 +++++++++++- 3 files changed, 122 insertions(+), 2 deletions(-) create mode 100644 adreno_gen7_17_0_snapshot.h diff --git a/adreno-gpulist.h b/adreno-gpulist.h index cb6166a2e7..b7684f2e61 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2619,6 +2619,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_11_0 = { .fast_bus_hint = true, }; +extern const struct gen7_snapshot_block_list gen7_17_0_snapshot_block_list; + static const struct adreno_gen7_core adreno_gpu_core_gen7_17_0 = { .base = { DEFINE_ADRENO_REV(ADRENO_REV_GEN7_17_0, @@ -2644,7 +2646,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_17_0 = { .hang_detect_cycles = 0xcfffff, .protected_regs = gen7_0_0_protected_regs, .highest_bank_bit = 15, - .gen7_snapshot_block_list = &gen7_14_0_snapshot_block_list, + .gen7_snapshot_block_list = &gen7_17_0_snapshot_block_list, .ctxt_record_size = 1536 * 1024 }; diff --git a/adreno_gen7_17_0_snapshot.h b/adreno_gen7_17_0_snapshot.h new file mode 100644 index 0000000000..6e7241d702 --- /dev/null +++ b/adreno_gen7_17_0_snapshot.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ +#ifndef __ADRENO_GEN7_17_0_SNAPSHOT_H +#define __ADRENO_GEN7_17_0_SNAPSHOT_H + +#include "adreno_gen7_snapshot.h" + +/* + * Block : ['RSCC_RSC'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 101 (Regs:606) + */ +static const u32 gen7_17_0_rscc_registers[] = { + 0x14000, 0x14034, 0x14036, 0x14036, 0x14040, 0x14042, 0x14044, 0x14045, + 0x14047, 0x14047, 0x14080, 0x14084, 0x14089, 0x1408c, 0x14091, 0x14094, + 0x14099, 0x1409c, 0x140a1, 0x140a4, 0x140a9, 0x140ac, 0x140b1, 0x140b4, + 0x140b9, 0x140bc, 0x14100, 0x14104, 0x14114, 0x14119, 0x14124, 0x14132, + 0x14154, 0x1416b, 0x14340, 0x14341, 0x14344, 0x14344, 0x14346, 0x1437c, + 0x143f0, 0x143f8, 0x143fa, 0x143fe, 0x14400, 0x14404, 0x14406, 0x1440a, + 0x1440c, 0x14410, 0x14412, 0x14416, 0x14418, 0x1441c, 0x1441e, 0x14422, + 0x14424, 0x14424, 0x14498, 0x144a0, 0x144a2, 0x144a6, 0x144a8, 0x144ac, + 0x144ae, 0x144b2, 0x144b4, 0x144b8, 0x144ba, 0x144be, 0x144c0, 0x144c4, + 0x144c6, 0x144ca, 0x144cc, 0x144cc, 0x14540, 0x14548, 0x1454a, 0x1454e, + 0x14550, 0x14554, 0x14556, 0x1455a, 0x1455c, 0x14560, 0x14562, 0x14566, + 0x14568, 0x1456c, 0x1456e, 0x14572, 0x14574, 0x14574, 0x145e8, 0x145f0, + 0x145f2, 0x145f6, 0x145f8, 0x145fc, 0x145fe, 0x14602, 0x14604, 0x14608, + 0x1460a, 0x1460e, 0x14610, 0x14614, 0x14616, 0x1461a, 0x1461c, 0x1461c, + 0x14690, 0x14698, 0x1469a, 0x1469e, 0x146a0, 0x146a4, 0x146a6, 0x146aa, + 0x146ac, 0x146b0, 0x146b2, 0x146b6, 0x146b8, 0x146bc, 0x146be, 0x146c2, + 0x146c4, 0x146c4, 0x14738, 0x14740, 0x14742, 0x14746, 0x14748, 0x1474c, + 0x1474e, 0x14752, 0x14754, 0x14758, 0x1475a, 0x1475e, 0x14760, 0x14764, + 0x14766, 0x1476a, 0x1476c, 0x1476c, 0x147e0, 0x147e8, 0x147ea, 0x147ee, + 0x147f0, 0x147f4, 0x147f6, 0x147fa, 0x147fc, 0x14800, 0x14802, 0x14806, + 0x14808, 0x1480c, 0x1480e, 0x14812, 0x14814, 0x14814, 0x14888, 0x14890, + 0x14892, 0x14896, 0x14898, 0x1489c, 0x1489e, 0x148a2, 0x148a4, 0x148a8, + 0x148aa, 0x148ae, 0x148b0, 0x148b4, 0x148b6, 0x148ba, 0x148bc, 0x148bc, + 0x14930, 0x14938, 
0x1493a, 0x1493e, 0x14940, 0x14944, 0x14946, 0x1494a, + 0x1494c, 0x14950, 0x14952, 0x14956, 0x14958, 0x1495c, 0x1495e, 0x14962, + 0x14964, 0x14964, + UINT_MAX, UINT_MAX, + +}; +static_assert(IS_ALIGNED(sizeof(gen7_17_0_rscc_registers), 8)); + +/* + * Block : ['CPR'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 22 (Regs:478) + */ +static const u32 gen7_17_0_cpr_registers[] = { + 0x26800, 0x26805, 0x26808, 0x2680c, 0x26814, 0x26814, 0x2681c, 0x2681c, + 0x26820, 0x26838, 0x26840, 0x26840, 0x26848, 0x26848, 0x26850, 0x26850, + 0x26880, 0x2688e, 0x26980, 0x269b0, 0x269c0, 0x269c2, 0x269c6, 0x269c8, + 0x269e0, 0x269ee, 0x269fb, 0x269ff, 0x26a02, 0x26a07, 0x26a09, 0x26a0b, + 0x26a10, 0x26b0f, 0x27440, 0x27441, 0x27444, 0x27444, 0x27480, 0x274a2, + 0x274ac, 0x274c4, 0x274c8, 0x274da, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_17_0_cpr_registers), 8)); + +/* + * Block : ['GPU_CC'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 42 (Regs:494) + */ +static const u32 gen7_17_0_gpucc_registers[] = { + 0x24000, 0x2400f, 0x24400, 0x2440f, 0x24800, 0x24805, 0x24c00, 0x24cff, + 0x25400, 0x25404, 0x25800, 0x25804, 0x25c00, 0x25c04, 0x26000, 0x26004, + 0x26400, 0x26405, 0x26414, 0x2641d, 0x2642a, 0x2642c, 0x2642e, 0x26432, + 0x26434, 0x26434, 0x26443, 0x26457, 0x26459, 0x2645d, 0x2645f, 0x26464, + 0x26477, 0x26479, 0x26489, 0x2648b, 0x2649a, 0x2649b, 0x264ad, 0x264af, + 0x264b1, 0x264b5, 0x264d6, 0x264d8, 0x264e7, 0x264e9, 0x264f9, 0x264fa, + 0x2650a, 0x2650d, 0x2651f, 0x26520, 0x2652d, 0x2652f, 0x2653e, 0x2653e, + 0x26540, 0x2654e, 0x26554, 0x26573, 0x26576, 0x26576, 0x26593, 0x26593, + 0x26600, 0x26616, 0x26620, 0x2662d, 0x26630, 0x26631, 0x26635, 0x26635, + 0x26637, 0x26637, 0x2663a, 0x2663a, 0x26642, 0x26642, 0x26656, 0x26658, + 0x2665b, 0x2665d, 0x2665f, 0x26662, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_17_0_gpucc_registers), 8)); + +static const u32 *gen7_17_0_external_core_regs[] = { + gen7_17_0_gpucc_registers, 
+ gen7_17_0_cpr_registers, +}; +#endif /*_ADRENO_GEN7_17_0_SNAPSHOT_H */ diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c index 2f2123f255..7fc1018959 100644 --- a/adreno_gen7_snapshot.c +++ b/adreno_gen7_snapshot.c @@ -11,8 +11,9 @@ #include "adreno_gen7_3_0_snapshot.h" #include "adreno_gen7_6_0_snapshot.h" #include "adreno_gen7_9_0_snapshot.h" -#include "adreno_gen7_14_0_snapshot.h" #include "adreno_gen7_11_0_snapshot.h" +#include "adreno_gen7_14_0_snapshot.h" +#include "adreno_gen7_17_0_snapshot.h" static struct kgsl_memdesc *gen7_capturescript; static struct kgsl_memdesc *gen7_crashdump_registers; @@ -205,6 +206,32 @@ const struct gen7_snapshot_block_list gen7_11_0_snapshot_block_list = { .index_registers_len = ARRAY_SIZE(gen7_11_0_cp_indexed_reg_list), }; +const struct gen7_snapshot_block_list gen7_17_0_snapshot_block_list = { + .pre_crashdumper_regs = gen7_9_0_pre_crashdumper_gpu_registers, + .debugbus_blocks = gen7_14_0_debugbus_blocks, + .debugbus_blocks_len = ARRAY_SIZE(gen7_14_0_debugbus_blocks), + .gbif_debugbus_blocks = gen7_gbif_debugbus_blocks, + .gbif_debugbus_blocks_len = ARRAY_SIZE(gen7_gbif_debugbus_blocks), + .cx_debugbus_blocks = gen7_cx_dbgc_debugbus_blocks, + .cx_debugbus_blocks_len = ARRAY_SIZE(gen7_cx_dbgc_debugbus_blocks), + .external_core_regs = gen7_17_0_external_core_regs, + .num_external_core_regs = ARRAY_SIZE(gen7_17_0_external_core_regs), + .gmu_regs = gen7_14_0_gmu_registers, + .gmu_gx_regs = gen7_14_0_gmu_gx_registers, + .rscc_regs = gen7_17_0_rscc_registers, + .reg_list = gen7_14_0_reg_list, + .cx_misc_regs = gen7_14_0_cx_misc_registers, + .shader_blocks = gen7_14_0_shader_blocks, + .num_shader_blocks = ARRAY_SIZE(gen7_14_0_shader_blocks), + .clusters = gen7_14_0_clusters, + .num_clusters = ARRAY_SIZE(gen7_14_0_clusters), + .sptp_clusters = gen7_14_0_sptp_clusters, + .num_sptp_clusters = ARRAY_SIZE(gen7_14_0_sptp_clusters), + .post_crashdumper_regs = gen7_14_0_post_crashdumper_registers, + .index_registers = 
gen7_14_0_cp_indexed_reg_list, + .index_registers_len = ARRAY_SIZE(gen7_14_0_cp_indexed_reg_list), +}; + #define GEN7_SP_READ_SEL_VAL(_location, _pipe, _statetype, _usptp, _sptp) \ (FIELD_PREP(GENMASK(19, 18), _location) | \ FIELD_PREP(GENMASK(17, 16), _pipe) | \ From 3aa5f2b95ad339191fd944f9e44131e0da67620a Mon Sep 17 00:00:00 2001 From: Harshitha Sai Neelati Date: Fri, 20 Dec 2024 01:26:45 +0530 Subject: [PATCH 0966/1016] kgsl: gen7: Update GMU FW binary for Gen7_17_0 GPU Update the GMU firmware binary for Gen7_17_0 GPU. Change-Id: I1be94919871cefa2232d2b3505f05f822887ea7d Signed-off-by: Harshitha Sai Neelati --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index b7684f2e61..bc893c3cba 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2635,7 +2635,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_17_0 = { .snapshot_size = SZ_4M, }, .sqefw_name = "gen70e00_sqe.fw", - .gmufw_name = "gmu_gen70e00.bin", + .gmufw_name = "gen71700_gmu.bin", .zap_name = "gen70e00_zap.mbn", .hwcg = gen7_0_0_hwcg_regs, .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), From 683829368113d81e99373b73b4d591419c291130 Mon Sep 17 00:00:00 2001 From: Kaushal Sanadhya Date: Tue, 17 Dec 2024 16:04:19 +0530 Subject: [PATCH 0967/1016] kgsl: gen7: Update external core registers for Gen7_17_0 snapshot Update external cores (RSCC, GPUCC, and CPR) register ranges for Gen7_17_0 snapshot. 
Change-Id: I13c5cc5569010a5894a398430517b5de361e1b95 Signed-off-by: Kaushal Sanadhya --- adreno-gpulist.h | 4 +- adreno_gen7_17_0_snapshot.h | 91 +++++++++++++++++++++++++++++++++++++ adreno_gen7_snapshot.c | 29 +++++++++++- 3 files changed, 122 insertions(+), 2 deletions(-) create mode 100644 adreno_gen7_17_0_snapshot.h diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 391986b157..b506814405 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2619,6 +2619,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_11_0 = { .fast_bus_hint = true, }; +extern const struct gen7_snapshot_block_list gen7_17_0_snapshot_block_list; + static const struct adreno_gen7_core adreno_gpu_core_gen7_17_0 = { .base = { DEFINE_ADRENO_REV(ADRENO_REV_GEN7_17_0, @@ -2644,7 +2646,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_17_0 = { .hang_detect_cycles = 0xcfffff, .protected_regs = gen7_0_0_protected_regs, .highest_bank_bit = 15, - .gen7_snapshot_block_list = &gen7_14_0_snapshot_block_list, + .gen7_snapshot_block_list = &gen7_17_0_snapshot_block_list, .ctxt_record_size = 1536 * 1024 }; diff --git a/adreno_gen7_17_0_snapshot.h b/adreno_gen7_17_0_snapshot.h new file mode 100644 index 0000000000..6e7241d702 --- /dev/null +++ b/adreno_gen7_17_0_snapshot.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ +#ifndef __ADRENO_GEN7_17_0_SNAPSHOT_H +#define __ADRENO_GEN7_17_0_SNAPSHOT_H + +#include "adreno_gen7_snapshot.h" + +/* + * Block : ['RSCC_RSC'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 101 (Regs:606) + */ +static const u32 gen7_17_0_rscc_registers[] = { + 0x14000, 0x14034, 0x14036, 0x14036, 0x14040, 0x14042, 0x14044, 0x14045, + 0x14047, 0x14047, 0x14080, 0x14084, 0x14089, 0x1408c, 0x14091, 0x14094, + 0x14099, 0x1409c, 0x140a1, 0x140a4, 0x140a9, 0x140ac, 0x140b1, 0x140b4, + 0x140b9, 0x140bc, 0x14100, 0x14104, 0x14114, 0x14119, 0x14124, 0x14132, + 0x14154, 0x1416b, 0x14340, 0x14341, 0x14344, 0x14344, 0x14346, 0x1437c, + 0x143f0, 0x143f8, 0x143fa, 0x143fe, 0x14400, 0x14404, 0x14406, 0x1440a, + 0x1440c, 0x14410, 0x14412, 0x14416, 0x14418, 0x1441c, 0x1441e, 0x14422, + 0x14424, 0x14424, 0x14498, 0x144a0, 0x144a2, 0x144a6, 0x144a8, 0x144ac, + 0x144ae, 0x144b2, 0x144b4, 0x144b8, 0x144ba, 0x144be, 0x144c0, 0x144c4, + 0x144c6, 0x144ca, 0x144cc, 0x144cc, 0x14540, 0x14548, 0x1454a, 0x1454e, + 0x14550, 0x14554, 0x14556, 0x1455a, 0x1455c, 0x14560, 0x14562, 0x14566, + 0x14568, 0x1456c, 0x1456e, 0x14572, 0x14574, 0x14574, 0x145e8, 0x145f0, + 0x145f2, 0x145f6, 0x145f8, 0x145fc, 0x145fe, 0x14602, 0x14604, 0x14608, + 0x1460a, 0x1460e, 0x14610, 0x14614, 0x14616, 0x1461a, 0x1461c, 0x1461c, + 0x14690, 0x14698, 0x1469a, 0x1469e, 0x146a0, 0x146a4, 0x146a6, 0x146aa, + 0x146ac, 0x146b0, 0x146b2, 0x146b6, 0x146b8, 0x146bc, 0x146be, 0x146c2, + 0x146c4, 0x146c4, 0x14738, 0x14740, 0x14742, 0x14746, 0x14748, 0x1474c, + 0x1474e, 0x14752, 0x14754, 0x14758, 0x1475a, 0x1475e, 0x14760, 0x14764, + 0x14766, 0x1476a, 0x1476c, 0x1476c, 0x147e0, 0x147e8, 0x147ea, 0x147ee, + 0x147f0, 0x147f4, 0x147f6, 0x147fa, 0x147fc, 0x14800, 0x14802, 0x14806, + 0x14808, 0x1480c, 0x1480e, 0x14812, 0x14814, 0x14814, 0x14888, 0x14890, + 0x14892, 0x14896, 0x14898, 0x1489c, 0x1489e, 0x148a2, 0x148a4, 0x148a8, + 0x148aa, 0x148ae, 0x148b0, 0x148b4, 0x148b6, 0x148ba, 0x148bc, 0x148bc, + 0x14930, 0x14938, 
0x1493a, 0x1493e, 0x14940, 0x14944, 0x14946, 0x1494a, + 0x1494c, 0x14950, 0x14952, 0x14956, 0x14958, 0x1495c, 0x1495e, 0x14962, + 0x14964, 0x14964, + UINT_MAX, UINT_MAX, + +}; +static_assert(IS_ALIGNED(sizeof(gen7_17_0_rscc_registers), 8)); + +/* + * Block : ['CPR'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 22 (Regs:478) + */ +static const u32 gen7_17_0_cpr_registers[] = { + 0x26800, 0x26805, 0x26808, 0x2680c, 0x26814, 0x26814, 0x2681c, 0x2681c, + 0x26820, 0x26838, 0x26840, 0x26840, 0x26848, 0x26848, 0x26850, 0x26850, + 0x26880, 0x2688e, 0x26980, 0x269b0, 0x269c0, 0x269c2, 0x269c6, 0x269c8, + 0x269e0, 0x269ee, 0x269fb, 0x269ff, 0x26a02, 0x26a07, 0x26a09, 0x26a0b, + 0x26a10, 0x26b0f, 0x27440, 0x27441, 0x27444, 0x27444, 0x27480, 0x274a2, + 0x274ac, 0x274c4, 0x274c8, 0x274da, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_17_0_cpr_registers), 8)); + +/* + * Block : ['GPU_CC'] + * REGION : UNSLICE + * Pipeline: PIPE_NONE + * pairs : 42 (Regs:494) + */ +static const u32 gen7_17_0_gpucc_registers[] = { + 0x24000, 0x2400f, 0x24400, 0x2440f, 0x24800, 0x24805, 0x24c00, 0x24cff, + 0x25400, 0x25404, 0x25800, 0x25804, 0x25c00, 0x25c04, 0x26000, 0x26004, + 0x26400, 0x26405, 0x26414, 0x2641d, 0x2642a, 0x2642c, 0x2642e, 0x26432, + 0x26434, 0x26434, 0x26443, 0x26457, 0x26459, 0x2645d, 0x2645f, 0x26464, + 0x26477, 0x26479, 0x26489, 0x2648b, 0x2649a, 0x2649b, 0x264ad, 0x264af, + 0x264b1, 0x264b5, 0x264d6, 0x264d8, 0x264e7, 0x264e9, 0x264f9, 0x264fa, + 0x2650a, 0x2650d, 0x2651f, 0x26520, 0x2652d, 0x2652f, 0x2653e, 0x2653e, + 0x26540, 0x2654e, 0x26554, 0x26573, 0x26576, 0x26576, 0x26593, 0x26593, + 0x26600, 0x26616, 0x26620, 0x2662d, 0x26630, 0x26631, 0x26635, 0x26635, + 0x26637, 0x26637, 0x2663a, 0x2663a, 0x26642, 0x26642, 0x26656, 0x26658, + 0x2665b, 0x2665d, 0x2665f, 0x26662, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_17_0_gpucc_registers), 8)); + +static const u32 *gen7_17_0_external_core_regs[] = { + gen7_17_0_gpucc_registers, 
+ gen7_17_0_cpr_registers, +}; +#endif /*_ADRENO_GEN7_17_0_SNAPSHOT_H */ diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c index 2f2123f255..7fc1018959 100644 --- a/adreno_gen7_snapshot.c +++ b/adreno_gen7_snapshot.c @@ -11,8 +11,9 @@ #include "adreno_gen7_3_0_snapshot.h" #include "adreno_gen7_6_0_snapshot.h" #include "adreno_gen7_9_0_snapshot.h" -#include "adreno_gen7_14_0_snapshot.h" #include "adreno_gen7_11_0_snapshot.h" +#include "adreno_gen7_14_0_snapshot.h" +#include "adreno_gen7_17_0_snapshot.h" static struct kgsl_memdesc *gen7_capturescript; static struct kgsl_memdesc *gen7_crashdump_registers; @@ -205,6 +206,32 @@ const struct gen7_snapshot_block_list gen7_11_0_snapshot_block_list = { .index_registers_len = ARRAY_SIZE(gen7_11_0_cp_indexed_reg_list), }; +const struct gen7_snapshot_block_list gen7_17_0_snapshot_block_list = { + .pre_crashdumper_regs = gen7_9_0_pre_crashdumper_gpu_registers, + .debugbus_blocks = gen7_14_0_debugbus_blocks, + .debugbus_blocks_len = ARRAY_SIZE(gen7_14_0_debugbus_blocks), + .gbif_debugbus_blocks = gen7_gbif_debugbus_blocks, + .gbif_debugbus_blocks_len = ARRAY_SIZE(gen7_gbif_debugbus_blocks), + .cx_debugbus_blocks = gen7_cx_dbgc_debugbus_blocks, + .cx_debugbus_blocks_len = ARRAY_SIZE(gen7_cx_dbgc_debugbus_blocks), + .external_core_regs = gen7_17_0_external_core_regs, + .num_external_core_regs = ARRAY_SIZE(gen7_17_0_external_core_regs), + .gmu_regs = gen7_14_0_gmu_registers, + .gmu_gx_regs = gen7_14_0_gmu_gx_registers, + .rscc_regs = gen7_17_0_rscc_registers, + .reg_list = gen7_14_0_reg_list, + .cx_misc_regs = gen7_14_0_cx_misc_registers, + .shader_blocks = gen7_14_0_shader_blocks, + .num_shader_blocks = ARRAY_SIZE(gen7_14_0_shader_blocks), + .clusters = gen7_14_0_clusters, + .num_clusters = ARRAY_SIZE(gen7_14_0_clusters), + .sptp_clusters = gen7_14_0_sptp_clusters, + .num_sptp_clusters = ARRAY_SIZE(gen7_14_0_sptp_clusters), + .post_crashdumper_regs = gen7_14_0_post_crashdumper_registers, + .index_registers = 
gen7_14_0_cp_indexed_reg_list, + .index_registers_len = ARRAY_SIZE(gen7_14_0_cp_indexed_reg_list), +}; + #define GEN7_SP_READ_SEL_VAL(_location, _pipe, _statetype, _usptp, _sptp) \ (FIELD_PREP(GENMASK(19, 18), _location) | \ FIELD_PREP(GENMASK(17, 16), _pipe) | \ From c87b5662cef15d36ea2e051a85b6f2de8c95191f Mon Sep 17 00:00:00 2001 From: Harshitha Sai Neelati Date: Sat, 4 Jan 2025 11:18:19 +0530 Subject: [PATCH 0968/1016] kgsl: gen7: Enable IFPC feature for Gen7_17_0 GPU Enable IFPC feature for power saving. Change-Id: I592fc5e48fb187d5034d686c4d9a5a310b69ca5c Signed-off-by: Harshitha Sai Neelati --- adreno-gpulist.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index b506814405..7edbe88f74 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2626,7 +2626,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_17_0 = { DEFINE_ADRENO_REV(ADRENO_REV_GEN7_17_0, UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), .compatible = "qcom,adreno-gpu-gen7-17-0", - .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION, + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | + ADRENO_IFPC, .gpudev = &adreno_gen7_gmu_gpudev.base, .perfcounters = &adreno_gen7_no_cb_perfcounters, .uche_gmem_alignment = SZ_16M, From 6d62a363f1e53f5b97fea16fa1c15cecc5efd861 Mon Sep 17 00:00:00 2001 From: Harshitha Sai Neelati Date: Sat, 4 Jan 2025 11:25:25 +0530 Subject: [PATCH 0969/1016] kgsl: gen7: Update uche_gmem_alignment for Gen7_17_0 GPU Update uche_gmem_alignment for Gen7_17_0 GPU. 
Change-Id: Ia6ca00a0c41871a1dabe919734f4ad0d2f3c82ff Signed-off-by: Harshitha Sai Neelati --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 7edbe88f74..35fd33c4de 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2630,7 +2630,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_17_0 = { ADRENO_IFPC, .gpudev = &adreno_gen7_gmu_gpudev.base, .perfcounters = &adreno_gen7_no_cb_perfcounters, - .uche_gmem_alignment = SZ_16M, + .uche_gmem_alignment = 0, .gmem_size = SZ_1M, .bus_width = 32, .snapshot_size = SZ_4M, From 4aab5fb3b637a154a0d840487282418639b3877e Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Mon, 9 Dec 2024 23:23:54 +0530 Subject: [PATCH 0970/1016] kgsl: gmu: Handle dummy gMxC resource entries from cmd_db For targets where gMxC is not supported but RM still has a hardware instance, AOP will update this as a dummy entry in cmd_db. This means cmd_db will have an entry for gMxC with all-zero values (no supported operating levels). To handle this in KGSL, add a check for dummy gMxC resource entries from cmd_db. If such an entry is found, return an error code (-ENODATA) instead of attempting to process it, which would lead to an invalid MxC vote. 
Change-Id: I2efa3924bf7e0da1edf615ef0523ced883ad98b5 Signed-off-by: SIVA MULLATI --- adreno_gen8_rpmh.c | 9 ++++++--- adreno_rpmh.c | 8 ++++++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/adreno_gen8_rpmh.c b/adreno_gen8_rpmh.c index 6392799aa2..6bc24a808c 100644 --- a/adreno_gen8_rpmh.c +++ b/adreno_gen8_rpmh.c @@ -211,10 +211,13 @@ static int build_dcvs_table(struct adreno_device *adreno_dev) /* If the target supports dedicated MxC rail, read the same */ if (cmd_db_read_addr("gmxc.lvl")) { ret = adreno_rpmh_arc_cmds(&gmxc_arc, "gmxc.lvl"); - if (ret) - return ret; - ret = setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, &gmxc_arc, &cx_arc); + /* Dummy gMxC resource, treat as if no dedicated MxC */ + if (ret == -ENODATA) + ret = setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, NULL, &cx_arc); + else + ret = setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, &gmxc_arc, &cx_arc); } else { + /* No gMxC resource entry, treat as if no dedicated MxC */ ret = setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, NULL, &cx_arc); } diff --git a/adreno_rpmh.c b/adreno_rpmh.c index 3c1bcfc002..9d6f87211b 100644 --- a/adreno_rpmh.c +++ b/adreno_rpmh.c @@ -14,6 +14,7 @@ int adreno_rpmh_arc_cmds(struct rpmh_arc_vals *arc, const char *res_id) { size_t len = 0; + bool dummy_res = true; arc->val = cmd_db_read_aux_data(res_id, &len); @@ -25,10 +26,17 @@ int adreno_rpmh_arc_cmds(struct rpmh_arc_vals *arc, const char *res_id) * zero padding. */ for (arc->num = 1; arc->num < (len >> 1); arc->num++) { + if (arc->val[arc->num] != 0) + dummy_res = false; + if (arc->val[arc->num - 1] != 0 && arc->val[arc->num] == 0) break; } + /* Dummy resource entry in cmd_db with all zeros */ + if (dummy_res) + return -ENODATA; + return 0; } From a62e750e4abfb9f081bd5678a329e26c4f5b7e31 Mon Sep 17 00:00:00 2001 From: Harshitha Sai Neelati Date: Sat, 4 Jan 2025 11:45:53 +0530 Subject: [PATCH 0971/1016] kgsl: gen7: Enable L0 preemption for Gen7_17_0 GPU Enable L0 preemption for Gen7_17_0 GPU. 
Change-Id: Id38d4ae6b734258a14598d1e6bbc60202eb3951f Signed-off-by: Harshitha Sai Neelati --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 35fd33c4de..cf199857f5 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2627,7 +2627,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_17_0 = { UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), .compatible = "qcom,adreno-gpu-gen7-17-0", .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | - ADRENO_IFPC, + ADRENO_IFPC | ADRENO_PREEMPTION, .gpudev = &adreno_gen7_gmu_gpudev.base, .perfcounters = &adreno_gen7_no_cb_perfcounters, .uche_gmem_alignment = 0, From 289c392b7a391198432961fc79e07f504d2d8da1 Mon Sep 17 00:00:00 2001 From: Harshitha Sai Neelati Date: Sat, 4 Jan 2025 11:18:19 +0530 Subject: [PATCH 0972/1016] kgsl: gen7: Enable IFPC feature for Gen7_17_0 GPU Enable IFPC feature for power saving. Change-Id: I592fc5e48fb187d5034d686c4d9a5a310b69ca5c Signed-off-by: Harshitha Sai Neelati --- adreno-gpulist.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 391986b157..80164f3e0c 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2025, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #define ANY_ID (~0) @@ -2624,7 +2624,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_17_0 = { DEFINE_ADRENO_REV(ADRENO_REV_GEN7_17_0, UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), .compatible = "qcom,adreno-gpu-gen7-17-0", - .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION, + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | + ADRENO_IFPC, .gpudev = &adreno_gen7_gmu_gpudev.base, .perfcounters = &adreno_gen7_no_cb_perfcounters, .uche_gmem_alignment = SZ_16M, From 8c9964feab359cce44e68c4f95dc97761e1ed3da Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Tue, 24 Dec 2024 11:41:04 +0530 Subject: [PATCH 0973/1016] kgsl: gen8: Enable L1 preemption for gen8_6_0 Enable the L1 preemption GPU functionality for QoS improvement. Change-Id: I2ab854ac31bf7a78645d01977bdebf48156cd3f3 Signed-off-by: SIVA MULLATI --- adreno-gpulist.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 50dc5617fb..14433d45d0 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2025, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #define ANY_ID (~0) @@ -3293,7 +3293,8 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_6_0 = { .compatible = "qcom,adreno-gpu-gen8-6-0", .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | - ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_IFPC, + ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_IFPC | + ADRENO_PREEMPTION, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_64M, @@ -3318,6 +3319,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_6_0 = { .gen8_snapshot_block_list = &gen8_6_0_snapshot_block_list, .noc_timeout_us = 3410, /* 3.41 msec */ .ctxt_record_size = (3372 * SZ_1K), + .preempt_level = 1, }; static const struct adreno_gpu_core *adreno_gpulist[] = { From 2f61ec1cf5db60a7fcbaf892a9f8c220e4bc99eb Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Fri, 10 Jan 2025 10:23:54 +0530 Subject: [PATCH 0974/1016] kgsl: gen8: Enable ACV perfmode vote for gen8_6_0 Enable ACV perfmode vote for gen8_6_0 for improved performance. Change-Id: I1dc8e50237d36882e150804a03a872ab610c8844 Signed-off-by: SIVA MULLATI Signed-off-by: Gayathri Veeragandam --- adreno-gpulist.h | 1 + 1 file changed, 1 insertion(+) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 14433d45d0..4c96330493 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -3320,6 +3320,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_6_0 = { .noc_timeout_us = 3410, /* 3.41 msec */ .ctxt_record_size = (3372 * SZ_1K), .preempt_level = 1, + .acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4), }; static const struct adreno_gpu_core *adreno_gpulist[] = { From 97ba40095c23863a8869aa81fc7cbebc37c0de78 Mon Sep 17 00:00:00 2001 From: Gayathri Veeragandam Date: Tue, 31 Dec 2024 11:34:30 +0530 Subject: [PATCH 0975/1016] kgsl: gen8: Enable BCL support for gen8_6_0 GPU Enable Battery Current Limiting feature for gen8_6_0 GPU. 
BCL prevents UVLO (Under Voltage Lock out) due to sudden voltage droop and OCP (Over Current Protection). Change-Id: I0d92f47873ec597b15210dfe5ce2932a947145f1 Signed-off-by: Gayathri Veeragandam --- adreno-gpulist.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 4c96330493..4eee31e2e9 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -3294,7 +3294,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_6_0 = { .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_IFPC | - ADRENO_PREEMPTION, + ADRENO_PREEMPTION | ADRENO_BCL, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_64M, @@ -3321,6 +3321,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_6_0 = { .ctxt_record_size = (3372 * SZ_1K), .preempt_level = 1, .acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4), + .bcl_data = 1, }; static const struct adreno_gpu_core *adreno_gpulist[] = { From 6cd61218bacb58c5fa4bd336a4b02a674ef1084c Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Tue, 24 Dec 2024 11:29:58 +0530 Subject: [PATCH 0976/1016] kgsl: gen8: Enable ACD on gen8_6_0 GPU Adaptive Clock Distribution feature helps mitigate peak current and voltage droops. 
Change-Id: I870628c76a9f809b505fc97cddb9d568c82e8243 Signed-off-by: SIVA MULLATI --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 4eee31e2e9..da5236c153 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -3294,7 +3294,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_6_0 = { .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_IFPC | - ADRENO_PREEMPTION | ADRENO_BCL, + ADRENO_PREEMPTION | ADRENO_BCL | ADRENO_ACD, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_64M, From 0ca54ccf3902eb51972ade010e61feae046e21f3 Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Fri, 10 Jan 2025 10:36:28 +0530 Subject: [PATCH 0977/1016] kgsl: gen8: Update AHB timeout for gen8_6_0 Update noc_timeout_us for gen8_6_0 GPU as per the latest recommendation. Change-Id: I04bd75c00383dfc9c5f26ec2eb9dd11a7b97cac7 Signed-off-by: SIVA MULLATI Signed-off-by: Gayathri Veeragandam --- adreno-gpulist.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 50dc5617fb..8d3beed652 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2025, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #define ANY_ID (~0) @@ -3316,7 +3316,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_6_0 = { .highest_bank_bit = 16, .gmu_hub_clk_freq = 200000000, .gen8_snapshot_block_list = &gen8_6_0_snapshot_block_list, - .noc_timeout_us = 3410, /* 3.41 msec */ + .noc_timeout_us = 6800, /* 6.8 msec */ .ctxt_record_size = (3372 * SZ_1K), }; From 628528663140fe22fef3664731844341405a5582 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 8 Oct 2024 12:10:29 +0530 Subject: [PATCH 0978/1016] kgsl: gen7: Correct register read order in LPAC fault header Currently, the register offsets are passed in an incorrect order to the kgsl_regread64 API. Update the order to ensure that the addresses are read accurately. Change-Id: Ic4c45af3c14ccf35fdeff2fdcdee40899b5fc434 Signed-off-by: Kamal Agrawal --- adreno_gen7.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/adreno_gen7.c b/adreno_gen7.c index 1d4318a437..7c8186d380 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -2279,11 +2279,11 @@ static void gen7_lpac_fault_header(struct adreno_device *adreno_dev, kgsl_regread(device, GEN7_RBBM_STATUS, &status); kgsl_regread(device, GEN7_CP_LPAC_RB_RPTR, &lpac_rptr); kgsl_regread(device, GEN7_CP_LPAC_RB_WPTR, &lpac_wptr); - kgsl_regread64(device, GEN7_CP_LPAC_IB1_BASE_HI, - GEN7_CP_LPAC_IB1_BASE, &lpac_ib1base); + kgsl_regread64(device, GEN7_CP_LPAC_IB1_BASE, + GEN7_CP_LPAC_IB1_BASE_HI, &lpac_ib1base); kgsl_regread(device, GEN7_CP_LPAC_IB1_REM_SIZE, &lpac_ib1sz); - kgsl_regread64(device, GEN7_CP_LPAC_IB2_BASE_HI, - GEN7_CP_LPAC_IB2_BASE, &lpac_ib2base); + kgsl_regread64(device, GEN7_CP_LPAC_IB2_BASE, + GEN7_CP_LPAC_IB2_BASE_HI, &lpac_ib2base); kgsl_regread(device, GEN7_CP_LPAC_IB2_REM_SIZE, &lpac_ib2sz); pr_context(device, drawobj_lpac->context, From 119037746c5ffce015abeccf23f76425a46228b9 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Thu, 24 Oct 2024 15:50:23 +0530 Subject: [PATCH 0979/1016] kgsl: gen7: Correct register read order in IB base and 
LPAC base Currently, the register offsets are passed in an incorrect order to the kgsl_regread64 API. Update the order to ensure that the addresses are read accurately. Change-Id: Icd60e0e7d17161cf5cba1317e62464bbadc52a04 Signed-off-by: Pankaj Gupta Signed-off-by: Sanjay Yadav --- adreno_gen7_snapshot.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c index a9c0bcaeab..6afacdf2c1 100644 --- a/adreno_gen7_snapshot.c +++ b/adreno_gen7_snapshot.c @@ -1691,21 +1691,20 @@ void gen7_snapshot(struct adreno_device *adreno_dev, if (is_current_rt) sched_set_normal(current, 0); - kgsl_regread64(device, GEN7_CP_IB1_BASE_HI, GEN7_CP_IB1_BASE, &snapshot->ib1base); + kgsl_regread64(device, GEN7_CP_IB1_BASE, GEN7_CP_IB1_BASE_HI, &snapshot->ib1base); - kgsl_regread64(device, GEN7_CP_IB2_BASE_HI, GEN7_CP_IB2_BASE, &snapshot->ib2base); + kgsl_regread64(device, GEN7_CP_IB2_BASE, GEN7_CP_IB2_BASE_HI, &snapshot->ib2base); - kgsl_regread64(device, GEN7_CP_IB3_BASE_HI, GEN7_CP_IB3_BASE, &snapshot->ib3base); + kgsl_regread64(device, GEN7_CP_IB3_BASE, GEN7_CP_IB3_BASE_HI, &snapshot->ib3base); kgsl_regread(device, GEN7_CP_IB1_REM_SIZE, &snapshot->ib1size); kgsl_regread(device, GEN7_CP_IB2_REM_SIZE, &snapshot->ib2size); kgsl_regread(device, GEN7_CP_IB3_REM_SIZE, &snapshot->ib3size); - kgsl_regread64(device, GEN7_CP_LPAC_IB1_BASE_HI, - GEN7_CP_LPAC_IB1_BASE, &snapshot->ib1base_lpac); - - kgsl_regread64(device, GEN7_CP_LPAC_IB2_BASE_HI, - GEN7_CP_LPAC_IB2_BASE, &snapshot->ib2base_lpac); + kgsl_regread64(device, GEN7_CP_LPAC_IB1_BASE, + GEN7_CP_LPAC_IB1_BASE_HI, &snapshot->ib1base_lpac); + kgsl_regread64(device, GEN7_CP_LPAC_IB2_BASE, + GEN7_CP_LPAC_IB2_BASE_HI, &snapshot->ib2base_lpac); kgsl_regread(device, GEN7_CP_LPAC_IB1_REM_SIZE, &snapshot->ib1size_lpac); kgsl_regread(device, GEN7_CP_LPAC_IB2_REM_SIZE, &snapshot->ib2size_lpac); From f4204fd5e5a6bdd2a2223150905ac1fe687c78a8 Mon Sep 17 00:00:00 2001 From: 
Harshitha Sai Neelati Date: Sat, 4 Jan 2025 11:45:53 +0530 Subject: [PATCH 0980/1016] kgsl: gen7: Enable L0 preemption for Gen7_17_0 GPU Enable L0 preemption for Gen7_17_0 GPU. Change-Id: Id38d4ae6b734258a14598d1e6bbc60202eb3951f Signed-off-by: Harshitha Sai Neelati --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 80164f3e0c..ce7c0c20e8 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2625,7 +2625,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_17_0 = { UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), .compatible = "qcom,adreno-gpu-gen7-17-0", .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | - ADRENO_IFPC, + ADRENO_IFPC | ADRENO_PREEMPTION, .gpudev = &adreno_gen7_gmu_gpudev.base, .perfcounters = &adreno_gen7_no_cb_perfcounters, .uche_gmem_alignment = SZ_16M, From fe5db95e467098cc540d3ebec615ff415e6187cd Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Tue, 28 Jan 2025 11:21:43 +0530 Subject: [PATCH 0981/1016] kgsl: gen8: Update GMU firmware binary name for gen8_6_0 GPU Update the GPU list for gen8_6_0 GPU to pick right GPU firmware binary. 
Change-Id: Iae7d928f3445bcce220918fe13aefd0005aede4d Signed-off-by: SIVA MULLATI Signed-off-by: Gayathri Veeragandam --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 70d0d9be1d..78a456eaad 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -3308,7 +3308,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_6_0 = { }, .aqefw_name = "gen80000_aqe.fw", .sqefw_name = "gen80000_sqe.fw", - .gmufw_name = "gen80000_gmu.bin", + .gmufw_name = "gen80600_gmu.bin", .zap_name = "gen80000_zap.mbn", .ao_hwcg = gen8_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen8_ao_hwcg_regs), From bbba7d96cd7c539e086d77bc628fe10232291edb Mon Sep 17 00:00:00 2001 From: Kaushal Sanadhya Date: Thu, 23 Jan 2025 12:19:14 +0530 Subject: [PATCH 0982/1016] kgsl: gen7: Reduce snapshot size to 2M for gen7_17_0 GPU Gen7_17_0 GPU necessitates only 2M snapshot memory. Therefore, cut down the size. Change-Id: I5ab9757d874bc8df38219d0d5a164ff9d97981ff Signed-off-by: Kaushal Sanadhya --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 1d8b235e6f..448080434f 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2633,7 +2633,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_17_0 = { .uche_gmem_alignment = SZ_16M, .gmem_size = SZ_1M, .bus_width = 32, - .snapshot_size = SZ_4M, + .snapshot_size = SZ_2M, }, .sqefw_name = "gen70e00_sqe.fw", .gmufw_name = "gen71700_gmu.bin", From fd202c377de1701cf903acf33ea7a43de9ca3f0f Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Tue, 24 Dec 2024 11:41:04 +0530 Subject: [PATCH 0983/1016] kgsl: gen8: Enable L1 preemption for gen8_6_0 Enable the L1 preemption GPU functionality for QoS improvement. 
Change-Id: I2ab854ac31bf7a78645d01977bdebf48156cd3f3 Signed-off-by: SIVA MULLATI --- adreno-gpulist.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 4d4649db35..935f95df32 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -3296,7 +3296,8 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_6_0 = { .compatible = "qcom,adreno-gpu-gen8-6-0", .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | - ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_IFPC, + ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_IFPC | + ADRENO_PREEMPTION, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_64M, @@ -3321,6 +3322,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_6_0 = { .gen8_snapshot_block_list = &gen8_6_0_snapshot_block_list, .noc_timeout_us = 3410, /* 3.41 msec */ .ctxt_record_size = (3372 * SZ_1K), + .preempt_level = 1, }; static const struct adreno_gpu_core *adreno_gpulist[] = { From e085f67ca579c3d6b786a0774a8b5c9e067ed8ff Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Fri, 10 Jan 2025 10:23:54 +0530 Subject: [PATCH 0984/1016] kgsl: gen8: Enable ACV perfmode vote for gen8_6_0 Enable ACV perfmode vote for gen8_6_0 for improved performance. 
Change-Id: I1dc8e50237d36882e150804a03a872ab610c8844 Signed-off-by: SIVA MULLATI Signed-off-by: Gayathri Veeragandam --- adreno-gpulist.h | 1 + 1 file changed, 1 insertion(+) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 935f95df32..b2323ec841 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -3323,6 +3323,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_6_0 = { .noc_timeout_us = 3410, /* 3.41 msec */ .ctxt_record_size = (3372 * SZ_1K), .preempt_level = 1, + .acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4), }; static const struct adreno_gpu_core *adreno_gpulist[] = { From 114196670b67584ae776e076dc962a0753a50ae9 Mon Sep 17 00:00:00 2001 From: Gayathri Veeragandam Date: Tue, 31 Dec 2024 11:34:30 +0530 Subject: [PATCH 0985/1016] kgsl: gen8: Enable BCL support for gen8_6_0 GPU Enable Battery Current Limiting feature for gen8_6_0 GPU. BCL prevents UVLO (Under Voltage Lock out) due to sudden voltage droop and OCP (Over Current Protection). Change-Id: I0d92f47873ec597b15210dfe5ce2932a947145f1 Signed-off-by: Gayathri Veeragandam --- adreno-gpulist.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index b2323ec841..fe5ccd84d5 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -3297,7 +3297,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_6_0 = { .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_IFPC | - ADRENO_PREEMPTION, + ADRENO_PREEMPTION | ADRENO_BCL, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_64M, @@ -3324,6 +3324,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_6_0 = { .ctxt_record_size = (3372 * SZ_1K), .preempt_level = 1, .acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4), + .bcl_data = 1, }; static const struct adreno_gpu_core *adreno_gpulist[] = { From 
ec202952bb7f7b16ee5779b86d661eb868b9a7e8 Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Fri, 10 Jan 2025 10:36:28 +0530 Subject: [PATCH 0986/1016] kgsl: gen8: Update AHB timeout for gen8_6_0 Update noc_timeout_us for gen8_6_0 GPU as per the latest recommendation. Change-Id: I04bd75c00383dfc9c5f26ec2eb9dd11a7b97cac7 Signed-off-by: SIVA MULLATI Signed-off-by: Gayathri Veeragandam --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index fe5ccd84d5..709dbe21ad 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -3320,7 +3320,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_6_0 = { .highest_bank_bit = 16, .gmu_hub_clk_freq = 200000000, .gen8_snapshot_block_list = &gen8_6_0_snapshot_block_list, - .noc_timeout_us = 3410, /* 3.41 msec */ + .noc_timeout_us = 6800, /* 6.8 msec */ .ctxt_record_size = (3372 * SZ_1K), .preempt_level = 1, .acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4), From fc2e1ca1e407d7d615044e5ccea790c6bcd3bc4f Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 19 Dec 2024 17:12:14 +0530 Subject: [PATCH 0987/1016] kgsl: Mark KGSL device as dma coherent Currently, cache operations are performed for DMA buffers imported to KGSL even when IO-coherency is enabled. This commit marks the KGSL device as dma coherent when IO-coherency is enabled, thereby skipping cache operations for imported dma buffers. Change-Id: I39a8c5ddbe7e9baad8086d8c07ae2055b99d24eb Signed-off-by: Kamal Agrawal Signed-off-by: Pankaj Gupta --- kgsl.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/kgsl.c b/kgsl.c index 7679e719d9..25c3041f33 100644 --- a/kgsl.c +++ b/kgsl.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2025, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -5127,6 +5127,15 @@ static int _register_device(struct kgsl_device *device) device->dev->dma_mask = &dma_mask; device->dev->dma_parms = &dma_parms; + /* + * Mark KGSL device as dma coherent when io-coherency + * is enabled to skip cache operations for imported dma + * buffers. + */ + if (kgsl_mmu_has_feature(device, KGSL_MMU_IO_COHERENT) && + IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT)) + device->dev->dma_coherent = true; + dma_set_max_seg_size(device->dev, (u32)DMA_BIT_MASK(32)); set_dma_ops(device->dev, NULL); From ce599df5fc90db5f9ff1193675d4f0dd402e8cc6 Mon Sep 17 00:00:00 2001 From: Carter Cooper Date: Wed, 21 Aug 2024 16:03:15 -0700 Subject: [PATCH 0988/1016] kgsl: adreno: DCVS Tuning nodes Add sysfs nodes to allow for dynamic DCVS tuning. Change-Id: Ic1c9eec76001962fc8a4c51b96197bdc2b7b979e Signed-off-by: Carter Cooper Signed-off-by: Pankaj Gupta --- adreno.c | 3 +- adreno.h | 10 +++- adreno_sysfs.c | 124 ++++++++++++++++++++++++++++++++++++++- adreno_trace.h | 21 ++++++- governor_msm_adreno_tz.c | 4 +- 5 files changed, 157 insertions(+), 5 deletions(-) diff --git a/adreno.c b/adreno.c index 109e9c5a29..153c026fed 100644 --- a/adreno.c +++ b/adreno.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include #include @@ -1198,6 +1198,7 @@ static void adreno_setup_device(struct adreno_device *adreno_dev) mutex_init(&adreno_dev->dev.mutex); mutex_init(&adreno_dev->dev.file_mutex); mutex_init(&adreno_dev->fault_recovery_mutex); + mutex_init(&adreno_dev->dcvs_tuning_mutex); INIT_LIST_HEAD(&adreno_dev->dev.globals); /* Set the fault tolerance policy to replay, skip, throttle */ diff --git a/adreno.h b/adreno.h index 169b16b8d7..d935df5d56 100644 --- a/adreno.h +++ b/adreno.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef __ADRENO_H #define __ADRENO_H @@ -766,6 +766,14 @@ struct adreno_device { struct kthread_work scheduler_work; /** @scheduler_fault: Atomic to trigger scheduler based fault recovery */ atomic_t scheduler_fault; + /** @dcvs_tuning_mutex: Mutex taken during dcvs tuning */ + struct mutex dcvs_tuning_mutex; + /** @dcvs_tuning_mingap_lvl: Current DCVS tuning level for mingap */ + u32 dcvs_tuning_mingap_lvl; + /** @dcvs_tuning_penalty_lvl: Current DCVS tuning level for penalty */ + u32 dcvs_tuning_penalty_lvl; + /** @dcvs_tuning_numbusy_lvl: Current DCVS tuning level for numbusy */ + u32 dcvs_tuning_numbusy_lvl; }; /* Time to wait for suspend recovery gate to complete */ diff --git a/adreno_sysfs.c b/adreno_sysfs.c index 9fab257c9a..342c4c701b 100644 --- a/adreno_sysfs.c +++ b/adreno_sysfs.c @@ -1,13 +1,20 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2014-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include +#include +#if (KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE) +#include +#else +#include +#endif #include "adreno.h" #include "adreno_sysfs.h" +#include "adreno_trace.h" #include "kgsl_sysfs.h" static ssize_t _gpu_model_show(struct kgsl_device *device, char *buf) @@ -99,6 +106,114 @@ static u32 _rt_bus_hint_show(struct adreno_device *adreno_dev) return device->pwrctrl.rt_bus_hint; } +/* Tuning values can be set to 0/1/2/3 */ +#define DCVS_TUNING_MAX 3 +#define DCVS_TUNING_EN_BIT BIT(5) + +/* + * GPU DCVS Tuning allows for small adjustments to the DCVS + * algorithm. The default value for each tunable is 0. Setting + * a higher tunable value will increase the aggressivenes + * of the DCVS algorithm. Currently 0-3 are supported values + * for each tunable, 3 being most aggressive. + */ + +/* Mingap is the count of consecutive low requests before moving to lower DCVS levels. */ +#define DCVS_TUNING_MINGAP 0 +/* Penalty is the busy threshold for moving between levels. */ +#define DCVS_TUNING_PENALTY 1 +/* Numbusy is the backoff from mingap to transition power level more quickly. 
*/ +#define DCVS_TUNING_NUMBUSY 2 + +#if (KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE) +static int __dcvs_tuning_scm_entry(struct adreno_device *adreno_dev, + u32 param, u32 val) +{ + int ret; + u32 mingap = 0, penalty = 0, numbusy = 0; + u32 *save; + + switch (param) { + case DCVS_TUNING_MINGAP: + mingap = DCVS_TUNING_EN_BIT | FIELD_PREP(GENMASK(4, 0), val); + save = &adreno_dev->dcvs_tuning_mingap_lvl; + break; + case DCVS_TUNING_PENALTY: + penalty = DCVS_TUNING_EN_BIT | FIELD_PREP(GENMASK(4, 0), val); + save = &adreno_dev->dcvs_tuning_penalty_lvl; + break; + case DCVS_TUNING_NUMBUSY: + numbusy = DCVS_TUNING_EN_BIT | FIELD_PREP(GENMASK(4, 0), val); + save = &adreno_dev->dcvs_tuning_numbusy_lvl; + break; + default: + return -EINVAL; + } + + if (!mutex_trylock(&adreno_dev->dcvs_tuning_mutex)) + return -EDEADLK; + + ret = qcom_scm_kgsl_dcvs_tuning(mingap, penalty, numbusy); + if (ret == 0) { + *save = val; + trace_adreno_dcvs_tuning(param, + adreno_dev->dcvs_tuning_mingap_lvl, + adreno_dev->dcvs_tuning_penalty_lvl, + adreno_dev->dcvs_tuning_numbusy_lvl); + } + mutex_unlock(&adreno_dev->dcvs_tuning_mutex); + + return ret; +} +#else +static int __dcvs_tuning_scm_entry(struct adreno_device *adreno_dev, u32 param, u32 val) +{ + return -EOPNOTSUPP; +} +#endif + +static int _dcvs_tuning_mingap_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + if (val > DCVS_TUNING_MAX) + return -EINVAL; + + return __dcvs_tuning_scm_entry(adreno_dev, DCVS_TUNING_MINGAP, val); +} + +static u32 _dcvs_tuning_mingap_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->dcvs_tuning_mingap_lvl; +} + +static int _dcvs_tuning_penalty_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + if (val > DCVS_TUNING_MAX) + return -EINVAL; + + return __dcvs_tuning_scm_entry(adreno_dev, DCVS_TUNING_PENALTY, val); +} + +static u32 _dcvs_tuning_penalty_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->dcvs_tuning_penalty_lvl; +} + +static int 
_dcvs_tuning_numbusy_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + if (val > DCVS_TUNING_MAX) + return -EINVAL; + + return __dcvs_tuning_scm_entry(adreno_dev, DCVS_TUNING_NUMBUSY, val); +} + +static u32 _dcvs_tuning_numbusy_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->dcvs_tuning_numbusy_lvl; +} + static int _gpu_llc_slice_enable_store(struct adreno_device *adreno_dev, bool val) { @@ -450,6 +565,10 @@ static DEVICE_ATTR_RO(gpu_model); static DEVICE_ATTR_RO(gpufaults); static DEVICE_ATTR_RO(gpufault_procs); +static ADRENO_SYSFS_U32(dcvs_tuning_mingap); +static ADRENO_SYSFS_U32(dcvs_tuning_penalty); +static ADRENO_SYSFS_U32(dcvs_tuning_numbusy); + static const struct attribute *_attr_list[] = { &adreno_attr_ft_policy.attr.attr, &adreno_attr_ft_pagefault_policy.attr.attr, @@ -477,6 +596,9 @@ static const struct attribute *_attr_list[] = { &adreno_attr_clx.attr.attr, &dev_attr_gpufaults.attr, &dev_attr_gpufault_procs.attr, + &adreno_attr_dcvs_tuning_mingap.attr.attr, + &adreno_attr_dcvs_tuning_penalty.attr.attr, + &adreno_attr_dcvs_tuning_numbusy.attr.attr, NULL, }; diff --git a/adreno_trace.h b/adreno_trace.h index 16974a83f7..890a8990a5 100644 --- a/adreno_trace.h +++ b/adreno_trace.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2013-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #if !defined(_ADRENO_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) @@ -985,6 +985,25 @@ TRACE_EVENT(adreno_ifpc_count, TP_printk("total times GMU entered IFPC = %d", __entry->ifpc_count) ); +TRACE_EVENT(adreno_dcvs_tuning, + TP_PROTO(u32 param, u32 mingap, u32 penalty, u32 numbusy), + TP_ARGS(param, mingap, penalty, numbusy), + TP_STRUCT__entry( + __field(u32, param) + __field(u32, mingap) + __field(u32, penalty) + __field(u32, numbusy) + ), + TP_fast_assign( + __entry->param = param; + __entry->mingap = mingap; + __entry->penalty = penalty; + __entry->numbusy = numbusy; + ), + TP_printk("param=%u mingap=%u penalty=%u numbusy=%u", + __entry->param, __entry->mingap, __entry->penalty, __entry->numbusy) +); + #endif /* _ADRENO_TRACE_H */ /* This part must be outside protection */ diff --git a/governor_msm_adreno_tz.c b/governor_msm_adreno_tz.c index 4f52ae0ecd..5cbb5e6be4 100644 --- a/governor_msm_adreno_tz.c +++ b/governor_msm_adreno_tz.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2010-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2025, Qualcomm Innovation Center, Inc. All rights reserved. */ #include #include @@ -57,6 +57,8 @@ static DEFINE_SPINLOCK(suspend_lock); #define TZ_V2_INIT_CA_ID_64 0xC #define TZ_V2_UPDATE_WITH_CA_ID_64 0xD +#define TZ_DCVS_TUNING_ID 0xE + #define TAG "msm_adreno_tz: " static u64 suspend_time; From bafdd911287ddee03750e38b112533d171b846fd Mon Sep 17 00:00:00 2001 From: Urvashi Agrawal Date: Tue, 12 Nov 2024 13:57:50 -0800 Subject: [PATCH 0989/1016] kgsl: gen8: Fix snapshot header for GPU registers Some unsliced registers need to have UINT_MAX in the snapshot header to correctly identify them as unsliced registers. Ensure the header is correct. 
Change-Id: Ib1f3a06b3d7e0ca622c0fb2c6af18d92f12c85de Signed-off-by: Urvashi Agrawal --- adreno_gen8_snapshot.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 521a6f542d..1c7ac63400 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2025, Qualcomm Innovation Center, Inc. All rights reserved. */ #include "adreno.h" @@ -247,7 +247,7 @@ size_t gen8_legacy_snapshot_registers(struct kgsl_device *device, header->location_id = UINT_MAX; header->sp_id = UINT_MAX; header->usptp_id = UINT_MAX; - header->slice_id = info->slice_id; + header->slice_id = info->regs->slice_region ? info->slice_id : UINT_MAX; if (info->regs->sel) kgsl_regwrite(device, info->regs->sel->host_reg, info->regs->sel->val); @@ -298,7 +298,7 @@ static size_t gen8_snapshot_registers(struct kgsl_device *device, u8 *buf, header->location_id = UINT_MAX; header->sp_id = UINT_MAX; header->usptp_id = UINT_MAX; - header->slice_id = info->slice_id; + header->slice_id = info->regs->slice_region ? info->slice_id : UINT_MAX; src = gen8_crashdump_registers->hostptr + info->offset; @@ -336,7 +336,7 @@ static size_t gen8_legacy_snapshot_shader(struct kgsl_device *device, } header->type = block->statetype; - header->slice_id = info->slice_id; + header->slice_id = block->slice_region ? info->slice_id : UINT_MAX; header->sp_index = info->sp_id; header->usptp = info->usptp; header->pipe_id = block->pipeid; @@ -376,7 +376,7 @@ static size_t gen8_snapshot_shader_memory(struct kgsl_device *device, } header->type = block->statetype; - header->slice_id = info->slice_id; + header->slice_id = block->slice_region ? 
info->slice_id : UINT_MAX; header->sp_index = info->sp_id; header->usptp = info->usptp; header->pipe_id = block->pipeid; @@ -820,7 +820,7 @@ static size_t gen8_legacy_snapshot_cluster_dbgahb(struct kgsl_device *device, header->location_id = info->location_id; header->sp_id = info->sp_id; header->usptp_id = info->usptp_id; - header->slice_id = info->slice_id; + header->slice_id = info->cluster->slice_region ? info->slice_id : UINT_MAX; read_sel = GEN8_SP_READ_SEL_VAL(info->slice_id, info->location_id, info->pipe_id, info->statetype_id, info->usptp_id, info->sp_id); @@ -872,7 +872,7 @@ static size_t gen8_snapshot_cluster_dbgahb(struct kgsl_device *device, u8 *buf, header->location_id = info->location_id; header->sp_id = info->sp_id; header->usptp_id = info->usptp_id; - header->slice_id = info->slice_id; + header->slice_id = info->cluster->slice_region ? info->slice_id : UINT_MAX; src = gen8_crashdump_registers->hostptr + info->offset; @@ -1009,7 +1009,7 @@ static size_t gen8_legacy_snapshot_mvc(struct kgsl_device *device, u8 *buf, header->location_id = UINT_MAX; header->sp_id = UINT_MAX; header->usptp_id = UINT_MAX; - header->slice_id = info->slice_id; + header->slice_id = info->cluster->slice_region ? info->slice_id : UINT_MAX; /* * Set the AHB control for the Host to read from the @@ -1064,7 +1064,7 @@ static size_t gen8_snapshot_mvc(struct kgsl_device *device, u8 *buf, header->location_id = UINT_MAX; header->sp_id = UINT_MAX; header->usptp_id = UINT_MAX; - header->slice_id = info->slice_id; + header->slice_id = info->cluster->slice_region ? info->slice_id : UINT_MAX; src = gen8_crashdump_registers->hostptr + info->offset; From c22dc5f24df295a0d84ab095b6d14d32c5f2ba22 Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Tue, 28 Jan 2025 11:21:43 +0530 Subject: [PATCH 0990/1016] kgsl: gen8: Update GMU firmware binary name for gen8_6_0 GPU Update the GPU list for gen8_6_0 GPU to pick right GPU firmware binary. 
Change-Id: Iae7d928f3445bcce220918fe13aefd0005aede4d Signed-off-by: SIVA MULLATI Signed-off-by: Gayathri Veeragandam --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 709dbe21ad..8ac8deed00 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -3308,7 +3308,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_6_0 = { }, .aqefw_name = "gen80000_aqe.fw", .sqefw_name = "gen80000_sqe.fw", - .gmufw_name = "gen80000_gmu.bin", + .gmufw_name = "gen80600_gmu.bin", .zap_name = "gen80000_zap.mbn", .ao_hwcg = gen8_ao_hwcg_regs, .ao_hwcg_count = ARRAY_SIZE(gen8_ao_hwcg_regs), From 544c7fcc6823cefb0292b7c8ed09f7e2e987c23f Mon Sep 17 00:00:00 2001 From: SIVA MULLATI Date: Tue, 24 Dec 2024 11:29:58 +0530 Subject: [PATCH 0991/1016] kgsl: gen8: Enable ACD on gen8_6_0 GPU Adaptive Clock Distribution feature helps mitigate peak current and voltage droops. Change-Id: I870628c76a9f809b505fc97cddb9d568c82e8243 Signed-off-by: SIVA MULLATI --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 8ac8deed00..5bed159446 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -3297,7 +3297,7 @@ static const struct adreno_gen8_core adreno_gpu_core_gen8_6_0 = { .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_AQE | ADRENO_GMU_WARMBOOT | ADRENO_L3_VOTE | ADRENO_IFPC | - ADRENO_PREEMPTION | ADRENO_BCL, + ADRENO_PREEMPTION | ADRENO_BCL | ADRENO_ACD, .gpudev = &adreno_gen8_hwsched_gpudev.base, .perfcounters = &adreno_gen8_perfcounters, .uche_gmem_alignment = SZ_64M, From 6ab9851d0f163d02609d7816e2d2da0d9a69f09d Mon Sep 17 00:00:00 2001 From: Rohit Jadhav Date: Tue, 11 Feb 2025 08:54:05 -0800 Subject: [PATCH 0992/1016] Revert "kgsl: gen7: Update uche_gmem_alignment for Gen7_17_0 GPU" Revert submission 5739443 Reason for revert: GFX revert Reverted changes: /q/submissionid:5739443 Change-Id: 
I201325cd8735f39b15a17ded983cdd4bfe3b7c4d Signed-off-by: Rohit Jadhav --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 5bed159446..d6a9884c6e 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2630,7 +2630,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_17_0 = { ADRENO_IFPC | ADRENO_PREEMPTION, .gpudev = &adreno_gen7_gmu_gpudev.base, .perfcounters = &adreno_gen7_no_cb_perfcounters, - .uche_gmem_alignment = 0, + .uche_gmem_alignment = SZ_16M, .gmem_size = SZ_1M, .bus_width = 32, .snapshot_size = SZ_4M, From e2ff146c3266f8995e9a5e9e2828a3c3db5d7724 Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Thu, 26 Dec 2024 15:58:09 +0530 Subject: [PATCH 0993/1016] kgsl: Fix imported memory calculation for shared buffers Currently, when the same buffer is mapped as a surface in one process, it is ignored in the imported memory calculation of another process that imports it as an image, causing discrepancies. To fix this, the memory object size is now divided by the sum of EGL surface and EGL image counts. This ensures that the imported memory is accurately accounted for across different processes, regardless of flags. Change-Id: Ieade58964c51758f7fe54b2a473aa03fe374482e Signed-off-by: Sanjay Yadav --- kgsl_sharedmem.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/kgsl_sharedmem.c b/kgsl_sharedmem.c index e7f55670c4..24bb1f120f 100644 --- a/kgsl_sharedmem.c +++ b/kgsl_sharedmem.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -256,19 +256,19 @@ imported_mem_show(struct kgsl_process_private *priv, m = &entry->memdesc; if (kgsl_memdesc_usermem_type(m) == KGSL_MEM_ENTRY_ION) { + u64 size = m->size; + int total_egl_count; + kgsl_get_egl_counts(entry, &egl_surface_count, &egl_image_count); + total_egl_count = egl_surface_count + egl_image_count; - if ((kgsl_memdesc_get_memtype(m) == KGSL_MEMTYPE_EGL_SURFACE) || - (kgsl_memdesc_get_memtype(m) == KGSL_MEMTYPE_SURFACE)) - imported_mem += m->size; - else if (egl_surface_count == 0) { - uint64_t size = m->size; - - do_div(size, (egl_image_count ? - egl_image_count : 1)); - imported_mem += size; - } + /* + * Divide the total buffer size uniformly across all the + * processes that imported the buffer. + */ + do_div(size, (total_egl_count ? total_egl_count : 1)); + imported_mem += size; } kgsl_mem_entry_put(entry); From 949c8e694a756d8a03ea90426b5470c993df7d02 Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Thu, 13 Feb 2025 14:13:11 +0530 Subject: [PATCH 0994/1016] kgsl: gen7: Enable BCL feature for gen7_17_0 GPU Battery Current Limiter prevents under voltage and provides overcurrent protection. 
Change-Id: I800f58e394e0b127363545c67257aa9d773b8e08 Signed-off-by: Sanjay Yadav --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 8465548a92..08cd081643 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2627,7 +2627,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_17_0 = { UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), .compatible = "qcom,adreno-gpu-gen7-17-0", .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | - ADRENO_IFPC | ADRENO_PREEMPTION, + ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_BCL, .gpudev = &adreno_gen7_gmu_gpudev.base, .perfcounters = &adreno_gen7_no_cb_perfcounters, .uche_gmem_alignment = SZ_16M, From 6d74af52fbe2fa82d8fab0c05ac4681d567a48b4 Mon Sep 17 00:00:00 2001 From: Gayathri Veeragandam Date: Wed, 19 Feb 2025 12:15:39 +0530 Subject: [PATCH 0995/1016] Revert "Merge "kgsl: Mark KGSL device as dma coherent"" Boot up Failures. This reverts commit 2033a54d99bc14fb4e868a662e22a64b9b1f3a7b, reversing changes made to df8a84e63ec52da9fd936141604d3e5c0141d984. Change-Id: I668279aa6a7dac090bcf5d272eae5880611b9177 Signed-off-by: Gayathri Veeragandam --- kgsl.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/kgsl.c b/kgsl.c index 25c3041f33..13d4c6887c 100644 --- a/kgsl.c +++ b/kgsl.c @@ -5127,15 +5127,6 @@ static int _register_device(struct kgsl_device *device) device->dev->dma_mask = &dma_mask; device->dev->dma_parms = &dma_parms; - /* - * Mark KGSL device as dma coherent when io-coherency - * is enabled to skip cache operations for imported dma - * buffers. 
- */ - if (kgsl_mmu_has_feature(device, KGSL_MMU_IO_COHERENT) && - IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT)) - device->dev->dma_coherent = true; - dma_set_max_seg_size(device->dev, (u32)DMA_BIT_MASK(32)); set_dma_ops(device->dev, NULL); From 69c26466f7f6795b85620e236f0a299d57e4ded5 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 16 Jan 2025 16:45:45 +0530 Subject: [PATCH 0996/1016] kgsl: gen8: Add CP semaphore around perfcounter access When CP is executing preemption switch sequence, which includes performing GPU state and register link resets, some register accesses to the register link may be lost. There is a possibility of accessing performance counters from host while preemption is in progress. Use CP semaphore around these accesses to ensure that CP and host requests don't race with each other, thereby avoiding system timeouts. Change-Id: I85e3a8ff04231c73944f6fa410a763cab952da3c Signed-off-by: Urvashi Agrawal Signed-off-by: Kamal Agrawal --- adreno.h | 43 +++++++++++++++++++++++++++++++ adreno_gen8.c | 53 +++++++++++++++++++++++++++++++++++--- adreno_gen8_perfcounter.c | 54 +++++++++++++++++++++------------------ adreno_perfcounter.c | 9 ++++++- gen8_reg.h | 3 ++- 5 files changed, 132 insertions(+), 30 deletions(-) diff --git a/adreno.h b/adreno.h index 61bbb1de62..50b76d3ae7 100644 --- a/adreno.h +++ b/adreno.h @@ -79,6 +79,41 @@ /* ADRENO_GPUREV - Return the GPU ID for the given adreno_device */ #define ADRENO_GPUREV(_a) ((_a)->gpucore->gpurev) +/* + * Disable local interrupts and CPU preemption to avoid interruptions + * while holding the CP semaphore; otherwise, it could stall the CP. + * Make sure to call ADRENO_RELEASE_CP_SEMAPHORE after calling the + * below macro to reenable CPU interrupts. 
+ */ +#define ADRENO_ACQUIRE_CP_SEMAPHORE(_adreno_dev, _flags) \ + ({ \ + bool ret = true; \ + if ((_adreno_dev)->gpucore->gpudev->acquire_cp_semaphore) { \ + local_irq_save(_flags); \ + preempt_disable(); \ + ret = (_adreno_dev)->gpucore->gpudev->acquire_cp_semaphore(_adreno_dev); \ + if (!ret) { \ + preempt_enable(); \ + local_irq_restore(_flags); \ + dev_err_ratelimited(KGSL_DEVICE(_adreno_dev)->dev, \ + "Timed out waiting to acquire CP semaphore:" \ + " status=0x%08x\n", ret); \ + } \ + } \ + ret; \ + }) + +#define ADRENO_RELEASE_CP_SEMAPHORE(_adreno_dev, _flags) \ + ({ \ + do { \ + if ((_adreno_dev)->gpucore->gpudev->release_cp_semaphore) { \ + (_adreno_dev)->gpucore->gpudev->release_cp_semaphore(_adreno_dev); \ + preempt_enable(); \ + local_irq_restore(_flags); \ + } \ + } while (0);\ + }) + /* * ADRENO_FEATURE - return true if the specified feature is supported by the GPU * core @@ -1003,6 +1038,14 @@ struct adreno_gpudev { * @lpac_fault_header: Print LPAC fault header */ void (*lpac_fault_header)(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj); + /** + * @acquire_cp_semaphore: Return true if CP semaphore is acquired, otherwise false + */ + bool (*acquire_cp_semaphore)(struct adreno_device *adreno_dev); + /** + * @release_cp_semaphore: Release CP semaphore + */ + void (*release_cp_semaphore)(struct adreno_device *adreno_dev); }; /** diff --git a/adreno_gen8.c b/adreno_gen8.c index 91704d7004..96b32ffce0 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2025, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -2472,6 +2472,41 @@ static u32 _get_pipeid(u32 groupid) } } +static bool gen8_acquire_cp_semaphore(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 sem, i; + + for (i = 0; i < 10; i++) { + kgsl_regwrite(device, GEN8_CP_SEMAPHORE_REG_0, BIT(8)); + + /* + * Make sure the previous register write is posted before + * checking the CP sempahore status + */ + mb(); + + kgsl_regread(device, GEN8_CP_SEMAPHORE_REG_0, &sem); + if (sem) + return true; + + udelay(10); + } + + /* Check CP semaphore status one last time */ + kgsl_regread(device, GEN8_CP_SEMAPHORE_REG_0, &sem); + + if (!sem) + return false; + + return true; +} + +static void gen8_release_cp_semaphore(struct adreno_device *adreno_dev) +{ + kgsl_regwrite(KGSL_DEVICE(adreno_dev), GEN8_CP_SEMAPHORE_REG_0, 0); +} + int gen8_perfcounter_remove(struct adreno_device *adreno_dev, struct adreno_perfcount_register *reg, u32 groupid) { @@ -2561,6 +2596,11 @@ int gen8_perfcounter_update(struct adreno_device *adreno_dev, u32 *data = ptr + sizeof(*lock); int i, start_offset = -1; u16 perfcntr_list_len = lock->dynamic_list_len - gen8_dev->ext_pwrup_list_len; + unsigned long irq_flags; + int ret = 0; + + if (!ADRENO_ACQUIRE_CP_SEMAPHORE(adreno_dev, irq_flags)) + return -EBUSY; if (flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) { for (i = 0; i < perfcntr_list_len - 2; i++) { @@ -2577,7 +2617,8 @@ int gen8_perfcounter_update(struct adreno_device *adreno_dev, if (kgsl_hwlock(lock)) { kgsl_hwunlock(lock); - return -EBUSY; + ret = -EBUSY; + goto err; } /* @@ -2638,7 +2679,9 @@ update: kgsl_regwrite(device, reg->reg_dependency[i], reg->countable); } - return 0; +err: + ADRENO_RELEASE_CP_SEMAPHORE(adreno_dev, irq_flags); + return ret; } static u64 gen8_read_alwayson(struct adreno_device *adreno_dev) @@ -2969,6 +3012,8 @@ const struct gen8_gpudev adreno_gen8_hwsched_gpudev = { .get_uche_trap_base = gen8_get_uche_trap_base, .fault_header = gen8_fault_header, .lpac_fault_header = 
gen8_lpac_fault_header, + .acquire_cp_semaphore = gen8_acquire_cp_semaphore, + .release_cp_semaphore = gen8_release_cp_semaphore, }, .hfi_probe = gen8_hwsched_hfi_probe, .hfi_remove = gen8_hwsched_hfi_remove, @@ -2999,6 +3044,8 @@ const struct gen8_gpudev adreno_gen8_gmu_gpudev = { .swfuse_irqctrl = gen8_swfuse_irqctrl, .get_uche_trap_base = gen8_get_uche_trap_base, .fault_header = gen8_fault_header, + .acquire_cp_semaphore = gen8_acquire_cp_semaphore, + .release_cp_semaphore = gen8_release_cp_semaphore, }, .hfi_probe = gen8_gmu_hfi_probe, .handle_watchdog = gen8_gmu_handle_watchdog, diff --git a/adreno_gen8_perfcounter.c b/adreno_gen8_perfcounter.c index 59d648b518..d7ed88b10e 100644 --- a/adreno_gen8_perfcounter.c +++ b/adreno_gen8_perfcounter.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2025, Qualcomm Innovation Center, Inc. All rights reserved. */ #include "adreno.h" @@ -14,8 +14,7 @@ static void gen8_rbbm_perfctr_flush(struct kgsl_device *device) { - u32 val; - int ret; + u32 status, i; /* * Flush delta counters (both perf counters and pipe stats) present in @@ -27,31 +26,24 @@ static void gen8_rbbm_perfctr_flush(struct kgsl_device *device) /* Ensure all writes are posted before polling status register */ wmb(); - ret = kgsl_regmap_read_poll_timeout(&device->regmap, GEN8_RBBM_PERFCTR_FLUSH_HOST_STATUS, - val, (val & PERFCOUNTER_FLUSH_DONE_MASK) == PERFCOUNTER_FLUSH_DONE_MASK, - 100, 100 * 1000); + /* + * Poll RBBM_PERFCTR_FLUSH_HOST_STATUS to wait for perfcounter flush completion. + * Use a busy loop as this is called with interrupts and preemption disabled. 
+ */ + for (i = 0; i < 10; i++) { + kgsl_regread(device, GEN8_RBBM_PERFCTR_FLUSH_HOST_STATUS, &status); + if (((status & PERFCOUNTER_FLUSH_DONE_MASK) == PERFCOUNTER_FLUSH_DONE_MASK)) + return; - if (ret) - dev_err(device->dev, "Perfcounter flush timed out: status=0x%08x\n", val); -} + udelay(10); + } -/* - * For registers that do not get restored on power cycle, read the value and add - * the stored shadow value - */ -static u64 gen8_counter_read_norestore(struct adreno_device *adreno_dev, - const struct adreno_perfcount_group *group, u32 counter) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct adreno_perfcount_register *reg = &group->regs[counter]; - u32 hi, lo; + /* Check status one last time */ + kgsl_regread(device, GEN8_RBBM_PERFCTR_FLUSH_HOST_STATUS, &status); - gen8_rbbm_perfctr_flush(device); - - kgsl_regread(device, reg->offset, &lo); - kgsl_regread(device, reg->offset_hi, &hi); - - return ((((u64) hi) << 32) | lo) + reg->value; + if ((status & PERFCOUNTER_FLUSH_DONE_MASK) != PERFCOUNTER_FLUSH_DONE_MASK) + dev_err_ratelimited(device->dev, + "Perfcounter flush timed out: status=0x%08x\n", status); } static int gen8_counter_br_enable(struct adreno_device *adreno_dev, @@ -125,6 +117,18 @@ static u64 gen8_counter_read(struct adreno_device *adreno_dev, return (((u64) hi) << 32) | lo; } +/* + * For registers that do not get restored on power cycle, read the value and add + * the stored shadow value + */ +static u64 gen8_counter_read_norestore(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, u32 counter) +{ + struct adreno_perfcount_register *reg = &group->regs[counter]; + + return gen8_counter_read(adreno_dev, group, counter) + reg->value; +} + static int gen8_counter_gbif_enable(struct adreno_device *adreno_dev, const struct adreno_perfcount_group *group, u32 counter, u32 countable) diff --git a/adreno_perfcounter.c b/adreno_perfcounter.c index e4d52db27d..0442683104 100644 --- a/adreno_perfcounter.c +++ 
b/adreno_perfcounter.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2002,2007-2020, The Linux Foundation. All rights reserved. - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023,2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -198,8 +198,15 @@ int adreno_perfcounter_read_group(struct adreno_device *adreno_dev, /* group/counter iterator */ for (i = 0; i < group->reg_count; i++) { if (group->regs[i].countable == list[j].countable) { + unsigned long irq_flags; + + if (!ADRENO_ACQUIRE_CP_SEMAPHORE(adreno_dev, irq_flags)) { + ret = -EAGAIN; + break; + } list[j].value = adreno_perfcounter_read( adreno_dev, list[j].groupid, i); + ADRENO_RELEASE_CP_SEMAPHORE(adreno_dev, irq_flags); break; } } diff --git a/gen8_reg.h b/gen8_reg.h index a60908583c..778ec6cf94 100644 --- a/gen8_reg.h +++ b/gen8_reg.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2025, Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _GEN8_REG_H @@ -766,6 +766,7 @@ #define GEN8_CP_APERTURE_CNTL_GMU 0x81d #define GEN8_CP_APERTURE_CNTL_CD 0x81e #define GEN8_CP_CP2GMU_STATUS 0x822 +#define GEN8_CP_SEMAPHORE_REG_0 0x825 #define GEN8_CP_RL_ERROR_DETAILS_0 0x840 #define GEN8_CP_RL_ERROR_DETAILS_1 0x841 #define GEN8_CP_CRASH_DUMP_SCRIPT_BASE_LO 0x842 From f810c4c9a6358ff45b9cc72f191de673c274c8a8 Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Fri, 17 Jan 2025 19:54:48 +0530 Subject: [PATCH 0997/1016] msm: kgsl: Add ACD feature for Kera GPU Add ACD feature support for Kera GPU. 
Change-Id: Ib6f577b079cc3e833e52cd78751f780d29fb19ca Signed-off-by: Kaushal Sanadhya Signed-off-by: Sanjay Yadav --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 08cd081643..250fedc07d 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2627,7 +2627,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_17_0 = { UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), .compatible = "qcom,adreno-gpu-gen7-17-0", .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | - ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_BCL, + ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_BCL | ADRENO_ACD, .gpudev = &adreno_gen7_gmu_gpudev.base, .perfcounters = &adreno_gen7_no_cb_perfcounters, .uche_gmem_alignment = SZ_16M, From f795c8faf46f1c82cb69ab23633e477ef92d8a1c Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Thu, 13 Feb 2025 14:13:11 +0530 Subject: [PATCH 0998/1016] kgsl: gen7: Enable BCL feature for gen7_17_0 GPU Battery Current Limiter prevents under voltage and provides overcurrent protection. 
Change-Id: I800f58e394e0b127363545c67257aa9d773b8e08 Signed-off-by: Sanjay Yadav (cherry picked from commit 949c8e694a756d8a03ea90426b5470c993df7d02) --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 8465548a92..08cd081643 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2627,7 +2627,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_17_0 = { UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), .compatible = "qcom,adreno-gpu-gen7-17-0", .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | - ADRENO_IFPC | ADRENO_PREEMPTION, + ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_BCL, .gpudev = &adreno_gen7_gmu_gpudev.base, .perfcounters = &adreno_gen7_no_cb_perfcounters, .uche_gmem_alignment = SZ_16M, From a6ce38b17b04182d79fab3dc210b99b1ee38bd6a Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Fri, 17 Jan 2025 19:54:48 +0530 Subject: [PATCH 0999/1016] msm: kgsl: Add ACD feature for Kera GPU Add ACD feature support for Kera GPU. 
Change-Id: Ib6f577b079cc3e833e52cd78751f780d29fb19ca Signed-off-by: Kaushal Sanadhya Signed-off-by: Sanjay Yadav --- adreno-gpulist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 08cd081643..250fedc07d 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2627,7 +2627,7 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_17_0 = { UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), .compatible = "qcom,adreno-gpu-gen7-17-0", .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | - ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_BCL, + ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_BCL | ADRENO_ACD, .gpudev = &adreno_gen7_gmu_gpudev.base, .perfcounters = &adreno_gen7_no_cb_perfcounters, .uche_gmem_alignment = SZ_16M, From 2442cb033ac4e731c420a9f86d8c0266c1952ca8 Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Wed, 12 Mar 2025 22:31:23 +0530 Subject: [PATCH 1000/1016] kgsl: gen7: Enable L1 preemption for gen_7_17_0 Enable the L1 preemption GPU functionality for QoS improvement. 
Change-Id: I7fd25bdd0e4d5d3a9d8d3dbca4302327e3ea381b Signed-off-by: Sanjay Yadav --- adreno-gpulist.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 250fedc07d..610683d4e6 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2648,7 +2648,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_17_0 = { .protected_regs = gen7_0_0_protected_regs, .highest_bank_bit = 15, .gen7_snapshot_block_list = &gen7_17_0_snapshot_block_list, - .ctxt_record_size = 1536 * 1024 + .ctxt_record_size = 1536 * 1024, + .preempt_level = 1, }; static const struct kgsl_regmap_list a663_hwcg_regs[] = { From 6531b7515cbd7adbfad4c021516c81df22d8361a Mon Sep 17 00:00:00 2001 From: NISARG SHETH Date: Wed, 11 Sep 2024 22:59:43 +0530 Subject: [PATCH 1001/1016] kgsl: pwrscale: Move devfreq notify to a separate worker Currently, the devfreq notify operation occurs within a workqueue that has normal priority. This can be blocked by higher priority processes, potentially causing issues if preemption occurs while the device mutex is held. To avoid this, handle the devfreq notify operation within a separate high priority kthread worker. 
Change-Id: I92f8836983719d3679f1ccfaf59980be774b912e Signed-off-by: NISARG SHETH --- kgsl_pwrscale.c | 31 +++++++++++++++++++++---------- kgsl_pwrscale.h | 6 ++++-- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/kgsl_pwrscale.c b/kgsl_pwrscale.c index b4d3fd1caf..f0def2b29c 100644 --- a/kgsl_pwrscale.c +++ b/kgsl_pwrscale.c @@ -22,7 +22,7 @@ static struct devfreq_msm_adreno_tz_data adreno_tz_data = { static void do_devfreq_suspend(struct work_struct *work); static void do_devfreq_resume(struct work_struct *work); -static void do_devfreq_notify(struct work_struct *work); +static void do_devfreq_notify(struct kthread_work *work); /* * These variables are used to keep the latest data @@ -133,24 +133,25 @@ void kgsl_pwrscale_update_stats(struct kgsl_device *device) void kgsl_pwrscale_update(struct kgsl_device *device) { ktime_t t; + struct kgsl_pwrscale *pwrscale = &device->pwrscale; if (WARN_ON(!mutex_is_locked(&device->mutex))) return; - if (!device->pwrscale.enabled) + if (!pwrscale->enabled) return; t = ktime_get(); - if (ktime_compare(t, device->pwrscale.next_governor_call) < 0) + if (ktime_compare(t, pwrscale->next_governor_call) < 0) return; - device->pwrscale.next_governor_call = ktime_add_us(t, + pwrscale->next_governor_call = ktime_add_us(t, KGSL_GOVERNOR_CALL_INTERVAL); /* to call update_devfreq() from a kernel thread */ if (device->state != KGSL_STATE_SLUMBER) - queue_work(device->pwrscale.devfreq_wq, - &device->pwrscale.devfreq_notify_ws); + kthread_queue_work(pwrscale->devfreq_notify_worker, + &pwrscale->devfreq_notify_work); } /* @@ -708,6 +709,13 @@ int kgsl_pwrscale_init(struct kgsl_device *device, struct platform_device *pdev, return -ENOMEM; } + pwrscale->devfreq_notify_worker = kthread_create_worker(0, "kgsl_devfreq_notifier"); + if (IS_ERR(pwrscale->devfreq_notify_worker)) { + ret = PTR_ERR(pwrscale->devfreq_notify_worker); + dev_err(device->dev, "Failed to create devfreq notify worker ret: %d\n", ret); + return ret; + } + ret = 
msm_adreno_tz_init(); if (ret) { dev_err(device->dev, "Failed to add adreno tz governor: %d\n", ret); @@ -734,8 +742,8 @@ int kgsl_pwrscale_init(struct kgsl_device *device, struct platform_device *pdev, INIT_WORK(&pwrscale->devfreq_suspend_ws, do_devfreq_suspend); INIT_WORK(&pwrscale->devfreq_resume_ws, do_devfreq_resume); - INIT_WORK(&pwrscale->devfreq_notify_ws, do_devfreq_notify); - + kthread_init_work(&pwrscale->devfreq_notify_work, do_devfreq_notify); + sched_set_fifo(pwrscale->devfreq_notify_worker->task); pwrscale->next_governor_call = ktime_add_us(ktime_get(), KGSL_GOVERNOR_CALL_INTERVAL); @@ -767,6 +775,9 @@ void kgsl_pwrscale_close(struct kgsl_device *device) if (!pwrscale->devfreqptr) return; + if (!IS_ERR_OR_NULL(pwrscale->devfreq_notify_worker)) + kthread_destroy_worker(pwrscale->devfreq_notify_worker); + if (pwrscale->devfreq_wq) { flush_workqueue(pwrscale->devfreq_wq); destroy_workqueue(pwrscale->devfreq_wq); @@ -796,10 +807,10 @@ static void do_devfreq_resume(struct work_struct *work) devfreq_resume_device(pwrscale->bus_devfreq); } -static void do_devfreq_notify(struct work_struct *work) +static void do_devfreq_notify(struct kthread_work *work) { struct kgsl_pwrscale *pwrscale = container_of(work, - struct kgsl_pwrscale, devfreq_notify_ws); + struct kgsl_pwrscale, devfreq_notify_work); mutex_lock(&pwrscale->devfreqptr->lock); update_devfreq(pwrscale->devfreqptr); diff --git a/kgsl_pwrscale.h b/kgsl_pwrscale.h index 35d37f342c..97ba10c1f8 100644 --- a/kgsl_pwrscale.h +++ b/kgsl_pwrscale.h @@ -36,7 +36,6 @@ struct kgsl_power_stats { * @devfreq_wq - Main devfreq workqueue * @devfreq_suspend_ws - Pass device suspension to devfreq * @devfreq_resume_ws - Pass device resume to devfreq - * @devfreq_notify_ws - Notify devfreq to update sampling * @next_governor_call - Timestamp after which the governor may be notified of * a new sample * @ctxt_aware_enable - Whether or not ctxt aware DCVS feature is enabled @@ -56,7 +55,10 @@ struct kgsl_pwrscale { struct 
workqueue_struct *devfreq_wq; struct work_struct devfreq_suspend_ws; struct work_struct devfreq_resume_ws; - struct work_struct devfreq_notify_ws; + /** @devfreq_notify_worker: kthread worker to handle devfreq notify event */ + struct kthread_worker *devfreq_notify_worker; + /** @devfreq_notify_work: work struct to update devfreq as per request */ + struct kthread_work devfreq_notify_work; ktime_t next_governor_call; bool ctxt_aware_enable; unsigned int ctxt_aware_target_pwrlevel; From a52ad6834ce556020e6c09e73a04f1d0bb27b400 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Thu, 5 Sep 2024 11:45:22 -0700 Subject: [PATCH 1002/1016] kgsl: Elevate command submission thread priority MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Command submission will be done while holding the device mutex. Other threads that are waiting for the device mutex will end up waiting longer if the submitting thread is preempted while holding the device mutex. Therefore, increase the submission thread’s priority to avoid preemption while holding the device mutex. 
Change-Id: I646e42e153b088f825de7b036775488ef886be28 Signed-off-by: Hareesh Gundu --- adreno_dispatch.c | 25 ++++++++++++++++++------- adreno_hwsched.c | 29 ++++++++++++++++------------- 2 files changed, 34 insertions(+), 20 deletions(-) diff --git a/adreno_dispatch.c b/adreno_dispatch.c index 816dc6b548..7e389e265c 100644 --- a/adreno_dispatch.c +++ b/adreno_dispatch.c @@ -463,11 +463,17 @@ static int sendcmd(struct adreno_device *adreno_dev, unsigned long nsecs = 0; int ret; struct submission_info info = {0}; + int is_current_rt = rt_task(current); mutex_lock(&device->mutex); + + /* Elevating thread’s priority to avoid context switch with holding device mutex */ + if (!is_current_rt) + sched_set_fifo(current); + if (adreno_gpu_halt(adreno_dev) != 0) { - mutex_unlock(&device->mutex); - return -EBUSY; + ret = -EBUSY; + goto err; } memset(&time, 0x0, sizeof(time)); @@ -482,8 +488,7 @@ static int sendcmd(struct adreno_device *adreno_dev, if (ret) { dispatcher->inflight--; dispatch_q->inflight--; - mutex_unlock(&device->mutex); - return ret; + goto err; } set_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv); @@ -524,8 +529,6 @@ static int sendcmd(struct adreno_device *adreno_dev, process_rt_bus_hint(device, false); - mutex_unlock(&device->mutex); - /* * Don't log a message in case of: * -ENOENT means that the context was detached before the @@ -539,7 +542,7 @@ static int sendcmd(struct adreno_device *adreno_dev, dev_err(device->dev, "Unable to submit command to the ringbuffer %d\n", ret); - return ret; + goto err; } secs = time.ktime; @@ -574,6 +577,9 @@ static int sendcmd(struct adreno_device *adreno_dev, log_kgsl_cmdbatch_submitted_event(context->id, drawobj->timestamp, context->priority, drawobj->flags); + if (!is_current_rt) + sched_set_normal(current, 0); + mutex_unlock(&device->mutex); cmdobj->submit_ticks = time.ticks; @@ -600,6 +606,11 @@ static int sendcmd(struct adreno_device *adreno_dev, if (gpudev->preemption_schedule) 
gpudev->preemption_schedule(adreno_dev); return 0; +err: + if (!is_current_rt) + sched_set_normal(current, 0); + mutex_unlock(&device->mutex); + return ret; } /** diff --git a/adreno_hwsched.c b/adreno_hwsched.c index e7cc7dbf4c..5129f12b52 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -423,6 +423,7 @@ static int hwsched_sendcmd(struct adreno_device *adreno_dev, struct kgsl_context *context = drawobj->context; int ret; struct cmd_list_obj *obj; + int is_current_rt = rt_task(current); obj = kmem_cache_alloc(obj_cache, GFP_KERNEL); if (!obj) @@ -430,17 +431,19 @@ static int hwsched_sendcmd(struct adreno_device *adreno_dev, mutex_lock(&device->mutex); + /* Elevating thread’s priority to avoid context switch with holding device mutex */ + if (!is_current_rt) + sched_set_fifo(current); + if (_abort_submission(adreno_dev)) { - mutex_unlock(&device->mutex); - kmem_cache_free(obj_cache, obj); - return -EBUSY; + ret = -EBUSY; + goto done; } if (kgsl_context_detached(context)) { - mutex_unlock(&device->mutex); - kmem_cache_free(obj_cache, obj); - return -ENOENT; + ret = -ENOENT; + goto done; } hwsched->inflight++; @@ -450,9 +453,7 @@ static int hwsched_sendcmd(struct adreno_device *adreno_dev, ret = adreno_active_count_get(adreno_dev); if (ret) { hwsched->inflight--; - mutex_unlock(&device->mutex); - kmem_cache_free(obj_cache, obj); - return ret; + goto done; } set_bit(ADRENO_HWSCHED_POWER, &hwsched->flags); } @@ -469,9 +470,7 @@ static int hwsched_sendcmd(struct adreno_device *adreno_dev, } hwsched->inflight--; - kmem_cache_free(obj_cache, obj); - mutex_unlock(&device->mutex); - return ret; + goto done; } if ((hwsched->inflight == 1) && @@ -498,9 +497,13 @@ static int hwsched_sendcmd(struct adreno_device *adreno_dev, list_add_tail(&obj->node, &hwsched->cmd_list); done: + if (!is_current_rt) + sched_set_normal(current, 0); mutex_unlock(&device->mutex); + if (ret) + kmem_cache_free(obj_cache, obj); - return 0; + return ret; } /** From 
50737e76e582d55ac8e3e0dd3fd5d9eb39020d5a Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Fri, 14 Feb 2025 12:32:52 +0530 Subject: [PATCH 1003/1016] kgsl: Restore original task priority after real-time elevation In hwsched_sendcmd() and gen8_start(), the current task's priority is temporarily elevated to real-time to avoid context switches while holding the device mutex. However, the priority was being reset to the default nice value of 0, which might not be the original priority of the task. This fix ensures that the original nice value of the task is restored after releasing the mutex. Change-Id: Ie63b5c1d332c04def5f8e0358640fdd2cbdf7627 Signed-off-by: Pankaj Gupta --- adreno_dispatch.c | 7 ++++--- adreno_gen8.c | 3 ++- adreno_hwsched.c | 6 +++--- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/adreno_dispatch.c b/adreno_dispatch.c index 7e389e265c..6f4c64aa7f 100644 --- a/adreno_dispatch.c +++ b/adreno_dispatch.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2025, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -464,6 +464,7 @@ static int sendcmd(struct adreno_device *adreno_dev, int ret; struct submission_info info = {0}; int is_current_rt = rt_task(current); + int nice = task_nice(current); mutex_lock(&device->mutex); @@ -578,7 +579,7 @@ static int sendcmd(struct adreno_device *adreno_dev, context->priority, drawobj->flags); if (!is_current_rt) - sched_set_normal(current, 0); + sched_set_normal(current, nice); mutex_unlock(&device->mutex); @@ -608,7 +609,7 @@ static int sendcmd(struct adreno_device *adreno_dev, return 0; err: if (!is_current_rt) - sched_set_normal(current, 0); + sched_set_normal(current, nice); mutex_unlock(&device->mutex); return ret; } diff --git a/adreno_gen8.c b/adreno_gen8.c index 96b32ffce0..fc7fa92deb 100644 --- a/adreno_gen8.c +++ b/adreno_gen8.c @@ -1316,6 +1316,7 @@ int gen8_start(struct adreno_device *adreno_dev) u64 uche_trap_base = gen8_get_uche_trap_base(); u32 rgba8888_lossless = 0, fp16compoptdis = 0; int is_current_rt = rt_task(current); + int nice = task_nice(current); /* Reset aperture fields to go through first aperture write check */ gen8_dev->aperture = UINT_MAX; @@ -1532,7 +1533,7 @@ int gen8_start(struct adreno_device *adreno_dev) device->regmap.use_relaxed = true; if (!is_current_rt) - sched_set_normal(current, 0); + sched_set_normal(current, nice); return 0; } diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 5129f12b52..f0c1a6ab3e 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2025, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include "adreno.h" @@ -424,6 +424,7 @@ static int hwsched_sendcmd(struct adreno_device *adreno_dev, int ret; struct cmd_list_obj *obj; int is_current_rt = rt_task(current); + int nice = task_nice(current); obj = kmem_cache_alloc(obj_cache, GFP_KERNEL); if (!obj) @@ -440,7 +441,6 @@ static int hwsched_sendcmd(struct adreno_device *adreno_dev, goto done; } - if (kgsl_context_detached(context)) { ret = -ENOENT; goto done; @@ -498,7 +498,7 @@ static int hwsched_sendcmd(struct adreno_device *adreno_dev, done: if (!is_current_rt) - sched_set_normal(current, 0); + sched_set_normal(current, nice); mutex_unlock(&device->mutex); if (ret) kmem_cache_free(obj_cache, obj); From 663852430b8de2b4c8719418ef318cf3b5d3aeae Mon Sep 17 00:00:00 2001 From: Piyush Mehta Date: Thu, 20 Mar 2025 18:43:45 +0530 Subject: [PATCH 1004/1016] kgsl: build: Add support to compile graphics driver for SM6150 To compile the graphics kernel for the SM6150, added a GKI configuration file and included an extra LE compiler flag to link the msm_hw_fence.h header file during compilation. Change-Id: I2c6311803f81a3585e923f54a8e1aff857ae5d8a Signed-off-by: Piyush Mehta --- Kbuild | 4 ++++ config/gki_sm6150.conf | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 config/gki_sm6150.conf diff --git a/Kbuild b/Kbuild index db6c057828..c0b5cf9e90 100644 --- a/Kbuild +++ b/Kbuild @@ -73,6 +73,10 @@ endif ifeq ($(CONFIG_ARCH_SCUBA), y) include $(KGSL_PATH)/config/gki_scuba.conf endif +ifeq ($(CONFIG_ARCH_SM6150), y) + include $(KGSL_PATH)/config/gki_sm6150.conf + subdir-ccflags-y += $(LE_EXTRA_CFLAGS) +endif ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERNEL_SRC)/drivers/devfreq diff --git a/config/gki_sm6150.conf b/config/gki_sm6150.conf new file mode 100644 index 0000000000..65d0b23b73 --- /dev/null +++ b/config/gki_sm6150.conf @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (c) 2025 Qualcomm Innovation Center, Inc. 
All rights reserved. + +CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 +CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y +CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT = y +CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR = \"msm-adreno-tz\" +CONFIG_QCOM_KGSL_SORT_POOL = y + +ifneq ($(CONFIG_SHMEM),) + CONFIG_QCOM_KGSL_USE_SHMEM = y + CONFIG_QCOM_KGSL_PROCESS_RECLAIM = y +endif + +ccflags-y += -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ + -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ + -DCONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=1 \ + -DCONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR=\"msm-adreno-tz\" \ + -DCONFIG_QCOM_KGSL_SORT_POOL=1 + +ifneq ($(CONFIG_SHMEM),) + ccflags-y += -DCONFIG_QCOM_KGSL_USE_SHMEM=1 \ + -DCONFIG_QCOM_KGSL_PROCESS_RECLAIM=1 +endif From 0fff2b124aedf558de32352e06d6748ff2748a6d Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Thu, 20 Mar 2025 18:49:47 +0530 Subject: [PATCH 1005/1016] kgsl: hwsched: Set ADRENO_CONTEXT_FAULT flag upon GPU fault Currently, we do not check and set ADRENO_CONTEXT_FAULT after queuing the command, which results in IOCTL_KGSL_GPU_COMMAND not returning the appropriate error to the user. Add a check to ensure that if there is a gpu fault, it returns EPROTO error to userspace. This error indicates to userspace to start pulling the snapshot from the sysfs node (kgsl-3d0/snapshot) into the specified path. Change-Id: I75ac9eec87abd64e256dd1bc6e240eea04ba25f1 Signed-off-by: Sanjay Yadav --- adreno_hwsched.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/adreno_hwsched.c b/adreno_hwsched.c index e7cc7dbf4c..3c130ad94a 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2025, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include "adreno.h" @@ -1090,6 +1090,9 @@ static int adreno_hwsched_queue_cmds(struct kgsl_device_private *dev_priv, } else kmem_cache_free(jobs_cache, job); + if (test_and_clear_bit(ADRENO_CONTEXT_FAULT, &context->priv)) + return -EPROTO; + return 0; } @@ -1711,6 +1714,9 @@ static void adreno_hwsched_reset_and_snapshot_legacy(struct adreno_device *adren drawobj = NULL; } + if (drawobj && drawobj->context) + set_bit(ADRENO_CONTEXT_FAULT, &drawobj->context->priv); + adreno_gpufault_stats(adreno_dev, drawobj, NULL, fault); if (!drawobj) { @@ -1796,6 +1802,9 @@ static void adreno_hwsched_reset_and_snapshot(struct adreno_device *adreno_dev, drawobj = NULL; } + if (drawobj && drawobj->context) + set_bit(ADRENO_CONTEXT_FAULT, &drawobj->context->priv); + do_fault_header(adreno_dev, drawobj, fault); if (!obj_lpac && (fault & ADRENO_IOMMU_STALL_ON_PAGE_FAULT)) @@ -1822,6 +1831,9 @@ static void adreno_hwsched_reset_and_snapshot(struct adreno_device *adreno_dev, gpudev->lpac_fault_header(adreno_dev, drawobj_lpac); } + if (drawobj_lpac && drawobj_lpac->context) + set_bit(ADRENO_CONTEXT_FAULT, &drawobj_lpac->context->priv); + kgsl_device_snapshot(device, context, context_lpac, false); adreno_gpufault_stats(adreno_dev, drawobj, drawobj_lpac, fault); From d266507dedf0fe7bc262c0d3f153fc8929d2f5d0 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Fri, 10 Jan 2025 01:04:33 +0530 Subject: [PATCH 1006/1016] kgsl: Use KGSL virtual device for cache operations DMA ops for KGSL virtual device is set to null, allowing it to use direct DMA operations. In contrast, the GPU platform device has DMA operations set, which results in the use of sync_sg_for_device ops in dma_map_ops. Direct DMA operations are more performant. Since KGSL performs cache operations on a per-page basis (contiguous memory), it can benefit from using direct DMA operations. Therefore, use KGSL virtual device instead of GPU platform device for cache operations. 
Change-Id: Iaf3311717a92326b8eb79bf7cd8676aeefe21fcd Signed-off-by: Kamal Agrawal Signed-off-by: Gayathri Veeragandam --- kgsl.c | 9 +++++++++ kgsl_sharedmem.c | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/kgsl.c b/kgsl.c index 13d4c6887c..c4a4552ea5 100644 --- a/kgsl.c +++ b/kgsl.c @@ -5334,6 +5334,8 @@ void kgsl_core_exit(void) int __init kgsl_core_init(void) { + static u64 dma_mask = (u64)DMA_BIT_MASK(64); + static struct device_dma_parameters dma_parms; int result = 0; KGSL_BOOT_MARKER("KGSL Init"); @@ -5384,6 +5386,13 @@ int __init kgsl_core_init(void) goto err; } + kgsl_driver.virtdev.dma_mask = &dma_mask; + kgsl_driver.virtdev.dma_parms = &dma_parms; + + dma_set_max_seg_size(&kgsl_driver.virtdev, (u32)DMA_BIT_MASK(32)); + + set_dma_ops(&kgsl_driver.virtdev, NULL); + /* Make kobjects in the virtual device for storing statistics */ kgsl_driver.ptkobj = diff --git a/kgsl_sharedmem.c b/kgsl_sharedmem.c index 24bb1f120f..dfb53690fd 100644 --- a/kgsl_sharedmem.c +++ b/kgsl_sharedmem.c @@ -830,7 +830,7 @@ void kgsl_memdesc_init(struct kgsl_device *device, * cache operations at allocation time */ if (!(flags & KGSL_MEMFLAGS_IOCOHERENT)) - memdesc->dev = &device->pdev->dev; + memdesc->dev = &kgsl_driver.virtdev; align = max_t(unsigned int, kgsl_memdesc_get_align(memdesc), ilog2(PAGE_SIZE)); From 7e053aed440508f122796581d2f44fea7c197d6f Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 27 Mar 2025 11:30:35 +0530 Subject: [PATCH 1007/1016] kgsl: Add support for GPU frequency tracing in power system Add support to trace GPU frequency events from the power system. This is necessary for CTS test cases on the latest Android version. Since duplicate events are not allowed, the existing gpu_frequency inside kgsl has been renamed to kgsl_gpu_frequency. 
Change-Id: Idd627d167e051c2cd2e26214025b0eebb89910cc Signed-off-by: Kamal Agrawal --- kgsl_power_trace.h | 22 +++++++++++++++++++++- kgsl_pwrctrl.c | 11 ++++++----- kgsl_trace.h | 10 ++++++++-- 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/kgsl_power_trace.h b/kgsl_power_trace.h index 76882b01e0..a1247d985a 100644 --- a/kgsl_power_trace.h +++ b/kgsl_power_trace.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022, 2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #if !defined(_KGSL_POWER_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) @@ -52,6 +52,26 @@ DEFINE_EVENT(gpu_work_period_class, gpu_work_period, TP_ARGS(gpu_id, uid, start_time_ns, end_time_ns, total_active_duration_ns) ); + +/* + * Tracepoint for power gpu_frequency + */ +TRACE_EVENT(gpu_frequency, + TP_PROTO(u32 state, u32 gpu_id), + TP_ARGS(state, gpu_id), + TP_STRUCT__entry( + __field(unsigned int, state) + __field(unsigned int, gpu_id) + ), + TP_fast_assign( + __entry->state = state; + __entry->gpu_id = gpu_id; + ), + + TP_printk("state=%lu gpu_id=%lu", + (unsigned long)__entry->state, + (unsigned long)__entry->gpu_id) +); #endif /* _KGSL_POWER_TRACE_H */ /* This part must be outside protection */ diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 2ac79015ef..9ce493684a 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2010-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2025, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -19,6 +19,7 @@ #include "kgsl_device.h" #include "kgsl_bus.h" +#include "kgsl_power_trace.h" #include "kgsl_pwrscale.h" #include "kgsl_sysfs.h" #include "kgsl_trace.h" @@ -249,7 +250,7 @@ void kgsl_pwrctrl_pwrlevel_change(struct kgsl_device *device, pwr->previous_pwrlevel, pwr->pwrlevels[old_level].gpu_freq); - trace_gpu_frequency(pwrlevel->gpu_freq/1000, 0); + KGSL_TRACE_GPU_FREQ(pwrlevel->gpu_freq/1000, 0); /* Update the bus after GPU clock decreases. */ if (new_level > old_level) @@ -2256,7 +2257,7 @@ static int _wake(struct kgsl_device *device) kgsl_pwrctrl_axi(device, true); kgsl_pwrscale_wake(device); kgsl_pwrctrl_irq(device, true); - trace_gpu_frequency( + KGSL_TRACE_GPU_FREQ( pwr->pwrlevels[pwr->active_pwrlevel].gpu_freq/1000, 0); kgsl_bus_update(device, KGSL_BUS_VOTE_ON); @@ -2347,7 +2348,7 @@ _slumber(struct kgsl_device *device) device->ftbl->stop(device); kgsl_pwrctrl_disable(device); kgsl_pwrscale_sleep(device); - trace_gpu_frequency(0, 0); + KGSL_TRACE_GPU_FREQ(0, 0); kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER); break; case KGSL_STATE_SUSPEND: @@ -2357,7 +2358,7 @@ _slumber(struct kgsl_device *device) break; case KGSL_STATE_AWARE: kgsl_pwrctrl_disable(device); - trace_gpu_frequency(0, 0); + KGSL_TRACE_GPU_FREQ(0, 0); kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER); break; default: diff --git a/kgsl_trace.h b/kgsl_trace.h index 4206219057..3daa4de9c7 100644 --- a/kgsl_trace.h +++ b/kgsl_trace.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2011-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2025, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #if !defined(_KGSL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) @@ -20,6 +20,12 @@ #include "kgsl_drawobj.h" #include "kgsl_sharedmem.h" +#define KGSL_TRACE_GPU_FREQ(freq, gpu_id) \ + do { \ + trace_gpu_frequency(freq, gpu_id); \ + trace_kgsl_gpu_frequency(freq, gpu_id); \ + } while (0) + #define show_memtype(type) \ __print_symbolic(type, \ { KGSL_MEM_ENTRY_KERNEL, "gpumem" }, \ @@ -316,7 +322,7 @@ TRACE_EVENT(kgsl_pwrlevel, /* * Tracepoint for kgsl gpu_frequency */ -TRACE_EVENT(gpu_frequency, +TRACE_EVENT(kgsl_gpu_frequency, TP_PROTO(unsigned int gpu_freq, unsigned int gpu_id), TP_ARGS(gpu_freq, gpu_id), TP_STRUCT__entry( From 1e76d5f446b555dffc648b54a5008122956588f5 Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Wed, 12 Mar 2025 22:31:23 +0530 Subject: [PATCH 1008/1016] kgsl: gen7: Enable L1 preemption for gen_7_17_0 Enable the L1 preemption GPU functionality for QoS improvement. Change-Id: I7fd25bdd0e4d5d3a9d8d3dbca4302327e3ea381b Signed-off-by: Sanjay Yadav --- adreno-gpulist.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 250fedc07d..610683d4e6 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -2648,7 +2648,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_17_0 = { .protected_regs = gen7_0_0_protected_regs, .highest_bank_bit = 15, .gen7_snapshot_block_list = &gen7_17_0_snapshot_block_list, - .ctxt_record_size = 1536 * 1024 + .ctxt_record_size = 1536 * 1024, + .preempt_level = 1, }; static const struct kgsl_regmap_list a663_hwcg_regs[] = { From 452fc8d4cb6bef8acd023eb2bf6ece42ffc63ff3 Mon Sep 17 00:00:00 2001 From: Phani Muralidhar Sistla Date: Sat, 26 Apr 2025 12:00:07 +0530 Subject: [PATCH 1009/1016] kgsl: pwrctrl: Fix GPU frequency and power level traces Currenlty, kgsl power level trace and GPU frequency trace are not printed during slumber exit. This can impact automation scripts and analysis. 
therefore, move the tracing to adreno_gpu_clock_set function which is invoked for every GPU frequency update. Change-Id: Ib566d06085411f45225ce83b453d9263271e886c Signed-off-by: Kamal Agrawal Signed-off-by: Phani Muralidhar Sistla --- adreno.c | 20 +++++++++++++++----- kgsl_pwrctrl.c | 7 ------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/adreno.c b/adreno.c index dad1b12704..98b5963f81 100644 --- a/adreno.c +++ b/adreno.c @@ -31,6 +31,7 @@ #include "adreno_pm4types.h" #include "adreno_trace.h" #include "kgsl_bus.h" +#include "kgsl_power_trace.h" #include "kgsl_reclaim.h" #include "kgsl_trace.h" #include "kgsl_util.h" @@ -3457,16 +3458,25 @@ static int adreno_gpu_clock_set(struct kgsl_device *device, u32 pwrlevel) const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev); struct kgsl_pwrctrl *pwr = &device->pwrctrl; struct kgsl_pwrlevel *pl = &pwr->pwrlevels[pwrlevel]; + u32 prev_pwrlevel = pwr->previous_pwrlevel; int ret; - if (ops->gpu_clock_set) - return ops->gpu_clock_set(adreno_dev, pwrlevel); + if (ops->gpu_clock_set) { + ret = ops->gpu_clock_set(adreno_dev, pwrlevel); + } else { + ret = clk_set_rate(pwr->grp_clks[0], pl->gpu_freq); + if (ret) + dev_err(device->dev, "GPU clk freq set failure: %d\n", ret); + } - ret = clk_set_rate(pwr->grp_clks[0], pl->gpu_freq); if (ret) - dev_err(device->dev, "GPU clk freq set failure: %d\n", ret); + return ret; - return ret; + trace_kgsl_pwrlevel(device, pwrlevel, pl->gpu_freq, + prev_pwrlevel, pwr->pwrlevels[prev_pwrlevel].gpu_freq); + + KGSL_TRACE_GPU_FREQ(pl->gpu_freq/1000, 0); + return 0; } static int adreno_interconnect_bus_set(struct adreno_device *adreno_dev, diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 9ce493684a..f5d4a76c3d 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -245,13 +245,6 @@ void kgsl_pwrctrl_pwrlevel_change(struct kgsl_device *device, device->ftbl->gpu_clock_set(device, pwr->active_pwrlevel); _isense_clk_set_rate(pwr, pwr->active_pwrlevel); - 
trace_kgsl_pwrlevel(device, - pwr->active_pwrlevel, pwrlevel->gpu_freq, - pwr->previous_pwrlevel, - pwr->pwrlevels[old_level].gpu_freq); - - KGSL_TRACE_GPU_FREQ(pwrlevel->gpu_freq/1000, 0); - /* Update the bus after GPU clock decreases. */ if (new_level > old_level) kgsl_bus_update(device, KGSL_BUS_VOTE_ON); From 80290c7f04ee58adb427bdf07acaeb44868a34dc Mon Sep 17 00:00:00 2001 From: Piyush Mehta Date: Thu, 3 Apr 2025 10:48:09 +0530 Subject: [PATCH 1010/1016] kgsl: rgmu: corrected pm suspend flag check on resume Resume issue observed during the kgsl API test, such as the resume being invoked without a suspend, this was triggered due to the incorrect bit GMU_PRIV_PM_SUSPEND conditional check. To resolve the above issue, replace the pm flag of rgmu GMU_PRIV_PM_SUSPEND with RGMU_PRIV_PM_SUSPEND. Change-Id: I04b4f829663ff4c6aa737a2256e6e59f04fef9c9 Signed-off-by: Piyush Mehta --- adreno_a6xx_rgmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adreno_a6xx_rgmu.c b/adreno_a6xx_rgmu.c index 86c5cf0ea0..c8c0da3577 100644 --- a/adreno_a6xx_rgmu.c +++ b/adreno_a6xx_rgmu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2025, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -1162,7 +1162,7 @@ static void a6xx_rgmu_pm_resume(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev); - if (WARN(!test_bit(GMU_PRIV_PM_SUSPEND, &rgmu->flags), + if (WARN(!test_bit(RGMU_PRIV_PM_SUSPEND, &rgmu->flags), "resume invoked without a suspend\n")) return; From 3907b73114ee8f1b1dbc7f427bbf56bca784706f Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 27 Mar 2025 11:30:35 +0530 Subject: [PATCH 1011/1016] kgsl: Add support for GPU frequency tracing in power system Add support to trace GPU frequency events from the power system. This is necessary for CTS test cases on the latest Android version. Since duplicate events are not allowed, the existing gpu_frequency inside kgsl has been renamed to kgsl_gpu_frequency. Change-Id: Idd627d167e051c2cd2e26214025b0eebb89910cc Signed-off-by: Kamal Agrawal (cherry picked from commit 7e053aed440508f122796581d2f44fea7c197d6f) --- kgsl_power_trace.h | 22 +++++++++++++++++++++- kgsl_pwrctrl.c | 11 ++++++----- kgsl_trace.h | 10 ++++++++-- 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/kgsl_power_trace.h b/kgsl_power_trace.h index 76882b01e0..a1247d985a 100644 --- a/kgsl_power_trace.h +++ b/kgsl_power_trace.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022, 2025 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #if !defined(_KGSL_POWER_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) @@ -52,6 +52,26 @@ DEFINE_EVENT(gpu_work_period_class, gpu_work_period, TP_ARGS(gpu_id, uid, start_time_ns, end_time_ns, total_active_duration_ns) ); + +/* + * Tracepoint for power gpu_frequency + */ +TRACE_EVENT(gpu_frequency, + TP_PROTO(u32 state, u32 gpu_id), + TP_ARGS(state, gpu_id), + TP_STRUCT__entry( + __field(unsigned int, state) + __field(unsigned int, gpu_id) + ), + TP_fast_assign( + __entry->state = state; + __entry->gpu_id = gpu_id; + ), + + TP_printk("state=%lu gpu_id=%lu", + (unsigned long)__entry->state, + (unsigned long)__entry->gpu_id) +); #endif /* _KGSL_POWER_TRACE_H */ /* This part must be outside protection */ diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 2ac79015ef..9ce493684a 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2010-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2025, Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -19,6 +19,7 @@ #include "kgsl_device.h" #include "kgsl_bus.h" +#include "kgsl_power_trace.h" #include "kgsl_pwrscale.h" #include "kgsl_sysfs.h" #include "kgsl_trace.h" @@ -249,7 +250,7 @@ void kgsl_pwrctrl_pwrlevel_change(struct kgsl_device *device, pwr->previous_pwrlevel, pwr->pwrlevels[old_level].gpu_freq); - trace_gpu_frequency(pwrlevel->gpu_freq/1000, 0); + KGSL_TRACE_GPU_FREQ(pwrlevel->gpu_freq/1000, 0); /* Update the bus after GPU clock decreases. 
*/ if (new_level > old_level) @@ -2256,7 +2257,7 @@ static int _wake(struct kgsl_device *device) kgsl_pwrctrl_axi(device, true); kgsl_pwrscale_wake(device); kgsl_pwrctrl_irq(device, true); - trace_gpu_frequency( + KGSL_TRACE_GPU_FREQ( pwr->pwrlevels[pwr->active_pwrlevel].gpu_freq/1000, 0); kgsl_bus_update(device, KGSL_BUS_VOTE_ON); @@ -2347,7 +2348,7 @@ _slumber(struct kgsl_device *device) device->ftbl->stop(device); kgsl_pwrctrl_disable(device); kgsl_pwrscale_sleep(device); - trace_gpu_frequency(0, 0); + KGSL_TRACE_GPU_FREQ(0, 0); kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER); break; case KGSL_STATE_SUSPEND: @@ -2357,7 +2358,7 @@ _slumber(struct kgsl_device *device) break; case KGSL_STATE_AWARE: kgsl_pwrctrl_disable(device); - trace_gpu_frequency(0, 0); + KGSL_TRACE_GPU_FREQ(0, 0); kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER); break; default: diff --git a/kgsl_trace.h b/kgsl_trace.h index 4206219057..3daa4de9c7 100644 --- a/kgsl_trace.h +++ b/kgsl_trace.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2011-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2025, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #if !defined(_KGSL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) @@ -20,6 +20,12 @@ #include "kgsl_drawobj.h" #include "kgsl_sharedmem.h" +#define KGSL_TRACE_GPU_FREQ(freq, gpu_id) \ + do { \ + trace_gpu_frequency(freq, gpu_id); \ + trace_kgsl_gpu_frequency(freq, gpu_id); \ + } while (0) + #define show_memtype(type) \ __print_symbolic(type, \ { KGSL_MEM_ENTRY_KERNEL, "gpumem" }, \ @@ -316,7 +322,7 @@ TRACE_EVENT(kgsl_pwrlevel, /* * Tracepoint for kgsl gpu_frequency */ -TRACE_EVENT(gpu_frequency, +TRACE_EVENT(kgsl_gpu_frequency, TP_PROTO(unsigned int gpu_freq, unsigned int gpu_id), TP_ARGS(gpu_freq, gpu_id), TP_STRUCT__entry( From b64e0240eddf135f614b722a6edce8121918dd32 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Wed, 30 Apr 2025 11:30:55 -0700 Subject: [PATCH 1012/1016] kgsl: hwsched: Fix context unregister timeout value Remove the extra conversion to msecs_to_jiffies. Change-Id: I18f563088a488cd9de7d165dcbf37dc19efd3566 Signed-off-by: Harshdeep Dhatt Signed-off-by: Kamal Agrawal --- adreno_hwsched.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 6aee56f051..ae0dc8f049 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2025, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 
*/ #include "adreno.h" @@ -2365,8 +2365,7 @@ int adreno_hwsched_ctxt_unregister_wait_completion( start = gpudev->read_alwayson(adreno_dev); mutex_unlock(&device->mutex); - ret = wait_for_completion_timeout(&ack->complete, - msecs_to_jiffies(msecs_to_jiffies(30 * 1000))); + ret = wait_for_completion_timeout(&ack->complete, msecs_to_jiffies(30 * 1000)); mutex_lock(&device->mutex); if (ret) From 3e2c5895c8d5e7591cb3a02744811f2a239a1584 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Fri, 6 Jun 2025 10:19:24 +0530 Subject: [PATCH 1013/1016] kgsl: Fix null pointer dereference during secure memory alloc Currently, when we attempt to allocate shared memory we track the underlying pages in the memdesc with a scatter gather table. Due to this we do not assign a pointer to the memdesc->pages. Under memory pressure, allocation of the sg or sgt may fail which is detected properly. However, our clean up code drops the page references based on memdesc->pages. Since secure memory does not track via memdesc->pages we get a NPD. We'll quickly assign pages before calling _kgsl_free_pages, and modify _kgsl_free_pages to clean up the references/count after. Change-Id: I763634bf00ebef7a57a8d56c21ee8d45cf2b39da Signed-off-by: Lynus Vaz Signed-off-by: Pankaj Gupta --- kgsl_sharedmem.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/kgsl_sharedmem.c b/kgsl_sharedmem.c index 200b50ebb9..305712678a 100644 --- a/kgsl_sharedmem.c +++ b/kgsl_sharedmem.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2025 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 
*/ #include @@ -1190,6 +1190,11 @@ static void _kgsl_free_pages(struct kgsl_memdesc *memdesc) if (memdesc->pages[i]) put_page(memdesc->pages[i]); + memdesc->page_count = 0; + kvfree(memdesc->pages); + + memdesc->pages = NULL; + SHMEM_I(memdesc->shmem_filp->f_mapping->host)->android_vendor_data1 = 0; fput(memdesc->shmem_filp); } @@ -1230,6 +1235,11 @@ static void kgsl_free_page(struct page *p) static void _kgsl_free_pages(struct kgsl_memdesc *memdesc) { kgsl_pool_free_pages(memdesc->pages, memdesc->page_count); + + memdesc->page_count = 0; + kvfree(memdesc->pages); + + memdesc->pages = NULL; } static u32 kgsl_get_page_order(struct page *page) @@ -1384,10 +1394,6 @@ static void kgsl_free_pages(struct kgsl_memdesc *memdesc) _kgsl_free_pages(memdesc); - memdesc->page_count = 0; - kvfree(memdesc->pages); - - memdesc->pages = NULL; } static void kgsl_free_system_pages(struct kgsl_memdesc *memdesc) @@ -1691,16 +1697,16 @@ static int kgsl_alloc_secure_pages(struct kgsl_device *device, sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); if (!sgt) { + memdesc->pages = pages; _kgsl_free_pages(memdesc); - kvfree(pages); return -ENOMEM; } ret = sg_alloc_table_from_pages(sgt, pages, count, 0, size, GFP_KERNEL); if (ret) { kfree(sgt); + memdesc->pages = pages; _kgsl_free_pages(memdesc); - kvfree(pages); return ret; } From 683588cd47efc8d0b320fd0fb0e8372bea5aa274 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 22 May 2025 21:43:25 +0530 Subject: [PATCH 1014/1016] kgsl: Allocate secure pages using system memory for SHMEM config SHMEM pages are requested in 4K chunks, which introduces higher allocation latency. This can also result in fragmented memory. Since secure memory is non-reclaimable, allocating it via SHMEM is inefficient. Use system memory directly to reduce allocation latency for secure buffers and improve performance. 
Change-Id: I956d6800337167fe48e53da7ffa68de79f2467f7 Signed-off-by: Kamal Agrawal --- kgsl_sharedmem.c | 76 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 68 insertions(+), 8 deletions(-) diff --git a/kgsl_sharedmem.c b/kgsl_sharedmem.c index 305712678a..463c7b1be1 100644 --- a/kgsl_sharedmem.c +++ b/kgsl_sharedmem.c @@ -1114,6 +1114,45 @@ void kgsl_register_shmem_callback(void) register_trace_android_rvh_shmem_get_folio(kgsl_shmem_fill_page, NULL); } +static int kgsl_alloc_secure(int *page_size, struct page **pages, + u32 *align, struct device *dev) +{ + int order = get_order(*page_size); + gfp_t gfp_mask = kgsl_gfp_mask(order); + struct page *page = NULL; + int j, pcount = 0; + size_t size = 0; + + page = alloc_pages(gfp_mask, order); + if (!page) { + /* Retry with lower order pages */ + if (order > 0) { + size = PAGE_SIZE << --order; + goto eagain; + } else + return -ENOMEM; + } + + kgsl_zero_page(page, order, dev); + + for (j = 0; j < (*page_size >> PAGE_SHIFT); j++) { + pages[pcount] = nth_page(page, j); + pcount++; + } + + return pcount; + +eagain: + *page_size = kgsl_get_page_size(size, ilog2(size)); + *align = ilog2(*page_size); + return -EAGAIN; +} + +static void kgsl_free_secure(struct page *p) +{ + __free_pages(p, compound_order(p)); +} + static int kgsl_alloc_page(struct kgsl_memdesc *memdesc, int *page_size, struct page **pages, unsigned int pages_len, unsigned int *align, unsigned int page_off) @@ -1127,6 +1166,9 @@ static int kgsl_alloc_page(struct kgsl_memdesc *memdesc, int *page_size, if (fatal_signal_pending(current)) return -EINTR; + if (!memdesc->shmem_filp) + return kgsl_alloc_secure(page_size, pages, align, memdesc->dev); + page = shmem_read_mapping_page_gfp(memdesc->shmem_filp->f_mapping, page_off, kgsl_gfp_mask(0)); if (IS_ERR(page)) @@ -1149,6 +1191,15 @@ static int kgsl_memdesc_file_setup(struct kgsl_memdesc *memdesc) { int ret; + /* + * SHMEM pages are allocated in 4K chunks, which introduces higher + * allocation 
latency. Since secure memory is non-reclaimable, + * allocating it via SHMEM is inefficient. Use system memory directly + * to reduce allocation latency for secure buffers. + */ + if (kgsl_memdesc_is_secured(memdesc)) + return 0; + memdesc->shmem_filp = shmem_file_setup("kgsl-3d0", memdesc->size, VM_NORESERVE); if (IS_ERR(memdesc->shmem_filp)) { @@ -1164,18 +1215,24 @@ static int kgsl_memdesc_file_setup(struct kgsl_memdesc *memdesc) return 0; } -static void kgsl_free_page(struct page *p) +static void kgsl_free_page(struct kgsl_memdesc *memdesc, struct page *p) { + if (!memdesc->shmem_filp) + return kgsl_free_secure(p); + put_page(p); } static void kgsl_memdesc_pagelist_cleanup(struct kgsl_memdesc *memdesc) { + if (!memdesc->shmem_filp) + return; + while (!list_empty(&memdesc->shmem_page_list)) { struct page *page = list_first_entry(&memdesc->shmem_page_list, struct page, lru); list_del(&page->lru); - kgsl_free_page(page); + kgsl_free_page(memdesc, page); } } @@ -1183,9 +1240,6 @@ static void _kgsl_free_pages(struct kgsl_memdesc *memdesc) { int i; - WARN(!list_empty(&memdesc->shmem_page_list), - "KGSL shmem page list is not empty\n"); - for (i = 0; i < memdesc->page_count; i++) if (memdesc->pages[i]) put_page(memdesc->pages[i]); @@ -1195,6 +1249,12 @@ static void _kgsl_free_pages(struct kgsl_memdesc *memdesc) memdesc->pages = NULL; + if (!memdesc->shmem_filp) + return; + + WARN(!list_empty(&memdesc->shmem_page_list), + "KGSL shmem page list is not empty\n"); + SHMEM_I(memdesc->shmem_filp->f_mapping->host)->android_vendor_data1 = 0; fput(memdesc->shmem_filp); } @@ -1227,7 +1287,7 @@ static void kgsl_memdesc_pagelist_cleanup(struct kgsl_memdesc *memdesc) { } -static void kgsl_free_page(struct page *p) +static void kgsl_free_page(struct kgsl_memdesc *memdesc, struct page *p) { kgsl_pool_free_page(p); } @@ -1351,7 +1411,7 @@ static int _kgsl_alloc_pages(struct kgsl_memdesc *memdesc, for (i = 0; i < count; ) { int n = 1 << kgsl_get_page_order(local[i]); - 
kgsl_free_page(local[i]); + kgsl_free_page(memdesc, local[i]); i += n; } kvfree(local); @@ -1470,7 +1530,7 @@ static void kgsl_free_pages_from_sgt(struct kgsl_memdesc *memdesc) while (j < (sg->length/PAGE_SIZE)) { count = 1 << compound_order(p); next = nth_page(p, count); - kgsl_free_page(p); + kgsl_free_page(memdesc, p); p = next; j += count; From 2dd2d2a7743f6f6cf59e08824762553a3015b749 Mon Sep 17 00:00:00 2001 From: Kai Xing Date: Wed, 9 Apr 2025 16:48:26 +0800 Subject: [PATCH 1015/1016] kgsl: build: Add changes to compile graphics-kernel for Sun Add changes to compile graphics-kernel for Sun. Change-Id: I4fccc5c65c6651d7db8bc04ffd195ae6e5ee14d0 Signed-off-by: Kai Xing --- Kbuild | 4 ++++ config/gki_sun.conf | 25 +++++++++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 config/gki_sun.conf diff --git a/Kbuild b/Kbuild index c0b5cf9e90..6acc7dceb5 100644 --- a/Kbuild +++ b/Kbuild @@ -77,6 +77,10 @@ ifeq ($(CONFIG_ARCH_SM6150), y) include $(KGSL_PATH)/config/gki_sm6150.conf subdir-ccflags-y += $(LE_EXTRA_CFLAGS) endif +ifeq ($(CONFIG_ARCH_SUN), y) + include $(KGSL_PATH)/config/gki_sun.conf + subdir-ccflags-y += $(LE_EXTRA_CFLAGS) +endif ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERNEL_SRC)/drivers/devfreq diff --git a/config/gki_sun.conf b/config/gki_sun.conf new file mode 100644 index 0000000000..69af82f495 --- /dev/null +++ b/config/gki_sun.conf @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: GPL-2.0-only + +CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 +CONFIG_QCOM_KGSL_SORT_POOL = y +CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y +CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT = y +CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR = \"msm-adreno-tz\" +CONFIG_QCOM_KGSL_SYNX = y + +ifneq ($(CONFIG_SHMEM),) + CONFIG_QCOM_KGSL_USE_SHMEM = y + CONFIG_QCOM_KGSL_PROCESS_RECLAIM = y +endif + +ccflags-y += -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ + -DCONFIG_QCOM_KGSL_SORT_POOL=1 \ + -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ + 
-DCONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=1 \ + -DCONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR=\"msm-adreno-tz\" \ + -DCONFIG_QCOM_KGSL_SYNX=1 # This is needed to enable the hw-fence driver APIs in kernel headers + +ifneq ($(CONFIG_SHMEM),) + ccflags-y += -DCONFIG_QCOM_KGSL_USE_SHMEM=1 \ + -DCONFIG_QCOM_KGSL_PROCESS_RECLAIM=1 +endif From c7312f9dcd6bc25db3f82b1ff616f99afbc4bf9f Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Tue, 10 Dec 2024 13:10:55 -0800 Subject: [PATCH 1016/1016] kgsl: gen8: Use the correct slice id for unsliced snapshot sections The snapshot section headers use a slice id of UINT_MAX instead of 0. Correct places where a slice id of 0 was used. Introduce a macro to generate the slice id to be used in the section header to simplify the code. Change-Id: If5038fb9b8152202f95ba50c855641becd76ff1d Signed-off-by: Lynus Vaz Signed-off-by: Urvashi Agrawal --- adreno_gen8_snapshot.c | 20 ++++++++++---------- adreno_gen8_snapshot.h | 10 +++++++++- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/adreno_gen8_snapshot.c b/adreno_gen8_snapshot.c index 1c7ac63400..b98a547ee2 100644 --- a/adreno_gen8_snapshot.c +++ b/adreno_gen8_snapshot.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2025, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #include "adreno.h" @@ -247,7 +247,7 @@ size_t gen8_legacy_snapshot_registers(struct kgsl_device *device, header->location_id = UINT_MAX; header->sp_id = UINT_MAX; header->usptp_id = UINT_MAX; - header->slice_id = info->regs->slice_region ? 
info->slice_id : UINT_MAX; + header->slice_id = HEADER_SLICE_ID(info->regs->slice_region, info->slice_id); if (info->regs->sel) kgsl_regwrite(device, info->regs->sel->host_reg, info->regs->sel->val); @@ -298,7 +298,7 @@ static size_t gen8_snapshot_registers(struct kgsl_device *device, u8 *buf, header->location_id = UINT_MAX; header->sp_id = UINT_MAX; header->usptp_id = UINT_MAX; - header->slice_id = info->regs->slice_region ? info->slice_id : UINT_MAX; + header->slice_id = HEADER_SLICE_ID(info->regs->slice_region, info->slice_id); src = gen8_crashdump_registers->hostptr + info->offset; @@ -336,7 +336,7 @@ static size_t gen8_legacy_snapshot_shader(struct kgsl_device *device, } header->type = block->statetype; - header->slice_id = block->slice_region ? info->slice_id : UINT_MAX; + header->slice_id = HEADER_SLICE_ID(block->slice_region, info->slice_id); header->sp_index = info->sp_id; header->usptp = info->usptp; header->pipe_id = block->pipeid; @@ -376,7 +376,7 @@ static size_t gen8_snapshot_shader_memory(struct kgsl_device *device, } header->type = block->statetype; - header->slice_id = block->slice_region ? 
info->slice_id : UINT_MAX; + header->slice_id = HEADER_SLICE_ID(block->slice_region, info->slice_id); header->sp_index = info->sp_id; header->usptp = info->usptp; header->pipe_id = block->pipeid; @@ -773,7 +773,7 @@ static void gen8_snapshot_mempool(struct kgsl_device *device, kgsl_snapshot_indexed_registers_v2(device, snapshot, cp_indexed_reg->addr, cp_indexed_reg->data, 0, cp_indexed_reg->size, cp_indexed_reg->pipe_id, - SLICE_ID(cp_indexed_reg->slice_region, j)); + HEADER_SLICE_ID(cp_indexed_reg->slice_region, j)); /* Reset CP_CHICKEN_DBG[StabilizeMVC] once we are done */ gen8_rmw_aperture(device, GEN8_CP_CHICKEN_DBG_PIPE, 0x4, 0x0, @@ -820,7 +820,7 @@ static size_t gen8_legacy_snapshot_cluster_dbgahb(struct kgsl_device *device, header->location_id = info->location_id; header->sp_id = info->sp_id; header->usptp_id = info->usptp_id; - header->slice_id = info->cluster->slice_region ? info->slice_id : UINT_MAX; + header->slice_id = HEADER_SLICE_ID(info->cluster->slice_region, info->slice_id); read_sel = GEN8_SP_READ_SEL_VAL(info->slice_id, info->location_id, info->pipe_id, info->statetype_id, info->usptp_id, info->sp_id); @@ -872,7 +872,7 @@ static size_t gen8_snapshot_cluster_dbgahb(struct kgsl_device *device, u8 *buf, header->location_id = info->location_id; header->sp_id = info->sp_id; header->usptp_id = info->usptp_id; - header->slice_id = info->cluster->slice_region ? info->slice_id : UINT_MAX; + header->slice_id = HEADER_SLICE_ID(info->cluster->slice_region, info->slice_id); src = gen8_crashdump_registers->hostptr + info->offset; @@ -1009,7 +1009,7 @@ static size_t gen8_legacy_snapshot_mvc(struct kgsl_device *device, u8 *buf, header->location_id = UINT_MAX; header->sp_id = UINT_MAX; header->usptp_id = UINT_MAX; - header->slice_id = info->cluster->slice_region ? 
info->slice_id : UINT_MAX; + header->slice_id = HEADER_SLICE_ID(info->cluster->slice_region, info->slice_id); /* * Set the AHB control for the Host to read from the @@ -1064,7 +1064,7 @@ static size_t gen8_snapshot_mvc(struct kgsl_device *device, u8 *buf, header->location_id = UINT_MAX; header->sp_id = UINT_MAX; header->usptp_id = UINT_MAX; - header->slice_id = info->cluster->slice_region ? info->slice_id : UINT_MAX; + header->slice_id = HEADER_SLICE_ID(info->cluster->slice_region, info->slice_id); src = gen8_crashdump_registers->hostptr + info->offset; diff --git a/adreno_gen8_snapshot.h b/adreno_gen8_snapshot.h index 36cea69298..ebc41455dd 100644 --- a/adreno_gen8_snapshot.h +++ b/adreno_gen8_snapshot.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #ifndef __ADRENO_GEN8_SNAPSHOT_H #define __ADRENO_GEN8_SNAPSHOT_H @@ -42,8 +42,16 @@ enum location_id { #define NUMBER_OF_SLICES(region, adreno_dev) \ ((region == SLICE) ? gen8_get_num_slices(adreno_dev) : 1) + +/* Use SLICE_ID to access the region (0 for unsliced) */ #define SLICE_ID(region, j) ((region == SLICE) ? j : 0) +/* + * Use HEADER_SLICE_ID to specify the slice in the section header (UINT_MAX for unsliced). + * This allows snapshot parsers to differentiate between slice ID 0 and unsliced regions. + */ +#define HEADER_SLICE_ID(region, j) ((region == SLICE) ? j : UINT_MAX) + #define GEN8_DEBUGBUS_BLOCK_SIZE 0x100 struct sel_reg {