| author | Jonathan Gray <jsg@cvs.openbsd.org> | 2017-12-31 07:12:27 +0000 |
| --- | --- | --- |
| committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2017-12-31 07:12:27 +0000 |
| commit | 051645c92924bf915d82bf219f2ed67309b5577a (patch) | |
| tree | 4aae126dd8e5a18c6a9926a5468d1561e6038a07 /lib/mesa/src/gallium/drivers/freedreno | |
| parent | 2dae6fe6f74cf7fb9fd65285302c0331d9786b00 (diff) | |
Merge Mesa 17.2.8
Diffstat (limited to 'lib/mesa/src/gallium/drivers/freedreno')
81 files changed, 13992 insertions, 1104 deletions
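In brief, this merge wires the new a5xx backend sources (a5xx/fd5_*.c and a5xx/fd5_*.h) into the freedreno build, adds the accumulating query code (freedreno_query_acc.c/.h), and regenerates the a2xx/a3xx/a4xx register headers so that their bitfield packing helpers assert that incoming values are aligned to the field's granularity before being shifted into place. A minimal sketch of that packing-helper pattern is shown below; the register name and the pack_pitch() helper are hypothetical stand-ins, while real helpers such as A2XX_RB_COPY_DEST_PITCH() in a2xx.xml.h have the same shape.

```c
/* Sketch of the MASK/SHIFT packing-helper pattern used throughout the
 * regenerated aNxx.xml.h headers in this merge.  EXAMPLE_REG_PITCH__MASK,
 * EXAMPLE_REG_PITCH__SHIFT and pack_pitch() are made-up names for
 * illustration only.
 */
#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_REG_PITCH__MASK  0x000001ff
#define EXAMPLE_REG_PITCH__SHIFT 0

static inline uint32_t pack_pitch(uint32_t val)
{
	/* new in this merge: reject values that are not a multiple of 32,
	 * since the low 5 bits are discarded by the >> 5 below */
	assert(!(val & 0x1f));
	return ((val >> 5) << EXAMPLE_REG_PITCH__SHIFT) & EXAMPLE_REG_PITCH__MASK;
}

int main(void)
{
	/* a pitch of 512 packs to 512/32 = 16 in the register field */
	printf("0x%08" PRIx32 "\n", pack_pitch(512));
	return 0;
}
```

The helpers drop the low bits (val >> 5, val >> 12, and so on) because the field only stores the aligned part of the value; the added assert() turns a silently truncated misaligned value into an immediate failure in debug builds instead of a corrupted register write.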
diff --git a/lib/mesa/src/gallium/drivers/freedreno/Makefile.am b/lib/mesa/src/gallium/drivers/freedreno/Makefile.am index ffb4db182..128c7fb59 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/Makefile.am +++ b/lib/mesa/src/gallium/drivers/freedreno/Makefile.am @@ -21,6 +21,7 @@ libfreedreno_la_SOURCES = \ $(a2xx_SOURCES) \ $(a3xx_SOURCES) \ $(a4xx_SOURCES) \ + $(a5xx_SOURCES) \ $(ir3_SOURCES) \ $(ir3_GENERATED_FILES) diff --git a/lib/mesa/src/gallium/drivers/freedreno/Makefile.in b/lib/mesa/src/gallium/drivers/freedreno/Makefile.in index 70ee05953..df6b5ebb8 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/Makefile.in +++ b/lib/mesa/src/gallium/drivers/freedreno/Makefile.in @@ -55,10 +55,13 @@ target_triplet = @target@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \ $(srcdir)/Makefile.sources $(top_srcdir)/bin/depcomp \ $(top_srcdir)/src/gallium/Automake.inc -@HAVE_DRISW_TRUE@am__append_1 = \ +@HAVE_LIBDRM_TRUE@am__append_1 = \ +@HAVE_LIBDRM_TRUE@ $(LIBDRM_LIBS) + +@HAVE_DRISW_TRUE@am__append_2 = \ @HAVE_DRISW_TRUE@ $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la -@HAVE_DRISW_KMS_TRUE@am__append_2 = \ +@HAVE_DRISW_KMS_TRUE@am__append_3 = \ @HAVE_DRISW_KMS_TRUE@ $(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la \ @HAVE_DRISW_KMS_TRUE@ $(LIBDRM_LIBS) @@ -85,9 +88,10 @@ libfreedreno_la_LIBADD = am__objects_1 = freedreno_batch.lo freedreno_batch_cache.lo \ freedreno_context.lo freedreno_draw.lo freedreno_fence.lo \ freedreno_gmem.lo freedreno_program.lo freedreno_query.lo \ - freedreno_query_hw.lo freedreno_query_sw.lo \ - freedreno_resource.lo freedreno_screen.lo freedreno_state.lo \ - freedreno_surface.lo freedreno_texture.lo freedreno_util.lo + freedreno_query_acc.lo freedreno_query_hw.lo \ + freedreno_query_sw.lo freedreno_resource.lo \ + freedreno_screen.lo freedreno_state.lo freedreno_surface.lo \ + freedreno_texture.lo freedreno_util.lo am__dirstamp = $(am__leading_dot)dirstamp am__objects_2 = a2xx/disasm-a2xx.lo a2xx/fd2_blend.lo \ a2xx/fd2_compiler.lo a2xx/fd2_context.lo a2xx/fd2_draw.lo \ @@ -102,15 +106,20 @@ am__objects_4 = a4xx/fd4_blend.lo a4xx/fd4_context.lo a4xx/fd4_draw.lo \ a4xx/fd4_emit.lo a4xx/fd4_format.lo a4xx/fd4_gmem.lo \ a4xx/fd4_program.lo a4xx/fd4_query.lo a4xx/fd4_rasterizer.lo \ a4xx/fd4_screen.lo a4xx/fd4_texture.lo a4xx/fd4_zsa.lo -am__objects_5 = ir3/disasm-a3xx.lo ir3/ir3.lo ir3/ir3_compiler_nir.lo \ +am__objects_5 = a5xx/fd5_blend.lo a5xx/fd5_compute.lo \ + a5xx/fd5_context.lo a5xx/fd5_draw.lo a5xx/fd5_emit.lo \ + a5xx/fd5_format.lo a5xx/fd5_gmem.lo a5xx/fd5_program.lo \ + a5xx/fd5_query.lo a5xx/fd5_rasterizer.lo a5xx/fd5_screen.lo \ + a5xx/fd5_texture.lo a5xx/fd5_zsa.lo +am__objects_6 = ir3/disasm-a3xx.lo ir3/ir3.lo ir3/ir3_compiler_nir.lo \ ir3/ir3_compiler.lo ir3/ir3_cp.lo ir3/ir3_depth.lo \ ir3/ir3_group.lo ir3/ir3_legalize.lo ir3/ir3_nir.lo \ ir3/ir3_nir_lower_if_else.lo ir3/ir3_print.lo ir3/ir3_ra.lo \ ir3/ir3_sched.lo ir3/ir3_shader.lo -am__objects_6 = ir3/ir3_nir_trig.lo +am__objects_7 = ir3/ir3_nir_trig.lo am_libfreedreno_la_OBJECTS = $(am__objects_1) $(am__objects_2) \ $(am__objects_3) $(am__objects_4) $(am__objects_5) \ - $(am__objects_6) + $(am__objects_6) $(am__objects_7) libfreedreno_la_OBJECTS = $(am_libfreedreno_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) @@ -120,15 +129,17 @@ PROGRAMS = $(noinst_PROGRAMS) am_ir3_compiler_OBJECTS = ir3/ir3_cmdline.$(OBJEXT) ir3_compiler_OBJECTS = $(am_ir3_compiler_OBJECTS) am__DEPENDENCIES_1 = -am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) 
$(am__DEPENDENCIES_1) \ - $(am__DEPENDENCIES_1) +@HAVE_LIBDRM_TRUE@am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) +am__DEPENDENCIES_3 = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_2) ir3_compiler_DEPENDENCIES = libfreedreno.la \ $(top_builddir)/src/gallium/auxiliary/libgallium.la \ $(top_builddir)/src/compiler/nir/libnir.la \ $(top_builddir)/src/compiler/glsl/libstandalone.la \ $(top_builddir)/src/util/libmesautil.la \ $(top_builddir)/src/mesa/libmesagallium.la \ - $(am__DEPENDENCIES_2) $(am__DEPENDENCIES_1) + $(am__DEPENDENCIES_3) $(am__DEPENDENCIES_1) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false @@ -197,6 +208,8 @@ AMDGPU_CFLAGS = @AMDGPU_CFLAGS@ AMDGPU_LIBS = @AMDGPU_LIBS@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +ANDROID_CFLAGS = @ANDROID_CFLAGS@ +ANDROID_LIBS = @ANDROID_LIBS@ AR = @AR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ @@ -227,8 +240,6 @@ DLLTOOL = @DLLTOOL@ DLOPEN_LIBS = @DLOPEN_LIBS@ DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@ DRI2PROTO_LIBS = @DRI2PROTO_LIBS@ -DRI3PROTO_CFLAGS = @DRI3PROTO_CFLAGS@ -DRI3PROTO_LIBS = @DRI3PROTO_LIBS@ DRIGL_CFLAGS = @DRIGL_CFLAGS@ DRIGL_LIBS = @DRIGL_LIBS@ DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@ @@ -241,10 +252,11 @@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGL_CFLAGS = @EGL_CFLAGS@ -EGL_CLIENT_APIS = @EGL_CLIENT_APIS@ EGL_LIB_DEPS = @EGL_LIB_DEPS@ EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@ EGREP = @EGREP@ +ETNAVIV_CFLAGS = @ETNAVIV_CFLAGS@ +ETNAVIV_LIBS = @ETNAVIV_LIBS@ EXEEXT = @EXEEXT@ EXPAT_CFLAGS = @EXPAT_CFLAGS@ EXPAT_LIBS = @EXPAT_LIBS@ @@ -271,6 +283,8 @@ GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@ GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@ GREP = @GREP@ HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@ +I915_CFLAGS = @I915_CFLAGS@ +I915_LIBS = @I915_LIBS@ INDENT = @INDENT@ INDENT_FLAGS = @INDENT_FLAGS@ INSTALL = @INSTALL@ @@ -278,45 +292,40 @@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ -INTEL_CFLAGS = @INTEL_CFLAGS@ -INTEL_LIBS = @INTEL_LIBS@ LD = @LD@ LDFLAGS = @LDFLAGS@ LD_NO_UNDEFINED = @LD_NO_UNDEFINED@ LEX = @LEX@ LEXLIB = @LEXLIB@ LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@ +LIBATOMIC_LIBS = @LIBATOMIC_LIBS@ LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@ LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@ LIBDRM_CFLAGS = @LIBDRM_CFLAGS@ LIBDRM_LIBS = @LIBDRM_LIBS@ LIBELF_CFLAGS = @LIBELF_CFLAGS@ LIBELF_LIBS = @LIBELF_LIBS@ +LIBGLVND_DATADIR = @LIBGLVND_DATADIR@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ -LIBSENSORS_LDFLAGS = @LIBSENSORS_LDFLAGS@ -LIBSHA1_CFLAGS = @LIBSHA1_CFLAGS@ -LIBSHA1_LIBS = @LIBSHA1_LIBS@ +LIBSENSORS_LIBS = @LIBSENSORS_LIBS@ LIBTOOL = @LIBTOOL@ +LIBUNWIND_CFLAGS = @LIBUNWIND_CFLAGS@ +LIBUNWIND_LIBS = @LIBUNWIND_LIBS@ LIB_DIR = @LIB_DIR@ LIB_EXT = @LIB_EXT@ LIPO = @LIPO@ -LLVM_BINDIR = @LLVM_BINDIR@ LLVM_CFLAGS = @LLVM_CFLAGS@ LLVM_CONFIG = @LLVM_CONFIG@ -LLVM_CPPFLAGS = @LLVM_CPPFLAGS@ LLVM_CXXFLAGS = @LLVM_CXXFLAGS@ LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@ LLVM_LDFLAGS = @LLVM_LDFLAGS@ -LLVM_LIBDIR = @LLVM_LIBDIR@ LLVM_LIBS = @LLVM_LIBS@ -LLVM_VERSION = @LLVM_VERSION@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ -MESA_LLVM = @MESA_LLVM@ MKDIR_P = @MKDIR_P@ MSVC2013_COMPAT_CFLAGS = @MSVC2013_COMPAT_CFLAGS@ MSVC2013_COMPAT_CXXFLAGS = @MSVC2013_COMPAT_CXXFLAGS@ @@ -337,8 +346,6 @@ OMX_LIBS = @OMX_LIBS@ OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@ 
OPENCL_LIBNAME = @OPENCL_LIBNAME@ OPENCL_VERSION = @OPENCL_VERSION@ -OPENSSL_CFLAGS = @OPENSSL_CFLAGS@ -OPENSSL_LIBS = @OPENSSL_LIBS@ OSMESA_LIB = @OSMESA_LIB@ OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@ OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@ @@ -358,8 +365,6 @@ PKG_CONFIG = @PKG_CONFIG@ PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ POSIX_SHELL = @POSIX_SHELL@ -PRESENTPROTO_CFLAGS = @PRESENTPROTO_CFLAGS@ -PRESENTPROTO_LIBS = @PRESENTPROTO_LIBS@ PTHREADSTUBS_CFLAGS = @PTHREADSTUBS_CFLAGS@ PTHREADSTUBS_LIBS = @PTHREADSTUBS_LIBS@ PTHREAD_CC = @PTHREAD_CC@ @@ -375,8 +380,6 @@ SED = @SED@ SELINUX_CFLAGS = @SELINUX_CFLAGS@ SELINUX_LIBS = @SELINUX_LIBS@ SET_MAKE = @SET_MAKE@ -SHA1_CFLAGS = @SHA1_CFLAGS@ -SHA1_LIBS = @SHA1_LIBS@ SHELL = @SHELL@ SIMPENROSE_CFLAGS = @SIMPENROSE_CFLAGS@ SIMPENROSE_LIBS = @SIMPENROSE_LIBS@ @@ -385,7 +388,8 @@ STRIP = @STRIP@ SWR_AVX2_CXXFLAGS = @SWR_AVX2_CXXFLAGS@ SWR_AVX_CXXFLAGS = @SWR_AVX_CXXFLAGS@ SWR_CXX11_CXXFLAGS = @SWR_CXX11_CXXFLAGS@ -TIMESTAMP_CMD = @TIMESTAMP_CMD@ +SWR_KNL_CXXFLAGS = @SWR_KNL_CXXFLAGS@ +SWR_SKX_CXXFLAGS = @SWR_SKX_CXXFLAGS@ VALGRIND_CFLAGS = @VALGRIND_CFLAGS@ VALGRIND_LIBS = @VALGRIND_LIBS@ VA_CFLAGS = @VA_CFLAGS@ @@ -393,15 +397,12 @@ VA_LIBS = @VA_LIBS@ VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@ VA_MAJOR = @VA_MAJOR@ VA_MINOR = @VA_MINOR@ -VC4_CFLAGS = @VC4_CFLAGS@ -VC4_LIBS = @VC4_LIBS@ VDPAU_CFLAGS = @VDPAU_CFLAGS@ VDPAU_LIBS = @VDPAU_LIBS@ VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@ VDPAU_MAJOR = @VDPAU_MAJOR@ VDPAU_MINOR = @VDPAU_MINOR@ VERSION = @VERSION@ -VG_LIB_DEPS = @VG_LIB_DEPS@ VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@ VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@ VL_CFLAGS = @VL_CFLAGS@ @@ -409,6 +410,7 @@ VL_LIBS = @VL_LIBS@ VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@ WAYLAND_CFLAGS = @WAYLAND_CFLAGS@ WAYLAND_LIBS = @WAYLAND_LIBS@ +WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@ WAYLAND_SCANNER = @WAYLAND_SCANNER@ WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@ WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@ @@ -430,9 +432,10 @@ XVMC_LIBS = @XVMC_LIBS@ XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@ XVMC_MAJOR = @XVMC_MAJOR@ XVMC_MINOR = @XVMC_MINOR@ -XXD = @XXD@ YACC = @YACC@ YFLAGS = @YFLAGS@ +ZLIB_CFLAGS = @ZLIB_CFLAGS@ +ZLIB_LIBS = @ZLIB_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ @@ -513,6 +516,8 @@ C_SOURCES := \ freedreno_program.h \ freedreno_query.c \ freedreno_query.h \ + freedreno_query_acc.c \ + freedreno_query_acc.h \ freedreno_query_hw.c \ freedreno_query_hw.h \ freedreno_query_sw.c \ @@ -615,6 +620,35 @@ a4xx_SOURCES := \ a4xx/fd4_zsa.c \ a4xx/fd4_zsa.h +a5xx_SOURCES := \ + a5xx/a5xx.xml.h \ + a5xx/fd5_blend.c \ + a5xx/fd5_blend.h \ + a5xx/fd5_compute.c \ + a5xx/fd5_compute.h \ + a5xx/fd5_context.c \ + a5xx/fd5_context.h \ + a5xx/fd5_draw.c \ + a5xx/fd5_draw.h \ + a5xx/fd5_emit.c \ + a5xx/fd5_emit.h \ + a5xx/fd5_format.c \ + a5xx/fd5_format.h \ + a5xx/fd5_gmem.c \ + a5xx/fd5_gmem.h \ + a5xx/fd5_program.c \ + a5xx/fd5_program.h \ + a5xx/fd5_query.c \ + a5xx/fd5_query.h \ + a5xx/fd5_rasterizer.c \ + a5xx/fd5_rasterizer.h \ + a5xx/fd5_screen.c \ + a5xx/fd5_screen.h \ + a5xx/fd5_texture.c \ + a5xx/fd5_texture.h \ + a5xx/fd5_zsa.c \ + a5xx/fd5_zsa.h + ir3_SOURCES := \ ir3/disasm-a3xx.c \ ir3/instr-a3xx.h \ @@ -686,12 +720,8 @@ GALLIUM_TARGET_CFLAGS = \ $(LIBDRM_CFLAGS) \ $(VISIBILITY_CFLAGS) -GALLIUM_COMMON_LIB_DEPS = \ - -lm \ - $(CLOCK_LIB) \ - $(PTHREAD_LIBS) \ - $(DLOPEN_LIBS) - +GALLIUM_COMMON_LIB_DEPS = -lm 
$(LIBUNWIND_LIBS) $(LIBSENSORS_LIBS) \ + $(CLOCK_LIB) $(PTHREAD_LIBS) $(DLOPEN_LIBS) $(am__append_1) GALLIUM_WINSYS_CFLAGS = \ -I$(top_srcdir)/src \ -I$(top_srcdir)/include \ @@ -703,7 +733,7 @@ GALLIUM_WINSYS_CFLAGS = \ GALLIUM_PIPE_LOADER_WINSYS_LIBS = \ $(top_builddir)/src/gallium/winsys/sw/null/libws_null.la \ $(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \ - $(am__append_1) $(am__append_2) + $(am__append_2) $(am__append_3) AM_CFLAGS = \ -Wno-packed-bitfield-compat \ -I$(top_srcdir)/src/gallium/drivers/freedreno/ir3 \ @@ -719,6 +749,7 @@ libfreedreno_la_SOURCES = \ $(a2xx_SOURCES) \ $(a3xx_SOURCES) \ $(a4xx_SOURCES) \ + $(a5xx_SOURCES) \ $(ir3_SOURCES) \ $(ir3_GENERATED_FILES) @@ -863,6 +894,32 @@ a4xx/fd4_screen.lo: a4xx/$(am__dirstamp) \ a4xx/fd4_texture.lo: a4xx/$(am__dirstamp) \ a4xx/$(DEPDIR)/$(am__dirstamp) a4xx/fd4_zsa.lo: a4xx/$(am__dirstamp) a4xx/$(DEPDIR)/$(am__dirstamp) +a5xx/$(am__dirstamp): + @$(MKDIR_P) a5xx + @: > a5xx/$(am__dirstamp) +a5xx/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) a5xx/$(DEPDIR) + @: > a5xx/$(DEPDIR)/$(am__dirstamp) +a5xx/fd5_blend.lo: a5xx/$(am__dirstamp) a5xx/$(DEPDIR)/$(am__dirstamp) +a5xx/fd5_compute.lo: a5xx/$(am__dirstamp) \ + a5xx/$(DEPDIR)/$(am__dirstamp) +a5xx/fd5_context.lo: a5xx/$(am__dirstamp) \ + a5xx/$(DEPDIR)/$(am__dirstamp) +a5xx/fd5_draw.lo: a5xx/$(am__dirstamp) a5xx/$(DEPDIR)/$(am__dirstamp) +a5xx/fd5_emit.lo: a5xx/$(am__dirstamp) a5xx/$(DEPDIR)/$(am__dirstamp) +a5xx/fd5_format.lo: a5xx/$(am__dirstamp) \ + a5xx/$(DEPDIR)/$(am__dirstamp) +a5xx/fd5_gmem.lo: a5xx/$(am__dirstamp) a5xx/$(DEPDIR)/$(am__dirstamp) +a5xx/fd5_program.lo: a5xx/$(am__dirstamp) \ + a5xx/$(DEPDIR)/$(am__dirstamp) +a5xx/fd5_query.lo: a5xx/$(am__dirstamp) a5xx/$(DEPDIR)/$(am__dirstamp) +a5xx/fd5_rasterizer.lo: a5xx/$(am__dirstamp) \ + a5xx/$(DEPDIR)/$(am__dirstamp) +a5xx/fd5_screen.lo: a5xx/$(am__dirstamp) \ + a5xx/$(DEPDIR)/$(am__dirstamp) +a5xx/fd5_texture.lo: a5xx/$(am__dirstamp) \ + a5xx/$(DEPDIR)/$(am__dirstamp) +a5xx/fd5_zsa.lo: a5xx/$(am__dirstamp) a5xx/$(DEPDIR)/$(am__dirstamp) ir3/$(am__dirstamp): @$(MKDIR_P) ir3 @: > ir3/$(am__dirstamp) @@ -911,6 +968,8 @@ mostlyclean-compile: -rm -f a3xx/*.lo -rm -f a4xx/*.$(OBJEXT) -rm -f a4xx/*.lo + -rm -f a5xx/*.$(OBJEXT) + -rm -f a5xx/*.lo -rm -f ir3/*.$(OBJEXT) -rm -f ir3/*.lo @@ -926,6 +985,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/freedreno_gmem.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/freedreno_program.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/freedreno_query.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/freedreno_query_acc.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/freedreno_query_hw.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/freedreno_query_sw.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/freedreno_resource.Plo@am__quote@ @@ -972,6 +1032,19 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@a4xx/$(DEPDIR)/fd4_screen.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@a4xx/$(DEPDIR)/fd4_texture.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@a4xx/$(DEPDIR)/fd4_zsa.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@a5xx/$(DEPDIR)/fd5_blend.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@a5xx/$(DEPDIR)/fd5_compute.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@a5xx/$(DEPDIR)/fd5_context.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@a5xx/$(DEPDIR)/fd5_draw.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@a5xx/$(DEPDIR)/fd5_emit.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@a5xx/$(DEPDIR)/fd5_format.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@a5xx/$(DEPDIR)/fd5_gmem.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@a5xx/$(DEPDIR)/fd5_program.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@a5xx/$(DEPDIR)/fd5_query.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@a5xx/$(DEPDIR)/fd5_rasterizer.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@a5xx/$(DEPDIR)/fd5_screen.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@a5xx/$(DEPDIR)/fd5_texture.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@a5xx/$(DEPDIR)/fd5_zsa.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@ir3/$(DEPDIR)/disasm-a3xx.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@ir3/$(DEPDIR)/ir3.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@ir3/$(DEPDIR)/ir3_cmdline.Po@am__quote@ @@ -1045,6 +1118,7 @@ clean-libtool: -rm -rf a2xx/.libs a2xx/_libs -rm -rf a3xx/.libs a3xx/_libs -rm -rf a4xx/.libs a4xx/_libs + -rm -rf a5xx/.libs a5xx/_libs -rm -rf ir3/.libs ir3/_libs ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) @@ -1182,6 +1256,8 @@ distclean-generic: -rm -f a3xx/$(am__dirstamp) -rm -f a4xx/$(DEPDIR)/$(am__dirstamp) -rm -f a4xx/$(am__dirstamp) + -rm -f a5xx/$(DEPDIR)/$(am__dirstamp) + -rm -f a5xx/$(am__dirstamp) -rm -f ir3/$(DEPDIR)/$(am__dirstamp) -rm -f ir3/$(am__dirstamp) @@ -1195,7 +1271,7 @@ clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ clean-noinstPROGRAMS mostlyclean-am distclean: distclean-am - -rm -rf ./$(DEPDIR) a2xx/$(DEPDIR) a3xx/$(DEPDIR) a4xx/$(DEPDIR) ir3/$(DEPDIR) + -rm -rf ./$(DEPDIR) a2xx/$(DEPDIR) a3xx/$(DEPDIR) a4xx/$(DEPDIR) a5xx/$(DEPDIR) ir3/$(DEPDIR) -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags @@ -1241,7 +1317,7 @@ install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am - -rm -rf ./$(DEPDIR) a2xx/$(DEPDIR) a3xx/$(DEPDIR) a4xx/$(DEPDIR) ir3/$(DEPDIR) + -rm -rf ./$(DEPDIR) a2xx/$(DEPDIR) a3xx/$(DEPDIR) a4xx/$(DEPDIR) a5xx/$(DEPDIR) ir3/$(DEPDIR) -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic diff --git a/lib/mesa/src/gallium/drivers/freedreno/Makefile.sources b/lib/mesa/src/gallium/drivers/freedreno/Makefile.sources index 92d918659..db716f39d 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/Makefile.sources +++ b/lib/mesa/src/gallium/drivers/freedreno/Makefile.sources @@ -18,6 +18,8 @@ C_SOURCES := \ freedreno_program.h \ freedreno_query.c \ freedreno_query.h \ + freedreno_query_acc.c \ + freedreno_query_acc.h \ freedreno_query_hw.c \ freedreno_query_hw.h \ freedreno_query_sw.c \ @@ -120,6 +122,35 @@ a4xx_SOURCES := \ a4xx/fd4_zsa.c \ a4xx/fd4_zsa.h +a5xx_SOURCES := \ + a5xx/a5xx.xml.h \ + a5xx/fd5_blend.c \ + a5xx/fd5_blend.h \ + a5xx/fd5_compute.c \ + a5xx/fd5_compute.h \ + a5xx/fd5_context.c \ + a5xx/fd5_context.h \ + a5xx/fd5_draw.c \ + a5xx/fd5_draw.h \ + a5xx/fd5_emit.c \ + a5xx/fd5_emit.h \ + a5xx/fd5_format.c \ + a5xx/fd5_format.h \ + a5xx/fd5_gmem.c \ + a5xx/fd5_gmem.h \ + a5xx/fd5_program.c \ + a5xx/fd5_program.h \ + a5xx/fd5_query.c \ + a5xx/fd5_query.h \ + a5xx/fd5_rasterizer.c \ + a5xx/fd5_rasterizer.h \ + a5xx/fd5_screen.c \ + a5xx/fd5_screen.h \ + a5xx/fd5_texture.c \ + a5xx/fd5_texture.h \ + a5xx/fd5_zsa.c \ + a5xx/fd5_zsa.h + ir3_SOURCES := \ ir3/disasm-a3xx.c \ ir3/instr-a3xx.h \ diff --git a/lib/mesa/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/lib/mesa/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h index 16c2bd39d..0811bdc13 
100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h +++ b/lib/mesa/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h @@ -8,16 +8,17 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16185 bytes, from 2016-03-05 03:08:05) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 110685 bytes, from 2016-04-25 17:56:43) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) - -Copyright (C) 2013-2015 by the following authors: +- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 431 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 37162 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 13324 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 31866 bytes, from 2017-06-02 15:50:23) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 111898 bytes, from 2017-05-30 19:25:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 142603 bytes, from 2017-06-06 17:02:32) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2017-05-17 13:21:27) + +Copyright (C) 2013-2017 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) @@ -206,12 +207,12 @@ enum a2xx_rb_copy_sample_select { }; enum a2xx_rb_blend_opcode { - BLEND_DST_PLUS_SRC = 0, - BLEND_SRC_MINUS_DST = 1, - BLEND_MIN_DST_SRC = 2, - BLEND_MAX_DST_SRC = 3, - BLEND_DST_MINUS_SRC = 4, - BLEND_DST_PLUS_SRC_BIAS = 5, + BLEND2_DST_PLUS_SRC = 0, + BLEND2_SRC_MINUS_DST = 1, + BLEND2_MIN_DST_SRC = 2, + BLEND2_MAX_DST_SRC = 3, + BLEND2_DST_MINUS_SRC = 4, + BLEND2_DST_PLUS_SRC_BIAS = 5, }; enum adreno_mmu_clnt_beh { @@ -351,6 +352,38 @@ static inline uint32_t A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR(enum adreno_mmu_cln #define REG_A2XX_RBBM_DEBUG 0x0000039b #define REG_A2XX_RBBM_PM_OVERRIDE1 0x0000039c +#define A2XX_RBBM_PM_OVERRIDE1_RBBM_AHBCLK_PM_OVERRIDE 0x00000001 +#define A2XX_RBBM_PM_OVERRIDE1_SC_REG_SCLK_PM_OVERRIDE 0x00000002 +#define A2XX_RBBM_PM_OVERRIDE1_SC_SCLK_PM_OVERRIDE 0x00000004 +#define A2XX_RBBM_PM_OVERRIDE1_SP_TOP_SCLK_PM_OVERRIDE 0x00000008 +#define A2XX_RBBM_PM_OVERRIDE1_SP_V0_SCLK_PM_OVERRIDE 0x00000010 +#define A2XX_RBBM_PM_OVERRIDE1_SQ_REG_SCLK_PM_OVERRIDE 0x00000020 +#define A2XX_RBBM_PM_OVERRIDE1_SQ_REG_FIFOS_SCLK_PM_OVERRIDE 0x00000040 +#define A2XX_RBBM_PM_OVERRIDE1_SQ_CONST_MEM_SCLK_PM_OVERRIDE 0x00000080 +#define A2XX_RBBM_PM_OVERRIDE1_SQ_SQ_SCLK_PM_OVERRIDE 
0x00000100 +#define A2XX_RBBM_PM_OVERRIDE1_SX_SCLK_PM_OVERRIDE 0x00000200 +#define A2XX_RBBM_PM_OVERRIDE1_SX_REG_SCLK_PM_OVERRIDE 0x00000400 +#define A2XX_RBBM_PM_OVERRIDE1_TCM_TCO_SCLK_PM_OVERRIDE 0x00000800 +#define A2XX_RBBM_PM_OVERRIDE1_TCM_TCM_SCLK_PM_OVERRIDE 0x00001000 +#define A2XX_RBBM_PM_OVERRIDE1_TCM_TCD_SCLK_PM_OVERRIDE 0x00002000 +#define A2XX_RBBM_PM_OVERRIDE1_TCM_REG_SCLK_PM_OVERRIDE 0x00004000 +#define A2XX_RBBM_PM_OVERRIDE1_TPC_TPC_SCLK_PM_OVERRIDE 0x00008000 +#define A2XX_RBBM_PM_OVERRIDE1_TPC_REG_SCLK_PM_OVERRIDE 0x00010000 +#define A2XX_RBBM_PM_OVERRIDE1_TCF_TCA_SCLK_PM_OVERRIDE 0x00020000 +#define A2XX_RBBM_PM_OVERRIDE1_TCF_TCB_SCLK_PM_OVERRIDE 0x00040000 +#define A2XX_RBBM_PM_OVERRIDE1_TCF_TCB_READ_SCLK_PM_OVERRIDE 0x00080000 +#define A2XX_RBBM_PM_OVERRIDE1_TP_TP_SCLK_PM_OVERRIDE 0x00100000 +#define A2XX_RBBM_PM_OVERRIDE1_TP_REG_SCLK_PM_OVERRIDE 0x00200000 +#define A2XX_RBBM_PM_OVERRIDE1_CP_G_SCLK_PM_OVERRIDE 0x00400000 +#define A2XX_RBBM_PM_OVERRIDE1_CP_REG_SCLK_PM_OVERRIDE 0x00800000 +#define A2XX_RBBM_PM_OVERRIDE1_CP_G_REG_SCLK_PM_OVERRIDE 0x01000000 +#define A2XX_RBBM_PM_OVERRIDE1_SPI_SCLK_PM_OVERRIDE 0x02000000 +#define A2XX_RBBM_PM_OVERRIDE1_RB_REG_SCLK_PM_OVERRIDE 0x04000000 +#define A2XX_RBBM_PM_OVERRIDE1_RB_SCLK_PM_OVERRIDE 0x08000000 +#define A2XX_RBBM_PM_OVERRIDE1_MH_MH_SCLK_PM_OVERRIDE 0x10000000 +#define A2XX_RBBM_PM_OVERRIDE1_MH_REG_SCLK_PM_OVERRIDE 0x20000000 +#define A2XX_RBBM_PM_OVERRIDE1_MH_MMU_SCLK_PM_OVERRIDE 0x40000000 +#define A2XX_RBBM_PM_OVERRIDE1_MH_TCROQ_SCLK_PM_OVERRIDE 0x80000000 #define REG_A2XX_RBBM_PM_OVERRIDE2 0x0000039d @@ -444,12 +477,14 @@ static inline uint32_t A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT(uint32_t val) #define A2XX_A220_VSC_BIN_SIZE_WIDTH__SHIFT 0 static inline uint32_t A2XX_A220_VSC_BIN_SIZE_WIDTH(uint32_t val) { + assert(!(val & 0x1f)); return ((val >> 5) << A2XX_A220_VSC_BIN_SIZE_WIDTH__SHIFT) & A2XX_A220_VSC_BIN_SIZE_WIDTH__MASK; } #define A2XX_A220_VSC_BIN_SIZE_HEIGHT__MASK 0x000003e0 #define A2XX_A220_VSC_BIN_SIZE_HEIGHT__SHIFT 5 static inline uint32_t A2XX_A220_VSC_BIN_SIZE_HEIGHT(uint32_t val) { + assert(!(val & 0x1f)); return ((val >> 5) << A2XX_A220_VSC_BIN_SIZE_HEIGHT__SHIFT) & A2XX_A220_VSC_BIN_SIZE_HEIGHT__MASK; } @@ -476,12 +511,43 @@ static inline uint32_t REG_A2XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x000 #define REG_A2XX_PA_SU_DEBUG_DATA 0x00000c81 #define REG_A2XX_PA_SU_FACE_DATA 0x00000c86 +#define A2XX_PA_SU_FACE_DATA_BASE_ADDR__MASK 0xffffffe0 +#define A2XX_PA_SU_FACE_DATA_BASE_ADDR__SHIFT 5 +static inline uint32_t A2XX_PA_SU_FACE_DATA_BASE_ADDR(uint32_t val) +{ + return ((val) << A2XX_PA_SU_FACE_DATA_BASE_ADDR__SHIFT) & A2XX_PA_SU_FACE_DATA_BASE_ADDR__MASK; +} #define REG_A2XX_SQ_GPR_MANAGEMENT 0x00000d00 +#define A2XX_SQ_GPR_MANAGEMENT_REG_DYNAMIC 0x00000001 +#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__MASK 0x00000ff0 +#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__SHIFT 4 +static inline uint32_t A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX(uint32_t val) +{ + return ((val) << A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__SHIFT) & A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__MASK; +} +#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__MASK 0x000ff000 +#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__SHIFT 12 +static inline uint32_t A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX(uint32_t val) +{ + return ((val) << A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__SHIFT) & A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__MASK; +} #define REG_A2XX_SQ_FLOW_CONTROL 0x00000d01 #define REG_A2XX_SQ_INST_STORE_MANAGMENT 0x00000d02 +#define 
A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__MASK 0x00000fff +#define A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__SHIFT 0 +static inline uint32_t A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX(uint32_t val) +{ + return ((val) << A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__SHIFT) & A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__MASK; +} +#define A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__MASK 0x0fff0000 +#define A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__SHIFT 16 +static inline uint32_t A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX(uint32_t val) +{ + return ((val) << A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__SHIFT) & A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__MASK; +} #define REG_A2XX_SQ_DEBUG_MISC 0x00000d05 @@ -620,6 +686,7 @@ static inline uint32_t A2XX_RB_COLOR_INFO_SWAP(uint32_t val) #define A2XX_RB_COLOR_INFO_BASE__SHIFT 12 static inline uint32_t A2XX_RB_COLOR_INFO_BASE(uint32_t val) { + assert(!(val & 0x3ff)); return ((val >> 10) << A2XX_RB_COLOR_INFO_BASE__SHIFT) & A2XX_RB_COLOR_INFO_BASE__MASK; } @@ -634,6 +701,7 @@ static inline uint32_t A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(enum adreno_rb_depth_form #define A2XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT 12 static inline uint32_t A2XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val) { + assert(!(val & 0x3ff)); return ((val >> 10) << A2XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT) & A2XX_RB_DEPTH_INFO_DEPTH_BASE__MASK; } @@ -741,6 +809,24 @@ static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) #define REG_A2XX_RB_BLEND_ALPHA 0x00002108 #define REG_A2XX_RB_FOG_COLOR 0x00002109 +#define A2XX_RB_FOG_COLOR_FOG_RED__MASK 0x000000ff +#define A2XX_RB_FOG_COLOR_FOG_RED__SHIFT 0 +static inline uint32_t A2XX_RB_FOG_COLOR_FOG_RED(uint32_t val) +{ + return ((val) << A2XX_RB_FOG_COLOR_FOG_RED__SHIFT) & A2XX_RB_FOG_COLOR_FOG_RED__MASK; +} +#define A2XX_RB_FOG_COLOR_FOG_GREEN__MASK 0x0000ff00 +#define A2XX_RB_FOG_COLOR_FOG_GREEN__SHIFT 8 +static inline uint32_t A2XX_RB_FOG_COLOR_FOG_GREEN(uint32_t val) +{ + return ((val) << A2XX_RB_FOG_COLOR_FOG_GREEN__SHIFT) & A2XX_RB_FOG_COLOR_FOG_GREEN__MASK; +} +#define A2XX_RB_FOG_COLOR_FOG_BLUE__MASK 0x00ff0000 +#define A2XX_RB_FOG_COLOR_FOG_BLUE__SHIFT 16 +static inline uint32_t A2XX_RB_FOG_COLOR_FOG_BLUE(uint32_t val) +{ + return ((val) << A2XX_RB_FOG_COLOR_FOG_BLUE__SHIFT) & A2XX_RB_FOG_COLOR_FOG_BLUE__MASK; +} #define REG_A2XX_RB_STENCILREFMASK_BF 0x0000210c #define A2XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff @@ -889,14 +975,146 @@ static inline uint32_t A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS(uint32_t val) #define A2XX_SQ_CONTEXT_MISC_TX_CACHE_SEL 0x00040000 #define REG_A2XX_SQ_INTERPOLATOR_CNTL 0x00002182 +#define A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__MASK 0x0000ffff +#define A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__SHIFT 0 +static inline uint32_t A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE(uint32_t val) +{ + return ((val) << A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__SHIFT) & A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__MASK; +} +#define A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__MASK 0xffff0000 +#define A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__SHIFT 16 +static inline uint32_t A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN(uint32_t val) +{ + return ((val) << A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__SHIFT) & A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__MASK; +} #define REG_A2XX_SQ_WRAPPING_0 0x00002183 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__MASK 0x0000000f +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__SHIFT 0 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_0(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__SHIFT) 
& A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__MASK 0x000000f0 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__SHIFT 4 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_1(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__MASK 0x00000f00 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__SHIFT 8 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_2(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__MASK 0x0000f000 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__SHIFT 12 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_3(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__MASK 0x000f0000 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__SHIFT 16 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_4(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__MASK 0x00f00000 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__SHIFT 20 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_5(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__MASK 0x0f000000 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__SHIFT 24 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_6(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__MASK 0xf0000000 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__SHIFT 28 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_7(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__MASK; +} #define REG_A2XX_SQ_WRAPPING_1 0x00002184 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__MASK 0x0000000f +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__SHIFT 0 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_8(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__MASK 0x000000f0 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__SHIFT 4 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_9(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__MASK 0x00000f00 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__SHIFT 8 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_10(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__MASK 0x0000f000 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__SHIFT 12 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_11(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__MASK 0x000f0000 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__SHIFT 16 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_12(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__MASK; +} +#define 
A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__MASK 0x00f00000 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__SHIFT 20 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_13(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__MASK 0x0f000000 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__SHIFT 24 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_14(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__MASK 0xf0000000 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__SHIFT 28 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_15(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__MASK; +} #define REG_A2XX_SQ_PS_PROGRAM 0x000021f6 +#define A2XX_SQ_PS_PROGRAM_BASE__MASK 0x00000fff +#define A2XX_SQ_PS_PROGRAM_BASE__SHIFT 0 +static inline uint32_t A2XX_SQ_PS_PROGRAM_BASE(uint32_t val) +{ + return ((val) << A2XX_SQ_PS_PROGRAM_BASE__SHIFT) & A2XX_SQ_PS_PROGRAM_BASE__MASK; +} +#define A2XX_SQ_PS_PROGRAM_SIZE__MASK 0x00fff000 +#define A2XX_SQ_PS_PROGRAM_SIZE__SHIFT 12 +static inline uint32_t A2XX_SQ_PS_PROGRAM_SIZE(uint32_t val) +{ + return ((val) << A2XX_SQ_PS_PROGRAM_SIZE__SHIFT) & A2XX_SQ_PS_PROGRAM_SIZE__MASK; +} #define REG_A2XX_SQ_VS_PROGRAM 0x000021f7 +#define A2XX_SQ_VS_PROGRAM_BASE__MASK 0x00000fff +#define A2XX_SQ_VS_PROGRAM_BASE__SHIFT 0 +static inline uint32_t A2XX_SQ_VS_PROGRAM_BASE(uint32_t val) +{ + return ((val) << A2XX_SQ_VS_PROGRAM_BASE__SHIFT) & A2XX_SQ_VS_PROGRAM_BASE__MASK; +} +#define A2XX_SQ_VS_PROGRAM_SIZE__MASK 0x00fff000 +#define A2XX_SQ_VS_PROGRAM_SIZE__SHIFT 12 +static inline uint32_t A2XX_SQ_VS_PROGRAM_SIZE(uint32_t val) +{ + return ((val) << A2XX_SQ_VS_PROGRAM_SIZE__SHIFT) & A2XX_SQ_VS_PROGRAM_SIZE__MASK; +} #define REG_A2XX_VGT_EVENT_INITIATOR 0x000021f9 @@ -1303,6 +1521,14 @@ static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL(enum a2xx_pa_sc_a } #define REG_A2XX_PA_SC_VIZ_QUERY 0x00002293 +#define A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ENA 0x00000001 +#define A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__MASK 0x0000007e +#define A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__SHIFT 1 +static inline uint32_t A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID(uint32_t val) +{ + return ((val) << A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__SHIFT) & A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__MASK; +} +#define A2XX_PA_SC_VIZ_QUERY_KILL_PIX_POST_EARLY_Z 0x00000100 #define REG_A2XX_VGT_ENHANCE 0x00002294 @@ -1318,6 +1544,18 @@ static inline uint32_t A2XX_PA_SC_LINE_CNTL_BRES_CNTL(uint32_t val) #define A2XX_PA_SC_LINE_CNTL_LAST_PIXEL 0x00000400 #define REG_A2XX_PA_SC_AA_CONFIG 0x00002301 +#define A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__MASK 0x00000007 +#define A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__SHIFT 0 +static inline uint32_t A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES(uint32_t val) +{ + return ((val) << A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__SHIFT) & A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__MASK; +} +#define A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__MASK 0x0001e000 +#define A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__SHIFT 13 +static inline uint32_t A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST(uint32_t val) +{ + return ((val) << A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__SHIFT) & A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__MASK; +} #define REG_A2XX_PA_SU_VTX_CNTL 0x00002302 #define A2XX_PA_SU_VTX_CNTL_PIX_CENTER__MASK 0x00000001 @@ -1406,8 +1644,20 @@ static inline uint32_t 
A2XX_SQ_PS_CONST_SIZE(uint32_t val) #define REG_A2XX_PA_SC_AA_MASK 0x00002312 #define REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL 0x00002316 +#define A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__MASK 0x00000007 +#define A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__SHIFT 0 +static inline uint32_t A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH(uint32_t val) +{ + return ((val) << A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__SHIFT) & A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__MASK; +} #define REG_A2XX_VGT_OUT_DEALLOC_CNTL 0x00002317 +#define A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__MASK 0x00000003 +#define A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__SHIFT 0 +static inline uint32_t A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST(uint32_t val) +{ + return ((val) << A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__SHIFT) & A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__MASK; +} #define REG_A2XX_RB_COPY_CONTROL 0x00002318 #define A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__MASK 0x00000007 @@ -1431,6 +1681,7 @@ static inline uint32_t A2XX_RB_COPY_CONTROL_CLEAR_MASK(uint32_t val) #define A2XX_RB_COPY_DEST_PITCH__SHIFT 0 static inline uint32_t A2XX_RB_COPY_DEST_PITCH(uint32_t val) { + assert(!(val & 0x1f)); return ((val >> 5) << A2XX_RB_COPY_DEST_PITCH__SHIFT) & A2XX_RB_COPY_DEST_PITCH__MASK; } @@ -1538,6 +1789,7 @@ static inline uint32_t A2XX_SQ_TEX_0_CLAMP_Z(enum sq_tex_clamp val) #define A2XX_SQ_TEX_0_PITCH__SHIFT 22 static inline uint32_t A2XX_SQ_TEX_0_PITCH(uint32_t val) { + assert(!(val & 0x1f)); return ((val >> 5) << A2XX_SQ_TEX_0_PITCH__SHIFT) & A2XX_SQ_TEX_0_PITCH__MASK; } diff --git a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_blend.c b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_blend.c index b3cb23977..f063ebed6 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_blend.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_blend.c @@ -40,15 +40,15 @@ blend_func(unsigned func) { switch (func) { case PIPE_BLEND_ADD: - return BLEND_DST_PLUS_SRC; + return BLEND2_DST_PLUS_SRC; case PIPE_BLEND_MIN: - return BLEND_MIN_DST_SRC; + return BLEND2_MIN_DST_SRC; case PIPE_BLEND_MAX: - return BLEND_MAX_DST_SRC; + return BLEND2_MAX_DST_SRC; case PIPE_BLEND_SUBTRACT: - return BLEND_SRC_MINUS_DST; + return BLEND2_SRC_MINUS_DST; case PIPE_BLEND_REVERSE_SUBTRACT: - return BLEND_DST_MINUS_SRC; + return BLEND2_DST_MINUS_SRC; default: DBG("invalid blend func: %x", func); return 0; diff --git a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c index 39418fca4..2ffd8cd5a 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c @@ -1080,11 +1080,6 @@ translate_instruction(struct fd2_compile_context *ctx, case TGSI_OPCODE_POW: translate_pow(ctx, inst); break; - case TGSI_OPCODE_ABS: - instr = ir2_instr_create_alu(cf, MAXv, ~0); - add_regs_vector_1(ctx, inst, instr); - instr->regs[1]->flags |= IR2_REG_NEGATE; /* src0 */ - break; case TGSI_OPCODE_COS: case TGSI_OPCODE_SIN: translate_trig(ctx, inst, opc); diff --git a/lib/mesa/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/lib/mesa/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h index bf787d1cc..9574789ce 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h +++ b/lib/mesa/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h @@ -8,16 +8,17 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- 
/home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16185 bytes, from 2016-03-05 03:08:05) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 110685 bytes, from 2016-04-25 17:56:43) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) - -Copyright (C) 2013-2016 by the following authors: +- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 431 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 37162 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 13324 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 31866 bytes, from 2017-06-02 15:50:23) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 111898 bytes, from 2017-05-30 19:25:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 142603 bytes, from 2017-06-06 17:02:32) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2017-05-17 13:21:27) + +Copyright (C) 2013-2017 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) @@ -129,10 +130,14 @@ enum a3xx_tex_fmt { TFMT_Z16_UNORM = 9, TFMT_X8Z24_UNORM = 10, TFMT_Z32_FLOAT = 11, - TFMT_NV12_UV_TILED = 17, - TFMT_NV12_Y_TILED = 19, - TFMT_NV12_UV = 21, - TFMT_NV12_Y = 23, + TFMT_UV_64X32 = 16, + TFMT_VU_64X32 = 17, + TFMT_Y_64X32 = 18, + TFMT_NV12_64X32 = 19, + TFMT_UV_LINEAR = 20, + TFMT_VU_LINEAR = 21, + TFMT_Y_LINEAR = 22, + TFMT_NV12_LINEAR = 23, TFMT_I420_Y = 24, TFMT_I420_U = 26, TFMT_I420_V = 27, @@ -525,14 +530,6 @@ enum a3xx_uche_perfcounter_select { UCHE_UCHEPERF_ACTIVE_CYCLES = 20, }; -enum a3xx_rb_blend_opcode { - BLEND_DST_PLUS_SRC = 0, - BLEND_SRC_MINUS_DST = 1, - BLEND_DST_MINUS_SRC = 2, - BLEND_MIN_DST_SRC = 3, - BLEND_MAX_DST_SRC = 4, -}; - enum a3xx_intp_mode { SMOOTH = 0, FLAT = 1, @@ -1167,6 +1164,7 @@ static inline uint32_t A3XX_RB_MODE_CONTROL_MRT(uint32_t val) #define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT 4 static inline uint32_t A3XX_RB_RENDER_CONTROL_BIN_WIDTH(uint32_t val) { + assert(!(val & 0x1f)); return ((val >> 5) << A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT) & A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK; } #define A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE 0x00001000 @@ -1265,6 +1263,7 @@ static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) #define A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT 17 static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val) { + assert(!(val & 0x1f)); return ((val >> 5) << A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK; } @@ -1273,6 +1272,7 @@ static 
inline uint32_t REG_A3XX_RB_MRT_BUF_BASE(uint32_t i0) { return 0x000020c6 #define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT 4 static inline uint32_t A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(uint32_t val) { + assert(!(val & 0x1f)); return ((val >> 5) << A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT) & A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK; } @@ -1393,17 +1393,19 @@ static inline uint32_t A3XX_RB_COPY_CONTROL_MODE(enum adreno_rb_copy_control_mod { return ((val) << A3XX_RB_COPY_CONTROL_MODE__SHIFT) & A3XX_RB_COPY_CONTROL_MODE__MASK; } +#define A3XX_RB_COPY_CONTROL_MSAA_SRGB_DOWNSAMPLE 0x00000080 #define A3XX_RB_COPY_CONTROL_FASTCLEAR__MASK 0x00000f00 #define A3XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT 8 static inline uint32_t A3XX_RB_COPY_CONTROL_FASTCLEAR(uint32_t val) { return ((val) << A3XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT) & A3XX_RB_COPY_CONTROL_FASTCLEAR__MASK; } -#define A3XX_RB_COPY_CONTROL_UNK12 0x00001000 +#define A3XX_RB_COPY_CONTROL_DEPTH32_RESOLVE 0x00001000 #define A3XX_RB_COPY_CONTROL_GMEM_BASE__MASK 0xffffc000 #define A3XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT 14 static inline uint32_t A3XX_RB_COPY_CONTROL_GMEM_BASE(uint32_t val) { + assert(!(val & 0x3fff)); return ((val >> 14) << A3XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT) & A3XX_RB_COPY_CONTROL_GMEM_BASE__MASK; } @@ -1412,6 +1414,7 @@ static inline uint32_t A3XX_RB_COPY_CONTROL_GMEM_BASE(uint32_t val) #define A3XX_RB_COPY_DEST_BASE_BASE__SHIFT 4 static inline uint32_t A3XX_RB_COPY_DEST_BASE_BASE(uint32_t val) { + assert(!(val & 0x1f)); return ((val >> 5) << A3XX_RB_COPY_DEST_BASE_BASE__SHIFT) & A3XX_RB_COPY_DEST_BASE_BASE__MASK; } @@ -1420,6 +1423,7 @@ static inline uint32_t A3XX_RB_COPY_DEST_BASE_BASE(uint32_t val) #define A3XX_RB_COPY_DEST_PITCH_PITCH__SHIFT 0 static inline uint32_t A3XX_RB_COPY_DEST_PITCH_PITCH(uint32_t val) { + assert(!(val & 0x1f)); return ((val >> 5) << A3XX_RB_COPY_DEST_PITCH_PITCH__SHIFT) & A3XX_RB_COPY_DEST_PITCH_PITCH__MASK; } @@ -1488,6 +1492,7 @@ static inline uint32_t A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(enum adreno_rb_depth_form #define A3XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT 11 static inline uint32_t A3XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val) { + assert(!(val & 0xfff)); return ((val >> 12) << A3XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT) & A3XX_RB_DEPTH_INFO_DEPTH_BASE__MASK; } @@ -1496,6 +1501,7 @@ static inline uint32_t A3XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val) #define A3XX_RB_DEPTH_PITCH__SHIFT 0 static inline uint32_t A3XX_RB_DEPTH_PITCH(uint32_t val) { + assert(!(val & 0x7)); return ((val >> 3) << A3XX_RB_DEPTH_PITCH__SHIFT) & A3XX_RB_DEPTH_PITCH__MASK; } @@ -1559,6 +1565,7 @@ static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op v #define A3XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT 11 static inline uint32_t A3XX_RB_STENCIL_INFO_STENCIL_BASE(uint32_t val) { + assert(!(val & 0xfff)); return ((val >> 12) << A3XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT) & A3XX_RB_STENCIL_INFO_STENCIL_BASE__MASK; } @@ -1567,6 +1574,7 @@ static inline uint32_t A3XX_RB_STENCIL_INFO_STENCIL_BASE(uint32_t val) #define A3XX_RB_STENCIL_PITCH__SHIFT 0 static inline uint32_t A3XX_RB_STENCIL_PITCH(uint32_t val) { + assert(!(val & 0x7)); return ((val >> 3) << A3XX_RB_STENCIL_PITCH__SHIFT) & A3XX_RB_STENCIL_PITCH__MASK; } @@ -2484,6 +2492,7 @@ static inline uint32_t A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN(uint32_t val) #define A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT 5 static inline uint32_t A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS(uint32_t val) { + assert(!(val & 0x1f)); return ((val >> 5) << 
A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT) & A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK; } @@ -2622,6 +2631,7 @@ static inline uint32_t A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN(uint32_t val) #define A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT 5 static inline uint32_t A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS(uint32_t val) { + assert(!(val & 0x1f)); return ((val >> 5) << A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT) & A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK; } @@ -2802,12 +2812,14 @@ static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR(uint32_t val) #define A3XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 static inline uint32_t A3XX_VSC_BIN_SIZE_WIDTH(uint32_t val) { + assert(!(val & 0x1f)); return ((val >> 5) << A3XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A3XX_VSC_BIN_SIZE_WIDTH__MASK; } #define A3XX_VSC_BIN_SIZE_HEIGHT__MASK 0x000003e0 #define A3XX_VSC_BIN_SIZE_HEIGHT__SHIFT 5 static inline uint32_t A3XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) { + assert(!(val & 0x1f)); return ((val >> 5) << A3XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A3XX_VSC_BIN_SIZE_HEIGHT__MASK; } @@ -3204,6 +3216,7 @@ static inline uint32_t A3XX_TEX_CONST_2_SWAP(enum a3xx_color_swap val) #define A3XX_TEX_CONST_3_LAYERSZ1__SHIFT 0 static inline uint32_t A3XX_TEX_CONST_3_LAYERSZ1(uint32_t val) { + assert(!(val & 0xfff)); return ((val >> 12) << A3XX_TEX_CONST_3_LAYERSZ1__SHIFT) & A3XX_TEX_CONST_3_LAYERSZ1__MASK; } #define A3XX_TEX_CONST_3_DEPTH__MASK 0x0ffe0000 @@ -3216,6 +3229,7 @@ static inline uint32_t A3XX_TEX_CONST_3_DEPTH(uint32_t val) #define A3XX_TEX_CONST_3_LAYERSZ2__SHIFT 28 static inline uint32_t A3XX_TEX_CONST_3_LAYERSZ2(uint32_t val) { + assert(!(val & 0xfff)); return ((val >> 12) << A3XX_TEX_CONST_3_LAYERSZ2__SHIFT) & A3XX_TEX_CONST_3_LAYERSZ2__MASK; } diff --git a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 6d223c05c..aefbbea4a 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -54,7 +54,7 @@ static const enum adreno_state_block sb[] = { * prsc or dwords: buffer containing constant values * sizedwords: size of const value buffer */ -void +static void fd3_emit_const(struct fd_ringbuffer *ring, enum shader_t type, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc) @@ -96,16 +96,16 @@ static void fd3_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets) { + uint32_t anum = align(num, 4); uint32_t i; debug_assert((regid % 4) == 0); - debug_assert((num % 4) == 0); - OUT_PKT3(ring, CP_LOAD_STATE, 2 + num); + OUT_PKT3(ring, CP_LOAD_STATE, 2 + anum); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) | - CP_LOAD_STATE_0_NUM_UNIT(num/2)); + CP_LOAD_STATE_0_NUM_UNIT(anum/2)); OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); @@ -120,6 +120,9 @@ fd3_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, OUT_RING(ring, 0xbad00000 | (i << 16)); } } + + for (; i < anum; i++) + OUT_RING(ring, 0xffffffff); } #define VERT_TEX_OFF 0 @@ -299,13 +302,13 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, } struct fd_resource *rsc = fd_resource(psurf[i]->texture); - enum pipe_format format = fd3_gmem_restore_format(psurf[i]->format); + enum pipe_format 
format = fd_gmem_restore_format(psurf[i]->format); /* The restore blit_zs shader expects stencil in sampler 0, and depth * in sampler 1 */ if (rsc->stencil && i == 0) { rsc = rsc->stencil; - format = fd3_gmem_restore_format(rsc->base.b.format); + format = fd_gmem_restore_format(rsc->base.b.format); } /* note: PIPE_BUFFER disallowed for surfaces */ @@ -398,7 +401,7 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) struct pipe_vertex_element *elem = &vtx->vtx->pipe[i]; const struct pipe_vertex_buffer *vb = &vtx->vertexbuf.vb[elem->vertex_buffer_index]; - struct fd_resource *rsc = fd_resource(vb->buffer); + struct fd_resource *rsc = fd_resource(vb->buffer.resource); enum pipe_format pfmt = elem->src_format; enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(pfmt); bool switchnext = (i != last) || @@ -487,7 +490,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, { const struct ir3_shader_variant *vp = fd3_emit_get_vp(emit); const struct ir3_shader_variant *fp = fd3_emit_get_fp(emit); - uint32_t dirty = emit->dirty; + const enum fd_dirty_3d_state dirty = emit->dirty; emit_marker(ring, 5); @@ -619,7 +622,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, val |= A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(stride_in_vpc); } - if (info->indexed && info->primitive_restart) { + if (info->index_size && info->primitive_restart) { val |= A3XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART; } @@ -710,9 +713,9 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, HLSQ_FLUSH); if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */ - ir3_emit_consts(vp, ring, ctx, emit->info, dirty); + ir3_emit_vs_consts(vp, ring, ctx, emit->info); if (!emit->key.binning_pass) - ir3_emit_consts(fp, ring, ctx, emit->info, dirty); + ir3_emit_fs_consts(fp, ring, ctx); } if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) { @@ -780,24 +783,14 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, A3XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3])); } - if (dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX)) + if (dirty & FD_DIRTY_TEX) fd_wfi(ctx->batch, ring); - if (dirty & FD_DIRTY_VERTTEX) { - if (vp->has_samp) - emit_textures(ctx, ring, SB_VERT_TEX, &ctx->verttex); - else - dirty &= ~FD_DIRTY_VERTTEX; - } - - if (dirty & FD_DIRTY_FRAGTEX) { - if (fp->has_samp) - emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->fragtex); - else - dirty &= ~FD_DIRTY_FRAGTEX; - } + if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) + emit_textures(ctx, ring, SB_VERT_TEX, &ctx->tex[PIPE_SHADER_VERTEX]); - ctx->dirty &= ~dirty; + if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) + emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->tex[PIPE_SHADER_FRAGMENT]); } /* emit setup at begin of new cmdstream buffer (don't rely on previous diff --git a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_emit.h index d8e5991ca..5e574da19 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_emit.h +++ b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_emit.h @@ -38,10 +38,6 @@ struct fd_ringbuffer; -void fd3_emit_const(struct fd_ringbuffer *ring, enum shader_t type, - uint32_t regid, uint32_t offset, uint32_t sizedwords, - const uint32_t *dwords, struct pipe_resource *prsc); - void fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface **psurf, int bufs); @@ -52,7 +48,7 @@ struct fd3_emit { const struct fd_program_stateobj *prog; const struct pipe_draw_info *info; struct 
ir3_shader_key key; - uint32_t dirty; + enum fd_dirty_3d_state dirty; uint32_t sprite_coord_enable; bool sprite_coord_mode; diff --git a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_format.c b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_format.c index 5cf458bb0..f8508977a 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_format.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_format.c @@ -348,27 +348,6 @@ fd3_pipe2nblocksx(enum pipe_format format, unsigned width) return util_format_get_nblocksx(format, width); } -/* we need to special case a bit the depth/stencil restore, because we are - * using the texture sampler to blit into the depth/stencil buffer, *not* - * into a color buffer. Otherwise fd3_tex_swiz() will do the wrong thing, - * as it is assuming that you are sampling into normal render target.. - */ -enum pipe_format -fd3_gmem_restore_format(enum pipe_format format) -{ - switch (format) { - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - return PIPE_FORMAT_R8G8B8A8_UNORM; - case PIPE_FORMAT_Z16_UNORM: - return PIPE_FORMAT_R8G8_UNORM; - case PIPE_FORMAT_S8_UINT: - return PIPE_FORMAT_R8_UNORM; - default: - return format; - } -} - enum a3xx_color_fmt fd3_fs_output_format(enum pipe_format format) { diff --git a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_format.h b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_format.h index 48c503e9a..7286770d8 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_format.h +++ b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_format.h @@ -34,7 +34,6 @@ enum a3xx_vtx_fmt fd3_pipe2vtx(enum pipe_format format); enum a3xx_tex_fmt fd3_pipe2tex(enum pipe_format format); enum a3xx_tex_fetchsize fd3_pipe2fetchsize(enum pipe_format format); enum a3xx_color_fmt fd3_pipe2color(enum pipe_format format); -enum pipe_format fd3_gmem_restore_format(enum pipe_format format); enum a3xx_color_fmt fd3_fs_output_format(enum pipe_format format); enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format); unsigned fd3_pipe2nblocksx(enum pipe_format format, unsigned width); diff --git a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c index 51c858ace..151ecfbf6 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c @@ -149,6 +149,12 @@ use_hw_binning(struct fd_batch *batch) if (gmem->minx || gmem->miny) return false; + if ((gmem->maxpw * gmem->maxph) > 32) + return false; + + if ((gmem->maxpw > 15) || (gmem->maxph > 15)) + return false; + return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2); } @@ -332,7 +338,7 @@ emit_gmem2mem_surf(struct fd_batch *batch, A3XX_RB_COPY_CONTROL_GMEM_BASE(base) | COND(format == PIPE_FORMAT_Z32_FLOAT || format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, - A3XX_RB_COPY_CONTROL_UNK12)); + A3XX_RB_COPY_CONTROL_DEPTH32_RESOLVE)); OUT_RELOCW(ring, rsc->bo, offset, 0, -1); /* RB_COPY_DEST_BASE */ OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp)); diff --git a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 3146dc5d0..f43d5c47c 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -151,7 +151,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, uint32_t fpbuffersz, vpbuffersz, fsoff; uint32_t pos_regid, posz_regid, psize_regid, color_regid[4] = {0}; 
int constmode; - int i, j, k; + int i, j; debug_assert(nr <= ARRAY_SIZE(color_regid)); @@ -275,45 +275,34 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) | A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(fp->varying_in)); - for (i = 0, j = -1; (i < 8) && (j < (int)fp->inputs_count); i++) { + struct ir3_shader_linkage l = {0}; + ir3_link_shaders(&l, vp, fp); + + for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) { uint32_t reg = 0; OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i), 1); - j = ir3_next_varying(fp, j); - if (j < fp->inputs_count) { - k = ir3_find_output(vp, fp->inputs[j].slot); - reg |= A3XX_SP_VS_OUT_REG_A_REGID(vp->outputs[k].regid); - reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(fp->inputs[j].compmask); - } + reg |= A3XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid); + reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask); + j++; - j = ir3_next_varying(fp, j); - if (j < fp->inputs_count) { - k = ir3_find_output(vp, fp->inputs[j].slot); - reg |= A3XX_SP_VS_OUT_REG_B_REGID(vp->outputs[k].regid); - reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(fp->inputs[j].compmask); - } + reg |= A3XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid); + reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask); + j++; OUT_RING(ring, reg); } - for (i = 0, j = -1; (i < 4) && (j < (int)fp->inputs_count); i++) { + for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) { uint32_t reg = 0; OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i), 1); - j = ir3_next_varying(fp, j); - if (j < fp->inputs_count) - reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(fp->inputs[j].inloc); - j = ir3_next_varying(fp, j); - if (j < fp->inputs_count) - reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(fp->inputs[j].inloc); - j = ir3_next_varying(fp, j); - if (j < fp->inputs_count) - reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(fp->inputs[j].inloc); - j = ir3_next_varying(fp, j); - if (j < fp->inputs_count) - reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(fp->inputs[j].inloc); + reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc + 8); + reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc + 8); + reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc + 8); + reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc + 8); OUT_RING(ring, reg); } @@ -402,10 +391,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, */ unsigned compmask = fp->inputs[j].compmask; - /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG - * instead.. rather than -8 everywhere else.. 
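fd3_program_emit() (and the matching a4xx code later in this diff) drops the manual ir3_next_varying()/ir3_find_output() walks in favour of linking the VS/FS pair once with ir3_link_shaders() and filling SP_VS_OUT / SP_VS_VPC_DST from the resulting table; the +8 location bias now lives in the OUTLOC fields instead of being subtracted from each inloc. A condensed sketch of the new loop, using the names as they appear in the hunk:

    struct ir3_shader_linkage l = {0};
    ir3_link_shaders(&l, vp, fp);       /* pair VS outputs with FS inputs once */

    for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) {
        uint32_t reg = 0;

        OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i), 1);

        /* two linked varyings are packed into each SP_VS_OUT register */
        reg |= A3XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid);
        reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask);
        j++;

        reg |= A3XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid);
        reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask);
        j++;

        OUT_RING(ring, reg);
    }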
- */ - uint32_t inloc = fp->inputs[j].inloc - 8; + uint32_t inloc = fp->inputs[j].inloc; if ((fp->inputs[j].interpolate == INTERP_MODE_FLAT) || (fp->inputs[j].rasterflat && emit->rasterflat)) { diff --git a/lib/mesa/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/lib/mesa/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h index aeb61e755..8f4b0da27 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h +++ b/lib/mesa/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h @@ -8,16 +8,17 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16185 bytes, from 2016-03-05 03:08:05) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 110685 bytes, from 2016-04-25 17:56:43) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) - -Copyright (C) 2013-2016 by the following authors: +- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 431 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 37162 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 13324 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 31866 bytes, from 2017-06-02 15:50:23) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 111898 bytes, from 2017-05-30 19:25:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 142603 bytes, from 2017-06-06 17:02:32) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2017-05-17 13:21:27) + +Copyright (C) 2013-2017 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) @@ -92,17 +93,10 @@ enum a4xx_color_fmt { enum a4xx_tile_mode { TILE4_LINEAR = 0, + TILE4_2 = 2, TILE4_3 = 3, }; -enum a4xx_rb_blend_opcode { - BLEND_DST_PLUS_SRC = 0, - BLEND_SRC_MINUS_DST = 1, - BLEND_DST_MINUS_SRC = 2, - BLEND_MIN_DST_SRC = 3, - BLEND_MAX_DST_SRC = 4, -}; - enum a4xx_vtx_fmt { VFMT4_32_FLOAT = 1, VFMT4_32_32_FLOAT = 2, @@ -935,12 +929,14 @@ static inline uint32_t A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(uint32_t val) #define A4XX_RB_MODE_CONTROL_WIDTH__SHIFT 0 static inline uint32_t A4XX_RB_MODE_CONTROL_WIDTH(uint32_t val) { + assert(!(val & 0x1f)); return ((val >> 5) << A4XX_RB_MODE_CONTROL_WIDTH__SHIFT) & A4XX_RB_MODE_CONTROL_WIDTH__MASK; } #define A4XX_RB_MODE_CONTROL_HEIGHT__MASK 0x00003f00 #define A4XX_RB_MODE_CONTROL_HEIGHT__SHIFT 8 static inline uint32_t A4XX_RB_MODE_CONTROL_HEIGHT(uint32_t val) { + assert(!(val & 0x1f)); return ((val >> 5) << 
A4XX_RB_MODE_CONTROL_HEIGHT__SHIFT) & A4XX_RB_MODE_CONTROL_HEIGHT__MASK; } #define A4XX_RB_MODE_CONTROL_ENABLE_GMEM 0x00010000 @@ -1025,6 +1021,7 @@ static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) #define A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT 14 static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val) { + assert(!(val & 0xf)); return ((val >> 4) << A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK; } @@ -1047,7 +1044,7 @@ static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_b } #define A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0 #define A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5 -static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum a4xx_rb_blend_opcode val) +static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) { return ((val) << A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK; } @@ -1065,7 +1062,7 @@ static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb } #define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000 #define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21 -static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum a4xx_rb_blend_opcode val) +static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) { return ((val) << A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK; } @@ -1224,6 +1221,7 @@ static inline uint32_t A4XX_RB_FS_OUTPUT_SAMPLE_MASK(uint32_t val) #define A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__SHIFT 2 static inline uint32_t A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR(uint32_t val) { + assert(!(val & 0x3)); return ((val >> 2) << A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__SHIFT) & A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__MASK; } @@ -1300,6 +1298,7 @@ static inline uint32_t A4XX_RB_COPY_CONTROL_FASTCLEAR(uint32_t val) #define A4XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT 14 static inline uint32_t A4XX_RB_COPY_CONTROL_GMEM_BASE(uint32_t val) { + assert(!(val & 0x3fff)); return ((val >> 14) << A4XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT) & A4XX_RB_COPY_CONTROL_GMEM_BASE__MASK; } @@ -1308,6 +1307,7 @@ static inline uint32_t A4XX_RB_COPY_CONTROL_GMEM_BASE(uint32_t val) #define A4XX_RB_COPY_DEST_BASE_BASE__SHIFT 5 static inline uint32_t A4XX_RB_COPY_DEST_BASE_BASE(uint32_t val) { + assert(!(val & 0x1f)); return ((val >> 5) << A4XX_RB_COPY_DEST_BASE_BASE__SHIFT) & A4XX_RB_COPY_DEST_BASE_BASE__MASK; } @@ -1316,6 +1316,7 @@ static inline uint32_t A4XX_RB_COPY_DEST_BASE_BASE(uint32_t val) #define A4XX_RB_COPY_DEST_PITCH_PITCH__SHIFT 0 static inline uint32_t A4XX_RB_COPY_DEST_PITCH_PITCH(uint32_t val) { + assert(!(val & 0x1f)); return ((val >> 5) << A4XX_RB_COPY_DEST_PITCH_PITCH__SHIFT) & A4XX_RB_COPY_DEST_PITCH_PITCH__MASK; } @@ -1394,6 +1395,7 @@ static inline uint32_t A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(enum a4xx_depth_format va #define A4XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT 12 static inline uint32_t A4XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val) { + assert(!(val & 0xfff)); return ((val >> 12) << A4XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT) & A4XX_RB_DEPTH_INFO_DEPTH_BASE__MASK; } @@ -1402,6 +1404,7 @@ static inline uint32_t A4XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val) #define A4XX_RB_DEPTH_PITCH__SHIFT 0 static inline uint32_t A4XX_RB_DEPTH_PITCH(uint32_t val) { + assert(!(val & 0x1f)); return ((val >> 5) << 
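The regenerated a4xx.xml.h adds assert()s to the shifted register-field helpers: fields such as RB_MODE_CONTROL width/height and the various BASE/PITCH fields store the value divided by its alignment (val >> 5, >> 12, and so on), so an unaligned input would previously lose its low bits silently. A minimal standalone illustration, reusing one of the helpers exactly as defined above:

    #include <assert.h>
    #include <stdint.h>

    #define A4XX_RB_MODE_CONTROL_HEIGHT__MASK  0x00003f00
    #define A4XX_RB_MODE_CONTROL_HEIGHT__SHIFT 8
    static inline uint32_t A4XX_RB_MODE_CONTROL_HEIGHT(uint32_t val)
    {
        assert(!(val & 0x1f));  /* value is stored >> 5, so it must be 32-aligned */
        return ((val >> 5) << A4XX_RB_MODE_CONTROL_HEIGHT__SHIFT) &
                A4XX_RB_MODE_CONTROL_HEIGHT__MASK;
    }

    /* A4XX_RB_MODE_CONTROL_HEIGHT(64) == 0x200; passing 65 now trips the
     * assert instead of silently dropping the low bits. */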
A4XX_RB_DEPTH_PITCH__SHIFT) & A4XX_RB_DEPTH_PITCH__MASK; } @@ -1410,6 +1413,7 @@ static inline uint32_t A4XX_RB_DEPTH_PITCH(uint32_t val) #define A4XX_RB_DEPTH_PITCH2__SHIFT 0 static inline uint32_t A4XX_RB_DEPTH_PITCH2(uint32_t val) { + assert(!(val & 0x1f)); return ((val >> 5) << A4XX_RB_DEPTH_PITCH2__SHIFT) & A4XX_RB_DEPTH_PITCH2__MASK; } @@ -1475,6 +1479,7 @@ static inline uint32_t A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op v #define A4XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT 12 static inline uint32_t A4XX_RB_STENCIL_INFO_STENCIL_BASE(uint32_t val) { + assert(!(val & 0xfff)); return ((val >> 12) << A4XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT) & A4XX_RB_STENCIL_INFO_STENCIL_BASE__MASK; } @@ -1483,6 +1488,7 @@ static inline uint32_t A4XX_RB_STENCIL_INFO_STENCIL_BASE(uint32_t val) #define A4XX_RB_STENCIL_PITCH__SHIFT 0 static inline uint32_t A4XX_RB_STENCIL_PITCH(uint32_t val) { + assert(!(val & 0x1f)); return ((val >> 5) << A4XX_RB_STENCIL_PITCH__SHIFT) & A4XX_RB_STENCIL_PITCH__MASK; } @@ -2205,11 +2211,23 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_REG(uint32_t i0) #define REG_A4XX_CP_DRAW_STATE_ADDR 0x00000232 -#define REG_A4XX_CP_PROTECT_REG_0 0x00000240 - static inline uint32_t REG_A4XX_CP_PROTECT(uint32_t i0) { return 0x00000240 + 0x1*i0; } static inline uint32_t REG_A4XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000240 + 0x1*i0; } +#define A4XX_CP_PROTECT_REG_BASE_ADDR__MASK 0x0001ffff +#define A4XX_CP_PROTECT_REG_BASE_ADDR__SHIFT 0 +static inline uint32_t A4XX_CP_PROTECT_REG_BASE_ADDR(uint32_t val) +{ + return ((val) << A4XX_CP_PROTECT_REG_BASE_ADDR__SHIFT) & A4XX_CP_PROTECT_REG_BASE_ADDR__MASK; +} +#define A4XX_CP_PROTECT_REG_MASK_LEN__MASK 0x1f000000 +#define A4XX_CP_PROTECT_REG_MASK_LEN__SHIFT 24 +static inline uint32_t A4XX_CP_PROTECT_REG_MASK_LEN(uint32_t val) +{ + return ((val) << A4XX_CP_PROTECT_REG_MASK_LEN__SHIFT) & A4XX_CP_PROTECT_REG_MASK_LEN__MASK; +} +#define A4XX_CP_PROTECT_REG_TRAP_WRITE 0x20000000 +#define A4XX_CP_PROTECT_REG_TRAP_READ 0x40000000 #define REG_A4XX_CP_PROTECT_CTRL 0x00000250 @@ -2300,7 +2318,7 @@ static inline uint32_t A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) { return ((val) << A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK; } -#define A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0003fc00 +#define A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 #define A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 static inline uint32_t A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) { @@ -2448,7 +2466,7 @@ static inline uint32_t A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) { return ((val) << A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK; } -#define A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0003fc00 +#define A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 #define A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 static inline uint32_t A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) { @@ -2832,12 +2850,14 @@ static inline uint32_t REG_A4XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0 #define A4XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 static inline uint32_t A4XX_VSC_BIN_SIZE_WIDTH(uint32_t val) { + assert(!(val & 0x1f)); return ((val >> 5) << A4XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A4XX_VSC_BIN_SIZE_WIDTH__MASK; } #define A4XX_VSC_BIN_SIZE_HEIGHT__MASK 0x000003e0 #define A4XX_VSC_BIN_SIZE_HEIGHT__SHIFT 5 static inline uint32_t A4XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) { + assert(!(val & 0x1f)); return ((val >> 5) << 
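The bare REG_A4XX_CP_PROTECT_REG_0 define gives way to the indexed REG_A4XX_CP_PROTECT_REG(i) plus bitfield builders describing a protected register window (base address, length field, trap-on-write/read bits). A hedged sketch of composing one entry with the new helpers; the exact MASK_LEN semantics (presumably a log2 window size) and who actually programs these registers are not spelled out in this diff:

    /* Trap writes to a register window starting at 0x1234; MASK_LEN = 6 is
     * assumed here to mean a 2^6-register window. */
    uint32_t protect = A4XX_CP_PROTECT_REG_BASE_ADDR(0x1234) |
                       A4XX_CP_PROTECT_REG_MASK_LEN(6) |
                       A4XX_CP_PROTECT_REG_TRAP_WRITE;

    /* OUT_PKT0(ring, REG_A4XX_CP_PROTECT_REG(0), 1);
     * OUT_RING(ring, protect); */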
A4XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A4XX_VSC_BIN_SIZE_HEIGHT__MASK; } @@ -3004,11 +3024,11 @@ static inline uint32_t A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(uint32_t val) static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_1(uint32_t i0) { return 0x0000220b + 0x4*i0; } static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_2(uint32_t i0) { return 0x0000220c + 0x4*i0; } -#define A4XX_VFD_FETCH_INSTR_2_SIZE__MASK 0xfffffff0 -#define A4XX_VFD_FETCH_INSTR_2_SIZE__SHIFT 4 +#define A4XX_VFD_FETCH_INSTR_2_SIZE__MASK 0xffffffff +#define A4XX_VFD_FETCH_INSTR_2_SIZE__SHIFT 0 static inline uint32_t A4XX_VFD_FETCH_INSTR_2_SIZE(uint32_t val) { - return ((val >> 4) << A4XX_VFD_FETCH_INSTR_2_SIZE__SHIFT) & A4XX_VFD_FETCH_INSTR_2_SIZE__MASK; + return ((val) << A4XX_VFD_FETCH_INSTR_2_SIZE__SHIFT) & A4XX_VFD_FETCH_INSTR_2_SIZE__MASK; } static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_3(uint32_t i0) { return 0x0000220d + 0x4*i0; } @@ -3283,6 +3303,7 @@ static inline uint32_t A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(float val) return ((((int32_t)(val * 4.0))) << A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT) & A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK; } #define A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET 0x00000800 +#define A4XX_GRAS_SU_MODE_CONTROL_MSAA_ENABLE 0x00002000 #define A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS 0x00100000 #define REG_A4XX_GRAS_SC_CONTROL 0x0000207b @@ -3700,6 +3721,8 @@ static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(uint32_t val) #define REG_A4XX_PC_BINNING_COMMAND 0x00000d00 #define A4XX_PC_BINNING_COMMAND_BINNING_ENABLE 0x00000001 +#define REG_A4XX_PC_TESSFACTOR_ADDR 0x00000d08 + #define REG_A4XX_PC_DRAWCALL_SETUP_OVERRIDE 0x00000d0c #define REG_A4XX_PC_PERFCTR_PC_SEL_0 0x00000d10 @@ -3796,12 +3819,8 @@ static inline uint32_t A4XX_PC_HS_PARAM_SPACING(enum a4xx_tess_spacing val) { return ((val) << A4XX_PC_HS_PARAM_SPACING__SHIFT) & A4XX_PC_HS_PARAM_SPACING__MASK; } -#define A4XX_PC_HS_PARAM_PRIMTYPE__MASK 0x01800000 -#define A4XX_PC_HS_PARAM_PRIMTYPE__SHIFT 23 -static inline uint32_t A4XX_PC_HS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << A4XX_PC_HS_PARAM_PRIMTYPE__SHIFT) & A4XX_PC_HS_PARAM_PRIMTYPE__MASK; -} +#define A4XX_PC_HS_PARAM_CW 0x00800000 +#define A4XX_PC_HS_PARAM_CONNECTED 0x01000000 #define REG_A4XX_VBIF_VERSION 0x00003000 @@ -3824,6 +3843,44 @@ static inline uint32_t A4XX_PC_HS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val) #define REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB 0x00003049 +#define REG_A4XX_VBIF_PERF_CNT_EN0 0x000030c0 + +#define REG_A4XX_VBIF_PERF_CNT_EN1 0x000030c1 + +#define REG_A4XX_VBIF_PERF_CNT_EN2 0x000030c2 + +#define REG_A4XX_VBIF_PERF_CNT_EN3 0x000030c3 + +#define REG_A4XX_VBIF_PERF_CNT_SEL0 0x000030d0 + +#define REG_A4XX_VBIF_PERF_CNT_SEL1 0x000030d1 + +#define REG_A4XX_VBIF_PERF_CNT_SEL2 0x000030d2 + +#define REG_A4XX_VBIF_PERF_CNT_SEL3 0x000030d3 + +#define REG_A4XX_VBIF_PERF_CNT_LOW0 0x000030d8 + +#define REG_A4XX_VBIF_PERF_CNT_LOW1 0x000030d9 + +#define REG_A4XX_VBIF_PERF_CNT_LOW2 0x000030da + +#define REG_A4XX_VBIF_PERF_CNT_LOW3 0x000030db + +#define REG_A4XX_VBIF_PERF_CNT_HIGH0 0x000030e0 + +#define REG_A4XX_VBIF_PERF_CNT_HIGH1 0x000030e1 + +#define REG_A4XX_VBIF_PERF_CNT_HIGH2 0x000030e2 + +#define REG_A4XX_VBIF_PERF_CNT_HIGH3 0x000030e3 + +#define REG_A4XX_VBIF_PERF_PWR_CNT_EN0 0x00003100 + +#define REG_A4XX_VBIF_PERF_PWR_CNT_EN1 0x00003101 + +#define REG_A4XX_VBIF_PERF_PWR_CNT_EN2 0x00003102 + #define REG_A4XX_UNKNOWN_0CC5 0x00000cc5 #define REG_A4XX_UNKNOWN_0CC6 0x00000cc6 @@ -4015,6 +4072,7 @@ static inline uint32_t 
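The header also gains the VBIF performance-counter block (per-counter enable, select, and 64-bit low/high result registers). A hedged sketch of how such a counter block is typically driven; write_reg()/read_reg() are placeholders for whatever register access the consumer has (they are not part of this diff), and the enable value of 1 is an assumption:

    /* Hypothetical register accessors, not defined anywhere in this diff. */
    void write_reg(uint32_t reg, uint32_t val);
    uint32_t read_reg(uint32_t reg);

    static uint64_t sample_vbif_counter0(uint32_t select)
    {
        write_reg(REG_A4XX_VBIF_PERF_CNT_SEL0, select);  /* choose the event */
        write_reg(REG_A4XX_VBIF_PERF_CNT_EN0, 1);        /* enable counter 0 */
        /* ... run the workload of interest ... */
        uint32_t lo = read_reg(REG_A4XX_VBIF_PERF_CNT_LOW0);
        uint32_t hi = read_reg(REG_A4XX_VBIF_PERF_CNT_HIGH0);
        return ((uint64_t)hi << 32) | lo;
    }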
A4XX_TEX_CONST_2_SWAP(enum a3xx_color_swap val) #define A4XX_TEX_CONST_3_LAYERSZ__SHIFT 0 static inline uint32_t A4XX_TEX_CONST_3_LAYERSZ(uint32_t val) { + assert(!(val & 0xfff)); return ((val >> 12) << A4XX_TEX_CONST_3_LAYERSZ__SHIFT) & A4XX_TEX_CONST_3_LAYERSZ__MASK; } #define A4XX_TEX_CONST_3_DEPTH__MASK 0x7ffc0000 @@ -4029,12 +4087,14 @@ static inline uint32_t A4XX_TEX_CONST_3_DEPTH(uint32_t val) #define A4XX_TEX_CONST_4_LAYERSZ__SHIFT 0 static inline uint32_t A4XX_TEX_CONST_4_LAYERSZ(uint32_t val) { + assert(!(val & 0xfff)); return ((val >> 12) << A4XX_TEX_CONST_4_LAYERSZ__SHIFT) & A4XX_TEX_CONST_4_LAYERSZ__MASK; } #define A4XX_TEX_CONST_4_BASE__MASK 0xffffffe0 #define A4XX_TEX_CONST_4_BASE__SHIFT 5 static inline uint32_t A4XX_TEX_CONST_4_BASE(uint32_t val) { + assert(!(val & 0x1f)); return ((val >> 5) << A4XX_TEX_CONST_4_BASE__SHIFT) & A4XX_TEX_CONST_4_BASE__MASK; } diff --git a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_blend.c b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_blend.c index f19702280..e262e05de 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_blend.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_blend.c @@ -35,7 +35,7 @@ #include "fd4_context.h" #include "fd4_format.h" -static enum a4xx_rb_blend_opcode +static enum a3xx_rb_blend_opcode blend_func(unsigned func) { switch (func) { diff --git a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_draw.h b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_draw.h index 09d07bb97..842a95271 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_draw.h +++ b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_draw.h @@ -87,7 +87,6 @@ fd4_draw(struct fd_batch *batch, struct fd_ringbuffer *ring, fd_reset_wfi(batch); } - static inline enum a4xx_index_size fd4_size2indextype(unsigned index_size) { @@ -100,26 +99,26 @@ fd4_size2indextype(unsigned index_size) assert(0); return INDEX4_SIZE_32_BIT; } + static inline void fd4_draw_emit(struct fd_batch *batch, struct fd_ringbuffer *ring, enum pc_di_primtype primtype, enum pc_di_vis_cull_mode vismode, - const struct pipe_draw_info *info) + const struct pipe_draw_info *info, + unsigned index_offset) { struct pipe_resource *idx_buffer = NULL; enum a4xx_index_size idx_type; enum pc_di_src_sel src_sel; uint32_t idx_size, idx_offset; - if (info->indexed) { - struct pipe_index_buffer *idx = &batch->ctx->indexbuf; - - assert(!idx->user_buffer); + if (info->index_size) { + assert(!info->has_user_indices); - idx_buffer = idx->buffer; - idx_type = fd4_size2indextype(idx->index_size); - idx_size = idx->index_size * info->count; - idx_offset = idx->offset + (info->start * idx->index_size); + idx_buffer = info->index.resource; + idx_type = fd4_size2indextype(info->index_size); + idx_size = info->index_size * info->count; + idx_offset = index_offset + info->start * info->index_size; src_sel = DI_SRC_SEL_DMA; } else { idx_buffer = NULL; diff --git a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index fc0e4d111..0f7c64703 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -45,46 +45,41 @@ #include "fd4_format.h" #include "fd4_zsa.h" -static const enum adreno_state_block sb[] = { - [SHADER_VERTEX] = SB_VERT_SHADER, - [SHADER_FRAGMENT] = SB_FRAG_SHADER, -}; - /* regid: base const register * prsc or dwords: buffer containing constant values * sizedwords: size of const value buffer */ -void +static void fd4_emit_const(struct fd_ringbuffer 
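fd4_draw_emit() is adapted to the gallium change that folded the index buffer into pipe_draw_info: info->indexed becomes info->index_size, the buffer is info->index.resource, user-space index arrays are flagged by info->has_user_indices, and the buffer offset arrives as the new index_offset parameter. The indexed path now reduces to (names as in the hunk above):

    if (info->index_size) {
        assert(!info->has_user_indices);   /* user index arrays are handled before this point */

        idx_buffer = info->index.resource;                 /* buffer lives in the draw info now */
        idx_type   = fd4_size2indextype(info->index_size);
        idx_size   = info->index_size * info->count;
        idx_offset = index_offset + info->start * info->index_size;
        src_sel    = DI_SRC_SEL_DMA;                       /* indices fetched by the CP via DMA */
    }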
*ring, enum shader_t type, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc) { uint32_t i, sz; - enum adreno_state_src src; + enum a4xx_state_src src; debug_assert((regid % 4) == 0); debug_assert((sizedwords % 4) == 0); if (prsc) { sz = 0; - src = 0x2; // TODO ?? + src = SS4_INDIRECT; } else { sz = sizedwords; - src = SS_DIRECT; + src = SS4_DIRECT; } - OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz); - OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) | - CP_LOAD_STATE_0_STATE_SRC(src) | - CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) | - CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4)); + OUT_PKT3(ring, CP_LOAD_STATE4, 2 + sz); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid/4) | + CP_LOAD_STATE4_0_STATE_SRC(src) | + CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(type)) | + CP_LOAD_STATE4_0_NUM_UNIT(sizedwords/4)); if (prsc) { struct fd_bo *bo = fd_resource(prsc)->bo; OUT_RELOC(ring, bo, offset, - CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0); + CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS), 0); } else { - OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | - CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); + OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) | + CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS)); dwords = (uint32_t *)&((uint8_t *)dwords)[offset]; } for (i = 0; i < sz; i++) { @@ -96,18 +91,18 @@ static void fd4_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets) { + uint32_t anum = align(num, 4); uint32_t i; debug_assert((regid % 4) == 0); - debug_assert((num % 4) == 0); - OUT_PKT3(ring, CP_LOAD_STATE, 2 + num); - OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) | - CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | - CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) | - CP_LOAD_STATE_0_NUM_UNIT(num/4)); - OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | - CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); + OUT_PKT3(ring, CP_LOAD_STATE4, 2 + anum); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid/4) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(type)) | + CP_LOAD_STATE4_0_NUM_UNIT(anum/4)); + OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) | + CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS)); for (i = 0; i < num; i++) { if (prscs[i]) { @@ -120,16 +115,19 @@ fd4_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, OUT_RING(ring, 0xbad00000 | (i << 16)); } } + + for (; i < anum; i++) + OUT_RING(ring, 0xffffffff); } static void emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, - enum adreno_state_block sb, struct fd_texture_stateobj *tex, + enum a4xx_state_block sb, struct fd_texture_stateobj *tex, const struct ir3_shader_variant *v) { static const uint32_t bcolor_reg[] = { - [SB_VERT_TEX] = REG_A4XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, - [SB_FRAG_TEX] = REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, + [SB4_VS_TEX] = REG_A4XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, + [SB4_FS_TEX] = REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, }; struct fd4_context *fd4_ctx = fd4_context(ctx); bool needs_border = false; @@ -145,13 +143,13 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, num_samplers = align(tex->num_samplers, 2); /* output sampler state: */ - OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * num_samplers)); - OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | - CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | - CP_LOAD_STATE_0_STATE_BLOCK(sb) | - CP_LOAD_STATE_0_NUM_UNIT(num_samplers)); - OUT_RING(ring, 
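The a4xx state uploads move from the generic CP_LOAD_STATE packet to CP_LOAD_STATE4, replacing magic numbers with the proper a4xx enums (SS4_DIRECT/SS4_INDIRECT source, ST4_CONSTANTS/ST4_SHADER type, fd4_stage2shadersb() for the state block); fd4_emit_const_bo() additionally pads its payload to a multiple of four dwords with 0xffffffff. A condensed sketch of a direct (inline) constant upload using the packet layout above:

    /* regid and sizedwords are multiples of 4, as the debug_asserts above require. */
    OUT_PKT3(ring, CP_LOAD_STATE4, 2 + sizedwords);
    OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid / 4) |               /* dest, in vec4 units    */
                   CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |            /* payload follows inline */
                   CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(type)) |
                   CP_LOAD_STATE4_0_NUM_UNIT(sizedwords / 4));
    OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) |
                   CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS));
    for (i = 0; i < sizedwords; i++)
        OUT_RING(ring, dwords[i]);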
CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) | - CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); + OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (2 * num_samplers)); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(sb) | + CP_LOAD_STATE4_0_NUM_UNIT(num_samplers)); + OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER) | + CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); for (i = 0; i < tex->num_samplers; i++) { static const struct fd4_sampler_stateobj dummy_sampler = {}; const struct fd4_sampler_stateobj *sampler = tex->samplers[i] ? @@ -173,13 +171,13 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, unsigned num_textures = tex->num_textures + v->astc_srgb.count; /* emit texture state: */ - OUT_PKT3(ring, CP_LOAD_STATE, 2 + (8 * num_textures)); - OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | - CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | - CP_LOAD_STATE_0_STATE_BLOCK(sb) | - CP_LOAD_STATE_0_NUM_UNIT(num_textures)); - OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | - CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); + OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (8 * num_textures)); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(sb) | + CP_LOAD_STATE4_0_NUM_UNIT(num_textures)); + OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) | + CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); for (i = 0; i < tex->num_textures; i++) { static const struct fd4_pipe_sampler_view dummy_view = {}; const struct fd4_pipe_sampler_view *view = tex->textures[i] ? @@ -264,13 +262,13 @@ fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs, } /* output sampler state: */ - OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * nr_bufs)); - OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | - CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | - CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) | - CP_LOAD_STATE_0_NUM_UNIT(nr_bufs)); - OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) | - CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); + OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (2 * nr_bufs)); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(SB4_FS_TEX) | + CP_LOAD_STATE4_0_NUM_UNIT(nr_bufs)); + OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER) | + CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); for (i = 0; i < nr_bufs; i++) { OUT_RING(ring, A4XX_TEX_SAMP_0_XY_MAG(A4XX_TEX_NEAREST) | A4XX_TEX_SAMP_0_XY_MIN(A4XX_TEX_NEAREST) | @@ -281,24 +279,24 @@ fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs, } /* emit texture state: */ - OUT_PKT3(ring, CP_LOAD_STATE, 2 + (8 * nr_bufs)); - OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | - CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | - CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) | - CP_LOAD_STATE_0_NUM_UNIT(nr_bufs)); - OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | - CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); + OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (8 * nr_bufs)); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(SB4_FS_TEX) | + CP_LOAD_STATE4_0_NUM_UNIT(nr_bufs)); + OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) | + CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); for (i = 0; i < nr_bufs; i++) { if (bufs[i]) { struct fd_resource *rsc = fd_resource(bufs[i]->texture); - enum pipe_format format = fd4_gmem_restore_format(bufs[i]->format); + enum pipe_format format = fd_gmem_restore_format(bufs[i]->format); /* The restore blit_zs shader expects stencil in sampler 0, * and depth in sampler 1 */ if 
(rsc->stencil && (i == 0)) { rsc = rsc->stencil; - format = fd4_gmem_restore_format(rsc->base.b.format); + format = fd_gmem_restore_format(rsc->base.b.format); } /* note: PIPE_BUFFER disallowed for surfaces */ @@ -405,7 +403,7 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) struct pipe_vertex_element *elem = &vtx->vtx->pipe[i]; const struct pipe_vertex_buffer *vb = &vtx->vertexbuf.vb[elem->vertex_buffer_index]; - struct fd_resource *rsc = fd_resource(vb->buffer); + struct fd_resource *rsc = fd_resource(vb->buffer.resource); enum pipe_format pfmt = elem->src_format; enum a4xx_vtx_fmt fmt = fd4_pipe2vtx(pfmt); bool switchnext = (i != last) || @@ -501,7 +499,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, { const struct ir3_shader_variant *vp = fd4_emit_get_vp(emit); const struct ir3_shader_variant *fp = fd4_emit_get_fp(emit); - uint32_t dirty = emit->dirty; + const enum fd_dirty_3d_state dirty = emit->dirty; emit_marker(ring, 5); @@ -602,7 +600,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, fd4_rasterizer_stateobj(ctx->rasterizer); uint32_t val = rast->pc_prim_vtx_cntl; - if (info->indexed && info->primitive_restart) + if (info->index_size && info->primitive_restart) val |= A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART; val |= COND(vp->writes_psize, A4XX_PC_PRIM_VTX_CNTL_PSIZE); @@ -679,9 +677,9 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, } if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */ - ir3_emit_consts(vp, ring, ctx, emit->info, dirty); + ir3_emit_vs_consts(vp, ring, ctx, emit->info); if (!emit->key.binning_pass) - ir3_emit_consts(fp, ring, ctx, emit->info, dirty); + ir3_emit_fs_consts(fp, ring, ctx); } if ((dirty & FD_DIRTY_BLEND)) { @@ -742,21 +740,11 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, A4XX_RB_BLEND_ALPHA_F32(bcolor->color[3])); } - if (dirty & FD_DIRTY_VERTTEX) { - if (vp->has_samp) - emit_textures(ctx, ring, SB_VERT_TEX, &ctx->verttex, vp); - else - dirty &= ~FD_DIRTY_VERTTEX; - } - - if (dirty & FD_DIRTY_FRAGTEX) { - if (fp->has_samp) - emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->fragtex, fp); - else - dirty &= ~FD_DIRTY_FRAGTEX; - } + if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) + emit_textures(ctx, ring, SB4_VS_TEX, &ctx->tex[PIPE_SHADER_VERTEX], vp); - ctx->dirty &= ~dirty; + if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) + emit_textures(ctx, ring, SB4_FS_TEX, &ctx->tex[PIPE_SHADER_FRAGMENT], fp); } /* emit setup at begin of new cmdstream buffer (don't rely on previous @@ -874,10 +862,10 @@ fd4_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) /* we don't use this yet.. probably best to disable.. 
*/ OUT_PKT3(ring, CP_SET_DRAW_STATE, 2); - OUT_RING(ring, CP_SET_DRAW_STATE_0_COUNT(0) | - CP_SET_DRAW_STATE_0_DISABLE_ALL_GROUPS | - CP_SET_DRAW_STATE_0_GROUP_ID(0)); - OUT_RING(ring, CP_SET_DRAW_STATE_1_ADDR(0)); + OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) | + CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS | + CP_SET_DRAW_STATE__0_GROUP_ID(0)); + OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0)); OUT_PKT0(ring, REG_A4XX_SP_VS_PVT_MEM_PARAM, 2); OUT_RING(ring, 0x08000001); /* SP_VS_PVT_MEM_PARAM */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_emit.h b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_emit.h index 42e0e5e64..a724caedc 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_emit.h +++ b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_emit.h @@ -38,10 +38,6 @@ struct fd_ringbuffer; -void fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type, - uint32_t regid, uint32_t offset, uint32_t sizedwords, - const uint32_t *dwords, struct pipe_resource *prsc); - void fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs, struct pipe_surface **bufs); @@ -52,7 +48,7 @@ struct fd4_emit { const struct fd_program_stateobj *prog; const struct pipe_draw_info *info; struct ir3_shader_key key; - uint32_t dirty; + enum fd_dirty_3d_state dirty; uint32_t sprite_coord_enable; /* bitmask */ bool sprite_coord_mode; diff --git a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_format.c index efc5e7c22..3e1dc2778 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_format.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_format.c @@ -394,29 +394,6 @@ fd4_pipe2fetchsize(enum pipe_format format) } } -/* we need to special case a bit the depth/stencil restore, because we are - * using the texture sampler to blit into the depth/stencil buffer, *not* - * into a color buffer. Otherwise fd4_tex_swiz() will do the wrong thing, - * as it is assuming that you are sampling into normal render target.. - * - * TODO looks like we can probably share w/ a3xx.. 
- */ -enum pipe_format -fd4_gmem_restore_format(enum pipe_format format) -{ - switch (format) { - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - return PIPE_FORMAT_R8G8B8A8_UNORM; - case PIPE_FORMAT_Z16_UNORM: - return PIPE_FORMAT_R8G8_UNORM; - case PIPE_FORMAT_S8_UINT: - return PIPE_FORMAT_R8_UNORM; - default: - return format; - } -} - enum a4xx_depth_format fd4_pipe2depth(enum pipe_format format) { diff --git a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_format.h b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_format.h index 04837da65..a558e4201 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_format.h +++ b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_format.h @@ -36,7 +36,6 @@ enum a4xx_vtx_fmt fd4_pipe2vtx(enum pipe_format format); enum a4xx_tex_fmt fd4_pipe2tex(enum pipe_format format); enum a4xx_color_fmt fd4_pipe2color(enum pipe_format format); -enum pipe_format fd4_gmem_restore_format(enum pipe_format format); enum a3xx_color_swap fd4_pipe2swap(enum pipe_format format); enum a4xx_tex_fetchsize fd4_pipe2fetchsize(enum pipe_format format); enum a4xx_depth_format fd4_pipe2depth(enum pipe_format format); diff --git a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_program.c index 0e8efc2e5..05b0c4f9a 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -89,37 +89,31 @@ static void emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) { const struct ir3_info *si = &so->info; - enum adreno_state_block sb; + enum a4xx_state_block sb = fd4_stage2shadersb(so->type); enum adreno_state_src src; uint32_t i, sz, *bin; - if (so->type == SHADER_VERTEX) { - sb = SB_VERT_SHADER; - } else { - sb = SB_FRAG_SHADER; - } - if (fd_mesa_debug & FD_DBG_DIRECT) { sz = si->sizedwords; - src = SS_DIRECT; + src = SS4_DIRECT; bin = fd_bo_map(so->bo); } else { sz = 0; - src = 2; // enums different on a4xx.. 
+ src = SS4_INDIRECT; bin = NULL; } - OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz); - OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | - CP_LOAD_STATE_0_STATE_SRC(src) | - CP_LOAD_STATE_0_STATE_BLOCK(sb) | - CP_LOAD_STATE_0_NUM_UNIT(so->instrlen)); + OUT_PKT3(ring, CP_LOAD_STATE4, 2 + sz); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | + CP_LOAD_STATE4_0_STATE_SRC(src) | + CP_LOAD_STATE4_0_STATE_BLOCK(sb) | + CP_LOAD_STATE4_0_NUM_UNIT(so->instrlen)); if (bin) { - OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | - CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER)); + OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) | + CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER)); } else { OUT_RELOC(ring, so->bo, 0, - CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0); + CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER), 0); } /* for how clever coverity is, it is sometimes rather dull, and @@ -220,7 +214,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, uint32_t face_regid, coord_regid, zwcoord_regid; enum a3xx_threadsize fssz; int constmode; - int i, j, k; + int i, j; debug_assert(nr <= ARRAY_SIZE(color_regid)); @@ -342,45 +336,34 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, A4XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) | A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(s[FS].v->varying_in)); - for (i = 0, j = -1; (i < 16) && (j < (int)s[FS].v->inputs_count); i++) { + struct ir3_shader_linkage l = {0}; + ir3_link_shaders(&l, s[VS].v, s[FS].v); + + for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) { uint32_t reg = 0; OUT_PKT0(ring, REG_A4XX_SP_VS_OUT_REG(i), 1); - j = ir3_next_varying(s[FS].v, j); - if (j < s[FS].v->inputs_count) { - k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].slot); - reg |= A4XX_SP_VS_OUT_REG_A_REGID(s[VS].v->outputs[k].regid); - reg |= A4XX_SP_VS_OUT_REG_A_COMPMASK(s[FS].v->inputs[j].compmask); - } + reg |= A4XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid); + reg |= A4XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask); + j++; - j = ir3_next_varying(s[FS].v, j); - if (j < s[FS].v->inputs_count) { - k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].slot); - reg |= A4XX_SP_VS_OUT_REG_B_REGID(s[VS].v->outputs[k].regid); - reg |= A4XX_SP_VS_OUT_REG_B_COMPMASK(s[FS].v->inputs[j].compmask); - } + reg |= A4XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid); + reg |= A4XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask); + j++; OUT_RING(ring, reg); } - for (i = 0, j = -1; (i < 8) && (j < (int)s[FS].v->inputs_count); i++) { + for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) { uint32_t reg = 0; OUT_PKT0(ring, REG_A4XX_SP_VS_VPC_DST_REG(i), 1); - j = ir3_next_varying(s[FS].v, j); - if (j < s[FS].v->inputs_count) - reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC0(s[FS].v->inputs[j].inloc); - j = ir3_next_varying(s[FS].v, j); - if (j < s[FS].v->inputs_count) - reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC1(s[FS].v->inputs[j].inloc); - j = ir3_next_varying(s[FS].v, j); - if (j < s[FS].v->inputs_count) - reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC2(s[FS].v->inputs[j].inloc); - j = ir3_next_varying(s[FS].v, j); - if (j < s[FS].v->inputs_count) - reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC3(s[FS].v->inputs[j].inloc); + reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc + 8); + reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc + 8); + reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc + 8); + reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc + 8); OUT_RING(ring, reg); } @@ -515,10 +498,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, */ unsigned compmask = s[FS].v->inputs[j].compmask; - /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG - 
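emit_shader() gets the same CP_LOAD_STATE4 treatment: the state block now comes from fd4_stage2shadersb(so->type), and the old hard-coded src = 2 becomes SS4_INDIRECT, in which case the packet carries a relocation to the shader BO instead of an inline payload. A condensed sketch of the indirect path as it appears above:

    OUT_PKT3(ring, CP_LOAD_STATE4, 2);                      /* sz == 0: no inline dwords */
    OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
                   CP_LOAD_STATE4_0_STATE_SRC(SS4_INDIRECT) |
                   CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(so->type)) |
                   CP_LOAD_STATE4_0_NUM_UNIT(so->instrlen));
    /* the CP fetches the shader binary through this relocation */
    OUT_RELOC(ring, so->bo, 0, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER), 0);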
* instead.. rather than -8 everywhere else.. - */ - uint32_t inloc = s[FS].v->inputs[j].inloc - 8; + uint32_t inloc = s[FS].v->inputs[j].inloc; if ((s[FS].v->inputs[j].interpolate == INTERP_MODE_FLAT) || (s[FS].v->inputs[j].rasterflat && emit->rasterflat)) { diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/a5xx.xml.h b/lib/mesa/src/gallium/drivers/freedreno/a5xx/a5xx.xml.h new file mode 100644 index 000000000..ae946d81b --- /dev/null +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/a5xx.xml.h @@ -0,0 +1,4896 @@ +#ifndef A5XX_XML +#define A5XX_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/freedreno/envytools/ +git clone https://github.com/freedreno/envytools.git + +The rules-ng-ng source files this header was generated from are: +- /home/ilia/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 141938 bytes, from 2017-07-08 01:02:47) +- /home/ilia/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-11 01:04:14) +- /home/ilia/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 13324 bytes, from 2017-07-04 02:59:47) +- /home/ilia/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 31866 bytes, from 2017-07-04 02:59:47) + +Copyright (C) 2013-2017 by the following authors: +- Rob Clark <robdclark@gmail.com> (robclark) +- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +enum a5xx_color_fmt { + RB5_A8_UNORM = 2, + RB5_R8_UNORM = 3, + RB5_R8_SNORM = 4, + RB5_R8_UINT = 5, + RB5_R8_SINT = 6, + RB5_R4G4B4A4_UNORM = 8, + RB5_R5G5B5A1_UNORM = 10, + RB5_R5G6B5_UNORM = 14, + RB5_R8G8_UNORM = 15, + RB5_R8G8_SNORM = 16, + RB5_R8G8_UINT = 17, + RB5_R8G8_SINT = 18, + RB5_R16_UNORM = 21, + RB5_R16_SNORM = 22, + RB5_R16_FLOAT = 23, + RB5_R16_UINT = 24, + RB5_R16_SINT = 25, + RB5_R8G8B8A8_UNORM = 48, + RB5_R8G8B8_UNORM = 49, + RB5_R8G8B8A8_SNORM = 50, + RB5_R8G8B8A8_UINT = 51, + RB5_R8G8B8A8_SINT = 52, + RB5_R10G10B10A2_UNORM = 55, + RB5_R10G10B10A2_UINT = 58, + RB5_R11G11B10_FLOAT = 66, + RB5_R16G16_UNORM = 67, + RB5_R16G16_SNORM = 68, + RB5_R16G16_FLOAT = 69, + RB5_R16G16_UINT = 70, + RB5_R16G16_SINT = 71, + RB5_R32_FLOAT = 74, + RB5_R32_UINT = 75, + RB5_R32_SINT = 76, + RB5_R16G16B16A16_UNORM = 96, + RB5_R16G16B16A16_SNORM = 97, + RB5_R16G16B16A16_FLOAT = 98, + RB5_R16G16B16A16_UINT = 99, + RB5_R16G16B16A16_SINT = 100, + RB5_R32G32_FLOAT = 103, + RB5_R32G32_UINT = 104, + RB5_R32G32_SINT = 105, + RB5_R32G32B32A32_FLOAT = 130, + RB5_R32G32B32A32_UINT = 131, + RB5_R32G32B32A32_SINT = 132, +}; + +enum a5xx_tile_mode { + TILE5_LINEAR = 0, + TILE5_2 = 2, + TILE5_3 = 3, +}; + +enum a5xx_vtx_fmt { + VFMT5_8_UNORM = 3, + VFMT5_8_SNORM = 4, + VFMT5_8_UINT = 5, + VFMT5_8_SINT = 6, + VFMT5_8_8_UNORM = 15, + VFMT5_8_8_SNORM = 16, + VFMT5_8_8_UINT = 17, + VFMT5_8_8_SINT = 18, + VFMT5_16_UNORM = 21, + VFMT5_16_SNORM = 22, + VFMT5_16_FLOAT = 23, + VFMT5_16_UINT = 24, + VFMT5_16_SINT = 25, + VFMT5_8_8_8_UNORM = 33, + VFMT5_8_8_8_SNORM = 34, + VFMT5_8_8_8_UINT = 35, + VFMT5_8_8_8_SINT = 36, + VFMT5_8_8_8_8_UNORM = 48, + VFMT5_8_8_8_8_SNORM = 50, + VFMT5_8_8_8_8_UINT = 51, + VFMT5_8_8_8_8_SINT = 52, + VFMT5_10_10_10_2_UNORM = 54, + VFMT5_10_10_10_2_SNORM = 57, + VFMT5_10_10_10_2_UINT = 58, + VFMT5_10_10_10_2_SINT = 59, + VFMT5_11_11_10_FLOAT = 66, + VFMT5_16_16_UNORM = 67, + VFMT5_16_16_SNORM = 68, + VFMT5_16_16_FLOAT = 69, + VFMT5_16_16_UINT = 70, + VFMT5_16_16_SINT = 71, + VFMT5_32_UNORM = 72, + VFMT5_32_SNORM = 73, + VFMT5_32_FLOAT = 74, + VFMT5_32_UINT = 75, + VFMT5_32_SINT = 76, + VFMT5_32_FIXED = 77, + VFMT5_16_16_16_UNORM = 88, + VFMT5_16_16_16_SNORM = 89, + VFMT5_16_16_16_FLOAT = 90, + VFMT5_16_16_16_UINT = 91, + VFMT5_16_16_16_SINT = 92, + VFMT5_16_16_16_16_UNORM = 96, + VFMT5_16_16_16_16_SNORM = 97, + VFMT5_16_16_16_16_FLOAT = 98, + VFMT5_16_16_16_16_UINT = 99, + VFMT5_16_16_16_16_SINT = 100, + VFMT5_32_32_UNORM = 101, + VFMT5_32_32_SNORM = 102, + VFMT5_32_32_FLOAT = 103, + VFMT5_32_32_UINT = 104, + VFMT5_32_32_SINT = 105, + VFMT5_32_32_FIXED = 106, + VFMT5_32_32_32_UNORM = 112, + VFMT5_32_32_32_SNORM = 113, + VFMT5_32_32_32_UINT = 114, + VFMT5_32_32_32_SINT = 115, + VFMT5_32_32_32_FLOAT = 116, + VFMT5_32_32_32_FIXED = 117, + VFMT5_32_32_32_32_UNORM = 128, + VFMT5_32_32_32_32_SNORM = 129, + VFMT5_32_32_32_32_FLOAT = 130, + VFMT5_32_32_32_32_UINT = 131, + VFMT5_32_32_32_32_SINT = 132, + VFMT5_32_32_32_32_FIXED = 133, +}; + +enum a5xx_tex_fmt { + TFMT5_A8_UNORM = 2, + TFMT5_8_UNORM = 3, + TFMT5_8_SNORM = 4, + TFMT5_8_UINT = 5, + TFMT5_8_SINT = 6, + TFMT5_4_4_4_4_UNORM = 8, + TFMT5_5_5_5_1_UNORM = 10, + TFMT5_5_6_5_UNORM = 14, + TFMT5_8_8_UNORM = 15, + TFMT5_8_8_SNORM = 16, + TFMT5_8_8_UINT = 17, + TFMT5_8_8_SINT = 18, + TFMT5_L8_A8_UNORM = 19, + TFMT5_16_UNORM = 21, + TFMT5_16_SNORM = 22, + TFMT5_16_FLOAT = 23, + TFMT5_16_UINT = 24, + TFMT5_16_SINT = 25, + TFMT5_8_8_8_8_UNORM = 48, + TFMT5_8_8_8_UNORM = 49, + TFMT5_8_8_8_8_SNORM = 50, + TFMT5_8_8_8_8_UINT = 51, + 
TFMT5_8_8_8_8_SINT = 52, + TFMT5_9_9_9_E5_FLOAT = 53, + TFMT5_10_10_10_2_UNORM = 54, + TFMT5_10_10_10_2_UINT = 58, + TFMT5_11_11_10_FLOAT = 66, + TFMT5_16_16_UNORM = 67, + TFMT5_16_16_SNORM = 68, + TFMT5_16_16_FLOAT = 69, + TFMT5_16_16_UINT = 70, + TFMT5_16_16_SINT = 71, + TFMT5_32_FLOAT = 74, + TFMT5_32_UINT = 75, + TFMT5_32_SINT = 76, + TFMT5_16_16_16_16_UNORM = 96, + TFMT5_16_16_16_16_SNORM = 97, + TFMT5_16_16_16_16_FLOAT = 98, + TFMT5_16_16_16_16_UINT = 99, + TFMT5_16_16_16_16_SINT = 100, + TFMT5_32_32_FLOAT = 103, + TFMT5_32_32_UINT = 104, + TFMT5_32_32_SINT = 105, + TFMT5_32_32_32_UINT = 114, + TFMT5_32_32_32_SINT = 115, + TFMT5_32_32_32_FLOAT = 116, + TFMT5_32_32_32_32_FLOAT = 130, + TFMT5_32_32_32_32_UINT = 131, + TFMT5_32_32_32_32_SINT = 132, + TFMT5_X8Z24_UNORM = 160, + TFMT5_ETC2_RG11_UNORM = 171, + TFMT5_ETC2_RG11_SNORM = 172, + TFMT5_ETC2_R11_UNORM = 173, + TFMT5_ETC2_R11_SNORM = 174, + TFMT5_ETC1 = 175, + TFMT5_ETC2_RGB8 = 176, + TFMT5_ETC2_RGBA8 = 177, + TFMT5_ETC2_RGB8A1 = 178, + TFMT5_DXT1 = 179, + TFMT5_DXT3 = 180, + TFMT5_DXT5 = 181, + TFMT5_RGTC1_UNORM = 183, + TFMT5_RGTC1_SNORM = 184, + TFMT5_RGTC2_UNORM = 187, + TFMT5_RGTC2_SNORM = 188, + TFMT5_BPTC_UFLOAT = 190, + TFMT5_BPTC_FLOAT = 191, + TFMT5_BPTC = 192, + TFMT5_ASTC_4x4 = 193, + TFMT5_ASTC_5x4 = 194, + TFMT5_ASTC_5x5 = 195, + TFMT5_ASTC_6x5 = 196, + TFMT5_ASTC_6x6 = 197, + TFMT5_ASTC_8x5 = 198, + TFMT5_ASTC_8x6 = 199, + TFMT5_ASTC_8x8 = 200, + TFMT5_ASTC_10x5 = 201, + TFMT5_ASTC_10x6 = 202, + TFMT5_ASTC_10x8 = 203, + TFMT5_ASTC_10x10 = 204, + TFMT5_ASTC_12x10 = 205, + TFMT5_ASTC_12x12 = 206, +}; + +enum a5xx_tex_fetchsize { + TFETCH5_1_BYTE = 0, + TFETCH5_2_BYTE = 1, + TFETCH5_4_BYTE = 2, + TFETCH5_8_BYTE = 3, + TFETCH5_16_BYTE = 4, +}; + +enum a5xx_depth_format { + DEPTH5_NONE = 0, + DEPTH5_16 = 1, + DEPTH5_24_8 = 2, + DEPTH5_32 = 4, +}; + +enum a5xx_blit_buf { + BLIT_MRT0 = 0, + BLIT_MRT1 = 1, + BLIT_MRT2 = 2, + BLIT_MRT3 = 3, + BLIT_MRT4 = 4, + BLIT_MRT5 = 5, + BLIT_MRT6 = 6, + BLIT_MRT7 = 7, + BLIT_ZS = 8, + BLIT_Z32 = 9, +}; + +enum a5xx_cp_perfcounter_select { + PERF_CP_ALWAYS_COUNT = 0, + PERF_CP_BUSY_GFX_CORE_IDLE = 1, + PERF_CP_BUSY_CYCLES = 2, + PERF_CP_PFP_IDLE = 3, + PERF_CP_PFP_BUSY_WORKING = 4, + PERF_CP_PFP_STALL_CYCLES_ANY = 5, + PERF_CP_PFP_STARVE_CYCLES_ANY = 6, + PERF_CP_PFP_ICACHE_MISS = 7, + PERF_CP_PFP_ICACHE_HIT = 8, + PERF_CP_PFP_MATCH_PM4_PKT_PROFILE = 9, + PERF_CP_ME_BUSY_WORKING = 10, + PERF_CP_ME_IDLE = 11, + PERF_CP_ME_STARVE_CYCLES_ANY = 12, + PERF_CP_ME_FIFO_EMPTY_PFP_IDLE = 13, + PERF_CP_ME_FIFO_EMPTY_PFP_BUSY = 14, + PERF_CP_ME_FIFO_FULL_ME_BUSY = 15, + PERF_CP_ME_FIFO_FULL_ME_NON_WORKING = 16, + PERF_CP_ME_STALL_CYCLES_ANY = 17, + PERF_CP_ME_ICACHE_MISS = 18, + PERF_CP_ME_ICACHE_HIT = 19, + PERF_CP_NUM_PREEMPTIONS = 20, + PERF_CP_PREEMPTION_REACTION_DELAY = 21, + PERF_CP_PREEMPTION_SWITCH_OUT_TIME = 22, + PERF_CP_PREEMPTION_SWITCH_IN_TIME = 23, + PERF_CP_DEAD_DRAWS_IN_BIN_RENDER = 24, + PERF_CP_PREDICATED_DRAWS_KILLED = 25, + PERF_CP_MODE_SWITCH = 26, + PERF_CP_ZPASS_DONE = 27, + PERF_CP_CONTEXT_DONE = 28, + PERF_CP_CACHE_FLUSH = 29, + PERF_CP_LONG_PREEMPTIONS = 30, +}; + +enum a5xx_rbbm_perfcounter_select { + PERF_RBBM_ALWAYS_COUNT = 0, + PERF_RBBM_ALWAYS_ON = 1, + PERF_RBBM_TSE_BUSY = 2, + PERF_RBBM_RAS_BUSY = 3, + PERF_RBBM_PC_DCALL_BUSY = 4, + PERF_RBBM_PC_VSD_BUSY = 5, + PERF_RBBM_STATUS_MASKED = 6, + PERF_RBBM_COM_BUSY = 7, + PERF_RBBM_DCOM_BUSY = 8, + PERF_RBBM_VBIF_BUSY = 9, + PERF_RBBM_VSC_BUSY = 10, + PERF_RBBM_TESS_BUSY = 11, + PERF_RBBM_UCHE_BUSY = 12, + 
PERF_RBBM_HLSQ_BUSY = 13, +}; + +enum a5xx_pc_perfcounter_select { + PERF_PC_BUSY_CYCLES = 0, + PERF_PC_WORKING_CYCLES = 1, + PERF_PC_STALL_CYCLES_VFD = 2, + PERF_PC_STALL_CYCLES_TSE = 3, + PERF_PC_STALL_CYCLES_VPC = 4, + PERF_PC_STALL_CYCLES_UCHE = 5, + PERF_PC_STALL_CYCLES_TESS = 6, + PERF_PC_STALL_CYCLES_TSE_ONLY = 7, + PERF_PC_STALL_CYCLES_VPC_ONLY = 8, + PERF_PC_PASS1_TF_STALL_CYCLES = 9, + PERF_PC_STARVE_CYCLES_FOR_INDEX = 10, + PERF_PC_STARVE_CYCLES_FOR_TESS_FACTOR = 11, + PERF_PC_STARVE_CYCLES_FOR_VIZ_STREAM = 12, + PERF_PC_STARVE_CYCLES_FOR_POSITION = 13, + PERF_PC_STARVE_CYCLES_DI = 14, + PERF_PC_VIS_STREAMS_LOADED = 15, + PERF_PC_INSTANCES = 16, + PERF_PC_VPC_PRIMITIVES = 17, + PERF_PC_DEAD_PRIM = 18, + PERF_PC_LIVE_PRIM = 19, + PERF_PC_VERTEX_HITS = 20, + PERF_PC_IA_VERTICES = 21, + PERF_PC_IA_PRIMITIVES = 22, + PERF_PC_GS_PRIMITIVES = 23, + PERF_PC_HS_INVOCATIONS = 24, + PERF_PC_DS_INVOCATIONS = 25, + PERF_PC_VS_INVOCATIONS = 26, + PERF_PC_GS_INVOCATIONS = 27, + PERF_PC_DS_PRIMITIVES = 28, + PERF_PC_VPC_POS_DATA_TRANSACTION = 29, + PERF_PC_3D_DRAWCALLS = 30, + PERF_PC_2D_DRAWCALLS = 31, + PERF_PC_NON_DRAWCALL_GLOBAL_EVENTS = 32, + PERF_TESS_BUSY_CYCLES = 33, + PERF_TESS_WORKING_CYCLES = 34, + PERF_TESS_STALL_CYCLES_PC = 35, + PERF_TESS_STARVE_CYCLES_PC = 36, +}; + +enum a5xx_vfd_perfcounter_select { + PERF_VFD_BUSY_CYCLES = 0, + PERF_VFD_STALL_CYCLES_UCHE = 1, + PERF_VFD_STALL_CYCLES_VPC_ALLOC = 2, + PERF_VFD_STALL_CYCLES_MISS_VB = 3, + PERF_VFD_STALL_CYCLES_MISS_Q = 4, + PERF_VFD_STALL_CYCLES_SP_INFO = 5, + PERF_VFD_STALL_CYCLES_SP_ATTR = 6, + PERF_VFD_STALL_CYCLES_VFDP_VB = 7, + PERF_VFD_STALL_CYCLES_VFDP_Q = 8, + PERF_VFD_DECODER_PACKER_STALL = 9, + PERF_VFD_STARVE_CYCLES_UCHE = 10, + PERF_VFD_RBUFFER_FULL = 11, + PERF_VFD_ATTR_INFO_FIFO_FULL = 12, + PERF_VFD_DECODED_ATTRIBUTE_BYTES = 13, + PERF_VFD_NUM_ATTRIBUTES = 14, + PERF_VFD_INSTRUCTIONS = 15, + PERF_VFD_UPPER_SHADER_FIBERS = 16, + PERF_VFD_LOWER_SHADER_FIBERS = 17, + PERF_VFD_MODE_0_FIBERS = 18, + PERF_VFD_MODE_1_FIBERS = 19, + PERF_VFD_MODE_2_FIBERS = 20, + PERF_VFD_MODE_3_FIBERS = 21, + PERF_VFD_MODE_4_FIBERS = 22, + PERF_VFD_TOTAL_VERTICES = 23, + PERF_VFD_NUM_ATTR_MISS = 24, + PERF_VFD_1_BURST_REQ = 25, + PERF_VFDP_STALL_CYCLES_VFD = 26, + PERF_VFDP_STALL_CYCLES_VFD_INDEX = 27, + PERF_VFDP_STALL_CYCLES_VFD_PROG = 28, + PERF_VFDP_STARVE_CYCLES_PC = 29, + PERF_VFDP_VS_STAGE_32_WAVES = 30, +}; + +enum a5xx_hlsq_perfcounter_select { + PERF_HLSQ_BUSY_CYCLES = 0, + PERF_HLSQ_STALL_CYCLES_UCHE = 1, + PERF_HLSQ_STALL_CYCLES_SP_STATE = 2, + PERF_HLSQ_STALL_CYCLES_SP_FS_STAGE = 3, + PERF_HLSQ_UCHE_LATENCY_CYCLES = 4, + PERF_HLSQ_UCHE_LATENCY_COUNT = 5, + PERF_HLSQ_FS_STAGE_32_WAVES = 6, + PERF_HLSQ_FS_STAGE_64_WAVES = 7, + PERF_HLSQ_QUADS = 8, + PERF_HLSQ_SP_STATE_COPY_TRANS_FS_STAGE = 9, + PERF_HLSQ_SP_STATE_COPY_TRANS_VS_STAGE = 10, + PERF_HLSQ_TP_STATE_COPY_TRANS_FS_STAGE = 11, + PERF_HLSQ_TP_STATE_COPY_TRANS_VS_STAGE = 12, + PERF_HLSQ_CS_INVOCATIONS = 13, + PERF_HLSQ_COMPUTE_DRAWCALLS = 14, +}; + +enum a5xx_vpc_perfcounter_select { + PERF_VPC_BUSY_CYCLES = 0, + PERF_VPC_WORKING_CYCLES = 1, + PERF_VPC_STALL_CYCLES_UCHE = 2, + PERF_VPC_STALL_CYCLES_VFD_WACK = 3, + PERF_VPC_STALL_CYCLES_HLSQ_PRIM_ALLOC = 4, + PERF_VPC_STALL_CYCLES_PC = 5, + PERF_VPC_STALL_CYCLES_SP_LM = 6, + PERF_VPC_POS_EXPORT_STALL_CYCLES = 7, + PERF_VPC_STARVE_CYCLES_SP = 8, + PERF_VPC_STARVE_CYCLES_LRZ = 9, + PERF_VPC_PC_PRIMITIVES = 10, + PERF_VPC_SP_COMPONENTS = 11, + PERF_VPC_SP_LM_PRIMITIVES = 12, + PERF_VPC_SP_LM_COMPONENTS = 13, + 
PERF_VPC_SP_LM_DWORDS = 14, + PERF_VPC_STREAMOUT_COMPONENTS = 15, + PERF_VPC_GRANT_PHASES = 16, +}; + +enum a5xx_tse_perfcounter_select { + PERF_TSE_BUSY_CYCLES = 0, + PERF_TSE_CLIPPING_CYCLES = 1, + PERF_TSE_STALL_CYCLES_RAS = 2, + PERF_TSE_STALL_CYCLES_LRZ_BARYPLANE = 3, + PERF_TSE_STALL_CYCLES_LRZ_ZPLANE = 4, + PERF_TSE_STARVE_CYCLES_PC = 5, + PERF_TSE_INPUT_PRIM = 6, + PERF_TSE_INPUT_NULL_PRIM = 7, + PERF_TSE_TRIVAL_REJ_PRIM = 8, + PERF_TSE_CLIPPED_PRIM = 9, + PERF_TSE_ZERO_AREA_PRIM = 10, + PERF_TSE_FACENESS_CULLED_PRIM = 11, + PERF_TSE_ZERO_PIXEL_PRIM = 12, + PERF_TSE_OUTPUT_NULL_PRIM = 13, + PERF_TSE_OUTPUT_VISIBLE_PRIM = 14, + PERF_TSE_CINVOCATION = 15, + PERF_TSE_CPRIMITIVES = 16, + PERF_TSE_2D_INPUT_PRIM = 17, + PERF_TSE_2D_ALIVE_CLCLES = 18, +}; + +enum a5xx_ras_perfcounter_select { + PERF_RAS_BUSY_CYCLES = 0, + PERF_RAS_SUPERTILE_ACTIVE_CYCLES = 1, + PERF_RAS_STALL_CYCLES_LRZ = 2, + PERF_RAS_STARVE_CYCLES_TSE = 3, + PERF_RAS_SUPER_TILES = 4, + PERF_RAS_8X4_TILES = 5, + PERF_RAS_MASKGEN_ACTIVE = 6, + PERF_RAS_FULLY_COVERED_SUPER_TILES = 7, + PERF_RAS_FULLY_COVERED_8X4_TILES = 8, + PERF_RAS_PRIM_KILLED_INVISILBE = 9, +}; + +enum a5xx_lrz_perfcounter_select { + PERF_LRZ_BUSY_CYCLES = 0, + PERF_LRZ_STARVE_CYCLES_RAS = 1, + PERF_LRZ_STALL_CYCLES_RB = 2, + PERF_LRZ_STALL_CYCLES_VSC = 3, + PERF_LRZ_STALL_CYCLES_VPC = 4, + PERF_LRZ_STALL_CYCLES_FLAG_PREFETCH = 5, + PERF_LRZ_STALL_CYCLES_UCHE = 6, + PERF_LRZ_LRZ_READ = 7, + PERF_LRZ_LRZ_WRITE = 8, + PERF_LRZ_READ_LATENCY = 9, + PERF_LRZ_MERGE_CACHE_UPDATING = 10, + PERF_LRZ_PRIM_KILLED_BY_MASKGEN = 11, + PERF_LRZ_PRIM_KILLED_BY_LRZ = 12, + PERF_LRZ_VISIBLE_PRIM_AFTER_LRZ = 13, + PERF_LRZ_FULL_8X8_TILES = 14, + PERF_LRZ_PARTIAL_8X8_TILES = 15, + PERF_LRZ_TILE_KILLED = 16, + PERF_LRZ_TOTAL_PIXEL = 17, + PERF_LRZ_VISIBLE_PIXEL_AFTER_LRZ = 18, +}; + +enum a5xx_uche_perfcounter_select { + PERF_UCHE_BUSY_CYCLES = 0, + PERF_UCHE_STALL_CYCLES_VBIF = 1, + PERF_UCHE_VBIF_LATENCY_CYCLES = 2, + PERF_UCHE_VBIF_LATENCY_SAMPLES = 3, + PERF_UCHE_VBIF_READ_BEATS_TP = 4, + PERF_UCHE_VBIF_READ_BEATS_VFD = 5, + PERF_UCHE_VBIF_READ_BEATS_HLSQ = 6, + PERF_UCHE_VBIF_READ_BEATS_LRZ = 7, + PERF_UCHE_VBIF_READ_BEATS_SP = 8, + PERF_UCHE_READ_REQUESTS_TP = 9, + PERF_UCHE_READ_REQUESTS_VFD = 10, + PERF_UCHE_READ_REQUESTS_HLSQ = 11, + PERF_UCHE_READ_REQUESTS_LRZ = 12, + PERF_UCHE_READ_REQUESTS_SP = 13, + PERF_UCHE_WRITE_REQUESTS_LRZ = 14, + PERF_UCHE_WRITE_REQUESTS_SP = 15, + PERF_UCHE_WRITE_REQUESTS_VPC = 16, + PERF_UCHE_WRITE_REQUESTS_VSC = 17, + PERF_UCHE_EVICTS = 18, + PERF_UCHE_BANK_REQ0 = 19, + PERF_UCHE_BANK_REQ1 = 20, + PERF_UCHE_BANK_REQ2 = 21, + PERF_UCHE_BANK_REQ3 = 22, + PERF_UCHE_BANK_REQ4 = 23, + PERF_UCHE_BANK_REQ5 = 24, + PERF_UCHE_BANK_REQ6 = 25, + PERF_UCHE_BANK_REQ7 = 26, + PERF_UCHE_VBIF_READ_BEATS_CH0 = 27, + PERF_UCHE_VBIF_READ_BEATS_CH1 = 28, + PERF_UCHE_GMEM_READ_BEATS = 29, + PERF_UCHE_FLAG_COUNT = 30, +}; + +enum a5xx_tp_perfcounter_select { + PERF_TP_BUSY_CYCLES = 0, + PERF_TP_STALL_CYCLES_UCHE = 1, + PERF_TP_LATENCY_CYCLES = 2, + PERF_TP_LATENCY_TRANS = 3, + PERF_TP_FLAG_CACHE_REQUEST_SAMPLES = 4, + PERF_TP_FLAG_CACHE_REQUEST_LATENCY = 5, + PERF_TP_L1_CACHELINE_REQUESTS = 6, + PERF_TP_L1_CACHELINE_MISSES = 7, + PERF_TP_SP_TP_TRANS = 8, + PERF_TP_TP_SP_TRANS = 9, + PERF_TP_OUTPUT_PIXELS = 10, + PERF_TP_FILTER_WORKLOAD_16BIT = 11, + PERF_TP_FILTER_WORKLOAD_32BIT = 12, + PERF_TP_QUADS_RECEIVED = 13, + PERF_TP_QUADS_OFFSET = 14, + PERF_TP_QUADS_SHADOW = 15, + PERF_TP_QUADS_ARRAY = 16, + PERF_TP_QUADS_GRADIENT = 17, + PERF_TP_QUADS_1D = 18, + 
PERF_TP_QUADS_2D = 19, + PERF_TP_QUADS_BUFFER = 20, + PERF_TP_QUADS_3D = 21, + PERF_TP_QUADS_CUBE = 22, + PERF_TP_STATE_CACHE_REQUESTS = 23, + PERF_TP_STATE_CACHE_MISSES = 24, + PERF_TP_DIVERGENT_QUADS_RECEIVED = 25, + PERF_TP_BINDLESS_STATE_CACHE_REQUESTS = 26, + PERF_TP_BINDLESS_STATE_CACHE_MISSES = 27, + PERF_TP_PRT_NON_RESIDENT_EVENTS = 28, + PERF_TP_OUTPUT_PIXELS_POINT = 29, + PERF_TP_OUTPUT_PIXELS_BILINEAR = 30, + PERF_TP_OUTPUT_PIXELS_MIP = 31, + PERF_TP_OUTPUT_PIXELS_ANISO = 32, + PERF_TP_OUTPUT_PIXELS_ZERO_LOD = 33, + PERF_TP_FLAG_CACHE_REQUESTS = 34, + PERF_TP_FLAG_CACHE_MISSES = 35, + PERF_TP_L1_5_L2_REQUESTS = 36, + PERF_TP_2D_OUTPUT_PIXELS = 37, + PERF_TP_2D_OUTPUT_PIXELS_POINT = 38, + PERF_TP_2D_OUTPUT_PIXELS_BILINEAR = 39, + PERF_TP_2D_FILTER_WORKLOAD_16BIT = 40, + PERF_TP_2D_FILTER_WORKLOAD_32BIT = 41, +}; + +enum a5xx_sp_perfcounter_select { + PERF_SP_BUSY_CYCLES = 0, + PERF_SP_ALU_WORKING_CYCLES = 1, + PERF_SP_EFU_WORKING_CYCLES = 2, + PERF_SP_STALL_CYCLES_VPC = 3, + PERF_SP_STALL_CYCLES_TP = 4, + PERF_SP_STALL_CYCLES_UCHE = 5, + PERF_SP_STALL_CYCLES_RB = 6, + PERF_SP_SCHEDULER_NON_WORKING = 7, + PERF_SP_WAVE_CONTEXTS = 8, + PERF_SP_WAVE_CONTEXT_CYCLES = 9, + PERF_SP_FS_STAGE_WAVE_CYCLES = 10, + PERF_SP_FS_STAGE_WAVE_SAMPLES = 11, + PERF_SP_VS_STAGE_WAVE_CYCLES = 12, + PERF_SP_VS_STAGE_WAVE_SAMPLES = 13, + PERF_SP_FS_STAGE_DURATION_CYCLES = 14, + PERF_SP_VS_STAGE_DURATION_CYCLES = 15, + PERF_SP_WAVE_CTRL_CYCLES = 16, + PERF_SP_WAVE_LOAD_CYCLES = 17, + PERF_SP_WAVE_EMIT_CYCLES = 18, + PERF_SP_WAVE_NOP_CYCLES = 19, + PERF_SP_WAVE_WAIT_CYCLES = 20, + PERF_SP_WAVE_FETCH_CYCLES = 21, + PERF_SP_WAVE_IDLE_CYCLES = 22, + PERF_SP_WAVE_END_CYCLES = 23, + PERF_SP_WAVE_LONG_SYNC_CYCLES = 24, + PERF_SP_WAVE_SHORT_SYNC_CYCLES = 25, + PERF_SP_WAVE_JOIN_CYCLES = 26, + PERF_SP_LM_LOAD_INSTRUCTIONS = 27, + PERF_SP_LM_STORE_INSTRUCTIONS = 28, + PERF_SP_LM_ATOMICS = 29, + PERF_SP_GM_LOAD_INSTRUCTIONS = 30, + PERF_SP_GM_STORE_INSTRUCTIONS = 31, + PERF_SP_GM_ATOMICS = 32, + PERF_SP_VS_STAGE_TEX_INSTRUCTIONS = 33, + PERF_SP_VS_STAGE_CFLOW_INSTRUCTIONS = 34, + PERF_SP_VS_STAGE_EFU_INSTRUCTIONS = 35, + PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = 36, + PERF_SP_VS_STAGE_HALF_ALU_INSTRUCTIONS = 37, + PERF_SP_FS_STAGE_TEX_INSTRUCTIONS = 38, + PERF_SP_FS_STAGE_CFLOW_INSTRUCTIONS = 39, + PERF_SP_FS_STAGE_EFU_INSTRUCTIONS = 40, + PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = 41, + PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = 42, + PERF_SP_FS_STAGE_BARY_INSTRUCTIONS = 43, + PERF_SP_VS_INSTRUCTIONS = 44, + PERF_SP_FS_INSTRUCTIONS = 45, + PERF_SP_ADDR_LOCK_COUNT = 46, + PERF_SP_UCHE_READ_TRANS = 47, + PERF_SP_UCHE_WRITE_TRANS = 48, + PERF_SP_EXPORT_VPC_TRANS = 49, + PERF_SP_EXPORT_RB_TRANS = 50, + PERF_SP_PIXELS_KILLED = 51, + PERF_SP_ICL1_REQUESTS = 52, + PERF_SP_ICL1_MISSES = 53, + PERF_SP_ICL0_REQUESTS = 54, + PERF_SP_ICL0_MISSES = 55, + PERF_SP_HS_INSTRUCTIONS = 56, + PERF_SP_DS_INSTRUCTIONS = 57, + PERF_SP_GS_INSTRUCTIONS = 58, + PERF_SP_CS_INSTRUCTIONS = 59, + PERF_SP_GPR_READ = 60, + PERF_SP_GPR_WRITE = 61, + PERF_SP_LM_CH0_REQUESTS = 62, + PERF_SP_LM_CH1_REQUESTS = 63, + PERF_SP_LM_BANK_CONFLICTS = 64, +}; + +enum a5xx_rb_perfcounter_select { + PERF_RB_BUSY_CYCLES = 0, + PERF_RB_STALL_CYCLES_CCU = 1, + PERF_RB_STALL_CYCLES_HLSQ = 2, + PERF_RB_STALL_CYCLES_FIFO0_FULL = 3, + PERF_RB_STALL_CYCLES_FIFO1_FULL = 4, + PERF_RB_STALL_CYCLES_FIFO2_FULL = 5, + PERF_RB_STARVE_CYCLES_SP = 6, + PERF_RB_STARVE_CYCLES_LRZ_TILE = 7, + PERF_RB_STARVE_CYCLES_CCU = 8, + PERF_RB_STARVE_CYCLES_Z_PLANE = 9, + 
PERF_RB_STARVE_CYCLES_BARY_PLANE = 10, + PERF_RB_Z_WORKLOAD = 11, + PERF_RB_HLSQ_ACTIVE = 12, + PERF_RB_Z_READ = 13, + PERF_RB_Z_WRITE = 14, + PERF_RB_C_READ = 15, + PERF_RB_C_WRITE = 16, + PERF_RB_TOTAL_PASS = 17, + PERF_RB_Z_PASS = 18, + PERF_RB_Z_FAIL = 19, + PERF_RB_S_FAIL = 20, + PERF_RB_BLENDED_FXP_COMPONENTS = 21, + PERF_RB_BLENDED_FP16_COMPONENTS = 22, + RB_RESERVED = 23, + PERF_RB_2D_ALIVE_CYCLES = 24, + PERF_RB_2D_STALL_CYCLES_A2D = 25, + PERF_RB_2D_STARVE_CYCLES_SRC = 26, + PERF_RB_2D_STARVE_CYCLES_SP = 27, + PERF_RB_2D_STARVE_CYCLES_DST = 28, + PERF_RB_2D_VALID_PIXELS = 29, +}; + +enum a5xx_rb_samples_perfcounter_select { + TOTAL_SAMPLES = 0, + ZPASS_SAMPLES = 1, + ZFAIL_SAMPLES = 2, + SFAIL_SAMPLES = 3, +}; + +enum a5xx_vsc_perfcounter_select { + PERF_VSC_BUSY_CYCLES = 0, + PERF_VSC_WORKING_CYCLES = 1, + PERF_VSC_STALL_CYCLES_UCHE = 2, + PERF_VSC_EOT_NUM = 3, +}; + +enum a5xx_ccu_perfcounter_select { + PERF_CCU_BUSY_CYCLES = 0, + PERF_CCU_STALL_CYCLES_RB_DEPTH_RETURN = 1, + PERF_CCU_STALL_CYCLES_RB_COLOR_RETURN = 2, + PERF_CCU_STARVE_CYCLES_FLAG_RETURN = 3, + PERF_CCU_DEPTH_BLOCKS = 4, + PERF_CCU_COLOR_BLOCKS = 5, + PERF_CCU_DEPTH_BLOCK_HIT = 6, + PERF_CCU_COLOR_BLOCK_HIT = 7, + PERF_CCU_PARTIAL_BLOCK_READ = 8, + PERF_CCU_GMEM_READ = 9, + PERF_CCU_GMEM_WRITE = 10, + PERF_CCU_DEPTH_READ_FLAG0_COUNT = 11, + PERF_CCU_DEPTH_READ_FLAG1_COUNT = 12, + PERF_CCU_DEPTH_READ_FLAG2_COUNT = 13, + PERF_CCU_DEPTH_READ_FLAG3_COUNT = 14, + PERF_CCU_DEPTH_READ_FLAG4_COUNT = 15, + PERF_CCU_COLOR_READ_FLAG0_COUNT = 16, + PERF_CCU_COLOR_READ_FLAG1_COUNT = 17, + PERF_CCU_COLOR_READ_FLAG2_COUNT = 18, + PERF_CCU_COLOR_READ_FLAG3_COUNT = 19, + PERF_CCU_COLOR_READ_FLAG4_COUNT = 20, + PERF_CCU_2D_BUSY_CYCLES = 21, + PERF_CCU_2D_RD_REQ = 22, + PERF_CCU_2D_WR_REQ = 23, + PERF_CCU_2D_REORDER_STARVE_CYCLES = 24, + PERF_CCU_2D_PIXELS = 25, +}; + +enum a5xx_cmp_perfcounter_select { + PERF_CMPDECMP_STALL_CYCLES_VBIF = 0, + PERF_CMPDECMP_VBIF_LATENCY_CYCLES = 1, + PERF_CMPDECMP_VBIF_LATENCY_SAMPLES = 2, + PERF_CMPDECMP_VBIF_READ_DATA_CCU = 3, + PERF_CMPDECMP_VBIF_WRITE_DATA_CCU = 4, + PERF_CMPDECMP_VBIF_READ_REQUEST = 5, + PERF_CMPDECMP_VBIF_WRITE_REQUEST = 6, + PERF_CMPDECMP_VBIF_READ_DATA = 7, + PERF_CMPDECMP_VBIF_WRITE_DATA = 8, + PERF_CMPDECMP_FLAG_FETCH_CYCLES = 9, + PERF_CMPDECMP_FLAG_FETCH_SAMPLES = 10, + PERF_CMPDECMP_DEPTH_WRITE_FLAG1_COUNT = 11, + PERF_CMPDECMP_DEPTH_WRITE_FLAG2_COUNT = 12, + PERF_CMPDECMP_DEPTH_WRITE_FLAG3_COUNT = 13, + PERF_CMPDECMP_DEPTH_WRITE_FLAG4_COUNT = 14, + PERF_CMPDECMP_COLOR_WRITE_FLAG1_COUNT = 15, + PERF_CMPDECMP_COLOR_WRITE_FLAG2_COUNT = 16, + PERF_CMPDECMP_COLOR_WRITE_FLAG3_COUNT = 17, + PERF_CMPDECMP_COLOR_WRITE_FLAG4_COUNT = 18, + PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_REQ = 19, + PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_WR = 20, + PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_RETURN = 21, + PERF_CMPDECMP_2D_RD_DATA = 22, + PERF_CMPDECMP_2D_WR_DATA = 23, +}; + +enum a5xx_vbif_perfcounter_select { + AXI_READ_REQUESTS_ID_0 = 0, + AXI_READ_REQUESTS_ID_1 = 1, + AXI_READ_REQUESTS_ID_2 = 2, + AXI_READ_REQUESTS_ID_3 = 3, + AXI_READ_REQUESTS_ID_4 = 4, + AXI_READ_REQUESTS_ID_5 = 5, + AXI_READ_REQUESTS_ID_6 = 6, + AXI_READ_REQUESTS_ID_7 = 7, + AXI_READ_REQUESTS_ID_8 = 8, + AXI_READ_REQUESTS_ID_9 = 9, + AXI_READ_REQUESTS_ID_10 = 10, + AXI_READ_REQUESTS_ID_11 = 11, + AXI_READ_REQUESTS_ID_12 = 12, + AXI_READ_REQUESTS_ID_13 = 13, + AXI_READ_REQUESTS_ID_14 = 14, + AXI_READ_REQUESTS_ID_15 = 15, + AXI0_READ_REQUESTS_TOTAL = 16, + AXI1_READ_REQUESTS_TOTAL = 17, + AXI2_READ_REQUESTS_TOTAL = 18, + 
AXI3_READ_REQUESTS_TOTAL = 19, + AXI_READ_REQUESTS_TOTAL = 20, + AXI_WRITE_REQUESTS_ID_0 = 21, + AXI_WRITE_REQUESTS_ID_1 = 22, + AXI_WRITE_REQUESTS_ID_2 = 23, + AXI_WRITE_REQUESTS_ID_3 = 24, + AXI_WRITE_REQUESTS_ID_4 = 25, + AXI_WRITE_REQUESTS_ID_5 = 26, + AXI_WRITE_REQUESTS_ID_6 = 27, + AXI_WRITE_REQUESTS_ID_7 = 28, + AXI_WRITE_REQUESTS_ID_8 = 29, + AXI_WRITE_REQUESTS_ID_9 = 30, + AXI_WRITE_REQUESTS_ID_10 = 31, + AXI_WRITE_REQUESTS_ID_11 = 32, + AXI_WRITE_REQUESTS_ID_12 = 33, + AXI_WRITE_REQUESTS_ID_13 = 34, + AXI_WRITE_REQUESTS_ID_14 = 35, + AXI_WRITE_REQUESTS_ID_15 = 36, + AXI0_WRITE_REQUESTS_TOTAL = 37, + AXI1_WRITE_REQUESTS_TOTAL = 38, + AXI2_WRITE_REQUESTS_TOTAL = 39, + AXI3_WRITE_REQUESTS_TOTAL = 40, + AXI_WRITE_REQUESTS_TOTAL = 41, + AXI_TOTAL_REQUESTS = 42, + AXI_READ_DATA_BEATS_ID_0 = 43, + AXI_READ_DATA_BEATS_ID_1 = 44, + AXI_READ_DATA_BEATS_ID_2 = 45, + AXI_READ_DATA_BEATS_ID_3 = 46, + AXI_READ_DATA_BEATS_ID_4 = 47, + AXI_READ_DATA_BEATS_ID_5 = 48, + AXI_READ_DATA_BEATS_ID_6 = 49, + AXI_READ_DATA_BEATS_ID_7 = 50, + AXI_READ_DATA_BEATS_ID_8 = 51, + AXI_READ_DATA_BEATS_ID_9 = 52, + AXI_READ_DATA_BEATS_ID_10 = 53, + AXI_READ_DATA_BEATS_ID_11 = 54, + AXI_READ_DATA_BEATS_ID_12 = 55, + AXI_READ_DATA_BEATS_ID_13 = 56, + AXI_READ_DATA_BEATS_ID_14 = 57, + AXI_READ_DATA_BEATS_ID_15 = 58, + AXI0_READ_DATA_BEATS_TOTAL = 59, + AXI1_READ_DATA_BEATS_TOTAL = 60, + AXI2_READ_DATA_BEATS_TOTAL = 61, + AXI3_READ_DATA_BEATS_TOTAL = 62, + AXI_READ_DATA_BEATS_TOTAL = 63, + AXI_WRITE_DATA_BEATS_ID_0 = 64, + AXI_WRITE_DATA_BEATS_ID_1 = 65, + AXI_WRITE_DATA_BEATS_ID_2 = 66, + AXI_WRITE_DATA_BEATS_ID_3 = 67, + AXI_WRITE_DATA_BEATS_ID_4 = 68, + AXI_WRITE_DATA_BEATS_ID_5 = 69, + AXI_WRITE_DATA_BEATS_ID_6 = 70, + AXI_WRITE_DATA_BEATS_ID_7 = 71, + AXI_WRITE_DATA_BEATS_ID_8 = 72, + AXI_WRITE_DATA_BEATS_ID_9 = 73, + AXI_WRITE_DATA_BEATS_ID_10 = 74, + AXI_WRITE_DATA_BEATS_ID_11 = 75, + AXI_WRITE_DATA_BEATS_ID_12 = 76, + AXI_WRITE_DATA_BEATS_ID_13 = 77, + AXI_WRITE_DATA_BEATS_ID_14 = 78, + AXI_WRITE_DATA_BEATS_ID_15 = 79, + AXI0_WRITE_DATA_BEATS_TOTAL = 80, + AXI1_WRITE_DATA_BEATS_TOTAL = 81, + AXI2_WRITE_DATA_BEATS_TOTAL = 82, + AXI3_WRITE_DATA_BEATS_TOTAL = 83, + AXI_WRITE_DATA_BEATS_TOTAL = 84, + AXI_DATA_BEATS_TOTAL = 85, +}; + +enum a5xx_tex_filter { + A5XX_TEX_NEAREST = 0, + A5XX_TEX_LINEAR = 1, + A5XX_TEX_ANISO = 2, +}; + +enum a5xx_tex_clamp { + A5XX_TEX_REPEAT = 0, + A5XX_TEX_CLAMP_TO_EDGE = 1, + A5XX_TEX_MIRROR_REPEAT = 2, + A5XX_TEX_CLAMP_TO_BORDER = 3, + A5XX_TEX_MIRROR_CLAMP = 4, +}; + +enum a5xx_tex_aniso { + A5XX_TEX_ANISO_1 = 0, + A5XX_TEX_ANISO_2 = 1, + A5XX_TEX_ANISO_4 = 2, + A5XX_TEX_ANISO_8 = 3, + A5XX_TEX_ANISO_16 = 4, +}; + +enum a5xx_tex_swiz { + A5XX_TEX_X = 0, + A5XX_TEX_Y = 1, + A5XX_TEX_Z = 2, + A5XX_TEX_W = 3, + A5XX_TEX_ZERO = 4, + A5XX_TEX_ONE = 5, +}; + +enum a5xx_tex_type { + A5XX_TEX_1D = 0, + A5XX_TEX_2D = 1, + A5XX_TEX_CUBE = 2, + A5XX_TEX_3D = 3, +}; + +#define A5XX_INT0_RBBM_GPU_IDLE 0x00000001 +#define A5XX_INT0_RBBM_AHB_ERROR 0x00000002 +#define A5XX_INT0_RBBM_TRANSFER_TIMEOUT 0x00000004 +#define A5XX_INT0_RBBM_ME_MS_TIMEOUT 0x00000008 +#define A5XX_INT0_RBBM_PFP_MS_TIMEOUT 0x00000010 +#define A5XX_INT0_RBBM_ETS_MS_TIMEOUT 0x00000020 +#define A5XX_INT0_RBBM_ATB_ASYNC_OVERFLOW 0x00000040 +#define A5XX_INT0_RBBM_GPC_ERROR 0x00000080 +#define A5XX_INT0_CP_SW 0x00000100 +#define A5XX_INT0_CP_HW_ERROR 0x00000200 +#define A5XX_INT0_CP_CCU_FLUSH_DEPTH_TS 0x00000400 +#define A5XX_INT0_CP_CCU_FLUSH_COLOR_TS 0x00000800 +#define A5XX_INT0_CP_CCU_RESOLVE_TS 0x00001000 +#define 
A5XX_INT0_CP_IB2 0x00002000 +#define A5XX_INT0_CP_IB1 0x00004000 +#define A5XX_INT0_CP_RB 0x00008000 +#define A5XX_INT0_CP_UNUSED_1 0x00010000 +#define A5XX_INT0_CP_RB_DONE_TS 0x00020000 +#define A5XX_INT0_CP_WT_DONE_TS 0x00040000 +#define A5XX_INT0_UNKNOWN_1 0x00080000 +#define A5XX_INT0_CP_CACHE_FLUSH_TS 0x00100000 +#define A5XX_INT0_UNUSED_2 0x00200000 +#define A5XX_INT0_RBBM_ATB_BUS_OVERFLOW 0x00400000 +#define A5XX_INT0_MISC_HANG_DETECT 0x00800000 +#define A5XX_INT0_UCHE_OOB_ACCESS 0x01000000 +#define A5XX_INT0_UCHE_TRAP_INTR 0x02000000 +#define A5XX_INT0_DEBBUS_INTR_0 0x04000000 +#define A5XX_INT0_DEBBUS_INTR_1 0x08000000 +#define A5XX_INT0_GPMU_VOLTAGE_DROOP 0x10000000 +#define A5XX_INT0_GPMU_FIRMWARE 0x20000000 +#define A5XX_INT0_ISDB_CPU_IRQ 0x40000000 +#define A5XX_INT0_ISDB_UNDER_DEBUG 0x80000000 +#define A5XX_CP_INT_CP_OPCODE_ERROR 0x00000001 +#define A5XX_CP_INT_CP_RESERVED_BIT_ERROR 0x00000002 +#define A5XX_CP_INT_CP_HW_FAULT_ERROR 0x00000004 +#define A5XX_CP_INT_CP_DMA_ERROR 0x00000008 +#define A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR 0x00000010 +#define A5XX_CP_INT_CP_AHB_ERROR 0x00000020 +#define REG_A5XX_CP_RB_BASE 0x00000800 + +#define REG_A5XX_CP_RB_BASE_HI 0x00000801 + +#define REG_A5XX_CP_RB_CNTL 0x00000802 + +#define REG_A5XX_CP_RB_RPTR_ADDR 0x00000804 + +#define REG_A5XX_CP_RB_RPTR_ADDR_HI 0x00000805 + +#define REG_A5XX_CP_RB_RPTR 0x00000806 + +#define REG_A5XX_CP_RB_WPTR 0x00000807 + +#define REG_A5XX_CP_PFP_STAT_ADDR 0x00000808 + +#define REG_A5XX_CP_PFP_STAT_DATA 0x00000809 + +#define REG_A5XX_CP_DRAW_STATE_ADDR 0x0000080b + +#define REG_A5XX_CP_DRAW_STATE_DATA 0x0000080c + +#define REG_A5XX_CP_CRASH_SCRIPT_BASE_LO 0x00000817 + +#define REG_A5XX_CP_CRASH_SCRIPT_BASE_HI 0x00000818 + +#define REG_A5XX_CP_CRASH_DUMP_CNTL 0x00000819 + +#define REG_A5XX_CP_ME_STAT_ADDR 0x0000081a + +#define REG_A5XX_CP_ROQ_THRESHOLDS_1 0x0000081f + +#define REG_A5XX_CP_ROQ_THRESHOLDS_2 0x00000820 + +#define REG_A5XX_CP_ROQ_DBG_ADDR 0x00000821 + +#define REG_A5XX_CP_ROQ_DBG_DATA 0x00000822 + +#define REG_A5XX_CP_MEQ_DBG_ADDR 0x00000823 + +#define REG_A5XX_CP_MEQ_DBG_DATA 0x00000824 + +#define REG_A5XX_CP_MEQ_THRESHOLDS 0x00000825 + +#define REG_A5XX_CP_MERCIU_SIZE 0x00000826 + +#define REG_A5XX_CP_MERCIU_DBG_ADDR 0x00000827 + +#define REG_A5XX_CP_MERCIU_DBG_DATA_1 0x00000828 + +#define REG_A5XX_CP_MERCIU_DBG_DATA_2 0x00000829 + +#define REG_A5XX_CP_PFP_UCODE_DBG_ADDR 0x0000082a + +#define REG_A5XX_CP_PFP_UCODE_DBG_DATA 0x0000082b + +#define REG_A5XX_CP_ME_UCODE_DBG_ADDR 0x0000082f + +#define REG_A5XX_CP_ME_UCODE_DBG_DATA 0x00000830 + +#define REG_A5XX_CP_CNTL 0x00000831 + +#define REG_A5XX_CP_PFP_ME_CNTL 0x00000832 + +#define REG_A5XX_CP_CHICKEN_DBG 0x00000833 + +#define REG_A5XX_CP_PFP_INSTR_BASE_LO 0x00000835 + +#define REG_A5XX_CP_PFP_INSTR_BASE_HI 0x00000836 + +#define REG_A5XX_CP_ME_INSTR_BASE_LO 0x00000838 + +#define REG_A5XX_CP_ME_INSTR_BASE_HI 0x00000839 + +#define REG_A5XX_CP_CONTEXT_SWITCH_CNTL 0x0000083b + +#define REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO 0x0000083c + +#define REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI 0x0000083d + +#define REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO 0x0000083e + +#define REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI 0x0000083f + +#define REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO 0x00000840 + +#define REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI 0x00000841 + +#define REG_A5XX_CP_ADDR_MODE_CNTL 0x00000860 + +#define REG_A5XX_CP_ME_STAT_DATA 0x00000b14 + +#define REG_A5XX_CP_WFI_PEND_CTR 0x00000b15 + +#define REG_A5XX_CP_INTERRUPT_STATUS 0x00000b18 + 
+#define REG_A5XX_CP_HW_FAULT 0x00000b1a + +#define REG_A5XX_CP_PROTECT_STATUS 0x00000b1c + +#define REG_A5XX_CP_IB1_BASE 0x00000b1f + +#define REG_A5XX_CP_IB1_BASE_HI 0x00000b20 + +#define REG_A5XX_CP_IB1_BUFSZ 0x00000b21 + +#define REG_A5XX_CP_IB2_BASE 0x00000b22 + +#define REG_A5XX_CP_IB2_BASE_HI 0x00000b23 + +#define REG_A5XX_CP_IB2_BUFSZ 0x00000b24 + +static inline uint32_t REG_A5XX_CP_SCRATCH(uint32_t i0) { return 0x00000b78 + 0x1*i0; } + +static inline uint32_t REG_A5XX_CP_SCRATCH_REG(uint32_t i0) { return 0x00000b78 + 0x1*i0; } + +static inline uint32_t REG_A5XX_CP_PROTECT(uint32_t i0) { return 0x00000880 + 0x1*i0; } + +static inline uint32_t REG_A5XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000880 + 0x1*i0; } +#define A5XX_CP_PROTECT_REG_BASE_ADDR__MASK 0x0001ffff +#define A5XX_CP_PROTECT_REG_BASE_ADDR__SHIFT 0 +static inline uint32_t A5XX_CP_PROTECT_REG_BASE_ADDR(uint32_t val) +{ + return ((val) << A5XX_CP_PROTECT_REG_BASE_ADDR__SHIFT) & A5XX_CP_PROTECT_REG_BASE_ADDR__MASK; +} +#define A5XX_CP_PROTECT_REG_MASK_LEN__MASK 0x1f000000 +#define A5XX_CP_PROTECT_REG_MASK_LEN__SHIFT 24 +static inline uint32_t A5XX_CP_PROTECT_REG_MASK_LEN(uint32_t val) +{ + return ((val) << A5XX_CP_PROTECT_REG_MASK_LEN__SHIFT) & A5XX_CP_PROTECT_REG_MASK_LEN__MASK; +} +#define A5XX_CP_PROTECT_REG_TRAP_WRITE 0x20000000 +#define A5XX_CP_PROTECT_REG_TRAP_READ 0x40000000 + +#define REG_A5XX_CP_PROTECT_CNTL 0x000008a0 + +#define REG_A5XX_CP_AHB_FAULT 0x00000b1b + +#define REG_A5XX_CP_PERFCTR_CP_SEL_0 0x00000bb0 + +#define REG_A5XX_CP_PERFCTR_CP_SEL_1 0x00000bb1 + +#define REG_A5XX_CP_PERFCTR_CP_SEL_2 0x00000bb2 + +#define REG_A5XX_CP_PERFCTR_CP_SEL_3 0x00000bb3 + +#define REG_A5XX_CP_PERFCTR_CP_SEL_4 0x00000bb4 + +#define REG_A5XX_CP_PERFCTR_CP_SEL_5 0x00000bb5 + +#define REG_A5XX_CP_PERFCTR_CP_SEL_6 0x00000bb6 + +#define REG_A5XX_CP_PERFCTR_CP_SEL_7 0x00000bb7 + +#define REG_A5XX_VSC_ADDR_MODE_CNTL 0x00000bc1 + +#define REG_A5XX_CP_POWERCTR_CP_SEL_0 0x00000bba + +#define REG_A5XX_CP_POWERCTR_CP_SEL_1 0x00000bbb + +#define REG_A5XX_CP_POWERCTR_CP_SEL_2 0x00000bbc + +#define REG_A5XX_CP_POWERCTR_CP_SEL_3 0x00000bbd + +#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_A 0x00000004 + +#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_B 0x00000005 + +#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_C 0x00000006 + +#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_D 0x00000007 + +#define REG_A5XX_RBBM_CFG_DBGBUS_CNTLT 0x00000008 + +#define REG_A5XX_RBBM_CFG_DBGBUS_CNTLM 0x00000009 + +#define REG_A5XX_RBBM_CFG_DEBBUS_CTLTM_ENABLE_SHIFT 0x00000018 + +#define REG_A5XX_RBBM_CFG_DBGBUS_OPL 0x0000000a + +#define REG_A5XX_RBBM_CFG_DBGBUS_OPE 0x0000000b + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTL_0 0x0000000c + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTL_1 0x0000000d + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTL_2 0x0000000e + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTL_3 0x0000000f + +#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_0 0x00000010 + +#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_1 0x00000011 + +#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_2 0x00000012 + +#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_3 0x00000013 + +#define REG_A5XX_RBBM_CFG_DBGBUS_BYTEL_0 0x00000014 + +#define REG_A5XX_RBBM_CFG_DBGBUS_BYTEL_1 0x00000015 + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_0 0x00000016 + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_1 0x00000017 + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_2 0x00000018 + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_3 0x00000019 + +#define REG_A5XX_RBBM_CFG_DBGBUS_MASKE_0 0x0000001a + +#define REG_A5XX_RBBM_CFG_DBGBUS_MASKE_1 0x0000001b + +#define REG_A5XX_RBBM_CFG_DBGBUS_MASKE_2 0x0000001c + +#define 
REG_A5XX_RBBM_CFG_DBGBUS_MASKE_3 0x0000001d + +#define REG_A5XX_RBBM_CFG_DBGBUS_NIBBLEE 0x0000001e + +#define REG_A5XX_RBBM_CFG_DBGBUS_PTRC0 0x0000001f + +#define REG_A5XX_RBBM_CFG_DBGBUS_PTRC1 0x00000020 + +#define REG_A5XX_RBBM_CFG_DBGBUS_LOADREG 0x00000021 + +#define REG_A5XX_RBBM_CFG_DBGBUS_IDX 0x00000022 + +#define REG_A5XX_RBBM_CFG_DBGBUS_CLRC 0x00000023 + +#define REG_A5XX_RBBM_CFG_DBGBUS_LOADIVT 0x00000024 + +#define REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL 0x0000002f + +#define REG_A5XX_RBBM_INT_CLEAR_CMD 0x00000037 + +#define REG_A5XX_RBBM_INT_0_MASK 0x00000038 +#define A5XX_RBBM_INT_0_MASK_RBBM_GPU_IDLE 0x00000001 +#define A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR 0x00000002 +#define A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT 0x00000004 +#define A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT 0x00000008 +#define A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT 0x00000010 +#define A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT 0x00000020 +#define A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW 0x00000040 +#define A5XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR 0x00000080 +#define A5XX_RBBM_INT_0_MASK_CP_SW 0x00000100 +#define A5XX_RBBM_INT_0_MASK_CP_HW_ERROR 0x00000200 +#define A5XX_RBBM_INT_0_MASK_CP_CCU_FLUSH_DEPTH_TS 0x00000400 +#define A5XX_RBBM_INT_0_MASK_CP_CCU_FLUSH_COLOR_TS 0x00000800 +#define A5XX_RBBM_INT_0_MASK_CP_CCU_RESOLVE_TS 0x00001000 +#define A5XX_RBBM_INT_0_MASK_CP_IB2 0x00002000 +#define A5XX_RBBM_INT_0_MASK_CP_IB1 0x00004000 +#define A5XX_RBBM_INT_0_MASK_CP_RB 0x00008000 +#define A5XX_RBBM_INT_0_MASK_CP_RB_DONE_TS 0x00020000 +#define A5XX_RBBM_INT_0_MASK_CP_WT_DONE_TS 0x00040000 +#define A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS 0x00100000 +#define A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW 0x00400000 +#define A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT 0x00800000 +#define A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS 0x01000000 +#define A5XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR 0x02000000 +#define A5XX_RBBM_INT_0_MASK_DEBBUS_INTR_0 0x04000000 +#define A5XX_RBBM_INT_0_MASK_DEBBUS_INTR_1 0x08000000 +#define A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP 0x10000000 +#define A5XX_RBBM_INT_0_MASK_GPMU_FIRMWARE 0x20000000 +#define A5XX_RBBM_INT_0_MASK_ISDB_CPU_IRQ 0x40000000 +#define A5XX_RBBM_INT_0_MASK_ISDB_UNDER_DEBUG 0x80000000 + +#define REG_A5XX_RBBM_AHB_DBG_CNTL 0x0000003f + +#define REG_A5XX_RBBM_EXT_VBIF_DBG_CNTL 0x00000041 + +#define REG_A5XX_RBBM_SW_RESET_CMD 0x00000043 + +#define REG_A5XX_RBBM_BLOCK_SW_RESET_CMD 0x00000045 + +#define REG_A5XX_RBBM_BLOCK_SW_RESET_CMD2 0x00000046 + +#define REG_A5XX_RBBM_DBG_LO_HI_GPIO 0x00000048 + +#define REG_A5XX_RBBM_EXT_TRACE_BUS_CNTL 0x00000049 + +#define REG_A5XX_RBBM_CLOCK_CNTL_TP0 0x0000004a + +#define REG_A5XX_RBBM_CLOCK_CNTL_TP1 0x0000004b + +#define REG_A5XX_RBBM_CLOCK_CNTL_TP2 0x0000004c + +#define REG_A5XX_RBBM_CLOCK_CNTL_TP3 0x0000004d + +#define REG_A5XX_RBBM_CLOCK_CNTL2_TP0 0x0000004e + +#define REG_A5XX_RBBM_CLOCK_CNTL2_TP1 0x0000004f + +#define REG_A5XX_RBBM_CLOCK_CNTL2_TP2 0x00000050 + +#define REG_A5XX_RBBM_CLOCK_CNTL2_TP3 0x00000051 + +#define REG_A5XX_RBBM_CLOCK_CNTL3_TP0 0x00000052 + +#define REG_A5XX_RBBM_CLOCK_CNTL3_TP1 0x00000053 + +#define REG_A5XX_RBBM_CLOCK_CNTL3_TP2 0x00000054 + +#define REG_A5XX_RBBM_CLOCK_CNTL3_TP3 0x00000055 + +#define REG_A5XX_RBBM_READ_AHB_THROUGH_DBG 0x00000059 + +#define REG_A5XX_RBBM_CLOCK_CNTL_UCHE 0x0000005a + +#define REG_A5XX_RBBM_CLOCK_CNTL2_UCHE 0x0000005b + +#define REG_A5XX_RBBM_CLOCK_CNTL3_UCHE 0x0000005c + +#define REG_A5XX_RBBM_CLOCK_CNTL4_UCHE 0x0000005d + +#define REG_A5XX_RBBM_CLOCK_HYST_UCHE 0x0000005e + +#define 
REG_A5XX_RBBM_CLOCK_DELAY_UCHE 0x0000005f + +#define REG_A5XX_RBBM_CLOCK_MODE_GPC 0x00000060 + +#define REG_A5XX_RBBM_CLOCK_DELAY_GPC 0x00000061 + +#define REG_A5XX_RBBM_CLOCK_HYST_GPC 0x00000062 + +#define REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM 0x00000063 + +#define REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x00000064 + +#define REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x00000065 + +#define REG_A5XX_RBBM_CLOCK_DELAY_HLSQ 0x00000066 + +#define REG_A5XX_RBBM_CLOCK_CNTL 0x00000067 + +#define REG_A5XX_RBBM_CLOCK_CNTL_SP0 0x00000068 + +#define REG_A5XX_RBBM_CLOCK_CNTL_SP1 0x00000069 + +#define REG_A5XX_RBBM_CLOCK_CNTL_SP2 0x0000006a + +#define REG_A5XX_RBBM_CLOCK_CNTL_SP3 0x0000006b + +#define REG_A5XX_RBBM_CLOCK_CNTL2_SP0 0x0000006c + +#define REG_A5XX_RBBM_CLOCK_CNTL2_SP1 0x0000006d + +#define REG_A5XX_RBBM_CLOCK_CNTL2_SP2 0x0000006e + +#define REG_A5XX_RBBM_CLOCK_CNTL2_SP3 0x0000006f + +#define REG_A5XX_RBBM_CLOCK_HYST_SP0 0x00000070 + +#define REG_A5XX_RBBM_CLOCK_HYST_SP1 0x00000071 + +#define REG_A5XX_RBBM_CLOCK_HYST_SP2 0x00000072 + +#define REG_A5XX_RBBM_CLOCK_HYST_SP3 0x00000073 + +#define REG_A5XX_RBBM_CLOCK_DELAY_SP0 0x00000074 + +#define REG_A5XX_RBBM_CLOCK_DELAY_SP1 0x00000075 + +#define REG_A5XX_RBBM_CLOCK_DELAY_SP2 0x00000076 + +#define REG_A5XX_RBBM_CLOCK_DELAY_SP3 0x00000077 + +#define REG_A5XX_RBBM_CLOCK_CNTL_RB0 0x00000078 + +#define REG_A5XX_RBBM_CLOCK_CNTL_RB1 0x00000079 + +#define REG_A5XX_RBBM_CLOCK_CNTL_RB2 0x0000007a + +#define REG_A5XX_RBBM_CLOCK_CNTL_RB3 0x0000007b + +#define REG_A5XX_RBBM_CLOCK_CNTL2_RB0 0x0000007c + +#define REG_A5XX_RBBM_CLOCK_CNTL2_RB1 0x0000007d + +#define REG_A5XX_RBBM_CLOCK_CNTL2_RB2 0x0000007e + +#define REG_A5XX_RBBM_CLOCK_CNTL2_RB3 0x0000007f + +#define REG_A5XX_RBBM_CLOCK_HYST_RAC 0x00000080 + +#define REG_A5XX_RBBM_CLOCK_DELAY_RAC 0x00000081 + +#define REG_A5XX_RBBM_CLOCK_CNTL_CCU0 0x00000082 + +#define REG_A5XX_RBBM_CLOCK_CNTL_CCU1 0x00000083 + +#define REG_A5XX_RBBM_CLOCK_CNTL_CCU2 0x00000084 + +#define REG_A5XX_RBBM_CLOCK_CNTL_CCU3 0x00000085 + +#define REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0 0x00000086 + +#define REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1 0x00000087 + +#define REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2 0x00000088 + +#define REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3 0x00000089 + +#define REG_A5XX_RBBM_CLOCK_CNTL_RAC 0x0000008a + +#define REG_A5XX_RBBM_CLOCK_CNTL2_RAC 0x0000008b + +#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0 0x0000008c + +#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1 0x0000008d + +#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2 0x0000008e + +#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3 0x0000008f + +#define REG_A5XX_RBBM_CLOCK_HYST_VFD 0x00000090 + +#define REG_A5XX_RBBM_CLOCK_MODE_VFD 0x00000091 + +#define REG_A5XX_RBBM_CLOCK_DELAY_VFD 0x00000092 + +#define REG_A5XX_RBBM_AHB_CNTL0 0x00000093 + +#define REG_A5XX_RBBM_AHB_CNTL1 0x00000094 + +#define REG_A5XX_RBBM_AHB_CNTL2 0x00000095 + +#define REG_A5XX_RBBM_AHB_CMD 0x00000096 + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11 0x0000009c + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12 0x0000009d + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13 0x0000009e + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14 0x0000009f + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15 0x000000a0 + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16 0x000000a1 + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17 0x000000a2 + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18 0x000000a3 + +#define REG_A5XX_RBBM_CLOCK_DELAY_TP0 0x000000a4 + +#define REG_A5XX_RBBM_CLOCK_DELAY_TP1 0x000000a5 + +#define 
REG_A5XX_RBBM_CLOCK_DELAY_TP2 0x000000a6 + +#define REG_A5XX_RBBM_CLOCK_DELAY_TP3 0x000000a7 + +#define REG_A5XX_RBBM_CLOCK_DELAY2_TP0 0x000000a8 + +#define REG_A5XX_RBBM_CLOCK_DELAY2_TP1 0x000000a9 + +#define REG_A5XX_RBBM_CLOCK_DELAY2_TP2 0x000000aa + +#define REG_A5XX_RBBM_CLOCK_DELAY2_TP3 0x000000ab + +#define REG_A5XX_RBBM_CLOCK_DELAY3_TP0 0x000000ac + +#define REG_A5XX_RBBM_CLOCK_DELAY3_TP1 0x000000ad + +#define REG_A5XX_RBBM_CLOCK_DELAY3_TP2 0x000000ae + +#define REG_A5XX_RBBM_CLOCK_DELAY3_TP3 0x000000af + +#define REG_A5XX_RBBM_CLOCK_HYST_TP0 0x000000b0 + +#define REG_A5XX_RBBM_CLOCK_HYST_TP1 0x000000b1 + +#define REG_A5XX_RBBM_CLOCK_HYST_TP2 0x000000b2 + +#define REG_A5XX_RBBM_CLOCK_HYST_TP3 0x000000b3 + +#define REG_A5XX_RBBM_CLOCK_HYST2_TP0 0x000000b4 + +#define REG_A5XX_RBBM_CLOCK_HYST2_TP1 0x000000b5 + +#define REG_A5XX_RBBM_CLOCK_HYST2_TP2 0x000000b6 + +#define REG_A5XX_RBBM_CLOCK_HYST2_TP3 0x000000b7 + +#define REG_A5XX_RBBM_CLOCK_HYST3_TP0 0x000000b8 + +#define REG_A5XX_RBBM_CLOCK_HYST3_TP1 0x000000b9 + +#define REG_A5XX_RBBM_CLOCK_HYST3_TP2 0x000000ba + +#define REG_A5XX_RBBM_CLOCK_HYST3_TP3 0x000000bb + +#define REG_A5XX_RBBM_CLOCK_CNTL_GPMU 0x000000c8 + +#define REG_A5XX_RBBM_CLOCK_DELAY_GPMU 0x000000c9 + +#define REG_A5XX_RBBM_CLOCK_HYST_GPMU 0x000000ca + +#define REG_A5XX_RBBM_PERFCTR_CP_0_LO 0x000003a0 + +#define REG_A5XX_RBBM_PERFCTR_CP_0_HI 0x000003a1 + +#define REG_A5XX_RBBM_PERFCTR_CP_1_LO 0x000003a2 + +#define REG_A5XX_RBBM_PERFCTR_CP_1_HI 0x000003a3 + +#define REG_A5XX_RBBM_PERFCTR_CP_2_LO 0x000003a4 + +#define REG_A5XX_RBBM_PERFCTR_CP_2_HI 0x000003a5 + +#define REG_A5XX_RBBM_PERFCTR_CP_3_LO 0x000003a6 + +#define REG_A5XX_RBBM_PERFCTR_CP_3_HI 0x000003a7 + +#define REG_A5XX_RBBM_PERFCTR_CP_4_LO 0x000003a8 + +#define REG_A5XX_RBBM_PERFCTR_CP_4_HI 0x000003a9 + +#define REG_A5XX_RBBM_PERFCTR_CP_5_LO 0x000003aa + +#define REG_A5XX_RBBM_PERFCTR_CP_5_HI 0x000003ab + +#define REG_A5XX_RBBM_PERFCTR_CP_6_LO 0x000003ac + +#define REG_A5XX_RBBM_PERFCTR_CP_6_HI 0x000003ad + +#define REG_A5XX_RBBM_PERFCTR_CP_7_LO 0x000003ae + +#define REG_A5XX_RBBM_PERFCTR_CP_7_HI 0x000003af + +#define REG_A5XX_RBBM_PERFCTR_RBBM_0_LO 0x000003b0 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_0_HI 0x000003b1 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_1_LO 0x000003b2 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_1_HI 0x000003b3 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_2_LO 0x000003b4 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_2_HI 0x000003b5 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_3_LO 0x000003b6 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_3_HI 0x000003b7 + +#define REG_A5XX_RBBM_PERFCTR_PC_0_LO 0x000003b8 + +#define REG_A5XX_RBBM_PERFCTR_PC_0_HI 0x000003b9 + +#define REG_A5XX_RBBM_PERFCTR_PC_1_LO 0x000003ba + +#define REG_A5XX_RBBM_PERFCTR_PC_1_HI 0x000003bb + +#define REG_A5XX_RBBM_PERFCTR_PC_2_LO 0x000003bc + +#define REG_A5XX_RBBM_PERFCTR_PC_2_HI 0x000003bd + +#define REG_A5XX_RBBM_PERFCTR_PC_3_LO 0x000003be + +#define REG_A5XX_RBBM_PERFCTR_PC_3_HI 0x000003bf + +#define REG_A5XX_RBBM_PERFCTR_PC_4_LO 0x000003c0 + +#define REG_A5XX_RBBM_PERFCTR_PC_4_HI 0x000003c1 + +#define REG_A5XX_RBBM_PERFCTR_PC_5_LO 0x000003c2 + +#define REG_A5XX_RBBM_PERFCTR_PC_5_HI 0x000003c3 + +#define REG_A5XX_RBBM_PERFCTR_PC_6_LO 0x000003c4 + +#define REG_A5XX_RBBM_PERFCTR_PC_6_HI 0x000003c5 + +#define REG_A5XX_RBBM_PERFCTR_PC_7_LO 0x000003c6 + +#define REG_A5XX_RBBM_PERFCTR_PC_7_HI 0x000003c7 + +#define REG_A5XX_RBBM_PERFCTR_VFD_0_LO 0x000003c8 + +#define REG_A5XX_RBBM_PERFCTR_VFD_0_HI 0x000003c9 + +#define REG_A5XX_RBBM_PERFCTR_VFD_1_LO 0x000003ca + 
+#define REG_A5XX_RBBM_PERFCTR_VFD_1_HI 0x000003cb + +#define REG_A5XX_RBBM_PERFCTR_VFD_2_LO 0x000003cc + +#define REG_A5XX_RBBM_PERFCTR_VFD_2_HI 0x000003cd + +#define REG_A5XX_RBBM_PERFCTR_VFD_3_LO 0x000003ce + +#define REG_A5XX_RBBM_PERFCTR_VFD_3_HI 0x000003cf + +#define REG_A5XX_RBBM_PERFCTR_VFD_4_LO 0x000003d0 + +#define REG_A5XX_RBBM_PERFCTR_VFD_4_HI 0x000003d1 + +#define REG_A5XX_RBBM_PERFCTR_VFD_5_LO 0x000003d2 + +#define REG_A5XX_RBBM_PERFCTR_VFD_5_HI 0x000003d3 + +#define REG_A5XX_RBBM_PERFCTR_VFD_6_LO 0x000003d4 + +#define REG_A5XX_RBBM_PERFCTR_VFD_6_HI 0x000003d5 + +#define REG_A5XX_RBBM_PERFCTR_VFD_7_LO 0x000003d6 + +#define REG_A5XX_RBBM_PERFCTR_VFD_7_HI 0x000003d7 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_0_LO 0x000003d8 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_0_HI 0x000003d9 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_1_LO 0x000003da + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_1_HI 0x000003db + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_2_LO 0x000003dc + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_2_HI 0x000003dd + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_3_LO 0x000003de + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_3_HI 0x000003df + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_4_LO 0x000003e0 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_4_HI 0x000003e1 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_5_LO 0x000003e2 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_5_HI 0x000003e3 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_6_LO 0x000003e4 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_6_HI 0x000003e5 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_7_LO 0x000003e6 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_7_HI 0x000003e7 + +#define REG_A5XX_RBBM_PERFCTR_VPC_0_LO 0x000003e8 + +#define REG_A5XX_RBBM_PERFCTR_VPC_0_HI 0x000003e9 + +#define REG_A5XX_RBBM_PERFCTR_VPC_1_LO 0x000003ea + +#define REG_A5XX_RBBM_PERFCTR_VPC_1_HI 0x000003eb + +#define REG_A5XX_RBBM_PERFCTR_VPC_2_LO 0x000003ec + +#define REG_A5XX_RBBM_PERFCTR_VPC_2_HI 0x000003ed + +#define REG_A5XX_RBBM_PERFCTR_VPC_3_LO 0x000003ee + +#define REG_A5XX_RBBM_PERFCTR_VPC_3_HI 0x000003ef + +#define REG_A5XX_RBBM_PERFCTR_CCU_0_LO 0x000003f0 + +#define REG_A5XX_RBBM_PERFCTR_CCU_0_HI 0x000003f1 + +#define REG_A5XX_RBBM_PERFCTR_CCU_1_LO 0x000003f2 + +#define REG_A5XX_RBBM_PERFCTR_CCU_1_HI 0x000003f3 + +#define REG_A5XX_RBBM_PERFCTR_CCU_2_LO 0x000003f4 + +#define REG_A5XX_RBBM_PERFCTR_CCU_2_HI 0x000003f5 + +#define REG_A5XX_RBBM_PERFCTR_CCU_3_LO 0x000003f6 + +#define REG_A5XX_RBBM_PERFCTR_CCU_3_HI 0x000003f7 + +#define REG_A5XX_RBBM_PERFCTR_TSE_0_LO 0x000003f8 + +#define REG_A5XX_RBBM_PERFCTR_TSE_0_HI 0x000003f9 + +#define REG_A5XX_RBBM_PERFCTR_TSE_1_LO 0x000003fa + +#define REG_A5XX_RBBM_PERFCTR_TSE_1_HI 0x000003fb + +#define REG_A5XX_RBBM_PERFCTR_TSE_2_LO 0x000003fc + +#define REG_A5XX_RBBM_PERFCTR_TSE_2_HI 0x000003fd + +#define REG_A5XX_RBBM_PERFCTR_TSE_3_LO 0x000003fe + +#define REG_A5XX_RBBM_PERFCTR_TSE_3_HI 0x000003ff + +#define REG_A5XX_RBBM_PERFCTR_RAS_0_LO 0x00000400 + +#define REG_A5XX_RBBM_PERFCTR_RAS_0_HI 0x00000401 + +#define REG_A5XX_RBBM_PERFCTR_RAS_1_LO 0x00000402 + +#define REG_A5XX_RBBM_PERFCTR_RAS_1_HI 0x00000403 + +#define REG_A5XX_RBBM_PERFCTR_RAS_2_LO 0x00000404 + +#define REG_A5XX_RBBM_PERFCTR_RAS_2_HI 0x00000405 + +#define REG_A5XX_RBBM_PERFCTR_RAS_3_LO 0x00000406 + +#define REG_A5XX_RBBM_PERFCTR_RAS_3_HI 0x00000407 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_0_LO 0x00000408 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_0_HI 0x00000409 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_1_LO 0x0000040a + +#define REG_A5XX_RBBM_PERFCTR_UCHE_1_HI 0x0000040b + +#define REG_A5XX_RBBM_PERFCTR_UCHE_2_LO 0x0000040c + +#define 
REG_A5XX_RBBM_PERFCTR_UCHE_2_HI 0x0000040d + +#define REG_A5XX_RBBM_PERFCTR_UCHE_3_LO 0x0000040e + +#define REG_A5XX_RBBM_PERFCTR_UCHE_3_HI 0x0000040f + +#define REG_A5XX_RBBM_PERFCTR_UCHE_4_LO 0x00000410 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_4_HI 0x00000411 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_5_LO 0x00000412 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_5_HI 0x00000413 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_6_LO 0x00000414 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_6_HI 0x00000415 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_7_LO 0x00000416 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_7_HI 0x00000417 + +#define REG_A5XX_RBBM_PERFCTR_TP_0_LO 0x00000418 + +#define REG_A5XX_RBBM_PERFCTR_TP_0_HI 0x00000419 + +#define REG_A5XX_RBBM_PERFCTR_TP_1_LO 0x0000041a + +#define REG_A5XX_RBBM_PERFCTR_TP_1_HI 0x0000041b + +#define REG_A5XX_RBBM_PERFCTR_TP_2_LO 0x0000041c + +#define REG_A5XX_RBBM_PERFCTR_TP_2_HI 0x0000041d + +#define REG_A5XX_RBBM_PERFCTR_TP_3_LO 0x0000041e + +#define REG_A5XX_RBBM_PERFCTR_TP_3_HI 0x0000041f + +#define REG_A5XX_RBBM_PERFCTR_TP_4_LO 0x00000420 + +#define REG_A5XX_RBBM_PERFCTR_TP_4_HI 0x00000421 + +#define REG_A5XX_RBBM_PERFCTR_TP_5_LO 0x00000422 + +#define REG_A5XX_RBBM_PERFCTR_TP_5_HI 0x00000423 + +#define REG_A5XX_RBBM_PERFCTR_TP_6_LO 0x00000424 + +#define REG_A5XX_RBBM_PERFCTR_TP_6_HI 0x00000425 + +#define REG_A5XX_RBBM_PERFCTR_TP_7_LO 0x00000426 + +#define REG_A5XX_RBBM_PERFCTR_TP_7_HI 0x00000427 + +#define REG_A5XX_RBBM_PERFCTR_SP_0_LO 0x00000428 + +#define REG_A5XX_RBBM_PERFCTR_SP_0_HI 0x00000429 + +#define REG_A5XX_RBBM_PERFCTR_SP_1_LO 0x0000042a + +#define REG_A5XX_RBBM_PERFCTR_SP_1_HI 0x0000042b + +#define REG_A5XX_RBBM_PERFCTR_SP_2_LO 0x0000042c + +#define REG_A5XX_RBBM_PERFCTR_SP_2_HI 0x0000042d + +#define REG_A5XX_RBBM_PERFCTR_SP_3_LO 0x0000042e + +#define REG_A5XX_RBBM_PERFCTR_SP_3_HI 0x0000042f + +#define REG_A5XX_RBBM_PERFCTR_SP_4_LO 0x00000430 + +#define REG_A5XX_RBBM_PERFCTR_SP_4_HI 0x00000431 + +#define REG_A5XX_RBBM_PERFCTR_SP_5_LO 0x00000432 + +#define REG_A5XX_RBBM_PERFCTR_SP_5_HI 0x00000433 + +#define REG_A5XX_RBBM_PERFCTR_SP_6_LO 0x00000434 + +#define REG_A5XX_RBBM_PERFCTR_SP_6_HI 0x00000435 + +#define REG_A5XX_RBBM_PERFCTR_SP_7_LO 0x00000436 + +#define REG_A5XX_RBBM_PERFCTR_SP_7_HI 0x00000437 + +#define REG_A5XX_RBBM_PERFCTR_SP_8_LO 0x00000438 + +#define REG_A5XX_RBBM_PERFCTR_SP_8_HI 0x00000439 + +#define REG_A5XX_RBBM_PERFCTR_SP_9_LO 0x0000043a + +#define REG_A5XX_RBBM_PERFCTR_SP_9_HI 0x0000043b + +#define REG_A5XX_RBBM_PERFCTR_SP_10_LO 0x0000043c + +#define REG_A5XX_RBBM_PERFCTR_SP_10_HI 0x0000043d + +#define REG_A5XX_RBBM_PERFCTR_SP_11_LO 0x0000043e + +#define REG_A5XX_RBBM_PERFCTR_SP_11_HI 0x0000043f + +#define REG_A5XX_RBBM_PERFCTR_RB_0_LO 0x00000440 + +#define REG_A5XX_RBBM_PERFCTR_RB_0_HI 0x00000441 + +#define REG_A5XX_RBBM_PERFCTR_RB_1_LO 0x00000442 + +#define REG_A5XX_RBBM_PERFCTR_RB_1_HI 0x00000443 + +#define REG_A5XX_RBBM_PERFCTR_RB_2_LO 0x00000444 + +#define REG_A5XX_RBBM_PERFCTR_RB_2_HI 0x00000445 + +#define REG_A5XX_RBBM_PERFCTR_RB_3_LO 0x00000446 + +#define REG_A5XX_RBBM_PERFCTR_RB_3_HI 0x00000447 + +#define REG_A5XX_RBBM_PERFCTR_RB_4_LO 0x00000448 + +#define REG_A5XX_RBBM_PERFCTR_RB_4_HI 0x00000449 + +#define REG_A5XX_RBBM_PERFCTR_RB_5_LO 0x0000044a + +#define REG_A5XX_RBBM_PERFCTR_RB_5_HI 0x0000044b + +#define REG_A5XX_RBBM_PERFCTR_RB_6_LO 0x0000044c + +#define REG_A5XX_RBBM_PERFCTR_RB_6_HI 0x0000044d + +#define REG_A5XX_RBBM_PERFCTR_RB_7_LO 0x0000044e + +#define REG_A5XX_RBBM_PERFCTR_RB_7_HI 0x0000044f + +#define REG_A5XX_RBBM_PERFCTR_VSC_0_LO 0x00000450 + 
+#define REG_A5XX_RBBM_PERFCTR_VSC_0_HI 0x00000451 + +#define REG_A5XX_RBBM_PERFCTR_VSC_1_LO 0x00000452 + +#define REG_A5XX_RBBM_PERFCTR_VSC_1_HI 0x00000453 + +#define REG_A5XX_RBBM_PERFCTR_LRZ_0_LO 0x00000454 + +#define REG_A5XX_RBBM_PERFCTR_LRZ_0_HI 0x00000455 + +#define REG_A5XX_RBBM_PERFCTR_LRZ_1_LO 0x00000456 + +#define REG_A5XX_RBBM_PERFCTR_LRZ_1_HI 0x00000457 + +#define REG_A5XX_RBBM_PERFCTR_LRZ_2_LO 0x00000458 + +#define REG_A5XX_RBBM_PERFCTR_LRZ_2_HI 0x00000459 + +#define REG_A5XX_RBBM_PERFCTR_LRZ_3_LO 0x0000045a + +#define REG_A5XX_RBBM_PERFCTR_LRZ_3_HI 0x0000045b + +#define REG_A5XX_RBBM_PERFCTR_CMP_0_LO 0x0000045c + +#define REG_A5XX_RBBM_PERFCTR_CMP_0_HI 0x0000045d + +#define REG_A5XX_RBBM_PERFCTR_CMP_1_LO 0x0000045e + +#define REG_A5XX_RBBM_PERFCTR_CMP_1_HI 0x0000045f + +#define REG_A5XX_RBBM_PERFCTR_CMP_2_LO 0x00000460 + +#define REG_A5XX_RBBM_PERFCTR_CMP_2_HI 0x00000461 + +#define REG_A5XX_RBBM_PERFCTR_CMP_3_LO 0x00000462 + +#define REG_A5XX_RBBM_PERFCTR_CMP_3_HI 0x00000463 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0 0x0000046b + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_1 0x0000046c + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_2 0x0000046d + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_3 0x0000046e + +#define REG_A5XX_RBBM_ALWAYSON_COUNTER_LO 0x000004d2 + +#define REG_A5XX_RBBM_ALWAYSON_COUNTER_HI 0x000004d3 + +#define REG_A5XX_RBBM_STATUS 0x000004f5 +#define A5XX_RBBM_STATUS_GPU_BUSY_IGN_AHB 0x80000000 +#define A5XX_RBBM_STATUS_GPU_BUSY_IGN_AHB_CP 0x40000000 +#define A5XX_RBBM_STATUS_HLSQ_BUSY 0x20000000 +#define A5XX_RBBM_STATUS_VSC_BUSY 0x10000000 +#define A5XX_RBBM_STATUS_TPL1_BUSY 0x08000000 +#define A5XX_RBBM_STATUS_SP_BUSY 0x04000000 +#define A5XX_RBBM_STATUS_UCHE_BUSY 0x02000000 +#define A5XX_RBBM_STATUS_VPC_BUSY 0x01000000 +#define A5XX_RBBM_STATUS_VFDP_BUSY 0x00800000 +#define A5XX_RBBM_STATUS_VFD_BUSY 0x00400000 +#define A5XX_RBBM_STATUS_TESS_BUSY 0x00200000 +#define A5XX_RBBM_STATUS_PC_VSD_BUSY 0x00100000 +#define A5XX_RBBM_STATUS_PC_DCALL_BUSY 0x00080000 +#define A5XX_RBBM_STATUS_GPMU_SLAVE_BUSY 0x00040000 +#define A5XX_RBBM_STATUS_DCOM_BUSY 0x00020000 +#define A5XX_RBBM_STATUS_COM_BUSY 0x00010000 +#define A5XX_RBBM_STATUS_LRZ_BUZY 0x00008000 +#define A5XX_RBBM_STATUS_A2D_DSP_BUSY 0x00004000 +#define A5XX_RBBM_STATUS_CCUFCHE_BUSY 0x00002000 +#define A5XX_RBBM_STATUS_RB_BUSY 0x00001000 +#define A5XX_RBBM_STATUS_RAS_BUSY 0x00000800 +#define A5XX_RBBM_STATUS_TSE_BUSY 0x00000400 +#define A5XX_RBBM_STATUS_VBIF_BUSY 0x00000200 +#define A5XX_RBBM_STATUS_GPU_BUSY_IGN_AHB_HYST 0x00000100 +#define A5XX_RBBM_STATUS_CP_BUSY_IGN_HYST 0x00000080 +#define A5XX_RBBM_STATUS_CP_BUSY 0x00000040 +#define A5XX_RBBM_STATUS_GPMU_MASTER_BUSY 0x00000020 +#define A5XX_RBBM_STATUS_CP_CRASH_BUSY 0x00000010 +#define A5XX_RBBM_STATUS_CP_ETS_BUSY 0x00000008 +#define A5XX_RBBM_STATUS_CP_PFP_BUSY 0x00000004 +#define A5XX_RBBM_STATUS_CP_ME_BUSY 0x00000002 +#define A5XX_RBBM_STATUS_HI_BUSY 0x00000001 + +#define REG_A5XX_RBBM_STATUS3 0x00000530 + +#define REG_A5XX_RBBM_INT_0_STATUS 0x000004e1 + +#define REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS 0x000004f0 + +#define REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS 0x000004f1 + +#define REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS 0x000004f3 + +#define REG_A5XX_RBBM_AHB_ERROR_STATUS 0x000004f4 + +#define REG_A5XX_RBBM_PERFCTR_CNTL 0x00000464 + +#define REG_A5XX_RBBM_PERFCTR_LOAD_CMD0 0x00000465 + +#define REG_A5XX_RBBM_PERFCTR_LOAD_CMD1 0x00000466 + +#define REG_A5XX_RBBM_PERFCTR_LOAD_CMD2 0x00000467 + +#define REG_A5XX_RBBM_PERFCTR_LOAD_CMD3 0x00000468 + +#define 
REG_A5XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000469 + +#define REG_A5XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x0000046a + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0 0x0000046b + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_1 0x0000046c + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_2 0x0000046d + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_3 0x0000046e + +#define REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED 0x0000046f + +#define REG_A5XX_RBBM_AHB_ERROR 0x000004ed + +#define REG_A5XX_RBBM_CFG_DBGBUS_EVENT_LOGIC 0x00000504 + +#define REG_A5XX_RBBM_CFG_DBGBUS_OVER 0x00000505 + +#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT0 0x00000506 + +#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT1 0x00000507 + +#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT2 0x00000508 + +#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT3 0x00000509 + +#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT4 0x0000050a + +#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT5 0x0000050b + +#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_ADDR 0x0000050c + +#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF0 0x0000050d + +#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1 0x0000050e + +#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2 0x0000050f + +#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF3 0x00000510 + +#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF4 0x00000511 + +#define REG_A5XX_RBBM_CFG_DBGBUS_MISR0 0x00000512 + +#define REG_A5XX_RBBM_CFG_DBGBUS_MISR1 0x00000513 + +#define REG_A5XX_RBBM_ISDB_CNT 0x00000533 + +#define REG_A5XX_RBBM_SECVID_TRUST_CONFIG 0x0000f000 + +#define REG_A5XX_RBBM_SECVID_TRUST_CNTL 0x0000f400 + +#define REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO 0x0000f800 + +#define REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI 0x0000f801 + +#define REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE 0x0000f802 + +#define REG_A5XX_RBBM_SECVID_TSB_CNTL 0x0000f803 + +#define REG_A5XX_RBBM_SECVID_TSB_COMP_STATUS_LO 0x0000f804 + +#define REG_A5XX_RBBM_SECVID_TSB_COMP_STATUS_HI 0x0000f805 + +#define REG_A5XX_RBBM_SECVID_TSB_UCHE_STATUS_LO 0x0000f806 + +#define REG_A5XX_RBBM_SECVID_TSB_UCHE_STATUS_HI 0x0000f807 + +#define REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL 0x0000f810 + +#define REG_A5XX_VSC_BIN_SIZE 0x00000bc2 +#define A5XX_VSC_BIN_SIZE_WIDTH__MASK 0x000000ff +#define A5XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 +static inline uint32_t A5XX_VSC_BIN_SIZE_WIDTH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A5XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A5XX_VSC_BIN_SIZE_WIDTH__MASK; +} +#define A5XX_VSC_BIN_SIZE_HEIGHT__MASK 0x0001fe00 +#define A5XX_VSC_BIN_SIZE_HEIGHT__SHIFT 9 +static inline uint32_t A5XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A5XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A5XX_VSC_BIN_SIZE_HEIGHT__MASK; +} + +#define REG_A5XX_VSC_SIZE_ADDRESS_LO 0x00000bc3 + +#define REG_A5XX_VSC_SIZE_ADDRESS_HI 0x00000bc4 + +#define REG_A5XX_UNKNOWN_0BC5 0x00000bc5 + +#define REG_A5XX_UNKNOWN_0BC6 0x00000bc6 + +static inline uint32_t REG_A5XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000bd0 + 0x1*i0; } + +static inline uint32_t REG_A5XX_VSC_PIPE_CONFIG_REG(uint32_t i0) { return 0x00000bd0 + 0x1*i0; } +#define A5XX_VSC_PIPE_CONFIG_REG_X__MASK 0x000003ff +#define A5XX_VSC_PIPE_CONFIG_REG_X__SHIFT 0 +static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_X(uint32_t val) +{ + return ((val) << A5XX_VSC_PIPE_CONFIG_REG_X__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_X__MASK; +} +#define A5XX_VSC_PIPE_CONFIG_REG_Y__MASK 0x000ffc00 +#define A5XX_VSC_PIPE_CONFIG_REG_Y__SHIFT 10 +static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_Y(uint32_t val) +{ + return ((val) << A5XX_VSC_PIPE_CONFIG_REG_Y__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_Y__MASK; +} +#define 
A5XX_VSC_PIPE_CONFIG_REG_W__MASK 0x00f00000 +#define A5XX_VSC_PIPE_CONFIG_REG_W__SHIFT 20 +static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_W(uint32_t val) +{ + return ((val) << A5XX_VSC_PIPE_CONFIG_REG_W__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_W__MASK; +} +#define A5XX_VSC_PIPE_CONFIG_REG_H__MASK 0x0f000000 +#define A5XX_VSC_PIPE_CONFIG_REG_H__SHIFT 24 +static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_H(uint32_t val) +{ + return ((val) << A5XX_VSC_PIPE_CONFIG_REG_H__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_H__MASK; +} + +static inline uint32_t REG_A5XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000be0 + 0x2*i0; } + +static inline uint32_t REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(uint32_t i0) { return 0x00000be0 + 0x2*i0; } + +static inline uint32_t REG_A5XX_VSC_PIPE_DATA_ADDRESS_HI(uint32_t i0) { return 0x00000be1 + 0x2*i0; } + +static inline uint32_t REG_A5XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c00 + 0x1*i0; } + +static inline uint32_t REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(uint32_t i0) { return 0x00000c00 + 0x1*i0; } + +#define REG_A5XX_VSC_PERFCTR_VSC_SEL_0 0x00000c60 + +#define REG_A5XX_VSC_PERFCTR_VSC_SEL_1 0x00000c61 + +#define REG_A5XX_VSC_RESOLVE_CNTL 0x00000cdd +#define A5XX_VSC_RESOLVE_CNTL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_VSC_RESOLVE_CNTL_X__MASK 0x00007fff +#define A5XX_VSC_RESOLVE_CNTL_X__SHIFT 0 +static inline uint32_t A5XX_VSC_RESOLVE_CNTL_X(uint32_t val) +{ + return ((val) << A5XX_VSC_RESOLVE_CNTL_X__SHIFT) & A5XX_VSC_RESOLVE_CNTL_X__MASK; +} +#define A5XX_VSC_RESOLVE_CNTL_Y__MASK 0x7fff0000 +#define A5XX_VSC_RESOLVE_CNTL_Y__SHIFT 16 +static inline uint32_t A5XX_VSC_RESOLVE_CNTL_Y(uint32_t val) +{ + return ((val) << A5XX_VSC_RESOLVE_CNTL_Y__SHIFT) & A5XX_VSC_RESOLVE_CNTL_Y__MASK; +} + +#define REG_A5XX_GRAS_ADDR_MODE_CNTL 0x00000c81 + +#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_0 0x00000c90 + +#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_1 0x00000c91 + +#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_2 0x00000c92 + +#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_3 0x00000c93 + +#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_0 0x00000c94 + +#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_1 0x00000c95 + +#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_2 0x00000c96 + +#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_3 0x00000c97 + +#define REG_A5XX_GRAS_PERFCTR_LRZ_SEL_0 0x00000c98 + +#define REG_A5XX_GRAS_PERFCTR_LRZ_SEL_1 0x00000c99 + +#define REG_A5XX_GRAS_PERFCTR_LRZ_SEL_2 0x00000c9a + +#define REG_A5XX_GRAS_PERFCTR_LRZ_SEL_3 0x00000c9b + +#define REG_A5XX_RB_DBG_ECO_CNTL 0x00000cc4 + +#define REG_A5XX_RB_ADDR_MODE_CNTL 0x00000cc5 + +#define REG_A5XX_RB_MODE_CNTL 0x00000cc6 + +#define REG_A5XX_RB_CCU_CNTL 0x00000cc7 + +#define REG_A5XX_RB_PERFCTR_RB_SEL_0 0x00000cd0 + +#define REG_A5XX_RB_PERFCTR_RB_SEL_1 0x00000cd1 + +#define REG_A5XX_RB_PERFCTR_RB_SEL_2 0x00000cd2 + +#define REG_A5XX_RB_PERFCTR_RB_SEL_3 0x00000cd3 + +#define REG_A5XX_RB_PERFCTR_RB_SEL_4 0x00000cd4 + +#define REG_A5XX_RB_PERFCTR_RB_SEL_5 0x00000cd5 + +#define REG_A5XX_RB_PERFCTR_RB_SEL_6 0x00000cd6 + +#define REG_A5XX_RB_PERFCTR_RB_SEL_7 0x00000cd7 + +#define REG_A5XX_RB_PERFCTR_CCU_SEL_0 0x00000cd8 + +#define REG_A5XX_RB_PERFCTR_CCU_SEL_1 0x00000cd9 + +#define REG_A5XX_RB_PERFCTR_CCU_SEL_2 0x00000cda + +#define REG_A5XX_RB_PERFCTR_CCU_SEL_3 0x00000cdb + +#define REG_A5XX_RB_POWERCTR_RB_SEL_0 0x00000ce0 + +#define REG_A5XX_RB_POWERCTR_RB_SEL_1 0x00000ce1 + +#define REG_A5XX_RB_POWERCTR_RB_SEL_2 0x00000ce2 + +#define REG_A5XX_RB_POWERCTR_RB_SEL_3 0x00000ce3 + +#define REG_A5XX_RB_POWERCTR_CCU_SEL_0 0x00000ce4 + +#define REG_A5XX_RB_POWERCTR_CCU_SEL_1 0x00000ce5 + 
+#define REG_A5XX_RB_PERFCTR_CMP_SEL_0 0x00000cec + +#define REG_A5XX_RB_PERFCTR_CMP_SEL_1 0x00000ced + +#define REG_A5XX_RB_PERFCTR_CMP_SEL_2 0x00000cee + +#define REG_A5XX_RB_PERFCTR_CMP_SEL_3 0x00000cef + +#define REG_A5XX_PC_DBG_ECO_CNTL 0x00000d00 +#define A5XX_PC_DBG_ECO_CNTL_TWOPASSUSEWFI 0x00000100 + +#define REG_A5XX_PC_ADDR_MODE_CNTL 0x00000d01 + +#define REG_A5XX_PC_MODE_CNTL 0x00000d02 + +#define REG_A5XX_UNKNOWN_0D08 0x00000d08 + +#define REG_A5XX_UNKNOWN_0D09 0x00000d09 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_0 0x00000d10 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_1 0x00000d11 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_2 0x00000d12 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_3 0x00000d13 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_4 0x00000d14 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_5 0x00000d15 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_6 0x00000d16 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_7 0x00000d17 + +#define REG_A5XX_HLSQ_TIMEOUT_THRESHOLD_0 0x00000e00 + +#define REG_A5XX_HLSQ_TIMEOUT_THRESHOLD_1 0x00000e01 + +#define REG_A5XX_HLSQ_ADDR_MODE_CNTL 0x00000e05 + +#define REG_A5XX_HLSQ_MODE_CNTL 0x00000e06 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_0 0x00000e10 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_1 0x00000e11 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_2 0x00000e12 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_3 0x00000e13 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_4 0x00000e14 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_5 0x00000e15 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_6 0x00000e16 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_7 0x00000e17 + +#define REG_A5XX_HLSQ_SPTP_RDSEL 0x00000f08 + +#define REG_A5XX_HLSQ_DBG_READ_SEL 0x0000bc00 + +#define REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE 0x0000a000 + +#define REG_A5XX_VFD_ADDR_MODE_CNTL 0x00000e41 + +#define REG_A5XX_VFD_MODE_CNTL 0x00000e42 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_0 0x00000e50 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_1 0x00000e51 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_2 0x00000e52 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_3 0x00000e53 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_4 0x00000e54 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_5 0x00000e55 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_6 0x00000e56 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_7 0x00000e57 + +#define REG_A5XX_VPC_DBG_ECO_CNTL 0x00000e60 + +#define REG_A5XX_VPC_ADDR_MODE_CNTL 0x00000e61 + +#define REG_A5XX_VPC_MODE_CNTL 0x00000e62 +#define A5XX_VPC_MODE_CNTL_BINNING_PASS 0x00000001 + +#define REG_A5XX_VPC_PERFCTR_VPC_SEL_0 0x00000e64 + +#define REG_A5XX_VPC_PERFCTR_VPC_SEL_1 0x00000e65 + +#define REG_A5XX_VPC_PERFCTR_VPC_SEL_2 0x00000e66 + +#define REG_A5XX_VPC_PERFCTR_VPC_SEL_3 0x00000e67 + +#define REG_A5XX_UCHE_ADDR_MODE_CNTL 0x00000e80 + +#define REG_A5XX_UCHE_SVM_CNTL 0x00000e82 + +#define REG_A5XX_UCHE_WRITE_THRU_BASE_LO 0x00000e87 + +#define REG_A5XX_UCHE_WRITE_THRU_BASE_HI 0x00000e88 + +#define REG_A5XX_UCHE_TRAP_BASE_LO 0x00000e89 + +#define REG_A5XX_UCHE_TRAP_BASE_HI 0x00000e8a + +#define REG_A5XX_UCHE_GMEM_RANGE_MIN_LO 0x00000e8b + +#define REG_A5XX_UCHE_GMEM_RANGE_MIN_HI 0x00000e8c + +#define REG_A5XX_UCHE_GMEM_RANGE_MAX_LO 0x00000e8d + +#define REG_A5XX_UCHE_GMEM_RANGE_MAX_HI 0x00000e8e + +#define REG_A5XX_UCHE_DBG_ECO_CNTL_2 0x00000e8f + +#define REG_A5XX_UCHE_DBG_ECO_CNTL 0x00000e90 + +#define REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_LO 0x00000e91 + +#define REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_HI 0x00000e92 + +#define REG_A5XX_UCHE_CACHE_INVALIDATE_MAX_LO 0x00000e93 + +#define REG_A5XX_UCHE_CACHE_INVALIDATE_MAX_HI 0x00000e94 + +#define REG_A5XX_UCHE_CACHE_INVALIDATE 0x00000e95 + +#define 
REG_A5XX_UCHE_CACHE_WAYS 0x00000e96 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_0 0x00000ea0 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_1 0x00000ea1 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_2 0x00000ea2 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_3 0x00000ea3 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_4 0x00000ea4 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_5 0x00000ea5 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_6 0x00000ea6 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_7 0x00000ea7 + +#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_0 0x00000ea8 + +#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_1 0x00000ea9 + +#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_2 0x00000eaa + +#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_3 0x00000eab + +#define REG_A5XX_UCHE_TRAP_LOG_LO 0x00000eb1 + +#define REG_A5XX_UCHE_TRAP_LOG_HI 0x00000eb2 + +#define REG_A5XX_SP_DBG_ECO_CNTL 0x00000ec0 + +#define REG_A5XX_SP_ADDR_MODE_CNTL 0x00000ec1 + +#define REG_A5XX_SP_MODE_CNTL 0x00000ec2 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_0 0x00000ed0 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_1 0x00000ed1 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_2 0x00000ed2 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_3 0x00000ed3 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_4 0x00000ed4 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_5 0x00000ed5 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_6 0x00000ed6 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_7 0x00000ed7 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_8 0x00000ed8 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_9 0x00000ed9 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_10 0x00000eda + +#define REG_A5XX_SP_PERFCTR_SP_SEL_11 0x00000edb + +#define REG_A5XX_SP_POWERCTR_SP_SEL_0 0x00000edc + +#define REG_A5XX_SP_POWERCTR_SP_SEL_1 0x00000edd + +#define REG_A5XX_SP_POWERCTR_SP_SEL_2 0x00000ede + +#define REG_A5XX_SP_POWERCTR_SP_SEL_3 0x00000edf + +#define REG_A5XX_TPL1_ADDR_MODE_CNTL 0x00000f01 + +#define REG_A5XX_TPL1_MODE_CNTL 0x00000f02 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_0 0x00000f10 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_1 0x00000f11 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_2 0x00000f12 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_3 0x00000f13 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_4 0x00000f14 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_5 0x00000f15 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_6 0x00000f16 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_7 0x00000f17 + +#define REG_A5XX_TPL1_POWERCTR_TP_SEL_0 0x00000f18 + +#define REG_A5XX_TPL1_POWERCTR_TP_SEL_1 0x00000f19 + +#define REG_A5XX_TPL1_POWERCTR_TP_SEL_2 0x00000f1a + +#define REG_A5XX_TPL1_POWERCTR_TP_SEL_3 0x00000f1b + +#define REG_A5XX_VBIF_VERSION 0x00003000 + +#define REG_A5XX_VBIF_CLKON 0x00003001 + +#define REG_A5XX_VBIF_ABIT_SORT 0x00003028 + +#define REG_A5XX_VBIF_ABIT_SORT_CONF 0x00003029 + +#define REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB 0x00003049 + +#define REG_A5XX_VBIF_GATE_OFF_WRREQ_EN 0x0000302a + +#define REG_A5XX_VBIF_IN_RD_LIM_CONF0 0x0000302c + +#define REG_A5XX_VBIF_IN_RD_LIM_CONF1 0x0000302d + +#define REG_A5XX_VBIF_XIN_HALT_CTRL0 0x00003080 + +#define REG_A5XX_VBIF_XIN_HALT_CTRL1 0x00003081 + +#define REG_A5XX_VBIF_TEST_BUS_OUT_CTRL 0x00003084 + +#define REG_A5XX_VBIF_TEST_BUS1_CTRL0 0x00003085 + +#define REG_A5XX_VBIF_TEST_BUS1_CTRL1 0x00003086 + +#define REG_A5XX_VBIF_TEST_BUS2_CTRL0 0x00003087 + +#define REG_A5XX_VBIF_TEST_BUS2_CTRL1 0x00003088 + +#define REG_A5XX_VBIF_TEST_BUS_OUT 0x0000308c + +#define REG_A5XX_VBIF_PERF_CNT_EN0 0x000030c0 + +#define REG_A5XX_VBIF_PERF_CNT_EN1 0x000030c1 + +#define REG_A5XX_VBIF_PERF_CNT_EN2 0x000030c2 + +#define REG_A5XX_VBIF_PERF_CNT_EN3 0x000030c3 + +#define REG_A5XX_VBIF_PERF_CNT_SEL0 0x000030d0 + 
+#define REG_A5XX_VBIF_PERF_CNT_SEL1 0x000030d1 + +#define REG_A5XX_VBIF_PERF_CNT_SEL2 0x000030d2 + +#define REG_A5XX_VBIF_PERF_CNT_SEL3 0x000030d3 + +#define REG_A5XX_VBIF_PERF_CNT_LOW0 0x000030d8 + +#define REG_A5XX_VBIF_PERF_CNT_LOW1 0x000030d9 + +#define REG_A5XX_VBIF_PERF_CNT_LOW2 0x000030da + +#define REG_A5XX_VBIF_PERF_CNT_LOW3 0x000030db + +#define REG_A5XX_VBIF_PERF_CNT_HIGH0 0x000030e0 + +#define REG_A5XX_VBIF_PERF_CNT_HIGH1 0x000030e1 + +#define REG_A5XX_VBIF_PERF_CNT_HIGH2 0x000030e2 + +#define REG_A5XX_VBIF_PERF_CNT_HIGH3 0x000030e3 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_EN0 0x00003100 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_EN1 0x00003101 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_EN2 0x00003102 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_LOW0 0x00003110 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_LOW1 0x00003111 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_LOW2 0x00003112 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_HIGH0 0x00003118 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_HIGH1 0x00003119 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_HIGH2 0x0000311a + +#define REG_A5XX_GPMU_INST_RAM_BASE 0x00008800 + +#define REG_A5XX_GPMU_DATA_RAM_BASE 0x00009800 + +#define REG_A5XX_GPMU_SP_POWER_CNTL 0x0000a881 + +#define REG_A5XX_GPMU_RBCCU_CLOCK_CNTL 0x0000a886 + +#define REG_A5XX_GPMU_RBCCU_POWER_CNTL 0x0000a887 + +#define REG_A5XX_GPMU_SP_PWR_CLK_STATUS 0x0000a88b +#define A5XX_GPMU_SP_PWR_CLK_STATUS_PWR_ON 0x00100000 + +#define REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS 0x0000a88d +#define A5XX_GPMU_RBCCU_PWR_CLK_STATUS_PWR_ON 0x00100000 + +#define REG_A5XX_GPMU_PWR_COL_STAGGER_DELAY 0x0000a891 + +#define REG_A5XX_GPMU_PWR_COL_INTER_FRAME_CTRL 0x0000a892 + +#define REG_A5XX_GPMU_PWR_COL_INTER_FRAME_HYST 0x0000a893 + +#define REG_A5XX_GPMU_PWR_COL_BINNING_CTRL 0x0000a894 + +#define REG_A5XX_GPMU_CLOCK_THROTTLE_CTRL 0x0000a8a3 + +#define REG_A5XX_GPMU_WFI_CONFIG 0x0000a8c1 + +#define REG_A5XX_GPMU_RBBM_INTR_INFO 0x0000a8d6 + +#define REG_A5XX_GPMU_CM3_SYSRESET 0x0000a8d8 + +#define REG_A5XX_GPMU_GENERAL_0 0x0000a8e0 + +#define REG_A5XX_GPMU_GENERAL_1 0x0000a8e1 + +#define REG_A5XX_SP_POWER_COUNTER_0_LO 0x0000a840 + +#define REG_A5XX_SP_POWER_COUNTER_0_HI 0x0000a841 + +#define REG_A5XX_SP_POWER_COUNTER_1_LO 0x0000a842 + +#define REG_A5XX_SP_POWER_COUNTER_1_HI 0x0000a843 + +#define REG_A5XX_SP_POWER_COUNTER_2_LO 0x0000a844 + +#define REG_A5XX_SP_POWER_COUNTER_2_HI 0x0000a845 + +#define REG_A5XX_SP_POWER_COUNTER_3_LO 0x0000a846 + +#define REG_A5XX_SP_POWER_COUNTER_3_HI 0x0000a847 + +#define REG_A5XX_TP_POWER_COUNTER_0_LO 0x0000a848 + +#define REG_A5XX_TP_POWER_COUNTER_0_HI 0x0000a849 + +#define REG_A5XX_TP_POWER_COUNTER_1_LO 0x0000a84a + +#define REG_A5XX_TP_POWER_COUNTER_1_HI 0x0000a84b + +#define REG_A5XX_TP_POWER_COUNTER_2_LO 0x0000a84c + +#define REG_A5XX_TP_POWER_COUNTER_2_HI 0x0000a84d + +#define REG_A5XX_TP_POWER_COUNTER_3_LO 0x0000a84e + +#define REG_A5XX_TP_POWER_COUNTER_3_HI 0x0000a84f + +#define REG_A5XX_RB_POWER_COUNTER_0_LO 0x0000a850 + +#define REG_A5XX_RB_POWER_COUNTER_0_HI 0x0000a851 + +#define REG_A5XX_RB_POWER_COUNTER_1_LO 0x0000a852 + +#define REG_A5XX_RB_POWER_COUNTER_1_HI 0x0000a853 + +#define REG_A5XX_RB_POWER_COUNTER_2_LO 0x0000a854 + +#define REG_A5XX_RB_POWER_COUNTER_2_HI 0x0000a855 + +#define REG_A5XX_RB_POWER_COUNTER_3_LO 0x0000a856 + +#define REG_A5XX_RB_POWER_COUNTER_3_HI 0x0000a857 + +#define REG_A5XX_CCU_POWER_COUNTER_0_LO 0x0000a858 + +#define REG_A5XX_CCU_POWER_COUNTER_0_HI 0x0000a859 + +#define REG_A5XX_CCU_POWER_COUNTER_1_LO 0x0000a85a + +#define REG_A5XX_CCU_POWER_COUNTER_1_HI 0x0000a85b + +#define 
REG_A5XX_UCHE_POWER_COUNTER_0_LO 0x0000a85c + +#define REG_A5XX_UCHE_POWER_COUNTER_0_HI 0x0000a85d + +#define REG_A5XX_UCHE_POWER_COUNTER_1_LO 0x0000a85e + +#define REG_A5XX_UCHE_POWER_COUNTER_1_HI 0x0000a85f + +#define REG_A5XX_UCHE_POWER_COUNTER_2_LO 0x0000a860 + +#define REG_A5XX_UCHE_POWER_COUNTER_2_HI 0x0000a861 + +#define REG_A5XX_UCHE_POWER_COUNTER_3_LO 0x0000a862 + +#define REG_A5XX_UCHE_POWER_COUNTER_3_HI 0x0000a863 + +#define REG_A5XX_CP_POWER_COUNTER_0_LO 0x0000a864 + +#define REG_A5XX_CP_POWER_COUNTER_0_HI 0x0000a865 + +#define REG_A5XX_CP_POWER_COUNTER_1_LO 0x0000a866 + +#define REG_A5XX_CP_POWER_COUNTER_1_HI 0x0000a867 + +#define REG_A5XX_CP_POWER_COUNTER_2_LO 0x0000a868 + +#define REG_A5XX_CP_POWER_COUNTER_2_HI 0x0000a869 + +#define REG_A5XX_CP_POWER_COUNTER_3_LO 0x0000a86a + +#define REG_A5XX_CP_POWER_COUNTER_3_HI 0x0000a86b + +#define REG_A5XX_GPMU_POWER_COUNTER_0_LO 0x0000a86c + +#define REG_A5XX_GPMU_POWER_COUNTER_0_HI 0x0000a86d + +#define REG_A5XX_GPMU_POWER_COUNTER_1_LO 0x0000a86e + +#define REG_A5XX_GPMU_POWER_COUNTER_1_HI 0x0000a86f + +#define REG_A5XX_GPMU_POWER_COUNTER_2_LO 0x0000a870 + +#define REG_A5XX_GPMU_POWER_COUNTER_2_HI 0x0000a871 + +#define REG_A5XX_GPMU_POWER_COUNTER_3_LO 0x0000a872 + +#define REG_A5XX_GPMU_POWER_COUNTER_3_HI 0x0000a873 + +#define REG_A5XX_GPMU_POWER_COUNTER_4_LO 0x0000a874 + +#define REG_A5XX_GPMU_POWER_COUNTER_4_HI 0x0000a875 + +#define REG_A5XX_GPMU_POWER_COUNTER_5_LO 0x0000a876 + +#define REG_A5XX_GPMU_POWER_COUNTER_5_HI 0x0000a877 + +#define REG_A5XX_GPMU_POWER_COUNTER_ENABLE 0x0000a878 + +#define REG_A5XX_GPMU_ALWAYS_ON_COUNTER_LO 0x0000a879 + +#define REG_A5XX_GPMU_ALWAYS_ON_COUNTER_HI 0x0000a87a + +#define REG_A5XX_GPMU_ALWAYS_ON_COUNTER_RESET 0x0000a87b + +#define REG_A5XX_GPMU_POWER_COUNTER_SELECT_0 0x0000a87c + +#define REG_A5XX_GPMU_POWER_COUNTER_SELECT_1 0x0000a87d + +#define REG_A5XX_GPMU_CLOCK_THROTTLE_CTRL 0x0000a8a3 + +#define REG_A5XX_GPMU_THROTTLE_UNMASK_FORCE_CTRL 0x0000a8a8 + +#define REG_A5XX_GPMU_TEMP_SENSOR_ID 0x0000ac00 + +#define REG_A5XX_GPMU_TEMP_SENSOR_CONFIG 0x0000ac01 + +#define REG_A5XX_GPMU_TEMP_VAL 0x0000ac02 + +#define REG_A5XX_GPMU_DELTA_TEMP_THRESHOLD 0x0000ac03 + +#define REG_A5XX_GPMU_TEMP_THRESHOLD_INTR_STATUS 0x0000ac05 + +#define REG_A5XX_GPMU_TEMP_THRESHOLD_INTR_EN_MASK 0x0000ac06 + +#define REG_A5XX_GPMU_LEAKAGE_TEMP_COEFF_0_1 0x0000ac40 + +#define REG_A5XX_GPMU_LEAKAGE_TEMP_COEFF_2_3 0x0000ac41 + +#define REG_A5XX_GPMU_LEAKAGE_VTG_COEFF_0_1 0x0000ac42 + +#define REG_A5XX_GPMU_LEAKAGE_VTG_COEFF_2_3 0x0000ac43 + +#define REG_A5XX_GPMU_BASE_LEAKAGE 0x0000ac46 + +#define REG_A5XX_GPMU_GPMU_VOLTAGE 0x0000ac60 + +#define REG_A5XX_GPMU_GPMU_VOLTAGE_INTR_STATUS 0x0000ac61 + +#define REG_A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK 0x0000ac62 + +#define REG_A5XX_GPMU_GPMU_PWR_THRESHOLD 0x0000ac80 + +#define REG_A5XX_GPMU_GPMU_LLM_GLM_SLEEP_CTRL 0x0000acc4 + +#define REG_A5XX_GPMU_GPMU_LLM_GLM_SLEEP_STATUS 0x0000acc5 + +#define REG_A5XX_GDPM_CONFIG1 0x0000b80c + +#define REG_A5XX_GDPM_CONFIG2 0x0000b80d + +#define REG_A5XX_GDPM_INT_EN 0x0000b80f + +#define REG_A5XX_GDPM_INT_MASK 0x0000b811 + +#define REG_A5XX_GPMU_BEC_ENABLE 0x0000b9a0 + +#define REG_A5XX_GPU_CS_SENSOR_GENERAL_STATUS 0x0000c41a + +#define REG_A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_0 0x0000c41d + +#define REG_A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_2 0x0000c41f + +#define REG_A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_4 0x0000c421 + +#define REG_A5XX_GPU_CS_ENABLE_REG 0x0000c520 + +#define REG_A5XX_GPU_CS_AMP_CALIBRATION_CONTROL1 0x0000c557 + +#define 
REG_A5XX_GRAS_CL_CNTL 0x0000e000 +#define A5XX_GRAS_CL_CNTL_ZERO_GB_SCALE_Z 0x00000040 + +#define REG_A5XX_UNKNOWN_E001 0x0000e001 + +#define REG_A5XX_UNKNOWN_E004 0x0000e004 + +#define REG_A5XX_GRAS_CNTL 0x0000e005 +#define A5XX_GRAS_CNTL_VARYING 0x00000001 +#define A5XX_GRAS_CNTL_UNK3 0x00000008 +#define A5XX_GRAS_CNTL_XCOORD 0x00000040 +#define A5XX_GRAS_CNTL_YCOORD 0x00000080 +#define A5XX_GRAS_CNTL_ZCOORD 0x00000100 +#define A5XX_GRAS_CNTL_WCOORD 0x00000200 + +#define REG_A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ 0x0000e006 +#define A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__MASK 0x000003ff +#define A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__SHIFT 0 +static inline uint32_t A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(uint32_t val) +{ + return ((val) << A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__SHIFT) & A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__MASK; +} +#define A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__MASK 0x000ffc00 +#define A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__SHIFT 10 +static inline uint32_t A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(uint32_t val) +{ + return ((val) << A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__SHIFT) & A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__MASK; +} + +#define REG_A5XX_GRAS_CL_VPORT_XOFFSET_0 0x0000e010 +#define A5XX_GRAS_CL_VPORT_XOFFSET_0__MASK 0xffffffff +#define A5XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT 0 +static inline uint32_t A5XX_GRAS_CL_VPORT_XOFFSET_0(float val) +{ + return ((fui(val)) << A5XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT) & A5XX_GRAS_CL_VPORT_XOFFSET_0__MASK; +} + +#define REG_A5XX_GRAS_CL_VPORT_XSCALE_0 0x0000e011 +#define A5XX_GRAS_CL_VPORT_XSCALE_0__MASK 0xffffffff +#define A5XX_GRAS_CL_VPORT_XSCALE_0__SHIFT 0 +static inline uint32_t A5XX_GRAS_CL_VPORT_XSCALE_0(float val) +{ + return ((fui(val)) << A5XX_GRAS_CL_VPORT_XSCALE_0__SHIFT) & A5XX_GRAS_CL_VPORT_XSCALE_0__MASK; +} + +#define REG_A5XX_GRAS_CL_VPORT_YOFFSET_0 0x0000e012 +#define A5XX_GRAS_CL_VPORT_YOFFSET_0__MASK 0xffffffff +#define A5XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT 0 +static inline uint32_t A5XX_GRAS_CL_VPORT_YOFFSET_0(float val) +{ + return ((fui(val)) << A5XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT) & A5XX_GRAS_CL_VPORT_YOFFSET_0__MASK; +} + +#define REG_A5XX_GRAS_CL_VPORT_YSCALE_0 0x0000e013 +#define A5XX_GRAS_CL_VPORT_YSCALE_0__MASK 0xffffffff +#define A5XX_GRAS_CL_VPORT_YSCALE_0__SHIFT 0 +static inline uint32_t A5XX_GRAS_CL_VPORT_YSCALE_0(float val) +{ + return ((fui(val)) << A5XX_GRAS_CL_VPORT_YSCALE_0__SHIFT) & A5XX_GRAS_CL_VPORT_YSCALE_0__MASK; +} + +#define REG_A5XX_GRAS_CL_VPORT_ZOFFSET_0 0x0000e014 +#define A5XX_GRAS_CL_VPORT_ZOFFSET_0__MASK 0xffffffff +#define A5XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT 0 +static inline uint32_t A5XX_GRAS_CL_VPORT_ZOFFSET_0(float val) +{ + return ((fui(val)) << A5XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT) & A5XX_GRAS_CL_VPORT_ZOFFSET_0__MASK; +} + +#define REG_A5XX_GRAS_CL_VPORT_ZSCALE_0 0x0000e015 +#define A5XX_GRAS_CL_VPORT_ZSCALE_0__MASK 0xffffffff +#define A5XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT 0 +static inline uint32_t A5XX_GRAS_CL_VPORT_ZSCALE_0(float val) +{ + return ((fui(val)) << A5XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT) & A5XX_GRAS_CL_VPORT_ZSCALE_0__MASK; +} + +#define REG_A5XX_GRAS_SU_CNTL 0x0000e090 +#define A5XX_GRAS_SU_CNTL_CULL_FRONT 0x00000001 +#define A5XX_GRAS_SU_CNTL_CULL_BACK 0x00000002 +#define A5XX_GRAS_SU_CNTL_FRONT_CW 0x00000004 +#define A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK 0x000007f8 +#define A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT 3 +static inline uint32_t A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(float val) +{ + return ((((int32_t)(val * 4.0))) << A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT) & 
A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK; +} +#define A5XX_GRAS_SU_CNTL_POLY_OFFSET 0x00000800 +#define A5XX_GRAS_SU_CNTL_MSAA_ENABLE 0x00002000 + +#define REG_A5XX_GRAS_SU_POINT_MINMAX 0x0000e091 +#define A5XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff +#define A5XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0 +static inline uint32_t A5XX_GRAS_SU_POINT_MINMAX_MIN(float val) +{ + return ((((uint32_t)(val * 16.0))) << A5XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A5XX_GRAS_SU_POINT_MINMAX_MIN__MASK; +} +#define A5XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000 +#define A5XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 16 +static inline uint32_t A5XX_GRAS_SU_POINT_MINMAX_MAX(float val) +{ + return ((((uint32_t)(val * 16.0))) << A5XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A5XX_GRAS_SU_POINT_MINMAX_MAX__MASK; +} + +#define REG_A5XX_GRAS_SU_POINT_SIZE 0x0000e092 +#define A5XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff +#define A5XX_GRAS_SU_POINT_SIZE__SHIFT 0 +static inline uint32_t A5XX_GRAS_SU_POINT_SIZE(float val) +{ + return ((((int32_t)(val * 16.0))) << A5XX_GRAS_SU_POINT_SIZE__SHIFT) & A5XX_GRAS_SU_POINT_SIZE__MASK; +} + +#define REG_A5XX_UNKNOWN_E093 0x0000e093 + +#define REG_A5XX_GRAS_SU_DEPTH_PLANE_CNTL 0x0000e094 +#define A5XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z 0x00000001 +#define A5XX_GRAS_SU_DEPTH_PLANE_CNTL_UNK1 0x00000002 + +#define REG_A5XX_GRAS_SU_POLY_OFFSET_SCALE 0x0000e095 +#define A5XX_GRAS_SU_POLY_OFFSET_SCALE__MASK 0xffffffff +#define A5XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT 0 +static inline uint32_t A5XX_GRAS_SU_POLY_OFFSET_SCALE(float val) +{ + return ((fui(val)) << A5XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT) & A5XX_GRAS_SU_POLY_OFFSET_SCALE__MASK; +} + +#define REG_A5XX_GRAS_SU_POLY_OFFSET_OFFSET 0x0000e096 +#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK 0xffffffff +#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0 +static inline uint32_t A5XX_GRAS_SU_POLY_OFFSET_OFFSET(float val) +{ + return ((fui(val)) << A5XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A5XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK; +} + +#define REG_A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP 0x0000e097 +#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__MASK 0xffffffff +#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__SHIFT 0 +static inline uint32_t A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(float val) +{ + return ((fui(val)) << A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__SHIFT) & A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__MASK; +} + +#define REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO 0x0000e098 +#define A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK 0x00000007 +#define A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT 0 +static inline uint32_t A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a5xx_depth_format val) +{ + return ((val) << A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT) & A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK; +} + +#define REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL 0x0000e099 + +#define REG_A5XX_GRAS_SC_CNTL 0x0000e0a0 +#define A5XX_GRAS_SC_CNTL_BINNING_PASS 0x00000001 +#define A5XX_GRAS_SC_CNTL_SAMPLES_PASSED 0x00008000 + +#define REG_A5XX_GRAS_SC_BIN_CNTL 0x0000e0a1 + +#define REG_A5XX_GRAS_SC_RAS_MSAA_CNTL 0x0000e0a2 +#define A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__MASK; +} + +#define REG_A5XX_GRAS_SC_DEST_MSAA_CNTL 0x0000e0a3 +#define A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define 
A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__MASK; +} +#define A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 + +#define REG_A5XX_GRAS_SC_SCREEN_SCISSOR_CNTL 0x0000e0a4 + +#define REG_A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0 0x0000e0aa +#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__MASK 0x00007fff +#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__SHIFT 0 +static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__SHIFT) & A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__MASK; +} +#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__MASK 0x7fff0000 +#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__SHIFT 16 +static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__SHIFT) & A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__MASK; +} + +#define REG_A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0 0x0000e0ab +#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__MASK 0x00007fff +#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__SHIFT 0 +static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__SHIFT) & A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__MASK; +} +#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__MASK 0x7fff0000 +#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__SHIFT 16 +static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__SHIFT) & A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__MASK; +} + +#define REG_A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0 0x0000e0ca +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__MASK 0x00007fff +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__SHIFT 0 +static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__SHIFT) & A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__MASK; +} +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__MASK 0x7fff0000 +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__SHIFT 16 +static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__SHIFT) & A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__MASK; +} + +#define REG_A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0 0x0000e0cb +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__MASK 0x00007fff +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__SHIFT 0 +static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__SHIFT) & A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__MASK; +} +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__MASK 0x7fff0000 +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__SHIFT 16 +static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__SHIFT) & A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__MASK; +} + +#define REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL 0x0000e0ea +#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff +#define 
A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT 0
+static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val)
+{
+	return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK;
+}
+#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000
+#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16
+static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val)
+{
+	return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK;
+}
+
+#define REG_A5XX_GRAS_SC_WINDOW_SCISSOR_BR 0x0000e0eb
+#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000
+#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff
+#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT 0
+static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val)
+{
+	return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK;
+}
+#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000
+#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16
+static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val)
+{
+	return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK;
+}
+
+#define REG_A5XX_GRAS_LRZ_CNTL 0x0000e100
+#define A5XX_GRAS_LRZ_CNTL_ENABLE 0x00000001
+#define A5XX_GRAS_LRZ_CNTL_LRZ_WRITE 0x00000002
+#define A5XX_GRAS_LRZ_CNTL_GREATER 0x00000004
+
+#define REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO 0x0000e101
+
+#define REG_A5XX_GRAS_LRZ_BUFFER_BASE_HI 0x0000e102
+
+#define REG_A5XX_GRAS_LRZ_BUFFER_PITCH 0x0000e103
+#define A5XX_GRAS_LRZ_BUFFER_PITCH__MASK 0xffffffff
+#define A5XX_GRAS_LRZ_BUFFER_PITCH__SHIFT 0
+static inline uint32_t A5XX_GRAS_LRZ_BUFFER_PITCH(uint32_t val)
+{
+	assert(!(val & 0x1f));
+	return ((val >> 5) << A5XX_GRAS_LRZ_BUFFER_PITCH__SHIFT) & A5XX_GRAS_LRZ_BUFFER_PITCH__MASK;
+}
+
+#define REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO 0x0000e104
+
+#define REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI 0x0000e105
+
+#define REG_A5XX_RB_CNTL 0x0000e140
+#define A5XX_RB_CNTL_WIDTH__MASK 0x000000ff
+#define A5XX_RB_CNTL_WIDTH__SHIFT 0
+static inline uint32_t A5XX_RB_CNTL_WIDTH(uint32_t val)
+{
+	assert(!(val & 0x1f));
+	return ((val >> 5) << A5XX_RB_CNTL_WIDTH__SHIFT) & A5XX_RB_CNTL_WIDTH__MASK;
+}
+#define A5XX_RB_CNTL_HEIGHT__MASK 0x0001fe00
+#define A5XX_RB_CNTL_HEIGHT__SHIFT 9
+static inline uint32_t A5XX_RB_CNTL_HEIGHT(uint32_t val)
+{
+	assert(!(val & 0x1f));
+	return ((val >> 5) << A5XX_RB_CNTL_HEIGHT__SHIFT) & A5XX_RB_CNTL_HEIGHT__MASK;
+}
+#define A5XX_RB_CNTL_BYPASS 0x00020000
+
+#define REG_A5XX_RB_RENDER_CNTL 0x0000e141
+#define A5XX_RB_RENDER_CNTL_BINNING_PASS 0x00000001
+#define A5XX_RB_RENDER_CNTL_SAMPLES_PASSED 0x00000040
+#define A5XX_RB_RENDER_CNTL_DISABLE_COLOR_PIPE 0x00000080
+#define A5XX_RB_RENDER_CNTL_FLAG_DEPTH 0x00004000
+#define A5XX_RB_RENDER_CNTL_FLAG_DEPTH2 0x00008000
+#define A5XX_RB_RENDER_CNTL_FLAG_MRTS__MASK 0x00ff0000
+#define A5XX_RB_RENDER_CNTL_FLAG_MRTS__SHIFT 16
+static inline uint32_t A5XX_RB_RENDER_CNTL_FLAG_MRTS(uint32_t val)
+{
+	return ((val) << A5XX_RB_RENDER_CNTL_FLAG_MRTS__SHIFT) & A5XX_RB_RENDER_CNTL_FLAG_MRTS__MASK;
+}
+#define A5XX_RB_RENDER_CNTL_FLAG_MRTS2__MASK 0xff000000
+#define A5XX_RB_RENDER_CNTL_FLAG_MRTS2__SHIFT 24
+static inline uint32_t A5XX_RB_RENDER_CNTL_FLAG_MRTS2(uint32_t val)
+{
+	return ((val) << A5XX_RB_RENDER_CNTL_FLAG_MRTS2__SHIFT) & A5XX_RB_RENDER_CNTL_FLAG_MRTS2__MASK;
+}
+
+#define REG_A5XX_RB_RAS_MSAA_CNTL 0x0000e142
+#define A5XX_RB_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003
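[Editorial aside, not part of this diff: the generated __MASK/__SHIFT pairs and inline packers above are intended to be OR'd together into a single register dword. A minimal sketch, assuming this generated register header and <stdint.h> are included; pack_window_scissor() is a hypothetical helper name, not something from the patch.]

#include <stdint.h>

/* Dword values for the window-scissor register pair defined above;
 * in the driver these would then be written to
 * REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL / _BR via its command-stream helpers. */
struct a5xx_scissor_dwords {
	uint32_t tl;
	uint32_t br;
};

static inline struct a5xx_scissor_dwords
pack_window_scissor(uint32_t x1, uint32_t y1, uint32_t x2, uint32_t y2)
{
	struct a5xx_scissor_dwords s;

	/* Each helper shifts its argument into place and masks it to the
	 * 15-bit field, so out-of-range coordinates are silently truncated. */
	s.tl = A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
	       A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1);
	s.br = A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
	       A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2);
	return s;
}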
+#define A5XX_RB_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A5XX_RB_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A5XX_RB_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_RB_RAS_MSAA_CNTL_SAMPLES__MASK; +} + +#define REG_A5XX_RB_DEST_MSAA_CNTL 0x0000e143 +#define A5XX_RB_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A5XX_RB_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A5XX_RB_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A5XX_RB_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_RB_DEST_MSAA_CNTL_SAMPLES__MASK; +} +#define A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 + +#define REG_A5XX_RB_RENDER_CONTROL0 0x0000e144 +#define A5XX_RB_RENDER_CONTROL0_VARYING 0x00000001 +#define A5XX_RB_RENDER_CONTROL0_UNK3 0x00000008 +#define A5XX_RB_RENDER_CONTROL0_XCOORD 0x00000040 +#define A5XX_RB_RENDER_CONTROL0_YCOORD 0x00000080 +#define A5XX_RB_RENDER_CONTROL0_ZCOORD 0x00000100 +#define A5XX_RB_RENDER_CONTROL0_WCOORD 0x00000200 + +#define REG_A5XX_RB_RENDER_CONTROL1 0x0000e145 +#define A5XX_RB_RENDER_CONTROL1_FACENESS 0x00000002 + +#define REG_A5XX_RB_FS_OUTPUT_CNTL 0x0000e146 +#define A5XX_RB_FS_OUTPUT_CNTL_MRT__MASK 0x0000000f +#define A5XX_RB_FS_OUTPUT_CNTL_MRT__SHIFT 0 +static inline uint32_t A5XX_RB_FS_OUTPUT_CNTL_MRT(uint32_t val) +{ + return ((val) << A5XX_RB_FS_OUTPUT_CNTL_MRT__SHIFT) & A5XX_RB_FS_OUTPUT_CNTL_MRT__MASK; +} +#define A5XX_RB_FS_OUTPUT_CNTL_FRAG_WRITES_Z 0x00000020 + +#define REG_A5XX_RB_RENDER_COMPONENTS 0x0000e147 +#define A5XX_RB_RENDER_COMPONENTS_RT0__MASK 0x0000000f +#define A5XX_RB_RENDER_COMPONENTS_RT0__SHIFT 0 +static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT0(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_COMPONENTS_RT0__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT0__MASK; +} +#define A5XX_RB_RENDER_COMPONENTS_RT1__MASK 0x000000f0 +#define A5XX_RB_RENDER_COMPONENTS_RT1__SHIFT 4 +static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT1(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_COMPONENTS_RT1__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT1__MASK; +} +#define A5XX_RB_RENDER_COMPONENTS_RT2__MASK 0x00000f00 +#define A5XX_RB_RENDER_COMPONENTS_RT2__SHIFT 8 +static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT2(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_COMPONENTS_RT2__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT2__MASK; +} +#define A5XX_RB_RENDER_COMPONENTS_RT3__MASK 0x0000f000 +#define A5XX_RB_RENDER_COMPONENTS_RT3__SHIFT 12 +static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT3(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_COMPONENTS_RT3__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT3__MASK; +} +#define A5XX_RB_RENDER_COMPONENTS_RT4__MASK 0x000f0000 +#define A5XX_RB_RENDER_COMPONENTS_RT4__SHIFT 16 +static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT4(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_COMPONENTS_RT4__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT4__MASK; +} +#define A5XX_RB_RENDER_COMPONENTS_RT5__MASK 0x00f00000 +#define A5XX_RB_RENDER_COMPONENTS_RT5__SHIFT 20 +static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT5(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_COMPONENTS_RT5__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT5__MASK; +} +#define A5XX_RB_RENDER_COMPONENTS_RT6__MASK 0x0f000000 +#define A5XX_RB_RENDER_COMPONENTS_RT6__SHIFT 24 +static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT6(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_COMPONENTS_RT6__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT6__MASK; +} +#define A5XX_RB_RENDER_COMPONENTS_RT7__MASK 0xf0000000 +#define A5XX_RB_RENDER_COMPONENTS_RT7__SHIFT 28 +static inline 
uint32_t A5XX_RB_RENDER_COMPONENTS_RT7(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_COMPONENTS_RT7__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT7__MASK; +} + +static inline uint32_t REG_A5XX_RB_MRT(uint32_t i0) { return 0x0000e150 + 0x7*i0; } + +static inline uint32_t REG_A5XX_RB_MRT_CONTROL(uint32_t i0) { return 0x0000e150 + 0x7*i0; } +#define A5XX_RB_MRT_CONTROL_BLEND 0x00000001 +#define A5XX_RB_MRT_CONTROL_BLEND2 0x00000002 +#define A5XX_RB_MRT_CONTROL_ROP_ENABLE 0x00000004 +#define A5XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000078 +#define A5XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 3 +static inline uint32_t A5XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) +{ + return ((val) << A5XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A5XX_RB_MRT_CONTROL_ROP_CODE__MASK; +} +#define A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x00000780 +#define A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 7 +static inline uint32_t A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) +{ + return ((val) << A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; +} + +static inline uint32_t REG_A5XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x0000e151 + 0x7*i0; } +#define A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f +#define A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0 +static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK; +} +#define A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0 +#define A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5 +static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) +{ + return ((val) << A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK; +} +#define A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK 0x00001f00 +#define A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT 8 +static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK; +} +#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK 0x001f0000 +#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT 16 +static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK; +} +#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000 +#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21 +static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) +{ + return ((val) << A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK; +} +#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK 0x1f000000 +#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT 24 +static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK; +} + +static inline uint32_t REG_A5XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x0000e152 + 0x7*i0; } +#define A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t 
A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) +{ + return ((val) << A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK; +} +#define A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK 0x00000300 +#define A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT 8 +static inline uint32_t A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a5xx_tile_mode val) +{ + return ((val) << A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK; +} +#define A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00006000 +#define A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 13 +static inline uint32_t A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK; +} +#define A5XX_RB_MRT_BUF_INFO_COLOR_SRGB 0x00008000 + +static inline uint32_t REG_A5XX_RB_MRT_PITCH(uint32_t i0) { return 0x0000e153 + 0x7*i0; } +#define A5XX_RB_MRT_PITCH__MASK 0xffffffff +#define A5XX_RB_MRT_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_MRT_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_MRT_PITCH__SHIFT) & A5XX_RB_MRT_PITCH__MASK; +} + +static inline uint32_t REG_A5XX_RB_MRT_ARRAY_PITCH(uint32_t i0) { return 0x0000e154 + 0x7*i0; } +#define A5XX_RB_MRT_ARRAY_PITCH__MASK 0xffffffff +#define A5XX_RB_MRT_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_MRT_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_MRT_ARRAY_PITCH__SHIFT) & A5XX_RB_MRT_ARRAY_PITCH__MASK; +} + +static inline uint32_t REG_A5XX_RB_MRT_BASE_LO(uint32_t i0) { return 0x0000e155 + 0x7*i0; } + +static inline uint32_t REG_A5XX_RB_MRT_BASE_HI(uint32_t i0) { return 0x0000e156 + 0x7*i0; } + +#define REG_A5XX_RB_BLEND_RED 0x0000e1a0 +#define A5XX_RB_BLEND_RED_UINT__MASK 0x000000ff +#define A5XX_RB_BLEND_RED_UINT__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_RED_UINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_RED_UINT__SHIFT) & A5XX_RB_BLEND_RED_UINT__MASK; +} +#define A5XX_RB_BLEND_RED_SINT__MASK 0x0000ff00 +#define A5XX_RB_BLEND_RED_SINT__SHIFT 8 +static inline uint32_t A5XX_RB_BLEND_RED_SINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_RED_SINT__SHIFT) & A5XX_RB_BLEND_RED_SINT__MASK; +} +#define A5XX_RB_BLEND_RED_FLOAT__MASK 0xffff0000 +#define A5XX_RB_BLEND_RED_FLOAT__SHIFT 16 +static inline uint32_t A5XX_RB_BLEND_RED_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A5XX_RB_BLEND_RED_FLOAT__SHIFT) & A5XX_RB_BLEND_RED_FLOAT__MASK; +} + +#define REG_A5XX_RB_BLEND_RED_F32 0x0000e1a1 +#define A5XX_RB_BLEND_RED_F32__MASK 0xffffffff +#define A5XX_RB_BLEND_RED_F32__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_RED_F32(float val) +{ + return ((fui(val)) << A5XX_RB_BLEND_RED_F32__SHIFT) & A5XX_RB_BLEND_RED_F32__MASK; +} + +#define REG_A5XX_RB_BLEND_GREEN 0x0000e1a2 +#define A5XX_RB_BLEND_GREEN_UINT__MASK 0x000000ff +#define A5XX_RB_BLEND_GREEN_UINT__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_GREEN_UINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_GREEN_UINT__SHIFT) & A5XX_RB_BLEND_GREEN_UINT__MASK; +} +#define A5XX_RB_BLEND_GREEN_SINT__MASK 0x0000ff00 +#define A5XX_RB_BLEND_GREEN_SINT__SHIFT 8 +static inline uint32_t A5XX_RB_BLEND_GREEN_SINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_GREEN_SINT__SHIFT) & A5XX_RB_BLEND_GREEN_SINT__MASK; +} +#define A5XX_RB_BLEND_GREEN_FLOAT__MASK 0xffff0000 +#define A5XX_RB_BLEND_GREEN_FLOAT__SHIFT 16 +static inline uint32_t A5XX_RB_BLEND_GREEN_FLOAT(float val) +{ + return ((util_float_to_half(val)) << 
A5XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A5XX_RB_BLEND_GREEN_FLOAT__MASK; +} + +#define REG_A5XX_RB_BLEND_GREEN_F32 0x0000e1a3 +#define A5XX_RB_BLEND_GREEN_F32__MASK 0xffffffff +#define A5XX_RB_BLEND_GREEN_F32__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_GREEN_F32(float val) +{ + return ((fui(val)) << A5XX_RB_BLEND_GREEN_F32__SHIFT) & A5XX_RB_BLEND_GREEN_F32__MASK; +} + +#define REG_A5XX_RB_BLEND_BLUE 0x0000e1a4 +#define A5XX_RB_BLEND_BLUE_UINT__MASK 0x000000ff +#define A5XX_RB_BLEND_BLUE_UINT__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_BLUE_UINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_BLUE_UINT__SHIFT) & A5XX_RB_BLEND_BLUE_UINT__MASK; +} +#define A5XX_RB_BLEND_BLUE_SINT__MASK 0x0000ff00 +#define A5XX_RB_BLEND_BLUE_SINT__SHIFT 8 +static inline uint32_t A5XX_RB_BLEND_BLUE_SINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_BLUE_SINT__SHIFT) & A5XX_RB_BLEND_BLUE_SINT__MASK; +} +#define A5XX_RB_BLEND_BLUE_FLOAT__MASK 0xffff0000 +#define A5XX_RB_BLEND_BLUE_FLOAT__SHIFT 16 +static inline uint32_t A5XX_RB_BLEND_BLUE_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A5XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A5XX_RB_BLEND_BLUE_FLOAT__MASK; +} + +#define REG_A5XX_RB_BLEND_BLUE_F32 0x0000e1a5 +#define A5XX_RB_BLEND_BLUE_F32__MASK 0xffffffff +#define A5XX_RB_BLEND_BLUE_F32__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_BLUE_F32(float val) +{ + return ((fui(val)) << A5XX_RB_BLEND_BLUE_F32__SHIFT) & A5XX_RB_BLEND_BLUE_F32__MASK; +} + +#define REG_A5XX_RB_BLEND_ALPHA 0x0000e1a6 +#define A5XX_RB_BLEND_ALPHA_UINT__MASK 0x000000ff +#define A5XX_RB_BLEND_ALPHA_UINT__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_ALPHA_UINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_ALPHA_UINT__SHIFT) & A5XX_RB_BLEND_ALPHA_UINT__MASK; +} +#define A5XX_RB_BLEND_ALPHA_SINT__MASK 0x0000ff00 +#define A5XX_RB_BLEND_ALPHA_SINT__SHIFT 8 +static inline uint32_t A5XX_RB_BLEND_ALPHA_SINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_ALPHA_SINT__SHIFT) & A5XX_RB_BLEND_ALPHA_SINT__MASK; +} +#define A5XX_RB_BLEND_ALPHA_FLOAT__MASK 0xffff0000 +#define A5XX_RB_BLEND_ALPHA_FLOAT__SHIFT 16 +static inline uint32_t A5XX_RB_BLEND_ALPHA_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A5XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A5XX_RB_BLEND_ALPHA_FLOAT__MASK; +} + +#define REG_A5XX_RB_BLEND_ALPHA_F32 0x0000e1a7 +#define A5XX_RB_BLEND_ALPHA_F32__MASK 0xffffffff +#define A5XX_RB_BLEND_ALPHA_F32__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_ALPHA_F32(float val) +{ + return ((fui(val)) << A5XX_RB_BLEND_ALPHA_F32__SHIFT) & A5XX_RB_BLEND_ALPHA_F32__MASK; +} + +#define REG_A5XX_RB_ALPHA_CONTROL 0x0000e1a8 +#define A5XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK 0x000000ff +#define A5XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT 0 +static inline uint32_t A5XX_RB_ALPHA_CONTROL_ALPHA_REF(uint32_t val) +{ + return ((val) << A5XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT) & A5XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK; +} +#define A5XX_RB_ALPHA_CONTROL_ALPHA_TEST 0x00000100 +#define A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK 0x00000e00 +#define A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT 9 +static inline uint32_t A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val) +{ + return ((val) << A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK; +} + +#define REG_A5XX_RB_BLEND_CNTL 0x0000e1a9 +#define A5XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK 0x000000ff +#define A5XX_RB_BLEND_CNTL_ENABLE_BLEND__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_CNTL_ENABLE_BLEND(uint32_t val) +{ + return ((val) << 
A5XX_RB_BLEND_CNTL_ENABLE_BLEND__SHIFT) & A5XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK; +} +#define A5XX_RB_BLEND_CNTL_INDEPENDENT_BLEND 0x00000100 +#define A5XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK 0xffff0000 +#define A5XX_RB_BLEND_CNTL_SAMPLE_MASK__SHIFT 16 +static inline uint32_t A5XX_RB_BLEND_CNTL_SAMPLE_MASK(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_CNTL_SAMPLE_MASK__SHIFT) & A5XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK; +} + +#define REG_A5XX_RB_DEPTH_PLANE_CNTL 0x0000e1b0 +#define A5XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z 0x00000001 +#define A5XX_RB_DEPTH_PLANE_CNTL_UNK1 0x00000002 + +#define REG_A5XX_RB_DEPTH_CNTL 0x0000e1b1 +#define A5XX_RB_DEPTH_CNTL_Z_ENABLE 0x00000001 +#define A5XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE 0x00000002 +#define A5XX_RB_DEPTH_CNTL_ZFUNC__MASK 0x0000001c +#define A5XX_RB_DEPTH_CNTL_ZFUNC__SHIFT 2 +static inline uint32_t A5XX_RB_DEPTH_CNTL_ZFUNC(enum adreno_compare_func val) +{ + return ((val) << A5XX_RB_DEPTH_CNTL_ZFUNC__SHIFT) & A5XX_RB_DEPTH_CNTL_ZFUNC__MASK; +} +#define A5XX_RB_DEPTH_CNTL_Z_TEST_ENABLE 0x00000040 + +#define REG_A5XX_RB_DEPTH_BUFFER_INFO 0x0000e1b2 +#define A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK 0x00000007 +#define A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT 0 +static inline uint32_t A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a5xx_depth_format val) +{ + return ((val) << A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT) & A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK; +} + +#define REG_A5XX_RB_DEPTH_BUFFER_BASE_LO 0x0000e1b3 + +#define REG_A5XX_RB_DEPTH_BUFFER_BASE_HI 0x0000e1b4 + +#define REG_A5XX_RB_DEPTH_BUFFER_PITCH 0x0000e1b5 +#define A5XX_RB_DEPTH_BUFFER_PITCH__MASK 0xffffffff +#define A5XX_RB_DEPTH_BUFFER_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_DEPTH_BUFFER_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_DEPTH_BUFFER_PITCH__SHIFT) & A5XX_RB_DEPTH_BUFFER_PITCH__MASK; +} + +#define REG_A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH 0x0000e1b6 +#define A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK 0xffffffff +#define A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT) & A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK; +} + +#define REG_A5XX_RB_STENCIL_CONTROL 0x0000e1c0 +#define A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE 0x00000001 +#define A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF 0x00000002 +#define A5XX_RB_STENCIL_CONTROL_STENCIL_READ 0x00000004 +#define A5XX_RB_STENCIL_CONTROL_FUNC__MASK 0x00000700 +#define A5XX_RB_STENCIL_CONTROL_FUNC__SHIFT 8 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val) +{ + return ((val) << A5XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A5XX_RB_STENCIL_CONTROL_FUNC__MASK; +} +#define A5XX_RB_STENCIL_CONTROL_FAIL__MASK 0x00003800 +#define A5XX_RB_STENCIL_CONTROL_FAIL__SHIFT 11 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val) +{ + return ((val) << A5XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A5XX_RB_STENCIL_CONTROL_FAIL__MASK; +} +#define A5XX_RB_STENCIL_CONTROL_ZPASS__MASK 0x0001c000 +#define A5XX_RB_STENCIL_CONTROL_ZPASS__SHIFT 14 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZPASS(enum adreno_stencil_op val) +{ + return ((val) << A5XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A5XX_RB_STENCIL_CONTROL_ZPASS__MASK; +} +#define A5XX_RB_STENCIL_CONTROL_ZFAIL__MASK 0x000e0000 +#define A5XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT 17 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val) +{ + return ((val) 
<< A5XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) & A5XX_RB_STENCIL_CONTROL_ZFAIL__MASK; +} +#define A5XX_RB_STENCIL_CONTROL_FUNC_BF__MASK 0x00700000 +#define A5XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT 20 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val) +{ + return ((val) << A5XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_FUNC_BF__MASK; +} +#define A5XX_RB_STENCIL_CONTROL_FAIL_BF__MASK 0x03800000 +#define A5XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT 23 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A5XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_FAIL_BF__MASK; +} +#define A5XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK 0x1c000000 +#define A5XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT 26 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val) +{ + return ((val) << A5XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK; +} +#define A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK 0xe0000000 +#define A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT 29 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK; +} + +#define REG_A5XX_RB_STENCIL_INFO 0x0000e1c1 +#define A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL 0x00000001 + +#define REG_A5XX_RB_STENCIL_BASE_LO 0x0000e1c2 + +#define REG_A5XX_RB_STENCIL_BASE_HI 0x0000e1c3 + +#define REG_A5XX_RB_STENCIL_PITCH 0x0000e1c4 +#define A5XX_RB_STENCIL_PITCH__MASK 0xffffffff +#define A5XX_RB_STENCIL_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_STENCIL_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_STENCIL_PITCH__SHIFT) & A5XX_RB_STENCIL_PITCH__MASK; +} + +#define REG_A5XX_RB_STENCIL_ARRAY_PITCH 0x0000e1c5 +#define A5XX_RB_STENCIL_ARRAY_PITCH__MASK 0xffffffff +#define A5XX_RB_STENCIL_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_STENCIL_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_STENCIL_ARRAY_PITCH__SHIFT) & A5XX_RB_STENCIL_ARRAY_PITCH__MASK; +} + +#define REG_A5XX_RB_STENCILREFMASK 0x0000e1c6 +#define A5XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff +#define A5XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0 +static inline uint32_t A5XX_RB_STENCILREFMASK_STENCILREF(uint32_t val) +{ + return ((val) << A5XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A5XX_RB_STENCILREFMASK_STENCILREF__MASK; +} +#define A5XX_RB_STENCILREFMASK_STENCILMASK__MASK 0x0000ff00 +#define A5XX_RB_STENCILREFMASK_STENCILMASK__SHIFT 8 +static inline uint32_t A5XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val) +{ + return ((val) << A5XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A5XX_RB_STENCILREFMASK_STENCILMASK__MASK; +} +#define A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK 0x00ff0000 +#define A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT 16 +static inline uint32_t A5XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val) +{ + return ((val) << A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK; +} + +#define REG_A5XX_RB_STENCILREFMASK_BF 0x0000e1c7 +#define A5XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff +#define A5XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0 +static inline uint32_t A5XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val) +{ + return ((val) << A5XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A5XX_RB_STENCILREFMASK_BF_STENCILREF__MASK; +} +#define A5XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00 
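[Editorial aside, not part of this diff: the front-face stencil reference/mask register defined just above is built the same way, by OR'ing its three byte-wide fields. A minimal sketch assuming this header and <stdint.h> are included; pack_stencilrefmask() is a hypothetical illustrative name.]

#include <stdint.h>

/* Value for REG_A5XX_RB_STENCILREFMASK: reference, compare mask and
 * write mask each occupy one byte; the helpers mask inputs to 8 bits. */
static inline uint32_t
pack_stencilrefmask(uint8_t ref, uint8_t cmp_mask, uint8_t write_mask)
{
	return A5XX_RB_STENCILREFMASK_STENCILREF(ref) |
	       A5XX_RB_STENCILREFMASK_STENCILMASK(cmp_mask) |
	       A5XX_RB_STENCILREFMASK_STENCILWRITEMASK(write_mask);
}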
+#define A5XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8 +static inline uint32_t A5XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val) +{ + return ((val) << A5XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A5XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK; +} +#define A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000 +#define A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16 +static inline uint32_t A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val) +{ + return ((val) << A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK; +} + +#define REG_A5XX_RB_WINDOW_OFFSET 0x0000e1d0 +#define A5XX_RB_WINDOW_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_RB_WINDOW_OFFSET_X__MASK 0x00007fff +#define A5XX_RB_WINDOW_OFFSET_X__SHIFT 0 +static inline uint32_t A5XX_RB_WINDOW_OFFSET_X(uint32_t val) +{ + return ((val) << A5XX_RB_WINDOW_OFFSET_X__SHIFT) & A5XX_RB_WINDOW_OFFSET_X__MASK; +} +#define A5XX_RB_WINDOW_OFFSET_Y__MASK 0x7fff0000 +#define A5XX_RB_WINDOW_OFFSET_Y__SHIFT 16 +static inline uint32_t A5XX_RB_WINDOW_OFFSET_Y(uint32_t val) +{ + return ((val) << A5XX_RB_WINDOW_OFFSET_Y__SHIFT) & A5XX_RB_WINDOW_OFFSET_Y__MASK; +} + +#define REG_A5XX_RB_SAMPLE_COUNT_CONTROL 0x0000e1d1 +#define A5XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002 + +#define REG_A5XX_RB_BLIT_CNTL 0x0000e210 +#define A5XX_RB_BLIT_CNTL_BUF__MASK 0x0000000f +#define A5XX_RB_BLIT_CNTL_BUF__SHIFT 0 +static inline uint32_t A5XX_RB_BLIT_CNTL_BUF(enum a5xx_blit_buf val) +{ + return ((val) << A5XX_RB_BLIT_CNTL_BUF__SHIFT) & A5XX_RB_BLIT_CNTL_BUF__MASK; +} + +#define REG_A5XX_RB_RESOLVE_CNTL_1 0x0000e211 +#define A5XX_RB_RESOLVE_CNTL_1_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_RB_RESOLVE_CNTL_1_X__MASK 0x00007fff +#define A5XX_RB_RESOLVE_CNTL_1_X__SHIFT 0 +static inline uint32_t A5XX_RB_RESOLVE_CNTL_1_X(uint32_t val) +{ + return ((val) << A5XX_RB_RESOLVE_CNTL_1_X__SHIFT) & A5XX_RB_RESOLVE_CNTL_1_X__MASK; +} +#define A5XX_RB_RESOLVE_CNTL_1_Y__MASK 0x7fff0000 +#define A5XX_RB_RESOLVE_CNTL_1_Y__SHIFT 16 +static inline uint32_t A5XX_RB_RESOLVE_CNTL_1_Y(uint32_t val) +{ + return ((val) << A5XX_RB_RESOLVE_CNTL_1_Y__SHIFT) & A5XX_RB_RESOLVE_CNTL_1_Y__MASK; +} + +#define REG_A5XX_RB_RESOLVE_CNTL_2 0x0000e212 +#define A5XX_RB_RESOLVE_CNTL_2_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_RB_RESOLVE_CNTL_2_X__MASK 0x00007fff +#define A5XX_RB_RESOLVE_CNTL_2_X__SHIFT 0 +static inline uint32_t A5XX_RB_RESOLVE_CNTL_2_X(uint32_t val) +{ + return ((val) << A5XX_RB_RESOLVE_CNTL_2_X__SHIFT) & A5XX_RB_RESOLVE_CNTL_2_X__MASK; +} +#define A5XX_RB_RESOLVE_CNTL_2_Y__MASK 0x7fff0000 +#define A5XX_RB_RESOLVE_CNTL_2_Y__SHIFT 16 +static inline uint32_t A5XX_RB_RESOLVE_CNTL_2_Y(uint32_t val) +{ + return ((val) << A5XX_RB_RESOLVE_CNTL_2_Y__SHIFT) & A5XX_RB_RESOLVE_CNTL_2_Y__MASK; +} + +#define REG_A5XX_RB_RESOLVE_CNTL_3 0x0000e213 + +#define REG_A5XX_RB_BLIT_DST_LO 0x0000e214 + +#define REG_A5XX_RB_BLIT_DST_HI 0x0000e215 + +#define REG_A5XX_RB_BLIT_DST_PITCH 0x0000e216 +#define A5XX_RB_BLIT_DST_PITCH__MASK 0xffffffff +#define A5XX_RB_BLIT_DST_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_BLIT_DST_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_BLIT_DST_PITCH__SHIFT) & A5XX_RB_BLIT_DST_PITCH__MASK; +} + +#define REG_A5XX_RB_BLIT_DST_ARRAY_PITCH 0x0000e217 +#define A5XX_RB_BLIT_DST_ARRAY_PITCH__MASK 0xffffffff +#define A5XX_RB_BLIT_DST_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_BLIT_DST_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << 
A5XX_RB_BLIT_DST_ARRAY_PITCH__SHIFT) & A5XX_RB_BLIT_DST_ARRAY_PITCH__MASK; +} + +#define REG_A5XX_RB_CLEAR_COLOR_DW0 0x0000e218 + +#define REG_A5XX_RB_CLEAR_COLOR_DW1 0x0000e219 + +#define REG_A5XX_RB_CLEAR_COLOR_DW2 0x0000e21a + +#define REG_A5XX_RB_CLEAR_COLOR_DW3 0x0000e21b + +#define REG_A5XX_RB_CLEAR_CNTL 0x0000e21c +#define A5XX_RB_CLEAR_CNTL_FAST_CLEAR 0x00000002 +#define A5XX_RB_CLEAR_CNTL_MASK__MASK 0x000000f0 +#define A5XX_RB_CLEAR_CNTL_MASK__SHIFT 4 +static inline uint32_t A5XX_RB_CLEAR_CNTL_MASK(uint32_t val) +{ + return ((val) << A5XX_RB_CLEAR_CNTL_MASK__SHIFT) & A5XX_RB_CLEAR_CNTL_MASK__MASK; +} + +#define REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO 0x0000e240 + +#define REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_HI 0x0000e241 + +#define REG_A5XX_RB_DEPTH_FLAG_BUFFER_PITCH 0x0000e242 + +static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER(uint32_t i0) { return 0x0000e243 + 0x4*i0; } + +static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_ADDR_LO(uint32_t i0) { return 0x0000e243 + 0x4*i0; } + +static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_ADDR_HI(uint32_t i0) { return 0x0000e244 + 0x4*i0; } + +static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_PITCH(uint32_t i0) { return 0x0000e245 + 0x4*i0; } +#define A5XX_RB_MRT_FLAG_BUFFER_PITCH__MASK 0xffffffff +#define A5XX_RB_MRT_FLAG_BUFFER_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_MRT_FLAG_BUFFER_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_MRT_FLAG_BUFFER_PITCH__SHIFT) & A5XX_RB_MRT_FLAG_BUFFER_PITCH__MASK; +} + +static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(uint32_t i0) { return 0x0000e246 + 0x4*i0; } +#define A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__MASK 0xffffffff +#define A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__SHIFT) & A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__MASK; +} + +#define REG_A5XX_RB_BLIT_FLAG_DST_LO 0x0000e263 + +#define REG_A5XX_RB_BLIT_FLAG_DST_HI 0x0000e264 + +#define REG_A5XX_RB_BLIT_FLAG_DST_PITCH 0x0000e265 +#define A5XX_RB_BLIT_FLAG_DST_PITCH__MASK 0xffffffff +#define A5XX_RB_BLIT_FLAG_DST_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_BLIT_FLAG_DST_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_BLIT_FLAG_DST_PITCH__SHIFT) & A5XX_RB_BLIT_FLAG_DST_PITCH__MASK; +} + +#define REG_A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH 0x0000e266 +#define A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__MASK 0xffffffff +#define A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__SHIFT) & A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__MASK; +} + +#define REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO 0x0000e267 + +#define REG_A5XX_RB_SAMPLE_COUNT_ADDR_HI 0x0000e268 + +#define REG_A5XX_VPC_CNTL_0 0x0000e280 +#define A5XX_VPC_CNTL_0_STRIDE_IN_VPC__MASK 0x0000007f +#define A5XX_VPC_CNTL_0_STRIDE_IN_VPC__SHIFT 0 +static inline uint32_t A5XX_VPC_CNTL_0_STRIDE_IN_VPC(uint32_t val) +{ + return ((val) << A5XX_VPC_CNTL_0_STRIDE_IN_VPC__SHIFT) & A5XX_VPC_CNTL_0_STRIDE_IN_VPC__MASK; +} +#define A5XX_VPC_CNTL_0_VARYING 0x00000800 + +static inline uint32_t REG_A5XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x0000e282 + 0x1*i0; } + +static inline uint32_t REG_A5XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x0000e282 + 0x1*i0; } + +static inline uint32_t REG_A5XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 
0x0000e28a + 0x1*i0; } + +static inline uint32_t REG_A5XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x0000e28a + 0x1*i0; } + +#define REG_A5XX_UNKNOWN_E292 0x0000e292 + +#define REG_A5XX_UNKNOWN_E293 0x0000e293 + +static inline uint32_t REG_A5XX_VPC_VAR(uint32_t i0) { return 0x0000e294 + 0x1*i0; } + +static inline uint32_t REG_A5XX_VPC_VAR_DISABLE(uint32_t i0) { return 0x0000e294 + 0x1*i0; } + +#define REG_A5XX_VPC_GS_SIV_CNTL 0x0000e298 + +#define REG_A5XX_UNKNOWN_E29A 0x0000e29a + +#define REG_A5XX_VPC_PACK 0x0000e29d +#define A5XX_VPC_PACK_NUMNONPOSVAR__MASK 0x000000ff +#define A5XX_VPC_PACK_NUMNONPOSVAR__SHIFT 0 +static inline uint32_t A5XX_VPC_PACK_NUMNONPOSVAR(uint32_t val) +{ + return ((val) << A5XX_VPC_PACK_NUMNONPOSVAR__SHIFT) & A5XX_VPC_PACK_NUMNONPOSVAR__MASK; +} +#define A5XX_VPC_PACK_PSIZELOC__MASK 0x0000ff00 +#define A5XX_VPC_PACK_PSIZELOC__SHIFT 8 +static inline uint32_t A5XX_VPC_PACK_PSIZELOC(uint32_t val) +{ + return ((val) << A5XX_VPC_PACK_PSIZELOC__SHIFT) & A5XX_VPC_PACK_PSIZELOC__MASK; +} + +#define REG_A5XX_VPC_FS_PRIMITIVEID_CNTL 0x0000e2a0 + +#define REG_A5XX_VPC_SO_BUF_CNTL 0x0000e2a1 +#define A5XX_VPC_SO_BUF_CNTL_BUF0 0x00000001 +#define A5XX_VPC_SO_BUF_CNTL_BUF1 0x00000008 +#define A5XX_VPC_SO_BUF_CNTL_BUF2 0x00000040 +#define A5XX_VPC_SO_BUF_CNTL_BUF3 0x00000200 +#define A5XX_VPC_SO_BUF_CNTL_ENABLE 0x00008000 + +#define REG_A5XX_VPC_SO_OVERRIDE 0x0000e2a2 +#define A5XX_VPC_SO_OVERRIDE_SO_DISABLE 0x00000001 + +#define REG_A5XX_VPC_SO_CNTL 0x0000e2a3 +#define A5XX_VPC_SO_CNTL_ENABLE 0x00010000 + +#define REG_A5XX_VPC_SO_PROG 0x0000e2a4 +#define A5XX_VPC_SO_PROG_A_BUF__MASK 0x00000003 +#define A5XX_VPC_SO_PROG_A_BUF__SHIFT 0 +static inline uint32_t A5XX_VPC_SO_PROG_A_BUF(uint32_t val) +{ + return ((val) << A5XX_VPC_SO_PROG_A_BUF__SHIFT) & A5XX_VPC_SO_PROG_A_BUF__MASK; +} +#define A5XX_VPC_SO_PROG_A_OFF__MASK 0x000007fc +#define A5XX_VPC_SO_PROG_A_OFF__SHIFT 2 +static inline uint32_t A5XX_VPC_SO_PROG_A_OFF(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << A5XX_VPC_SO_PROG_A_OFF__SHIFT) & A5XX_VPC_SO_PROG_A_OFF__MASK; +} +#define A5XX_VPC_SO_PROG_A_EN 0x00000800 +#define A5XX_VPC_SO_PROG_B_BUF__MASK 0x00003000 +#define A5XX_VPC_SO_PROG_B_BUF__SHIFT 12 +static inline uint32_t A5XX_VPC_SO_PROG_B_BUF(uint32_t val) +{ + return ((val) << A5XX_VPC_SO_PROG_B_BUF__SHIFT) & A5XX_VPC_SO_PROG_B_BUF__MASK; +} +#define A5XX_VPC_SO_PROG_B_OFF__MASK 0x007fc000 +#define A5XX_VPC_SO_PROG_B_OFF__SHIFT 14 +static inline uint32_t A5XX_VPC_SO_PROG_B_OFF(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << A5XX_VPC_SO_PROG_B_OFF__SHIFT) & A5XX_VPC_SO_PROG_B_OFF__MASK; +} +#define A5XX_VPC_SO_PROG_B_EN 0x00800000 + +static inline uint32_t REG_A5XX_VPC_SO(uint32_t i0) { return 0x0000e2a7 + 0x7*i0; } + +static inline uint32_t REG_A5XX_VPC_SO_BUFFER_BASE_LO(uint32_t i0) { return 0x0000e2a7 + 0x7*i0; } + +static inline uint32_t REG_A5XX_VPC_SO_BUFFER_BASE_HI(uint32_t i0) { return 0x0000e2a8 + 0x7*i0; } + +static inline uint32_t REG_A5XX_VPC_SO_BUFFER_SIZE(uint32_t i0) { return 0x0000e2a9 + 0x7*i0; } + +static inline uint32_t REG_A5XX_VPC_SO_NCOMP(uint32_t i0) { return 0x0000e2aa + 0x7*i0; } + +static inline uint32_t REG_A5XX_VPC_SO_BUFFER_OFFSET(uint32_t i0) { return 0x0000e2ab + 0x7*i0; } + +static inline uint32_t REG_A5XX_VPC_SO_FLUSH_BASE_LO(uint32_t i0) { return 0x0000e2ac + 0x7*i0; } + +static inline uint32_t REG_A5XX_VPC_SO_FLUSH_BASE_HI(uint32_t i0) { return 0x0000e2ad + 0x7*i0; } + +#define REG_A5XX_PC_PRIMITIVE_CNTL 0x0000e384 +#define 
A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__MASK 0x0000007f +#define A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__SHIFT 0 +static inline uint32_t A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC(uint32_t val) +{ + return ((val) << A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__SHIFT) & A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__MASK; +} +#define A5XX_PC_PRIMITIVE_CNTL_PRIMITIVE_RESTART 0x00000100 +#define A5XX_PC_PRIMITIVE_CNTL_PROVOKING_VTX_LAST 0x00000400 + +#define REG_A5XX_PC_PRIM_VTX_CNTL 0x0000e385 +#define A5XX_PC_PRIM_VTX_CNTL_PSIZE 0x00000800 + +#define REG_A5XX_PC_RASTER_CNTL 0x0000e388 +#define A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__MASK 0x00000007 +#define A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__SHIFT 0 +static inline uint32_t A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__SHIFT) & A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__MASK; +} +#define A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__MASK 0x00000038 +#define A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__SHIFT 3 +static inline uint32_t A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__SHIFT) & A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__MASK; +} +#define A5XX_PC_RASTER_CNTL_POLYMODE_ENABLE 0x00000040 + +#define REG_A5XX_UNKNOWN_E389 0x0000e389 + +#define REG_A5XX_PC_RESTART_INDEX 0x0000e38c + +#define REG_A5XX_UNKNOWN_E38D 0x0000e38d + +#define REG_A5XX_PC_GS_PARAM 0x0000e38e + +#define REG_A5XX_PC_HS_PARAM 0x0000e38f + +#define REG_A5XX_PC_POWER_CNTL 0x0000e3b0 + +#define REG_A5XX_VFD_CONTROL_0 0x0000e400 +#define A5XX_VFD_CONTROL_0_VTXCNT__MASK 0x0000003f +#define A5XX_VFD_CONTROL_0_VTXCNT__SHIFT 0 +static inline uint32_t A5XX_VFD_CONTROL_0_VTXCNT(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_0_VTXCNT__SHIFT) & A5XX_VFD_CONTROL_0_VTXCNT__MASK; +} + +#define REG_A5XX_VFD_CONTROL_1 0x0000e401 +#define A5XX_VFD_CONTROL_1_REGID4VTX__MASK 0x000000ff +#define A5XX_VFD_CONTROL_1_REGID4VTX__SHIFT 0 +static inline uint32_t A5XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A5XX_VFD_CONTROL_1_REGID4VTX__MASK; +} +#define A5XX_VFD_CONTROL_1_REGID4INST__MASK 0x0000ff00 +#define A5XX_VFD_CONTROL_1_REGID4INST__SHIFT 8 +static inline uint32_t A5XX_VFD_CONTROL_1_REGID4INST(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A5XX_VFD_CONTROL_1_REGID4INST__MASK; +} + +#define REG_A5XX_VFD_CONTROL_2 0x0000e402 + +#define REG_A5XX_VFD_CONTROL_3 0x0000e403 + +#define REG_A5XX_VFD_CONTROL_4 0x0000e404 + +#define REG_A5XX_VFD_CONTROL_5 0x0000e405 + +#define REG_A5XX_VFD_INDEX_OFFSET 0x0000e408 + +#define REG_A5XX_VFD_INSTANCE_START_OFFSET 0x0000e409 + +static inline uint32_t REG_A5XX_VFD_FETCH(uint32_t i0) { return 0x0000e40a + 0x4*i0; } + +static inline uint32_t REG_A5XX_VFD_FETCH_BASE_LO(uint32_t i0) { return 0x0000e40a + 0x4*i0; } + +static inline uint32_t REG_A5XX_VFD_FETCH_BASE_HI(uint32_t i0) { return 0x0000e40b + 0x4*i0; } + +static inline uint32_t REG_A5XX_VFD_FETCH_SIZE(uint32_t i0) { return 0x0000e40c + 0x4*i0; } + +static inline uint32_t REG_A5XX_VFD_FETCH_STRIDE(uint32_t i0) { return 0x0000e40d + 0x4*i0; } + +static inline uint32_t REG_A5XX_VFD_DECODE(uint32_t i0) { return 0x0000e48a + 0x2*i0; } + +static inline uint32_t REG_A5XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x0000e48a + 0x2*i0; } +#define A5XX_VFD_DECODE_INSTR_IDX__MASK 0x0000001f +#define A5XX_VFD_DECODE_INSTR_IDX__SHIFT 0 +static inline uint32_t 
A5XX_VFD_DECODE_INSTR_IDX(uint32_t val) +{ + return ((val) << A5XX_VFD_DECODE_INSTR_IDX__SHIFT) & A5XX_VFD_DECODE_INSTR_IDX__MASK; +} +#define A5XX_VFD_DECODE_INSTR_INSTANCED 0x00020000 +#define A5XX_VFD_DECODE_INSTR_FORMAT__MASK 0x0ff00000 +#define A5XX_VFD_DECODE_INSTR_FORMAT__SHIFT 20 +static inline uint32_t A5XX_VFD_DECODE_INSTR_FORMAT(enum a5xx_vtx_fmt val) +{ + return ((val) << A5XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A5XX_VFD_DECODE_INSTR_FORMAT__MASK; +} +#define A5XX_VFD_DECODE_INSTR_SWAP__MASK 0x30000000 +#define A5XX_VFD_DECODE_INSTR_SWAP__SHIFT 28 +static inline uint32_t A5XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A5XX_VFD_DECODE_INSTR_SWAP__SHIFT) & A5XX_VFD_DECODE_INSTR_SWAP__MASK; +} +#define A5XX_VFD_DECODE_INSTR_UNK30 0x40000000 +#define A5XX_VFD_DECODE_INSTR_FLOAT 0x80000000 + +static inline uint32_t REG_A5XX_VFD_DECODE_STEP_RATE(uint32_t i0) { return 0x0000e48b + 0x2*i0; } + +static inline uint32_t REG_A5XX_VFD_DEST_CNTL(uint32_t i0) { return 0x0000e4ca + 0x1*i0; } + +static inline uint32_t REG_A5XX_VFD_DEST_CNTL_INSTR(uint32_t i0) { return 0x0000e4ca + 0x1*i0; } +#define A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__MASK 0x0000000f +#define A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__SHIFT 0 +static inline uint32_t A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK(uint32_t val) +{ + return ((val) << A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__SHIFT) & A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__MASK; +} +#define A5XX_VFD_DEST_CNTL_INSTR_REGID__MASK 0x00000ff0 +#define A5XX_VFD_DEST_CNTL_INSTR_REGID__SHIFT 4 +static inline uint32_t A5XX_VFD_DEST_CNTL_INSTR_REGID(uint32_t val) +{ + return ((val) << A5XX_VFD_DEST_CNTL_INSTR_REGID__SHIFT) & A5XX_VFD_DEST_CNTL_INSTR_REGID__MASK; +} + +#define REG_A5XX_VFD_POWER_CNTL 0x0000e4f0 + +#define REG_A5XX_SP_SP_CNTL 0x0000e580 + +#define REG_A5XX_SP_VS_CONFIG 0x0000e584 +#define A5XX_SP_VS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_VS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_SP_FS_CONFIG 0x0000e585 +#define A5XX_SP_FS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_FS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_SP_HS_CONFIG 0x0000e586 +#define A5XX_SP_HS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & 
A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_HS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_SP_DS_CONFIG 0x0000e587 +#define A5XX_SP_DS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_DS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_SP_GS_CONFIG 0x0000e588 +#define A5XX_SP_GS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_GS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_SP_CS_CONFIG 0x0000e589 +#define A5XX_SP_CS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_CS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_SP_VS_CONFIG_MAX_CONST 0x0000e58a + +#define REG_A5XX_SP_FS_CONFIG_MAX_CONST 0x0000e58b + +#define REG_A5XX_SP_VS_CTRL_REG0 0x0000e590 +#define A5XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00000008 +#define A5XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 3 +static inline uint32_t A5XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_VS_CTRL_REG0_THREADSIZE__MASK; +} +#define A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A5XX_SP_VS_CTRL_REG0_VARYING 0x00010000 +#define 
A5XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x00100000 +#define A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 +#define A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT 25 +static inline uint32_t A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK; +} + +#define REG_A5XX_SP_PRIMITIVE_CNTL 0x0000e592 +#define A5XX_SP_PRIMITIVE_CNTL_VSOUT__MASK 0x0000001f +#define A5XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT 0 +static inline uint32_t A5XX_SP_PRIMITIVE_CNTL_VSOUT(uint32_t val) +{ + return ((val) << A5XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT) & A5XX_SP_PRIMITIVE_CNTL_VSOUT__MASK; +} + +static inline uint32_t REG_A5XX_SP_VS_OUT(uint32_t i0) { return 0x0000e593 + 0x1*i0; } + +static inline uint32_t REG_A5XX_SP_VS_OUT_REG(uint32_t i0) { return 0x0000e593 + 0x1*i0; } +#define A5XX_SP_VS_OUT_REG_A_REGID__MASK 0x000000ff +#define A5XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 +static inline uint32_t A5XX_SP_VS_OUT_REG_A_REGID(uint32_t val) +{ + return ((val) << A5XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A5XX_SP_VS_OUT_REG_A_REGID__MASK; +} +#define A5XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00000f00 +#define A5XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 8 +static inline uint32_t A5XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val) +{ + return ((val) << A5XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A5XX_SP_VS_OUT_REG_A_COMPMASK__MASK; +} +#define A5XX_SP_VS_OUT_REG_B_REGID__MASK 0x00ff0000 +#define A5XX_SP_VS_OUT_REG_B_REGID__SHIFT 16 +static inline uint32_t A5XX_SP_VS_OUT_REG_B_REGID(uint32_t val) +{ + return ((val) << A5XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A5XX_SP_VS_OUT_REG_B_REGID__MASK; +} +#define A5XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x0f000000 +#define A5XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 24 +static inline uint32_t A5XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) +{ + return ((val) << A5XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A5XX_SP_VS_OUT_REG_B_COMPMASK__MASK; +} + +static inline uint32_t REG_A5XX_SP_VS_VPC_DST(uint32_t i0) { return 0x0000e5a3 + 0x1*i0; } + +static inline uint32_t REG_A5XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x0000e5a3 + 0x1*i0; } +#define A5XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff +#define A5XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 +static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) +{ + return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK; +} +#define A5XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 +#define A5XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8 +static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val) +{ + return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK; +} +#define A5XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 +#define A5XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16 +static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val) +{ + return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK; +} +#define A5XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 +#define A5XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24 +static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val) +{ + return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK; +} + +#define REG_A5XX_UNKNOWN_E5AB 0x0000e5ab + +#define REG_A5XX_SP_VS_OBJ_START_LO 0x0000e5ac + +#define REG_A5XX_SP_VS_OBJ_START_HI 0x0000e5ad + +#define REG_A5XX_SP_FS_CTRL_REG0 0x0000e5c0 +#define A5XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00000008 +#define A5XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 3 +static inline uint32_t 
A5XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_FS_CTRL_REG0_THREADSIZE__MASK; +} +#define A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A5XX_SP_FS_CTRL_REG0_VARYING 0x00010000 +#define A5XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x00100000 +#define A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 +#define A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT 25 +static inline uint32_t A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK; +} + +#define REG_A5XX_UNKNOWN_E5C2 0x0000e5c2 + +#define REG_A5XX_SP_FS_OBJ_START_LO 0x0000e5c3 + +#define REG_A5XX_SP_FS_OBJ_START_HI 0x0000e5c4 + +#define REG_A5XX_SP_BLEND_CNTL 0x0000e5c9 +#define A5XX_SP_BLEND_CNTL_ENABLED 0x00000001 +#define A5XX_SP_BLEND_CNTL_UNK8 0x00000100 + +#define REG_A5XX_SP_FS_OUTPUT_CNTL 0x0000e5ca +#define A5XX_SP_FS_OUTPUT_CNTL_MRT__MASK 0x0000000f +#define A5XX_SP_FS_OUTPUT_CNTL_MRT__SHIFT 0 +static inline uint32_t A5XX_SP_FS_OUTPUT_CNTL_MRT(uint32_t val) +{ + return ((val) << A5XX_SP_FS_OUTPUT_CNTL_MRT__SHIFT) & A5XX_SP_FS_OUTPUT_CNTL_MRT__MASK; +} +#define A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__MASK 0x00001fe0 +#define A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__SHIFT 5 +static inline uint32_t A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID(uint32_t val) +{ + return ((val) << A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__SHIFT) & A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__MASK; +} +#define A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__MASK 0x001fe000 +#define A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__SHIFT 13 +static inline uint32_t A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID(uint32_t val) +{ + return ((val) << A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__SHIFT) & A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__MASK; +} + +static inline uint32_t REG_A5XX_SP_FS_OUTPUT(uint32_t i0) { return 0x0000e5cb + 0x1*i0; } + +static inline uint32_t REG_A5XX_SP_FS_OUTPUT_REG(uint32_t i0) { return 0x0000e5cb + 0x1*i0; } +#define A5XX_SP_FS_OUTPUT_REG_REGID__MASK 0x000000ff +#define A5XX_SP_FS_OUTPUT_REG_REGID__SHIFT 0 +static inline uint32_t A5XX_SP_FS_OUTPUT_REG_REGID(uint32_t val) +{ + return ((val) << A5XX_SP_FS_OUTPUT_REG_REGID__SHIFT) & A5XX_SP_FS_OUTPUT_REG_REGID__MASK; +} +#define A5XX_SP_FS_OUTPUT_REG_HALF_PRECISION 0x00000100 + +static inline uint32_t REG_A5XX_SP_FS_MRT(uint32_t i0) { return 0x0000e5d3 + 0x1*i0; } + +static inline uint32_t REG_A5XX_SP_FS_MRT_REG(uint32_t i0) { return 0x0000e5d3 + 0x1*i0; } +#define A5XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK 0x000000ff +#define A5XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A5XX_SP_FS_MRT_REG_COLOR_FORMAT(enum a5xx_color_fmt val) +{ + return ((val) << A5XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT) & A5XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK; +} +#define A5XX_SP_FS_MRT_REG_COLOR_SINT 0x00000100 +#define A5XX_SP_FS_MRT_REG_COLOR_UINT 0x00000200 +#define A5XX_SP_FS_MRT_REG_COLOR_SRGB 0x00000400 + +#define 
REG_A5XX_UNKNOWN_E5DB 0x0000e5db + +#define REG_A5XX_UNKNOWN_E5F2 0x0000e5f2 + +#define REG_A5XX_SP_CS_OBJ_START_LO 0x0000e5f3 + +#define REG_A5XX_SP_CS_OBJ_START_HI 0x0000e5f4 + +#define REG_A5XX_SP_CS_CTRL_REG0 0x0000e5f0 +#define A5XX_SP_CS_CTRL_REG0_THREADSIZE__MASK 0x00000008 +#define A5XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT 3 +static inline uint32_t A5XX_SP_CS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_CS_CTRL_REG0_THREADSIZE__MASK; +} +#define A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A5XX_SP_CS_CTRL_REG0_VARYING 0x00010000 +#define A5XX_SP_CS_CTRL_REG0_PIXLODENABLE 0x00100000 +#define A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 +#define A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT 25 +static inline uint32_t A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK; +} + +#define REG_A5XX_UNKNOWN_E600 0x0000e600 + +#define REG_A5XX_UNKNOWN_E602 0x0000e602 + +#define REG_A5XX_SP_HS_OBJ_START_LO 0x0000e603 + +#define REG_A5XX_SP_HS_OBJ_START_HI 0x0000e604 + +#define REG_A5XX_UNKNOWN_E62B 0x0000e62b + +#define REG_A5XX_SP_DS_OBJ_START_LO 0x0000e62c + +#define REG_A5XX_SP_DS_OBJ_START_HI 0x0000e62d + +#define REG_A5XX_UNKNOWN_E640 0x0000e640 + +#define REG_A5XX_UNKNOWN_E65B 0x0000e65b + +#define REG_A5XX_SP_GS_OBJ_START_LO 0x0000e65c + +#define REG_A5XX_SP_GS_OBJ_START_HI 0x0000e65d + +#define REG_A5XX_TPL1_TP_RAS_MSAA_CNTL 0x0000e704 +#define A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__MASK; +} + +#define REG_A5XX_TPL1_TP_DEST_MSAA_CNTL 0x0000e705 +#define A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__MASK; +} +#define A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 + +#define REG_A5XX_TPL1_TP_BORDER_COLOR_BASE_ADDR_LO 0x0000e706 + +#define REG_A5XX_TPL1_TP_BORDER_COLOR_BASE_ADDR_HI 0x0000e707 + +#define REG_A5XX_TPL1_VS_TEX_COUNT 0x0000e700 + +#define REG_A5XX_TPL1_HS_TEX_COUNT 0x0000e701 + +#define REG_A5XX_TPL1_DS_TEX_COUNT 0x0000e702 + +#define REG_A5XX_TPL1_GS_TEX_COUNT 0x0000e703 + +#define REG_A5XX_TPL1_VS_TEX_SAMP_LO 0x0000e722 + +#define REG_A5XX_TPL1_VS_TEX_SAMP_HI 0x0000e723 + +#define REG_A5XX_TPL1_HS_TEX_SAMP_LO 0x0000e724 + +#define REG_A5XX_TPL1_HS_TEX_SAMP_HI 0x0000e725 + +#define REG_A5XX_TPL1_DS_TEX_SAMP_LO 0x0000e726 + +#define REG_A5XX_TPL1_DS_TEX_SAMP_HI 0x0000e727 + +#define REG_A5XX_TPL1_GS_TEX_SAMP_LO 0x0000e728 + +#define 
REG_A5XX_TPL1_GS_TEX_SAMP_HI 0x0000e729 + +#define REG_A5XX_TPL1_VS_TEX_CONST_LO 0x0000e72a + +#define REG_A5XX_TPL1_VS_TEX_CONST_HI 0x0000e72b + +#define REG_A5XX_TPL1_HS_TEX_CONST_LO 0x0000e72c + +#define REG_A5XX_TPL1_HS_TEX_CONST_HI 0x0000e72d + +#define REG_A5XX_TPL1_DS_TEX_CONST_LO 0x0000e72e + +#define REG_A5XX_TPL1_DS_TEX_CONST_HI 0x0000e72f + +#define REG_A5XX_TPL1_GS_TEX_CONST_LO 0x0000e730 + +#define REG_A5XX_TPL1_GS_TEX_CONST_HI 0x0000e731 + +#define REG_A5XX_TPL1_FS_TEX_COUNT 0x0000e750 + +#define REG_A5XX_TPL1_CS_TEX_COUNT 0x0000e751 + +#define REG_A5XX_TPL1_FS_TEX_SAMP_LO 0x0000e75a + +#define REG_A5XX_TPL1_FS_TEX_SAMP_HI 0x0000e75b + +#define REG_A5XX_TPL1_CS_TEX_SAMP_LO 0x0000e75c + +#define REG_A5XX_TPL1_CS_TEX_SAMP_HI 0x0000e75d + +#define REG_A5XX_TPL1_FS_TEX_CONST_LO 0x0000e75e + +#define REG_A5XX_TPL1_FS_TEX_CONST_HI 0x0000e75f + +#define REG_A5XX_TPL1_CS_TEX_CONST_LO 0x0000e760 + +#define REG_A5XX_TPL1_CS_TEX_CONST_HI 0x0000e761 + +#define REG_A5XX_TPL1_TP_FS_ROTATION_CNTL 0x0000e764 + +#define REG_A5XX_HLSQ_CONTROL_0_REG 0x0000e784 +#define A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000001 +#define A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK; +} +#define A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__MASK 0x00000004 +#define A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__SHIFT 2 +static inline uint32_t A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__SHIFT) & A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__MASK; +} + +#define REG_A5XX_HLSQ_CONTROL_1_REG 0x0000e785 +#define A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__MASK 0x0000003f +#define A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__SHIFT) & A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__MASK; +} + +#define REG_A5XX_HLSQ_CONTROL_2_REG 0x0000e786 +#define A5XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK 0x000000ff +#define A5XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CONTROL_2_REG_FACEREGID(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT) & A5XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK; +} + +#define REG_A5XX_HLSQ_CONTROL_3_REG 0x0000e787 +#define A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__MASK 0x000000ff +#define A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__SHIFT) & A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__MASK; +} + +#define REG_A5XX_HLSQ_CONTROL_4_REG 0x0000e788 +#define A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__MASK 0x00ff0000 +#define A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__SHIFT 16 +static inline uint32_t A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__SHIFT) & A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__MASK; +} +#define A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__MASK 0xff000000 +#define A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__SHIFT 24 +static inline uint32_t A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__SHIFT) & A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__MASK; +} + +#define REG_A5XX_HLSQ_UPDATE_CNTL 0x0000e78a + +#define 
REG_A5XX_HLSQ_VS_CONFIG 0x0000e78b +#define A5XX_HLSQ_VS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_HLSQ_FS_CONFIG 0x0000e78c +#define A5XX_HLSQ_FS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_HLSQ_HS_CONFIG 0x0000e78d +#define A5XX_HLSQ_HS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_HLSQ_DS_CONFIG 0x0000e78e +#define A5XX_HLSQ_DS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_HLSQ_GS_CONFIG 0x0000e78f +#define A5XX_HLSQ_GS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__MASK; +} + 
+#define REG_A5XX_HLSQ_CS_CONFIG 0x0000e790 +#define A5XX_HLSQ_CS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_HLSQ_VS_CNTL 0x0000e791 +#define A5XX_HLSQ_VS_CNTL_SSBO_ENABLE 0x00000001 +#define A5XX_HLSQ_VS_CNTL_INSTRLEN__MASK 0xfffffffe +#define A5XX_HLSQ_VS_CNTL_INSTRLEN__SHIFT 1 +static inline uint32_t A5XX_HLSQ_VS_CNTL_INSTRLEN(uint32_t val) +{ + return ((val) << A5XX_HLSQ_VS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_VS_CNTL_INSTRLEN__MASK; +} + +#define REG_A5XX_HLSQ_FS_CNTL 0x0000e792 +#define A5XX_HLSQ_FS_CNTL_SSBO_ENABLE 0x00000001 +#define A5XX_HLSQ_FS_CNTL_INSTRLEN__MASK 0xfffffffe +#define A5XX_HLSQ_FS_CNTL_INSTRLEN__SHIFT 1 +static inline uint32_t A5XX_HLSQ_FS_CNTL_INSTRLEN(uint32_t val) +{ + return ((val) << A5XX_HLSQ_FS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_FS_CNTL_INSTRLEN__MASK; +} + +#define REG_A5XX_HLSQ_HS_CNTL 0x0000e793 +#define A5XX_HLSQ_HS_CNTL_SSBO_ENABLE 0x00000001 +#define A5XX_HLSQ_HS_CNTL_INSTRLEN__MASK 0xfffffffe +#define A5XX_HLSQ_HS_CNTL_INSTRLEN__SHIFT 1 +static inline uint32_t A5XX_HLSQ_HS_CNTL_INSTRLEN(uint32_t val) +{ + return ((val) << A5XX_HLSQ_HS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_HS_CNTL_INSTRLEN__MASK; +} + +#define REG_A5XX_HLSQ_DS_CNTL 0x0000e794 +#define A5XX_HLSQ_DS_CNTL_SSBO_ENABLE 0x00000001 +#define A5XX_HLSQ_DS_CNTL_INSTRLEN__MASK 0xfffffffe +#define A5XX_HLSQ_DS_CNTL_INSTRLEN__SHIFT 1 +static inline uint32_t A5XX_HLSQ_DS_CNTL_INSTRLEN(uint32_t val) +{ + return ((val) << A5XX_HLSQ_DS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_DS_CNTL_INSTRLEN__MASK; +} + +#define REG_A5XX_HLSQ_GS_CNTL 0x0000e795 +#define A5XX_HLSQ_GS_CNTL_SSBO_ENABLE 0x00000001 +#define A5XX_HLSQ_GS_CNTL_INSTRLEN__MASK 0xfffffffe +#define A5XX_HLSQ_GS_CNTL_INSTRLEN__SHIFT 1 +static inline uint32_t A5XX_HLSQ_GS_CNTL_INSTRLEN(uint32_t val) +{ + return ((val) << A5XX_HLSQ_GS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_GS_CNTL_INSTRLEN__MASK; +} + +#define REG_A5XX_HLSQ_CS_CNTL 0x0000e796 +#define A5XX_HLSQ_CS_CNTL_SSBO_ENABLE 0x00000001 +#define A5XX_HLSQ_CS_CNTL_INSTRLEN__MASK 0xfffffffe +#define A5XX_HLSQ_CS_CNTL_INSTRLEN__SHIFT 1 +static inline uint32_t A5XX_HLSQ_CS_CNTL_INSTRLEN(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_CS_CNTL_INSTRLEN__MASK; +} + +#define REG_A5XX_HLSQ_CS_KERNEL_GROUP_X 0x0000e7b9 + +#define REG_A5XX_HLSQ_CS_KERNEL_GROUP_Y 0x0000e7ba + +#define REG_A5XX_HLSQ_CS_KERNEL_GROUP_Z 0x0000e7bb + +#define REG_A5XX_HLSQ_CS_NDRANGE_0 0x0000e7b0 +#define A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK 0x00000003 +#define A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK; +} +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK 0x00000ffc +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT 2 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(uint32_t val) +{ + return ((val) << 
A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK; +} +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK 0x003ff000 +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT 12 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK; +} +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK 0xffc00000 +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT 22 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK; +} + +#define REG_A5XX_HLSQ_CS_NDRANGE_1 0x0000e7b1 +#define A5XX_HLSQ_CS_NDRANGE_1_SIZE_X__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_1_SIZE_X__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_1_SIZE_X(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_1_SIZE_X__SHIFT) & A5XX_HLSQ_CS_NDRANGE_1_SIZE_X__MASK; +} + +#define REG_A5XX_HLSQ_CS_NDRANGE_2 0x0000e7b2 + +#define REG_A5XX_HLSQ_CS_NDRANGE_3 0x0000e7b3 +#define A5XX_HLSQ_CS_NDRANGE_3_SIZE_Y__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_3_SIZE_Y__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_3_SIZE_Y(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_3_SIZE_Y__SHIFT) & A5XX_HLSQ_CS_NDRANGE_3_SIZE_Y__MASK; +} + +#define REG_A5XX_HLSQ_CS_NDRANGE_4 0x0000e7b4 + +#define REG_A5XX_HLSQ_CS_NDRANGE_5 0x0000e7b5 +#define A5XX_HLSQ_CS_NDRANGE_5_SIZE_Z__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_5_SIZE_Z__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_5_SIZE_Z(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_5_SIZE_Z__SHIFT) & A5XX_HLSQ_CS_NDRANGE_5_SIZE_Z__MASK; +} + +#define REG_A5XX_HLSQ_CS_NDRANGE_6 0x0000e7b6 + +#define REG_A5XX_HLSQ_CS_CNTL_0 0x0000e7b7 +#define A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK 0x000000ff +#define A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT) & A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK; +} +#define A5XX_HLSQ_CS_CNTL_0_UNK0__MASK 0x0000ff00 +#define A5XX_HLSQ_CS_CNTL_0_UNK0__SHIFT 8 +static inline uint32_t A5XX_HLSQ_CS_CNTL_0_UNK0(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CNTL_0_UNK0__SHIFT) & A5XX_HLSQ_CS_CNTL_0_UNK0__MASK; +} +#define A5XX_HLSQ_CS_CNTL_0_UNK1__MASK 0x00ff0000 +#define A5XX_HLSQ_CS_CNTL_0_UNK1__SHIFT 16 +static inline uint32_t A5XX_HLSQ_CS_CNTL_0_UNK1(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CNTL_0_UNK1__SHIFT) & A5XX_HLSQ_CS_CNTL_0_UNK1__MASK; +} +#define A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK 0xff000000 +#define A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT 24 +static inline uint32_t A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT) & A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK; +} + +#define REG_A5XX_HLSQ_CS_CNTL_1 0x0000e7b8 + +#define REG_A5XX_UNKNOWN_E7C0 0x0000e7c0 + +#define REG_A5XX_HLSQ_VS_CONSTLEN 0x0000e7c3 + +#define REG_A5XX_HLSQ_VS_INSTRLEN 0x0000e7c4 + +#define REG_A5XX_UNKNOWN_E7C5 0x0000e7c5 + +#define REG_A5XX_HLSQ_HS_CONSTLEN 0x0000e7c8 + +#define REG_A5XX_HLSQ_HS_INSTRLEN 0x0000e7c9 + +#define REG_A5XX_UNKNOWN_E7CA 0x0000e7ca + +#define REG_A5XX_HLSQ_DS_CONSTLEN 0x0000e7cd + +#define REG_A5XX_HLSQ_DS_INSTRLEN 0x0000e7ce + +#define REG_A5XX_UNKNOWN_E7CF 0x0000e7cf + +#define REG_A5XX_HLSQ_GS_CONSTLEN 0x0000e7d2 + +#define REG_A5XX_HLSQ_GS_INSTRLEN 0x0000e7d3 + +#define REG_A5XX_UNKNOWN_E7D4 
0x0000e7d4 + +#define REG_A5XX_HLSQ_FS_CONSTLEN 0x0000e7d7 + +#define REG_A5XX_HLSQ_FS_INSTRLEN 0x0000e7d8 + +#define REG_A5XX_UNKNOWN_E7D9 0x0000e7d9 + +#define REG_A5XX_HLSQ_CS_CONSTLEN 0x0000e7dc + +#define REG_A5XX_HLSQ_CS_INSTRLEN 0x0000e7dd + +#define REG_A5XX_RB_2D_SRC_SOLID_DW0 0x00002101 + +#define REG_A5XX_RB_2D_SRC_SOLID_DW1 0x00002102 + +#define REG_A5XX_RB_2D_SRC_SOLID_DW2 0x00002103 + +#define REG_A5XX_RB_2D_SRC_SOLID_DW3 0x00002104 + +#define REG_A5XX_RB_2D_SRC_INFO 0x00002107 +#define A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) +{ + return ((val) << A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__SHIFT) & A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__MASK; +} +#define A5XX_RB_2D_SRC_INFO_COLOR_SWAP__MASK 0x00000c00 +#define A5XX_RB_2D_SRC_INFO_COLOR_SWAP__SHIFT 10 +static inline uint32_t A5XX_RB_2D_SRC_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A5XX_RB_2D_SRC_INFO_COLOR_SWAP__SHIFT) & A5XX_RB_2D_SRC_INFO_COLOR_SWAP__MASK; +} + +#define REG_A5XX_RB_2D_SRC_LO 0x00002108 + +#define REG_A5XX_RB_2D_SRC_HI 0x00002109 + +#define REG_A5XX_RB_2D_SRC_SIZE 0x0000210a +#define A5XX_RB_2D_SRC_SIZE_PITCH__MASK 0x0000ffff +#define A5XX_RB_2D_SRC_SIZE_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_2D_SRC_SIZE_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_2D_SRC_SIZE_PITCH__SHIFT) & A5XX_RB_2D_SRC_SIZE_PITCH__MASK; +} +#define A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__MASK 0xffff0000 +#define A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__SHIFT 16 +static inline uint32_t A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__SHIFT) & A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__MASK; +} + +#define REG_A5XX_RB_2D_DST_INFO 0x00002110 +#define A5XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A5XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A5XX_RB_2D_DST_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) +{ + return ((val) << A5XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT) & A5XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK; +} +#define A5XX_RB_2D_DST_INFO_COLOR_SWAP__MASK 0x00000c00 +#define A5XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT 10 +static inline uint32_t A5XX_RB_2D_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A5XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT) & A5XX_RB_2D_DST_INFO_COLOR_SWAP__MASK; +} + +#define REG_A5XX_RB_2D_DST_LO 0x00002111 + +#define REG_A5XX_RB_2D_DST_HI 0x00002112 + +#define REG_A5XX_RB_2D_DST_SIZE 0x00002113 +#define A5XX_RB_2D_DST_SIZE_PITCH__MASK 0x0000ffff +#define A5XX_RB_2D_DST_SIZE_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_2D_DST_SIZE_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_2D_DST_SIZE_PITCH__SHIFT) & A5XX_RB_2D_DST_SIZE_PITCH__MASK; +} +#define A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__MASK 0xffff0000 +#define A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__SHIFT 16 +static inline uint32_t A5XX_RB_2D_DST_SIZE_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__SHIFT) & A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__MASK; +} + +#define REG_A5XX_RB_2D_SRC_FLAGS_LO 0x00002140 + +#define REG_A5XX_RB_2D_SRC_FLAGS_HI 0x00002141 + +#define REG_A5XX_RB_2D_DST_FLAGS_LO 0x00002143 + +#define REG_A5XX_RB_2D_DST_FLAGS_HI 0x00002144 + +#define REG_A5XX_GRAS_2D_SRC_INFO 0x00002181 +#define A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__SHIFT 0 
+static inline uint32_t A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) +{ + return ((val) << A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__SHIFT) & A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__MASK; +} +#define A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__MASK 0x00000c00 +#define A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__SHIFT 10 +static inline uint32_t A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__SHIFT) & A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__MASK; +} + +#define REG_A5XX_GRAS_2D_DST_INFO 0x00002182 +#define A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) +{ + return ((val) << A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__SHIFT) & A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__MASK; +} +#define A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__MASK 0x00000c00 +#define A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__SHIFT 10 +static inline uint32_t A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__SHIFT) & A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__MASK; +} + +#define REG_A5XX_UNKNOWN_2100 0x00002100 + +#define REG_A5XX_UNKNOWN_2180 0x00002180 + +#define REG_A5XX_UNKNOWN_2184 0x00002184 + +#define REG_A5XX_TEX_SAMP_0 0x00000000 +#define A5XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR 0x00000001 +#define A5XX_TEX_SAMP_0_XY_MAG__MASK 0x00000006 +#define A5XX_TEX_SAMP_0_XY_MAG__SHIFT 1 +static inline uint32_t A5XX_TEX_SAMP_0_XY_MAG(enum a5xx_tex_filter val) +{ + return ((val) << A5XX_TEX_SAMP_0_XY_MAG__SHIFT) & A5XX_TEX_SAMP_0_XY_MAG__MASK; +} +#define A5XX_TEX_SAMP_0_XY_MIN__MASK 0x00000018 +#define A5XX_TEX_SAMP_0_XY_MIN__SHIFT 3 +static inline uint32_t A5XX_TEX_SAMP_0_XY_MIN(enum a5xx_tex_filter val) +{ + return ((val) << A5XX_TEX_SAMP_0_XY_MIN__SHIFT) & A5XX_TEX_SAMP_0_XY_MIN__MASK; +} +#define A5XX_TEX_SAMP_0_WRAP_S__MASK 0x000000e0 +#define A5XX_TEX_SAMP_0_WRAP_S__SHIFT 5 +static inline uint32_t A5XX_TEX_SAMP_0_WRAP_S(enum a5xx_tex_clamp val) +{ + return ((val) << A5XX_TEX_SAMP_0_WRAP_S__SHIFT) & A5XX_TEX_SAMP_0_WRAP_S__MASK; +} +#define A5XX_TEX_SAMP_0_WRAP_T__MASK 0x00000700 +#define A5XX_TEX_SAMP_0_WRAP_T__SHIFT 8 +static inline uint32_t A5XX_TEX_SAMP_0_WRAP_T(enum a5xx_tex_clamp val) +{ + return ((val) << A5XX_TEX_SAMP_0_WRAP_T__SHIFT) & A5XX_TEX_SAMP_0_WRAP_T__MASK; +} +#define A5XX_TEX_SAMP_0_WRAP_R__MASK 0x00003800 +#define A5XX_TEX_SAMP_0_WRAP_R__SHIFT 11 +static inline uint32_t A5XX_TEX_SAMP_0_WRAP_R(enum a5xx_tex_clamp val) +{ + return ((val) << A5XX_TEX_SAMP_0_WRAP_R__SHIFT) & A5XX_TEX_SAMP_0_WRAP_R__MASK; +} +#define A5XX_TEX_SAMP_0_ANISO__MASK 0x0001c000 +#define A5XX_TEX_SAMP_0_ANISO__SHIFT 14 +static inline uint32_t A5XX_TEX_SAMP_0_ANISO(enum a5xx_tex_aniso val) +{ + return ((val) << A5XX_TEX_SAMP_0_ANISO__SHIFT) & A5XX_TEX_SAMP_0_ANISO__MASK; +} +#define A5XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000 +#define A5XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19 +static inline uint32_t A5XX_TEX_SAMP_0_LOD_BIAS(float val) +{ + return ((((int32_t)(val * 256.0))) << A5XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A5XX_TEX_SAMP_0_LOD_BIAS__MASK; +} + +#define REG_A5XX_TEX_SAMP_1 0x00000001 +#define A5XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e +#define A5XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT 1 +static inline uint32_t A5XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val) +{ + return ((val) << A5XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A5XX_TEX_SAMP_1_COMPARE_FUNC__MASK; +} +#define A5XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF 0x00000010 +#define 
A5XX_TEX_SAMP_1_UNNORM_COORDS 0x00000020 +#define A5XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR 0x00000040 +#define A5XX_TEX_SAMP_1_MAX_LOD__MASK 0x000fff00 +#define A5XX_TEX_SAMP_1_MAX_LOD__SHIFT 8 +static inline uint32_t A5XX_TEX_SAMP_1_MAX_LOD(float val) +{ + return ((((uint32_t)(val * 256.0))) << A5XX_TEX_SAMP_1_MAX_LOD__SHIFT) & A5XX_TEX_SAMP_1_MAX_LOD__MASK; +} +#define A5XX_TEX_SAMP_1_MIN_LOD__MASK 0xfff00000 +#define A5XX_TEX_SAMP_1_MIN_LOD__SHIFT 20 +static inline uint32_t A5XX_TEX_SAMP_1_MIN_LOD(float val) +{ + return ((((uint32_t)(val * 256.0))) << A5XX_TEX_SAMP_1_MIN_LOD__SHIFT) & A5XX_TEX_SAMP_1_MIN_LOD__MASK; +} + +#define REG_A5XX_TEX_SAMP_2 0x00000002 +#define A5XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK 0xfffffff0 +#define A5XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT 4 +static inline uint32_t A5XX_TEX_SAMP_2_BCOLOR_OFFSET(uint32_t val) +{ + return ((val) << A5XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT) & A5XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK; +} + +#define REG_A5XX_TEX_SAMP_3 0x00000003 + +#define REG_A5XX_TEX_CONST_0 0x00000000 +#define A5XX_TEX_CONST_0_TILE_MODE__MASK 0x00000003 +#define A5XX_TEX_CONST_0_TILE_MODE__SHIFT 0 +static inline uint32_t A5XX_TEX_CONST_0_TILE_MODE(enum a5xx_tile_mode val) +{ + return ((val) << A5XX_TEX_CONST_0_TILE_MODE__SHIFT) & A5XX_TEX_CONST_0_TILE_MODE__MASK; +} +#define A5XX_TEX_CONST_0_SRGB 0x00000004 +#define A5XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070 +#define A5XX_TEX_CONST_0_SWIZ_X__SHIFT 4 +static inline uint32_t A5XX_TEX_CONST_0_SWIZ_X(enum a5xx_tex_swiz val) +{ + return ((val) << A5XX_TEX_CONST_0_SWIZ_X__SHIFT) & A5XX_TEX_CONST_0_SWIZ_X__MASK; +} +#define A5XX_TEX_CONST_0_SWIZ_Y__MASK 0x00000380 +#define A5XX_TEX_CONST_0_SWIZ_Y__SHIFT 7 +static inline uint32_t A5XX_TEX_CONST_0_SWIZ_Y(enum a5xx_tex_swiz val) +{ + return ((val) << A5XX_TEX_CONST_0_SWIZ_Y__SHIFT) & A5XX_TEX_CONST_0_SWIZ_Y__MASK; +} +#define A5XX_TEX_CONST_0_SWIZ_Z__MASK 0x00001c00 +#define A5XX_TEX_CONST_0_SWIZ_Z__SHIFT 10 +static inline uint32_t A5XX_TEX_CONST_0_SWIZ_Z(enum a5xx_tex_swiz val) +{ + return ((val) << A5XX_TEX_CONST_0_SWIZ_Z__SHIFT) & A5XX_TEX_CONST_0_SWIZ_Z__MASK; +} +#define A5XX_TEX_CONST_0_SWIZ_W__MASK 0x0000e000 +#define A5XX_TEX_CONST_0_SWIZ_W__SHIFT 13 +static inline uint32_t A5XX_TEX_CONST_0_SWIZ_W(enum a5xx_tex_swiz val) +{ + return ((val) << A5XX_TEX_CONST_0_SWIZ_W__SHIFT) & A5XX_TEX_CONST_0_SWIZ_W__MASK; +} +#define A5XX_TEX_CONST_0_MIPLVLS__MASK 0x000f0000 +#define A5XX_TEX_CONST_0_MIPLVLS__SHIFT 16 +static inline uint32_t A5XX_TEX_CONST_0_MIPLVLS(uint32_t val) +{ + return ((val) << A5XX_TEX_CONST_0_MIPLVLS__SHIFT) & A5XX_TEX_CONST_0_MIPLVLS__MASK; +} +#define A5XX_TEX_CONST_0_FMT__MASK 0x3fc00000 +#define A5XX_TEX_CONST_0_FMT__SHIFT 22 +static inline uint32_t A5XX_TEX_CONST_0_FMT(enum a5xx_tex_fmt val) +{ + return ((val) << A5XX_TEX_CONST_0_FMT__SHIFT) & A5XX_TEX_CONST_0_FMT__MASK; +} +#define A5XX_TEX_CONST_0_SWAP__MASK 0xc0000000 +#define A5XX_TEX_CONST_0_SWAP__SHIFT 30 +static inline uint32_t A5XX_TEX_CONST_0_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A5XX_TEX_CONST_0_SWAP__SHIFT) & A5XX_TEX_CONST_0_SWAP__MASK; +} + +#define REG_A5XX_TEX_CONST_1 0x00000001 +#define A5XX_TEX_CONST_1_WIDTH__MASK 0x00007fff +#define A5XX_TEX_CONST_1_WIDTH__SHIFT 0 +static inline uint32_t A5XX_TEX_CONST_1_WIDTH(uint32_t val) +{ + return ((val) << A5XX_TEX_CONST_1_WIDTH__SHIFT) & A5XX_TEX_CONST_1_WIDTH__MASK; +} +#define A5XX_TEX_CONST_1_HEIGHT__MASK 0x3fff8000 +#define A5XX_TEX_CONST_1_HEIGHT__SHIFT 15 +static inline uint32_t A5XX_TEX_CONST_1_HEIGHT(uint32_t val) +{ + return ((val) << 
A5XX_TEX_CONST_1_HEIGHT__SHIFT) & A5XX_TEX_CONST_1_HEIGHT__MASK; +} + +#define REG_A5XX_TEX_CONST_2 0x00000002 +#define A5XX_TEX_CONST_2_FETCHSIZE__MASK 0x0000000f +#define A5XX_TEX_CONST_2_FETCHSIZE__SHIFT 0 +static inline uint32_t A5XX_TEX_CONST_2_FETCHSIZE(enum a5xx_tex_fetchsize val) +{ + return ((val) << A5XX_TEX_CONST_2_FETCHSIZE__SHIFT) & A5XX_TEX_CONST_2_FETCHSIZE__MASK; +} +#define A5XX_TEX_CONST_2_PITCH__MASK 0x1fffff80 +#define A5XX_TEX_CONST_2_PITCH__SHIFT 7 +static inline uint32_t A5XX_TEX_CONST_2_PITCH(uint32_t val) +{ + return ((val) << A5XX_TEX_CONST_2_PITCH__SHIFT) & A5XX_TEX_CONST_2_PITCH__MASK; +} +#define A5XX_TEX_CONST_2_TYPE__MASK 0x60000000 +#define A5XX_TEX_CONST_2_TYPE__SHIFT 29 +static inline uint32_t A5XX_TEX_CONST_2_TYPE(enum a5xx_tex_type val) +{ + return ((val) << A5XX_TEX_CONST_2_TYPE__SHIFT) & A5XX_TEX_CONST_2_TYPE__MASK; +} + +#define REG_A5XX_TEX_CONST_3 0x00000003 +#define A5XX_TEX_CONST_3_ARRAY_PITCH__MASK 0x00003fff +#define A5XX_TEX_CONST_3_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A5XX_TEX_CONST_3_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A5XX_TEX_CONST_3_ARRAY_PITCH__SHIFT) & A5XX_TEX_CONST_3_ARRAY_PITCH__MASK; +} +#define A5XX_TEX_CONST_3_FLAG 0x10000000 + +#define REG_A5XX_TEX_CONST_4 0x00000004 +#define A5XX_TEX_CONST_4_BASE_LO__MASK 0xffffffe0 +#define A5XX_TEX_CONST_4_BASE_LO__SHIFT 5 +static inline uint32_t A5XX_TEX_CONST_4_BASE_LO(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A5XX_TEX_CONST_4_BASE_LO__SHIFT) & A5XX_TEX_CONST_4_BASE_LO__MASK; +} + +#define REG_A5XX_TEX_CONST_5 0x00000005 +#define A5XX_TEX_CONST_5_BASE_HI__MASK 0x0001ffff +#define A5XX_TEX_CONST_5_BASE_HI__SHIFT 0 +static inline uint32_t A5XX_TEX_CONST_5_BASE_HI(uint32_t val) +{ + return ((val) << A5XX_TEX_CONST_5_BASE_HI__SHIFT) & A5XX_TEX_CONST_5_BASE_HI__MASK; +} +#define A5XX_TEX_CONST_5_DEPTH__MASK 0x3ffe0000 +#define A5XX_TEX_CONST_5_DEPTH__SHIFT 17 +static inline uint32_t A5XX_TEX_CONST_5_DEPTH(uint32_t val) +{ + return ((val) << A5XX_TEX_CONST_5_DEPTH__SHIFT) & A5XX_TEX_CONST_5_DEPTH__MASK; +} + +#define REG_A5XX_TEX_CONST_6 0x00000006 + +#define REG_A5XX_TEX_CONST_7 0x00000007 + +#define REG_A5XX_TEX_CONST_8 0x00000008 + +#define REG_A5XX_TEX_CONST_9 0x00000009 + +#define REG_A5XX_TEX_CONST_10 0x0000000a + +#define REG_A5XX_TEX_CONST_11 0x0000000b + + +#endif /* A5XX_XML */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_blend.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_blend.c new file mode 100644 index 000000000..98b6d4498 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_blend.c @@ -0,0 +1,148 @@ +/* + * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#include "pipe/p_state.h" +#include "util/u_blend.h" +#include "util/u_string.h" +#include "util/u_memory.h" + +#include "fd5_blend.h" +#include "fd5_context.h" +#include "fd5_format.h" + +// XXX move somewhere common.. same across a3xx/a4xx/a5xx.. +static enum a3xx_rb_blend_opcode +blend_func(unsigned func) +{ + switch (func) { + case PIPE_BLEND_ADD: + return BLEND_DST_PLUS_SRC; + case PIPE_BLEND_MIN: + return BLEND_MIN_DST_SRC; + case PIPE_BLEND_MAX: + return BLEND_MAX_DST_SRC; + case PIPE_BLEND_SUBTRACT: + return BLEND_SRC_MINUS_DST; + case PIPE_BLEND_REVERSE_SUBTRACT: + return BLEND_DST_MINUS_SRC; + default: + DBG("invalid blend func: %x", func); + return 0; + } +} + +void * +fd5_blend_state_create(struct pipe_context *pctx, + const struct pipe_blend_state *cso) +{ + struct fd5_blend_stateobj *so; + enum a3xx_rop_code rop = ROP_COPY; + bool reads_dest = false; + unsigned i, mrt_blend = 0; + + if (cso->logicop_enable) { + rop = cso->logicop_func; /* maps 1:1 */ + + switch (cso->logicop_func) { + case PIPE_LOGICOP_NOR: + case PIPE_LOGICOP_AND_INVERTED: + case PIPE_LOGICOP_AND_REVERSE: + case PIPE_LOGICOP_INVERT: + case PIPE_LOGICOP_XOR: + case PIPE_LOGICOP_NAND: + case PIPE_LOGICOP_AND: + case PIPE_LOGICOP_EQUIV: + case PIPE_LOGICOP_NOOP: + case PIPE_LOGICOP_OR_INVERTED: + case PIPE_LOGICOP_OR_REVERSE: + case PIPE_LOGICOP_OR: + reads_dest = true; + break; + } + } + + so = CALLOC_STRUCT(fd5_blend_stateobj); + if (!so) + return NULL; + + so->base = *cso; + + so->lrz_write = true; /* unless blend enabled for any MRT */ + + for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) { + const struct pipe_rt_blend_state *rt; + + if (cso->independent_blend_enable) + rt = &cso->rt[i]; + else + rt = &cso->rt[0]; + + so->rb_mrt[i].blend_control_rgb = + A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) | + A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) | + A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)); + + so->rb_mrt[i].blend_control_alpha = + A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) | + A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) | + A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor)); + + so->rb_mrt[i].blend_control_no_alpha_rgb = + A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_src_factor))) | + A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) | + A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_dst_factor))); + + + so->rb_mrt[i].control = + A5XX_RB_MRT_CONTROL_ROP_CODE(rop) | + COND(cso->logicop_enable, A5XX_RB_MRT_CONTROL_ROP_ENABLE) | + A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask); + + if (rt->blend_enable) { + so->rb_mrt[i].control |= +// A5XX_RB_MRT_CONTROL_READ_DEST_ENABLE | + A5XX_RB_MRT_CONTROL_BLEND | + A5XX_RB_MRT_CONTROL_BLEND2; + mrt_blend |= (1 << i); 
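+			/* descriptive note (not in the original commit): any MRT with
+			 * blending enabled forces LRZ writes off, matching the
+			 * "unless blend enabled for any MRT" note at the lrz_write
+			 * initialization above:
+			 */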
+ so->lrz_write = false; + } + + if (reads_dest) { +// so->rb_mrt[i].control |= A5XX_RB_MRT_CONTROL_READ_DEST_ENABLE; + mrt_blend |= (1 << i); + } + +// if (cso->dither) +// so->rb_mrt[i].buf_info |= A5XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS); + } + + so->rb_blend_cntl = A5XX_RB_BLEND_CNTL_ENABLE_BLEND(mrt_blend) | + COND(cso->independent_blend_enable, A5XX_RB_BLEND_CNTL_INDEPENDENT_BLEND); + so->sp_blend_cntl = A5XX_SP_BLEND_CNTL_UNK8 | + COND(mrt_blend, A5XX_SP_BLEND_CNTL_ENABLED); + + return so; +} diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_blend.h b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_blend.h new file mode 100644 index 000000000..698549548 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_blend.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#ifndef FD5_BLEND_H_ +#define FD5_BLEND_H_ + +#include "pipe/p_state.h" +#include "pipe/p_context.h" + +#include "freedreno_util.h" + +struct fd5_blend_stateobj { + struct pipe_blend_state base; + + struct { + uint32_t control; + uint32_t buf_info; + /* Blend control bits for color if there is an alpha channel */ + uint32_t blend_control_rgb; + /* Blend control bits for color if there is no alpha channel */ + uint32_t blend_control_no_alpha_rgb; + /* Blend control bits for alpha channel */ + uint32_t blend_control_alpha; + } rb_mrt[A5XX_MAX_RENDER_TARGETS]; + uint32_t rb_blend_cntl; + uint32_t sp_blend_cntl; + bool lrz_write; +}; + +static inline struct fd5_blend_stateobj * +fd5_blend_stateobj(struct pipe_blend_state *blend) +{ + return (struct fd5_blend_stateobj *)blend; +} + +void * fd5_blend_state_create(struct pipe_context *pctx, + const struct pipe_blend_state *cso); + +#endif /* FD5_BLEND_H_ */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_context.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_context.c new file mode 100644 index 000000000..3632cc522 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_context.c @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#include "freedreno_query_acc.h" + +#include "fd5_context.h" +#include "fd5_compute.h" +#include "fd5_blend.h" +#include "fd5_draw.h" +#include "fd5_emit.h" +#include "fd5_gmem.h" +#include "fd5_program.h" +#include "fd5_query.h" +#include "fd5_rasterizer.h" +#include "fd5_texture.h" +#include "fd5_zsa.h" + +static void +fd5_context_destroy(struct pipe_context *pctx) +{ + struct fd5_context *fd5_ctx = fd5_context(fd_context(pctx)); + + fd_bo_del(fd5_ctx->vs_pvt_mem); + fd_bo_del(fd5_ctx->fs_pvt_mem); + fd_bo_del(fd5_ctx->vsc_size_mem); + fd_bo_del(fd5_ctx->blit_mem); + + fd_context_cleanup_common_vbos(&fd5_ctx->base); + + u_upload_destroy(fd5_ctx->border_color_uploader); + + fd_context_destroy(pctx); +} + +static const uint8_t primtypes[] = { + [PIPE_PRIM_POINTS] = DI_PT_POINTLIST, + [PIPE_PRIM_LINES] = DI_PT_LINELIST, + [PIPE_PRIM_LINE_STRIP] = DI_PT_LINESTRIP, + [PIPE_PRIM_LINE_LOOP] = DI_PT_LINELOOP, + [PIPE_PRIM_TRIANGLES] = DI_PT_TRILIST, + [PIPE_PRIM_TRIANGLE_STRIP] = DI_PT_TRISTRIP, + [PIPE_PRIM_TRIANGLE_FAN] = DI_PT_TRIFAN, + [PIPE_PRIM_MAX] = DI_PT_RECTLIST, /* internal clear blits */ +}; + +struct pipe_context * +fd5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) +{ + struct fd_screen *screen = fd_screen(pscreen); + struct fd5_context *fd5_ctx = CALLOC_STRUCT(fd5_context); + struct pipe_context *pctx; + + if (!fd5_ctx) + return NULL; + + pctx = &fd5_ctx->base.base; + + fd5_ctx->base.dev = fd_device_ref(screen->dev); + fd5_ctx->base.screen = fd_screen(pscreen); + + pctx->destroy = fd5_context_destroy; + pctx->create_blend_state = fd5_blend_state_create; + pctx->create_rasterizer_state = fd5_rasterizer_state_create; + pctx->create_depth_stencil_alpha_state = fd5_zsa_state_create; + + fd5_draw_init(pctx); + fd5_compute_init(pctx); + fd5_gmem_init(pctx); + fd5_texture_init(pctx); + fd5_prog_init(pctx); + fd5_emit_init(pctx); + + pctx = fd_context_init(&fd5_ctx->base, pscreen, primtypes, priv); + if (!pctx) + return NULL; + + fd5_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000, + DRM_FREEDRENO_GEM_TYPE_KMEM); + + fd5_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000, + DRM_FREEDRENO_GEM_TYPE_KMEM); + + fd5_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000, + DRM_FREEDRENO_GEM_TYPE_KMEM); + + fd5_ctx->blit_mem = fd_bo_new(screen->dev, 0x1000, + DRM_FREEDRENO_GEM_TYPE_KMEM); + + fd_context_setup_common_vbos(&fd5_ctx->base); + + fd5_query_context_init(pctx); + + fd5_ctx->border_color_uploader = u_upload_create(pctx, 4096, 0, + PIPE_USAGE_STREAM); + + return pctx; +} diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_context.h b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_context.h new file mode 100644 index 000000000..f6de6ca2a --- /dev/null +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_context.h @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions 
of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#ifndef FD5_CONTEXT_H_ +#define FD5_CONTEXT_H_ + +#include "util/u_upload_mgr.h" + +#include "freedreno_drmif.h" + +#include "freedreno_context.h" + +#include "ir3_shader.h" + +struct fd5_context { + struct fd_context base; + + struct fd_bo *vs_pvt_mem, *fs_pvt_mem; + + /* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We + * could combine it with another allocation. + * + * (upper area used as scratch bo.. see fd5_query) + * + * XXX remove if unneeded after binning r/e.. + */ + struct fd_bo *vsc_size_mem; + + /* TODO not sure what this is for.. probably similar to + * CACHE_FLUSH_TS on kernel side, where value gets written + * to this address synchronized w/ 3d (ie. a way to + * synchronize when the CP is running far ahead) + */ + struct fd_bo *blit_mem; + + struct u_upload_mgr *border_color_uploader; + struct pipe_resource *border_color_buf; + + /* if *any* of bits are set in {v,f}saturate_{s,t,r} */ + bool vsaturate, fsaturate; + + /* bitmask of sampler which needs coords clamped for vertex + * shader: + */ + uint16_t vsaturate_s, vsaturate_t, vsaturate_r; + + /* bitmask of sampler which needs coords clamped for frag + * shader: + */ + uint16_t fsaturate_s, fsaturate_t, fsaturate_r; + + /* bitmask of samplers which need astc srgb workaround: */ + uint16_t vastc_srgb, fastc_srgb; + + /* some state changes require a different shader variant. Keep + * track of this so we know when we need to re-emit shader state + * due to variant change. See fixup_shader_state() + */ + struct ir3_shader_key last_key; + + /* number of active samples-passed queries: */ + int samples_passed_queries; + + /* cached state about current emitted shader program (3d): */ + unsigned max_loc; +}; + +static inline struct fd5_context * +fd5_context(struct fd_context *ctx) +{ + return (struct fd5_context *)ctx; +} + +struct pipe_context * +fd5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags); + +#endif /* FD5_CONTEXT_H_ */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_draw.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_draw.c new file mode 100644 index 000000000..d1f1d039b --- /dev/null +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_draw.c @@ -0,0 +1,394 @@ +/* + * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_prim.h" + +#include "freedreno_state.h" +#include "freedreno_resource.h" + +#include "fd5_draw.h" +#include "fd5_context.h" +#include "fd5_emit.h" +#include "fd5_program.h" +#include "fd5_format.h" +#include "fd5_zsa.h" + + +static void +draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct fd5_emit *emit, unsigned index_offset) +{ + const struct pipe_draw_info *info = emit->info; + enum pc_di_primtype primtype = ctx->primtypes[info->mode]; + + fd5_emit_state(ctx, ring, emit); + + if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE)) + fd5_emit_vertex_bufs(ring, emit); + + OUT_PKT4(ring, REG_A5XX_VFD_INDEX_OFFSET, 2); + OUT_RING(ring, info->index_size ? info->index_bias : info->start); /* VFD_INDEX_OFFSET */ + OUT_RING(ring, info->start_instance); /* ??? UNKNOWN_2209 */ + + OUT_PKT4(ring, REG_A5XX_PC_RESTART_INDEX, 1); + OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */ + info->restart_index : 0xffffffff); + + fd5_emit_render_cntl(ctx, false, emit->key.binning_pass); + fd5_draw_emit(ctx->batch, ring, primtype, + emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY, + info, index_offset); +} + +/* fixup dirty shader state in case some "unrelated" (from the state- + * tracker's perspective) state change causes us to switch to a + * different variant. 
+ */ +static void +fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key) +{ + struct fd5_context *fd5_ctx = fd5_context(ctx); + struct ir3_shader_key *last_key = &fd5_ctx->last_key; + + if (!ir3_shader_key_equal(last_key, key)) { + if (ir3_shader_key_changes_fs(last_key, key)) { + ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |= FD_DIRTY_SHADER_PROG; + ctx->dirty |= FD_DIRTY_PROG; + } + + if (ir3_shader_key_changes_vs(last_key, key)) { + ctx->dirty_shader[PIPE_SHADER_VERTEX] |= FD_DIRTY_SHADER_PROG; + ctx->dirty |= FD_DIRTY_PROG; + } + + fd5_ctx->last_key = *key; + } +} + +static bool +fd5_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, + unsigned index_offset) +{ + struct fd5_context *fd5_ctx = fd5_context(ctx); + struct fd5_emit emit = { + .debug = &ctx->debug, + .vtx = &ctx->vtx, + .prog = &ctx->prog, + .info = info, + .key = { + .color_two_side = ctx->rasterizer->light_twoside, + .vclamp_color = ctx->rasterizer->clamp_vertex_color, + .fclamp_color = ctx->rasterizer->clamp_fragment_color, + .rasterflat = ctx->rasterizer->flatshade, + .half_precision = ctx->in_blit && + fd_half_precision(&ctx->batch->framebuffer), + .ucp_enables = ctx->rasterizer->clip_plane_enable, + .has_per_samp = (fd5_ctx->fsaturate || fd5_ctx->vsaturate || + fd5_ctx->fastc_srgb || fd5_ctx->vastc_srgb), + .vsaturate_s = fd5_ctx->vsaturate_s, + .vsaturate_t = fd5_ctx->vsaturate_t, + .vsaturate_r = fd5_ctx->vsaturate_r, + .fsaturate_s = fd5_ctx->fsaturate_s, + .fsaturate_t = fd5_ctx->fsaturate_t, + .fsaturate_r = fd5_ctx->fsaturate_r, + .vastc_srgb = fd5_ctx->vastc_srgb, + .fastc_srgb = fd5_ctx->fastc_srgb, + }, + .rasterflat = ctx->rasterizer->flatshade, + .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable, + .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode, + }; + + fixup_shader_state(ctx, &emit.key); + + unsigned dirty = ctx->dirty; + const struct ir3_shader_variant *vp = fd5_emit_get_vp(&emit); + const struct ir3_shader_variant *fp = fd5_emit_get_fp(&emit); + + /* do regular pass first, since that is more likely to fail compiling: */ + + if (!vp || !fp) + return false; + + /* figure out whether we need to disable LRZ write for binning + * pass using draw pass's fp: + */ + emit.no_lrz_write = fp->writes_pos || fp->has_kill; + + emit.key.binning_pass = false; + emit.dirty = dirty; + + draw_impl(ctx, ctx->batch->draw, &emit, index_offset); + + /* and now binning pass: */ + emit.key.binning_pass = true; + emit.dirty = dirty & ~(FD_DIRTY_BLEND); + emit.vp = NULL; /* we changed key so need to refetch vp */ + emit.fp = NULL; + draw_impl(ctx, ctx->batch->binning, &emit, index_offset); + + if (emit.streamout_mask) { + struct fd_ringbuffer *ring = ctx->batch->draw; + + for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { + if (emit.streamout_mask & (1 << i)) { + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, FLUSH_SO_0 + i); + } + } + } + + fd_context_all_clean(ctx); + + return true; +} + +static bool is_z32(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + case PIPE_FORMAT_Z32_UNORM: + case PIPE_FORMAT_Z32_FLOAT: + return true; + default: + return false; + } +} + +static void +fd5_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) +{ + struct fd_ringbuffer *ring; + uint32_t clear = util_pack_z(PIPE_FORMAT_Z16_UNORM, depth); + + // TODO mid-frame clears (ie. app doing crazy stuff)?? Maybe worth + // splitting both clear and lrz clear out into their own rb's. And + // just throw away any draws prior to clear. 
(Anything not fullscreen + // clear, just fallback to generic path that treats it as a normal + // draw + + if (!batch->lrz_clear) { + batch->lrz_clear = fd_ringbuffer_new(batch->ctx->screen->pipe, 0x1000); + fd_ringbuffer_set_parent(batch->lrz_clear, batch->gmem); + } + + ring = batch->lrz_clear; + + OUT_WFI5(ring); + + OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1); + OUT_RING(ring, 0x10000000); + + OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1); + OUT_RING(ring, 0x20fffff); + + OUT_PKT4(ring, REG_A5XX_GRAS_SU_CNTL, 1); + OUT_RING(ring, A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(0.0)); + + OUT_PKT4(ring, REG_A5XX_GRAS_CNTL, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1); + OUT_RING(ring, 0x00000181); + + OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5); + OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(RB5_R16_UNORM) | + A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(TILE5_LINEAR) | + A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX)); + OUT_RING(ring, A5XX_RB_MRT_PITCH(zsbuf->lrz_pitch * 2)); + OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(fd_bo_size(zsbuf->lrz))); + OUT_RELOCW(ring, zsbuf->lrz, 0x1000, 0, 0); + + OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_RB_DEST_MSAA_CNTL, 1); + OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE)); + + OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1); + OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_MRT0)); + + OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1); + OUT_RING(ring, A5XX_RB_CLEAR_CNTL_FAST_CLEAR | + A5XX_RB_CLEAR_CNTL_MASK(0xf)); + + OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 1); + OUT_RING(ring, clear); /* RB_CLEAR_COLOR_DW0 */ + + OUT_PKT4(ring, REG_A5XX_VSC_RESOLVE_CNTL, 2); + OUT_RING(ring, A5XX_VSC_RESOLVE_CNTL_X(zsbuf->lrz_width) | + A5XX_VSC_RESOLVE_CNTL_Y(zsbuf->lrz_height)); + OUT_RING(ring, 0x00000000); // XXX UNKNOWN_0CDE + + OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1); + OUT_RING(ring, A5XX_RB_CNTL_BYPASS); + + OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2); + OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) | + A5XX_RB_RESOLVE_CNTL_1_Y(0)); + OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(zsbuf->lrz_width - 1) | + A5XX_RB_RESOLVE_CNTL_2_Y(zsbuf->lrz_height - 1)); + + fd5_emit_blit(batch->ctx, ring); +} + +static bool +fd5_clear(struct fd_context *ctx, unsigned buffers, + const union pipe_color_union *color, double depth, unsigned stencil) +{ + struct fd_ringbuffer *ring = ctx->batch->draw; + struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; + struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); + + if ((buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) && + is_z32(pfb->zsbuf->format)) + return false; + + ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx); + ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny); + ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx); + ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy); + + fd5_emit_render_cntl(ctx, true, false); + + if (buffers & PIPE_CLEAR_COLOR) { + for (int i = 0; i < pfb->nr_cbufs; i++) { + union util_color uc = {0}; + + if (!pfb->cbufs[i]) + continue; + + if (!(buffers & (PIPE_CLEAR_COLOR0 << i))) + continue; + + enum pipe_format pfmt = pfb->cbufs[i]->format; + + // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP?? 
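+			/* descriptive note (not in the original commit): the switch
+			 * below reorders the clear color into the component order
+			 * given by fd5_pipe2swap() before it is packed and emitted
+			 * as RB_CLEAR_COLOR_DW0..3:
+			 */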
+ union pipe_color_union swapped; + switch (fd5_pipe2swap(pfmt)) { + case WZYX: + swapped.ui[0] = color->ui[0]; + swapped.ui[1] = color->ui[1]; + swapped.ui[2] = color->ui[2]; + swapped.ui[3] = color->ui[3]; + break; + case WXYZ: + swapped.ui[2] = color->ui[0]; + swapped.ui[1] = color->ui[1]; + swapped.ui[0] = color->ui[2]; + swapped.ui[3] = color->ui[3]; + break; + case ZYXW: + swapped.ui[3] = color->ui[0]; + swapped.ui[0] = color->ui[1]; + swapped.ui[1] = color->ui[2]; + swapped.ui[2] = color->ui[3]; + break; + case XYZW: + swapped.ui[3] = color->ui[0]; + swapped.ui[2] = color->ui[1]; + swapped.ui[1] = color->ui[2]; + swapped.ui[0] = color->ui[3]; + break; + } + + if (util_format_is_pure_uint(pfmt)) { + util_format_write_4ui(pfmt, swapped.ui, 0, &uc, 0, 0, 0, 1, 1); + } else if (util_format_is_pure_sint(pfmt)) { + util_format_write_4i(pfmt, swapped.i, 0, &uc, 0, 0, 0, 1, 1); + } else { + util_pack_color(swapped.f, pfmt, &uc); + } + + OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1); + OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_MRT0 + i)); + + OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1); + OUT_RING(ring, A5XX_RB_CLEAR_CNTL_FAST_CLEAR | + A5XX_RB_CLEAR_CNTL_MASK(0xf)); + + OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 4); + OUT_RING(ring, uc.ui[0]); /* RB_CLEAR_COLOR_DW0 */ + OUT_RING(ring, uc.ui[1]); /* RB_CLEAR_COLOR_DW1 */ + OUT_RING(ring, uc.ui[2]); /* RB_CLEAR_COLOR_DW2 */ + OUT_RING(ring, uc.ui[3]); /* RB_CLEAR_COLOR_DW3 */ + + fd5_emit_blit(ctx, ring); + } + } + + if (pfb->zsbuf && (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { + uint32_t clear = + util_pack_z_stencil(pfb->zsbuf->format, depth, stencil); + uint32_t mask = 0; + + if (buffers & PIPE_CLEAR_DEPTH) + mask |= 0x1; + + if (buffers & PIPE_CLEAR_STENCIL) + mask |= 0x2; + + OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1); + OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_ZS)); + + OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1); + OUT_RING(ring, A5XX_RB_CLEAR_CNTL_FAST_CLEAR | + A5XX_RB_CLEAR_CNTL_MASK(mask)); + + OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 1); + OUT_RING(ring, clear); /* RB_CLEAR_COLOR_DW0 */ + + fd5_emit_blit(ctx, ring); + + if (pfb->zsbuf && (buffers & PIPE_CLEAR_DEPTH)) { + struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture); + if (zsbuf->lrz) { + zsbuf->lrz_valid = true; + fd5_clear_lrz(ctx->batch, zsbuf, depth); + } + } + } + + /* disable fast clear to not interfere w/ gmem->mem, etc.. 
*/ + OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1); + OUT_RING(ring, 0x00000000); /* RB_CLEAR_CNTL */ + + return true; +} + +void +fd5_draw_init(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->draw_vbo = fd5_draw_vbo; + ctx->clear = fd5_clear; +} diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_draw.h b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_draw.h new file mode 100644 index 000000000..de210e456 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_draw.h @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#ifndef FD5_DRAW_H_ +#define FD5_DRAW_H_ + +#include "pipe/p_context.h" + +#include "freedreno_draw.h" + +/* some bits in common w/ a4xx: */ +#include "a4xx/fd4_draw.h" + +void fd5_draw_init(struct pipe_context *pctx); + +static inline void +fd5_draw(struct fd_batch *batch, struct fd_ringbuffer *ring, + enum pc_di_primtype primtype, + enum pc_di_vis_cull_mode vismode, + enum pc_di_src_sel src_sel, uint32_t count, + uint32_t instances, enum a4xx_index_size idx_type, + uint32_t idx_size, uint32_t idx_offset, + struct pipe_resource *idx_buffer) +{ + /* for debug after a lock up, write a unique counter value + * to scratch7 for each draw, to make it easier to match up + * register dumps to cmdstream. The combination of IB + * (scratch6) and DRAW is enough to "triangulate" the + * particular draw that caused lockup. + */ + emit_marker5(ring, 7); + + OUT_PKT7(ring, CP_DRAW_INDX_OFFSET, idx_buffer ? 
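+ /* 7 dwords when indexed (adds the unknown 0x0, the 64-bit index
+  * buffer address and the index size), otherwise just 3: */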
7 : 3); + if (vismode == USE_VISIBILITY) { + /* leave vis mode blank for now, it will be patched up when + * we know if we are binning or not + */ + OUT_RINGP(ring, DRAW4(primtype, src_sel, idx_type, 0), + &batch->draw_patches); + } else { + OUT_RING(ring, DRAW4(primtype, src_sel, idx_type, vismode)); + } + OUT_RING(ring, instances); /* NumInstances */ + OUT_RING(ring, count); /* NumIndices */ + if (idx_buffer) { + OUT_RING(ring, 0x0); /* XXX */ + OUT_RELOC(ring, fd_resource(idx_buffer)->bo, idx_offset, 0, 0); + OUT_RING (ring, idx_size); + } + + emit_marker5(ring, 7); + + fd_reset_wfi(batch); +} + +static inline void +fd5_draw_emit(struct fd_batch *batch, struct fd_ringbuffer *ring, + enum pc_di_primtype primtype, + enum pc_di_vis_cull_mode vismode, + const struct pipe_draw_info *info, + unsigned index_offset) +{ + struct pipe_resource *idx_buffer = NULL; + enum a4xx_index_size idx_type; + enum pc_di_src_sel src_sel; + uint32_t idx_size, idx_offset; + + if (info->index_size) { + assert(!info->has_user_indices); + + idx_buffer = info->index.resource; + idx_type = fd4_size2indextype(info->index_size); + idx_size = info->index_size * info->count; + idx_offset = index_offset + info->start * info->index_size; + src_sel = DI_SRC_SEL_DMA; + } else { + idx_buffer = NULL; + idx_type = INDEX4_SIZE_32_BIT; + idx_size = 0; + idx_offset = 0; + src_sel = DI_SRC_SEL_AUTO_INDEX; + } + + fd5_draw(batch, ring, primtype, vismode, src_sel, + info->count, info->instance_count, + idx_type, idx_size, idx_offset, idx_buffer); +} + +#endif /* FD5_DRAW_H_ */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.c new file mode 100644 index 000000000..21931e9df --- /dev/null +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.c @@ -0,0 +1,1069 @@ +/* + * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_helpers.h" +#include "util/u_format.h" +#include "util/u_viewport.h" + +#include "freedreno_resource.h" +#include "freedreno_query_hw.h" + +#include "fd5_emit.h" +#include "fd5_blend.h" +#include "fd5_context.h" +#include "fd5_program.h" +#include "fd5_rasterizer.h" +#include "fd5_texture.h" +#include "fd5_format.h" +#include "fd5_zsa.h" + +/* regid: base const register + * prsc or dwords: buffer containing constant values + * sizedwords: size of const value buffer + */ +static void +fd5_emit_const(struct fd_ringbuffer *ring, enum shader_t type, + uint32_t regid, uint32_t offset, uint32_t sizedwords, + const uint32_t *dwords, struct pipe_resource *prsc) +{ + uint32_t i, sz; + enum a4xx_state_src src; + + debug_assert((regid % 4) == 0); + debug_assert((sizedwords % 4) == 0); + + if (prsc) { + sz = 0; + src = SS4_INDIRECT; + } else { + sz = sizedwords; + src = SS4_DIRECT; + } + + OUT_PKT7(ring, CP_LOAD_STATE4, 3 + sz); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid/4) | + CP_LOAD_STATE4_0_STATE_SRC(src) | + CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(type)) | + CP_LOAD_STATE4_0_NUM_UNIT(sizedwords/4)); + if (prsc) { + struct fd_bo *bo = fd_resource(prsc)->bo; + OUT_RELOC(ring, bo, offset, + CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS), 0); + } else { + OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) | + CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS)); + OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); + dwords = (uint32_t *)&((uint8_t *)dwords)[offset]; + } + for (i = 0; i < sz; i++) { + OUT_RING(ring, dwords[i]); + } +} + +static void +fd5_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, + uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets) +{ + uint32_t anum = align(num, 2); + uint32_t i; + + debug_assert((regid % 4) == 0); + + OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * anum)); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid/4) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(type)) | + CP_LOAD_STATE4_0_NUM_UNIT(anum/2)); + OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) | + CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS)); + OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); + + for (i = 0; i < num; i++) { + if (prscs[i]) { + if (write) { + OUT_RELOCW(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0); + } else { + OUT_RELOC(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0); + } + } else { + OUT_RING(ring, 0xbad00000 | (i << 16)); + OUT_RING(ring, 0xbad00000 | (i << 16)); + } + } + + for (; i < anum; i++) { + OUT_RING(ring, 0xffffffff); + OUT_RING(ring, 0xffffffff); + } +} + +/* Border color layout is diff from a4xx/a5xx.. if it turns out to be + * the same as a6xx then move this somewhere common ;-) + * + * Entry layout looks like (total size, 0x60 bytes): + */ + +struct PACKED bcolor_entry { + uint32_t fp32[4]; + uint16_t ui16[4]; + int16_t si16[4]; + uint16_t fp16[4]; + uint16_t rgb565; + uint16_t rgb5a1; + uint16_t rgba4; + uint8_t __pad0[2]; + uint8_t ui8[4]; + int8_t si8[4]; + uint32_t rgb10a2; + uint32_t z24; /* also s8? 
*/ + uint8_t __pad1[32]; +}; + +#define FD5_BORDER_COLOR_SIZE 0x60 +#define FD5_BORDER_COLOR_UPLOAD_SIZE (2 * PIPE_MAX_SAMPLERS * FD5_BORDER_COLOR_SIZE) + +static void +setup_border_colors(struct fd_texture_stateobj *tex, struct bcolor_entry *entries) +{ + unsigned i, j; + STATIC_ASSERT(sizeof(struct bcolor_entry) == FD5_BORDER_COLOR_SIZE); + + for (i = 0; i < tex->num_samplers; i++) { + struct bcolor_entry *e = &entries[i]; + struct pipe_sampler_state *sampler = tex->samplers[i]; + union pipe_color_union *bc; + + if (!sampler) + continue; + + bc = &sampler->border_color; + + /* + * XXX HACK ALERT XXX + * + * The border colors need to be swizzled in a particular + * format-dependent order. Even though samplers don't know about + * formats, we can assume that with a GL state tracker, there's a + * 1:1 correspondence between sampler and texture. Take advantage + * of that knowledge. + */ + if ((i >= tex->num_textures) || !tex->textures[i]) + continue; + + const struct util_format_description *desc = + util_format_description(tex->textures[i]->format); + + e->rgb565 = 0; + e->rgb5a1 = 0; + e->rgba4 = 0; + e->rgb10a2 = 0; + e->z24 = 0; + + for (j = 0; j < 4; j++) { + int c = desc->swizzle[j]; + + if (c >= 4) + continue; + + if (desc->channel[c].pure_integer) { + uint16_t clamped; + switch (desc->channel[c].size) { + case 2: + assert(desc->channel[c].type == UTIL_FORMAT_TYPE_UNSIGNED); + clamped = CLAMP(bc->ui[j], 0, 0x3); + break; + case 8: + if (desc->channel[c].type == UTIL_FORMAT_TYPE_SIGNED) + clamped = CLAMP(bc->i[j], -128, 127); + else + clamped = CLAMP(bc->ui[j], 0, 255); + break; + case 10: + assert(desc->channel[c].type == UTIL_FORMAT_TYPE_UNSIGNED); + clamped = CLAMP(bc->ui[j], 0, 0x3ff); + break; + case 16: + if (desc->channel[c].type == UTIL_FORMAT_TYPE_SIGNED) + clamped = CLAMP(bc->i[j], -32768, 32767); + else + clamped = CLAMP(bc->ui[j], 0, 65535); + break; + default: + assert(!"Unexpected bit size"); + case 32: + clamped = 0; + break; + } + e->fp32[c] = bc->ui[j]; + e->fp16[c] = clamped; + } else { + float f = bc->f[j]; + float f_u = CLAMP(f, 0, 1); + float f_s = CLAMP(f, -1, 1); + + e->fp32[c] = fui(f); + e->fp16[c] = util_float_to_half(f); + e->ui16[c] = f_u * 0xffff; + e->si16[c] = f_s * 0x7fff; + e->ui8[c] = f_u * 0xff; + e->si8[c] = f_s * 0x7f; + if (c == 1) + e->rgb565 |= (int)(f_u * 0x3f) << 5; + else if (c < 3) + e->rgb565 |= (int)(f_u * 0x1f) << (c ? 11 : 0); + if (c == 3) + e->rgb5a1 |= (f_u > 0.5) ? 
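+ /* alpha bit (bit 15) of the 5551 border color: */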
0x8000 : 0; + else + e->rgb5a1 |= (int)(f_u * 0x1f) << (c * 5); + if (c == 3) + e->rgb10a2 |= (int)(f_u * 0x3) << 30; + else + e->rgb10a2 |= (int)(f_u * 0x3ff) << (c * 10); + e->rgba4 |= (int)(f_u * 0xf) << (c * 4); + if (c == 0) + e->z24 = f_u * 0xffffff; + } + } + +#ifdef DEBUG + memset(&e->__pad0, 0, sizeof(e->__pad0)); + memset(&e->__pad1, 0, sizeof(e->__pad1)); +#endif + } +} + +static void +emit_border_color(struct fd_context *ctx, struct fd_ringbuffer *ring) +{ + struct fd5_context *fd5_ctx = fd5_context(ctx); + struct bcolor_entry *entries; + unsigned off; + void *ptr; + + STATIC_ASSERT(sizeof(struct bcolor_entry) == FD5_BORDER_COLOR_SIZE); + + u_upload_alloc(fd5_ctx->border_color_uploader, + 0, FD5_BORDER_COLOR_UPLOAD_SIZE, + FD5_BORDER_COLOR_UPLOAD_SIZE, &off, + &fd5_ctx->border_color_buf, + &ptr); + + entries = ptr; + + setup_border_colors(&ctx->tex[PIPE_SHADER_VERTEX], &entries[0]); + setup_border_colors(&ctx->tex[PIPE_SHADER_FRAGMENT], + &entries[ctx->tex[PIPE_SHADER_VERTEX].num_samplers]); + + OUT_PKT4(ring, REG_A5XX_TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, 2); + OUT_RELOC(ring, fd_resource(fd5_ctx->border_color_buf)->bo, off, 0, 0); + + u_upload_unmap(fd5_ctx->border_color_uploader); +} + +static bool +emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, + enum a4xx_state_block sb, struct fd_texture_stateobj *tex) +{ + bool needs_border = false; + unsigned bcolor_offset = (sb == SB4_FS_TEX) ? ctx->tex[PIPE_SHADER_VERTEX].num_samplers : 0; + unsigned i; + + if (tex->num_samplers > 0) { + /* output sampler state: */ + OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (4 * tex->num_samplers)); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(sb) | + CP_LOAD_STATE4_0_NUM_UNIT(tex->num_samplers)); + OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER) | + CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); + for (i = 0; i < tex->num_samplers; i++) { + static const struct fd5_sampler_stateobj dummy_sampler = {}; + const struct fd5_sampler_stateobj *sampler = tex->samplers[i] ? + fd5_sampler_stateobj(tex->samplers[i]) : + &dummy_sampler; + OUT_RING(ring, sampler->texsamp0); + OUT_RING(ring, sampler->texsamp1); + OUT_RING(ring, sampler->texsamp2 | + A5XX_TEX_SAMP_2_BCOLOR_OFFSET(bcolor_offset)); + OUT_RING(ring, sampler->texsamp3); + + needs_border |= sampler->needs_border; + } + } + + if (tex->num_textures > 0) { + unsigned num_textures = tex->num_textures; + + /* emit texture state: */ + OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (12 * num_textures)); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(sb) | + CP_LOAD_STATE4_0_NUM_UNIT(num_textures)); + OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) | + CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); + for (i = 0; i < tex->num_textures; i++) { + static const struct fd5_pipe_sampler_view dummy_view = {}; + const struct fd5_pipe_sampler_view *view = tex->textures[i] ? 
+ fd5_pipe_sampler_view(tex->textures[i]) : + &dummy_view; + + OUT_RING(ring, view->texconst0); + OUT_RING(ring, view->texconst1); + OUT_RING(ring, view->texconst2); + OUT_RING(ring, view->texconst3); + if (view->base.texture) { + struct fd_resource *rsc = fd_resource(view->base.texture); + OUT_RELOC(ring, rsc->bo, view->offset, + (uint64_t)view->texconst5 << 32, 0); + } else { + OUT_RING(ring, 0x00000000); + OUT_RING(ring, view->texconst5); + } + OUT_RING(ring, view->texconst6); + OUT_RING(ring, view->texconst7); + OUT_RING(ring, view->texconst8); + OUT_RING(ring, view->texconst9); + OUT_RING(ring, view->texconst10); + OUT_RING(ring, view->texconst11); + } + } + + return needs_border; +} + +static void +emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring, + enum a4xx_state_block sb, struct fd_shaderbuf_stateobj *so) +{ + unsigned count = util_last_bit(so->enabled_mask); + + if (count == 0) + return; + + OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (4 * count)); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(sb) | + CP_LOAD_STATE4_0_NUM_UNIT(count)); + OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(0) | + CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); + for (unsigned i = 0; i < count; i++) { + struct pipe_shader_buffer *buf = &so->sb[i]; + if (buf->buffer) { + struct fd_resource *rsc = fd_resource(buf->buffer); + OUT_RELOCW(ring, rsc->bo, 0, 0, 0); + } else { + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + } + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + } + + OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count)); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(sb) | + CP_LOAD_STATE4_0_NUM_UNIT(count)); + OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(1) | + CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); + for (unsigned i = 0; i < count; i++) { + struct pipe_shader_buffer *buf = &so->sb[i]; + + // TODO maybe offset encoded somewhere here?? 
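+ // This second state block (STATE_TYPE 1) looks like per-SSBO sizes:
+ // buffer_size lands in the upper 16 bits of the first dword, the
+ // second dword is still unknown.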
+ OUT_RING(ring, (buf->buffer_size << 16)); + OUT_RING(ring, 0x00000000); + } + + OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count)); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(sb) | + CP_LOAD_STATE4_0_NUM_UNIT(count)); + OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(2) | + CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); + for (unsigned i = 0; i < count; i++) { + struct pipe_shader_buffer *buf = &so->sb[i]; + if (buf->buffer) { + struct fd_resource *rsc = fd_resource(buf->buffer); + OUT_RELOCW(ring, rsc->bo, 0, 0, 0); + } else { + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + } + } +} + +void +fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit) +{ + int32_t i, j; + const struct fd_vertex_state *vtx = emit->vtx; + const struct ir3_shader_variant *vp = fd5_emit_get_vp(emit); + + for (i = 0, j = 0; i <= vp->inputs_count; i++) { + if (vp->inputs[i].sysval) + continue; + if (vp->inputs[i].compmask) { + struct pipe_vertex_element *elem = &vtx->vtx->pipe[i]; + const struct pipe_vertex_buffer *vb = + &vtx->vertexbuf.vb[elem->vertex_buffer_index]; + struct fd_resource *rsc = fd_resource(vb->buffer.resource); + enum pipe_format pfmt = elem->src_format; + enum a5xx_vtx_fmt fmt = fd5_pipe2vtx(pfmt); + bool isint = util_format_is_pure_integer(pfmt); + uint32_t off = vb->buffer_offset + elem->src_offset; + uint32_t size = fd_bo_size(rsc->bo) - off; + debug_assert(fmt != ~0); + + OUT_PKT4(ring, REG_A5XX_VFD_FETCH(j), 4); + OUT_RELOC(ring, rsc->bo, off, 0, 0); + OUT_RING(ring, size); /* VFD_FETCH[j].SIZE */ + OUT_RING(ring, vb->stride); /* VFD_FETCH[j].STRIDE */ + + OUT_PKT4(ring, REG_A5XX_VFD_DECODE(j), 2); + OUT_RING(ring, A5XX_VFD_DECODE_INSTR_IDX(j) | + A5XX_VFD_DECODE_INSTR_FORMAT(fmt) | + COND(elem->instance_divisor, A5XX_VFD_DECODE_INSTR_INSTANCED) | + A5XX_VFD_DECODE_INSTR_SWAP(fd5_pipe2swap(pfmt)) | + A5XX_VFD_DECODE_INSTR_UNK30 | + COND(!isint, A5XX_VFD_DECODE_INSTR_FLOAT)); + OUT_RING(ring, MAX2(1, elem->instance_divisor)); /* VFD_DECODE[j].STEP_RATE */ + + OUT_PKT4(ring, REG_A5XX_VFD_DEST_CNTL(j), 1); + OUT_RING(ring, A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK(vp->inputs[i].compmask) | + A5XX_VFD_DEST_CNTL_INSTR_REGID(vp->inputs[i].regid)); + + j++; + } + } + + OUT_PKT4(ring, REG_A5XX_VFD_CONTROL_0, 1); + OUT_RING(ring, A5XX_VFD_CONTROL_0_VTXCNT(j)); +} + +void +fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct fd5_emit *emit) +{ + struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; + const struct ir3_shader_variant *vp = fd5_emit_get_vp(emit); + const struct ir3_shader_variant *fp = fd5_emit_get_fp(emit); + const enum fd_dirty_3d_state dirty = emit->dirty; + bool needs_border = false; + + emit_marker5(ring, 5); + + if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->key.binning_pass) { + unsigned char mrt_comp[A5XX_MAX_RENDER_TARGETS] = {0}; + + for (unsigned i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) { + mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 
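+ /* enable all four components for each bound cbuf, none otherwise: */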
0xf : 0; + } + + OUT_PKT4(ring, REG_A5XX_RB_RENDER_COMPONENTS, 1); + OUT_RING(ring, A5XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) | + A5XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) | + A5XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) | + A5XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) | + A5XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) | + A5XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) | + A5XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) | + A5XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7])); + } + + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) { + struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa); + uint32_t rb_alpha_control = zsa->rb_alpha_control; + + if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0]))) + rb_alpha_control &= ~A5XX_RB_ALPHA_CONTROL_ALPHA_TEST; + + OUT_PKT4(ring, REG_A5XX_RB_ALPHA_CONTROL, 1); + OUT_RING(ring, rb_alpha_control); + + OUT_PKT4(ring, REG_A5XX_RB_STENCIL_CONTROL, 1); + OUT_RING(ring, zsa->rb_stencil_control); + } + + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_BLEND | FD_DIRTY_PROG)) { + struct fd5_blend_stateobj *blend = fd5_blend_stateobj(ctx->blend); + struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa); + + if (pfb->zsbuf) { + struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); + uint32_t gras_lrz_cntl = zsa->gras_lrz_cntl; + + if (emit->no_lrz_write || !rsc->lrz || !rsc->lrz_valid) + gras_lrz_cntl = 0; + else if (emit->key.binning_pass && blend->lrz_write && zsa->lrz_write) + gras_lrz_cntl |= A5XX_GRAS_LRZ_CNTL_LRZ_WRITE; + + OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1); + OUT_RING(ring, gras_lrz_cntl); + } + } + + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) { + struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa); + struct pipe_stencil_ref *sr = &ctx->stencil_ref; + + OUT_PKT4(ring, REG_A5XX_RB_STENCILREFMASK, 2); + OUT_RING(ring, zsa->rb_stencilrefmask | + A5XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0])); + OUT_RING(ring, zsa->rb_stencilrefmask_bf | + A5XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1])); + } + + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) { + struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa); + bool fragz = fp->has_kill | fp->writes_pos; + + OUT_PKT4(ring, REG_A5XX_RB_DEPTH_CNTL, 1); + OUT_RING(ring, zsa->rb_depth_cntl); + + OUT_PKT4(ring, REG_A5XX_RB_DEPTH_PLANE_CNTL, 1); + OUT_RING(ring, COND(fragz, A5XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z) | + COND(fragz && fp->frag_coord, A5XX_RB_DEPTH_PLANE_CNTL_UNK1)); + + OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_PLANE_CNTL, 1); + OUT_RING(ring, COND(fragz, A5XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z) | + COND(fragz && fp->frag_coord, A5XX_GRAS_SU_DEPTH_PLANE_CNTL_UNK1)); + } + + if (dirty & FD_DIRTY_SCISSOR) { + struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); + + OUT_PKT4(ring, REG_A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0, 2); + OUT_RING(ring, A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(scissor->minx) | + A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(scissor->miny)); + OUT_RING(ring, A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(scissor->maxx - 1) | + A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(scissor->maxy - 1)); + + OUT_PKT4(ring, REG_A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0, 2); + OUT_RING(ring, A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->minx) | + A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->miny)); + OUT_RING(ring, A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->maxx - 1) | + A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->maxy - 1)); + + ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx); + ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, 
scissor->miny); + ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx); + ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy); + } + + if (dirty & FD_DIRTY_VIEWPORT) { + fd_wfi(ctx->batch, ring); + OUT_PKT4(ring, REG_A5XX_GRAS_CL_VPORT_XOFFSET_0, 6); + OUT_RING(ring, A5XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0])); + OUT_RING(ring, A5XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0])); + OUT_RING(ring, A5XX_GRAS_CL_VPORT_YOFFSET_0(ctx->viewport.translate[1])); + OUT_RING(ring, A5XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1])); + OUT_RING(ring, A5XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2])); + OUT_RING(ring, A5XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2])); + } + + if (dirty & FD_DIRTY_PROG) + fd5_program_emit(ctx, ring, emit); + + if (dirty & FD_DIRTY_RASTERIZER) { + struct fd5_rasterizer_stateobj *rasterizer = + fd5_rasterizer_stateobj(ctx->rasterizer); + + OUT_PKT4(ring, REG_A5XX_GRAS_SU_CNTL, 1); + OUT_RING(ring, rasterizer->gras_su_cntl); + + OUT_PKT4(ring, REG_A5XX_GRAS_SU_POINT_MINMAX, 2); + OUT_RING(ring, rasterizer->gras_su_point_minmax); + OUT_RING(ring, rasterizer->gras_su_point_size); + + OUT_PKT4(ring, REG_A5XX_GRAS_SU_POLY_OFFSET_SCALE, 3); + OUT_RING(ring, rasterizer->gras_su_poly_offset_scale); + OUT_RING(ring, rasterizer->gras_su_poly_offset_offset); + OUT_RING(ring, rasterizer->gras_su_poly_offset_clamp); + + OUT_PKT4(ring, REG_A5XX_PC_RASTER_CNTL, 1); + OUT_RING(ring, rasterizer->pc_raster_cntl); + + OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1); + OUT_RING(ring, rasterizer->gras_cl_clip_cntl); + } + + /* note: must come after program emit.. because there is some overlap + * in registers, ex. PC_PRIMITIVE_CNTL and we rely on some cached + * values from fd5_program_emit() to avoid having to re-emit the prog + * every time rast state changes. + * + * Since the primitive restart state is not part of a tracked object, we + * re-emit this register every time. 
+ */ + if (emit->info && ctx->rasterizer) { + struct fd5_rasterizer_stateobj *rasterizer = + fd5_rasterizer_stateobj(ctx->rasterizer); + unsigned max_loc = fd5_context(ctx)->max_loc; + + OUT_PKT4(ring, REG_A5XX_PC_PRIMITIVE_CNTL, 1); + OUT_RING(ring, rasterizer->pc_primitive_cntl | + A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC(max_loc) | + COND(emit->info->primitive_restart && emit->info->index_size, + A5XX_PC_PRIMITIVE_CNTL_PRIMITIVE_RESTART)); + } + + if (dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) { + uint32_t posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH); + unsigned nr = pfb->nr_cbufs; + + if (emit->key.binning_pass) + nr = 0; + else if (ctx->rasterizer->rasterizer_discard) + nr = 0; + + OUT_PKT4(ring, REG_A5XX_RB_FS_OUTPUT_CNTL, 1); + OUT_RING(ring, A5XX_RB_FS_OUTPUT_CNTL_MRT(nr) | + COND(fp->writes_pos, A5XX_RB_FS_OUTPUT_CNTL_FRAG_WRITES_Z)); + + OUT_PKT4(ring, REG_A5XX_SP_FS_OUTPUT_CNTL, 1); + OUT_RING(ring, A5XX_SP_FS_OUTPUT_CNTL_MRT(nr) | + A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID(posz_regid) | + A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID(regid(63, 0))); + } + + if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */ + ir3_emit_vs_consts(vp, ring, ctx, emit->info); + if (!emit->key.binning_pass) + ir3_emit_fs_consts(fp, ring, ctx); + + struct pipe_stream_output_info *info = &vp->shader->stream_output; + if (info->num_outputs) { + struct fd_streamout_stateobj *so = &ctx->streamout; + + for (unsigned i = 0; i < so->num_targets; i++) { + struct pipe_stream_output_target *target = so->targets[i]; + + if (!target) + continue; + + unsigned offset = (so->offsets[i] * info->stride[i] * 4) + + target->buffer_offset; + + OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_BASE_LO(i), 3); + /* VPC_SO[i].BUFFER_BASE_LO: */ + OUT_RELOCW(ring, fd_resource(target->buffer)->bo, 0, 0, 0); + OUT_RING(ring, target->buffer_size + offset); + + OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(i), 3); + OUT_RING(ring, offset); + /* VPC_SO[i].FLUSH_BASE_LO/HI: */ + // TODO just give hw a dummy addr for now.. we should + // be using this an then CP_MEM_TO_REG to set the + // VPC_SO[i].BUFFER_OFFSET for the next draw.. 
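+ // For now FLUSH_BASE just points at scratch space in blit_mem
+ // (offset 0x100), nothing reads it back yet: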
+ OUT_RELOCW(ring, fd5_context(ctx)->blit_mem, 0x100, 0, 0); + + emit->streamout_mask |= (1 << i); + } + } + } + + if ((dirty & FD_DIRTY_BLEND)) { + struct fd5_blend_stateobj *blend = fd5_blend_stateobj(ctx->blend); + uint32_t i; + + for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) { + enum pipe_format format = pipe_surface_format(pfb->cbufs[i]); + bool is_int = util_format_is_pure_integer(format); + bool has_alpha = util_format_has_alpha(format); + uint32_t control = blend->rb_mrt[i].control; + uint32_t blend_control = blend->rb_mrt[i].blend_control_alpha; + + if (is_int) { + control &= A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; + control |= A5XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY); + } + + if (has_alpha) { + blend_control |= blend->rb_mrt[i].blend_control_rgb; + } else { + blend_control |= blend->rb_mrt[i].blend_control_no_alpha_rgb; + control &= ~A5XX_RB_MRT_CONTROL_BLEND2; + } + + OUT_PKT4(ring, REG_A5XX_RB_MRT_CONTROL(i), 1); + OUT_RING(ring, control); + + OUT_PKT4(ring, REG_A5XX_RB_MRT_BLEND_CONTROL(i), 1); + OUT_RING(ring, blend_control); + } + + OUT_PKT4(ring, REG_A5XX_RB_BLEND_CNTL, 1); + OUT_RING(ring, blend->rb_blend_cntl | + A5XX_RB_BLEND_CNTL_SAMPLE_MASK(0xffff)); + + OUT_PKT4(ring, REG_A5XX_SP_BLEND_CNTL, 1); + OUT_RING(ring, blend->sp_blend_cntl); + } + + if (dirty & FD_DIRTY_BLEND_COLOR) { + struct pipe_blend_color *bcolor = &ctx->blend_color; + + OUT_PKT4(ring, REG_A5XX_RB_BLEND_RED, 8); + OUT_RING(ring, A5XX_RB_BLEND_RED_FLOAT(bcolor->color[0]) | + A5XX_RB_BLEND_RED_UINT(bcolor->color[0] * 0xff) | + A5XX_RB_BLEND_RED_SINT(bcolor->color[0] * 0x7f)); + OUT_RING(ring, A5XX_RB_BLEND_RED_F32(bcolor->color[0])); + OUT_RING(ring, A5XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]) | + A5XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 0xff) | + A5XX_RB_BLEND_GREEN_SINT(bcolor->color[1] * 0x7f)); + OUT_RING(ring, A5XX_RB_BLEND_RED_F32(bcolor->color[1])); + OUT_RING(ring, A5XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]) | + A5XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 0xff) | + A5XX_RB_BLEND_BLUE_SINT(bcolor->color[2] * 0x7f)); + OUT_RING(ring, A5XX_RB_BLEND_BLUE_F32(bcolor->color[2])); + OUT_RING(ring, A5XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]) | + A5XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 0xff) | + A5XX_RB_BLEND_ALPHA_SINT(bcolor->color[3] * 0x7f)); + OUT_RING(ring, A5XX_RB_BLEND_ALPHA_F32(bcolor->color[3])); + } + + if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) { + needs_border |= emit_textures(ctx, ring, SB4_VS_TEX, + &ctx->tex[PIPE_SHADER_VERTEX]); + OUT_PKT4(ring, REG_A5XX_TPL1_VS_TEX_COUNT, 1); + OUT_RING(ring, ctx->tex[PIPE_SHADER_VERTEX].num_textures); + } + + if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) { + needs_border |= emit_textures(ctx, ring, SB4_FS_TEX, + &ctx->tex[PIPE_SHADER_FRAGMENT]); + OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 1); + OUT_RING(ring, ctx->tex[PIPE_SHADER_FRAGMENT].num_textures); + } + + OUT_PKT4(ring, REG_A5XX_TPL1_CS_TEX_COUNT, 1); + OUT_RING(ring, 0); + + if (needs_border) + emit_border_color(ctx, ring); + + if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO) + emit_ssbos(ctx, ring, SB4_SSBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT]); +} + +void +fd5_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct ir3_shader_variant *cp) +{ + enum fd_dirty_shader_state dirty = ctx->dirty_shader[PIPE_SHADER_COMPUTE]; + + if (dirty & FD_DIRTY_SHADER_TEX) { + bool needs_border = false; + needs_border |= emit_textures(ctx, ring, SB4_CS_TEX, + &ctx->tex[PIPE_SHADER_COMPUTE]); + + if (needs_border) + 
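+ /* at least one bound sampler needs a border color, so upload
+  * the border color table: */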
emit_border_color(ctx, ring); + + OUT_PKT4(ring, REG_A5XX_TPL1_VS_TEX_COUNT, 1); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A5XX_TPL1_HS_TEX_COUNT, 1); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A5XX_TPL1_DS_TEX_COUNT, 1); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A5XX_TPL1_GS_TEX_COUNT, 1); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 1); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A5XX_TPL1_CS_TEX_COUNT, 1); + OUT_RING(ring, ctx->tex[PIPE_SHADER_COMPUTE].num_textures); + } + + if (dirty & FD_DIRTY_SHADER_SSBO) + emit_ssbos(ctx, ring, SB4_CS_SSBO, &ctx->shaderbuf[PIPE_SHADER_COMPUTE]); +} + +/* emit setup at begin of new cmdstream buffer (don't rely on previous + * state, there could have been a context switch between ioctls): + */ +void +fd5_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) +{ + struct fd_context *ctx = batch->ctx; + + fd5_set_render_mode(ctx, ring, BYPASS); + fd5_cache_flush(batch, ring); + + OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1); + OUT_RING(ring, 0xfffff); + +/* +t7 opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords) +0000000500024048: 70d08003 00000000 001c5000 00000005 +t7 opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords) +0000000500024058: 70d08003 00000010 001c7000 00000005 + +t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) +0000000500024068: 70268000 +*/ + + OUT_PKT4(ring, REG_A5XX_PC_RESTART_INDEX, 1); + OUT_RING(ring, 0xffffffff); + + OUT_PKT4(ring, REG_A5XX_PC_RASTER_CNTL, 1); + OUT_RING(ring, 0x00000012); + + OUT_PKT4(ring, REG_A5XX_GRAS_SU_POINT_MINMAX, 2); + OUT_RING(ring, A5XX_GRAS_SU_POINT_MINMAX_MIN(1.0) | + A5XX_GRAS_SU_POINT_MINMAX_MAX(4092.0)); + OUT_RING(ring, A5XX_GRAS_SU_POINT_SIZE(0.5)); + + OUT_PKT4(ring, REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL, 1); + OUT_RING(ring, 0x00000000); /* GRAS_SU_CONSERVATIVE_RAS_CNTL */ + + OUT_PKT4(ring, REG_A5XX_GRAS_SC_SCREEN_SCISSOR_CNTL, 1); + OUT_RING(ring, 0x00000000); /* GRAS_SC_SCREEN_SCISSOR_CNTL */ + + OUT_PKT4(ring, REG_A5XX_SP_VS_CONFIG_MAX_CONST, 1); + OUT_RING(ring, 0); /* SP_VS_CONFIG_MAX_CONST */ + + OUT_PKT4(ring, REG_A5XX_SP_FS_CONFIG_MAX_CONST, 1); + OUT_RING(ring, 0); /* SP_FS_CONFIG_MAX_CONST */ + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E292, 2); + OUT_RING(ring, 0x00000000); /* UNKNOWN_E292 */ + OUT_RING(ring, 0x00000000); /* UNKNOWN_E293 */ + + OUT_PKT4(ring, REG_A5XX_RB_MODE_CNTL, 1); + OUT_RING(ring, 0x00000044); /* RB_MODE_CNTL */ + + OUT_PKT4(ring, REG_A5XX_RB_DBG_ECO_CNTL, 1); + OUT_RING(ring, 0x00100000); /* RB_DBG_ECO_CNTL */ + + OUT_PKT4(ring, REG_A5XX_VFD_MODE_CNTL, 1); + OUT_RING(ring, 0x00000000); /* VFD_MODE_CNTL */ + + OUT_PKT4(ring, REG_A5XX_PC_MODE_CNTL, 1); + OUT_RING(ring, 0x0000001f); /* PC_MODE_CNTL */ + + OUT_PKT4(ring, REG_A5XX_SP_MODE_CNTL, 1); + OUT_RING(ring, 0x0000001e); /* SP_MODE_CNTL */ + + OUT_PKT4(ring, REG_A5XX_SP_DBG_ECO_CNTL, 1); + OUT_RING(ring, 0x40000800); /* SP_DBG_ECO_CNTL */ + + OUT_PKT4(ring, REG_A5XX_TPL1_MODE_CNTL, 1); + OUT_RING(ring, 0x00000544); /* TPL1_MODE_CNTL */ + + OUT_PKT4(ring, REG_A5XX_HLSQ_TIMEOUT_THRESHOLD_0, 2); + OUT_RING(ring, 0x00000080); /* HLSQ_TIMEOUT_THRESHOLD_0 */ + OUT_RING(ring, 0x00000000); /* HLSQ_TIMEOUT_THRESHOLD_1 */ + + OUT_PKT4(ring, REG_A5XX_VPC_DBG_ECO_CNTL, 1); + OUT_RING(ring, 0x00000400); /* VPC_DBG_ECO_CNTL */ + + OUT_PKT4(ring, REG_A5XX_HLSQ_MODE_CNTL, 1); + OUT_RING(ring, 0x00000001); /* HLSQ_MODE_CNTL */ + + OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1); + OUT_RING(ring, 0x00000000); /* VPC_MODE_CNTL */ + + /* we don't use this yet.. probably best to disable.. 
*/ + OUT_PKT7(ring, CP_SET_DRAW_STATE, 3); + OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) | + CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS | + CP_SET_DRAW_STATE__0_GROUP_ID(0)); + OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0)); + OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0)); + + OUT_PKT4(ring, REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL, 1); + OUT_RING(ring, 0x00000000); /* GRAS_SU_CONSERVATIVE_RAS_CNTL */ + + OUT_PKT4(ring, REG_A5XX_GRAS_SC_BIN_CNTL, 1); + OUT_RING(ring, 0x00000000); /* GRAS_SC_BIN_CNTL */ + + OUT_PKT4(ring, REG_A5XX_GRAS_SC_BIN_CNTL, 1); + OUT_RING(ring, 0x00000000); /* GRAS_SC_BIN_CNTL */ + + OUT_PKT4(ring, REG_A5XX_VPC_FS_PRIMITIVEID_CNTL, 1); + OUT_RING(ring, 0x000000ff); /* VPC_FS_PRIMITIVEID_CNTL */ + + OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1); + OUT_RING(ring, A5XX_VPC_SO_OVERRIDE_SO_DISABLE); + + OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_BASE_LO(0), 3); + OUT_RING(ring, 0x00000000); /* VPC_SO_BUFFER_BASE_LO_0 */ + OUT_RING(ring, 0x00000000); /* VPC_SO_BUFFER_BASE_HI_0 */ + OUT_RING(ring, 0x00000000); /* VPC_SO_BUFFER_SIZE_0 */ + + OUT_PKT4(ring, REG_A5XX_VPC_SO_FLUSH_BASE_LO(0), 2); + OUT_RING(ring, 0x00000000); /* VPC_SO_FLUSH_BASE_LO_0 */ + OUT_RING(ring, 0x00000000); /* VPC_SO_FLUSH_BASE_HI_0 */ + + OUT_PKT4(ring, REG_A5XX_PC_GS_PARAM, 1); + OUT_RING(ring, 0x00000000); /* PC_GS_PARAM */ + + OUT_PKT4(ring, REG_A5XX_PC_HS_PARAM, 1); + OUT_RING(ring, 0x00000000); /* PC_HS_PARAM */ + + OUT_PKT4(ring, REG_A5XX_TPL1_TP_FS_ROTATION_CNTL, 1); + OUT_RING(ring, 0x00000000); /* TPL1_TP_FS_ROTATION_CNTL */ + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E001, 1); + OUT_RING(ring, 0x00000000); /* UNKNOWN_E001 */ + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E004, 1); + OUT_RING(ring, 0x00000000); /* UNKNOWN_E004 */ + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E093, 1); + OUT_RING(ring, 0x00000000); /* UNKNOWN_E093 */ + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E29A, 1); + OUT_RING(ring, 0x00ffff00); /* UNKNOWN_E29A */ + + OUT_PKT4(ring, REG_A5XX_VPC_SO_BUF_CNTL, 1); + OUT_RING(ring, 0x00000000); /* VPC_SO_BUF_CNTL */ + + OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(0), 1); + OUT_RING(ring, 0x00000000); /* UNKNOWN_E2AB */ + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E389, 1); + OUT_RING(ring, 0x00000000); /* UNKNOWN_E389 */ + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E38D, 1); + OUT_RING(ring, 0x00000000); /* UNKNOWN_E38D */ + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E5AB, 1); + OUT_RING(ring, 0x00000000); /* UNKNOWN_E5AB */ + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E5C2, 1); + OUT_RING(ring, 0x00000000); /* UNKNOWN_E5C2 */ + + OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_BASE_LO(1), 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(1), 6); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(2), 6); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(3), 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E5DB, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E600, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E640, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_TPL1_VS_TEX_COUNT, 
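+ /* presumably the VS/HS/DS/GS TEX_COUNT regs, zeroed as a block of 4: */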
4); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 2); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7C0, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7C5, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7CA, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7CF, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7D4, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7D9, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1); + OUT_RING(ring, 0x00000000); +} + +static void +fd5_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) +{ + __OUT_IB5(ring, target); +} + +void +fd5_emit_init(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->emit_const = fd5_emit_const; + ctx->emit_const_bo = fd5_emit_const_bo; + ctx->emit_ib = fd5_emit_ib; +} diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.h b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.h new file mode 100644 index 000000000..2d8a0fd09 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.h @@ -0,0 +1,199 @@ +/* + * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#ifndef FD5_EMIT_H +#define FD5_EMIT_H + +#include "pipe/p_context.h" + +#include "freedreno_context.h" +#include "fd5_context.h" +#include "fd5_format.h" +#include "fd5_program.h" +#include "ir3_shader.h" + +struct fd_ringbuffer; + +/* grouped together emit-state for prog/vertex/state emit: */ +struct fd5_emit { + struct pipe_debug_callback *debug; + const struct fd_vertex_state *vtx; + const struct fd_program_stateobj *prog; + const struct pipe_draw_info *info; + struct ir3_shader_key key; + enum fd_dirty_3d_state dirty; + + uint32_t sprite_coord_enable; /* bitmask */ + bool sprite_coord_mode; + bool rasterflat; + bool no_decode_srgb; + + /* in binning pass, we don't have real frag shader, so we + * don't know if real draw disqualifies lrz write. So just + * figure that out up-front and stash it in the emit. + */ + bool no_lrz_write; + + /* cached to avoid repeated lookups of same variants: */ + const struct ir3_shader_variant *vp, *fp; + /* TODO: other shader stages.. */ + + unsigned streamout_mask; +}; + +static inline enum a5xx_color_fmt fd5_emit_format(struct pipe_surface *surf) +{ + if (!surf) + return 0; + return fd5_pipe2color(surf->format); +} + +static inline const struct ir3_shader_variant * +fd5_emit_get_vp(struct fd5_emit *emit) +{ + if (!emit->vp) { + struct fd5_shader_stateobj *so = emit->prog->vp; + emit->vp = ir3_shader_variant(so->shader, emit->key, emit->debug); + } + return emit->vp; +} + +static inline const struct ir3_shader_variant * +fd5_emit_get_fp(struct fd5_emit *emit) +{ + if (!emit->fp) { + if (emit->key.binning_pass) { + /* use dummy stateobj to simplify binning vs non-binning: */ + static const struct ir3_shader_variant binning_fp = {}; + emit->fp = &binning_fp; + } else { + struct fd5_shader_stateobj *so = emit->prog->fp; + emit->fp = ir3_shader_variant(so->shader, emit->key, emit->debug); + } + } + return emit->fp; +} + +static inline void +fd5_cache_flush(struct fd_batch *batch, struct fd_ringbuffer *ring) +{ + fd_reset_wfi(batch); + OUT_PKT4(ring, REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_LO, 5); + OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_LO */ + OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_HI */ + OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_LO */ + OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_HI */ + OUT_RING(ring, 0x00000012); /* UCHE_CACHE_INVALIDATE */ + fd_wfi(batch, ring); +} + +static inline void +fd5_set_render_mode(struct fd_context *ctx, struct fd_ringbuffer *ring, + enum render_mode_cmd mode) +{ + /* TODO add preemption support, gmem bypass, etc */ + emit_marker5(ring, 7); + OUT_PKT7(ring, CP_SET_RENDER_MODE, 5); + OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(mode)); + OUT_RING(ring, 0x00000000); /* ADDR_LO */ + OUT_RING(ring, 0x00000000); /* ADDR_HI */ + OUT_RING(ring, COND(mode == GMEM, CP_SET_RENDER_MODE_3_GMEM_ENABLE) | + COND(mode == BINNING, CP_SET_RENDER_MODE_3_VSC_ENABLE)); + OUT_RING(ring, 0x00000000); + emit_marker5(ring, 7); +} + +static inline void +fd5_emit_blit(struct fd_context *ctx, struct fd_ringbuffer *ring) +{ + struct fd5_context *fd5_ctx = fd5_context(ctx); + + emit_marker5(ring, 7); + + OUT_PKT7(ring, CP_EVENT_WRITE, 4); + OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(BLIT)); + OUT_RELOCW(ring, fd5_ctx->blit_mem, 0, 0, 0); /* ADDR_LO/HI */ + OUT_RING(ring, 0x00000000); + + emit_marker5(ring, 7); +} + +static inline void +fd5_emit_render_cntl(struct fd_context *ctx, bool blit, bool binning) +{ + struct fd_ringbuffer *ring = 
binning ? ctx->batch->binning : ctx->batch->draw; + + /* TODO eventually this partially depends on the pfb state, ie. + * which of the cbuf(s)/zsbuf has an UBWC flag buffer.. that part + * we could probably cache and just regenerate if framebuffer + * state is dirty (or something like that).. + * + * Other bits seem to depend on query state, like if samples-passed + * query is active. + */ + bool samples_passed = (fd5_context(ctx)->samples_passed_queries > 0); + OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1); + OUT_RING(ring, 0x00000000 | /* RB_RENDER_CNTL */ + COND(binning, A5XX_RB_RENDER_CNTL_BINNING_PASS) | + COND(binning, A5XX_RB_RENDER_CNTL_DISABLE_COLOR_PIPE) | + COND(samples_passed, A5XX_RB_RENDER_CNTL_SAMPLES_PASSED) | + COND(!blit, 0x8)); + + OUT_PKT4(ring, REG_A5XX_GRAS_SC_CNTL, 1); + OUT_RING(ring, 0x00000008 | /* GRAS_SC_CNTL */ + COND(binning, A5XX_GRAS_SC_CNTL_BINNING_PASS) | + COND(samples_passed, A5XX_GRAS_SC_CNTL_SAMPLES_PASSED)); +} + +static inline void +fd5_emit_lrz_flush(struct fd_ringbuffer *ring) +{ + /* TODO I think the extra writes to GRAS_LRZ_CNTL are probably + * a workaround and not needed on all a5xx. + */ + OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1); + OUT_RING(ring, A5XX_GRAS_LRZ_CNTL_ENABLE); + + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, LRZ_FLUSH); + + OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1); + OUT_RING(ring, 0x0); +} + +void fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit); + +void fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct fd5_emit *emit); + +void fd5_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct ir3_shader_variant *cp); + +void fd5_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring); + +void fd5_emit_init(struct pipe_context *pctx); + +#endif /* FD5_EMIT_H */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_format.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_format.c new file mode 100644 index 000000000..ae5cc839f --- /dev/null +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_format.c @@ -0,0 +1,445 @@ +/* + * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#include "pipe/p_defines.h" +#include "util/u_format.h" + +#include "fd5_format.h" + + +/* Specifies the table of all the formats and their features. 
Also supplies + * the helpers that look up various data in those tables. + */ + +struct fd5_format { + enum a5xx_vtx_fmt vtx; + enum a5xx_tex_fmt tex; + enum a5xx_color_fmt rb; + enum a3xx_color_swap swap; + boolean present; +}; + +#define RB5_NONE ~0 + +/* vertex + texture */ +#define VT(pipe, fmt, rbfmt, swapfmt) \ + [PIPE_FORMAT_ ## pipe] = { \ + .present = 1, \ + .vtx = VFMT5_ ## fmt, \ + .tex = TFMT5_ ## fmt, \ + .rb = RB5_ ## rbfmt, \ + .swap = swapfmt \ + } + +/* texture-only */ +#define _T(pipe, fmt, rbfmt, swapfmt) \ + [PIPE_FORMAT_ ## pipe] = { \ + .present = 1, \ + .vtx = ~0, \ + .tex = TFMT5_ ## fmt, \ + .rb = RB5_ ## rbfmt, \ + .swap = swapfmt \ + } + +/* vertex-only */ +#define V_(pipe, fmt, rbfmt, swapfmt) \ + [PIPE_FORMAT_ ## pipe] = { \ + .present = 1, \ + .vtx = VFMT5_ ## fmt, \ + .tex = ~0, \ + .rb = RB5_ ## rbfmt, \ + .swap = swapfmt \ + } + +static struct fd5_format formats[PIPE_FORMAT_COUNT] = { + /* 8-bit */ + VT(R8_UNORM, 8_UNORM, R8_UNORM, WZYX), + VT(R8_SNORM, 8_SNORM, R8_SNORM, WZYX), + VT(R8_UINT, 8_UINT, R8_UINT, WZYX), + VT(R8_SINT, 8_SINT, R8_SINT, WZYX), + V_(R8_USCALED, 8_UINT, NONE, WZYX), + V_(R8_SSCALED, 8_UINT, NONE, WZYX), + + _T(A8_UNORM, 8_UNORM, A8_UNORM, WZYX), + _T(L8_UNORM, 8_UNORM, R8_UNORM, WZYX), + _T(I8_UNORM, 8_UNORM, NONE, WZYX), + + _T(A8_UINT, 8_UINT, NONE, WZYX), + _T(A8_SINT, 8_SINT, NONE, WZYX), + _T(L8_UINT, 8_UINT, NONE, WZYX), + _T(L8_SINT, 8_SINT, NONE, WZYX), + _T(I8_UINT, 8_UINT, NONE, WZYX), + _T(I8_SINT, 8_SINT, NONE, WZYX), + + _T(S8_UINT, 8_UINT, R8_UNORM, WZYX), + + /* 16-bit */ + VT(R16_UNORM, 16_UNORM, R16_UNORM, WZYX), + VT(R16_SNORM, 16_SNORM, R16_SNORM, WZYX), + VT(R16_UINT, 16_UINT, R16_UINT, WZYX), + VT(R16_SINT, 16_SINT, R16_SINT, WZYX), + V_(R16_USCALED, 16_UINT, NONE, WZYX), + V_(R16_SSCALED, 16_UINT, NONE, WZYX), + VT(R16_FLOAT, 16_FLOAT, R16_FLOAT, WZYX), + + _T(A16_UNORM, 16_UNORM, NONE, WZYX), + _T(A16_SNORM, 16_SNORM, NONE, WZYX), + _T(A16_UINT, 16_UINT, NONE, WZYX), + _T(A16_SINT, 16_SINT, NONE, WZYX), + _T(L16_UNORM, 16_UNORM, NONE, WZYX), + _T(L16_SNORM, 16_SNORM, NONE, WZYX), + _T(L16_UINT, 16_UINT, NONE, WZYX), + _T(L16_SINT, 16_SINT, NONE, WZYX), + _T(I16_UNORM, 16_UNORM, NONE, WZYX), + _T(I16_SNORM, 16_SNORM, NONE, WZYX), + _T(I16_UINT, 16_UINT, NONE, WZYX), + _T(I16_SINT, 16_SINT, NONE, WZYX), + + VT(R8G8_UNORM, 8_8_UNORM, R8G8_UNORM, WZYX), + VT(R8G8_SNORM, 8_8_SNORM, R8G8_SNORM, WZYX), + VT(R8G8_UINT, 8_8_UINT, R8G8_UINT, WZYX), + VT(R8G8_SINT, 8_8_SINT, R8G8_SINT, WZYX), + V_(R8G8_USCALED, 8_8_UINT, NONE, WZYX), + V_(R8G8_SSCALED, 8_8_SINT, NONE, WZYX), + + _T(L8A8_UINT, 8_8_UINT, NONE, WZYX), + _T(L8A8_SINT, 8_8_SINT, NONE, WZYX), + + _T(B5G6R5_UNORM, 5_6_5_UNORM, R5G6B5_UNORM, WXYZ), + _T(B5G5R5A1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ), + _T(B5G5R5X1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ), + _T(B4G4R4A4_UNORM, 4_4_4_4_UNORM, R4G4B4A4_UNORM, WXYZ), + + /* 24-bit */ + V_(R8G8B8_UNORM, 8_8_8_UNORM, NONE, WZYX), + V_(R8G8B8_SNORM, 8_8_8_SNORM, NONE, WZYX), + V_(R8G8B8_UINT, 8_8_8_UINT, NONE, WZYX), + V_(R8G8B8_SINT, 8_8_8_SINT, NONE, WZYX), + V_(R8G8B8_USCALED, 8_8_8_UINT, NONE, WZYX), + V_(R8G8B8_SSCALED, 8_8_8_SINT, NONE, WZYX), + + /* 32-bit */ + VT(R32_UINT, 32_UINT, R32_UINT, WZYX), + VT(R32_SINT, 32_SINT, R32_SINT, WZYX), + V_(R32_USCALED, 32_UINT, NONE, WZYX), + V_(R32_SSCALED, 32_UINT, NONE, WZYX), + VT(R32_FLOAT, 32_FLOAT, R32_FLOAT,WZYX), + V_(R32_FIXED, 32_FIXED, NONE, WZYX), + + _T(A32_UINT, 32_UINT, NONE, WZYX), + _T(A32_SINT, 32_SINT, NONE, WZYX), + _T(L32_UINT, 32_UINT, NONE, 
WZYX), + _T(L32_SINT, 32_SINT, NONE, WZYX), + _T(I32_UINT, 32_UINT, NONE, WZYX), + _T(I32_SINT, 32_SINT, NONE, WZYX), + + VT(R16G16_UNORM, 16_16_UNORM, R16G16_UNORM, WZYX), + VT(R16G16_SNORM, 16_16_SNORM, R16G16_SNORM, WZYX), + VT(R16G16_UINT, 16_16_UINT, R16G16_UINT, WZYX), + VT(R16G16_SINT, 16_16_SINT, R16G16_SINT, WZYX), + VT(R16G16_USCALED, 16_16_UINT, NONE, WZYX), + VT(R16G16_SSCALED, 16_16_SINT, NONE, WZYX), + VT(R16G16_FLOAT, 16_16_FLOAT, R16G16_FLOAT, WZYX), + + _T(L16A16_UNORM, 16_16_UNORM, NONE, WZYX), + _T(L16A16_SNORM, 16_16_SNORM, NONE, WZYX), + _T(L16A16_UINT, 16_16_UINT, NONE, WZYX), + _T(L16A16_SINT, 16_16_SINT, NONE, WZYX), + + VT(R8G8B8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), + _T(R8G8B8X8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), + _T(R8G8B8A8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), + _T(R8G8B8X8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), + VT(R8G8B8A8_SNORM, 8_8_8_8_SNORM, R8G8B8A8_SNORM, WZYX), + VT(R8G8B8A8_UINT, 8_8_8_8_UINT, R8G8B8A8_UINT, WZYX), + VT(R8G8B8A8_SINT, 8_8_8_8_SINT, R8G8B8A8_SINT, WZYX), + V_(R8G8B8A8_USCALED, 8_8_8_8_UINT, NONE, WZYX), + V_(R8G8B8A8_SSCALED, 8_8_8_8_SINT, NONE, WZYX), + + VT(B8G8R8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ), + _T(B8G8R8X8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ), + VT(B8G8R8A8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ), + _T(B8G8R8X8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ), + + VT(A8B8G8R8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW), + _T(X8B8G8R8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW), + _T(A8B8G8R8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW), + _T(X8B8G8R8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW), + + VT(A8R8G8B8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW), + _T(X8R8G8B8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW), + _T(A8R8G8B8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW), + _T(X8R8G8B8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW), + + VT(R10G10B10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WZYX), + VT(B10G10R10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ), + _T(B10G10R10X2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ), + V_(R10G10B10A2_SNORM, 10_10_10_2_SNORM, NONE, WZYX), + V_(B10G10R10A2_SNORM, 10_10_10_2_SNORM, NONE, WXYZ), + VT(R10G10B10A2_UINT, 10_10_10_2_UINT, R10G10B10A2_UINT, WZYX), + VT(B10G10R10A2_UINT, 10_10_10_2_UINT, R10G10B10A2_UINT, WXYZ), + V_(R10G10B10A2_USCALED, 10_10_10_2_UINT, NONE, WZYX), + V_(B10G10R10A2_USCALED, 10_10_10_2_UINT, NONE, WXYZ), + V_(R10G10B10A2_SSCALED, 10_10_10_2_SINT, NONE, WZYX), + V_(B10G10R10A2_SSCALED, 10_10_10_2_SINT, NONE, WXYZ), + + VT(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX), + _T(R9G9B9E5_FLOAT, 9_9_9_E5_FLOAT, NONE, WZYX), + + _T(Z24X8_UNORM, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), + _T(Z24_UNORM_S8_UINT, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), + _T(Z32_FLOAT, 32_FLOAT, R8G8B8A8_UNORM, WZYX), + _T(Z32_FLOAT_S8X24_UINT, 32_FLOAT,R8G8B8A8_UNORM, WZYX), + + /* 48-bit */ + V_(R16G16B16_UNORM, 16_16_16_UNORM, NONE, WZYX), + V_(R16G16B16_SNORM, 16_16_16_SNORM, NONE, WZYX), + V_(R16G16B16_UINT, 16_16_16_UINT, NONE, WZYX), + V_(R16G16B16_SINT, 16_16_16_SINT, NONE, WZYX), + V_(R16G16B16_USCALED, 16_16_16_UINT, NONE, WZYX), + V_(R16G16B16_SSCALED, 16_16_16_SINT, NONE, WZYX), + V_(R16G16B16_FLOAT, 16_16_16_FLOAT, NONE, WZYX), + + /* 64-bit */ + VT(R16G16B16A16_UNORM, 16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX), + VT(R16G16B16X16_UNORM, 16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX), + VT(R16G16B16A16_SNORM, 16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX), + VT(R16G16B16X16_SNORM, 16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX), + 
VT(R16G16B16A16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX), + VT(R16G16B16X16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX), + VT(R16G16B16A16_SINT, 16_16_16_16_SINT, R16G16B16A16_SINT, WZYX), + VT(R16G16B16X16_SINT, 16_16_16_16_SINT, R16G16B16A16_SINT, WZYX), + VT(R16G16B16A16_USCALED, 16_16_16_16_UINT, NONE, WZYX), + VT(R16G16B16A16_SSCALED, 16_16_16_16_SINT, NONE, WZYX), + VT(R16G16B16A16_FLOAT, 16_16_16_16_FLOAT, R16G16B16A16_FLOAT, WZYX), + VT(R16G16B16X16_FLOAT, 16_16_16_16_FLOAT, R16G16B16A16_FLOAT, WZYX), + + VT(R32G32_UINT, 32_32_UINT, R32G32_UINT, WZYX), + VT(R32G32_SINT, 32_32_SINT, R32G32_SINT, WZYX), + V_(R32G32_USCALED, 32_32_UINT, NONE, WZYX), + V_(R32G32_SSCALED, 32_32_SINT, NONE, WZYX), + VT(R32G32_FLOAT, 32_32_FLOAT, R32G32_FLOAT,WZYX), + V_(R32G32_FIXED, 32_32_FIXED, NONE, WZYX), + + _T(L32A32_UINT, 32_32_UINT, NONE, WZYX), + _T(L32A32_SINT, 32_32_SINT, NONE, WZYX), + + /* 96-bit */ + VT(R32G32B32_UINT, 32_32_32_UINT, NONE, WZYX), + VT(R32G32B32_SINT, 32_32_32_SINT, NONE, WZYX), + V_(R32G32B32_USCALED, 32_32_32_UINT, NONE, WZYX), + V_(R32G32B32_SSCALED, 32_32_32_SINT, NONE, WZYX), + VT(R32G32B32_FLOAT, 32_32_32_FLOAT, NONE, WZYX), + V_(R32G32B32_FIXED, 32_32_32_FIXED, NONE, WZYX), + + /* 128-bit */ + VT(R32G32B32A32_UINT, 32_32_32_32_UINT, R32G32B32A32_UINT, WZYX), + _T(R32G32B32X32_UINT, 32_32_32_32_UINT, R32G32B32A32_UINT, WZYX), + VT(R32G32B32A32_SINT, 32_32_32_32_SINT, R32G32B32A32_SINT, WZYX), + _T(R32G32B32X32_SINT, 32_32_32_32_SINT, R32G32B32A32_SINT, WZYX), + V_(R32G32B32A32_USCALED, 32_32_32_32_UINT, NONE, WZYX), + V_(R32G32B32A32_SSCALED, 32_32_32_32_SINT, NONE, WZYX), + VT(R32G32B32A32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX), + _T(R32G32B32X32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX), + V_(R32G32B32A32_FIXED, 32_32_32_32_FIXED, NONE, WZYX), + + /* compressed */ + _T(ETC1_RGB8, ETC1, NONE, WZYX), + _T(ETC2_RGB8, ETC2_RGB8, NONE, WZYX), + _T(ETC2_SRGB8, ETC2_RGB8, NONE, WZYX), + _T(ETC2_RGB8A1, ETC2_RGB8A1, NONE, WZYX), + _T(ETC2_SRGB8A1, ETC2_RGB8A1, NONE, WZYX), + _T(ETC2_RGBA8, ETC2_RGBA8, NONE, WZYX), + _T(ETC2_SRGBA8, ETC2_RGBA8, NONE, WZYX), + _T(ETC2_R11_UNORM, ETC2_R11_UNORM, NONE, WZYX), + _T(ETC2_R11_SNORM, ETC2_R11_SNORM, NONE, WZYX), + _T(ETC2_RG11_UNORM, ETC2_RG11_UNORM, NONE, WZYX), + _T(ETC2_RG11_SNORM, ETC2_RG11_SNORM, NONE, WZYX), + + _T(DXT1_RGB, DXT1, NONE, WZYX), + _T(DXT1_SRGB, DXT1, NONE, WZYX), + _T(DXT1_RGBA, DXT1, NONE, WZYX), + _T(DXT1_SRGBA, DXT1, NONE, WZYX), + _T(DXT3_RGBA, DXT3, NONE, WZYX), + _T(DXT3_SRGBA, DXT3, NONE, WZYX), + _T(DXT5_RGBA, DXT5, NONE, WZYX), + _T(DXT5_SRGBA, DXT5, NONE, WZYX), + + _T(BPTC_RGBA_UNORM, BPTC, NONE, WZYX), + _T(BPTC_SRGBA, BPTC, NONE, WZYX), + _T(BPTC_RGB_FLOAT, BPTC_FLOAT, NONE, WZYX), + _T(BPTC_RGB_UFLOAT, BPTC_UFLOAT, NONE, WZYX), + + _T(RGTC1_UNORM, RGTC1_UNORM, NONE, WZYX), + _T(RGTC1_SNORM, RGTC1_SNORM, NONE, WZYX), + _T(RGTC2_UNORM, RGTC2_UNORM, NONE, WZYX), + _T(RGTC2_SNORM, RGTC2_SNORM, NONE, WZYX), + _T(LATC1_UNORM, RGTC1_UNORM, NONE, WZYX), + _T(LATC1_SNORM, RGTC1_SNORM, NONE, WZYX), + _T(LATC2_UNORM, RGTC2_UNORM, NONE, WZYX), + _T(LATC2_SNORM, RGTC2_SNORM, NONE, WZYX), + + _T(ASTC_4x4, ASTC_4x4, NONE, WZYX), + _T(ASTC_5x4, ASTC_5x4, NONE, WZYX), + _T(ASTC_5x5, ASTC_5x5, NONE, WZYX), + _T(ASTC_6x5, ASTC_6x5, NONE, WZYX), + _T(ASTC_6x6, ASTC_6x6, NONE, WZYX), + _T(ASTC_8x5, ASTC_8x5, NONE, WZYX), + _T(ASTC_8x6, ASTC_8x6, NONE, WZYX), + _T(ASTC_8x8, ASTC_8x8, NONE, WZYX), + _T(ASTC_10x5, ASTC_10x5, NONE, WZYX), + _T(ASTC_10x6, ASTC_10x6, NONE, WZYX), + 
_T(ASTC_10x8, ASTC_10x8, NONE, WZYX), + _T(ASTC_10x10, ASTC_10x10, NONE, WZYX), + _T(ASTC_12x10, ASTC_12x10, NONE, WZYX), + _T(ASTC_12x12, ASTC_12x12, NONE, WZYX), + + _T(ASTC_4x4_SRGB, ASTC_4x4, NONE, WZYX), + _T(ASTC_5x4_SRGB, ASTC_5x4, NONE, WZYX), + _T(ASTC_5x5_SRGB, ASTC_5x5, NONE, WZYX), + _T(ASTC_6x5_SRGB, ASTC_6x5, NONE, WZYX), + _T(ASTC_6x6_SRGB, ASTC_6x6, NONE, WZYX), + _T(ASTC_8x5_SRGB, ASTC_8x5, NONE, WZYX), + _T(ASTC_8x6_SRGB, ASTC_8x6, NONE, WZYX), + _T(ASTC_8x8_SRGB, ASTC_8x8, NONE, WZYX), + _T(ASTC_10x5_SRGB, ASTC_10x5, NONE, WZYX), + _T(ASTC_10x6_SRGB, ASTC_10x6, NONE, WZYX), + _T(ASTC_10x8_SRGB, ASTC_10x8, NONE, WZYX), + _T(ASTC_10x10_SRGB, ASTC_10x10, NONE, WZYX), + _T(ASTC_12x10_SRGB, ASTC_12x10, NONE, WZYX), + _T(ASTC_12x12_SRGB, ASTC_12x12, NONE, WZYX), +}; + +/* convert pipe format to vertex buffer format: */ +enum a5xx_vtx_fmt +fd5_pipe2vtx(enum pipe_format format) +{ + if (!formats[format].present) + return ~0; + return formats[format].vtx; +} + +/* convert pipe format to texture sampler format: */ +enum a5xx_tex_fmt +fd5_pipe2tex(enum pipe_format format) +{ + if (!formats[format].present) + return ~0; + return formats[format].tex; +} + +/* convert pipe format to MRT / copydest format used for render-target: */ +enum a5xx_color_fmt +fd5_pipe2color(enum pipe_format format) +{ + if (!formats[format].present) + return ~0; + return formats[format].rb; +} + +enum a3xx_color_swap +fd5_pipe2swap(enum pipe_format format) +{ + if (!formats[format].present) + return WZYX; + return formats[format].swap; +} + +// XXX possibly same as a4xx.. +enum a5xx_tex_fetchsize +fd5_pipe2fetchsize(enum pipe_format format) +{ + if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) + format = PIPE_FORMAT_Z32_FLOAT; + + if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC) + return TFETCH5_16_BYTE; + + switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) { + case 8: return TFETCH5_1_BYTE; + case 16: return TFETCH5_2_BYTE; + case 32: return TFETCH5_4_BYTE; + case 64: return TFETCH5_8_BYTE; + case 96: return TFETCH5_1_BYTE; /* Does this matter? 
*/ + case 128: return TFETCH5_16_BYTE; + default: + debug_printf("Unknown block size for format %s: %d\n", + util_format_name(format), + util_format_get_blocksizebits(format)); + return TFETCH5_1_BYTE; + } +} + +enum a5xx_depth_format +fd5_pipe2depth(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + return DEPTH5_16; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + return DEPTH5_24_8; + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + return DEPTH5_32; + default: + return ~0; + } +} + +static inline enum a5xx_tex_swiz +tex_swiz(unsigned swiz) +{ + switch (swiz) { + default: + case PIPE_SWIZZLE_X: return A5XX_TEX_X; + case PIPE_SWIZZLE_Y: return A5XX_TEX_Y; + case PIPE_SWIZZLE_Z: return A5XX_TEX_Z; + case PIPE_SWIZZLE_W: return A5XX_TEX_W; + case PIPE_SWIZZLE_0: return A5XX_TEX_ZERO; + case PIPE_SWIZZLE_1: return A5XX_TEX_ONE; + } +} + +uint32_t +fd5_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g, + unsigned swizzle_b, unsigned swizzle_a) +{ + const struct util_format_description *desc = + util_format_description(format); + unsigned char swiz[4] = { + swizzle_r, swizzle_g, swizzle_b, swizzle_a, + }, rswiz[4]; + + util_format_compose_swizzles(desc->swizzle, swiz, rswiz); + + return A5XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) | + A5XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) | + A5XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) | + A5XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3])); +} diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_format.h b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_format.h new file mode 100644 index 000000000..b052aa529 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_format.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#ifndef FD5_UTIL_H_ +#define FD5_UTIL_H_ + +#include "freedreno_util.h" + +#include "a5xx.xml.h" + +enum a5xx_vtx_fmt fd5_pipe2vtx(enum pipe_format format); +enum a5xx_tex_fmt fd5_pipe2tex(enum pipe_format format); +enum a5xx_color_fmt fd5_pipe2color(enum pipe_format format); +enum a3xx_color_swap fd5_pipe2swap(enum pipe_format format); +enum a5xx_tex_fetchsize fd5_pipe2fetchsize(enum pipe_format format); +enum a5xx_depth_format fd5_pipe2depth(enum pipe_format format); + +uint32_t fd5_tex_swiz(enum pipe_format format, unsigned swizzle_r, + unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a); + +#endif /* FD5_UTIL_H_ */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c new file mode 100644 index 000000000..c623b572b --- /dev/null +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c @@ -0,0 +1,774 @@ +/* + * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_format.h" + +#include "freedreno_draw.h" +#include "freedreno_state.h" +#include "freedreno_resource.h" + +#include "fd5_gmem.h" +#include "fd5_context.h" +#include "fd5_draw.h" +#include "fd5_emit.h" +#include "fd5_program.h" +#include "fd5_format.h" +#include "fd5_zsa.h" + +static void +emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, + struct pipe_surface **bufs, struct fd_gmem_stateobj *gmem) +{ + enum a5xx_tile_mode tile_mode; + unsigned i; + + if (gmem) { + tile_mode = TILE5_2; + } else { + tile_mode = TILE5_LINEAR; + } + + for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) { + enum a5xx_color_fmt format = 0; + enum a3xx_color_swap swap = WZYX; + bool srgb = false, sint = false, uint = false; + struct fd_resource *rsc = NULL; + struct fd_resource_slice *slice = NULL; + uint32_t stride = 0; + uint32_t size = 0; + uint32_t base = 0; + uint32_t offset = 0; + + if ((i < nr_bufs) && bufs[i]) { + struct pipe_surface *psurf = bufs[i]; + enum pipe_format pformat = psurf->format; + + rsc = fd_resource(psurf->texture); + + slice = fd_resource_slice(rsc, psurf->u.tex.level); + format = fd5_pipe2color(pformat); + swap = fd5_pipe2swap(pformat); + srgb = util_format_is_srgb(pformat); + sint = util_format_is_pure_sint(pformat); + uint = util_format_is_pure_uint(pformat); + + debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); + + offset = fd_resource_offset(rsc, psurf->u.tex.level, + psurf->u.tex.first_layer); + + if (gmem) { + stride = gmem->bin_w * rsc->cpp; + size = stride * gmem->bin_h; + base = gmem->cbuf_base[i]; + } else { + stride = slice->pitch * rsc->cpp; + size = slice->size0; + } + } + + OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(i), 5); + OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) | + A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) | + A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) | + COND(gmem, 0x800) | /* XXX 0x1000 for RECTLIST clear, 0x0 for BLIT.. 
*/ + COND(srgb, A5XX_RB_MRT_BUF_INFO_COLOR_SRGB)); + OUT_RING(ring, A5XX_RB_MRT_PITCH(stride)); + OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(size)); + if (gmem || (i >= nr_bufs) || !bufs[i]) { + OUT_RING(ring, base); /* RB_MRT[i].BASE_LO */ + OUT_RING(ring, 0x00000000); /* RB_MRT[i].BASE_HI */ + } else { + debug_assert((offset + size) <= fd_bo_size(rsc->bo)); + OUT_RELOCW(ring, rsc->bo, offset, 0, 0); /* BASE_LO/HI */ + } + + OUT_PKT4(ring, REG_A5XX_SP_FS_MRT_REG(i), 1); + OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format) | + COND(sint, A5XX_SP_FS_MRT_REG_COLOR_SINT) | + COND(uint, A5XX_SP_FS_MRT_REG_COLOR_UINT) | + COND(srgb, A5XX_SP_FS_MRT_REG_COLOR_SRGB)); + + /* when we support UBWC, these would be the system memory + * addr/pitch/etc: + */ + OUT_PKT4(ring, REG_A5XX_RB_MRT_FLAG_BUFFER(i), 4); + OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */ + OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */ + OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_PITCH(0)); + OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0)); + } +} + +static void +emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf, + struct fd_gmem_stateobj *gmem) +{ + if (zsbuf) { + struct fd_resource *rsc = fd_resource(zsbuf->texture); + enum a5xx_depth_format fmt = fd5_pipe2depth(zsbuf->format); + uint32_t cpp = rsc->cpp; + uint32_t stride = 0; + uint32_t size = 0; + + if (gmem) { + stride = cpp * gmem->bin_w; + size = stride * gmem->bin_h; + } else { + struct fd_resource_slice *slice = fd_resource_slice(rsc, 0); + stride = slice->pitch * rsc->cpp; + size = slice->size0; + } + + OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5); + OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt)); + if (gmem) { + OUT_RING(ring, gmem->zsbuf_base[0]); /* RB_DEPTH_BUFFER_BASE_LO */ + OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */ + } else { + OUT_RELOCW(ring, rsc->bo, 0, 0, 0); /* RB_DEPTH_BUFFER_BASE_LO/HI */ + } + OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_PITCH(stride)); + OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(size)); + + OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1); + OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt)); + + OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3); + OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */ + OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */ + OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */ + + if (rsc->lrz) { + OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3); + OUT_RELOCW(ring, rsc->lrz, 0x1000, 0, 0); + OUT_RING(ring, A5XX_GRAS_LRZ_BUFFER_PITCH(rsc->lrz_pitch)); + + OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2); + OUT_RELOCW(ring, rsc->lrz, 0, 0, 0); + } else { + OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */ + + OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + } + + if (rsc->stencil) { + if (gmem) { + stride = 1 * gmem->bin_w; + size = stride * gmem->bin_h; + } else { + struct fd_resource_slice *slice = fd_resource_slice(rsc->stencil, 0); + stride = slice->pitch * rsc->cpp; + size = slice->size0; + } + + OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 5); + OUT_RING(ring, A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL); + if (gmem) { + OUT_RING(ring, gmem->zsbuf_base[1]); /* RB_STENCIL_BASE_LO */ + OUT_RING(ring, 0x00000000); /* RB_STENCIL_BASE_HI */ + } else { + OUT_RELOCW(ring, 
rsc->stencil->bo, 0, 0, 0); /* RB_STENCIL_BASE_LO/HI */ + } + OUT_RING(ring, A5XX_RB_STENCIL_PITCH(stride)); + OUT_RING(ring, A5XX_RB_STENCIL_ARRAY_PITCH(size)); + } else { + OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1); + OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */ + } + } else { + OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5); + OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE)); + OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */ + OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */ + OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */ + OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */ + + OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1); + OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE)); + + OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3); + OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */ + OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */ + OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */ + + OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1); + OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */ + } +} + +static bool +use_hw_binning(struct fd_batch *batch) +{ + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + + if ((gmem->maxpw * gmem->maxph) > 32) + return false; + + if ((gmem->maxpw > 15) || (gmem->maxph > 15)) + return false; + + return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2) && + (batch->num_draws > 0); +} + +static void +patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode) +{ + unsigned i; + for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) { + struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i); + *patch->cs = patch->val | DRAW4(0, 0, 0, vismode); + } + util_dynarray_resize(&batch->draw_patches, 0); +} + +static void +update_vsc_pipe(struct fd_batch *batch) +{ + struct fd_context *ctx = batch->ctx; + struct fd5_context *fd5_ctx = fd5_context(ctx); + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + struct fd_ringbuffer *ring = batch->gmem; + int i; + + OUT_PKT4(ring, REG_A5XX_VSC_BIN_SIZE, 3); + OUT_RING(ring, A5XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) | + A5XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h)); + OUT_RELOCW(ring, fd5_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */ + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_0BC5, 2); + OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC5 */ + OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC6 */ + + OUT_PKT4(ring, REG_A5XX_VSC_PIPE_CONFIG_REG(0), 16); + for (i = 0; i < 16; i++) { + struct fd_vsc_pipe *pipe = &ctx->pipe[i]; + OUT_RING(ring, A5XX_VSC_PIPE_CONFIG_REG_X(pipe->x) | + A5XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) | + A5XX_VSC_PIPE_CONFIG_REG_W(pipe->w) | + A5XX_VSC_PIPE_CONFIG_REG_H(pipe->h)); + } + + OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(0), 32); + for (i = 0; i < 16; i++) { + struct fd_vsc_pipe *pipe = &ctx->pipe[i]; + if (!pipe->bo) { + pipe->bo = fd_bo_new(ctx->dev, 0x20000, + DRM_FREEDRENO_GEM_TYPE_KMEM); + } + OUT_RELOCW(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE_DATA_ADDRESS[i].LO/HI */ + } + + OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(0), 16); + for (i = 0; i < 16; i++) { + struct fd_vsc_pipe *pipe = &ctx->pipe[i]; + OUT_RING(ring, fd_bo_size(pipe->bo) - 32); /* VSC_PIPE_DATA_LENGTH[i] */ + } +} + +static void +emit_binning_pass(struct fd_batch *batch) +{ + struct fd_context *ctx = batch->ctx; + struct fd_ringbuffer *ring = batch->gmem; + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + + uint32_t x1 = gmem->minx; + uint32_t y1 = 
gmem->miny; + uint32_t x2 = gmem->minx + gmem->width - 1; + uint32_t y2 = gmem->miny + gmem->height - 1; + + fd5_set_render_mode(batch->ctx, ring, BINNING); + + OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1); + OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) | + A5XX_RB_CNTL_HEIGHT(gmem->bin_h)); + + OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2); + OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) | + A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1)); + OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) | + A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2)); + + OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2); + OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | + A5XX_RB_RESOLVE_CNTL_1_Y(y1)); + OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | + A5XX_RB_RESOLVE_CNTL_2_Y(y2)); + + update_vsc_pipe(batch); + + OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1); + OUT_RING(ring, A5XX_VPC_MODE_CNTL_BINNING_PASS); + + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, UNK_2C); + + OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1); + OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | + A5XX_RB_WINDOW_OFFSET_Y(0)); + + /* emit IB to binning drawcmds: */ + ctx->emit_ib(ring, batch->binning); + + fd_reset_wfi(batch); + + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, UNK_2D); + + OUT_PKT7(ring, CP_EVENT_WRITE, 4); + OUT_RING(ring, CACHE_FLUSH_TS); + OUT_RELOCW(ring, fd5_context(ctx)->blit_mem, 0, 0, 0); /* ADDR_LO/HI */ + OUT_RING(ring, 0x00000000); + + // TODO CP_COND_WRITE's for all the vsc buffers (check for overflow??) + + fd_wfi(batch, ring); + + OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1); + OUT_RING(ring, 0x0); +} + +/* before first tile */ +static void +fd5_emit_tile_init(struct fd_batch *batch) +{ + struct fd_context *ctx = batch->ctx; + struct fd_ringbuffer *ring = batch->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + + fd5_emit_restore(batch, ring); + + if (batch->lrz_clear) + ctx->emit_ib(ring, batch->lrz_clear); + + fd5_emit_lrz_flush(ring); + + OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1); + OUT_RING(ring, 0x00000080); /* GRAS_CL_CNTL */ + + OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x0); + + OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1); + OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */ + + OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1); + OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */ + + /* 0x10000000 for BYPASS.. 
0x7c13c080 for GMEM: */ + fd_wfi(batch, ring); + OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1); + OUT_RING(ring, 0x7c13c080); /* RB_CCU_CNTL */ + + emit_zs(ring, pfb->zsbuf, &ctx->gmem); + emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, &ctx->gmem); + + if (use_hw_binning(batch)) { + emit_binning_pass(batch); + fd5_emit_lrz_flush(ring); + patch_draws(batch, USE_VISIBILITY); + } else { + patch_draws(batch, IGNORE_VISIBILITY); + } + + fd5_set_render_mode(batch->ctx, ring, GMEM); +} + +/* before mem2gmem */ +static void +fd5_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) +{ + struct fd_context *ctx = batch->ctx; + struct fd5_context *fd5_ctx = fd5_context(ctx); + struct fd_ringbuffer *ring = batch->gmem; + + uint32_t x1 = tile->xoff; + uint32_t y1 = tile->yoff; + uint32_t x2 = tile->xoff + tile->bin_w - 1; + uint32_t y2 = tile->yoff + tile->bin_h - 1; + + OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2); + OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) | + A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1)); + OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) | + A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2)); + + OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2); + OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | + A5XX_RB_RESOLVE_CNTL_1_Y(y1)); + OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | + A5XX_RB_RESOLVE_CNTL_2_Y(y2)); + + if (use_hw_binning(batch)) { + struct fd_vsc_pipe *pipe = &ctx->pipe[tile->p]; + + OUT_PKT7(ring, CP_WAIT_FOR_ME, 0); + + OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); + OUT_RING(ring, 0x0); + + OUT_PKT7(ring, CP_SET_BIN_DATA5, 5); + OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) | + CP_SET_BIN_DATA5_0_VSC_N(tile->n)); + OUT_RELOC(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE[p].DATA_ADDRESS */ + OUT_RELOC(ring, fd5_ctx->vsc_size_mem, /* VSC_SIZE_ADDRESS + (p * 4) */ + (tile->p * 4), 0, 0); + } else { + OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); + OUT_RING(ring, 0x1); + } + + OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1); + OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(x1) | + A5XX_RB_WINDOW_OFFSET_Y(y1)); +} + + +/* + * transfer from system memory to gmem + */ + +static void +emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base, + struct pipe_surface *psurf, enum a5xx_blit_buf buf) +{ + struct fd_ringbuffer *ring = batch->gmem; + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + struct fd_resource *rsc = fd_resource(psurf->texture); + uint32_t stride, size; + + debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); + + stride = gmem->bin_w * rsc->cpp; + size = stride * gmem->bin_h; + + OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4); + OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */ + OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */ + OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */ + OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */ + + OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5); + OUT_RING(ring, 0x00000000); /* RB_RESOLVE_CNTL_3 */ + OUT_RING(ring, base); /* RB_BLIT_DST_LO */ + OUT_RING(ring, 0x00000000); /* RB_BLIT_DST_HI */ + OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(stride)); + OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(size)); + + OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1); + OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf)); + + fd5_emit_blit(batch->ctx, ring); +} + +static void +fd5_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile) +{ + struct fd_ringbuffer *ring = batch->gmem; + struct fd_context *ctx = batch->ctx; + struct fd_gmem_stateobj *gmem = &ctx->gmem; + struct pipe_framebuffer_state *pfb = 
&batch->framebuffer; + + /* + * setup mrt and zs with system memory base addresses: + */ + + emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL); +// emit_zs(ring, pfb->zsbuf, NULL); + + OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1); + OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) | + A5XX_RB_CNTL_HEIGHT(gmem->bin_h) | + A5XX_RB_CNTL_BYPASS); + + if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) { + unsigned i; + for (i = 0; i < pfb->nr_cbufs; i++) { + if (!pfb->cbufs[i]) + continue; + if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i))) + continue; + emit_mem2gmem_surf(batch, gmem->cbuf_base[i], + pfb->cbufs[i], BLIT_MRT0 + i); + } + } + + if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { + struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); + // XXX BLIT_ZS vs BLIT_Z32 .. need some more cmdstream traces + // with z32_x24s8.. + + // XXX hack import via BLIT_MRT0 instead of BLIT_ZS, since I don't + // know otherwise how to go from linear in sysmem to tiled in gmem. + // possibly we want to flip this around gmem2mem and keep depth + // tiled in sysmem (and fixup sampler state to assume tiled).. this + // might be required for doing depth/stencil in bypass mode? + struct fd_resource_slice *slice = fd_resource_slice(rsc, 0); + enum a5xx_color_fmt format = + fd5_pipe2color(fd_gmem_restore_format(pfb->zsbuf->format)); + + OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5); + OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) | + A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(TILE5_LINEAR) | + A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX)); + OUT_RING(ring, A5XX_RB_MRT_PITCH(slice->pitch * rsc->cpp)); + OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(slice->size0)); + OUT_RELOCW(ring, rsc->bo, 0, 0, 0); /* BASE_LO/HI */ + + emit_mem2gmem_surf(batch, ctx->gmem.zsbuf_base[0], pfb->zsbuf, BLIT_MRT0); + } +} + + +/* before IB to rendering cmds: */ +static void +fd5_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile) +{ + struct fd_ringbuffer *ring = batch->gmem; + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + + OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1); + OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) | + A5XX_RB_CNTL_HEIGHT(gmem->bin_h)); + + emit_zs(ring, pfb->zsbuf, gmem); + emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem); + + // TODO MSAA + OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2); + OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE)); + OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) | + A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE); + + OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2); + OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE)); + OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) | + A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE); + + OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2); + OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE)); + OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) | + A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE); +} + + +/* + * transfer from gmem to system memory (ie. 
normal RAM) + */ + +static void +emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base, + struct pipe_surface *psurf, enum a5xx_blit_buf buf) +{ + struct fd_ringbuffer *ring = batch->gmem; + struct fd_resource *rsc = fd_resource(psurf->texture); + struct fd_resource_slice *slice; + uint32_t offset; + + slice = fd_resource_slice(rsc, psurf->u.tex.level); + offset = fd_resource_offset(rsc, psurf->u.tex.level, + psurf->u.tex.first_layer); + + debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); + + OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4); + OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */ + OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */ + OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */ + OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */ + + OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5); + OUT_RING(ring, 0x00000004); /* XXX RB_RESOLVE_CNTL_3 */ + OUT_RELOCW(ring, rsc->bo, offset, 0, 0); /* RB_BLIT_DST_LO/HI */ + OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(slice->pitch * rsc->cpp)); + OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(slice->size0)); + + OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1); + OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf)); + + fd5_emit_blit(batch->ctx, ring); +} + +static void +fd5_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile) +{ + struct fd_context *ctx = batch->ctx; + struct fd_gmem_stateobj *gmem = &ctx->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + + if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { + struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); + // XXX BLIT_ZS vs BLIT_Z32 .. need some more cmdstream traces + // with z32_x24s8.. + if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH)) + emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS); + if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL)) + emit_gmem2mem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_ZS); + } + + if (batch->resolve & FD_BUFFER_COLOR) { + unsigned i; + for (i = 0; i < pfb->nr_cbufs; i++) { + if (!pfb->cbufs[i]) + continue; + if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i))) + continue; + emit_gmem2mem_surf(batch, gmem->cbuf_base[i], + pfb->cbufs[i], BLIT_MRT0 + i); + } + } +} + +static void +fd5_emit_tile_fini(struct fd_batch *batch) +{ + struct fd_ringbuffer *ring = batch->gmem; + + OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x0); + + fd5_emit_lrz_flush(ring); + + fd5_cache_flush(batch, ring); + fd5_set_render_mode(batch->ctx, ring, BYPASS); +} + +static void +fd5_emit_sysmem_prep(struct fd_batch *batch) +{ + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + struct fd_ringbuffer *ring = batch->gmem; + + fd5_emit_restore(batch, ring); + + fd5_emit_lrz_flush(ring); + + OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x0); + + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, UNK_19); + + OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1); + OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */ + + OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1); + OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */ + + /* 0x10000000 for BYPASS.. 
0x7c13c080 for GMEM: */ + fd_wfi(batch, ring); + OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1); + OUT_RING(ring, 0x10000000); /* RB_CCU_CNTL */ + + OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2); + OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) | + A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0)); + OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) | + A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1)); + + OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2); + OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) | + A5XX_RB_RESOLVE_CNTL_1_Y(0)); + OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(pfb->width - 1) | + A5XX_RB_RESOLVE_CNTL_2_Y(pfb->height - 1)); + + OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1); + OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | + A5XX_RB_WINDOW_OFFSET_Y(0)); + + OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); + OUT_RING(ring, 0x1); + + OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1); + OUT_RING(ring, A5XX_RB_CNTL_WIDTH(0) | + A5XX_RB_CNTL_HEIGHT(0) | + A5XX_RB_CNTL_BYPASS); + + patch_draws(batch, IGNORE_VISIBILITY); + + emit_zs(ring, pfb->zsbuf, NULL); + emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL); + + // TODO MSAA + OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2); + OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE)); + OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) | + A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE); + + OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2); + OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE)); + OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) | + A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE); + + OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2); + OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE)); + OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) | + A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE); +} + +static void +fd5_emit_sysmem_fini(struct fd_batch *batch) +{ + struct fd5_context *fd5_ctx = fd5_context(batch->ctx); + struct fd_ringbuffer *ring = batch->gmem; + + OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x0); + + fd5_emit_lrz_flush(ring); + + OUT_PKT7(ring, CP_EVENT_WRITE, 4); + OUT_RING(ring, UNK_1D); + OUT_RELOCW(ring, fd5_ctx->blit_mem, 0, 0, 0); /* ADDR_LO/HI */ + OUT_RING(ring, 0x00000000); +} + +void +fd5_gmem_init(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + + ctx->emit_tile_init = fd5_emit_tile_init; + ctx->emit_tile_prep = fd5_emit_tile_prep; + ctx->emit_tile_mem2gmem = fd5_emit_tile_mem2gmem; + ctx->emit_tile_renderprep = fd5_emit_tile_renderprep; + ctx->emit_tile_gmem2mem = fd5_emit_tile_gmem2mem; + ctx->emit_tile_fini = fd5_emit_tile_fini; + ctx->emit_sysmem_prep = fd5_emit_sysmem_prep; + ctx->emit_sysmem_fini = fd5_emit_sysmem_fini; +} diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_gmem.h b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_gmem.h new file mode 100644 index 000000000..7794bfb33 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_gmem.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2015 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and 
this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#ifndef FD5_GMEM_H_ +#define FD5_GMEM_H_ + +#include "pipe/p_context.h" + +void fd5_gmem_init(struct pipe_context *pctx); + +#endif /* FD5_GMEM_H_ */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.c new file mode 100644 index 000000000..aa4babdf5 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.c @@ -0,0 +1,732 @@ +/* + * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/bitset.h" + +#include "freedreno_program.h" + +#include "fd5_program.h" +#include "fd5_emit.h" +#include "fd5_texture.h" +#include "fd5_format.h" + +static void +delete_shader_stateobj(struct fd5_shader_stateobj *so) +{ + ir3_shader_destroy(so->shader); + free(so); +} + +static struct fd5_shader_stateobj * +create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso, + enum shader_t type) +{ + struct fd_context *ctx = fd_context(pctx); + struct ir3_compiler *compiler = ctx->screen->compiler; + struct fd5_shader_stateobj *so = CALLOC_STRUCT(fd5_shader_stateobj); + so->shader = ir3_shader_create(compiler, cso, type, &ctx->debug); + return so; +} + +static void * +fd5_fp_state_create(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + return create_shader_stateobj(pctx, cso, SHADER_FRAGMENT); +} + +static void +fd5_fp_state_delete(struct pipe_context *pctx, void *hwcso) +{ + struct fd5_shader_stateobj *so = hwcso; + delete_shader_stateobj(so); +} + +static void * +fd5_vp_state_create(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + return create_shader_stateobj(pctx, cso, SHADER_VERTEX); +} + +static void +fd5_vp_state_delete(struct pipe_context *pctx, void *hwcso) +{ + struct fd5_shader_stateobj *so = hwcso; + delete_shader_stateobj(so); +} + +void +fd5_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) +{ + const struct ir3_info *si = &so->info; + enum a4xx_state_block sb = fd4_stage2shadersb(so->type); + enum a4xx_state_src src; + uint32_t i, sz, *bin; + + if (fd_mesa_debug & FD_DBG_DIRECT) { + sz = si->sizedwords; + src = SS4_DIRECT; + bin = fd_bo_map(so->bo); + } else { + sz = 0; + src = SS4_INDIRECT; + bin = NULL; + } + + OUT_PKT7(ring, CP_LOAD_STATE4, 3 + sz); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | + CP_LOAD_STATE4_0_STATE_SRC(src) | + CP_LOAD_STATE4_0_STATE_BLOCK(sb) | + CP_LOAD_STATE4_0_NUM_UNIT(so->instrlen)); + if (bin) { + OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) | + CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER)); + OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); + } else { + OUT_RELOC(ring, so->bo, 0, + CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER), 0); + } + + /* for how clever coverity is, it is sometimes rather dull, and + * doesn't realize that the only case where bin==NULL, sz==0: + */ + assume(bin || (sz == 0)); + + for (i = 0; i < sz; i++) { + OUT_RING(ring, bin[i]); + } +} + +/* Add any missing varyings needed for stream-out. Otherwise varyings not + * used by fragment shader will be stripped out. + */ +static void +link_stream_out(struct ir3_shader_linkage *l, const struct ir3_shader_variant *v) +{ + const struct pipe_stream_output_info *strmout = &v->shader->stream_output; + + /* + * First, any stream-out varyings not already in linkage map (ie. 
also + * consumed by frag shader) need to be added: + */ + for (unsigned i = 0; i < strmout->num_outputs; i++) { + const struct pipe_stream_output *out = &strmout->output[i]; + unsigned k = out->register_index; + unsigned compmask = + (1 << (out->num_components + out->start_component)) - 1; + unsigned idx, nextloc = 0; + + /* psize/pos need to be the last entries in linkage map, and will + * get added link_stream_out, so skip over them: + */ + if ((v->outputs[k].slot == VARYING_SLOT_PSIZ) || + (v->outputs[k].slot == VARYING_SLOT_POS)) + continue; + + for (idx = 0; idx < l->cnt; idx++) { + if (l->var[idx].regid == v->outputs[k].regid) + break; + nextloc = MAX2(nextloc, l->var[idx].loc + 4); + } + + /* add if not already in linkage map: */ + if (idx == l->cnt) + ir3_link_add(l, v->outputs[k].regid, compmask, nextloc); + + /* expand component-mask if needed, ie streaming out all components + * but frag shader doesn't consume all components: + */ + if (compmask & ~l->var[idx].compmask) { + l->var[idx].compmask |= compmask; + l->max_loc = MAX2(l->max_loc, + l->var[idx].loc + util_last_bit(l->var[idx].compmask)); + } + } +} + +/* TODO maybe some of this we could pre-compute once rather than having + * so much draw-time logic? + */ +static void +emit_stream_out(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v, + struct ir3_shader_linkage *l) +{ + const struct pipe_stream_output_info *strmout = &v->shader->stream_output; + unsigned ncomp[PIPE_MAX_SO_BUFFERS] = {0}; + unsigned prog[align(l->max_loc, 2) / 2]; + + memset(prog, 0, sizeof(prog)); + + for (unsigned i = 0; i < strmout->num_outputs; i++) { + const struct pipe_stream_output *out = &strmout->output[i]; + unsigned k = out->register_index; + unsigned idx; + + ncomp[out->output_buffer] += out->num_components; + + /* linkage map sorted by order frag shader wants things, so + * a bit less ideal here.. 
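+ * (each stream-out output below is matched to its linkage entry by register
+ * id; its buffer index and destination offset are then packed into the A half
+ * of the VPC_SO_PROG entry for even VPC locations, or the B half for odd ones)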
+ */ + for (idx = 0; idx < l->cnt; idx++) + if (l->var[idx].regid == v->outputs[k].regid) + break; + + debug_assert(idx < l->cnt); + + for (unsigned j = 0; j < out->num_components; j++) { + unsigned c = j + out->start_component; + unsigned loc = l->var[idx].loc + c; + unsigned off = j + out->dst_offset; /* in dwords */ + + if (loc & 1) { + prog[loc/2] |= A5XX_VPC_SO_PROG_B_EN | + A5XX_VPC_SO_PROG_B_BUF(out->output_buffer) | + A5XX_VPC_SO_PROG_B_OFF(off * 4); + } else { + prog[loc/2] |= A5XX_VPC_SO_PROG_A_EN | + A5XX_VPC_SO_PROG_A_BUF(out->output_buffer) | + A5XX_VPC_SO_PROG_A_OFF(off * 4); + } + } + } + + OUT_PKT7(ring, CP_CONTEXT_REG_BUNCH, 12 + (2 * ARRAY_SIZE(prog))); + OUT_RING(ring, REG_A5XX_VPC_SO_BUF_CNTL); + OUT_RING(ring, A5XX_VPC_SO_BUF_CNTL_ENABLE | + COND(ncomp[0] > 0, A5XX_VPC_SO_BUF_CNTL_BUF0) | + COND(ncomp[1] > 0, A5XX_VPC_SO_BUF_CNTL_BUF1) | + COND(ncomp[2] > 0, A5XX_VPC_SO_BUF_CNTL_BUF2) | + COND(ncomp[3] > 0, A5XX_VPC_SO_BUF_CNTL_BUF3)); + OUT_RING(ring, REG_A5XX_VPC_SO_NCOMP(0)); + OUT_RING(ring, ncomp[0]); + OUT_RING(ring, REG_A5XX_VPC_SO_NCOMP(1)); + OUT_RING(ring, ncomp[1]); + OUT_RING(ring, REG_A5XX_VPC_SO_NCOMP(2)); + OUT_RING(ring, ncomp[2]); + OUT_RING(ring, REG_A5XX_VPC_SO_NCOMP(3)); + OUT_RING(ring, ncomp[3]); + OUT_RING(ring, REG_A5XX_VPC_SO_CNTL); + OUT_RING(ring, A5XX_VPC_SO_CNTL_ENABLE); + for (unsigned i = 0; i < ARRAY_SIZE(prog); i++) { + OUT_RING(ring, REG_A5XX_VPC_SO_PROG); + OUT_RING(ring, prog[i]); + } +} + +struct stage { + const struct ir3_shader_variant *v; + const struct ir3_info *i; + /* const sizes are in units of 4 * vec4 */ + uint8_t constoff; + uint8_t constlen; + /* instr sizes are in units of 16 instructions */ + uint8_t instroff; + uint8_t instrlen; +}; + +enum { + VS = 0, + FS = 1, + HS = 2, + DS = 3, + GS = 4, + MAX_STAGES +}; + +static void +setup_stages(struct fd5_emit *emit, struct stage *s) +{ + unsigned i; + + s[VS].v = fd5_emit_get_vp(emit); + s[FS].v = fd5_emit_get_fp(emit); + + s[HS].v = s[DS].v = s[GS].v = NULL; /* for now */ + + for (i = 0; i < MAX_STAGES; i++) { + if (s[i].v) { + s[i].i = &s[i].v->info; + /* constlen is in units of 4 * vec4: */ + s[i].constlen = align(s[i].v->constlen, 4) / 4; + /* instrlen is already in units of 16 instr.. although + * probably we should ditch that and not make the compiler + * care about instruction group size of a3xx vs a5xx + */ + s[i].instrlen = s[i].v->instrlen; + } else { + s[i].i = NULL; + s[i].constlen = 0; + s[i].instrlen = 0; + } + } + + /* NOTE: at least for gles2, blob partitions VS at bottom of const + * space and FS taking entire remaining space. We probably don't + * need to do that the same way, but for now mimic what the blob + * does to make it easier to diff against register values from blob + * + * NOTE: if VS.instrlen + FS.instrlen > 64, then one or both shaders + * is run from external memory. 
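+ * For example, if VS.instrlen were 40 and FS.instrlen were 30, the sum (70)
+ * exceeds 64 but the FS alone still fits (30 < 64), so VS.instrlen is zeroed
+ * below and only the VS is run from external memory.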
+ */ + if ((s[VS].instrlen + s[FS].instrlen) > 64) { + /* prioritize FS for internal memory: */ + if (s[FS].instrlen < 64) { + /* if FS can fit, kick VS out to external memory: */ + s[VS].instrlen = 0; + } else if (s[VS].instrlen < 64) { + /* otherwise if VS can fit, kick out FS: */ + s[FS].instrlen = 0; + } else { + /* neither can fit, run both from external memory: */ + s[VS].instrlen = 0; + s[FS].instrlen = 0; + } + } + + unsigned constoff = 0; + for (i = 0; i < MAX_STAGES; i++) { + s[i].constoff = constoff; + constoff += s[i].constlen; + } + + s[VS].instroff = 0; + s[FS].instroff = 64 - s[FS].instrlen; + s[HS].instroff = s[DS].instroff = s[GS].instroff = s[FS].instroff; +} + +void +fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct fd5_emit *emit) +{ + struct stage s[MAX_STAGES]; + uint32_t pos_regid, psize_regid, color_regid[8]; + uint32_t face_regid, coord_regid, zwcoord_regid; + uint32_t vcoord_regid, vertex_regid, instance_regid; + enum a3xx_threadsize fssz; + uint8_t psize_loc = ~0; + int i, j; + + setup_stages(emit, s); + + fssz = (s[FS].i->max_reg >= 24) ? TWO_QUADS : FOUR_QUADS; + + pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS); + psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ); + vertex_regid = ir3_find_sysval_regid(s[VS].v, SYSTEM_VALUE_VERTEX_ID); + instance_regid = ir3_find_sysval_regid(s[VS].v, SYSTEM_VALUE_INSTANCE_ID); + + if (s[FS].v->color0_mrt) { + color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] = + color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] = + ir3_find_output_regid(s[FS].v, FRAG_RESULT_COLOR); + } else { + color_regid[0] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA0); + color_regid[1] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA1); + color_regid[2] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA2); + color_regid[3] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA3); + color_regid[4] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA4); + color_regid[5] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA5); + color_regid[6] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA6); + color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7); + } + + /* TODO get these dynamically: */ + face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0); + coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0); + zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0); + vcoord_regid = (s[FS].v->total_in > 0) ? s[FS].v->pos_regid : regid(63,0); + + /* we could probably divide this up into things that need to be + * emitted if frag-prog is dirty vs if vert-prog is dirty.. 
+ */ + + OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CONFIG, 5); + OUT_RING(ring, A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET(s[VS].constoff) | + A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET(s[VS].instroff) | + COND(s[VS].v, A5XX_HLSQ_VS_CONFIG_ENABLED)); + OUT_RING(ring, A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET(s[FS].constoff) | + A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET(s[FS].instroff) | + COND(s[FS].v, A5XX_HLSQ_FS_CONFIG_ENABLED)); + OUT_RING(ring, A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET(s[HS].constoff) | + A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET(s[HS].instroff) | + COND(s[HS].v, A5XX_HLSQ_HS_CONFIG_ENABLED)); + OUT_RING(ring, A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET(s[DS].constoff) | + A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET(s[DS].instroff) | + COND(s[DS].v, A5XX_HLSQ_DS_CONFIG_ENABLED)); + OUT_RING(ring, A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET(s[GS].constoff) | + A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET(s[GS].instroff) | + COND(s[GS].v, A5XX_HLSQ_GS_CONFIG_ENABLED)); + + OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CNTL, 5); + OUT_RING(ring, A5XX_HLSQ_VS_CNTL_INSTRLEN(s[VS].instrlen) | + COND(s[VS].v && s[VS].v->has_ssbo, A5XX_HLSQ_VS_CNTL_SSBO_ENABLE)); + OUT_RING(ring, A5XX_HLSQ_FS_CNTL_INSTRLEN(s[FS].instrlen) | + COND(s[FS].v && s[FS].v->has_ssbo, A5XX_HLSQ_FS_CNTL_SSBO_ENABLE)); + OUT_RING(ring, A5XX_HLSQ_HS_CNTL_INSTRLEN(s[HS].instrlen) | + COND(s[HS].v && s[HS].v->has_ssbo, A5XX_HLSQ_HS_CNTL_SSBO_ENABLE)); + OUT_RING(ring, A5XX_HLSQ_DS_CNTL_INSTRLEN(s[DS].instrlen) | + COND(s[DS].v && s[DS].v->has_ssbo, A5XX_HLSQ_DS_CNTL_SSBO_ENABLE)); + OUT_RING(ring, A5XX_HLSQ_GS_CNTL_INSTRLEN(s[GS].instrlen) | + COND(s[GS].v && s[GS].v->has_ssbo, A5XX_HLSQ_GS_CNTL_SSBO_ENABLE)); + + OUT_PKT4(ring, REG_A5XX_SP_VS_CONFIG, 5); + OUT_RING(ring, A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET(s[VS].constoff) | + A5XX_SP_VS_CONFIG_SHADEROBJOFFSET(s[VS].instroff) | + COND(s[VS].v, A5XX_SP_VS_CONFIG_ENABLED)); + OUT_RING(ring, A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET(s[FS].constoff) | + A5XX_SP_FS_CONFIG_SHADEROBJOFFSET(s[FS].instroff) | + COND(s[FS].v, A5XX_SP_FS_CONFIG_ENABLED)); + OUT_RING(ring, A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET(s[HS].constoff) | + A5XX_SP_HS_CONFIG_SHADEROBJOFFSET(s[HS].instroff) | + COND(s[HS].v, A5XX_SP_HS_CONFIG_ENABLED)); + OUT_RING(ring, A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET(s[DS].constoff) | + A5XX_SP_DS_CONFIG_SHADEROBJOFFSET(s[DS].instroff) | + COND(s[DS].v, A5XX_SP_DS_CONFIG_ENABLED)); + OUT_RING(ring, A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET(s[GS].constoff) | + A5XX_SP_GS_CONFIG_SHADEROBJOFFSET(s[GS].instroff) | + COND(s[GS].v, A5XX_SP_GS_CONFIG_ENABLED)); + + OUT_PKT4(ring, REG_A5XX_SP_CS_CONFIG, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CONSTLEN, 2); + OUT_RING(ring, s[VS].constlen); /* HLSQ_VS_CONSTLEN */ + OUT_RING(ring, s[VS].instrlen); /* HLSQ_VS_INSTRLEN */ + + OUT_PKT4(ring, REG_A5XX_HLSQ_FS_CONSTLEN, 2); + OUT_RING(ring, s[FS].constlen); /* HLSQ_FS_CONSTLEN */ + OUT_RING(ring, s[FS].instrlen); /* HLSQ_FS_INSTRLEN */ + + OUT_PKT4(ring, REG_A5XX_HLSQ_HS_CONSTLEN, 2); + OUT_RING(ring, s[HS].constlen); /* HLSQ_HS_CONSTLEN */ + OUT_RING(ring, s[HS].instrlen); /* HLSQ_HS_INSTRLEN */ + + OUT_PKT4(ring, REG_A5XX_HLSQ_DS_CONSTLEN, 2); + OUT_RING(ring, s[DS].constlen); /* HLSQ_DS_CONSTLEN */ + OUT_RING(ring, s[DS].instrlen); /* HLSQ_DS_INSTRLEN */ + + OUT_PKT4(ring, REG_A5XX_HLSQ_GS_CONSTLEN, 2); + OUT_RING(ring, s[GS].constlen); /* HLSQ_GS_CONSTLEN */ + OUT_RING(ring, s[GS].instrlen); /* HLSQ_GS_INSTRLEN */ + + OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONSTLEN, 
2); + OUT_RING(ring, 0x00000000); /* HLSQ_CS_CONSTLEN */ + OUT_RING(ring, 0x00000000); /* HLSQ_CS_INSTRLEN */ + + OUT_PKT4(ring, REG_A5XX_SP_VS_CTRL_REG0, 1); + OUT_RING(ring, A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) | + A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) | + 0x6 | /* XXX seems to be always set? */ + A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow.. + COND(s[VS].v->has_samp, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE)); + + struct ir3_shader_linkage l = {0}; + ir3_link_shaders(&l, s[VS].v, s[FS].v); + + if ((s[VS].v->shader->stream_output.num_outputs > 0) && + !emit->key.binning_pass) + link_stream_out(&l, s[VS].v); + + BITSET_DECLARE(varbs, 128) = {0}; + uint32_t *varmask = (uint32_t *)varbs; + + for (i = 0; i < l.cnt; i++) + for (j = 0; j < util_last_bit(l.var[i].compmask); j++) + BITSET_SET(varbs, l.var[i].loc + j); + + OUT_PKT4(ring, REG_A5XX_VPC_VAR_DISABLE(0), 4); + OUT_RING(ring, ~varmask[0]); /* VPC_VAR[0].DISABLE */ + OUT_RING(ring, ~varmask[1]); /* VPC_VAR[1].DISABLE */ + OUT_RING(ring, ~varmask[2]); /* VPC_VAR[2].DISABLE */ + OUT_RING(ring, ~varmask[3]); /* VPC_VAR[3].DISABLE */ + + /* a5xx appends pos/psize to end of the linkage map: */ + if (pos_regid != regid(63,0)) + ir3_link_add(&l, pos_regid, 0xf, l.max_loc); + + if (psize_regid != regid(63,0)) { + psize_loc = l.max_loc; + ir3_link_add(&l, psize_regid, 0x1, l.max_loc); + } + + if ((s[VS].v->shader->stream_output.num_outputs > 0) && + !emit->key.binning_pass) { + emit_stream_out(ring, s[VS].v, &l); + + OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1); + OUT_RING(ring, 0x00000000); + } else { + OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1); + OUT_RING(ring, A5XX_VPC_SO_OVERRIDE_SO_DISABLE); + } + + for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) { + uint32_t reg = 0; + + OUT_PKT4(ring, REG_A5XX_SP_VS_OUT_REG(i), 1); + + reg |= A5XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid); + reg |= A5XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask); + j++; + + reg |= A5XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid); + reg |= A5XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask); + j++; + + OUT_RING(ring, reg); + } + + for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) { + uint32_t reg = 0; + + OUT_PKT4(ring, REG_A5XX_SP_VS_VPC_DST_REG(i), 1); + + reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc); + reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc); + reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc); + reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc); + + OUT_RING(ring, reg); + } + + OUT_PKT4(ring, REG_A5XX_SP_VS_OBJ_START_LO, 2); + OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0); /* SP_VS_OBJ_START_LO/HI */ + + if (s[VS].instrlen) + fd5_emit_shader(ring, s[VS].v); + + // TODO depending on other bits in this reg (if any) set somewhere else? 
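The VPC_VAR_DISABLE words written further up are simply the complement of a 128-bit bitset marking which varying location slots are in use, built from the linkage map. A minimal standalone sketch of that computation, using plain uint32_t words in place of the driver's BITSET_* helpers and a made-up linkage (a vec4 at location 0 and a vec2 at location 4):

#include <stdint.h>
#include <stdio.h>

struct linkage_var { unsigned loc; unsigned compmask; };

/* like util_last_bit(): index of the highest set bit, plus one */
static unsigned last_bit(unsigned mask)
{
	unsigned n = 0;
	while (mask) { n++; mask >>= 1; }
	return n;
}

int main(void)
{
	/* hypothetical linkage entries, for illustration only */
	struct linkage_var vars[2] = { { 0, 0xf }, { 4, 0x3 } };
	uint32_t varmask[4] = { 0, 0, 0, 0 };

	/* mark every location slot up to each entry's last used component */
	for (unsigned i = 0; i < 2; i++)
		for (unsigned j = 0; j < last_bit(vars[i].compmask); j++) {
			unsigned bit = vars[i].loc + j;
			varmask[bit / 32] |= 1u << (bit % 32);
		}

	/* the four register values are the inverted mask words */
	for (unsigned i = 0; i < 4; i++)
		printf("VPC_VAR[%u].DISABLE = 0x%08x\n", i, (unsigned)~varmask[i]);

	return 0;
}

With these inputs, bits 0-5 end up set, so the sketch prints 0xffffffc0 for the first word and 0xffffffff for the remaining three; the driver emits the same four inverted words as VPC_VAR[0..3].DISABLE.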
+ OUT_PKT4(ring, REG_A5XX_PC_PRIM_VTX_CNTL, 1); + OUT_RING(ring, COND(s[VS].v->writes_psize, A5XX_PC_PRIM_VTX_CNTL_PSIZE)); + + OUT_PKT4(ring, REG_A5XX_SP_PRIMITIVE_CNTL, 1); + OUT_RING(ring, A5XX_SP_PRIMITIVE_CNTL_VSOUT(l.cnt)); + + OUT_PKT4(ring, REG_A5XX_VPC_CNTL_0, 1); + OUT_RING(ring, A5XX_VPC_CNTL_0_STRIDE_IN_VPC(l.max_loc) | + COND(s[FS].v->total_in > 0, A5XX_VPC_CNTL_0_VARYING) | + COND(s[FS].v->frag_coord, A5XX_VPC_CNTL_0_VARYING) | + 0x10000); // XXX + + fd5_context(ctx)->max_loc = l.max_loc; + + if (emit->key.binning_pass) { + OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2); + OUT_RING(ring, 0x00000000); /* SP_FS_OBJ_START_LO */ + OUT_RING(ring, 0x00000000); /* SP_FS_OBJ_START_HI */ + } else { + OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2); + OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0); /* SP_FS_OBJ_START_LO/HI */ + } + + OUT_PKT4(ring, REG_A5XX_HLSQ_CONTROL_0_REG, 5); + OUT_RING(ring, A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(fssz) | + A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE(TWO_QUADS) | + 0x00000880); /* XXX HLSQ_CONTROL_0 */ + OUT_RING(ring, A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD(63)); + OUT_RING(ring, A5XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) | + 0xfcfcfc00); /* XXX */ + OUT_RING(ring, A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID(vcoord_regid) | + 0xfcfcfc00); /* XXX */ + OUT_RING(ring, A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(coord_regid) | + A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) | + 0x0000fcfc); /* XXX */ + + OUT_PKT4(ring, REG_A5XX_SP_FS_CTRL_REG0, 1); + OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_SP_FS_CTRL_REG0_VARYING) | + COND(s[FS].v->frag_coord, A5XX_SP_FS_CTRL_REG0_VARYING) | + 0x40006 | /* XXX set pretty much everywhere */ + A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) | + A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) | + A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) | + A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow.. 
+ COND(s[FS].v->has_samp, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE)); + + OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1); + OUT_RING(ring, 0x020fffff); /* XXX */ + + OUT_PKT4(ring, REG_A5XX_VPC_GS_SIV_CNTL, 1); + OUT_RING(ring, 0x0000ffff); /* XXX */ + + OUT_PKT4(ring, REG_A5XX_SP_SP_CNTL, 1); + OUT_RING(ring, 0x00000010); /* XXX */ + + OUT_PKT4(ring, REG_A5XX_GRAS_CNTL, 1); + OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_GRAS_CNTL_VARYING) | + COND(s[FS].v->frag_coord, A5XX_GRAS_CNTL_XCOORD | + A5XX_GRAS_CNTL_YCOORD | + A5XX_GRAS_CNTL_ZCOORD | + A5XX_GRAS_CNTL_WCOORD | + A5XX_GRAS_CNTL_UNK3) | + COND(s[FS].v->frag_face, A5XX_GRAS_CNTL_UNK3)); + + OUT_PKT4(ring, REG_A5XX_RB_RENDER_CONTROL0, 2); + OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_RB_RENDER_CONTROL0_VARYING) | + COND(s[FS].v->frag_coord, A5XX_RB_RENDER_CONTROL0_XCOORD | + A5XX_RB_RENDER_CONTROL0_YCOORD | + A5XX_RB_RENDER_CONTROL0_ZCOORD | + A5XX_RB_RENDER_CONTROL0_WCOORD | + A5XX_RB_RENDER_CONTROL0_UNK3) | + COND(s[FS].v->frag_face, A5XX_RB_RENDER_CONTROL0_UNK3)); + OUT_RING(ring, COND(s[FS].v->frag_face, A5XX_RB_RENDER_CONTROL1_FACENESS)); + + OUT_PKT4(ring, REG_A5XX_SP_FS_OUTPUT_REG(0), 8); + for (i = 0; i < 8; i++) { + OUT_RING(ring, A5XX_SP_FS_OUTPUT_REG_REGID(color_regid[i]) | + COND(emit->key.half_precision, + A5XX_SP_FS_OUTPUT_REG_HALF_PRECISION)); + } + + + OUT_PKT4(ring, REG_A5XX_VPC_PACK, 1); + OUT_RING(ring, A5XX_VPC_PACK_NUMNONPOSVAR(s[FS].v->total_in) | + A5XX_VPC_PACK_PSIZELOC(psize_loc)); + + if (!emit->key.binning_pass) { + uint32_t vinterp[8], vpsrepl[8]; + + memset(vinterp, 0, sizeof(vinterp)); + memset(vpsrepl, 0, sizeof(vpsrepl)); + + /* looks like we need to do int varyings in the frag + * shader on a5xx (no flatshad reg? or a420.0 bug?): + * + * (sy)(ss)nop + * (sy)ldlv.u32 r0.x,l[r0.x], 1 + * ldlv.u32 r0.y,l[r0.x+1], 1 + * (ss)bary.f (ei)r63.x, 0, r0.x + * (ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x + * (rpt5)nop + * sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0 + * + * Possibly on later a5xx variants we'll be able to use + * something like the code below instead of workaround + * in the shader: + */ + /* figure out VARYING_INTERP / VARYING_PS_REPL register values: */ + for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) { + /* NOTE: varyings are packed, so if compmask is 0xb + * then first, third, and fourth component occupy + * three consecutive varying slots: + */ + unsigned compmask = s[FS].v->inputs[j].compmask; + + uint32_t inloc = s[FS].v->inputs[j].inloc; + + if ((s[FS].v->inputs[j].interpolate == INTERP_MODE_FLAT) || + (s[FS].v->inputs[j].rasterflat && emit->rasterflat)) { + uint32_t loc = inloc; + + for (i = 0; i < 4; i++) { + if (compmask & (1 << i)) { + vinterp[loc / 16] |= 1 << ((loc % 16) * 2); + //flatshade[loc / 32] |= 1 << (loc % 32); + loc++; + } + } + } + + gl_varying_slot slot = s[FS].v->inputs[j].slot; + + /* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */ + if (slot >= VARYING_SLOT_VAR0) { + unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0); + /* Replace the .xy coordinates with S/T from the point sprite. Set + * interpolation bits for .zw such that they become .01 + */ + if (emit->sprite_coord_enable & texmask) { + /* mask is two 2-bit fields, where: + * '01' -> S + * '10' -> T + * '11' -> 1 - T (flip mode) + */ + unsigned mask = emit->sprite_coord_mode ? 
0b1101 : 0b1001; + uint32_t loc = inloc; + if (compmask & 0x1) { + vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2); + loc++; + } + if (compmask & 0x2) { + vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2); + loc++; + } + if (compmask & 0x4) { + /* .z <- 0.0f */ + vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2); + loc++; + } + if (compmask & 0x8) { + /* .w <- 1.0f */ + vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2); + loc++; + } + } + } + } + + OUT_PKT4(ring, REG_A5XX_VPC_VARYING_INTERP_MODE(0), 8); + for (i = 0; i < 8; i++) + OUT_RING(ring, vinterp[i]); /* VPC_VARYING_INTERP[i].MODE */ + + OUT_PKT4(ring, REG_A5XX_VPC_VARYING_PS_REPL_MODE(0), 8); + for (i = 0; i < 8; i++) + OUT_RING(ring, vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */ + } + + if (!emit->key.binning_pass) + if (s[FS].instrlen) + fd5_emit_shader(ring, s[FS].v); + + OUT_PKT4(ring, REG_A5XX_VFD_CONTROL_1, 5); + OUT_RING(ring, A5XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) | + A5XX_VFD_CONTROL_1_REGID4INST(instance_regid) | + 0xfc0000); + OUT_RING(ring, 0x0000fcfc); /* VFD_CONTROL_2 */ + OUT_RING(ring, 0x0000fcfc); /* VFD_CONTROL_3 */ + OUT_RING(ring, 0x000000fc); /* VFD_CONTROL_4 */ + OUT_RING(ring, 0x00000000); /* VFD_CONTROL_5 */ +} + +void +fd5_prog_init(struct pipe_context *pctx) +{ + pctx->create_fs_state = fd5_fp_state_create; + pctx->delete_fs_state = fd5_fp_state_delete; + + pctx->create_vs_state = fd5_vp_state_create; + pctx->delete_vs_state = fd5_vp_state_delete; + + fd_prog_init(pctx); +} diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.h b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.h new file mode 100644 index 000000000..585263e0a --- /dev/null +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#ifndef FD5_PROGRAM_H_ +#define FD5_PROGRAM_H_ + +#include "pipe/p_context.h" +#include "freedreno_context.h" +#include "ir3_shader.h" + +struct fd5_shader_stateobj { + struct ir3_shader *shader; +}; + +struct fd5_emit; + +void fd5_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so); + +void fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct fd5_emit *emit); + +void fd5_prog_init(struct pipe_context *pctx); + +#endif /* FD5_PROGRAM_H_ */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_query.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_query.c new file mode 100644 index 000000000..80b84ce54 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_query.c @@ -0,0 +1,253 @@ +/* + * Copyright (C) 2017 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +/* NOTE: see https://github.com/freedreno/freedreno/wiki/A5xx-Queries */ + +#include "freedreno_query_acc.h" +#include "freedreno_resource.h" + +#include "fd5_context.h" +#include "fd5_format.h" +#include "fd5_query.h" + +struct PACKED fd5_query_sample { + uint64_t start; + uint64_t result; + uint64_t stop; +}; + +#define query_sample(aq, field) \ + fd_resource((aq)->prsc)->bo, \ + offsetof(struct fd5_query_sample, field), \ + 0, 0 + +/* + * Occlusion Query: + * + * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they + * interpret results + */ + +static void +occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch) +{ + struct fd_ringbuffer *ring = batch->draw; + + OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1); + OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY); + + OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2); + OUT_RELOCW(ring, query_sample(aq, start)); + + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, ZPASS_DONE); + fd_reset_wfi(batch); + + fd5_context(batch->ctx)->samples_passed_queries++; +} + +static void +occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch) +{ + struct fd_ringbuffer *ring = batch->draw; + + OUT_PKT7(ring, CP_MEM_WRITE, 4); + OUT_RELOCW(ring, query_sample(aq, stop)); + OUT_RING(ring, 0xffffffff); + OUT_RING(ring, 0xffffffff); + + OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0); + + OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1); + OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY); + + OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2); + OUT_RELOCW(ring, query_sample(aq, stop)); + + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, ZPASS_DONE); + fd_reset_wfi(batch); + + OUT_PKT7(ring, CP_WAIT_REG_MEM, 6); + OUT_RING(ring, 0x00000014); // XXX + OUT_RELOC(ring, query_sample(aq, stop)); + OUT_RING(ring, 0xffffffff); + OUT_RING(ring, 0xffffffff); + OUT_RING(ring, 0x00000010); // XXX + + /* result += stop - start: */ + OUT_PKT7(ring, CP_MEM_TO_MEM, 9); + OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | + CP_MEM_TO_MEM_0_NEG_C); + OUT_RELOCW(ring, query_sample(aq, result)); /* dst */ + OUT_RELOC(ring, query_sample(aq, result)); /* srcA */ + OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */ + OUT_RELOC(ring, query_sample(aq, start)); /* srcC */ + + fd5_context(batch->ctx)->samples_passed_queries--; +} + +static void +occlusion_counter_result(struct fd_context *ctx, void *buf, + union pipe_query_result *result) +{ + struct fd5_query_sample *sp = buf; + result->u64 = sp->result; +} + +static void +occlusion_predicate_result(struct fd_context *ctx, void *buf, + union pipe_query_result *result) +{ + struct fd5_query_sample *sp = buf; + result->b = !!sp->result; +} + +static const struct fd_acc_sample_provider occlusion_counter = { + .query_type = PIPE_QUERY_OCCLUSION_COUNTER, + .active = FD_STAGE_DRAW, + .size = sizeof(struct fd5_query_sample), + .resume = occlusion_resume, + .pause = occlusion_pause, + .result = occlusion_counter_result, +}; + +static const struct fd_acc_sample_provider occlusion_predicate = { + .query_type = PIPE_QUERY_OCCLUSION_PREDICATE, + .active = FD_STAGE_DRAW, + .size = sizeof(struct fd5_query_sample), + .resume = occlusion_resume, + .pause = occlusion_pause, + .result = occlusion_predicate_result, +}; + +/* + * Timestamp Queries: + */ + +static void +timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch) +{ + struct fd_ringbuffer *ring = batch->draw; + + OUT_PKT7(ring, CP_EVENT_WRITE, 4); + OUT_RING(ring, 
CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_AND_INV_EVENT) | + CP_EVENT_WRITE_0_TIMESTAMP); + OUT_RELOCW(ring, query_sample(aq, start)); + OUT_RING(ring, 0x00000000); + + fd_reset_wfi(batch); +} + +static void +timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch) +{ + struct fd_ringbuffer *ring = batch->draw; + + OUT_PKT7(ring, CP_EVENT_WRITE, 4); + OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_AND_INV_EVENT) | + CP_EVENT_WRITE_0_TIMESTAMP); + OUT_RELOCW(ring, query_sample(aq, stop)); + OUT_RING(ring, 0x00000000); + + fd_reset_wfi(batch); + fd_wfi(batch, ring); + + /* result += stop - start: */ + OUT_PKT7(ring, CP_MEM_TO_MEM, 9); + OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | + CP_MEM_TO_MEM_0_NEG_C); + OUT_RELOCW(ring, query_sample(aq, result)); /* dst */ + OUT_RELOC(ring, query_sample(aq, result)); /* srcA */ + OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */ + OUT_RELOC(ring, query_sample(aq, start)); /* srcC */ +} + +static uint64_t +ticks_to_ns(struct fd_context *ctx, uint32_t ts) +{ + /* This is based on the 19.2MHz always-on rbbm timer. + * + * TODO we should probably query this value from kernel.. + */ + return ts * (1000000000 / 19200000); +} + +static void +time_elapsed_accumulate_result(struct fd_context *ctx, void *buf, + union pipe_query_result *result) +{ + struct fd5_query_sample *sp = buf; + result->u64 = ticks_to_ns(ctx, sp->result); +} + +static void +timestamp_accumulate_result(struct fd_context *ctx, void *buf, + union pipe_query_result *result) +{ + struct fd5_query_sample *sp = buf; + result->u64 = ticks_to_ns(ctx, sp->result); +} + +static const struct fd_acc_sample_provider time_elapsed = { + .query_type = PIPE_QUERY_TIME_ELAPSED, + .active = FD_STAGE_DRAW | FD_STAGE_CLEAR, + .size = sizeof(struct fd5_query_sample), + .resume = timestamp_resume, + .pause = timestamp_pause, + .result = time_elapsed_accumulate_result, +}; + +/* NOTE: timestamp query isn't going to give terribly sensible results + * on a tiler. But it is needed by qapitrace profile heatmap. If you + * add in a binning pass, the results get even more non-sensical. So + * we just return the timestamp on the first tile and hope that is + * kind of good enough. 
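 *
 * (For reference on the arithmetic used above, going by the driver's own
 * comments: with CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C the
 * CP_MEM_TO_MEM packet is issued as "dst = srcA + srcB - srcC", which is
 * how occlusion_pause() and timestamp_pause() keep a running
 * "result += stop - start" in the sample buffer across pause/resume.
 * ticks_to_ns() assumes the 19.2MHz always-on RBBM counter: the integer
 * division 1000000000 / 19200000 truncates to 52, so each tick is counted
 * as 52ns, slightly under the exact 52.083ns period, roughly 0.16% low.)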
+ */
+
+static const struct fd_acc_sample_provider timestamp = {
+ .query_type = PIPE_QUERY_TIMESTAMP,
+ .active = FD_STAGE_ALL,
+ .size = sizeof(struct fd5_query_sample),
+ .resume = timestamp_resume,
+ .pause = timestamp_pause,
+ .result = timestamp_accumulate_result,
+};
+
+void
+fd5_query_context_init(struct pipe_context *pctx)
+{
+ struct fd_context *ctx = fd_context(pctx);
+
+ ctx->create_query = fd_acc_create_query;
+ ctx->query_set_stage = fd_acc_query_set_stage;
+
+ fd_acc_query_register_provider(pctx, &occlusion_counter);
+ fd_acc_query_register_provider(pctx, &occlusion_predicate);
+
+ fd_acc_query_register_provider(pctx, &time_elapsed);
+ fd_acc_query_register_provider(pctx, &timestamp);
+}
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_query.h b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_query.h
new file mode 100644
index 000000000..2e563b0d5
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_query.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD5_QUERY_H_
+#define FD5_QUERY_H_
+
+#include "pipe/p_context.h"
+
+void fd5_query_context_init(struct pipe_context *pctx);
+
+#endif /* FD5_QUERY_H_ */
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_rasterizer.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_rasterizer.c
new file mode 100644
index 000000000..2bbcbf2d6
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_rasterizer.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" + +#include "fd5_rasterizer.h" +#include "fd5_context.h" +#include "fd5_format.h" + +void * +fd5_rasterizer_state_create(struct pipe_context *pctx, + const struct pipe_rasterizer_state *cso) +{ + struct fd5_rasterizer_stateobj *so; + float psize_min, psize_max; + + so = CALLOC_STRUCT(fd5_rasterizer_stateobj); + if (!so) + return NULL; + + so->base = *cso; + + if (cso->point_size_per_vertex) { + psize_min = util_get_min_point_size(cso); + psize_max = 4092; + } else { + /* Force the point size to be as if the vertex output was disabled. */ + psize_min = cso->point_size; + psize_max = cso->point_size; + } + + so->gras_su_point_minmax = + A5XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) | + A5XX_GRAS_SU_POINT_MINMAX_MAX(psize_max); + so->gras_su_point_size = A5XX_GRAS_SU_POINT_SIZE(cso->point_size); + so->gras_su_poly_offset_scale = + A5XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale); + so->gras_su_poly_offset_offset = + A5XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units); + so->gras_su_poly_offset_clamp = + A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(cso->offset_clamp); + + so->gras_su_cntl = + A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(cso->line_width/2.0); + so->pc_raster_cntl = + A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) | + A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back)); + + if (cso->fill_front != PIPE_POLYGON_MODE_FILL || + cso->fill_back != PIPE_POLYGON_MODE_FILL) + so->pc_raster_cntl |= A5XX_PC_RASTER_CNTL_POLYMODE_ENABLE; + + if (cso->cull_face & PIPE_FACE_FRONT) + so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_CULL_FRONT; + if (cso->cull_face & PIPE_FACE_BACK) + so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_CULL_BACK; + if (!cso->front_ccw) + so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_FRONT_CW; + if (cso->offset_tri) + so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_POLY_OFFSET; + + if (!cso->flatshade_first) + so->pc_primitive_cntl |= A5XX_PC_PRIMITIVE_CNTL_PROVOKING_VTX_LAST; + +// if (!cso->depth_clip) +// so->gras_cl_clip_cntl |= A5XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE | +// A5XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE; + if (cso->clip_halfz) + so->gras_cl_clip_cntl |= A5XX_GRAS_CL_CNTL_ZERO_GB_SCALE_Z; + + return so; +} diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_rasterizer.h b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_rasterizer.h new file mode 100644 index 000000000..b59758153 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_rasterizer.h @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * 
Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#ifndef FD5_RASTERIZER_H_ +#define FD5_RASTERIZER_H_ + +#include "pipe/p_state.h" +#include "pipe/p_context.h" + +struct fd5_rasterizer_stateobj { + struct pipe_rasterizer_state base; + + uint32_t gras_su_point_minmax; + uint32_t gras_su_point_size; + uint32_t gras_su_poly_offset_scale; + uint32_t gras_su_poly_offset_offset; + uint32_t gras_su_poly_offset_clamp; + + uint32_t gras_su_cntl; + uint32_t gras_cl_clip_cntl; + uint32_t pc_primitive_cntl; + uint32_t pc_raster_cntl; +}; + +static inline struct fd5_rasterizer_stateobj * +fd5_rasterizer_stateobj(struct pipe_rasterizer_state *rast) +{ + return (struct fd5_rasterizer_stateobj *)rast; +} + +void * fd5_rasterizer_state_create(struct pipe_context *pctx, + const struct pipe_rasterizer_state *cso); + +#endif /* FD5_RASTERIZER_H_ */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_screen.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_screen.c new file mode 100644 index 000000000..96f83ed33 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_screen.c @@ -0,0 +1,104 @@ +/* + * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
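fd5_rasterizer_state_create above follows the usual gallium pattern of baking the pipe_rasterizer_state into ready-to-emit register words at CSO creation time. A small, self-contained check of the point-size clamp it implements (the helper is hypothetical and takes the already-computed minimum point size as a plain parameter instead of calling util_get_min_point_size()):

#include <stdbool.h>
#include <stdio.h>

/* When the vertex shader writes a per-vertex point size, clamp to
 * [min_point_size, 4092] as above; otherwise pin both bounds to the
 * fixed state point size. */
static void
point_size_range(bool per_vertex, float fixed_size, float min_point_size,
		float *out_min, float *out_max)
{
	if (per_vertex) {
		*out_min = min_point_size;
		*out_max = 4092.0f;
	} else {
		*out_min = fixed_size;
		*out_max = fixed_size;
	}
}

int
main(void)
{
	float lo, hi;

	point_size_range(false, 4.0f, 1.0f, &lo, &hi);
	printf("fixed point size:      [%.1f, %.1f]\n", lo, hi); /* [4.0, 4.0] */

	point_size_range(true, 4.0f, 1.0f, &lo, &hi);
	printf("per-vertex point size: [%.1f, %.1f]\n", lo, hi); /* [1.0, 4092.0] */
	return 0;
}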
+ * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#include "pipe/p_screen.h" +#include "util/u_format.h" + +#include "fd5_screen.h" +#include "fd5_context.h" +#include "fd5_format.h" +#include "ir3_compiler.h" + +static boolean +fd5_screen_is_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned usage) +{ + unsigned retval = 0; + + if ((target >= PIPE_MAX_TEXTURE_TYPES) || + (sample_count > 1) || /* TODO add MSAA */ + !util_format_is_supported(format, usage)) { + DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x", + util_format_name(format), target, sample_count, usage); + return FALSE; + } + + if ((usage & PIPE_BIND_VERTEX_BUFFER) && + (fd5_pipe2vtx(format) != (enum a5xx_vtx_fmt)~0)) { + retval |= PIPE_BIND_VERTEX_BUFFER; + } + + if ((usage & PIPE_BIND_SAMPLER_VIEW) && + (target == PIPE_BUFFER || + util_format_get_blocksize(format) != 12) && + (fd5_pipe2tex(format) != (enum a5xx_tex_fmt)~0)) { + retval |= PIPE_BIND_SAMPLER_VIEW; + } + + if ((usage & (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED)) && + (fd5_pipe2color(format) != (enum a5xx_color_fmt)~0) && + (fd5_pipe2tex(format) != (enum a5xx_tex_fmt)~0)) { + retval |= usage & (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED); + } + + if ((usage & PIPE_BIND_DEPTH_STENCIL) && + (fd5_pipe2depth(format) != (enum a5xx_depth_format)~0) && + (fd5_pipe2tex(format) != (enum a5xx_tex_fmt)~0)) { + retval |= PIPE_BIND_DEPTH_STENCIL; + } + + if ((usage & PIPE_BIND_INDEX_BUFFER) && + (fd_pipe2index(format) != (enum pc_di_index_size)~0)) { + retval |= PIPE_BIND_INDEX_BUFFER; + } + + if (retval != usage) { + DBG("not supported: format=%s, target=%d, sample_count=%d, " + "usage=%x, retval=%x", util_format_name(format), + target, sample_count, usage, retval); + } + + return retval == usage; +} + +void +fd5_screen_init(struct pipe_screen *pscreen) +{ + struct fd_screen *screen = fd_screen(pscreen); + screen->max_rts = A5XX_MAX_RENDER_TARGETS; + screen->compiler = ir3_compiler_create(screen->dev, screen->gpu_id); + pscreen->context_create = fd5_context_create; + pscreen->is_format_supported = fd5_screen_is_format_supported; +} diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_screen.h b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_screen.h new file mode 100644 index 000000000..ba0c7f15f --- /dev/null +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_screen.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
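fd5_screen_is_format_supported above accumulates into retval only the bind flags it can actually honour for the format, then reports success only when every requested flag was satisfied (retval == usage). A minimal sketch of that control flow, with made-up capability flags standing in for PIPE_BIND_* and for the per-format table lookups:

#include <stdbool.h>
#include <stdio.h>

#define BIND_VERTEX_BUFFER 0x1
#define BIND_SAMPLER_VIEW  0x2
#define BIND_RENDER_TARGET 0x4

/* Accumulate the supported subset of 'usage', then require all of it. */
static bool
format_supported(unsigned usage, bool vtx_ok, bool tex_ok, bool rt_ok)
{
	unsigned retval = 0;

	if ((usage & BIND_VERTEX_BUFFER) && vtx_ok)
		retval |= BIND_VERTEX_BUFFER;
	if ((usage & BIND_SAMPLER_VIEW) && tex_ok)
		retval |= BIND_SAMPLER_VIEW;
	if ((usage & BIND_RENDER_TARGET) && rt_ok)
		retval |= BIND_RENDER_TARGET;

	return retval == usage;
}

int
main(void)
{
	/* sampleable + renderable requested, but rendering not supported: */
	printf("%d\n", format_supported(BIND_SAMPLER_VIEW | BIND_RENDER_TARGET,
			true, true, false)); /* 0 */
	/* sampleable only: */
	printf("%d\n", format_supported(BIND_SAMPLER_VIEW,
			true, true, false)); /* 1 */
	return 0;
}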
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#ifndef FD5_SCREEN_H_ +#define FD5_SCREEN_H_ + +#include "pipe/p_screen.h" + +void fd5_screen_init(struct pipe_screen *pscreen); + +#endif /* FD5_SCREEN_H_ */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_texture.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_texture.c new file mode 100644 index 000000000..87b69ea1c --- /dev/null +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_texture.c @@ -0,0 +1,348 @@ +/* + * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_format.h" + +#include "fd5_texture.h" +#include "fd5_format.h" + +static enum a5xx_tex_clamp +tex_clamp(unsigned wrap, bool clamp_to_edge, bool *needs_border) +{ + /* Hardware does not support _CLAMP, but we emulate it: */ + if (wrap == PIPE_TEX_WRAP_CLAMP) { + wrap = (clamp_to_edge) ? + PIPE_TEX_WRAP_CLAMP_TO_EDGE : PIPE_TEX_WRAP_CLAMP_TO_BORDER; + } + + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + return A5XX_TEX_REPEAT; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return A5XX_TEX_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + *needs_border = true; + return A5XX_TEX_CLAMP_TO_BORDER; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + /* only works for PoT.. need to emulate otherwise! */ + return A5XX_TEX_MIRROR_CLAMP; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return A5XX_TEX_MIRROR_REPEAT; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + /* these two we could perhaps emulate, but we currently + * just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP + */ + default: + DBG("invalid wrap: %u", wrap); + return 0; + } +} + +static enum a5xx_tex_filter +tex_filter(unsigned filter, bool aniso) +{ + switch (filter) { + case PIPE_TEX_FILTER_NEAREST: + return A5XX_TEX_NEAREST; + case PIPE_TEX_FILTER_LINEAR: + return aniso ? 
A5XX_TEX_ANISO : A5XX_TEX_LINEAR; + default: + DBG("invalid filter: %u", filter); + return 0; + } +} + +static void * +fd5_sampler_state_create(struct pipe_context *pctx, + const struct pipe_sampler_state *cso) +{ + struct fd5_sampler_stateobj *so = CALLOC_STRUCT(fd5_sampler_stateobj); + unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8)); + bool miplinear = false; + bool clamp_to_edge; + + if (!so) + return NULL; + + so->base = *cso; + + if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) + miplinear = true; + + /* + * For nearest filtering, _CLAMP means _CLAMP_TO_EDGE; for linear + * filtering, _CLAMP means _CLAMP_TO_BORDER while additionally + * clamping the texture coordinates to [0.0, 1.0]. + * + * The clamping will be taken care of in the shaders. There are two + * filters here, but let the minification one has a say. + */ + clamp_to_edge = (cso->min_img_filter == PIPE_TEX_FILTER_NEAREST); + if (!clamp_to_edge) { + so->saturate_s = (cso->wrap_s == PIPE_TEX_WRAP_CLAMP); + so->saturate_t = (cso->wrap_t == PIPE_TEX_WRAP_CLAMP); + so->saturate_r = (cso->wrap_r == PIPE_TEX_WRAP_CLAMP); + } + + so->needs_border = false; + so->texsamp0 = + COND(miplinear, A5XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) | + A5XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) | + A5XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) | + A5XX_TEX_SAMP_0_ANISO(aniso) | + A5XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, clamp_to_edge, &so->needs_border)) | + A5XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, clamp_to_edge, &so->needs_border)) | + A5XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, clamp_to_edge, &so->needs_border)); + + so->texsamp1 = + COND(!cso->seamless_cube_map, A5XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) | + COND(!cso->normalized_coords, A5XX_TEX_SAMP_1_UNNORM_COORDS); + + if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { + so->texsamp0 |= A5XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias); + so->texsamp1 |= + A5XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) | + A5XX_TEX_SAMP_1_MAX_LOD(cso->max_lod); + } + + if (cso->compare_mode) + so->texsamp1 |= A5XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */ + + return so; +} + +static void +fd5_sampler_states_bind(struct pipe_context *pctx, + enum pipe_shader_type shader, unsigned start, + unsigned nr, void **hwcso) +{ + struct fd_context *ctx = fd_context(pctx); + struct fd5_context *fd5_ctx = fd5_context(ctx); + uint16_t saturate_s = 0, saturate_t = 0, saturate_r = 0; + unsigned i; + + if (!hwcso) + nr = 0; + + for (i = 0; i < nr; i++) { + if (hwcso[i]) { + struct fd5_sampler_stateobj *sampler = + fd5_sampler_stateobj(hwcso[i]); + if (sampler->saturate_s) + saturate_s |= (1 << i); + if (sampler->saturate_t) + saturate_t |= (1 << i); + if (sampler->saturate_r) + saturate_r |= (1 << i); + } + } + + fd_sampler_states_bind(pctx, shader, start, nr, hwcso); + + if (shader == PIPE_SHADER_FRAGMENT) { + fd5_ctx->fsaturate = + (saturate_s != 0) || + (saturate_t != 0) || + (saturate_r != 0); + fd5_ctx->fsaturate_s = saturate_s; + fd5_ctx->fsaturate_t = saturate_t; + fd5_ctx->fsaturate_r = saturate_r; + } else if (shader == PIPE_SHADER_VERTEX) { + fd5_ctx->vsaturate = + (saturate_s != 0) || + (saturate_t != 0) || + (saturate_r != 0); + fd5_ctx->vsaturate_s = saturate_s; + fd5_ctx->vsaturate_t = saturate_t; + fd5_ctx->vsaturate_r = saturate_r; + } +} + +static enum a5xx_tex_type +tex_type(unsigned target) +{ + switch (target) { + default: + assert(0); + case PIPE_BUFFER: + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + return A5XX_TEX_1D; + case 
PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_2D_ARRAY: + return A5XX_TEX_2D; + case PIPE_TEXTURE_3D: + return A5XX_TEX_3D; + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + return A5XX_TEX_CUBE; + } +} + +static bool +use_astc_srgb_workaround(struct pipe_context *pctx, enum pipe_format format) +{ + return false; // TODO check if this is still needed on a5xx +} + +static struct pipe_sampler_view * +fd5_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, + const struct pipe_sampler_view *cso) +{ + struct fd5_pipe_sampler_view *so = CALLOC_STRUCT(fd5_pipe_sampler_view); + struct fd_resource *rsc = fd_resource(prsc); + unsigned lvl, layers; + + if (!so) + return NULL; + + so->base = *cso; + pipe_reference(NULL, &prsc->reference); + so->base.texture = prsc; + so->base.reference.count = 1; + so->base.context = pctx; + + so->texconst0 = + A5XX_TEX_CONST_0_FMT(fd5_pipe2tex(cso->format)) | + fd5_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g, + cso->swizzle_b, cso->swizzle_a); + + if (util_format_is_srgb(cso->format)) { + if (use_astc_srgb_workaround(pctx, cso->format)) + so->astc_srgb = true; + so->texconst0 |= A5XX_TEX_CONST_0_SRGB; + } + + if (cso->target == PIPE_BUFFER) { + unsigned elements = cso->u.buf.size / util_format_get_blocksize(cso->format); + + lvl = 0; + so->texconst1 = + A5XX_TEX_CONST_1_WIDTH(elements) | + A5XX_TEX_CONST_1_HEIGHT(1); + so->texconst2 = + A5XX_TEX_CONST_2_FETCHSIZE(fd5_pipe2fetchsize(cso->format)) | + A5XX_TEX_CONST_2_PITCH(elements * rsc->cpp); + so->offset = cso->u.buf.offset; + } else { + unsigned miplevels; + + lvl = fd_sampler_first_level(cso); + miplevels = fd_sampler_last_level(cso) - lvl; + layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1; + + so->texconst0 |= A5XX_TEX_CONST_0_MIPLVLS(miplevels); + so->texconst1 = + A5XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) | + A5XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl)); + so->texconst2 = + A5XX_TEX_CONST_2_FETCHSIZE(fd5_pipe2fetchsize(cso->format)) | + A5XX_TEX_CONST_2_PITCH( + util_format_get_nblocksx( + cso->format, rsc->slices[lvl].pitch) * rsc->cpp); + so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer); + } + + so->texconst2 |= A5XX_TEX_CONST_2_TYPE(tex_type(cso->target)); + + switch (cso->target) { + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_2D: + so->texconst3 = + A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layer_size); + so->texconst5 = + A5XX_TEX_CONST_5_DEPTH(1); + break; + case PIPE_TEXTURE_1D_ARRAY: + case PIPE_TEXTURE_2D_ARRAY: + so->texconst3 = + A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layer_size); + so->texconst5 = + A5XX_TEX_CONST_5_DEPTH(layers); + break; + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + so->texconst3 = + A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layer_size); + so->texconst5 = + A5XX_TEX_CONST_5_DEPTH(layers / 6); + break; + case PIPE_TEXTURE_3D: + so->texconst3 = + A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->slices[lvl].size0); + so->texconst5 = + A5XX_TEX_CONST_5_DEPTH(u_minify(prsc->depth0, lvl)); + break; + default: + so->texconst3 = 0x00000000; + break; + } + + return &so->base; +} + +static void +fd5_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader, + unsigned start, unsigned nr, + struct pipe_sampler_view **views) +{ + struct fd_context *ctx = fd_context(pctx); + struct fd5_context *fd5_ctx = fd5_context(ctx); + uint16_t astc_srgb = 0; + unsigned i; + + for (i = 0; i < nr; i++) { + if (views[i]) { + struct fd5_pipe_sampler_view *view = + fd5_pipe_sampler_view(views[i]); + if 
(view->astc_srgb) + astc_srgb |= (1 << i); + } + } + + fd_set_sampler_views(pctx, shader, start, nr, views); + + if (shader == PIPE_SHADER_FRAGMENT) { + fd5_ctx->fastc_srgb = astc_srgb; + } else if (shader == PIPE_SHADER_VERTEX) { + fd5_ctx->vastc_srgb = astc_srgb; + } +} + +void +fd5_texture_init(struct pipe_context *pctx) +{ + pctx->create_sampler_state = fd5_sampler_state_create; + pctx->bind_sampler_states = fd5_sampler_states_bind; + pctx->create_sampler_view = fd5_sampler_view_create; + pctx->set_sampler_views = fd5_set_sampler_views; +} diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_texture.h b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_texture.h new file mode 100644 index 000000000..c4d109376 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_texture.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
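The sampler code in fd5_sampler_state_create above folds the requested max_anisotropy into a small log2-style field: util_last_bit(MIN2(max_anisotropy >> 1, 8)) gives 0 for 1x, 1 for 2x, 2 for 4x, 3 for 8x and 4 for 16x, which is what gets fed to A5XX_TEX_SAMP_0_ANISO(). A standalone check of that mapping, with util_last_bit() replaced by a local equivalent so the snippet compiles on its own:

#include <stdio.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

/* Local stand-in for Mesa's util_last_bit(): 1-based index of the highest
 * set bit, or 0 when the value is 0. */
static unsigned
last_bit(unsigned v)
{
	unsigned n = 0;
	while (v) {
		n++;
		v >>= 1;
	}
	return n;
}

int
main(void)
{
	unsigned max_aniso;

	for (max_aniso = 1; max_aniso <= 16; max_aniso *= 2) {
		unsigned aniso = last_bit(MIN2(max_aniso >> 1, 8));
		printf("%2ux -> ANISO field %u\n", max_aniso, aniso);
	}
	return 0;
}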
+ * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#ifndef FD5_TEXTURE_H_ +#define FD5_TEXTURE_H_ + +#include "pipe/p_context.h" + +#include "freedreno_texture.h" +#include "freedreno_resource.h" + +#include "fd5_context.h" +#include "fd5_format.h" + +struct fd5_sampler_stateobj { + struct pipe_sampler_state base; + uint32_t texsamp0, texsamp1, texsamp2, texsamp3; + bool saturate_s, saturate_t, saturate_r; + bool needs_border; +}; + +static inline struct fd5_sampler_stateobj * +fd5_sampler_stateobj(struct pipe_sampler_state *samp) +{ + return (struct fd5_sampler_stateobj *)samp; +} + +struct fd5_pipe_sampler_view { + struct pipe_sampler_view base; + uint32_t texconst0, texconst1, texconst2, texconst3, texconst5; + uint32_t texconst6, texconst7, texconst8, texconst9, texconst10, texconst11; + uint32_t offset; + bool astc_srgb; +}; + +static inline struct fd5_pipe_sampler_view * +fd5_pipe_sampler_view(struct pipe_sampler_view *pview) +{ + return (struct fd5_pipe_sampler_view *)pview; +} + +unsigned fd5_get_const_idx(struct fd_context *ctx, + struct fd_texture_stateobj *tex, unsigned samp_id); + +void fd5_texture_init(struct pipe_context *pctx); + +#endif /* FD5_TEXTURE_H_ */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_zsa.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_zsa.c new file mode 100644 index 000000000..495a4cc8a --- /dev/null +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_zsa.c @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
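fd5_sampler_view_create above sizes the texture descriptor from the view's base mip level: the WIDTH/HEIGHT fields take the level-0 dimensions minified down to the first selected level, and MIPLVLS is the difference between the last and first level helpers. A small worked check of that arithmetic (minify() is a local stand-in for Mesa's u_minify(), and the sizes are made up):

#include <stdio.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

/* Halve per mip level, never going below 1. */
static unsigned
minify(unsigned base, unsigned level)
{
	return MAX2(base >> level, 1);
}

int
main(void)
{
	unsigned width0 = 1024, height0 = 768;
	unsigned first_level = 2, last_level = 9;
	unsigned lvl = first_level;
	unsigned miplevels = last_level - lvl;

	printf("WIDTH=%u HEIGHT=%u MIPLVLS=%u\n",
			minify(width0, lvl), minify(height0, lvl), miplevels);
	/* -> WIDTH=256 HEIGHT=192 MIPLVLS=7 */
	return 0;
}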
+ * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" + +#include "fd5_zsa.h" +#include "fd5_context.h" +#include "fd5_format.h" + +void * +fd5_zsa_state_create(struct pipe_context *pctx, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct fd5_zsa_stateobj *so; + + so = CALLOC_STRUCT(fd5_zsa_stateobj); + if (!so) + return NULL; + + so->base = *cso; + + switch (cso->depth.func) { + case PIPE_FUNC_LESS: + case PIPE_FUNC_LEQUAL: + so->gras_lrz_cntl = A5XX_GRAS_LRZ_CNTL_ENABLE; + break; + + case PIPE_FUNC_GREATER: + case PIPE_FUNC_GEQUAL: + so->gras_lrz_cntl = A5XX_GRAS_LRZ_CNTL_ENABLE | A5XX_GRAS_LRZ_CNTL_GREATER; + break; + + default: + /* LRZ not enabled */ + so->gras_lrz_cntl = 0; + break; + } + + if (!(cso->stencil->enabled || cso->alpha.enabled || !cso->depth.writemask)) + so->lrz_write = true; + + so->rb_depth_cntl |= + A5XX_RB_DEPTH_CNTL_ZFUNC(cso->depth.func); /* maps 1:1 */ + + if (cso->depth.enabled) + so->rb_depth_cntl |= + A5XX_RB_DEPTH_CNTL_Z_ENABLE | + A5XX_RB_DEPTH_CNTL_Z_TEST_ENABLE; + + if (cso->depth.writemask) + so->rb_depth_cntl |= A5XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE; + + if (cso->stencil[0].enabled) { + const struct pipe_stencil_state *s = &cso->stencil[0]; + + so->rb_stencil_control |= + A5XX_RB_STENCIL_CONTROL_STENCIL_READ | + A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | + A5XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */ + A5XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) | + A5XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) | + A5XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op)); + so->rb_stencilrefmask |= + A5XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) | + A5XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask); + + if (cso->stencil[1].enabled) { + const struct pipe_stencil_state *bs = &cso->stencil[1]; + + so->rb_stencil_control |= + A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF | + A5XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */ + A5XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) | + A5XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) | + A5XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op)); + so->rb_stencilrefmask_bf |= + A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(bs->writemask) | + A5XX_RB_STENCILREFMASK_BF_STENCILMASK(bs->valuemask); + } + } + + if (cso->alpha.enabled) { + uint32_t ref = cso->alpha.ref_value * 255.0; + so->rb_alpha_control = + A5XX_RB_ALPHA_CONTROL_ALPHA_TEST | + A5XX_RB_ALPHA_CONTROL_ALPHA_REF(ref) | + A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha.func); +// so->rb_depth_control |= +// A5XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE; + } + + return so; +} diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_zsa.h b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_zsa.h new file mode 100644 index 000000000..c15ba1aa8 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_zsa.h @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice 
(including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#ifndef FD5_ZSA_H_ +#define FD5_ZSA_H_ + + +#include "pipe/p_state.h" +#include "pipe/p_context.h" + +#include "freedreno_util.h" + +struct fd5_zsa_stateobj { + struct pipe_depth_stencil_alpha_state base; + + uint32_t rb_alpha_control; + uint32_t rb_depth_cntl; + uint32_t rb_stencil_control; + uint32_t rb_stencilrefmask; + uint32_t rb_stencilrefmask_bf; + uint32_t gras_lrz_cntl; + bool lrz_write; +}; + +static inline struct fd5_zsa_stateobj * +fd5_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa) +{ + return (struct fd5_zsa_stateobj *)zsa; +} + +void * fd5_zsa_state_create(struct pipe_context *pctx, + const struct pipe_depth_stencil_alpha_state *cso); + +#endif /* FD5_ZSA_H_ */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/adreno_common.xml.h b/lib/mesa/src/gallium/drivers/freedreno/adreno_common.xml.h index 1f18fc6fc..66a84502e 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/adreno_common.xml.h +++ b/lib/mesa/src/gallium/drivers/freedreno/adreno_common.xml.h @@ -8,16 +8,17 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16185 bytes, from 2016-03-05 03:08:05) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 110685 bytes, from 2016-04-25 17:56:43) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) - -Copyright (C) 2013-2016 by the following authors: +- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 431 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 37162 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 13324 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 31866 bytes, from 2017-06-02 15:50:23) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 111898 bytes, from 2017-05-30 19:25:27) +- 
/home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 142603 bytes, from 2017-06-06 17:02:32) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2017-05-17 13:21:27) + +Copyright (C) 2013-2017 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) @@ -174,6 +175,14 @@ enum a3xx_color_swap { XYZW = 3, }; +enum a3xx_rb_blend_opcode { + BLEND_DST_PLUS_SRC = 0, + BLEND_SRC_MINUS_DST = 1, + BLEND_DST_MINUS_SRC = 2, + BLEND_MIN_DST_SRC = 3, + BLEND_MAX_DST_SRC = 4, +}; + #define REG_AXXX_CP_RB_BASE 0x000001c0 #define REG_AXXX_CP_RB_CNTL 0x000001c1 @@ -210,6 +219,7 @@ static inline uint32_t AXXX_CP_RB_RPTR_ADDR_SWAP(uint32_t val) #define AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT 2 static inline uint32_t AXXX_CP_RB_RPTR_ADDR_ADDR(uint32_t val) { + assert(!(val & 0x3)); return ((val >> 2) << AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT) & AXXX_CP_RB_RPTR_ADDR_ADDR__MASK; } @@ -414,6 +424,35 @@ static inline uint32_t AXXX_CP_CSQ_IB2_STAT_WPTR(uint32_t val) #define REG_AXXX_CP_IB2_BUFSZ 0x0000045b #define REG_AXXX_CP_STAT 0x0000047f +#define AXXX_CP_STAT_CP_BUSY 0x80000000 +#define AXXX_CP_STAT_VS_EVENT_FIFO_BUSY 0x40000000 +#define AXXX_CP_STAT_PS_EVENT_FIFO_BUSY 0x20000000 +#define AXXX_CP_STAT_CF_EVENT_FIFO_BUSY 0x10000000 +#define AXXX_CP_STAT_RB_EVENT_FIFO_BUSY 0x08000000 +#define AXXX_CP_STAT_ME_BUSY 0x04000000 +#define AXXX_CP_STAT_MIU_WR_C_BUSY 0x02000000 +#define AXXX_CP_STAT_CP_3D_BUSY 0x00800000 +#define AXXX_CP_STAT_CP_NRT_BUSY 0x00400000 +#define AXXX_CP_STAT_RBIU_SCRATCH_BUSY 0x00200000 +#define AXXX_CP_STAT_RCIU_ME_BUSY 0x00100000 +#define AXXX_CP_STAT_RCIU_PFP_BUSY 0x00080000 +#define AXXX_CP_STAT_MEQ_RING_BUSY 0x00040000 +#define AXXX_CP_STAT_PFP_BUSY 0x00020000 +#define AXXX_CP_STAT_ST_QUEUE_BUSY 0x00010000 +#define AXXX_CP_STAT_INDIRECT2_QUEUE_BUSY 0x00002000 +#define AXXX_CP_STAT_INDIRECTS_QUEUE_BUSY 0x00001000 +#define AXXX_CP_STAT_RING_QUEUE_BUSY 0x00000800 +#define AXXX_CP_STAT_CSF_BUSY 0x00000400 +#define AXXX_CP_STAT_CSF_ST_BUSY 0x00000200 +#define AXXX_CP_STAT_EVENT_BUSY 0x00000100 +#define AXXX_CP_STAT_CSF_INDIRECT2_BUSY 0x00000080 +#define AXXX_CP_STAT_CSF_INDIRECTS_BUSY 0x00000040 +#define AXXX_CP_STAT_CSF_RING_BUSY 0x00000020 +#define AXXX_CP_STAT_RCIU_BUSY 0x00000010 +#define AXXX_CP_STAT_RBIU_BUSY 0x00000008 +#define AXXX_CP_STAT_MIU_RD_RETURN_BUSY 0x00000004 +#define AXXX_CP_STAT_MIU_RD_REQ_BUSY 0x00000002 +#define AXXX_CP_STAT_MIU_WR_BUSY 0x00000001 #define REG_AXXX_CP_SCRATCH_REG0 0x00000578 diff --git a/lib/mesa/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/lib/mesa/src/gallium/drivers/freedreno/adreno_pm4.xml.h index 5853699f9..9da798df7 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/adreno_pm4.xml.h +++ b/lib/mesa/src/gallium/drivers/freedreno/adreno_pm4.xml.h @@ -8,16 +8,17 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16185 bytes, from 2016-03-05 03:08:05) -- 
/home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 110685 bytes, from 2016-04-25 17:56:43) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) - -Copyright (C) 2013-2016 by the following authors: +- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 431 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 37162 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 13324 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 31866 bytes, from 2017-06-02 15:50:23) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 111898 bytes, from 2017-05-30 19:25:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 142603 bytes, from 2017-06-06 17:02:32) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2017-05-17 13:21:27) + +Copyright (C) 2013-2017 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) @@ -58,6 +59,7 @@ enum vgt_event_type { RST_PIX_CNT = 13, RST_VTX_CNT = 14, TILE_FLUSH = 15, + STAT_EVENT = 16, CACHE_FLUSH_AND_INV_TS_EVENT = 20, ZPASS_DONE = 21, CACHE_FLUSH_AND_INV_EVENT = 22, @@ -65,6 +67,18 @@ enum vgt_event_type { PERFCOUNTER_STOP = 24, VS_FETCH_DONE = 27, FACENESS_FLUSH = 28, + FLUSH_SO_0 = 17, + FLUSH_SO_1 = 18, + FLUSH_SO_2 = 19, + FLUSH_SO_3 = 20, + UNK_19 = 25, + UNK_1C = 28, + UNK_1D = 29, + BLIT = 30, + UNK_25 = 37, + LRZ_FLUSH = 38, + UNK_2C = 44, + UNK_2D = 45, }; enum pc_di_primtype { @@ -82,7 +96,6 @@ enum pc_di_primtype { DI_PT_LINESTRIP_ADJ = 11, DI_PT_TRI_ADJ = 12, DI_PT_TRISTRIP_ADJ = 13, - DI_PT_PATCHES = 34, }; enum pc_di_src_sel { @@ -110,11 +123,15 @@ enum adreno_pm4_packet_type { CP_TYPE1_PKT = 0x40000000, CP_TYPE2_PKT = 0x80000000, CP_TYPE3_PKT = 0xc0000000, + CP_TYPE4_PKT = 0x40000000, + CP_TYPE7_PKT = 0x70000000, }; enum adreno_pm4_type3_packets { CP_ME_INIT = 72, CP_NOP = 16, + CP_PREEMPT_ENABLE = 28, + CP_PREEMPT_TOKEN = 30, CP_INDIRECT_BUFFER = 63, CP_INDIRECT_BUFFER_PFD = 55, CP_WAIT_FOR_IDLE = 38, @@ -125,11 +142,13 @@ enum adreno_pm4_type3_packets { CP_WAIT_IB_PFD_COMPLETE = 93, CP_REG_RMW = 33, CP_SET_BIN_DATA = 47, + CP_SET_BIN_DATA5 = 47, CP_REG_TO_MEM = 62, CP_MEM_WRITE = 61, CP_MEM_WRITE_CNTR = 79, CP_COND_EXEC = 68, CP_COND_WRITE = 69, + CP_COND_WRITE5 = 69, CP_EVENT_WRITE = 70, CP_EVENT_WRITE_SHD = 88, CP_EVENT_WRITE_CFL = 89, @@ -156,6 +175,7 @@ enum adreno_pm4_type3_packets { CP_SET_PROTECTED_MODE = 95, CP_BOOTSTRAP_UCODE = 111, CP_LOAD_STATE = 48, + CP_LOAD_STATE4 = 48, CP_COND_INDIRECT_BUFFER_PFE = 58, CP_COND_INDIRECT_BUFFER_PFD = 50, CP_INDIRECT_BUFFER_PFE = 63, @@ -163,6 +183,7 @@ enum adreno_pm4_type3_packets { CP_TEST_TWO_MEMS = 113, CP_REG_WR_NO_CTXT = 120, CP_RECORD_PFP_TIMESTAMP = 17, + CP_SET_SECURE_MODE = 102, CP_WAIT_FOR_ME = 19, CP_SET_DRAW_STATE = 67, CP_DRAW_INDX_OFFSET = 56, @@ -178,6 +199,23 @@ enum adreno_pm4_type3_packets { CP_WAIT_MEM_WRITES = 18, CP_COND_REG_EXEC = 71, CP_MEM_TO_REG = 66, + CP_EXEC_CS = 51, + CP_PERFCOUNTER_ACTION = 80, + CP_SMMU_TABLE_UPDATE = 
83, + CP_CONTEXT_REG_BUNCH = 92, + CP_YIELD_ENABLE = 28, + CP_SKIP_IB2_ENABLE_GLOBAL = 29, + CP_SKIP_IB2_ENABLE_LOCAL = 35, + CP_SET_SUBDRAW_SIZE = 53, + CP_SET_VISIBILITY_OVERRIDE = 100, + CP_PREEMPT_ENABLE_GLOBAL = 105, + CP_PREEMPT_ENABLE_LOCAL = 106, + CP_CONTEXT_SWITCH_YIELD = 107, + CP_SET_RENDER_MODE = 108, + CP_COMPUTE_CHECKPOINT = 110, + CP_MEM_TO_MEM = 115, + CP_BLIT = 44, + CP_UNK_39 = 57, IN_IB_PREFETCH_END = 23, IN_SUBBLK_PREFETCH = 31, IN_INSTR_PREFETCH = 32, @@ -196,6 +234,7 @@ enum adreno_state_block { SB_VERT_SHADER = 4, SB_GEOM_SHADER = 5, SB_FRAG_SHADER = 6, + SB_COMPUTE_SHADER = 7, }; enum adreno_state_type { @@ -212,12 +251,63 @@ enum adreno_state_src { SS_INDIRECT_STM = 6, }; +enum a4xx_state_block { + SB4_VS_TEX = 0, + SB4_HS_TEX = 1, + SB4_DS_TEX = 2, + SB4_GS_TEX = 3, + SB4_FS_TEX = 4, + SB4_CS_TEX = 5, + SB4_VS_SHADER = 8, + SB4_HS_SHADER = 9, + SB4_DS_SHADER = 10, + SB4_GS_SHADER = 11, + SB4_FS_SHADER = 12, + SB4_CS_SHADER = 13, + SB4_SSBO = 14, + SB4_CS_SSBO = 15, +}; + +enum a4xx_state_type { + ST4_SHADER = 0, + ST4_CONSTANTS = 1, +}; + +enum a4xx_state_src { + SS4_DIRECT = 0, + SS4_INDIRECT = 2, +}; + enum a4xx_index_size { INDEX4_SIZE_8_BIT = 0, INDEX4_SIZE_16_BIT = 1, INDEX4_SIZE_32_BIT = 2, }; +enum cp_cond_function { + WRITE_ALWAYS = 0, + WRITE_LT = 1, + WRITE_LE = 2, + WRITE_EQ = 3, + WRITE_NE = 4, + WRITE_GE = 5, + WRITE_GT = 6, +}; + +enum render_mode_cmd { + BYPASS = 1, + BINNING = 2, + GMEM = 3, + BLIT2D = 5, + BLIT2DSCALE = 7, +}; + +enum cp_blit_cmd { + BLIT_OP_FILL = 0, + BLIT_OP_COPY = 1, + BLIT_OP_SCALE = 3, +}; + #define REG_CP_LOAD_STATE_0 0x00000000 #define CP_LOAD_STATE_0_DST_OFF__MASK 0x0000ffff #define CP_LOAD_STATE_0_DST_OFF__SHIFT 0 @@ -255,9 +345,59 @@ static inline uint32_t CP_LOAD_STATE_1_STATE_TYPE(enum adreno_state_type val) #define CP_LOAD_STATE_1_EXT_SRC_ADDR__SHIFT 2 static inline uint32_t CP_LOAD_STATE_1_EXT_SRC_ADDR(uint32_t val) { + assert(!(val & 0x3)); return ((val >> 2) << CP_LOAD_STATE_1_EXT_SRC_ADDR__SHIFT) & CP_LOAD_STATE_1_EXT_SRC_ADDR__MASK; } +#define REG_CP_LOAD_STATE4_0 0x00000000 +#define CP_LOAD_STATE4_0_DST_OFF__MASK 0x0000ffff +#define CP_LOAD_STATE4_0_DST_OFF__SHIFT 0 +static inline uint32_t CP_LOAD_STATE4_0_DST_OFF(uint32_t val) +{ + return ((val) << CP_LOAD_STATE4_0_DST_OFF__SHIFT) & CP_LOAD_STATE4_0_DST_OFF__MASK; +} +#define CP_LOAD_STATE4_0_STATE_SRC__MASK 0x00030000 +#define CP_LOAD_STATE4_0_STATE_SRC__SHIFT 16 +static inline uint32_t CP_LOAD_STATE4_0_STATE_SRC(enum a4xx_state_src val) +{ + return ((val) << CP_LOAD_STATE4_0_STATE_SRC__SHIFT) & CP_LOAD_STATE4_0_STATE_SRC__MASK; +} +#define CP_LOAD_STATE4_0_STATE_BLOCK__MASK 0x003c0000 +#define CP_LOAD_STATE4_0_STATE_BLOCK__SHIFT 18 +static inline uint32_t CP_LOAD_STATE4_0_STATE_BLOCK(enum a4xx_state_block val) +{ + return ((val) << CP_LOAD_STATE4_0_STATE_BLOCK__SHIFT) & CP_LOAD_STATE4_0_STATE_BLOCK__MASK; +} +#define CP_LOAD_STATE4_0_NUM_UNIT__MASK 0xffc00000 +#define CP_LOAD_STATE4_0_NUM_UNIT__SHIFT 22 +static inline uint32_t CP_LOAD_STATE4_0_NUM_UNIT(uint32_t val) +{ + return ((val) << CP_LOAD_STATE4_0_NUM_UNIT__SHIFT) & CP_LOAD_STATE4_0_NUM_UNIT__MASK; +} + +#define REG_CP_LOAD_STATE4_1 0x00000001 +#define CP_LOAD_STATE4_1_STATE_TYPE__MASK 0x00000003 +#define CP_LOAD_STATE4_1_STATE_TYPE__SHIFT 0 +static inline uint32_t CP_LOAD_STATE4_1_STATE_TYPE(enum a4xx_state_type val) +{ + return ((val) << CP_LOAD_STATE4_1_STATE_TYPE__SHIFT) & CP_LOAD_STATE4_1_STATE_TYPE__MASK; +} +#define CP_LOAD_STATE4_1_EXT_SRC_ADDR__MASK 0xfffffffc +#define 
CP_LOAD_STATE4_1_EXT_SRC_ADDR__SHIFT 2 +static inline uint32_t CP_LOAD_STATE4_1_EXT_SRC_ADDR(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << CP_LOAD_STATE4_1_EXT_SRC_ADDR__SHIFT) & CP_LOAD_STATE4_1_EXT_SRC_ADDR__MASK; +} + +#define REG_CP_LOAD_STATE4_2 0x00000002 +#define CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__MASK 0xffffffff +#define CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__SHIFT 0 +static inline uint32_t CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(uint32_t val) +{ + return ((val) << CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__SHIFT) & CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__MASK; +} + #define REG_CP_DRAW_INDX_0 0x00000000 #define CP_DRAW_INDX_0_VIZ_QUERY__MASK 0xffffffff #define CP_DRAW_INDX_0_VIZ_QUERY__SHIFT 0 @@ -442,30 +582,40 @@ static inline uint32_t CP_DRAW_INDX_OFFSET_5_INDX_SIZE(uint32_t val) return ((val) << CP_DRAW_INDX_OFFSET_5_INDX_SIZE__SHIFT) & CP_DRAW_INDX_OFFSET_5_INDX_SIZE__MASK; } -#define REG_CP_SET_DRAW_STATE_0 0x00000000 -#define CP_SET_DRAW_STATE_0_COUNT__MASK 0x0000ffff -#define CP_SET_DRAW_STATE_0_COUNT__SHIFT 0 -static inline uint32_t CP_SET_DRAW_STATE_0_COUNT(uint32_t val) +static inline uint32_t REG_CP_SET_DRAW_STATE_(uint32_t i0) { return 0x00000000 + 0x3*i0; } + +static inline uint32_t REG_CP_SET_DRAW_STATE__0(uint32_t i0) { return 0x00000000 + 0x3*i0; } +#define CP_SET_DRAW_STATE__0_COUNT__MASK 0x0000ffff +#define CP_SET_DRAW_STATE__0_COUNT__SHIFT 0 +static inline uint32_t CP_SET_DRAW_STATE__0_COUNT(uint32_t val) +{ + return ((val) << CP_SET_DRAW_STATE__0_COUNT__SHIFT) & CP_SET_DRAW_STATE__0_COUNT__MASK; +} +#define CP_SET_DRAW_STATE__0_DIRTY 0x00010000 +#define CP_SET_DRAW_STATE__0_DISABLE 0x00020000 +#define CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS 0x00040000 +#define CP_SET_DRAW_STATE__0_LOAD_IMMED 0x00080000 +#define CP_SET_DRAW_STATE__0_GROUP_ID__MASK 0x1f000000 +#define CP_SET_DRAW_STATE__0_GROUP_ID__SHIFT 24 +static inline uint32_t CP_SET_DRAW_STATE__0_GROUP_ID(uint32_t val) { - return ((val) << CP_SET_DRAW_STATE_0_COUNT__SHIFT) & CP_SET_DRAW_STATE_0_COUNT__MASK; + return ((val) << CP_SET_DRAW_STATE__0_GROUP_ID__SHIFT) & CP_SET_DRAW_STATE__0_GROUP_ID__MASK; } -#define CP_SET_DRAW_STATE_0_DIRTY 0x00010000 -#define CP_SET_DRAW_STATE_0_DISABLE 0x00020000 -#define CP_SET_DRAW_STATE_0_DISABLE_ALL_GROUPS 0x00040000 -#define CP_SET_DRAW_STATE_0_LOAD_IMMED 0x00080000 -#define CP_SET_DRAW_STATE_0_GROUP_ID__MASK 0x1f000000 -#define CP_SET_DRAW_STATE_0_GROUP_ID__SHIFT 24 -static inline uint32_t CP_SET_DRAW_STATE_0_GROUP_ID(uint32_t val) + +static inline uint32_t REG_CP_SET_DRAW_STATE__1(uint32_t i0) { return 0x00000001 + 0x3*i0; } +#define CP_SET_DRAW_STATE__1_ADDR_LO__MASK 0xffffffff +#define CP_SET_DRAW_STATE__1_ADDR_LO__SHIFT 0 +static inline uint32_t CP_SET_DRAW_STATE__1_ADDR_LO(uint32_t val) { - return ((val) << CP_SET_DRAW_STATE_0_GROUP_ID__SHIFT) & CP_SET_DRAW_STATE_0_GROUP_ID__MASK; + return ((val) << CP_SET_DRAW_STATE__1_ADDR_LO__SHIFT) & CP_SET_DRAW_STATE__1_ADDR_LO__MASK; } -#define REG_CP_SET_DRAW_STATE_1 0x00000001 -#define CP_SET_DRAW_STATE_1_ADDR__MASK 0xffffffff -#define CP_SET_DRAW_STATE_1_ADDR__SHIFT 0 -static inline uint32_t CP_SET_DRAW_STATE_1_ADDR(uint32_t val) +static inline uint32_t REG_CP_SET_DRAW_STATE__2(uint32_t i0) { return 0x00000002 + 0x3*i0; } +#define CP_SET_DRAW_STATE__2_ADDR_HI__MASK 0xffffffff +#define CP_SET_DRAW_STATE__2_ADDR_HI__SHIFT 0 +static inline uint32_t CP_SET_DRAW_STATE__2_ADDR_HI(uint32_t val) { - return ((val) << CP_SET_DRAW_STATE_1_ADDR__SHIFT) & CP_SET_DRAW_STATE_1_ADDR__MASK; + return ((val) << CP_SET_DRAW_STATE__2_ADDR_HI__SHIFT) & 
CP_SET_DRAW_STATE__2_ADDR_HI__MASK; } #define REG_CP_SET_BIN_0 0x00000000 @@ -514,6 +664,52 @@ static inline uint32_t CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS(uint32_t val) return ((val) << CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__SHIFT) & CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__MASK; } +#define REG_CP_SET_BIN_DATA5_0 0x00000000 +#define CP_SET_BIN_DATA5_0_VSC_SIZE__MASK 0x003f0000 +#define CP_SET_BIN_DATA5_0_VSC_SIZE__SHIFT 16 +static inline uint32_t CP_SET_BIN_DATA5_0_VSC_SIZE(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_0_VSC_SIZE__SHIFT) & CP_SET_BIN_DATA5_0_VSC_SIZE__MASK; +} +#define CP_SET_BIN_DATA5_0_VSC_N__MASK 0x07c00000 +#define CP_SET_BIN_DATA5_0_VSC_N__SHIFT 22 +static inline uint32_t CP_SET_BIN_DATA5_0_VSC_N(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_0_VSC_N__SHIFT) & CP_SET_BIN_DATA5_0_VSC_N__MASK; +} + +#define REG_CP_SET_BIN_DATA5_1 0x00000001 +#define CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__MASK 0xffffffff +#define CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__SHIFT) & CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__MASK; +} + +#define REG_CP_SET_BIN_DATA5_2 0x00000002 +#define CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__MASK 0xffffffff +#define CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__SHIFT) & CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__MASK; +} + +#define REG_CP_SET_BIN_DATA5_3 0x00000003 +#define CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__MASK 0xffffffff +#define CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__SHIFT) & CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__MASK; +} + +#define REG_CP_SET_BIN_DATA5_4 0x00000004 +#define CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__MASK 0xffffffff +#define CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__SHIFT) & CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__MASK; +} + #define REG_CP_REG_TO_MEM_0 0x00000000 #define CP_REG_TO_MEM_0_REG__MASK 0x0000ffff #define CP_REG_TO_MEM_0_REG__SHIFT 0 @@ -538,5 +734,386 @@ static inline uint32_t CP_REG_TO_MEM_1_DEST(uint32_t val) return ((val) << CP_REG_TO_MEM_1_DEST__SHIFT) & CP_REG_TO_MEM_1_DEST__MASK; } +#define REG_CP_MEM_TO_MEM_0 0x00000000 +#define CP_MEM_TO_MEM_0_NEG_A 0x00000001 +#define CP_MEM_TO_MEM_0_NEG_B 0x00000002 +#define CP_MEM_TO_MEM_0_NEG_C 0x00000004 +#define CP_MEM_TO_MEM_0_DOUBLE 0x20000000 + +#define REG_CP_COND_WRITE_0 0x00000000 +#define CP_COND_WRITE_0_FUNCTION__MASK 0x00000007 +#define CP_COND_WRITE_0_FUNCTION__SHIFT 0 +static inline uint32_t CP_COND_WRITE_0_FUNCTION(enum cp_cond_function val) +{ + return ((val) << CP_COND_WRITE_0_FUNCTION__SHIFT) & CP_COND_WRITE_0_FUNCTION__MASK; +} +#define CP_COND_WRITE_0_POLL_MEMORY 0x00000010 +#define CP_COND_WRITE_0_WRITE_MEMORY 0x00000100 + +#define REG_CP_COND_WRITE_1 0x00000001 +#define CP_COND_WRITE_1_POLL_ADDR__MASK 0xffffffff +#define CP_COND_WRITE_1_POLL_ADDR__SHIFT 0 +static inline uint32_t CP_COND_WRITE_1_POLL_ADDR(uint32_t val) +{ + return ((val) << CP_COND_WRITE_1_POLL_ADDR__SHIFT) & CP_COND_WRITE_1_POLL_ADDR__MASK; +} + +#define REG_CP_COND_WRITE_2 0x00000002 +#define CP_COND_WRITE_2_REF__MASK 0xffffffff +#define 
CP_COND_WRITE_2_REF__SHIFT 0 +static inline uint32_t CP_COND_WRITE_2_REF(uint32_t val) +{ + return ((val) << CP_COND_WRITE_2_REF__SHIFT) & CP_COND_WRITE_2_REF__MASK; +} + +#define REG_CP_COND_WRITE_3 0x00000003 +#define CP_COND_WRITE_3_MASK__MASK 0xffffffff +#define CP_COND_WRITE_3_MASK__SHIFT 0 +static inline uint32_t CP_COND_WRITE_3_MASK(uint32_t val) +{ + return ((val) << CP_COND_WRITE_3_MASK__SHIFT) & CP_COND_WRITE_3_MASK__MASK; +} + +#define REG_CP_COND_WRITE_4 0x00000004 +#define CP_COND_WRITE_4_WRITE_ADDR__MASK 0xffffffff +#define CP_COND_WRITE_4_WRITE_ADDR__SHIFT 0 +static inline uint32_t CP_COND_WRITE_4_WRITE_ADDR(uint32_t val) +{ + return ((val) << CP_COND_WRITE_4_WRITE_ADDR__SHIFT) & CP_COND_WRITE_4_WRITE_ADDR__MASK; +} + +#define REG_CP_COND_WRITE_5 0x00000005 +#define CP_COND_WRITE_5_WRITE_DATA__MASK 0xffffffff +#define CP_COND_WRITE_5_WRITE_DATA__SHIFT 0 +static inline uint32_t CP_COND_WRITE_5_WRITE_DATA(uint32_t val) +{ + return ((val) << CP_COND_WRITE_5_WRITE_DATA__SHIFT) & CP_COND_WRITE_5_WRITE_DATA__MASK; +} + +#define REG_CP_COND_WRITE5_0 0x00000000 +#define CP_COND_WRITE5_0_FUNCTION__MASK 0x00000007 +#define CP_COND_WRITE5_0_FUNCTION__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_0_FUNCTION(enum cp_cond_function val) +{ + return ((val) << CP_COND_WRITE5_0_FUNCTION__SHIFT) & CP_COND_WRITE5_0_FUNCTION__MASK; +} +#define CP_COND_WRITE5_0_POLL_MEMORY 0x00000010 +#define CP_COND_WRITE5_0_WRITE_MEMORY 0x00000100 + +#define REG_CP_COND_WRITE5_1 0x00000001 +#define CP_COND_WRITE5_1_POLL_ADDR_LO__MASK 0xffffffff +#define CP_COND_WRITE5_1_POLL_ADDR_LO__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_1_POLL_ADDR_LO(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_1_POLL_ADDR_LO__SHIFT) & CP_COND_WRITE5_1_POLL_ADDR_LO__MASK; +} + +#define REG_CP_COND_WRITE5_2 0x00000002 +#define CP_COND_WRITE5_2_POLL_ADDR_HI__MASK 0xffffffff +#define CP_COND_WRITE5_2_POLL_ADDR_HI__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_2_POLL_ADDR_HI(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_2_POLL_ADDR_HI__SHIFT) & CP_COND_WRITE5_2_POLL_ADDR_HI__MASK; +} + +#define REG_CP_COND_WRITE5_3 0x00000003 +#define CP_COND_WRITE5_3_REF__MASK 0xffffffff +#define CP_COND_WRITE5_3_REF__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_3_REF(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_3_REF__SHIFT) & CP_COND_WRITE5_3_REF__MASK; +} + +#define REG_CP_COND_WRITE5_4 0x00000004 +#define CP_COND_WRITE5_4_MASK__MASK 0xffffffff +#define CP_COND_WRITE5_4_MASK__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_4_MASK(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_4_MASK__SHIFT) & CP_COND_WRITE5_4_MASK__MASK; +} + +#define REG_CP_COND_WRITE5_5 0x00000005 +#define CP_COND_WRITE5_5_WRITE_ADDR_LO__MASK 0xffffffff +#define CP_COND_WRITE5_5_WRITE_ADDR_LO__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_5_WRITE_ADDR_LO(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_5_WRITE_ADDR_LO__SHIFT) & CP_COND_WRITE5_5_WRITE_ADDR_LO__MASK; +} + +#define REG_CP_COND_WRITE5_6 0x00000006 +#define CP_COND_WRITE5_6_WRITE_ADDR_HI__MASK 0xffffffff +#define CP_COND_WRITE5_6_WRITE_ADDR_HI__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_6_WRITE_ADDR_HI(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_6_WRITE_ADDR_HI__SHIFT) & CP_COND_WRITE5_6_WRITE_ADDR_HI__MASK; +} + +#define REG_CP_COND_WRITE5_7 0x00000007 +#define CP_COND_WRITE5_7_WRITE_DATA__MASK 0xffffffff +#define CP_COND_WRITE5_7_WRITE_DATA__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_7_WRITE_DATA(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_7_WRITE_DATA__SHIFT) & 
CP_COND_WRITE5_7_WRITE_DATA__MASK; +} + +#define REG_CP_DISPATCH_COMPUTE_0 0x00000000 + +#define REG_CP_DISPATCH_COMPUTE_1 0x00000001 +#define CP_DISPATCH_COMPUTE_1_X__MASK 0xffffffff +#define CP_DISPATCH_COMPUTE_1_X__SHIFT 0 +static inline uint32_t CP_DISPATCH_COMPUTE_1_X(uint32_t val) +{ + return ((val) << CP_DISPATCH_COMPUTE_1_X__SHIFT) & CP_DISPATCH_COMPUTE_1_X__MASK; +} + +#define REG_CP_DISPATCH_COMPUTE_2 0x00000002 +#define CP_DISPATCH_COMPUTE_2_Y__MASK 0xffffffff +#define CP_DISPATCH_COMPUTE_2_Y__SHIFT 0 +static inline uint32_t CP_DISPATCH_COMPUTE_2_Y(uint32_t val) +{ + return ((val) << CP_DISPATCH_COMPUTE_2_Y__SHIFT) & CP_DISPATCH_COMPUTE_2_Y__MASK; +} + +#define REG_CP_DISPATCH_COMPUTE_3 0x00000003 +#define CP_DISPATCH_COMPUTE_3_Z__MASK 0xffffffff +#define CP_DISPATCH_COMPUTE_3_Z__SHIFT 0 +static inline uint32_t CP_DISPATCH_COMPUTE_3_Z(uint32_t val) +{ + return ((val) << CP_DISPATCH_COMPUTE_3_Z__SHIFT) & CP_DISPATCH_COMPUTE_3_Z__MASK; +} + +#define REG_CP_SET_RENDER_MODE_0 0x00000000 +#define CP_SET_RENDER_MODE_0_MODE__MASK 0x000001ff +#define CP_SET_RENDER_MODE_0_MODE__SHIFT 0 +static inline uint32_t CP_SET_RENDER_MODE_0_MODE(enum render_mode_cmd val) +{ + return ((val) << CP_SET_RENDER_MODE_0_MODE__SHIFT) & CP_SET_RENDER_MODE_0_MODE__MASK; +} + +#define REG_CP_SET_RENDER_MODE_1 0x00000001 +#define CP_SET_RENDER_MODE_1_ADDR_0_LO__MASK 0xffffffff +#define CP_SET_RENDER_MODE_1_ADDR_0_LO__SHIFT 0 +static inline uint32_t CP_SET_RENDER_MODE_1_ADDR_0_LO(uint32_t val) +{ + return ((val) << CP_SET_RENDER_MODE_1_ADDR_0_LO__SHIFT) & CP_SET_RENDER_MODE_1_ADDR_0_LO__MASK; +} + +#define REG_CP_SET_RENDER_MODE_2 0x00000002 +#define CP_SET_RENDER_MODE_2_ADDR_0_HI__MASK 0xffffffff +#define CP_SET_RENDER_MODE_2_ADDR_0_HI__SHIFT 0 +static inline uint32_t CP_SET_RENDER_MODE_2_ADDR_0_HI(uint32_t val) +{ + return ((val) << CP_SET_RENDER_MODE_2_ADDR_0_HI__SHIFT) & CP_SET_RENDER_MODE_2_ADDR_0_HI__MASK; +} + +#define REG_CP_SET_RENDER_MODE_3 0x00000003 +#define CP_SET_RENDER_MODE_3_VSC_ENABLE 0x00000008 +#define CP_SET_RENDER_MODE_3_GMEM_ENABLE 0x00000010 + +#define REG_CP_SET_RENDER_MODE_4 0x00000004 + +#define REG_CP_SET_RENDER_MODE_5 0x00000005 +#define CP_SET_RENDER_MODE_5_ADDR_1_LEN__MASK 0xffffffff +#define CP_SET_RENDER_MODE_5_ADDR_1_LEN__SHIFT 0 +static inline uint32_t CP_SET_RENDER_MODE_5_ADDR_1_LEN(uint32_t val) +{ + return ((val) << CP_SET_RENDER_MODE_5_ADDR_1_LEN__SHIFT) & CP_SET_RENDER_MODE_5_ADDR_1_LEN__MASK; +} + +#define REG_CP_SET_RENDER_MODE_6 0x00000006 +#define CP_SET_RENDER_MODE_6_ADDR_1_LO__MASK 0xffffffff +#define CP_SET_RENDER_MODE_6_ADDR_1_LO__SHIFT 0 +static inline uint32_t CP_SET_RENDER_MODE_6_ADDR_1_LO(uint32_t val) +{ + return ((val) << CP_SET_RENDER_MODE_6_ADDR_1_LO__SHIFT) & CP_SET_RENDER_MODE_6_ADDR_1_LO__MASK; +} + +#define REG_CP_SET_RENDER_MODE_7 0x00000007 +#define CP_SET_RENDER_MODE_7_ADDR_1_HI__MASK 0xffffffff +#define CP_SET_RENDER_MODE_7_ADDR_1_HI__SHIFT 0 +static inline uint32_t CP_SET_RENDER_MODE_7_ADDR_1_HI(uint32_t val) +{ + return ((val) << CP_SET_RENDER_MODE_7_ADDR_1_HI__SHIFT) & CP_SET_RENDER_MODE_7_ADDR_1_HI__MASK; +} + +#define REG_CP_COMPUTE_CHECKPOINT_0 0x00000000 +#define CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__MASK 0xffffffff +#define CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__SHIFT 0 +static inline uint32_t CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO(uint32_t val) +{ + return ((val) << CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__SHIFT) & CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__MASK; +} + +#define REG_CP_COMPUTE_CHECKPOINT_1 0x00000001 +#define CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__MASK 
0xffffffff +#define CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__SHIFT 0 +static inline uint32_t CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI(uint32_t val) +{ + return ((val) << CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__SHIFT) & CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__MASK; +} + +#define REG_CP_COMPUTE_CHECKPOINT_2 0x00000002 + +#define REG_CP_COMPUTE_CHECKPOINT_3 0x00000003 + +#define REG_CP_COMPUTE_CHECKPOINT_4 0x00000004 +#define CP_COMPUTE_CHECKPOINT_4_ADDR_1_LEN__MASK 0xffffffff +#define CP_COMPUTE_CHECKPOINT_4_ADDR_1_LEN__SHIFT 0 +static inline uint32_t CP_COMPUTE_CHECKPOINT_4_ADDR_1_LEN(uint32_t val) +{ + return ((val) << CP_COMPUTE_CHECKPOINT_4_ADDR_1_LEN__SHIFT) & CP_COMPUTE_CHECKPOINT_4_ADDR_1_LEN__MASK; +} + +#define REG_CP_COMPUTE_CHECKPOINT_5 0x00000005 +#define CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__MASK 0xffffffff +#define CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__SHIFT 0 +static inline uint32_t CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO(uint32_t val) +{ + return ((val) << CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__SHIFT) & CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__MASK; +} + +#define REG_CP_COMPUTE_CHECKPOINT_6 0x00000006 +#define CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__MASK 0xffffffff +#define CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__SHIFT 0 +static inline uint32_t CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI(uint32_t val) +{ + return ((val) << CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__SHIFT) & CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__MASK; +} + +#define REG_CP_PERFCOUNTER_ACTION_0 0x00000000 + +#define REG_CP_PERFCOUNTER_ACTION_1 0x00000001 +#define CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__MASK 0xffffffff +#define CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__SHIFT 0 +static inline uint32_t CP_PERFCOUNTER_ACTION_1_ADDR_0_LO(uint32_t val) +{ + return ((val) << CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__SHIFT) & CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__MASK; +} + +#define REG_CP_PERFCOUNTER_ACTION_2 0x00000002 +#define CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__MASK 0xffffffff +#define CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__SHIFT 0 +static inline uint32_t CP_PERFCOUNTER_ACTION_2_ADDR_0_HI(uint32_t val) +{ + return ((val) << CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__SHIFT) & CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__MASK; +} + +#define REG_CP_EVENT_WRITE_0 0x00000000 +#define CP_EVENT_WRITE_0_EVENT__MASK 0x000000ff +#define CP_EVENT_WRITE_0_EVENT__SHIFT 0 +static inline uint32_t CP_EVENT_WRITE_0_EVENT(enum vgt_event_type val) +{ + return ((val) << CP_EVENT_WRITE_0_EVENT__SHIFT) & CP_EVENT_WRITE_0_EVENT__MASK; +} +#define CP_EVENT_WRITE_0_TIMESTAMP 0x40000000 + +#define REG_CP_EVENT_WRITE_1 0x00000001 +#define CP_EVENT_WRITE_1_ADDR_0_LO__MASK 0xffffffff +#define CP_EVENT_WRITE_1_ADDR_0_LO__SHIFT 0 +static inline uint32_t CP_EVENT_WRITE_1_ADDR_0_LO(uint32_t val) +{ + return ((val) << CP_EVENT_WRITE_1_ADDR_0_LO__SHIFT) & CP_EVENT_WRITE_1_ADDR_0_LO__MASK; +} + +#define REG_CP_EVENT_WRITE_2 0x00000002 +#define CP_EVENT_WRITE_2_ADDR_0_HI__MASK 0xffffffff +#define CP_EVENT_WRITE_2_ADDR_0_HI__SHIFT 0 +static inline uint32_t CP_EVENT_WRITE_2_ADDR_0_HI(uint32_t val) +{ + return ((val) << CP_EVENT_WRITE_2_ADDR_0_HI__SHIFT) & CP_EVENT_WRITE_2_ADDR_0_HI__MASK; +} + +#define REG_CP_EVENT_WRITE_3 0x00000003 + +#define REG_CP_BLIT_0 0x00000000 +#define CP_BLIT_0_OP__MASK 0x0000000f +#define CP_BLIT_0_OP__SHIFT 0 +static inline uint32_t CP_BLIT_0_OP(enum cp_blit_cmd val) +{ + return ((val) << CP_BLIT_0_OP__SHIFT) & CP_BLIT_0_OP__MASK; +} + +#define REG_CP_BLIT_1 0x00000001 +#define CP_BLIT_1_SRC_X1__MASK 0x0000ffff +#define CP_BLIT_1_SRC_X1__SHIFT 0 +static inline uint32_t CP_BLIT_1_SRC_X1(uint32_t val) +{ + return ((val) << 
CP_BLIT_1_SRC_X1__SHIFT) & CP_BLIT_1_SRC_X1__MASK;
+}
+#define CP_BLIT_1_SRC_Y1__MASK 0xffff0000
+#define CP_BLIT_1_SRC_Y1__SHIFT 16
+static inline uint32_t CP_BLIT_1_SRC_Y1(uint32_t val)
+{
+	return ((val) << CP_BLIT_1_SRC_Y1__SHIFT) & CP_BLIT_1_SRC_Y1__MASK;
+}
+
+#define REG_CP_BLIT_2 0x00000002
+#define CP_BLIT_2_SRC_X2__MASK 0x0000ffff
+#define CP_BLIT_2_SRC_X2__SHIFT 0
+static inline uint32_t CP_BLIT_2_SRC_X2(uint32_t val)
+{
+	return ((val) << CP_BLIT_2_SRC_X2__SHIFT) & CP_BLIT_2_SRC_X2__MASK;
+}
+#define CP_BLIT_2_SRC_Y2__MASK 0xffff0000
+#define CP_BLIT_2_SRC_Y2__SHIFT 16
+static inline uint32_t CP_BLIT_2_SRC_Y2(uint32_t val)
+{
+	return ((val) << CP_BLIT_2_SRC_Y2__SHIFT) & CP_BLIT_2_SRC_Y2__MASK;
+}
+
+#define REG_CP_BLIT_3 0x00000003
+#define CP_BLIT_3_DST_X1__MASK 0x0000ffff
+#define CP_BLIT_3_DST_X1__SHIFT 0
+static inline uint32_t CP_BLIT_3_DST_X1(uint32_t val)
+{
+	return ((val) << CP_BLIT_3_DST_X1__SHIFT) & CP_BLIT_3_DST_X1__MASK;
+}
+#define CP_BLIT_3_DST_Y1__MASK 0xffff0000
+#define CP_BLIT_3_DST_Y1__SHIFT 16
+static inline uint32_t CP_BLIT_3_DST_Y1(uint32_t val)
+{
+	return ((val) << CP_BLIT_3_DST_Y1__SHIFT) & CP_BLIT_3_DST_Y1__MASK;
+}
+
+#define REG_CP_BLIT_4 0x00000004
+#define CP_BLIT_4_DST_X2__MASK 0x0000ffff
+#define CP_BLIT_4_DST_X2__SHIFT 0
+static inline uint32_t CP_BLIT_4_DST_X2(uint32_t val)
+{
+	return ((val) << CP_BLIT_4_DST_X2__SHIFT) & CP_BLIT_4_DST_X2__MASK;
+}
+#define CP_BLIT_4_DST_Y2__MASK 0xffff0000
+#define CP_BLIT_4_DST_Y2__SHIFT 16
+static inline uint32_t CP_BLIT_4_DST_Y2(uint32_t val)
+{
+	return ((val) << CP_BLIT_4_DST_Y2__SHIFT) & CP_BLIT_4_DST_Y2__MASK;
+}
+
+#define REG_CP_EXEC_CS_0 0x00000000
+
+#define REG_CP_EXEC_CS_1 0x00000001
+#define CP_EXEC_CS_1_NGROUPS_X__MASK 0xffffffff
+#define CP_EXEC_CS_1_NGROUPS_X__SHIFT 0
+static inline uint32_t CP_EXEC_CS_1_NGROUPS_X(uint32_t val)
+{
+	return ((val) << CP_EXEC_CS_1_NGROUPS_X__SHIFT) & CP_EXEC_CS_1_NGROUPS_X__MASK;
+}
+
+#define REG_CP_EXEC_CS_2 0x00000002
+#define CP_EXEC_CS_2_NGROUPS_Y__MASK 0xffffffff
+#define CP_EXEC_CS_2_NGROUPS_Y__SHIFT 0
+static inline uint32_t CP_EXEC_CS_2_NGROUPS_Y(uint32_t val)
+{
+	return ((val) << CP_EXEC_CS_2_NGROUPS_Y__SHIFT) & CP_EXEC_CS_2_NGROUPS_Y__MASK;
+}
+
+#define REG_CP_EXEC_CS_3 0x00000003
+#define CP_EXEC_CS_3_NGROUPS_Z__MASK 0xffffffff
+#define CP_EXEC_CS_3_NGROUPS_Z__SHIFT 0
+static inline uint32_t CP_EXEC_CS_3_NGROUPS_Z(uint32_t val)
+{
+	return ((val) << CP_EXEC_CS_3_NGROUPS_Z__SHIFT) & CP_EXEC_CS_3_NGROUPS_Z__MASK;
+}
+
 #endif /* ADRENO_PM4_XML */
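The CP_* helpers added above all follow the same generated pattern: each field gets a __MASK/__SHIFT pair plus an inline packer, and fields that share a dword are OR'd together by the caller. A minimal sketch of how such helpers combine into the CP_BLIT payload follows; it is illustrative only and not taken from the Mesa sources, and the BLIT_OP_SCALE enumerator plus the pack_blit_example() wrapper and its dword array are assumptions standing in for whatever ring-emission path a driver actually uses.

/* Illustrative sketch: pack a source/destination rectangle into the five
 * CP_BLIT payload dwords using the generated helpers above.  BLIT_OP_SCALE
 * is assumed to be a member of enum cp_blit_cmd.
 */
static void pack_blit_example(uint32_t dwords[5])
{
	dwords[0] = CP_BLIT_0_OP(BLIT_OP_SCALE);
	dwords[1] = CP_BLIT_1_SRC_X1(0)   | CP_BLIT_1_SRC_Y1(0);
	dwords[2] = CP_BLIT_2_SRC_X2(63)  | CP_BLIT_2_SRC_Y2(63);
	dwords[3] = CP_BLIT_3_DST_X1(128) | CP_BLIT_3_DST_Y1(128);
	dwords[4] = CP_BLIT_4_DST_X2(191) | CP_BLIT_4_DST_Y2(191);
}

Note that out-of-range values are silently truncated by the mask, which is why helpers with stricter requirements, such as CP_LOAD_STATE4_1_EXT_SRC_ADDR earlier in this header, assert on alignment before shifting.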
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.c
index 176a31c77..c2142b5a2 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.c
@@ -48,7 +48,8 @@ batch_init(struct fd_batch *batch)
 	 * we don't need to grow the ringbuffer.  Performance is likely to
 	 * suffer, but there is no good alternative.
 	 */
-	if (fd_device_version(ctx->screen->dev) < FD_VERSION_UNLIMITED_CMDS) {
+	if ((fd_device_version(ctx->screen->dev) < FD_VERSION_UNLIMITED_CMDS) ||
+			(fd_mesa_debug & FD_DBG_NOGROW)){
 		size = 0x100000;
 	}
@@ -60,6 +61,8 @@ batch_init(struct fd_batch *batch)
 	fd_ringbuffer_set_parent(batch->draw, batch->gmem);
 	fd_ringbuffer_set_parent(batch->binning, batch->gmem);
 
+	batch->in_fence_fd = -1;
+
 	batch->cleared = batch->partial_cleared = 0;
 	batch->restore = batch->resolve = 0;
 	batch->needs_flush = false;
@@ -73,14 +76,14 @@ batch_init(struct fd_batch *batch)
 	batch->max_scissor.minx = batch->max_scissor.miny = ~0;
 	batch->max_scissor.maxx = batch->max_scissor.maxy = 0;
 
-	util_dynarray_init(&batch->draw_patches);
+	util_dynarray_init(&batch->draw_patches, NULL);
 
 	if (is_a3xx(ctx->screen))
-		util_dynarray_init(&batch->rbrc_patches);
+		util_dynarray_init(&batch->rbrc_patches, NULL);
 
 	assert(batch->resources->entries == 0);
 
-	util_dynarray_init(&batch->samples);
+	util_dynarray_init(&batch->samples, NULL);
 }
 
 struct fd_batch *
@@ -109,9 +112,16 @@ batch_fini(struct fd_batch *batch)
 {
 	pipe_resource_reference(&batch->query_buf, NULL);
 
+	if (batch->in_fence_fd != -1)
+		close(batch->in_fence_fd);
+
 	fd_ringbuffer_del(batch->draw);
 	fd_ringbuffer_del(batch->binning);
 	fd_ringbuffer_del(batch->gmem);
+	if (batch->lrz_clear) {
+		fd_ringbuffer_del(batch->lrz_clear);
+		batch->lrz_clear = NULL;
+	}
 
 	util_dynarray_fini(&batch->draw_patches);
@@ -164,9 +174,9 @@ batch_reset_resources_locked(struct fd_batch *batch)
 static void
 batch_reset_resources(struct fd_batch *batch)
 {
-	pipe_mutex_lock(batch->ctx->screen->lock);
+	mtx_lock(&batch->ctx->screen->lock);
 	batch_reset_resources_locked(batch);
-	pipe_mutex_unlock(batch->ctx->screen->lock);
+	mtx_unlock(&batch->ctx->screen->lock);
 }
 
 static void
@@ -197,9 +207,9 @@ __fd_batch_destroy(struct fd_batch *batch)
 
 	util_copy_framebuffer_state(&batch->framebuffer, NULL);
 
-	pipe_mutex_lock(batch->ctx->screen->lock);
+	mtx_lock(&batch->ctx->screen->lock);
 	fd_bc_invalidate_batch(batch, true);
-	pipe_mutex_unlock(batch->ctx->screen->lock);
+	mtx_unlock(&batch->ctx->screen->lock);
 
 	batch_fini(batch);
@@ -224,7 +234,7 @@ fd_batch_sync(struct fd_batch *batch)
 {
 	if (!batch->ctx->screen->reorder)
 		return;
-	util_queue_job_wait(&batch->flush_fence);
+	util_queue_fence_wait(&batch->flush_fence);
 }
 
 static void
@@ -256,9 +266,9 @@ batch_flush(struct fd_batch *batch)
 	/* close out the draw cmds by making sure any active queries are
 	 * paused:
 	 */
-	fd_hw_query_set_stage(batch, batch->draw, FD_STAGE_NULL);
+	fd_batch_set_stage(batch, FD_STAGE_NULL);
 
-	batch->ctx->dirty = ~0;
+	fd_context_all_dirty(batch->ctx);
 
 	batch_flush_reset_dependencies(batch, true);
 
 	if (batch->ctx->screen->reorder) {
@@ -266,7 +276,7 @@ batch_flush(struct fd_batch *batch)
 		fd_batch_reference(&tmp, batch);
 
 		if (!util_queue_is_initialized(&batch->ctx->flush_queue))
-			util_queue_init(&batch->ctx->flush_queue, "flush_queue", 16, 1);
+			util_queue_init(&batch->ctx->flush_queue, "flush_queue", 16, 1, 0);
 
 		util_queue_add_job(&batch->ctx->flush_queue, batch, &batch->flush_fence,
@@ -281,9 +291,9 @@ batch_flush(struct fd_batch *batch)
 	if (batch == batch->ctx->batch) {
 		batch_reset(batch);
 	} else {
-		pipe_mutex_lock(batch->ctx->screen->lock);
+		mtx_lock(&batch->ctx->screen->lock);
 		fd_bc_invalidate_batch(batch, false);
-		pipe_mutex_unlock(batch->ctx->screen->lock);
+		mtx_unlock(&batch->ctx->screen->lock);
 	}
 }
@@ -331,9 +341,9 @@ batch_add_dep(struct fd_batch *batch, struct fd_batch *dep)
 	 */
 	if (batch_depends_on(dep, batch)) {
 		DBG("%p: flush 
forced on %p!", batch, dep); - pipe_mutex_unlock(batch->ctx->screen->lock); + mtx_unlock(&batch->ctx->screen->lock); fd_batch_flush(dep, false); - pipe_mutex_lock(batch->ctx->screen->lock); + mtx_lock(&batch->ctx->screen->lock); } else { struct fd_batch *other = NULL; fd_batch_reference_locked(&other, dep); @@ -401,3 +411,18 @@ fd_batch_check_size(struct fd_batch *batch) (fd_mesa_debug & FD_DBG_FLUSH)) fd_batch_flush(batch, true); } + +/* emit a WAIT_FOR_IDLE only if needed, ie. if there has not already + * been one since last draw: + */ +void +fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) +{ + if (batch->needs_wfi) { + if (batch->ctx->screen->gpu_id >= 500) + OUT_WFI5(ring); + else + OUT_WFI(ring); + batch->needs_wfi = false; + } +} diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.h index aeeb9c58a..d6a818a3b 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.h +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.h @@ -68,6 +68,9 @@ struct fd_batch { unsigned seqno; unsigned idx; + int in_fence_fd; + bool needs_out_fence_fd; + struct fd_context *ctx; struct util_queue_fence flush_fence; @@ -151,6 +154,9 @@ struct fd_batch { /** tiling/gmem (IB0) cmdstream: */ struct fd_ringbuffer *gmem; + // TODO maybe more generically split out clear and clear_binning rings? + struct fd_ringbuffer *lrz_clear; + /** * hw query related state: */ @@ -261,17 +267,7 @@ fd_reset_wfi(struct fd_batch *batch) batch->needs_wfi = true; } -/* emit a WAIT_FOR_IDLE only if needed, ie. if there has not already - * been one since last draw: - */ -static inline void -fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) -{ - if (batch->needs_wfi) { - OUT_WFI(ring); - batch->needs_wfi = false; - } -} +void fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring); /* emit a CP_EVENT_WRITE: */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.c index f3d5078d1..9fea7d682 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.c @@ -130,21 +130,21 @@ fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx) struct hash_entry *entry; struct fd_batch *last_batch = NULL; - pipe_mutex_lock(ctx->screen->lock); + mtx_lock(&ctx->screen->lock); hash_table_foreach(cache->ht, entry) { struct fd_batch *batch = NULL; fd_batch_reference_locked(&batch, (struct fd_batch *)entry->data); if (batch->ctx == ctx) { - pipe_mutex_unlock(ctx->screen->lock); + mtx_unlock(&ctx->screen->lock); fd_batch_reference(&last_batch, batch); fd_batch_flush(batch, false); - pipe_mutex_lock(ctx->screen->lock); + mtx_lock(&ctx->screen->lock); } fd_batch_reference_locked(&batch, NULL); } - pipe_mutex_unlock(ctx->screen->lock); + mtx_unlock(&ctx->screen->lock); if (last_batch) { fd_batch_sync(last_batch); @@ -158,14 +158,14 @@ fd_bc_invalidate_context(struct fd_context *ctx) struct fd_batch_cache *cache = &ctx->screen->batch_cache; struct fd_batch *batch; - pipe_mutex_lock(ctx->screen->lock); + mtx_lock(&ctx->screen->lock); foreach_batch(batch, cache, cache->batch_mask) { if (batch->ctx == ctx) fd_batch_reference_locked(&batch, NULL); } - pipe_mutex_unlock(ctx->screen->lock); + mtx_unlock(&ctx->screen->lock); } void @@ -207,7 +207,7 @@ fd_bc_invalidate_resource(struct fd_resource *rsc, bool destroy) struct fd_screen *screen = fd_screen(rsc->base.b.screen); struct 
fd_batch *batch; - pipe_mutex_lock(screen->lock); + mtx_lock(&screen->lock); if (destroy) { foreach_batch(batch, &screen->batch_cache, rsc->batch_mask) { @@ -224,7 +224,7 @@ fd_bc_invalidate_resource(struct fd_resource *rsc, bool destroy) rsc->bc_batch_mask = 0; - pipe_mutex_unlock(screen->lock); + mtx_unlock(&screen->lock); } struct fd_batch * @@ -233,7 +233,7 @@ fd_bc_alloc_batch(struct fd_batch_cache *cache, struct fd_context *ctx) struct fd_batch *batch; uint32_t idx; - pipe_mutex_lock(ctx->screen->lock); + mtx_lock(&ctx->screen->lock); while ((idx = ffs(~cache->batch_mask)) == 0) { #if 0 @@ -263,10 +263,10 @@ fd_bc_alloc_batch(struct fd_batch_cache *cache, struct fd_context *ctx) /* we can drop lock temporarily here, since we hold a ref, * flush_batch won't disappear under us. */ - pipe_mutex_unlock(ctx->screen->lock); + mtx_unlock(&ctx->screen->lock); DBG("%p: too many batches! flush forced!", flush_batch); fd_batch_flush(flush_batch, true); - pipe_mutex_lock(ctx->screen->lock); + mtx_lock(&ctx->screen->lock); /* While the resources get cleaned up automatically, the flush_batch * doesn't get removed from the dependencies of other batches, so @@ -303,7 +303,7 @@ fd_bc_alloc_batch(struct fd_batch_cache *cache, struct fd_context *ctx) cache->batches[idx] = batch; out: - pipe_mutex_unlock(ctx->screen->lock); + mtx_unlock(&ctx->screen->lock); return batch; } @@ -338,7 +338,7 @@ batch_from_key(struct fd_batch_cache *cache, struct key *key, if (!batch) return NULL; - pipe_mutex_lock(ctx->screen->lock); + mtx_lock(&ctx->screen->lock); _mesa_hash_table_insert_pre_hashed(cache->ht, hash, key, batch); batch->key = key; @@ -349,7 +349,7 @@ batch_from_key(struct fd_batch_cache *cache, struct key *key, rsc->bc_batch_mask = (1 << batch->idx); } - pipe_mutex_unlock(ctx->screen->lock); + mtx_unlock(&ctx->screen->lock); return batch; } diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.c index 70220f88d..1cf366b0c 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.c @@ -37,6 +37,7 @@ #include "freedreno_query.h" #include "freedreno_query_hw.h" #include "freedreno_util.h" +#include "util/u_upload_mgr.h" static void fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, @@ -44,14 +45,26 @@ fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, { struct fd_context *ctx = fd_context(pctx); + if (flags & PIPE_FLUSH_FENCE_FD) + ctx->batch->needs_out_fence_fd = true; + if (!ctx->screen->reorder) { fd_batch_flush(ctx->batch, true); } else { fd_bc_flush(&ctx->screen->batch_cache, ctx); } - if (fence) + if (fence) { + /* if there hasn't been any rendering submitted yet, we might not + * have actually created a fence + */ + if (!ctx->last_fence || ctx->batch->needs_out_fence_fd) { + ctx->batch->needs_flush = true; + fd_gmem_render_noop(ctx->batch); + fd_batch_reset(ctx->batch); + } fd_fence_ref(pctx->screen, fence, ctx->last_fence); + } } /** @@ -73,7 +86,10 @@ fd_emit_string_marker(struct pipe_context *pctx, const char *string, int len) /* max packet size is 0x3fff dwords: */ len = MIN2(len, 0x3fff * 4); - OUT_PKT3(ring, CP_NOP, align(len, 4) / 4); + if (ctx->screen->gpu_id >= 500) + OUT_PKT7(ring, CP_NOP, align(len, 4) / 4); + else + OUT_PKT3(ring, CP_NOP, align(len, 4) / 4); while (len >= 4) { OUT_RING(ring, *buf); buf++; @@ -96,7 +112,7 @@ fd_context_destroy(struct pipe_context *pctx) DBG(""); - if 
(ctx->screen->reorder) + if (ctx->screen->reorder && util_queue_is_initialized(&ctx->flush_queue)) util_queue_destroy(&ctx->flush_queue); fd_batch_reference(&ctx->batch, NULL); /* unref current batch */ @@ -105,11 +121,13 @@ fd_context_destroy(struct pipe_context *pctx) fd_fence_ref(pctx->screen, &ctx->last_fence, NULL); fd_prog_fini(pctx); - fd_hw_query_fini(pctx); if (ctx->blitter) util_blitter_destroy(ctx->blitter); + if (pctx->stream_uploader) + u_upload_destroy(pctx->stream_uploader); + if (ctx->clear_rs_state) pctx->delete_rasterizer_state(pctx, ctx->clear_rs_state); @@ -192,7 +210,7 @@ fd_context_setup_common_vbos(struct fd_context *ctx) }}); ctx->solid_vbuf_state.vertexbuf.count = 1; ctx->solid_vbuf_state.vertexbuf.vb[0].stride = 12; - ctx->solid_vbuf_state.vertexbuf.vb[0].buffer = ctx->solid_vbuf; + ctx->solid_vbuf_state.vertexbuf.vb[0].buffer.resource = ctx->solid_vbuf; /* setup blit_vbuf_state: */ ctx->blit_vbuf_state.vtx = pctx->create_vertex_elements_state( @@ -207,9 +225,9 @@ fd_context_setup_common_vbos(struct fd_context *ctx) }}); ctx->blit_vbuf_state.vertexbuf.count = 2; ctx->blit_vbuf_state.vertexbuf.vb[0].stride = 8; - ctx->blit_vbuf_state.vertexbuf.vb[0].buffer = ctx->blit_texcoord_vbuf; + ctx->blit_vbuf_state.vertexbuf.vb[0].buffer.resource = ctx->blit_texcoord_vbuf; ctx->blit_vbuf_state.vertexbuf.vb[1].stride = 12; - ctx->blit_vbuf_state.vertexbuf.vb[1].buffer = ctx->solid_vbuf; + ctx->blit_vbuf_state.vertexbuf.vb[1].buffer.resource = ctx->solid_vbuf; } void @@ -251,14 +269,15 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen, pctx->flush = fd_context_flush; pctx->emit_string_marker = fd_emit_string_marker; pctx->set_debug_callback = fd_set_debug_callback; + pctx->create_fence_fd = fd_create_fence_fd; + pctx->fence_server_sync = fd_fence_server_sync; - /* TODO what about compute? Ideally it creates it's own independent - * batches per compute job (since it isn't using tiling, so no point - * in getting involved with the re-ordering madness).. 
- */ - if (!screen->reorder) { - ctx->batch = fd_bc_alloc_batch(&screen->batch_cache, ctx); - } + pctx->stream_uploader = u_upload_create_default(pctx); + if (!pctx->stream_uploader) + goto fail; + pctx->const_uploader = pctx->stream_uploader; + + ctx->batch = fd_bc_alloc_batch(&screen->batch_cache, ctx); slab_create_child(&ctx->transfer_pool, &screen->transfer_pool); @@ -267,7 +286,6 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen, fd_query_context_init(pctx); fd_texture_init(pctx); fd_state_init(pctx); - fd_hw_query_init(pctx); ctx->blitter = util_blitter_create(pctx); if (!ctx->blitter) @@ -277,6 +295,9 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen, if (!ctx->primconvert) goto fail; + list_inithead(&ctx->hw_active_queries); + list_inithead(&ctx->acc_active_queries); + return pctx; fail: diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.h index 4a766f5cd..4472afb83 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.h +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.h @@ -72,6 +72,12 @@ struct fd_constbuf_stateobj { uint32_t dirty_mask; }; +struct fd_shaderbuf_stateobj { + struct pipe_shader_buffer sb[PIPE_MAX_SHADER_BUFFERS]; + uint32_t enabled_mask; + uint32_t dirty_mask; +}; + struct fd_vertexbuf_stateobj { struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS]; unsigned count; @@ -107,6 +113,43 @@ struct fd_vertex_state { struct fd_vertexbuf_stateobj vertexbuf; }; +/* global 3d pipeline dirty state: */ +enum fd_dirty_3d_state { + FD_DIRTY_BLEND = BIT(0), + FD_DIRTY_RASTERIZER = BIT(1), + FD_DIRTY_ZSA = BIT(2), + FD_DIRTY_BLEND_COLOR = BIT(3), + FD_DIRTY_STENCIL_REF = BIT(4), + FD_DIRTY_SAMPLE_MASK = BIT(5), + FD_DIRTY_FRAMEBUFFER = BIT(6), + FD_DIRTY_STIPPLE = BIT(7), + FD_DIRTY_VIEWPORT = BIT(8), + FD_DIRTY_VTXSTATE = BIT(9), + FD_DIRTY_VTXBUF = BIT(10), + + FD_DIRTY_SCISSOR = BIT(12), + FD_DIRTY_STREAMOUT = BIT(13), + FD_DIRTY_UCP = BIT(14), + FD_DIRTY_BLEND_DUAL = BIT(15), + + /* These are a bit redundent with fd_dirty_shader_state, and possibly + * should be removed. (But OTOH kinda convenient in some places) + */ + FD_DIRTY_PROG = BIT(16), + FD_DIRTY_CONST = BIT(17), + FD_DIRTY_TEX = BIT(18), + + /* only used by a2xx.. possibly can be removed.. 
*/ + FD_DIRTY_TEXSTATE = BIT(19), +}; + +/* per shader-stage dirty state: */ +enum fd_dirty_shader_state { + FD_DIRTY_SHADER_PROG = BIT(0), + FD_DIRTY_SHADER_CONST = BIT(1), + FD_DIRTY_SHADER_TEX = BIT(2), + FD_DIRTY_SHADER_SSBO = BIT(3), +}; struct fd_context { struct pipe_context base; @@ -123,15 +166,26 @@ struct fd_context { /* slab for pipe_transfer allocations: */ struct slab_child_pool transfer_pool; + /** + * query related state: + */ + /*@{*/ /* slabs for fd_hw_sample and fd_hw_sample_period allocations: */ struct slab_mempool sample_pool; struct slab_mempool sample_period_pool; /* sample-providers for hw queries: */ - const struct fd_hw_sample_provider *sample_providers[MAX_HW_SAMPLE_PROVIDERS]; + const struct fd_hw_sample_provider *hw_sample_providers[MAX_HW_SAMPLE_PROVIDERS]; /* list of active queries: */ - struct list_head active_queries; + struct list_head hw_active_queries; + + /* sample-providers for accumulating hw queries: */ + const struct fd_acc_sample_provider *acc_sample_providers[MAX_HW_SAMPLE_PROVIDERS]; + + /* list of active accumulating queries: */ + struct list_head acc_active_queries; + /*@}*/ /* table with PIPE_PRIM_MAX entries mapping PIPE_PRIM_x to * DI_PT_x value to use for draw initiator. There are some @@ -192,44 +246,21 @@ struct fd_context { * means we'd always have to recalc tiles ever batch) */ struct fd_gmem_stateobj gmem; - struct fd_vsc_pipe pipe[8]; + struct fd_vsc_pipe pipe[16]; struct fd_tile tile[512]; /* which state objects need to be re-emit'd: */ - enum { - FD_DIRTY_BLEND = (1 << 0), - FD_DIRTY_RASTERIZER = (1 << 1), - FD_DIRTY_ZSA = (1 << 2), - FD_DIRTY_FRAGTEX = (1 << 3), - FD_DIRTY_VERTTEX = (1 << 4), - FD_DIRTY_TEXSTATE = (1 << 5), - - FD_SHADER_DIRTY_VP = (1 << 6), - FD_SHADER_DIRTY_FP = (1 << 7), - /* skip geom/tcs/tes/compute */ - FD_DIRTY_PROG = FD_SHADER_DIRTY_FP | FD_SHADER_DIRTY_VP, - - FD_DIRTY_BLEND_COLOR = (1 << 12), - FD_DIRTY_STENCIL_REF = (1 << 13), - FD_DIRTY_SAMPLE_MASK = (1 << 14), - FD_DIRTY_FRAMEBUFFER = (1 << 15), - FD_DIRTY_STIPPLE = (1 << 16), - FD_DIRTY_VIEWPORT = (1 << 17), - FD_DIRTY_CONSTBUF = (1 << 18), - FD_DIRTY_VTXSTATE = (1 << 19), - FD_DIRTY_VTXBUF = (1 << 20), - FD_DIRTY_INDEXBUF = (1 << 21), - FD_DIRTY_SCISSOR = (1 << 22), - FD_DIRTY_STREAMOUT = (1 << 23), - FD_DIRTY_UCP = (1 << 24), - FD_DIRTY_BLEND_DUAL = (1 << 25), - } dirty; + enum fd_dirty_3d_state dirty; + /* per shader-stage dirty status: */ + enum fd_dirty_shader_state dirty_shader[PIPE_SHADER_TYPES]; + + void *compute; struct pipe_blend_state *blend; struct pipe_rasterizer_state *rasterizer; struct pipe_depth_stencil_alpha_state *zsa; - struct fd_texture_stateobj verttex, fragtex; + struct fd_texture_stateobj tex[PIPE_SHADER_TYPES]; struct fd_program_stateobj prog; @@ -241,7 +272,7 @@ struct fd_context { struct pipe_poly_stipple stipple; struct pipe_viewport_state viewport; struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES]; - struct pipe_index_buffer indexbuf; + struct fd_shaderbuf_stateobj shaderbuf[PIPE_SHADER_TYPES]; struct fd_streamout_stateobj streamout; struct pipe_clip_state ucp; @@ -257,15 +288,21 @@ struct fd_context { void (*emit_tile_mem2gmem)(struct fd_batch *batch, struct fd_tile *tile); void (*emit_tile_renderprep)(struct fd_batch *batch, struct fd_tile *tile); void (*emit_tile_gmem2mem)(struct fd_batch *batch, struct fd_tile *tile); + void (*emit_tile_fini)(struct fd_batch *batch); /* optional */ /* optional, for GMEM bypass: */ void (*emit_sysmem_prep)(struct fd_batch *batch); + void (*emit_sysmem_fini)(struct fd_batch 
*batch); /* draw: */ - bool (*draw_vbo)(struct fd_context *ctx, const struct pipe_draw_info *info); - void (*clear)(struct fd_context *ctx, unsigned buffers, + bool (*draw_vbo)(struct fd_context *ctx, const struct pipe_draw_info *info, + unsigned index_offset); + bool (*clear)(struct fd_context *ctx, unsigned buffers, const union pipe_color_union *color, double depth, unsigned stencil); + /* compute: */ + void (*launch_grid)(struct fd_context *ctx, const struct pipe_grid_info *info); + /* constant emit: (note currently not used/needed for a2xx) */ void (*emit_const)(struct fd_ringbuffer *ring, enum shader_t type, uint32_t regid, uint32_t offset, uint32_t sizedwords, @@ -277,6 +314,13 @@ struct fd_context { /* indirect-branch emit: */ void (*emit_ib)(struct fd_ringbuffer *ring, struct fd_ringbuffer *target); + /* query: */ + struct fd_query * (*create_query)(struct fd_context *ctx, unsigned query_type); + void (*query_prepare)(struct fd_batch *batch, uint32_t num_tiles); + void (*query_prepare_tile)(struct fd_batch *batch, uint32_t n, + struct fd_ringbuffer *ring); + void (*query_set_stage)(struct fd_batch *batch, enum fd_render_stage stage); + /* * Common pre-cooked VBO state (used for a3xx and later): */ @@ -314,13 +358,38 @@ fd_context_assert_locked(struct fd_context *ctx) static inline void fd_context_lock(struct fd_context *ctx) { - pipe_mutex_lock(ctx->screen->lock); + mtx_lock(&ctx->screen->lock); } static inline void fd_context_unlock(struct fd_context *ctx) { - pipe_mutex_unlock(ctx->screen->lock); + mtx_unlock(&ctx->screen->lock); +} + +/* mark all state dirty: */ +static inline void +fd_context_all_dirty(struct fd_context *ctx) +{ + ctx->dirty = ~0; + for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) + ctx->dirty_shader[i] = ~0; +} + +static inline void +fd_context_all_clean(struct fd_context *ctx) +{ + ctx->dirty = 0; + for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) { + /* don't mark compute state as clean, since it is not emitted + * during normal draw call. The places that call _all_dirty(), + * it is safe to mark compute state dirty as well, but the + * inverse is not true. + */ + if (i == PIPE_SHADER_COMPUTE) + continue; + ctx->dirty_shader[i] = 0; + } } static inline struct pipe_scissor_state * @@ -337,6 +406,27 @@ fd_supported_prim(struct fd_context *ctx, unsigned prim) return (1 << prim) & ctx->primtype_mask; } +static inline void +fd_batch_set_stage(struct fd_batch *batch, enum fd_render_stage stage) +{ + struct fd_context *ctx = batch->ctx; + + /* special case: internal blits (like mipmap level generation) + * go through normal draw path (via util_blitter_blit()).. 
but + * we need to ignore the FD_STAGE_DRAW which will be set, so we + * don't enable queries which should be paused during internal + * blits: + */ + if ((batch->stage == FD_STAGE_BLIT) && + (stage != FD_STAGE_NULL)) + return; + + if (ctx->query_set_stage) + ctx->query_set_stage(batch, stage); + + batch->stage = stage; +} + void fd_context_setup_common_vbos(struct fd_context *ctx); void fd_context_cleanup_common_vbos(struct fd_context *ctx); diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_draw.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_draw.c index cfe13cd67..f2ccfc585 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_draw.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_draw.c @@ -31,11 +31,13 @@ #include "util/u_memory.h" #include "util/u_prim.h" #include "util/u_format.h" +#include "util/u_helpers.h" #include "freedreno_draw.h" #include "freedreno_context.h" #include "freedreno_state.h" #include "freedreno_resource.h" +#include "freedreno_query_acc.h" #include "freedreno_query_hw.h" #include "freedreno_util.h" @@ -64,6 +66,11 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); unsigned i, prims, buffers = 0; + if (!info->count_from_stream_output && !info->indirect && + !info->primitive_restart && + !u_trim_pipe_prim(info->mode, (unsigned*)&info->count)) + return; + /* if we supported transform feedback, we'd have to disable this: */ if (((scissor->maxx - scissor->minx) * (scissor->maxy - scissor->miny)) == 0) { @@ -78,15 +85,31 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) if (!fd_supported_prim(ctx, info->mode)) { if (ctx->streamout.num_targets > 0) debug_error("stream-out with emulated prims"); - util_primconvert_save_index_buffer(ctx->primconvert, &ctx->indexbuf); util_primconvert_save_rasterizer_state(ctx->primconvert, ctx->rasterizer); util_primconvert_draw_vbo(ctx->primconvert, info); return; } + /* Upload a user index buffer. */ + struct pipe_resource *indexbuf = NULL; + unsigned index_offset = 0; + struct pipe_draw_info new_info; + if (info->index_size) { + if (info->has_user_indices) { + if (!util_upload_index_buffer(pctx, info, &indexbuf, &index_offset)) + return; + new_info = *info; + new_info.index.resource = indexbuf; + new_info.has_user_indices = false; + info = &new_info; + } else { + indexbuf = info->index.resource; + } + } + if (ctx->in_blit) { fd_batch_reset(batch); - ctx->dirty = ~0; + fd_context_all_dirty(ctx); } batch->blit = ctx->in_blit; @@ -95,13 +118,13 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) /* NOTE: needs to be before resource_written(batch->query_buf), otherwise * query_buf may not be created yet. */ - fd_hw_query_set_stage(batch, batch->draw, FD_STAGE_DRAW); + fd_batch_set_stage(batch, FD_STAGE_DRAW); /* * Figure out the buffers/features we need: */ - pipe_mutex_lock(ctx->screen->lock); + mtx_lock(&ctx->screen->lock); if (fd_depth_enabled(ctx)) { buffers |= FD_BUFFER_DEPTH; @@ -136,6 +159,12 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) batch->gmem_reason |= FD_GMEM_BLEND_ENABLED; } + /* Mark SSBOs as being written.. 
we don't actually know which ones are + * read vs written, so just assume the worst + */ + foreach_bit(i, ctx->shaderbuf[PIPE_SHADER_FRAGMENT].enabled_mask) + resource_read(batch, ctx->shaderbuf[PIPE_SHADER_FRAGMENT].sb[i].buffer); + foreach_bit(i, ctx->constbuf[PIPE_SHADER_VERTEX].enabled_mask) resource_read(batch, ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer); foreach_bit(i, ctx->constbuf[PIPE_SHADER_FRAGMENT].enabled_mask) @@ -143,18 +172,18 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) /* Mark VBOs as being read */ foreach_bit(i, ctx->vtx.vertexbuf.enabled_mask) { - assert(!ctx->vtx.vertexbuf.vb[i].user_buffer); - resource_read(batch, ctx->vtx.vertexbuf.vb[i].buffer); + assert(!ctx->vtx.vertexbuf.vb[i].is_user_buffer); + resource_read(batch, ctx->vtx.vertexbuf.vb[i].buffer.resource); } /* Mark index buffer as being read */ - resource_read(batch, ctx->indexbuf.buffer); + resource_read(batch, indexbuf); /* Mark textures as being read */ - foreach_bit(i, ctx->verttex.valid_textures) - resource_read(batch, ctx->verttex.textures[i]->texture); - foreach_bit(i, ctx->fragtex.valid_textures) - resource_read(batch, ctx->fragtex.textures[i]->texture); + foreach_bit(i, ctx->tex[PIPE_SHADER_VERTEX].valid_textures) + resource_read(batch, ctx->tex[PIPE_SHADER_VERTEX].textures[i]->texture); + foreach_bit(i, ctx->tex[PIPE_SHADER_FRAGMENT].valid_textures) + resource_read(batch, ctx->tex[PIPE_SHADER_FRAGMENT].textures[i]->texture); /* Mark streamout buffers as being written.. */ for (i = 0; i < ctx->streamout.num_targets; i++) @@ -163,7 +192,10 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) resource_written(batch, batch->query_buf); - pipe_mutex_unlock(ctx->screen->lock); + list_for_each_entry(struct fd_acc_query, aq, &ctx->acc_active_queries, node) + resource_written(batch, aq->prsc); + + mtx_unlock(&ctx->screen->lock); batch->num_draws++; @@ -191,16 +223,19 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) util_format_short_name(pipe_surface_format(pfb->cbufs[0])), util_format_short_name(pipe_surface_format(pfb->zsbuf))); - if (ctx->draw_vbo(ctx, info)) + if (ctx->draw_vbo(ctx, info, index_offset)) batch->needs_flush = true; for (i = 0; i < ctx->streamout.num_targets; i++) ctx->streamout.offsets[i] += info->count; if (fd_mesa_debug & FD_DBG_DDRAW) - ctx->dirty = 0xffffffff; + fd_context_all_dirty(ctx); fd_batch_check_size(batch); + + if (info == &new_info) + pipe_resource_reference(&indexbuf, NULL); } /* Generic clear implementation (partially) using u_blitter: */ @@ -259,11 +294,13 @@ fd_blitter_clear(struct pipe_context *pctx, unsigned buffers, .max_index = 1, .instance_count = 1, }; - ctx->draw_vbo(ctx, &info); + ctx->draw_vbo(ctx, &info, 0); util_blitter_restore_constant_buffer_state(blitter); util_blitter_restore_vertex_states(blitter); util_blitter_restore_fragment_states(blitter); + util_blitter_restore_textures(blitter); + util_blitter_restore_fb_state(blitter); util_blitter_restore_render_cond(blitter); util_blitter_unset_running_flag(blitter); @@ -293,7 +330,7 @@ fd_clear(struct pipe_context *pctx, unsigned buffers, if (ctx->in_blit) { fd_batch_reset(batch); - ctx->dirty = ~0; + fd_context_all_dirty(ctx); } /* for bookkeeping about which buffers have been cleared (and thus @@ -320,7 +357,7 @@ fd_clear(struct pipe_context *pctx, unsigned buffers, batch->resolve |= buffers; batch->needs_flush = true; - pipe_mutex_lock(ctx->screen->lock); + mtx_lock(&ctx->screen->lock); if (buffers & PIPE_CLEAR_COLOR) for 
(i = 0; i < pfb->nr_cbufs; i++) @@ -334,7 +371,10 @@ fd_clear(struct pipe_context *pctx, unsigned buffers, resource_written(batch, batch->query_buf); - pipe_mutex_unlock(ctx->screen->lock); + list_for_each_entry(struct fd_acc_query, aq, &ctx->acc_active_queries, node) + resource_written(batch, aq->prsc); + + mtx_unlock(&ctx->screen->lock); DBG("%p: %x %ux%u depth=%f, stencil=%u (%s/%s)", batch, buffers, pfb->width, pfb->height, depth, stencil, @@ -344,26 +384,22 @@ fd_clear(struct pipe_context *pctx, unsigned buffers, /* if per-gen backend doesn't implement ctx->clear() generic * blitter clear: */ - if (!ctx->clear) { - fd_blitter_clear(pctx, buffers, color, depth, stencil); - return; - } + bool fallback = true; - fd_hw_query_set_stage(batch, batch->draw, FD_STAGE_CLEAR); + if (ctx->clear) { + fd_batch_set_stage(batch, FD_STAGE_CLEAR); - ctx->clear(ctx, buffers, color, depth, stencil); + if (ctx->clear(ctx, buffers, color, depth, stencil)) { + if (fd_mesa_debug & FD_DBG_DCLEAR) + fd_context_all_dirty(ctx); - ctx->dirty |= FD_DIRTY_ZSA | - FD_DIRTY_VIEWPORT | - FD_DIRTY_RASTERIZER | - FD_DIRTY_SAMPLE_MASK | - FD_DIRTY_PROG | - FD_DIRTY_CONSTBUF | - FD_DIRTY_BLEND | - FD_DIRTY_FRAMEBUFFER; + fallback = false; + } + } - if (fd_mesa_debug & FD_DBG_DCLEAR) - ctx->dirty = 0xffffffff; + if (fallback) { + fd_blitter_clear(pctx, buffers, color, depth, stencil); + } } static void @@ -385,6 +421,43 @@ fd_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps, buffers, depth, stencil, x, y, w, h); } +static void +fd_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info) +{ + struct fd_context *ctx = fd_context(pctx); + struct fd_batch *batch, *save_batch = NULL; + unsigned i; + + batch = fd_batch_create(ctx); + fd_batch_reference(&save_batch, ctx->batch); + fd_batch_reference(&ctx->batch, batch); + + mtx_lock(&ctx->screen->lock); + + /* Mark SSBOs as being written.. 
we don't actually know which ones are + * read vs written, so just assume the worst + */ + foreach_bit(i, ctx->shaderbuf[PIPE_SHADER_COMPUTE].enabled_mask) + resource_read(batch, ctx->shaderbuf[PIPE_SHADER_COMPUTE].sb[i].buffer); + + /* UBO's are read */ + foreach_bit(i, ctx->constbuf[PIPE_SHADER_COMPUTE].enabled_mask) + resource_read(batch, ctx->constbuf[PIPE_SHADER_COMPUTE].cb[i].buffer); + + /* Mark textures as being read */ + foreach_bit(i, ctx->tex[PIPE_SHADER_COMPUTE].valid_textures) + resource_read(batch, ctx->tex[PIPE_SHADER_COMPUTE].textures[i]->texture); + + mtx_unlock(&ctx->screen->lock); + + ctx->launch_grid(ctx, info); + + fd_gmem_flush_compute(batch); + + fd_batch_reference(&ctx->batch, save_batch); + fd_batch_reference(&save_batch, NULL); +} + void fd_draw_init(struct pipe_context *pctx) { @@ -392,4 +465,8 @@ fd_draw_init(struct pipe_context *pctx) pctx->clear = fd_clear; pctx->clear_render_target = fd_clear_render_target; pctx->clear_depth_stencil = fd_clear_depth_stencil; + + if (has_compute(fd_screen(pctx->screen))) { + pctx->launch_grid = fd_launch_grid; + } } diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.c index a5f717169..f20c6ac12 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.c @@ -26,6 +26,8 @@ * Rob Clark <robclark@freedesktop.org> */ +#include <libsync.h> + #include "util/u_inlines.h" #include "freedreno_fence.h" @@ -36,16 +38,23 @@ struct pipe_fence_handle { struct pipe_reference reference; struct fd_context *ctx; struct fd_screen *screen; + int fence_fd; uint32_t timestamp; }; -void -fd_fence_ref(struct pipe_screen *pscreen, +static void fd_fence_destroy(struct pipe_fence_handle *fence) +{ + if (fence->fence_fd != -1) + close(fence->fence_fd); + FREE(fence); +} + +void fd_fence_ref(struct pipe_screen *pscreen, struct pipe_fence_handle **ptr, struct pipe_fence_handle *pfence) { if (pipe_reference(&(*ptr)->reference, &pfence->reference)) - FREE(*ptr); + fd_fence_destroy(*ptr); *ptr = pfence; } @@ -55,14 +64,42 @@ boolean fd_fence_finish(struct pipe_screen *pscreen, struct pipe_fence_handle *fence, uint64_t timeout) { + if (fence->fence_fd != -1) { + int ret = sync_wait(fence->fence_fd, timeout / 1000000); + return ret == 0; + } + if (fd_pipe_wait_timeout(fence->screen->pipe, fence->timestamp, timeout)) return false; return true; } +void fd_create_fence_fd(struct pipe_context *pctx, + struct pipe_fence_handle **pfence, int fd) +{ + *pfence = fd_fence_create(fd_context(pctx), 0, dup(fd)); +} + +void fd_fence_server_sync(struct pipe_context *pctx, + struct pipe_fence_handle *fence) +{ + struct fd_context *ctx = fd_context(pctx); + struct fd_batch *batch = ctx->batch; + + if (sync_accumulate("freedreno", &batch->in_fence_fd, fence->fence_fd)) { + /* error */ + } +} + +int fd_fence_get_fd(struct pipe_screen *pscreen, + struct pipe_fence_handle *fence) +{ + return dup(fence->fence_fd); +} + struct pipe_fence_handle * fd_fence_create(struct fd_context *ctx, - uint32_t timestamp) + uint32_t timestamp, int fence_fd) { struct pipe_fence_handle *fence; @@ -75,6 +112,7 @@ struct pipe_fence_handle * fd_fence_create(struct fd_context *ctx, fence->ctx = ctx; fence->screen = ctx->screen; fence->timestamp = timestamp; + fence->fence_fd = fence_fd; return fence; } diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.h index 32bfacc76..1de2d0a51 
100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.h +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.h @@ -38,9 +38,15 @@ boolean fd_fence_finish(struct pipe_screen *screen, struct pipe_context *ctx, struct pipe_fence_handle *pfence, uint64_t timeout); +void fd_create_fence_fd(struct pipe_context *pctx, + struct pipe_fence_handle **pfence, int fd); +void fd_fence_server_sync(struct pipe_context *pctx, + struct pipe_fence_handle *fence); +int fd_fence_get_fd(struct pipe_screen *pscreen, + struct pipe_fence_handle *pfence); struct fd_context; struct pipe_fence_handle * fd_fence_create(struct fd_context *ctx, - uint32_t timestamp); + uint32_t timestamp, int fence_fd); #endif /* FREEDRENO_FENCE_H_ */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.c index 3b2ecbaea..03400717b 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.c @@ -70,7 +70,7 @@ static uint32_t bin_width(struct fd_screen *screen) { - if (is_a4xx(screen)) + if (is_a4xx(screen) || is_a5xx(screen)) return 1024; if (is_a3xx(screen)) return 992; @@ -110,7 +110,10 @@ calculate_tiles(struct fd_batch *batch) struct fd_gmem_stateobj *gmem = &ctx->gmem; struct pipe_scissor_state *scissor = &batch->max_scissor; struct pipe_framebuffer_state *pfb = &batch->framebuffer; - uint32_t gmem_size = ctx->screen->gmemsize_bytes; + const uint32_t gmem_alignw = ctx->screen->gmem_alignw; + const uint32_t gmem_alignh = ctx->screen->gmem_alignh; + const unsigned npipes = ctx->screen->num_vsc_pipes; + const uint32_t gmem_size = ctx->screen->gmemsize_bytes; uint32_t minx, miny, width, height; uint32_t nbins_x = 1, nbins_y = 1; uint32_t bin_w, bin_h; @@ -119,7 +122,7 @@ calculate_tiles(struct fd_batch *batch) uint32_t i, j, t, xoff, yoff; uint32_t tpp_x, tpp_y; bool has_zs = !!(batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)); - int tile_n[ARRAY_SIZE(ctx->pipe)]; + int tile_n[npipes]; if (has_zs) { struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); @@ -147,21 +150,22 @@ calculate_tiles(struct fd_batch *batch) width = pfb->width; height = pfb->height; } else { - minx = scissor->minx & ~31; /* round down to multiple of 32 */ - miny = scissor->miny & ~31; + /* round down to multiple of alignment: */ + minx = scissor->minx & ~(gmem_alignw - 1); + miny = scissor->miny & ~(gmem_alignh - 1); width = scissor->maxx - minx; height = scissor->maxy - miny; } - bin_w = align(width, 32); - bin_h = align(height, 32); + bin_w = align(width, gmem_alignw); + bin_h = align(height, gmem_alignh); /* first, find a bin width that satisfies the maximum width * restrictions: */ while (bin_w > max_width) { nbins_x++; - bin_w = align(width / nbins_x, 32); + bin_w = align(width / nbins_x, gmem_alignw); } if (fd_mesa_debug & FD_DBG_MSGS) { @@ -178,10 +182,10 @@ calculate_tiles(struct fd_batch *batch) while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem) > gmem_size) { if (bin_w > bin_h) { nbins_x++; - bin_w = align(width / nbins_x, 32); + bin_w = align(width / nbins_x, gmem_alignw); } else { nbins_y++; - bin_h = align(height / nbins_y, 32); + bin_h = align(height / nbins_y, gmem_alignh); } } @@ -216,9 +220,12 @@ calculate_tiles(struct fd_batch *batch) div_round_up(nbins_x, tpp_x)) > 8) tpp_x += 1; + gmem->maxpw = tpp_x; + gmem->maxph = tpp_y; + /* configure pipes: */ xoff = yoff = 0; - for (i = 0; i < ARRAY_SIZE(ctx->pipe); i++) { + for (i = 0; i < npipes; i++) { struct fd_vsc_pipe 
*pipe = &ctx->pipe[i]; if (xoff >= nbins_x) { @@ -238,7 +245,7 @@ calculate_tiles(struct fd_batch *batch) xoff += tpp_x; } - for (; i < ARRAY_SIZE(ctx->pipe); i++) { + for (; i < npipes; i++) { struct fd_vsc_pipe *pipe = &ctx->pipe[i]; pipe->x = pipe->y = pipe->w = pipe->h = 0; } @@ -329,7 +336,8 @@ render_tiles(struct fd_batch *batch) ctx->emit_tile_renderprep(batch, tile); - fd_hw_query_prepare_tile(batch, i, batch->gmem); + if (ctx->query_prepare_tile) + ctx->query_prepare_tile(batch, i, batch->gmem); /* emit IB to drawcmds: */ ctx->emit_ib(batch->gmem, batch->draw); @@ -338,6 +346,9 @@ render_tiles(struct fd_batch *batch) /* emit gmem2mem to transfer tile back to system memory: */ ctx->emit_tile_gmem2mem(batch, tile); } + + if (ctx->emit_tile_fini) + ctx->emit_tile_fini(batch); } static void @@ -347,11 +358,29 @@ render_sysmem(struct fd_batch *batch) ctx->emit_sysmem_prep(batch); - fd_hw_query_prepare_tile(batch, 0, batch->gmem); + if (ctx->query_prepare_tile) + ctx->query_prepare_tile(batch, 0, batch->gmem); /* emit IB to drawcmds: */ ctx->emit_ib(batch->gmem, batch->draw); fd_reset_wfi(batch); + + if (ctx->emit_sysmem_fini) + ctx->emit_sysmem_fini(batch); +} + +static void +flush_ring(struct fd_batch *batch) +{ + struct fd_context *ctx = batch->ctx; + int out_fence_fd = -1; + + fd_ringbuffer_flush2(batch->gmem, batch->in_fence_fd, + batch->needs_out_fence_fd ? &out_fence_fd : NULL); + + fd_fence_ref(&ctx->screen->base, &ctx->last_fence, NULL); + ctx->last_fence = fd_fence_create(ctx, + fd_ringbuffer_timestamp(batch->gmem), out_fence_fd); } void @@ -379,7 +408,8 @@ fd_gmem_render_tiles(struct fd_batch *batch) batch, pfb->width, pfb->height, util_format_short_name(pipe_surface_format(pfb->cbufs[0])), util_format_short_name(pipe_surface_format(pfb->zsbuf))); - fd_hw_query_prepare(batch, 1); + if (ctx->query_prepare) + ctx->query_prepare(batch, 1); render_sysmem(batch); ctx->stats.batch_sysmem++; } else { @@ -389,15 +419,35 @@ fd_gmem_render_tiles(struct fd_batch *batch) batch, pfb->width, pfb->height, gmem->nbins_x, gmem->nbins_y, util_format_short_name(pipe_surface_format(pfb->cbufs[0])), util_format_short_name(pipe_surface_format(pfb->zsbuf))); - fd_hw_query_prepare(batch, gmem->nbins_x * gmem->nbins_y); + if (ctx->query_prepare) + ctx->query_prepare(batch, gmem->nbins_x * gmem->nbins_y); render_tiles(batch); ctx->stats.batch_gmem++; } - fd_ringbuffer_flush(batch->gmem); + flush_ring(batch); +} - fd_fence_ref(&ctx->screen->base, &ctx->last_fence, NULL); - ctx->last_fence = fd_fence_create(ctx, fd_ringbuffer_timestamp(batch->gmem)); +/* special case for when we need to create a fence but have no rendering + * to flush.. just emit a no-op string-marker packet. 
+ */ +void +fd_gmem_render_noop(struct fd_batch *batch) +{ + struct fd_context *ctx = batch->ctx; + struct pipe_context *pctx = &ctx->base; + + pctx->emit_string_marker(pctx, "noop", 4); + /* emit IB to drawcmds (which contain the string marker): */ + ctx->emit_ib(batch->gmem, batch->draw); + flush_ring(batch); +} + +void +fd_gmem_flush_compute(struct fd_batch *batch) +{ + render_sysmem(batch); + flush_ring(batch); } /* tile needs restore if it isn't completely contained within the diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.h index 116423a40..f5276ce44 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.h +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.h @@ -57,11 +57,14 @@ struct fd_gmem_stateobj { uint16_t bin_w, nbins_x; uint16_t minx, miny; uint16_t width, height; + uint16_t maxpw, maxph; /* maximum pipe width/height */ }; struct fd_batch; void fd_gmem_render_tiles(struct fd_batch *batch); +void fd_gmem_render_noop(struct fd_batch *batch); +void fd_gmem_flush_compute(struct fd_batch *batch); bool fd_gmem_needs_restore(struct fd_batch *batch, struct fd_tile *tile, uint32_t buffers); diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_query.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_query.c index 18e0c793c..0d7bc9f20 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_query.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_query.c @@ -46,8 +46,8 @@ fd_create_query(struct pipe_context *pctx, unsigned query_type, unsigned index) struct fd_query *q; q = fd_sw_create_query(ctx, query_type); - if (!q) - q = fd_hw_create_query(ctx, query_type); + if (!q && ctx->create_query) + q = ctx->create_query(ctx, query_type); return (struct pipe_query *) q; } @@ -63,14 +63,34 @@ static boolean fd_begin_query(struct pipe_context *pctx, struct pipe_query *pq) { struct fd_query *q = fd_query(pq); - return q->funcs->begin_query(fd_context(pctx), q); + boolean ret; + + if (q->active) + return false; + + ret = q->funcs->begin_query(fd_context(pctx), q); + q->active = ret; + + return ret; } static bool fd_end_query(struct pipe_context *pctx, struct pipe_query *pq) { struct fd_query *q = fd_query(pq); + + /* there are a couple special cases, which don't have + * a matching ->begin_query(): + */ + if (skip_begin_query(q->type) && !q->active) + fd_begin_query(pctx, pq); + + if (!q->active) + return false; + q->funcs->end_query(fd_context(pctx), q); + q->active = false; + return true; } @@ -79,12 +99,18 @@ fd_get_query_result(struct pipe_context *pctx, struct pipe_query *pq, boolean wait, union pipe_query_result *result) { struct fd_query *q = fd_query(pq); + + if (q->active) + return false; + + util_query_clear_result(result, q->type); + return q->funcs->get_query_result(fd_context(pctx), q, wait, result); } static void fd_render_condition(struct pipe_context *pctx, struct pipe_query *pq, - boolean condition, uint mode) + boolean condition, enum pipe_render_cond_flag mode) { struct fd_context *ctx = fd_context(pctx); ctx->cond_query = pq; diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_resource.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_resource.c index 5a21ca44e..5aa90ced6 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_resource.c @@ -51,39 +51,46 @@ static void fd_invalidate_resource(struct fd_context *ctx, struct pipe_resource *prsc) { - int i; - /* Go through the 
entire state and see if the resource is bound * anywhere. If it is, mark the relevant state as dirty. This is called on * realloc_bo. */ - /* Constbufs */ - for (i = 1; i < PIPE_MAX_CONSTANT_BUFFERS && !(ctx->dirty & FD_DIRTY_CONSTBUF); i++) { - if (ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer == prsc) - ctx->dirty |= FD_DIRTY_CONSTBUF; - if (ctx->constbuf[PIPE_SHADER_FRAGMENT].cb[i].buffer == prsc) - ctx->dirty |= FD_DIRTY_CONSTBUF; - } - /* VBOs */ - for (i = 0; i < ctx->vtx.vertexbuf.count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) { - if (ctx->vtx.vertexbuf.vb[i].buffer == prsc) + for (unsigned i = 0; i < ctx->vtx.vertexbuf.count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) { + if (ctx->vtx.vertexbuf.vb[i].buffer.resource == prsc) ctx->dirty |= FD_DIRTY_VTXBUF; } - /* Index buffer */ - if (ctx->indexbuf.buffer == prsc) - ctx->dirty |= FD_DIRTY_INDEXBUF; + /* per-shader-stage resources: */ + for (unsigned stage = 0; stage < PIPE_SHADER_TYPES; stage++) { + /* Constbufs.. note that constbuf[0] is normal uniforms emitted in + * cmdstream rather than by pointer.. + */ + const unsigned num_ubos = util_last_bit(ctx->constbuf[stage].enabled_mask); + for (unsigned i = 1; i < num_ubos; i++) { + if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_CONST) + break; + if (ctx->constbuf[stage].cb[i].buffer == prsc) + ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_CONST; + } - /* Textures */ - for (i = 0; i < ctx->verttex.num_textures && !(ctx->dirty & FD_DIRTY_VERTTEX); i++) { - if (ctx->verttex.textures[i] && (ctx->verttex.textures[i]->texture == prsc)) - ctx->dirty |= FD_DIRTY_VERTTEX; - } - for (i = 0; i < ctx->fragtex.num_textures && !(ctx->dirty & FD_DIRTY_FRAGTEX); i++) { - if (ctx->fragtex.textures[i] && (ctx->fragtex.textures[i]->texture == prsc)) - ctx->dirty |= FD_DIRTY_FRAGTEX; + /* Textures */ + for (unsigned i = 0; i < ctx->tex[stage].num_textures; i++) { + if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_TEX) + break; + if (ctx->tex[stage].textures[i] && (ctx->tex[stage].textures[i]->texture == prsc)) + ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_TEX; + } + + /* SSBOs */ + const unsigned num_ssbos = util_last_bit(ctx->shaderbuf[stage].enabled_mask); + for (unsigned i = 0; i < num_ssbos; i++) { + if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_SSBO) + break; + if (ctx->shaderbuf[stage].sb[i].buffer == prsc) + ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_SSBO; + } } } @@ -102,7 +109,6 @@ realloc_bo(struct fd_resource *rsc, uint32_t size) fd_bo_del(rsc->bo); rsc->bo = fd_bo_new(screen->dev, size, flags); - rsc->timestamp = 0; util_range_set_empty(&rsc->valid_buffer_range); fd_bc_invalidate_resource(rsc, true); } @@ -179,7 +185,7 @@ fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc, */ fd_bc_invalidate_resource(rsc, false); - pipe_mutex_lock(ctx->screen->lock); + mtx_lock(&ctx->screen->lock); /* Swap the backing bo's, so shadow becomes the old buffer, * blit from shadow to new buffer. From here on out, we @@ -196,7 +202,6 @@ fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc, /* TODO valid_buffer_range?? 
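fd_invalidate_resource() above now walks every shader stage and, for constant buffers and SSBOs, only scans slots up to the highest bit set in the state object's enabled_mask (util_last_bit() returns the index of the most significant set bit plus one). A self-contained sketch of that pattern, with a portable stand-in for util_last_bit():

#include <stdio.h>

/* Portable stand-in for Mesa's util_last_bit(): 1 + index of the most
 * significant set bit, or 0 when the mask is empty. */
static unsigned last_bit(unsigned mask)
{
    unsigned n = 0;
    while (mask) {
        n++;
        mask >>= 1;
    }
    return n;
}

struct buf_state {
    void *buffer[16];
    unsigned enabled_mask;   /* bit i set => slot i has a buffer bound */
};

/* Only look at slots up to the highest enabled one, like the per-stage
 * loops in fd_invalidate_resource(). */
static int resource_is_bound(const struct buf_state *s, const void *resource)
{
    unsigned num = last_bit(s->enabled_mask);
    for (unsigned i = 0; i < num; i++)
        if (s->buffer[i] == resource)
            return 1;
    return 0;
}

int main(void)
{
    struct buf_state s = { { 0 }, 0 };
    int dummy;

    s.buffer[3] = &dummy;
    s.enabled_mask |= 1u << 3;

    printf("bound: %d\n", resource_is_bound(&s, &dummy));   /* 1 */
    printf("bound: %d\n", resource_is_bound(&s, &s));       /* 0 */
    return 0;
}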
*/ swap(rsc->bo, shadow->bo); - swap(rsc->timestamp, shadow->timestamp); swap(rsc->write_batch, shadow->write_batch); /* at this point, the newly created shadow buffer is not referenced @@ -212,7 +217,7 @@ fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc, } swap(rsc->batch_mask, shadow->batch_mask); - pipe_mutex_unlock(ctx->screen->lock); + mtx_unlock(&ctx->screen->lock); struct pipe_blit_info blit = {0}; blit.dst.resource = prsc; @@ -699,7 +704,9 @@ static uint32_t setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format) { struct pipe_resource *prsc = &rsc->base.b; + struct fd_screen *screen = fd_screen(prsc->screen); enum util_format_layout layout = util_format_description(format)->layout; + uint32_t pitchalign = screen->gmem_alignw; uint32_t level, size = 0; uint32_t width = prsc->width0; uint32_t height = prsc->height0; @@ -709,15 +716,18 @@ setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format forma */ uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size; + if (is_a5xx(screen) && (rsc->base.b.target >= PIPE_TEXTURE_2D)) + height = align(height, screen->gmem_alignh); + for (level = 0; level <= prsc->last_level; level++) { struct fd_resource_slice *slice = fd_resource_slice(rsc, level); uint32_t blocks; if (layout == UTIL_FORMAT_LAYOUT_ASTC) slice->pitch = width = - util_align_npot(width, 32 * util_format_get_blockwidth(format)); + util_align_npot(width, pitchalign * util_format_get_blockwidth(format)); else - slice->pitch = width = align(width, 32); + slice->pitch = width = align(width, pitchalign); slice->offset = size; blocks = util_format_get_nblocks(format, width, height); /* 1d array and 2d array textures must all have the same layer size @@ -775,6 +785,25 @@ fd_resource_resize(struct pipe_resource *prsc, uint32_t sz) realloc_bo(rsc, setup_slices(rsc, 1, prsc->format)); } +// TODO common helper? +static bool +has_depth(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_Z32_UNORM: + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + return true; + default: + return false; + } +} + /** * Create a new texture object, using the given template info. */ @@ -782,6 +811,7 @@ static struct pipe_resource * fd_resource_create(struct pipe_screen *pscreen, const struct pipe_resource *tmpl) { + struct fd_screen *screen = fd_screen(pscreen); struct fd_resource *rsc = CALLOC_STRUCT(fd_resource); struct pipe_resource *prsc = &rsc->base.b; enum pipe_format format = tmpl->format; @@ -809,7 +839,7 @@ fd_resource_create(struct pipe_screen *pscreen, if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) format = PIPE_FORMAT_Z32_FLOAT; - else if (fd_screen(pscreen)->gpu_id < 400 && + else if (screen->gpu_id < 400 && util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) format = PIPE_FORMAT_R8G8B8A8_UNORM; rsc->internal_format = format; @@ -817,8 +847,24 @@ fd_resource_create(struct pipe_screen *pscreen, assert(rsc->cpp); + // XXX probably need some extra work if we hit rsc shadowing path w/ lrz.. 
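The is_a5xx()/FD_DBG_LRZ block added to fd_resource_create() a little further down sizes the low-resolution Z buffer at one entry per 8x8 pixel tile, with the pitch aligned to 32, a factor of two on the area, and an extra 0x1000 bytes for GRAS_LRZ_FAST_CLEAR_BUFFER. Worked through for a hypothetical 1920x1080 depth attachment:

    lrz_pitch  = align(DIV_ROUND_UP(1920, 8), 32) = align(240, 32) = 256
    lrz_height = DIV_ROUND_UP(1080, 8)            = 135
    size       = 256 * 135 * 2 + 0x1000           = 69120 + 4096   = 73216 bytes

That is roughly 72 KiB of LRZ data alongside an ~8 MiB depth buffer (at 4 bytes per pixel).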
+ if (is_a5xx(screen) && (fd_mesa_debug & FD_DBG_LRZ) && has_depth(format)) { + const uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE | + DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */ + unsigned lrz_pitch = align(DIV_ROUND_UP(tmpl->width0, 8), 32); + unsigned lrz_height = DIV_ROUND_UP(tmpl->height0, 8); + unsigned size = lrz_pitch * lrz_height * 2; + + size += 0x1000; /* for GRAS_LRZ_FAST_CLEAR_BUFFER */ + + rsc->lrz_height = lrz_height; + rsc->lrz_width = lrz_pitch; + rsc->lrz_pitch = lrz_pitch; + rsc->lrz = fd_bo_new(screen->dev, size, flags); + } + alignment = slice_alignment(pscreen, tmpl); - if (is_a4xx(fd_screen(pscreen))) { + if (is_a4xx(screen) || is_a5xx(screen)) { switch (tmpl->target) { case PIPE_TEXTURE_3D: rsc->layer_first = false; @@ -882,6 +928,7 @@ fd_resource_from_handle(struct pipe_screen *pscreen, struct fd_resource *rsc = CALLOC_STRUCT(fd_resource); struct fd_resource_slice *slice = &rsc->slices[0]; struct pipe_resource *prsc = &rsc->base.b; + uint32_t pitchalign = fd_screen(pscreen)->gmem_alignw; DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, " "nr_samples=%u, usage=%u, bind=%x, flags=%x", @@ -909,8 +956,10 @@ fd_resource_from_handle(struct pipe_screen *pscreen, rsc->cpp = util_format_get_blocksize(tmpl->format); slice->pitch = handle->stride / rsc->cpp; slice->offset = handle->offset; + slice->size0 = handle->stride * prsc->height0; - if ((slice->pitch < align(prsc->width0, 32)) || (slice->pitch % 32)) + if ((slice->pitch < align(prsc->width0, pitchalign)) || + (slice->pitch & (pitchalign - 1))) goto fail; assert(rsc->cpp); @@ -1075,16 +1124,17 @@ fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard, util_blitter_save_framebuffer(ctx->blitter, ctx->batch ? &ctx->batch->framebuffer : NULL); util_blitter_save_fragment_sampler_states(ctx->blitter, - ctx->fragtex.num_samplers, - (void **)ctx->fragtex.samplers); + ctx->tex[PIPE_SHADER_FRAGMENT].num_samplers, + (void **)ctx->tex[PIPE_SHADER_FRAGMENT].samplers); util_blitter_save_fragment_sampler_views(ctx->blitter, - ctx->fragtex.num_textures, ctx->fragtex.textures); + ctx->tex[PIPE_SHADER_FRAGMENT].num_textures, + ctx->tex[PIPE_SHADER_FRAGMENT].textures); if (!render_cond) util_blitter_save_render_condition(ctx->blitter, ctx->cond_query, ctx->cond_cond, ctx->cond_mode); if (ctx->batch) - fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, stage); + fd_batch_set_stage(ctx->batch, stage); ctx->in_blit = discard; } @@ -1093,7 +1143,7 @@ void fd_blitter_pipe_end(struct fd_context *ctx) { if (ctx->batch) - fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, FD_STAGE_NULL); + fd_batch_set_stage(ctx->batch, FD_STAGE_NULL); ctx->in_blit = false; } @@ -1124,7 +1174,7 @@ fd_resource_context_init(struct pipe_context *pctx) pctx->transfer_flush_region = u_transfer_flush_region_vtbl; pctx->transfer_unmap = u_transfer_unmap_vtbl; pctx->buffer_subdata = u_default_buffer_subdata; - pctx->texture_subdata = u_default_texture_subdata; + pctx->texture_subdata = u_default_texture_subdata; pctx->create_surface = fd_create_surface; pctx->surface_destroy = fd_surface_destroy; pctx->resource_copy_region = fd_resource_copy_region; diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.c index cc75c509a..a915d65ee 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.c @@ -53,6 +53,7 @@ #include "a2xx/fd2_screen.h" #include "a3xx/fd3_screen.h" #include 
"a4xx/fd4_screen.h" +#include "a5xx/fd5_screen.h" #include "ir3/ir3_nir.h" @@ -74,9 +75,10 @@ static const struct debug_named_value debug_options[] = { {"shaderdb", FD_DBG_SHADERDB, "Enable shaderdb output"}, {"flush", FD_DBG_FLUSH, "Force flush after every draw"}, {"deqp", FD_DBG_DEQP, "Enable dEQP hacks"}, - {"nir", FD_DBG_NIR, "Prefer NIR as native IR"}, - {"reorder", FD_DBG_REORDER,"Enable reordering for draws/blits"}, + {"inorder", FD_DBG_INORDER,"Disable reordering for draws/blits"}, {"bstat", FD_DBG_BSTAT, "Print batch stats at context destroy"}, + {"nogrow", FD_DBG_NOGROW, "Disable \"growable\" cmdstream buffers, even if kernel supports it"}, + {"lrz", FD_DBG_LRZ, "Enable experimental LRZ support (a5xx+)"}, DEBUG_NAMED_VALUE_END }; @@ -140,7 +142,9 @@ fd_screen_destroy(struct pipe_screen *pscreen) slab_destroy_parent(&screen->transfer_pool); - pipe_mutex_destroy(screen->lock); + mtx_destroy(&screen->lock); + + ralloc_free(screen->compiler); free(screen); } @@ -175,21 +179,25 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: - case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_STRING_MARKER: case PIPE_CAP_MIXED_COLOR_DEPTH_BITS: return 1; + case PIPE_CAP_VERTEXID_NOBASE: + return is_a3xx(screen) || is_a4xx(screen); + case PIPE_CAP_USER_CONSTANT_BUFFERS: return is_a4xx(screen) ? 0 : 1; + case PIPE_CAP_COMPUTE: + return has_compute(screen); + case PIPE_CAP_SHADER_STENCIL_EXPORT: case PIPE_CAP_TGSI_TEXCOORD: case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: case PIPE_CAP_TEXTURE_MULTISAMPLE: case PIPE_CAP_TEXTURE_BARRIER: case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - case PIPE_CAP_COMPUTE: case PIPE_CAP_QUERY_MEMORY_INFO: case PIPE_CAP_PCI_GROUP: case PIPE_CAP_PCI_BUS: @@ -209,15 +217,21 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: case PIPE_CAP_FAKE_SW_MSAA: case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: - case PIPE_CAP_DEPTH_CLIP_DISABLE: case PIPE_CAP_CLIP_HALFZ: + return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen); + + case PIPE_CAP_DEPTH_CLIP_DISABLE: return is_a3xx(screen) || is_a4xx(screen); + case PIPE_CAP_POLYGON_OFFSET_CLAMP: + return is_a5xx(screen); + case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: return 0; case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: if (is_a3xx(screen)) return 16; if (is_a4xx(screen)) return 32; + if (is_a5xx(screen)) return 32; return 0; case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: /* We could possibly emulate more by pretending 2d/rect textures and @@ -225,6 +239,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) */ if (is_a3xx(screen)) return 8192; if (is_a4xx(screen)) return 16384; + if (is_a5xx(screen)) return 16384; return 0; case PIPE_CAP_TEXTURE_FLOAT_LINEAR: @@ -232,7 +247,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_START_INSTANCE: case PIPE_CAP_SAMPLER_VIEW_TARGET: case PIPE_CAP_TEXTURE_QUERY_LOD: - return is_a4xx(screen); + return is_a4xx(screen) || is_a5xx(screen); case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return 64; @@ -242,12 +257,16 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 120; return is_ir3(screen) ? 140 : 120; + case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: + if (is_a5xx(screen)) + return 4; + return 0; + /* Unsupported features. 
*/ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: case PIPE_CAP_USER_VERTEX_BUFFERS: - case PIPE_CAP_USER_INDEX_BUFFERS: case PIPE_CAP_QUERY_PIPELINE_STATISTICS: case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: @@ -260,7 +279,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_MULTI_DRAW_INDIRECT: case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: - case PIPE_CAP_POLYGON_OFFSET_CLAMP: case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: @@ -275,7 +293,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_PACK_HALF_FLOAT: case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: - case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: case PIPE_CAP_INVALIDATE_BUFFER: case PIPE_CAP_GENERATE_MIPMAP: case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: @@ -288,12 +305,29 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED: case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS: case PIPE_CAP_TGSI_ARRAY_COMPONENTS: + case PIPE_CAP_TGSI_CAN_READ_OUTPUTS: + case PIPE_CAP_TGSI_FS_FBFETCH: + case PIPE_CAP_TGSI_MUL_ZERO_WINS: + case PIPE_CAP_DOUBLES: + case PIPE_CAP_INT64: + case PIPE_CAP_INT64_DIVMOD: + case PIPE_CAP_TGSI_TEX_TXF_LZ: + case PIPE_CAP_TGSI_CLOCK: + case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE: + case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE: + case PIPE_CAP_TGSI_BALLOT: + case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT: + case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: + case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: + case PIPE_CAP_POST_DEPTH_COVERAGE: + case PIPE_CAP_BINDLESS_TEXTURE: return 0; case PIPE_CAP_MAX_VIEWPORTS: return 1; case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: /* manage the variants for these ourself, to avoid breaking precompile: */ case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: case PIPE_CAP_VERTEX_COLOR_CLAMPED: @@ -307,6 +341,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return PIPE_MAX_SO_BUFFERS; return 0; case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS: if (is_ir3(screen)) return 1; return 0; @@ -333,7 +368,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 11; case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: - return (is_a3xx(screen) || is_a4xx(screen)) ? 256 : 0; + return (is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen)) ? 256 : 0; /* Render targets. 
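The *_LEVELS texture caps in this switch are mip level counts, so a returned value of N advertises textures up to 2^(N-1) texels on the largest axis:

    11 levels -> 1024
    12 levels -> 2048
    14 levels -> 8192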
*/ case PIPE_CAP_MAX_RENDER_TARGETS: @@ -345,11 +380,11 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_QUERY_BUFFER_OBJECT: return 0; case PIPE_CAP_OCCLUSION_QUERY: - return is_a3xx(screen) || is_a4xx(screen); + return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen); case PIPE_CAP_QUERY_TIMESTAMP: case PIPE_CAP_QUERY_TIME_ELAPSED: /* only a4xx, requires new enough kernel so we know max_freq: */ - return (screen->max_freq > 0) && is_a4xx(screen); + return (screen->max_freq > 0) && (is_a4xx(screen) || is_a5xx(screen)); case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: case PIPE_CAP_MIN_TEXEL_OFFSET: @@ -376,6 +411,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 10; case PIPE_CAP_UMA: return 1; + case PIPE_CAP_NATIVE_FENCE_FD: + return fd_device_version(screen->dev) >= FD_VERSION_FENCE_FD; } debug_printf("unknown param %d\n", param); return 0; @@ -415,7 +452,8 @@ fd_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) } static int -fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, +fd_screen_get_shader_param(struct pipe_screen *pscreen, + enum pipe_shader_type shader, enum pipe_shader_cap param) { struct fd_screen *screen = fd_screen(pscreen); @@ -426,6 +464,9 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_VERTEX: break; case PIPE_SHADER_COMPUTE: + if (has_compute(screen)) + break; + return 0; case PIPE_SHADER_GEOMETRY: /* maye we could emulate.. */ return 0; @@ -453,11 +494,9 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, * split between VS and FS. Use lower limit of 256 to * avoid getting into impossible situations: */ - return ((is_a3xx(screen) || is_a4xx(screen)) ? 4096 : 64) * sizeof(float[4]); + return ((is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen)) ? 4096 : 64) * sizeof(float[4]); case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: return is_ir3(screen) ? 16 : 1; - case PIPE_SHADER_CAP_MAX_PREDS: - return 0; /* nothing uses this */ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: return 1; case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: @@ -473,7 +512,6 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, /* a2xx compiler doesn't handle indirect: */ return is_ir3(screen) ? 1 : 0; case PIPE_SHADER_CAP_SUBROUTINES: - case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: @@ -489,21 +527,142 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: return 16; case PIPE_SHADER_CAP_PREFERRED_IR: - if ((fd_mesa_debug & FD_DBG_NIR) && is_ir3(screen)) + if (is_ir3(screen)) return PIPE_SHADER_IR_NIR; return PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_SUPPORTED_IRS: + if (is_ir3(screen)) { + return (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_TGSI); + } else { + return (1 << PIPE_SHADER_IR_TGSI); + } return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; + case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: + case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: + return 0; case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + if (is_a5xx(screen)) { + /* a5xx (and a4xx for that matter) has one state-block + * for compute-shader SSBO's and another that is shared + * by VS/HS/DS/GS/FS.. so to simplify things for now + * just advertise SSBOs for FS and CS. We could possibly + * do what blob does, and partition the space for + * VS/HS/DS/GS/FS. 
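PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE above is returned in bytes, so the ir3 generations advertise 4096 * sizeof(float[4]) = 4096 * 16 = 64 KiB of constants (a2xx stays at 64 vec4 = 1 KiB). PIPE_SHADER_CAP_SUPPORTED_IRS is a bitmask of pipe_shader_ir values; a sketch (not the exact st/mesa logic) of how a state tracker would consume it, assuming the usual gallium headers:

#include "pipe/p_defines.h"
#include "pipe/p_screen.h"

/* Sketch of consuming the SUPPORTED_IRS bitmask, preferring NIR when the
 * driver offers it (freedreno now always does on ir3 parts). */
static enum pipe_shader_ir
choose_ir(struct pipe_screen *screen, enum pipe_shader_type stage)
{
    int irs = screen->get_shader_param(screen, stage,
            PIPE_SHADER_CAP_SUPPORTED_IRS);

    if (irs & (1 << PIPE_SHADER_IR_NIR))
        return PIPE_SHADER_IR_NIR;
    return PIPE_SHADER_IR_TGSI;
}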
The blob advertises: + * + * GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS: 4 + * GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS: 4 + * GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS: 4 + * GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS: 4 + * GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS: 4 + * GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS: 24 + * GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS: 24 + * + * I think that way we could avoid having to patch shaders + * for actual SSBO indexes by using a static partitioning. + */ + switch(shader) + { + case PIPE_SHADER_FRAGMENT: + case PIPE_SHADER_COMPUTE: + return 24; + default: + return 0; + } + } + return 0; case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: + /* probably should be same as MAX_SHADRER_BUFFERS but not implemented yet */ return 0; } debug_printf("unknown shader param %d\n", param); return 0; } +/* TODO depending on how much the limits differ for a3xx/a4xx, maybe move this + * into per-generation backend? + */ +static int +fd_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type, + enum pipe_compute_cap param, void *ret) +{ + struct fd_screen *screen = fd_screen(pscreen); + const char * const ir = "ir3"; + + if (!has_compute(screen)) + return 0; + + switch (param) { + case PIPE_COMPUTE_CAP_ADDRESS_BITS: + if (ret) { + uint32_t *address_bits = ret; + address_bits[0] = 32; + + if (is_a5xx(screen)) + address_bits[0] = 64; + } + return 1 * sizeof(uint32_t); + + case PIPE_COMPUTE_CAP_IR_TARGET: + if (ret) + sprintf(ret, ir); + return strlen(ir) * sizeof(char); + + case PIPE_COMPUTE_CAP_GRID_DIMENSION: + if (ret) { + uint64_t *grid_dimension = ret; + grid_dimension[0] = 3; + } + return 1 * sizeof(uint64_t); + + case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: + if (ret) { + uint64_t *grid_size = ret; + grid_size[0] = 65535; + grid_size[1] = 65535; + grid_size[2] = 65535; + } + return 3 * sizeof(uint64_t) ; + + case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: + if (ret) { + uint64_t *grid_size = ret; + grid_size[0] = 1024; + grid_size[1] = 1024; + grid_size[2] = 64; + } + return 3 * sizeof(uint64_t) ; + + case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: + if (ret) { + uint64_t *max_threads_per_block = ret; + *max_threads_per_block = 1024; + } + return sizeof(uint64_t); + + case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: + case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: + case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: + case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: + break; + case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: + if (ret) { + uint64_t *max = ret; + *max = 32768; + } + return sizeof(uint64_t); + case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: + case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: + case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: + case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: + break; + } + + return 0; +} + static const void * fd_get_compiler_options(struct pipe_screen *pscreen, enum pipe_shader_ir ir, unsigned shader) @@ -511,7 +670,7 @@ fd_get_compiler_options(struct pipe_screen *pscreen, struct fd_screen *screen = fd_screen(pscreen); if (is_ir3(screen)) - return ir3_get_compiler_options(); + return ir3_get_compiler_options(screen->compiler); return NULL; } @@ -664,27 +823,41 @@ fd_screen_create(struct fd_device *dev) case 430: fd4_screen_init(pscreen); break; + case 530: + fd5_screen_init(pscreen); + break; default: debug_printf("unsupported GPU: a%03d\n", screen->gpu_id); goto fail; } + if (screen->gpu_id >= 500) { + screen->gmem_alignw = 64; + screen->gmem_alignh = 32; + screen->num_vsc_pipes = 16; + } else { + screen->gmem_alignw = 32; + screen->gmem_alignh = 32; + screen->num_vsc_pipes 
= 8; + } + /* NOTE: don't enable reordering on a2xx, since completely untested. * Also, don't enable if we have too old of a kernel to support * growable cmdstream buffers, since memory requirement for cmdstream * buffers would be too much otherwise. */ if ((screen->gpu_id >= 300) && (fd_device_version(dev) >= FD_VERSION_UNLIMITED_CMDS)) - screen->reorder = !!(fd_mesa_debug & FD_DBG_REORDER); + screen->reorder = !(fd_mesa_debug & FD_DBG_INORDER); fd_bc_init(&screen->batch_cache); - pipe_mutex_init(screen->lock); + (void) mtx_init(&screen->lock, mtx_plain); pscreen->destroy = fd_screen_destroy; pscreen->get_param = fd_screen_get_param; pscreen->get_paramf = fd_screen_get_paramf; pscreen->get_shader_param = fd_screen_get_shader_param; + pscreen->get_compute_param = fd_get_compute_param; pscreen->get_compiler_options = fd_get_compiler_options; fd_resource_screen_init(pscreen); @@ -698,6 +871,7 @@ fd_screen_create(struct fd_device *dev) pscreen->fence_reference = fd_fence_ref; pscreen->fence_finish = fd_fence_finish; + pscreen->fence_get_fd = fd_fence_get_fd; slab_create_parent(&screen->transfer_pool, sizeof(struct fd_transfer), 16); diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.h index db9050e71..c5018da4b 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.h +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.h @@ -38,13 +38,14 @@ #include "os/os_thread.h" #include "freedreno_batch_cache.h" +#include "freedreno_util.h" struct fd_bo; struct fd_screen { struct pipe_screen base; - pipe_mutex lock; + mtx_t lock; /* it would be tempting to use pipe_reference here, but that * really doesn't work well if it isn't the first member of @@ -64,6 +65,8 @@ struct fd_screen { uint32_t chip_id; /* coreid:8 majorrev:8 minorrev:8 patch:8 */ uint32_t max_freq; uint32_t max_rts; /* max # of render targets */ + uint32_t gmem_alignw, gmem_alignh; + uint32_t num_vsc_pipes; bool has_timestamp; void *compiler; /* currently unused for a2xx */ @@ -113,11 +116,23 @@ is_a4xx(struct fd_screen *screen) return (screen->gpu_id >= 400) && (screen->gpu_id < 500); } +static inline boolean +is_a5xx(struct fd_screen *screen) +{ + return (screen->gpu_id >= 500) && (screen->gpu_id < 600); +} + /* is it using the ir3 compiler (shader isa introduced with a3xx)? */ static inline boolean is_ir3(struct fd_screen *screen) { - return is_a3xx(screen) || is_a4xx(screen); + return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen); +} + +static inline bool +has_compute(struct fd_screen *screen) +{ + return is_a5xx(screen); } #endif /* FREEDRENO_SCREEN_H_ */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_state.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_state.c index 8c9040545..012e2b3e9 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_state.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_state.c @@ -89,7 +89,8 @@ fd_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask) * index>0 will be UBO's.. 
well, I'll worry about that later */ static void -fd_set_constant_buffer(struct pipe_context *pctx, uint shader, uint index, +fd_set_constant_buffer(struct pipe_context *pctx, + enum pipe_shader_type shader, uint index, const struct pipe_constant_buffer *cb) { struct fd_context *ctx = fd_context(pctx); @@ -108,7 +109,56 @@ fd_set_constant_buffer(struct pipe_context *pctx, uint shader, uint index, so->enabled_mask |= 1 << index; so->dirty_mask |= 1 << index; - ctx->dirty |= FD_DIRTY_CONSTBUF; + ctx->dirty_shader[shader] |= FD_DIRTY_SHADER_CONST; + ctx->dirty |= FD_DIRTY_CONST; +} + +static void +fd_set_shader_buffers(struct pipe_context *pctx, + enum pipe_shader_type shader, + unsigned start, unsigned count, + const struct pipe_shader_buffer *buffers) +{ + struct fd_context *ctx = fd_context(pctx); + struct fd_shaderbuf_stateobj *so = &ctx->shaderbuf[shader]; + unsigned mask = 0; + + if (buffers) { + for (unsigned i = 0; i < count; i++) { + unsigned n = i + start; + struct pipe_shader_buffer *buf = &so->sb[n]; + + if ((buf->buffer == buffers[i].buffer) && + (buf->buffer_offset == buffers[i].buffer_offset) && + (buf->buffer_size == buffers[i].buffer_size)) + continue; + + mask |= BIT(n); + + buf->buffer_offset = buffers[i].buffer_offset; + buf->buffer_size = buffers[i].buffer_size; + pipe_resource_reference(&buf->buffer, buffers[i].buffer); + + if (buf->buffer) + so->enabled_mask |= BIT(n); + else + so->enabled_mask &= ~BIT(n); + } + } else { + mask = (BIT(count) - 1) << start; + + for (unsigned i = 0; i < count; i++) { + unsigned n = i + start; + struct pipe_shader_buffer *buf = &so->sb[n]; + + pipe_resource_reference(&buf->buffer, NULL); + } + + so->enabled_mask &= ~mask; + } + + so->dirty_mask |= mask; + ctx->dirty_shader[shader] |= FD_DIRTY_SHADER_SSBO; } static void @@ -124,13 +174,13 @@ fd_set_framebuffer_state(struct pipe_context *pctx, fd_batch_reference(&old_batch, ctx->batch); if (likely(old_batch)) - fd_hw_query_set_stage(old_batch, old_batch->draw, FD_STAGE_NULL); + fd_batch_set_stage(old_batch, FD_STAGE_NULL); batch = fd_batch_from_fb(&ctx->screen->batch_cache, ctx, framebuffer); fd_batch_reference(&ctx->batch, NULL); fd_reset_wfi(batch); ctx->batch = batch; - ctx->dirty = ~0; + fd_context_all_dirty(ctx); if (old_batch && old_batch->blit && !old_batch->back_blit) { /* for blits, there is not really much point in hanging on @@ -209,8 +259,8 @@ fd_set_vertex_buffers(struct pipe_context *pctx, */ if (ctx->screen->gpu_id < 300) { for (i = 0; i < count; i++) { - bool new_enabled = vb && (vb[i].buffer || vb[i].user_buffer); - bool old_enabled = so->vb[i].buffer || so->vb[i].user_buffer; + bool new_enabled = vb && vb[i].buffer.resource; + bool old_enabled = so->vb[i].buffer.resource != NULL; uint32_t new_stride = vb ? 
vb[i].stride : 0; uint32_t old_stride = so->vb[i].stride; if ((new_enabled != old_enabled) || (new_stride != old_stride)) { @@ -227,24 +277,6 @@ fd_set_vertex_buffers(struct pipe_context *pctx, } static void -fd_set_index_buffer(struct pipe_context *pctx, - const struct pipe_index_buffer *ib) -{ - struct fd_context *ctx = fd_context(pctx); - - if (ib) { - pipe_resource_reference(&ctx->indexbuf.buffer, ib->buffer); - ctx->indexbuf.index_size = ib->index_size; - ctx->indexbuf.offset = ib->offset; - ctx->indexbuf.user_buffer = ib->user_buffer; - } else { - pipe_resource_reference(&ctx->indexbuf.buffer, NULL); - } - - ctx->dirty |= FD_DIRTY_INDEXBUF; -} - -static void fd_blend_state_bind(struct pipe_context *pctx, void *hwcso) { struct fd_context *ctx = fd_context(pctx); @@ -401,6 +433,32 @@ fd_set_stream_output_targets(struct pipe_context *pctx, ctx->dirty |= FD_DIRTY_STREAMOUT; } +static void +fd_bind_compute_state(struct pipe_context *pctx, void *state) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->compute = state; + ctx->dirty_shader[PIPE_SHADER_COMPUTE] |= FD_DIRTY_SHADER_PROG; +} + +static void +fd_set_compute_resources(struct pipe_context *pctx, + unsigned start, unsigned count, struct pipe_surface **prscs) +{ + // TODO +} + +static void +fd_set_global_binding(struct pipe_context *pctx, + unsigned first, unsigned count, struct pipe_resource **prscs, + uint32_t **handles) +{ + /* TODO only used by clover.. seems to need us to return the actual + * gpuaddr of the buffer.. which isn't really exposed to mesa atm. + * How is this used? + */ +} + void fd_state_init(struct pipe_context *pctx) { @@ -409,13 +467,13 @@ fd_state_init(struct pipe_context *pctx) pctx->set_clip_state = fd_set_clip_state; pctx->set_sample_mask = fd_set_sample_mask; pctx->set_constant_buffer = fd_set_constant_buffer; + pctx->set_shader_buffers = fd_set_shader_buffers; pctx->set_framebuffer_state = fd_set_framebuffer_state; pctx->set_polygon_stipple = fd_set_polygon_stipple; pctx->set_scissor_states = fd_set_scissor_states; pctx->set_viewport_states = fd_set_viewport_states; pctx->set_vertex_buffers = fd_set_vertex_buffers; - pctx->set_index_buffer = fd_set_index_buffer; pctx->bind_blend_state = fd_blend_state_bind; pctx->delete_blend_state = fd_blend_state_delete; @@ -433,4 +491,10 @@ fd_state_init(struct pipe_context *pctx) pctx->create_stream_output_target = fd_create_stream_output_target; pctx->stream_output_target_destroy = fd_stream_output_target_destroy; pctx->set_stream_output_targets = fd_set_stream_output_targets; + + if (has_compute(fd_screen(pctx->screen))) { + pctx->bind_compute_state = fd_bind_compute_state; + pctx->set_compute_resources = fd_set_compute_resources; + pctx->set_global_binding = fd_set_global_binding; + } } diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_texture.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_texture.h index 612453299..b12ce2f12 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_texture.h +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_texture.h @@ -32,7 +32,7 @@ #include "pipe/p_context.h" void fd_sampler_states_bind(struct pipe_context *pctx, - unsigned shader, unsigned start, + enum pipe_shader_type shader, unsigned start, unsigned nr, void **hwcso); void fd_set_sampler_views(struct pipe_context *pctx, diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_util.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_util.c index c8f2127c9..5d0ea4eea 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_util.c +++ 
b/lib/mesa/src/gallium/drivers/freedreno/freedreno_util.c @@ -67,6 +67,26 @@ fd_pipe2index(enum pipe_format format) } } +/* we need to special case a bit the depth/stencil restore, because we are + * using the texture sampler to blit into the depth/stencil buffer, *not* + * into a color buffer. Otherwise fdN_tex_swiz() will do the wrong thing, + * as it is assuming that you are sampling into normal render target.. + */ +enum pipe_format +fd_gmem_restore_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + return PIPE_FORMAT_R8G8B8A8_UNORM; + case PIPE_FORMAT_Z16_UNORM: + return PIPE_FORMAT_R8G8_UNORM; + case PIPE_FORMAT_S8_UINT: + return PIPE_FORMAT_R8_UNORM; + default: + return format; + } +} enum adreno_rb_blend_factor fd_blend_factor(unsigned factor) diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_util.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_util.h index 30097008e..14fcf1d67 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_util.h +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_util.h @@ -46,6 +46,7 @@ enum adreno_rb_depth_format fd_pipe2depth(enum pipe_format format); enum pc_di_index_size fd_pipe2index(enum pipe_format format); +enum pipe_format fd_gmem_restore_format(enum pipe_format format); enum adreno_rb_blend_factor fd_blend_factor(unsigned factor); enum adreno_pa_su_sc_draw fd_polygon_mode(unsigned mode); enum adreno_stencil_op fd_stencil_op(unsigned op); @@ -57,8 +58,9 @@ enum adreno_stencil_op fd_stencil_op(unsigned op); #define A2XX_MAX_RENDER_TARGETS 1 #define A3XX_MAX_RENDER_TARGETS 4 #define A4XX_MAX_RENDER_TARGETS 8 +#define A5XX_MAX_RENDER_TARGETS 8 -#define MAX_RENDER_TARGETS A4XX_MAX_RENDER_TARGETS +#define MAX_RENDER_TARGETS A5XX_MAX_RENDER_TARGETS #define FD_DBG_MSGS 0x0001 #define FD_DBG_DISASM 0x0002 @@ -74,9 +76,10 @@ enum adreno_stencil_op fd_stencil_op(unsigned op); #define FD_DBG_SHADERDB 0x0800 #define FD_DBG_FLUSH 0x1000 #define FD_DBG_DEQP 0x2000 -#define FD_DBG_NIR 0x4000 -#define FD_DBG_REORDER 0x8000 -#define FD_DBG_BSTAT 0x10000 +#define FD_DBG_INORDER 0x4000 +#define FD_DBG_BSTAT 0x8000 +#define FD_DBG_NOGROW 0x10000 +#define FD_DBG_LRZ 0x20000 extern int fd_mesa_debug; extern bool fd_binning_enabled; @@ -176,6 +179,7 @@ fd_half_precision(struct pipe_framebuffer_state *pfb) #define LOG_DWORDS 0 static inline void emit_marker(struct fd_ringbuffer *ring, int scratch_idx); +static inline void emit_marker5(struct fd_ringbuffer *ring, int scratch_idx); static inline void OUT_RING(struct fd_ringbuffer *ring, uint32_t data) @@ -202,39 +206,45 @@ OUT_RINGP(struct fd_ringbuffer *ring, uint32_t data, })); } +/* + * NOTE: OUT_RELOC*() is 2 dwords (64b) on a5xx+ + */ + static inline void OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, - uint32_t offset, uint32_t or, int32_t shift) + uint32_t offset, uint64_t or, int32_t shift) { if (LOG_DWORDS) { DBG("ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring, (uint32_t)(ring->cur - ring->last_start), bo, offset, shift); } debug_assert(offset < fd_bo_size(bo)); - fd_ringbuffer_reloc(ring, &(struct fd_reloc){ + fd_ringbuffer_reloc2(ring, &(struct fd_reloc){ .bo = bo, .flags = FD_RELOC_READ, .offset = offset, .or = or, .shift = shift, + .orhi = or >> 32, }); } static inline void OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo, - uint32_t offset, uint32_t or, int32_t shift) + uint32_t offset, uint64_t or, int32_t shift) { if (LOG_DWORDS) { DBG("ring[%p]: OUT_RELOCW %04x: %p+%u << %d", ring, 
(uint32_t)(ring->cur - ring->last_start), bo, offset, shift); } debug_assert(offset < fd_bo_size(bo)); - fd_ringbuffer_reloc(ring, &(struct fd_reloc){ + fd_ringbuffer_reloc2(ring, &(struct fd_reloc){ .bo = bo, .flags = FD_RELOC_READ | FD_RELOC_WRITE, .offset = offset, .or = or, .shift = shift, + .orhi = or >> 32, }); } @@ -244,9 +254,18 @@ static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords) fd_ringbuffer_grow(ring, ndwords); } +static inline uint32_t +__gpu_id(struct fd_ringbuffer *ring) +{ + uint64_t val; + fd_pipe_get_param(ring->pipe, FD_GPU_ID, &val); + return val; +} + static inline void OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt) { + debug_assert(__gpu_id(ring) < 500); BEGIN_RING(ring, cnt+1); OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF)); } @@ -254,6 +273,7 @@ OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt) static inline void OUT_PKT2(struct fd_ringbuffer *ring) { + debug_assert(__gpu_id(ring) < 500); BEGIN_RING(ring, 1); OUT_RING(ring, CP_TYPE2_PKT); } @@ -261,10 +281,48 @@ OUT_PKT2(struct fd_ringbuffer *ring) static inline void OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt) { + debug_assert(__gpu_id(ring) < 500); BEGIN_RING(ring, cnt+1); OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8)); } +/* + * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3 + */ + +static inline unsigned +_odd_parity_bit(unsigned val) +{ + /* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel + * note that we want odd parity so 0x6996 is inverted. + */ + val ^= val >> 16; + val ^= val >> 8; + val ^= val >> 4; + val &= 0xf; + return (~0x6996 >> val) & 1; +} + +static inline void +OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt) +{ + BEGIN_RING(ring, cnt+1); + OUT_RING(ring, CP_TYPE4_PKT | cnt | + (_odd_parity_bit(cnt) << 7) | + ((regindx & 0x3ffff) << 8) | + ((_odd_parity_bit(regindx) << 27))); +} + +static inline void +OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt) +{ + BEGIN_RING(ring, cnt+1); + OUT_RING(ring, CP_TYPE7_PKT | cnt | + (_odd_parity_bit(cnt) << 15) | + ((opcode & 0x7f) << 16) | + ((_odd_parity_bit(opcode) << 23))); +} + static inline void OUT_WFI(struct fd_ringbuffer *ring) { @@ -273,10 +331,18 @@ OUT_WFI(struct fd_ringbuffer *ring) } static inline void +OUT_WFI5(struct fd_ringbuffer *ring) +{ + OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0); +} + +static inline void __OUT_IB(struct fd_ringbuffer *ring, bool prefetch, struct fd_ringbuffer *target) { unsigned count = fd_ringbuffer_cmd_count(target); + debug_assert(__gpu_id(ring) < 500); + /* for debug after a lock up, write a unique counter value * to scratch6 for each IB, to make it easier to match up * register dumps to cmdstream. The combination of IB and @@ -297,7 +363,34 @@ __OUT_IB(struct fd_ringbuffer *ring, bool prefetch, struct fd_ringbuffer *target emit_marker(ring, 6); } +static inline void +__OUT_IB5(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) +{ + unsigned count = fd_ringbuffer_cmd_count(target); + + /* for debug after a lock up, write a unique counter value + * to scratch6 for each IB, to make it easier to match up + * register dumps to cmdstream. The combination of IB and + * DRAW (scratch7) is enough to "triangulate" the particular + * draw that caused lockup. 
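OUT_PKT4() and OUT_PKT7() above build the a5xx CP_TYPE4/CP_TYPE7 packet headers, which carry odd-parity bits for the count field and for the register-index/opcode field. A standalone harness for the parity trick (0x6996 is a 16-entry parity table, inverted because the stored bit must make the overall parity odd):

#include <stdio.h>

/* Same trick as _odd_parity_bit(): fold the value down to 4 bits and look
 * the result up in the 0x6996 parity constant (inverted, because the
 * stored bit must make the overall parity odd). */
static unsigned odd_parity_bit(unsigned val)
{
    val ^= val >> 16;
    val ^= val >> 8;
    val ^= val >> 4;
    val &= 0xf;
    return (~0x6996 >> val) & 1;
}

/* Naive reference: 1 when the popcount is even, so that adding the stored
 * bit gives an odd total. */
static unsigned odd_parity_ref(unsigned val)
{
    unsigned bits = 0;
    for (; val; val >>= 1)
        bits += val & 1;
    return (bits & 1) ^ 1;
}

int main(void)
{
    for (unsigned v = 0; v < 0x40000; v++) {
        if (odd_parity_bit(v) != odd_parity_ref(v)) {
            printf("mismatch at 0x%x\n", v);
            return 1;
        }
    }
    printf("parity helper matches the reference for all 18-bit values\n");
    return 0;
}

Checking all 18-bit values covers the widest field the helper is applied to (the register index masked with 0x3ffff in OUT_PKT4).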
+ */ + emit_marker5(ring, 6); + + for (unsigned i = 0; i < count; i++) { + uint32_t dwords; + OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); + dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4; + assert(dwords > 0); + OUT_RING(ring, dwords); + } + + emit_marker5(ring, 6); +} + /* CP_SCRATCH_REG4 is used to hold base address for query results: */ +// XXX annoyingly scratch regs move on a5xx.. and additionally different +// packet types.. so freedreno_query_hw is going to need a bit of +// rework.. #define HW_QUERY_BASE_REG REG_AXXX_CP_SCRATCH_REG4 static inline void @@ -312,6 +405,16 @@ emit_marker(struct fd_ringbuffer *ring, int scratch_idx) OUT_RING(ring, ++marker_cnt); } +static inline void +emit_marker5(struct fd_ringbuffer *ring, int scratch_idx) +{ + extern unsigned marker_cnt; +//XXX unsigned reg = REG_A5XX_CP_SCRATCH_REG(scratch_idx); + unsigned reg = 0x00000b78 + scratch_idx; + OUT_PKT4(ring, reg, 1); + OUT_RING(ring, ++marker_cnt); +} + /* helper to get numeric value from environment variable.. mostly * just leaving this here because it is helpful to brute-force figure * out unknown formats, etc, which blob driver does not support: @@ -341,4 +444,27 @@ pack_rgba(enum pipe_format format, const float *rgba) #define foreach_bit(b, mask) \ for (uint32_t _m = (mask); _m && ({(b) = u_bit_scan(&_m); 1;});) + +#define BIT(bit) (1u << bit) + +/* + * a4xx+ helpers: + */ + +static inline enum a4xx_state_block +fd4_stage2shadersb(enum shader_t type) +{ + switch (type) { + case SHADER_VERTEX: + return SB4_VS_SHADER; + case SHADER_FRAGMENT: + return SB4_FS_SHADER; + case SHADER_COMPUTE: + return SB4_CS_SHADER; + default: + unreachable("bad shader type"); + return ~0; + } +} + #endif /* FREEDRENO_UTIL_H_ */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/instr-a3xx.h b/lib/mesa/src/gallium/drivers/freedreno/ir3/instr-a3xx.h index 87083fd1e..b429b3b9f 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/ir3/instr-a3xx.h +++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/instr-a3xx.h @@ -189,10 +189,10 @@ typedef enum { OPC_ATOMIC_AND = _OPC(6, 24), OPC_ATOMIC_OR = _OPC(6, 25), OPC_ATOMIC_XOR = _OPC(6, 26), - OPC_LDGB_TYPED_4D = _OPC(6, 27), - OPC_STGB_4D_4 = _OPC(6, 28), + OPC_LDGB = _OPC(6, 27), + OPC_STGB = _OPC(6, 28), OPC_STIB = _OPC(6, 29), - OPC_LDC_4 = _OPC(6, 30), + OPC_LDC = _OPC(6, 30), OPC_LDLV = _OPC(6, 31), /* meta instructions (category -1): */ @@ -296,6 +296,9 @@ typedef struct PACKED { int32_t immed : 20; uint32_t dummy1 : 12; } a4xx; + struct PACKED { + uint32_t immed : 32; + } a5xx; }; /* dword1: */ @@ -636,18 +639,63 @@ typedef struct PACKED { uint32_t dst : 8; uint32_t mustbe0 : 1; - uint32_t pad0 : 23; + uint32_t idx : 8; + uint32_t pad0 : 15; } instr_cat6d_t; -/* I think some of the other cat6 instructions use additional - * sub-encodings.. +/* ldgb and atomics.. atomics use 3rd src and pad0=1, pad3=3. For + * ldgb pad0=0, pad3=2 + */ +typedef struct PACKED { + /* dword0: */ + uint32_t pad0 : 1; + uint32_t src3 : 8; + uint32_t d : 2; + uint32_t typed : 1; + uint32_t type_size : 2; + uint32_t src1 : 8; + uint32_t src1_im : 1; + uint32_t src2_im : 1; + uint32_t src2 : 8; + + /* dword1: */ + uint32_t dst : 8; + uint32_t mustbe0 : 1; + uint32_t src_ssbo : 8; + uint32_t pad2 : 3; // type + uint32_t pad3 : 2; + uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat +} instr_cat6ldgb_t; + +/* stgb, pad0=0, pad3=2 */ +typedef struct PACKED { + /* dword0: */ + uint32_t mustbe1 : 1; // ??? 
+ uint32_t src1 : 8; + uint32_t d : 2; + uint32_t typed : 1; + uint32_t type_size : 2; + uint32_t pad0 : 9; + uint32_t src2_im : 1; + uint32_t src2 : 8; + + /* dword1: */ + uint32_t src3 : 8; + uint32_t src3_im : 1; + uint32_t dst_ssbo : 8; + uint32_t pad2 : 3; // type + uint32_t pad3 : 2; + uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat +} instr_cat6stgb_t; typedef union PACKED { instr_cat6a_t a; instr_cat6b_t b; instr_cat6c_t c; instr_cat6d_t d; + instr_cat6ldgb_t ldgb; + instr_cat6stgb_t stgb; struct PACKED { /* dword0: */ uint32_t src_off : 1; @@ -730,4 +778,24 @@ static inline bool is_madsh(opc_t opc) } } +static inline bool is_atomic(opc_t opc) +{ + switch (opc) { + case OPC_ATOMIC_ADD: + case OPC_ATOMIC_SUB: + case OPC_ATOMIC_XCHG: + case OPC_ATOMIC_INC: + case OPC_ATOMIC_DEC: + case OPC_ATOMIC_CMPXCHG: + case OPC_ATOMIC_MIN: + case OPC_ATOMIC_MAX: + case OPC_ATOMIC_AND: + case OPC_ATOMIC_OR: + case OPC_ATOMIC_XOR: + return true; + default: + return false; + } +} + #endif /* INSTR_A3XX_H_ */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3.c b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3.c index 78ec1cc48..d703f4e7f 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3.c +++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3.c @@ -40,13 +40,13 @@ */ void * ir3_alloc(struct ir3 *shader, int sz) { - return ralloc_size(shader, sz); + return rzalloc_size(shader, sz); /* TODO: don't use rzalloc */ } struct ir3 * ir3_create(struct ir3_compiler *compiler, unsigned nin, unsigned nout) { - struct ir3 *shader = ralloc(compiler, struct ir3); + struct ir3 *shader = rzalloc(compiler, struct ir3); shader->compiler = compiler; shader->ninputs = nin; @@ -63,12 +63,6 @@ struct ir3 * ir3_create(struct ir3_compiler *compiler, void ir3_destroy(struct ir3 *shader) { - /* TODO convert the dynamic array to ralloc too: */ - free(shader->indirects); - free(shader->predicates); - free(shader->baryfs); - free(shader->keeps); - free(shader->astc_srgb); ralloc_free(shader); } @@ -112,7 +106,7 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info, info->max_const = MAX2(info->max_const, max); } else if (val.num == 63) { /* ignore writes to dummy register r63.x */ - } else if ((max != REG_A0) && (max != REG_P0)) { + } else if (max < 48) { if (reg->flags & IR3_REG_HALF) { info->max_half_reg = MAX2(info->max_half_reg, max); } else { @@ -129,7 +123,9 @@ static int emit_cat0(struct ir3_instruction *instr, void *ptr, { instr_cat0_t *cat0 = ptr; - if (info->gpu_id >= 400) { + if (info->gpu_id >= 500) { + cat0->a5xx.immed = instr->cat0.immed; + } else if (info->gpu_id >= 400) { cat0->a4xx.immed = instr->cat0.immed; } else { cat0->a3xx.immed = instr->cat0.immed; @@ -479,6 +475,13 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr, struct ir3_register *dst, *src1, *src2; instr_cat6_t *cat6 = ptr; + cat6->type = instr->cat6.type; + cat6->opc = instr->opc; + cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat6->sync = !!(instr->flags & IR3_INSTR_SY); + cat6->g = !!(instr->flags & IR3_INSTR_G); + cat6->opc_cat = 6; + /* the "dst" for a store instruction is (from the perspective * of data flow in the shader, ie. register use/def, etc) in * fact a register that is read by the instruction, rather @@ -504,7 +507,65 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr, * indicate to use the src_off encoding even if offset is zero * (but then what to do about dst_off?) 
*/ - if (instr->cat6.src_offset || (instr->opc == OPC_LDG)) { + if ((instr->opc == OPC_LDGB) || is_atomic(instr->opc)) { + struct ir3_register *src3 = instr->regs[3]; + instr_cat6ldgb_t *ldgb = ptr; + + /* maybe these two bits both determine the instruction encoding? */ + cat6->src_off = false; + + ldgb->d = 4 - 1; /* always .4d ? */ + ldgb->typed = false; /* TODO true for images */ + ldgb->type_size = instr->cat6.iim_val - 1; + + ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + + /* first src is src_ssbo: */ + iassert(src1->flags & IR3_REG_IMMED); + ldgb->src_ssbo = src1->uim_val; + + /* then next two are src1/src2: */ + ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED); + ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED); + ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED); + ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED); + + if (is_atomic(instr->opc)) { + struct ir3_register *src4 = instr->regs[4]; + ldgb->src3 = reg(src4, info, instr->repeat, 0); + ldgb->pad0 = 0x1; + ldgb->pad3 = 0x3; + } else { + ldgb->pad0 = 0x0; + ldgb->pad3 = 0x2; + } + + return 0; + } else if (instr->opc == OPC_STGB) { + struct ir3_register *src3 = instr->regs[4]; + instr_cat6stgb_t *stgb = ptr; + + /* maybe these two bits both determine the instruction encoding? */ + cat6->src_off = true; + stgb->pad3 = 0x2; + + stgb->d = 4 - 1; /* always .4d ? */ + stgb->typed = false; + stgb->type_size = instr->cat6.iim_val - 1; + + /* first src is dst_ssbo: */ + iassert(dst->flags & IR3_REG_IMMED); + stgb->dst_ssbo = dst->uim_val; + + /* then src1/src2/src3: */ + stgb->src1 = reg(src1, info, instr->repeat, 0); + stgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); + stgb->src2_im = !!(src2->flags & IR3_REG_IMMED); + stgb->src3 = reg(src3, info, instr->repeat, IR3_REG_IMMED); + stgb->src3_im = !!(src3->flags & IR3_REG_IMMED); + + return 0; + } else if (instr->cat6.src_offset || (instr->opc == OPC_LDG)) { instr_cat6a_t *cat6a = ptr; cat6->src_off = true; @@ -540,13 +601,6 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr, cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); } - cat6->type = instr->cat6.type; - cat6->opc = instr->opc; - cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat6->sync = !!(instr->flags & IR3_INSTR_SY); - cat6->g = !!(instr->flags & IR3_INSTR_G); - cat6->opc_cat = 6; - return 0; } @@ -624,7 +678,7 @@ static void insert_instr(struct ir3_block *block, list_addtail(&instr->node, &block->instr_list); if (is_input(instr)) - array_insert(shader->baryfs, instr); + array_insert(shader, shader->baryfs, instr); } struct ir3_block * ir3_block_create(struct ir3 *shader) @@ -727,7 +781,7 @@ ir3_instr_set_address(struct ir3_instruction *instr, if (instr->address != addr) { struct ir3 *ir = instr->block->shader; instr->address = addr; - array_insert(ir->indirects, instr); + array_insert(ir, ir->indirects, instr); } } diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3.h b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3.h index e0d0eeebc..de7a2a887 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3.h @@ -58,8 +58,14 @@ struct ir3_register { IR3_REG_CONST = 0x001, IR3_REG_IMMED = 0x002, IR3_REG_HALF = 0x004, - IR3_REG_RELATIV= 0x008, - IR3_REG_R = 0x010, + /* high registers are used for some things in compute shaders, + * for example. 
Seems to be for things that are global to all + * threads in a wave, so possibly these are global/shared by + * all the threads in the wave? + */ + IR3_REG_HIGH = 0x008, + IR3_REG_RELATIV= 0x010, + IR3_REG_R = 0x020, /* Most instructions, it seems, can do float abs/neg but not * integer. The CP pass needs to know what is intended (int or * float) in order to do the right thing. For this reason the @@ -68,23 +74,23 @@ struct ir3_register { * bitwise not, so split that out into a new flag to make it * more clear. */ - IR3_REG_FNEG = 0x020, - IR3_REG_FABS = 0x040, - IR3_REG_SNEG = 0x080, - IR3_REG_SABS = 0x100, - IR3_REG_BNOT = 0x200, - IR3_REG_EVEN = 0x400, - IR3_REG_POS_INF= 0x800, + IR3_REG_FNEG = 0x040, + IR3_REG_FABS = 0x080, + IR3_REG_SNEG = 0x100, + IR3_REG_SABS = 0x200, + IR3_REG_BNOT = 0x400, + IR3_REG_EVEN = 0x800, + IR3_REG_POS_INF= 0x1000, /* (ei) flag, end-input? Set on last bary, presumably to signal * that the shader needs no more input: */ - IR3_REG_EI = 0x1000, + IR3_REG_EI = 0x2000, /* meta-flags, for intermediate stages of IR, ie. * before register assignment is done: */ - IR3_REG_SSA = 0x2000, /* 'instr' is ptr to assigning instr */ - IR3_REG_ARRAY = 0x4000, - IR3_REG_PHI_SRC= 0x8000, /* phi src, regs[0]->instr points to phi */ + IR3_REG_SSA = 0x4000, /* 'instr' is ptr to assigning instr */ + IR3_REG_ARRAY = 0x8000, + IR3_REG_PHI_SRC= 0x10000, /* phi src, regs[0]->instr points to phi */ } flags; union { @@ -220,7 +226,7 @@ struct ir3_instruction { type_t type; int src_offset; int dst_offset; - int iim_val; + int iim_val; /* for ldgb/stgb, # of components */ } cat6; /* for meta-instructions, just used to hold extra data * before instruction scheduling, etc @@ -337,6 +343,21 @@ static inline int ir3_neighbor_count(struct ir3_instruction *instr) return num; } +/* + * Stupid/simple growable array implementation: + */ +#define DECLARE_ARRAY(type, name) \ + unsigned name ## _count, name ## _sz; \ + type * name; + +#define array_insert(ctx, arr, val) do { \ + if (arr ## _count == arr ## _sz) { \ + arr ## _sz = MAX2(2 * arr ## _sz, 16); \ + arr = reralloc_size(ctx, arr, arr ## _sz * sizeof(arr[0])); \ + } \ + arr[arr ##_count++] = val; \ + } while (0) + struct ir3 { struct ir3_compiler *compiler; @@ -350,8 +371,7 @@ struct ir3 { * threads in a group are killed before the last bary.f gets * a chance to signal end of input (ei). */ - unsigned baryfs_count, baryfs_sz; - struct ir3_instruction **baryfs; + DECLARE_ARRAY(struct ir3_instruction *, baryfs); /* Track all indirect instructions (read and write). To avoid * deadlock scenario where an address register gets scheduled, @@ -363,23 +383,15 @@ struct ir3 { * convenient list of instructions that reference some address * register simplifies this. 
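The DECLARE_ARRAY()/array_insert() helpers above replace the hand-rolled count/size/pointer triples, and array_insert() now takes a memory context and grows through reralloc_size(), which is what lets ir3_destroy() drop every per-shader array with a single ralloc_free(). The same pattern, backed by plain realloc() so it builds outside Mesa:

#include <stdio.h>
#include <stdlib.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

/* Same shape as the ir3.h macros, but backed by plain realloc() so it
 * compiles standalone; the driver version passes a ralloc context and
 * grows with reralloc_size() instead. */
#define DECLARE_ARRAY(type, name) \
    unsigned name ## _count, name ## _sz; \
    type *name

#define array_insert(arr, val) do { \
        if (arr ## _count == arr ## _sz) { \
            arr ## _sz = MAX2(2 * arr ## _sz, 16); \
            arr = realloc(arr, arr ## _sz * sizeof(arr[0])); \
        } \
        arr[arr ## _count++] = (val); \
    } while (0)

struct shader {
    DECLARE_ARRAY(int, keeps);
};

int main(void)
{
    struct shader s = { 0 };

    for (int i = 0; i < 40; i++)
        array_insert(s.keeps, i);    /* capacity grows 16 -> 32 -> 64 */

    printf("%u entries, capacity %u\n", s.keeps_count, s.keeps_sz);
    free(s.keeps);
    return 0;
}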
*/ - unsigned indirects_count, indirects_sz; - struct ir3_instruction **indirects; - /* and same for instructions that consume predicate register: */ - unsigned predicates_count, predicates_sz; - struct ir3_instruction **predicates; + DECLARE_ARRAY(struct ir3_instruction *, indirects); - /* Track instructions which do not write a register but other- - * wise must not be discarded (such as kill, stg, etc) - */ - unsigned keeps_count, keeps_sz; - struct ir3_instruction **keeps; + /* and same for instructions that consume predicate register: */ + DECLARE_ARRAY(struct ir3_instruction *, predicates); /* Track texture sample instructions which need texture state * patched in (for astc-srgb workaround): */ - unsigned astc_srgb_count, astc_srgb_sz; - struct ir3_instruction **astc_srgb; + DECLARE_ARRAY(struct ir3_instruction *, astc_srgb); /* List of blocks: */ struct list_head block_list; @@ -388,14 +400,14 @@ struct ir3 { struct list_head array_list; }; -typedef struct nir_variable nir_variable; +typedef struct nir_register nir_register; struct ir3_array { struct list_head node; unsigned length; unsigned id; - nir_variable *var; + nir_register *r; /* We track the last write and last access (read or write) to * setup dependencies on instructions that read or write the @@ -435,6 +447,11 @@ struct ir3_block { uint16_t start_ip, end_ip; + /* Track instructions which do not write a register but other- + * wise must not be discarded (such as kill, stg, etc) + */ + DECLARE_ARRAY(struct ir3_instruction *, keeps); + /* used for per-pass extra block data. Mainly used right * now in RA step to track livein/liveout. */ @@ -596,6 +613,7 @@ is_store(struct ir3_instruction *instr) */ switch (instr->opc) { case OPC_STG: + case OPC_STGB: case OPC_STP: case OPC_STL: case OPC_STLW: @@ -611,11 +629,12 @@ static inline bool is_load(struct ir3_instruction *instr) { switch (instr->opc) { case OPC_LDG: + case OPC_LDGB: case OPC_LDL: case OPC_LDP: case OPC_L2G: case OPC_LDLW: - case OPC_LDC_4: + case OPC_LDC: case OPC_LDLV: /* probably some others too.. 
*/ return true; @@ -854,14 +873,6 @@ static inline unsigned ir3_cat3_absneg(opc_t opc) } } -#define array_insert(arr, val) do { \ - if (arr ## _count == arr ## _sz) { \ - arr ## _sz = MAX2(2 * arr ## _sz, 16); \ - arr = realloc(arr, arr ## _sz * sizeof(arr[0])); \ - } \ - arr[arr ##_count++] = val; \ - } while (0) - /* iterator for an instructions's sources (reg), also returns src #: */ #define foreach_src_n(__srcreg, __n, __instr) \ if ((__instr)->regs_count) \ @@ -925,7 +936,7 @@ int ir3_ra(struct ir3 *ir3, enum shader_t type, bool frag_coord, bool frag_face); /* legalize: */ -void ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary); +void ir3_legalize(struct ir3 *ir, bool *has_samp, bool *has_ssbo, int *max_bary); /* ************************************************************************* */ /* instruction helpers */ @@ -1019,6 +1030,24 @@ ir3_##name(struct ir3_block *block, \ return instr; \ } +#define INSTR4(name) \ +static inline struct ir3_instruction * \ +ir3_##name(struct ir3_block *block, \ + struct ir3_instruction *a, unsigned aflags, \ + struct ir3_instruction *b, unsigned bflags, \ + struct ir3_instruction *c, unsigned cflags, \ + struct ir3_instruction *d, unsigned dflags) \ +{ \ + struct ir3_instruction *instr = \ + ir3_instr_create2(block, OPC_##name, 5); \ + ir3_reg_create(instr, 0, 0); /* dst */ \ + ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \ + ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \ + ir3_reg_create(instr, 0, IR3_REG_SSA | cflags)->instr = c; \ + ir3_reg_create(instr, 0, IR3_REG_SSA | dflags)->instr = d; \ + return instr; \ +} + /* cat0 instructions: */ INSTR0(BR); INSTR0(JUMP); @@ -1136,6 +1165,19 @@ ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, INSTR2(LDLV) INSTR2(LDG) INSTR3(STG) +INSTR3(LDGB); +INSTR4(STGB); +INSTR4(ATOMIC_ADD); +INSTR4(ATOMIC_SUB); +INSTR4(ATOMIC_XCHG); +INSTR4(ATOMIC_INC); +INSTR4(ATOMIC_DEC); +INSTR4(ATOMIC_CMPXCHG); +INSTR4(ATOMIC_MIN); +INSTR4(ATOMIC_MAX); +INSTR4(ATOMIC_AND); +INSTR4(ATOMIC_OR); +INSTR4(ATOMIC_XOR); /* ************************************************************************* */ /* split this out or find some helper to use.. like main/bitset.h.. 
*/ diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c index d749bfad8..fdec3f20b 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c +++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c @@ -59,6 +59,45 @@ static void dump_info(struct ir3_shader_variant *so, const char *str) int st_glsl_type_size(const struct glsl_type *type); +static void +insert_sorted(struct exec_list *var_list, nir_variable *new_var) +{ + nir_foreach_variable(var, var_list) { + if (var->data.location > new_var->data.location) { + exec_node_insert_node_before(&var->node, &new_var->node); + return; + } + } + exec_list_push_tail(var_list, &new_var->node); +} + +static void +sort_varyings(struct exec_list *var_list) +{ + struct exec_list new_list; + exec_list_make_empty(&new_list); + nir_foreach_variable_safe(var, var_list) { + exec_node_remove(&var->node); + insert_sorted(&new_list, var); + } + exec_list_move_nodes_to(&new_list, var_list); +} + +static void +fixup_varying_slots(struct exec_list *var_list) +{ + nir_foreach_variable(var, var_list) { + if (var->data.location >= VARYING_SLOT_VAR0) { + var->data.location += 9; + } else if ((var->data.location >= VARYING_SLOT_TEX0) && + (var->data.location <= VARYING_SLOT_TEX7)) { + var->data.location += VARYING_SLOT_VAR0 - VARYING_SLOT_TEX0; + } + } +} + +static struct ir3_compiler *compiler; + static nir_shader * load_glsl(unsigned num_files, char* const* files, gl_shader_stage stage) { @@ -72,9 +111,7 @@ load_glsl(unsigned num_files, char* const* files, gl_shader_stage stage) if (!prog) errx(1, "couldn't parse `%s'", files[0]); - nir_shader *nir = glsl_to_nir(prog, stage, ir3_get_compiler_options()); - - standalone_compiler_cleanup(prog); + nir_shader *nir = glsl_to_nir(prog, stage, ir3_get_compiler_options(compiler)); /* required NIR passes: */ /* TODO cmdline args for some of the conditional lowering passes? */ @@ -90,7 +127,38 @@ load_glsl(unsigned num_files, char* const* files, gl_shader_stage stage) NIR_PASS_V(nir, nir_lower_var_copies); NIR_PASS_V(nir, nir_lower_io_types); - // TODO nir_assign_var_locations?? + switch (stage) { + case MESA_SHADER_VERTEX: + nir_assign_var_locations(&nir->inputs, + &nir->num_inputs, + st_glsl_type_size); + + /* Re-lower global vars, to deal with any dead VS inputs. 
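+	 *
+	 * (Aside, on the fixup_varying_slots() helper above: legacy
+	 * TEX0..TEX7 slots get folded onto VAR0..VAR7 and the generic
+	 * VARYING_SLOT_VARn locations are bumped up by 9, e.g.
+	 *
+	 *   VARYING_SLOT_TEX2 -> VARYING_SLOT_VAR2
+	 *   VARYING_SLOT_VAR0 -> VARYING_SLOT_VAR0 + 9
+	 *
+	 * presumably so the two ranges can't collide in the standalone
+	 * compiler.)
+	 *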
*/ + NIR_PASS_V(nir, nir_lower_global_vars_to_local); + + sort_varyings(&nir->outputs); + nir_assign_var_locations(&nir->outputs, + &nir->num_outputs, + st_glsl_type_size); + fixup_varying_slots(&nir->outputs); + break; + case MESA_SHADER_FRAGMENT: + sort_varyings(&nir->inputs); + nir_assign_var_locations(&nir->inputs, + &nir->num_inputs, + st_glsl_type_size); + fixup_varying_slots(&nir->inputs); + nir_assign_var_locations(&nir->outputs, + &nir->num_outputs, + st_glsl_type_size); + break; + default: + errx(1, "unhandled shader stage: %d", stage); + } + + nir_assign_var_locations(&nir->uniforms, + &nir->num_uniforms, + st_glsl_type_size); NIR_PASS_V(nir, nir_lower_system_values); NIR_PASS_V(nir, nir_lower_io, nir_var_all, st_glsl_type_size, 0); @@ -298,6 +366,8 @@ int main(int argc, char **argv) nir_shader *nir; + compiler = ir3_compiler_create(NULL, gpu_id); + if (s.from_tgsi) { struct tgsi_token toks[65536]; @@ -324,7 +394,7 @@ int main(int argc, char **argv) return -1; } - s.compiler = ir3_compiler_create(NULL, gpu_id); + s.compiler = compiler; s.nir = ir3_optimize_nir(&s, nir, NULL); v.key = key; diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_compiler.c index 37ad73380..ff4d1b09e 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_compiler.c +++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_compiler.c @@ -38,8 +38,3 @@ struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id compiler->set = ir3_ra_alloc_reg_set(compiler); return compiler; } - -void ir3_compiler_destroy(struct ir3_compiler *compiler) -{ - ralloc_free(compiler); -} diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_compiler.h b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_compiler.h index 0ad689ca1..671332e81 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_compiler.h +++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_compiler.h @@ -41,7 +41,6 @@ struct ir3_compiler { }; struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id); -void ir3_compiler_destroy(struct ir3_compiler *compiler); int ir3_compile_shader_nir(struct ir3_compiler *compiler, struct ir3_shader_variant *so); diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index b1b9d6b97..764aeb49f 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -71,6 +71,23 @@ struct ir3_compile { /* For vertex shaders, keep track of the system values sources */ struct ir3_instruction *vertex_id, *basevertex, *instance_id; + /* Compute shader inputs: */ + struct ir3_instruction *local_invocation_id, *work_group_id; + + /* For SSBO's and atomics, we need to preserve order, such + * that reads don't overtake writes, and the order of writes + * is preserved. Atomics are considered as a write. + * + * To do this, we track last write and last access, in a + * similar way to ir3_array. But since we don't know whether + * the same SSBO is bound to multiple slots, so we simply + * track this globally rather than per-SSBO. + * + * TODO should we track this per block instead? I guess it + * shouldn't matter much? 
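+	 *
+	 * Roughly (see mark_ssbo_read()/mark_ssbo_write() further down):
+	 *
+	 *   read:   instr->regs[0]->instr = last_write;    // a read waits on the last write
+	 *           last_access = instr;
+	 *   write:  instr->regs[0]->instr = last_access;   // a write waits on everything so far
+	 *           last_write = last_access = instr;
+	 *
+	 * i.e. the ordering edge is hung off the dst register (as an SSA
+	 * style dependency) so later passes keep the original order.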
+ */ + struct ir3_instruction *last_write, *last_access; + /* mapping from nir_register to defining instruction: */ struct hash_table *def_ht; @@ -81,8 +98,17 @@ struct ir3_compile { * duplicate instruction sequences (which our backend does not * try to clean up, since that should be done as the NIR stage) * we cache the address value generated for a given src value: + * + * Note that we have to cache these per alignment, since same + * src used for an array of vec1 cannot be also used for an + * array of vec4. + */ + struct hash_table *addr_ht[4]; + + /* last dst array, for indirect we need to insert a var-store. */ - struct hash_table *addr_ht; + struct ir3_instruction **last_dst; + unsigned last_dst_n; /* maps nir_block to ir3_block, mostly for the purposes of * figuring out the blocks successors @@ -119,6 +145,11 @@ struct ir3_compile { bool error; }; +/* gpu pointer size in units of 32bit registers/slots */ +static unsigned pointer_size(struct ir3_compile *ctx) +{ + return (ctx->compiler->gpu_id >= 500) ? 2 : 1; +} static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val); static struct ir3_block * get_block(struct ir3_compile *ctx, nir_block *nblock); @@ -173,6 +204,11 @@ compile_init(struct ir3_compiler *compiler, ctx->s = so->shader->nir; } + /* this needs to be the last pass run, so do this here instead of + * in ir3_optimize_nir(): + */ + NIR_PASS_V(ctx->s, nir_lower_locals_to_regs); + if (fd_mesa_debug & FD_DBG_DISASM) { DBG("dump nir%dv%d: type=%d, k={bp=%u,cts=%u,hp=%u}", so->shader->id, so->id, so->type, @@ -181,31 +217,53 @@ compile_init(struct ir3_compiler *compiler, nir_print_shader(ctx->s, stdout); } - so->first_driver_param = so->first_immediate = align(ctx->s->num_uniforms, 4); + so->num_uniforms = ctx->s->num_uniforms; + so->num_ubos = ctx->s->info.num_ubos; - /* Layout of constant registers: + /* Layout of constant registers, each section aligned to vec4. Note + * that pointer size (ubo, etc) changes depending on generation. * - * num_uniform * vec4 - user consts - * 4 * vec4 - UBO addresses + * user consts + * UBO addresses * if (vertex shader) { - * N * vec4 - driver params (IR3_DP_*) - * 1 * vec4 - stream-out addresses + * driver params (IR3_DP_*) + * if (stream_output.num_outputs > 0) + * stream-out addresses * } + * immediates * - * TODO this could be made more dynamic, to at least skip sections - * that we don't need.. + * Immediates go last mostly because they are inserted in the CP pass + * after the nir -> ir3 frontend. 
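+	 *
+	 * As a rough worked example (counts hypothetical): an a5xx vertex
+	 * shader (ptrsz == 2) with 2 UBOs ends up with
+	 *
+	 *   constbase.ubo          = align(num_uniforms, 4)
+	 *   constbase.driver_param = ubo + align(2 * 2, 4) / 4            (one vec4 of UBO pointers)
+	 *   constbase.immediate    = driver_param + align(IR3_DP_VS_COUNT, 4) / 4
+	 *
+	 * (no tfbo section in this case: on gpu_id >= 500 the shader-
+	 * emulated stream-out addresses aren't needed)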
*/ + unsigned constoff = align(ctx->s->num_uniforms, 4); + unsigned ptrsz = pointer_size(ctx); - /* reserve 4 (vec4) slots for ubo base addresses: */ - so->first_immediate += 4; + memset(&so->constbase, ~0, sizeof(so->constbase)); + + if (so->num_ubos > 0) { + so->constbase.ubo = constoff; + constoff += align(ctx->s->info.num_ubos * ptrsz, 4) / 4; + } + unsigned num_driver_params = 0; if (so->type == SHADER_VERTEX) { - /* driver params (see ir3_driver_param): */ - so->first_immediate += IR3_DP_COUNT/4; /* convert to vec4 */ - /* one (vec4) slot for stream-output base addresses: */ - so->first_immediate++; + num_driver_params = IR3_DP_VS_COUNT; + } else if (so->type == SHADER_COMPUTE) { + num_driver_params = IR3_DP_CS_COUNT; } + so->constbase.driver_param = constoff; + constoff += align(num_driver_params, 4) / 4; + + if ((so->type == SHADER_VERTEX) && + (compiler->gpu_id < 500) && + so->shader->stream_output.num_outputs > 0) { + so->constbase.tfbo = constoff; + constoff += align(PIPE_MAX_SO_BUFFERS * ptrsz, 4) / 4; + } + + so->constbase.immediate = constoff; + return ctx; } @@ -232,68 +290,180 @@ compile_free(struct ir3_compile *ctx) } static void -declare_var(struct ir3_compile *ctx, nir_variable *var) +declare_array(struct ir3_compile *ctx, nir_register *reg) { - unsigned length = glsl_get_length(var->type) * 4; /* always vec4, at least with ttn */ - struct ir3_array *arr = ralloc(ctx, struct ir3_array); + struct ir3_array *arr = rzalloc(ctx, struct ir3_array); arr->id = ++ctx->num_arrays; - arr->length = length; - arr->var = var; + /* NOTE: sometimes we get non array regs, for example for arrays of + * length 1. See fs-const-array-of-struct-of-array.shader_test. So + * treat a non-array as if it was an array of length 1. + * + * It would be nice if there was a nir pass to convert arrays of + * length 1 to ssa. 
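+	 *
+	 * e.g. (sizes hypothetical) a vec4 reg with num_array_elems == 3
+	 * gets length 12, while a plain non-array vec2 reg is treated as
+	 * a one element array of length 2.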
+ */ + arr->length = reg->num_components * MAX2(1, reg->num_array_elems); + compile_assert(ctx, arr->length > 0); + arr->r = reg; list_addtail(&arr->node, &ctx->ir->array_list); } static struct ir3_array * -get_var(struct ir3_compile *ctx, nir_variable *var) +get_array(struct ir3_compile *ctx, nir_register *reg) { list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) { - if (arr->var == var) + if (arr->r == reg) return arr; } - compile_error(ctx, "bogus var: %s\n", var->name); + compile_error(ctx, "bogus reg: %s\n", reg->name); return NULL; } +/* relative (indirect) if address!=NULL */ +static struct ir3_instruction * +create_array_load(struct ir3_compile *ctx, struct ir3_array *arr, int n, + struct ir3_instruction *address) +{ + struct ir3_block *block = ctx->block; + struct ir3_instruction *mov; + struct ir3_register *src; + + mov = ir3_instr_create(block, OPC_MOV); + mov->cat1.src_type = TYPE_U32; + mov->cat1.dst_type = TYPE_U32; + ir3_reg_create(mov, 0, 0); + src = ir3_reg_create(mov, 0, IR3_REG_ARRAY | + COND(address, IR3_REG_RELATIV)); + src->instr = arr->last_write; + src->size = arr->length; + src->array.id = arr->id; + src->array.offset = n; + + if (address) + ir3_instr_set_address(mov, address); + + arr->last_access = mov; + + return mov; +} + +/* relative (indirect) if address!=NULL */ +static struct ir3_instruction * +create_array_store(struct ir3_compile *ctx, struct ir3_array *arr, int n, + struct ir3_instruction *src, struct ir3_instruction *address) +{ + struct ir3_block *block = ctx->block; + struct ir3_instruction *mov; + struct ir3_register *dst; + + mov = ir3_instr_create(block, OPC_MOV); + mov->cat1.src_type = TYPE_U32; + mov->cat1.dst_type = TYPE_U32; + dst = ir3_reg_create(mov, 0, IR3_REG_ARRAY | + COND(address, IR3_REG_RELATIV)); + dst->instr = arr->last_access; + dst->size = arr->length; + dst->array.id = arr->id; + dst->array.offset = n; + ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = src; + + if (address) + ir3_instr_set_address(mov, address); + + arr->last_write = arr->last_access = mov; + + return mov; +} + /* allocate a n element value array (to be populated by caller) and * insert in def_ht */ static struct ir3_instruction ** -__get_dst(struct ir3_compile *ctx, void *key, unsigned n) +get_dst_ssa(struct ir3_compile *ctx, nir_ssa_def *dst, unsigned n) { struct ir3_instruction **value = ralloc_array(ctx->def_ht, struct ir3_instruction *, n); - _mesa_hash_table_insert(ctx->def_ht, key, value); + _mesa_hash_table_insert(ctx->def_ht, dst, value); return value; } static struct ir3_instruction ** get_dst(struct ir3_compile *ctx, nir_dest *dst, unsigned n) { - compile_assert(ctx, dst->is_ssa); + struct ir3_instruction **value; + if (dst->is_ssa) { - return __get_dst(ctx, &dst->ssa, n); + value = get_dst_ssa(ctx, &dst->ssa, n); } else { - return __get_dst(ctx, dst->reg.reg, n); + value = ralloc_array(ctx, struct ir3_instruction *, n); } -} -static struct ir3_instruction ** -get_dst_ssa(struct ir3_compile *ctx, nir_ssa_def *dst, unsigned n) -{ - return __get_dst(ctx, dst, n); + /* NOTE: in non-ssa case, we don't really need to store last_dst + * but this helps us catch cases where put_dst() call is forgotten + */ + compile_assert(ctx, !ctx->last_dst); + ctx->last_dst = value; + ctx->last_dst_n = n; + + return value; } +static struct ir3_instruction * get_addr(struct ir3_compile *ctx, struct ir3_instruction *src, int align); + static struct ir3_instruction * const * get_src(struct ir3_compile *ctx, nir_src *src) { - struct hash_entry *entry; - 
compile_assert(ctx, src->is_ssa); if (src->is_ssa) { + struct hash_entry *entry; entry = _mesa_hash_table_search(ctx->def_ht, src->ssa); + compile_assert(ctx, entry); + return entry->data; } else { - entry = _mesa_hash_table_search(ctx->def_ht, src->reg.reg); + nir_register *reg = src->reg.reg; + struct ir3_array *arr = get_array(ctx, reg); + unsigned num_components = arr->r->num_components; + struct ir3_instruction *addr = NULL; + struct ir3_instruction **value = + ralloc_array(ctx, struct ir3_instruction *, num_components); + + if (src->reg.indirect) + addr = get_addr(ctx, get_src(ctx, src->reg.indirect)[0], + reg->num_components); + + for (unsigned i = 0; i < num_components; i++) { + unsigned n = src->reg.base_offset * reg->num_components + i; + compile_assert(ctx, n < arr->length); + value[i] = create_array_load(ctx, arr, n, addr); + } + + return value; } - compile_assert(ctx, entry); - return entry->data; +} + +static void +put_dst(struct ir3_compile *ctx, nir_dest *dst) +{ + if (!dst->is_ssa) { + nir_register *reg = dst->reg.reg; + struct ir3_array *arr = get_array(ctx, reg); + unsigned num_components = ctx->last_dst_n; + struct ir3_instruction *addr = NULL; + + if (dst->reg.indirect) + addr = get_addr(ctx, get_src(ctx, dst->reg.indirect)[0], + reg->num_components); + + for (unsigned i = 0; i < num_components; i++) { + unsigned n = dst->reg.base_offset * reg->num_components + i; + compile_assert(ctx, n < arr->length); + if (!ctx->last_dst[i]) + continue; + create_array_store(ctx, arr, n, ctx->last_dst[i], addr); + } + + ralloc_free(ctx->last_dst); + } + ctx->last_dst = NULL; + ctx->last_dst_n = 0; } static struct ir3_instruction * @@ -311,7 +481,7 @@ create_immed(struct ir3_block *block, uint32_t val) } static struct ir3_instruction * -create_addr(struct ir3_block *block, struct ir3_instruction *src) +create_addr(struct ir3_block *block, struct ir3_instruction *src, int align) { struct ir3_instruction *instr, *immed; @@ -321,12 +491,41 @@ create_addr(struct ir3_block *block, struct ir3_instruction *src) instr = ir3_COV(block, src, TYPE_U32, TYPE_S16); instr->regs[0]->flags |= IR3_REG_HALF; - immed = create_immed(block, 2); - immed->regs[0]->flags |= IR3_REG_HALF; + switch(align){ + case 1: + /* src *= 1: */ + break; + case 2: + /* src *= 2 => src <<= 1: */ + immed = create_immed(block, 1); + immed->regs[0]->flags |= IR3_REG_HALF; - instr = ir3_SHL_B(block, instr, 0, immed, 0); - instr->regs[0]->flags |= IR3_REG_HALF; - instr->regs[1]->flags |= IR3_REG_HALF; + instr = ir3_SHL_B(block, instr, 0, immed, 0); + instr->regs[0]->flags |= IR3_REG_HALF; + instr->regs[1]->flags |= IR3_REG_HALF; + break; + case 3: + /* src *= 3: */ + immed = create_immed(block, 3); + immed->regs[0]->flags |= IR3_REG_HALF; + + instr = ir3_MULL_U(block, instr, 0, immed, 0); + instr->regs[0]->flags |= IR3_REG_HALF; + instr->regs[1]->flags |= IR3_REG_HALF; + break; + case 4: + /* src *= 4 => src <<= 2: */ + immed = create_immed(block, 2); + immed->regs[0]->flags |= IR3_REG_HALF; + + instr = ir3_SHL_B(block, instr, 0, immed, 0); + instr->regs[0]->flags |= IR3_REG_HALF; + instr->regs[1]->flags |= IR3_REG_HALF; + break; + default: + unreachable("bad align"); + return NULL; + } instr = ir3_MOV(block, instr, TYPE_S16); instr->regs[0]->num = regid(REG_A0, 0); @@ -340,22 +539,25 @@ create_addr(struct ir3_block *block, struct ir3_instruction *src) * sequences for each use of a given NIR level src as address */ static struct ir3_instruction * -get_addr(struct ir3_compile *ctx, struct ir3_instruction *src) +get_addr(struct 
ir3_compile *ctx, struct ir3_instruction *src, int align) { struct ir3_instruction *addr; + unsigned idx = align - 1; + + compile_assert(ctx, idx < ARRAY_SIZE(ctx->addr_ht)); - if (!ctx->addr_ht) { - ctx->addr_ht = _mesa_hash_table_create(ctx, + if (!ctx->addr_ht[idx]) { + ctx->addr_ht[idx] = _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); } else { struct hash_entry *entry; - entry = _mesa_hash_table_search(ctx->addr_ht, src); + entry = _mesa_hash_table_search(ctx->addr_ht[idx], src); if (entry) return entry->data; } - addr = create_addr(ctx->block, src); - _mesa_hash_table_insert(ctx->addr_ht, src, addr); + addr = create_addr(ctx->block, src, align); + _mesa_hash_table_insert(ctx->addr_ht[idx], src, addr); return addr; } @@ -409,7 +611,7 @@ create_uniform_indirect(struct ir3_compile *ctx, int n, } static struct ir3_instruction * -create_collect(struct ir3_block *block, struct ir3_instruction **arr, +create_collect(struct ir3_block *block, struct ir3_instruction *const *arr, unsigned arrsz) { struct ir3_instruction *collect; @@ -447,71 +649,24 @@ create_indirect_load(struct ir3_compile *ctx, unsigned arrsz, int n, return mov; } -/* relative (indirect) if address!=NULL */ static struct ir3_instruction * -create_var_load(struct ir3_compile *ctx, struct ir3_array *arr, int n, - struct ir3_instruction *address) +create_input_compmask(struct ir3_block *block, unsigned n, unsigned compmask) { - struct ir3_block *block = ctx->block; - struct ir3_instruction *mov; - struct ir3_register *src; - - mov = ir3_instr_create(block, OPC_MOV); - mov->cat1.src_type = TYPE_U32; - mov->cat1.dst_type = TYPE_U32; - ir3_reg_create(mov, 0, 0); - src = ir3_reg_create(mov, 0, IR3_REG_ARRAY | - COND(address, IR3_REG_RELATIV)); - src->instr = arr->last_write; - src->size = arr->length; - src->array.id = arr->id; - src->array.offset = n; - - if (address) - ir3_instr_set_address(mov, address); - - arr->last_access = mov; - - return mov; -} - -/* relative (indirect) if address!=NULL */ -static struct ir3_instruction * -create_var_store(struct ir3_compile *ctx, struct ir3_array *arr, int n, - struct ir3_instruction *src, struct ir3_instruction *address) -{ - struct ir3_block *block = ctx->block; - struct ir3_instruction *mov; - struct ir3_register *dst; - - mov = ir3_instr_create(block, OPC_MOV); - mov->cat1.src_type = TYPE_U32; - mov->cat1.dst_type = TYPE_U32; - dst = ir3_reg_create(mov, 0, IR3_REG_ARRAY | - COND(address, IR3_REG_RELATIV)); - dst->instr = arr->last_access; - dst->size = arr->length; - dst->array.id = arr->id; - dst->array.offset = n; - ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = src; + struct ir3_instruction *in; - ir3_instr_set_address(mov, address); + in = ir3_instr_create(block, OPC_META_INPUT); + in->inout.block = block; + ir3_reg_create(in, n, 0); - arr->last_write = arr->last_access = mov; + in->regs[0]->wrmask = compmask; - return mov; + return in; } static struct ir3_instruction * create_input(struct ir3_block *block, unsigned n) { - struct ir3_instruction *in; - - in = ir3_instr_create(block, OPC_META_INPUT); - in->inout.block = block; - ir3_reg_create(in, n, 0); - - return in; + return create_input_compmask(block, n, 0x1); } static struct ir3_instruction * @@ -576,7 +731,7 @@ create_driver_param(struct ir3_compile *ctx, enum ir3_driver_param dp) { /* first four vec4 sysval's reserved for UBOs: */ /* NOTE: dp is in scalar, but there can be >4 dp components: */ - unsigned n = ctx->so->first_driver_param + IR3_DRIVER_PARAM_OFF; + unsigned n = 
ctx->so->constbase.driver_param; unsigned r = regid(n + dp / 4, dp % 4); return create_uniform(ctx, r); } @@ -661,8 +816,17 @@ emit_alu(struct ir3_compile *ctx, nir_alu_instr *alu) const nir_op_info *info = &nir_op_infos[alu->op]; struct ir3_instruction **dst, *src[info->num_inputs]; struct ir3_block *b = ctx->block; + unsigned dst_sz, wrmask; + + if (alu->dest.dest.is_ssa) { + dst_sz = alu->dest.dest.ssa.num_components; + wrmask = (1 << dst_sz) - 1; + } else { + dst_sz = alu->dest.dest.reg.reg->num_components; + wrmask = alu->dest.write_mask; + } - dst = get_dst(ctx, &alu->dest.dest, MAX2(info->output_size, 1)); + dst = get_dst(ctx, &alu->dest.dest, dst_sz); /* Vectors are special in that they have non-scalarized writemasks, * and just take the first swizzle channel for each argument in @@ -684,9 +848,32 @@ emit_alu(struct ir3_compile *ctx, nir_alu_instr *alu) dst[i] = ir3_MOV(b, src[i], TYPE_U32); } + put_dst(ctx, &alu->dest.dest); return; } + /* We also get mov's with more than one component for mov's so + * handle those specially: + */ + if ((alu->op == nir_op_imov) || (alu->op == nir_op_fmov)) { + type_t type = (alu->op == nir_op_imov) ? TYPE_U32 : TYPE_F32; + nir_alu_src *asrc = &alu->src[0]; + struct ir3_instruction *const *src0 = get_src(ctx, &asrc->src); + + for (unsigned i = 0; i < dst_sz; i++) { + if (wrmask & (1 << i)) { + dst[i] = ir3_MOV(b, src0[asrc->swizzle[i]], type); + } else { + dst[i] = NULL; + } + } + + put_dst(ctx, &alu->dest.dest); + return; + } + + compile_assert(ctx, alu->dest.dest.is_ssa); + /* General case: We can just grab the one used channel per src. */ for (int i = 0; i < info->num_inputs; i++) { unsigned chan = ffs(alu->dest.write_mask) - 1; @@ -701,24 +888,18 @@ emit_alu(struct ir3_compile *ctx, nir_alu_instr *alu) } switch (alu->op) { - case nir_op_f2i: + case nir_op_f2i32: dst[0] = ir3_COV(b, src[0], TYPE_F32, TYPE_S32); break; - case nir_op_f2u: + case nir_op_f2u32: dst[0] = ir3_COV(b, src[0], TYPE_F32, TYPE_U32); break; - case nir_op_i2f: + case nir_op_i2f32: dst[0] = ir3_COV(b, src[0], TYPE_S32, TYPE_F32); break; - case nir_op_u2f: + case nir_op_u2f32: dst[0] = ir3_COV(b, src[0], TYPE_U32, TYPE_F32); break; - case nir_op_imov: - dst[0] = ir3_MOV(b, src[0], TYPE_S32); - break; - case nir_op_fmov: - dst[0] = ir3_MOV(b, src[0], TYPE_F32); - break; case nir_op_f2b: dst[0] = ir3_CMPS_F(b, src[0], 0, create_immed(b, fui(0.0)), 0); dst[0]->cat2.condition = IR3_COND_NE; @@ -964,6 +1145,8 @@ emit_alu(struct ir3_compile *ctx, nir_alu_instr *alu) nir_op_infos[alu->op].name); break; } + + put_dst(ctx, &alu->dest.dest); } /* handles direct/indirect UBO reads: */ @@ -972,21 +1155,28 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr, struct ir3_instruction **dst) { struct ir3_block *b = ctx->block; - struct ir3_instruction *addr, *src0, *src1; + struct ir3_instruction *base_lo, *base_hi, *addr, *src0, *src1; nir_const_value *const_offset; /* UBO addresses are the first driver params: */ - unsigned ubo = regid(ctx->so->first_driver_param + IR3_UBOS_OFF, 0); + unsigned ubo = regid(ctx->so->constbase.ubo, 0); + const unsigned ptrsz = pointer_size(ctx); + int off = 0; /* First src is ubo index, which could either be an immed or not: */ src0 = get_src(ctx, &intr->src[0])[0]; if (is_same_type_mov(src0) && (src0->regs[1]->flags & IR3_REG_IMMED)) { - addr = create_uniform(ctx, ubo + src0->regs[1]->iim_val); + base_lo = create_uniform(ctx, ubo + (src0->regs[1]->iim_val * ptrsz)); + base_hi = create_uniform(ctx, ubo + (src0->regs[1]->iim_val * 
ptrsz) + 1); } else { - addr = create_uniform_indirect(ctx, ubo, get_addr(ctx, src0)); + base_lo = create_uniform_indirect(ctx, ubo, get_addr(ctx, src0, 4)); + base_hi = create_uniform_indirect(ctx, ubo + 1, get_addr(ctx, src0, 4)); } + /* note: on 32bit gpu's base_hi is ignored and DCE'd */ + addr = base_lo; + const_offset = nir_src_as_const_value(intr->src[1]); if (const_offset) { off += const_offset->u32[0]; @@ -1008,6 +1198,20 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr, off -= off2; } + if (ptrsz == 2) { + struct ir3_instruction *carry; + + /* handle 32b rollover, ie: + * if (addr < base_lo) + * base_hi++ + */ + carry = ir3_CMPS_U(b, addr, 0, base_lo, 0); + carry->cat2.condition = IR3_COND_LT; + base_hi = ir3_ADD_S(b, base_hi, 0, carry, 0); + + addr = create_collect(b, (struct ir3_instruction*[]){ addr, base_hi }, 2); + } + for (int i = 0; i < intr->num_components; i++) { struct ir3_instruction *load = ir3_LDG(b, addr, 0, create_immed(b, 1), 0); @@ -1017,84 +1221,169 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr, } } -/* handles array reads: */ static void -emit_intrinsic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr, +mark_ssbo_read(struct ir3_compile *ctx, struct ir3_instruction *instr) +{ + instr->regs[0]->instr = ctx->last_write; + instr->regs[0]->flags |= IR3_REG_SSA; + ctx->last_access = instr; +} + +static void +mark_ssbo_write(struct ir3_compile *ctx, struct ir3_instruction *instr) +{ + instr->regs[0]->instr = ctx->last_access; + instr->regs[0]->flags |= IR3_REG_SSA; + ctx->last_write = ctx->last_access = instr; +} + +static void +emit_intrinsic_load_ssbo(struct ir3_compile *ctx, nir_intrinsic_instr *intr, struct ir3_instruction **dst) { - nir_deref_var *dvar = intr->variables[0]; - nir_deref_array *darr = nir_deref_as_array(dvar->deref.child); - struct ir3_array *arr = get_var(ctx, dvar->var); + struct ir3_block *b = ctx->block; + struct ir3_instruction *ldgb, *src0, *src1, *offset; + nir_const_value *const_offset; - compile_assert(ctx, dvar->deref.child && - (dvar->deref.child->deref_type == nir_deref_type_array)); + /* can this be non-const buffer_index? how do we handle that? */ + const_offset = nir_src_as_const_value(intr->src[0]); + compile_assert(ctx, const_offset); - switch (darr->deref_array_type) { - case nir_deref_array_type_direct: - /* direct access does not require anything special: */ - for (int i = 0; i < intr->num_components; i++) { - unsigned n = darr->base_offset * 4 + i; - compile_assert(ctx, n < arr->length); - dst[i] = create_var_load(ctx, arr, n, NULL); - } - break; - case nir_deref_array_type_indirect: { - /* for indirect, we need to collect all the array elements: */ - struct ir3_instruction *addr = - get_addr(ctx, get_src(ctx, &darr->indirect)[0]); - for (int i = 0; i < intr->num_components; i++) { - unsigned n = darr->base_offset * 4 + i; - compile_assert(ctx, n < arr->length); - dst[i] = create_var_load(ctx, arr, n, addr); - } - break; - } - default: - compile_error(ctx, "Unhandled load deref type: %u\n", - darr->deref_array_type); - break; - } + offset = get_src(ctx, &intr->src[1])[0]; + + /* src0 is uvec2(offset*4, 0), src1 is offset.. 
nir already *= 4: */ + src0 = create_collect(b, (struct ir3_instruction*[]){ + offset, + create_immed(b, 0), + }, 2); + src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0); + + ldgb = ir3_LDGB(b, create_immed(b, const_offset->u32[0]), 0, + src0, 0, src1, 0); + ldgb->regs[0]->wrmask = (1 << intr->num_components) - 1; + ldgb->cat6.iim_val = intr->num_components; + ldgb->cat6.type = TYPE_U32; + mark_ssbo_read(ctx, ldgb); + + split_dest(b, dst, ldgb, 0, intr->num_components); } -/* handles array writes: */ +/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */ static void -emit_intrinsic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr) +emit_intrinsic_store_ssbo(struct ir3_compile *ctx, nir_intrinsic_instr *intr) { - nir_deref_var *dvar = intr->variables[0]; - nir_deref_array *darr = nir_deref_as_array(dvar->deref.child); - struct ir3_array *arr = get_var(ctx, dvar->var); - struct ir3_instruction *addr; - struct ir3_instruction * const *src; - unsigned wrmask = nir_intrinsic_write_mask(intr); + struct ir3_block *b = ctx->block; + struct ir3_instruction *stgb, *src0, *src1, *src2, *offset; + nir_const_value *const_offset; + unsigned ncomp = ffs(~intr->const_index[0]) - 1; + + /* can this be non-const buffer_index? how do we handle that? */ + const_offset = nir_src_as_const_value(intr->src[1]); + compile_assert(ctx, const_offset); + + offset = get_src(ctx, &intr->src[2])[0]; + + /* src0 is value, src1 is offset, src2 is uvec2(offset*4, 0).. + * nir already *= 4: + */ + src0 = create_collect(b, get_src(ctx, &intr->src[0]), ncomp); + src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0); + src2 = create_collect(b, (struct ir3_instruction*[]){ + offset, + create_immed(b, 0), + }, 2); + + stgb = ir3_STGB(b, create_immed(b, const_offset->u32[0]), 0, + src0, 0, src1, 0, src2, 0); + stgb->cat6.iim_val = ncomp; + stgb->cat6.type = TYPE_U32; + mark_ssbo_write(ctx, stgb); + + array_insert(b, b->keeps, stgb); +} + +static struct ir3_instruction * +emit_intrinsic_atomic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *atomic, *ssbo, *src0, *src1, *src2, *offset; + nir_const_value *const_offset; + type_t type = TYPE_U32; + + /* can this be non-const buffer_index? how do we handle that? 
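+	 *
+	 * (For a concrete picture of the offset handling shared with the
+	 * ldgb/stgb paths above - numbers hypothetical: nir hands us a
+	 * byte offset, so an access at byte offset 32 becomes
+	 *
+	 *   src1 = 32 >> 2      = 8          dword offset
+	 *   src2 = uvec2(32, 0)              byte offset as a vec2
+	 *
+	 * which is exactly what the ir3_SHR_B()/create_collect() pair
+	 * below builds.)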
*/ + const_offset = nir_src_as_const_value(intr->src[0]); + compile_assert(ctx, const_offset); + ssbo = create_immed(b, const_offset->u32[0]); - compile_assert(ctx, dvar->deref.child && - (dvar->deref.child->deref_type == nir_deref_type_array)); + offset = get_src(ctx, &intr->src[1])[0]; - src = get_src(ctx, &intr->src[0]); + /* src0 is data (or uvec2(data, compare) + * src1 is offset + * src2 is uvec2(offset*4, 0) + * + * Note that nir already multiplies the offset by four + */ + src0 = get_src(ctx, &intr->src[2])[0]; + src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0); + src2 = create_collect(b, (struct ir3_instruction*[]){ + offset, + create_immed(b, 0), + }, 2); - switch (darr->deref_array_type) { - case nir_deref_array_type_direct: - addr = NULL; + switch (intr->intrinsic) { + case nir_intrinsic_ssbo_atomic_add: + atomic = ir3_ATOMIC_ADD(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_ssbo_atomic_imin: + atomic = ir3_ATOMIC_MIN(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + type = TYPE_S32; + break; + case nir_intrinsic_ssbo_atomic_umin: + atomic = ir3_ATOMIC_MIN(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_ssbo_atomic_imax: + atomic = ir3_ATOMIC_MAX(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + type = TYPE_S32; + break; + case nir_intrinsic_ssbo_atomic_umax: + atomic = ir3_ATOMIC_MAX(b, ssbo, 0, src0, 0, src1, 0, src2, 0); break; - case nir_deref_array_type_indirect: - addr = get_addr(ctx, get_src(ctx, &darr->indirect)[0]); + case nir_intrinsic_ssbo_atomic_and: + atomic = ir3_ATOMIC_AND(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_ssbo_atomic_or: + atomic = ir3_ATOMIC_OR(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_ssbo_atomic_xor: + atomic = ir3_ATOMIC_XOR(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_ssbo_atomic_exchange: + atomic = ir3_ATOMIC_XCHG(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_ssbo_atomic_comp_swap: + /* for cmpxchg, src0 is [ui]vec2(data, compare): */ + src0 = create_collect(b, (struct ir3_instruction*[]){ + src0, + get_src(ctx, &intr->src[3])[0], + }, 2); + atomic = ir3_ATOMIC_CMPXCHG(b, ssbo, 0, src0, 0, src1, 0, src2, 0); break; default: - compile_error(ctx, "Unhandled store deref type: %u\n", - darr->deref_array_type); - return; + unreachable("boo"); } - for (int i = 0; i < intr->num_components; i++) { - if (!(wrmask & (1 << i))) - continue; - unsigned n = darr->base_offset * 4 + i; - compile_assert(ctx, n < arr->length); - create_var_store(ctx, arr, n, src[i], addr); - } + atomic->cat6.iim_val = 1; + atomic->cat6.type = type; + mark_ssbo_write(ctx, atomic); + + /* even if nothing consume the result, we can't DCE the instruction: */ + array_insert(b, b->keeps, atomic); + + return atomic; } -static void add_sysval_input(struct ir3_compile *ctx, gl_system_value slot, +static void add_sysval_input_compmask(struct ir3_compile *ctx, + gl_system_value slot, unsigned compmask, struct ir3_instruction *instr) { struct ir3_shader_variant *so = ctx->so; @@ -1103,7 +1392,7 @@ static void add_sysval_input(struct ir3_compile *ctx, gl_system_value slot, so->inputs[n].sysval = true; so->inputs[n].slot = slot; - so->inputs[n].compmask = 1; + so->inputs[n].compmask = compmask; so->inputs[n].regid = r; so->inputs[n].interpolate = INTERP_MODE_FLAT; so->total_in++; @@ -1112,6 +1401,12 @@ static void add_sysval_input(struct ir3_compile *ctx, gl_system_value slot, ctx->ir->inputs[r] = instr; } +static void add_sysval_input(struct ir3_compile 
*ctx, gl_system_value slot, + struct ir3_instruction *instr) +{ + add_sysval_input_compmask(ctx, slot, 0x1, instr); +} + static void emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) { @@ -1143,7 +1438,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) for (int i = 0; i < intr->num_components; i++) { int n = idx * 4 + i; dst[i] = create_uniform_indirect(ctx, n, - get_addr(ctx, src[0])); + get_addr(ctx, src[0], 4)); } /* NOTE: if relative addressing is used, we set * constlen in the compiler (to worst-case value) @@ -1169,7 +1464,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) src = get_src(ctx, &intr->src[0]); struct ir3_instruction *collect = create_collect(b, ctx->ir->inputs, ctx->ir->ninputs); - struct ir3_instruction *addr = get_addr(ctx, src[0]); + struct ir3_instruction *addr = get_addr(ctx, src[0], 4); for (int i = 0; i < intr->num_components; i++) { unsigned n = idx * 4 + i; dst[i] = create_indirect_load(ctx, ctx->ir->ninputs, @@ -1177,11 +1472,28 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) } } break; - case nir_intrinsic_load_var: - emit_intrinsic_load_var(ctx, intr, dst); - break; - case nir_intrinsic_store_var: - emit_intrinsic_store_var(ctx, intr); + case nir_intrinsic_load_ssbo: + emit_intrinsic_load_ssbo(ctx, intr, dst); + break; + case nir_intrinsic_store_ssbo: + emit_intrinsic_store_ssbo(ctx, intr); + break; + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_ssbo_atomic_exchange: + case nir_intrinsic_ssbo_atomic_comp_swap: + if (info->has_dest) { + compile_assert(ctx, intr->num_components == 1); + dst[0] = emit_intrinsic_atomic(ctx, intr); + } else { + emit_intrinsic_atomic(ctx, intr); + } break; case nir_intrinsic_store_output: idx = nir_intrinsic_base(intr); @@ -1204,10 +1516,12 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) dst[0] = ctx->basevertex; break; case nir_intrinsic_load_vertex_id_zero_base: + case nir_intrinsic_load_vertex_id: if (!ctx->vertex_id) { + gl_system_value sv = (intr->intrinsic == nir_intrinsic_load_vertex_id) ? + SYSTEM_VALUE_VERTEX_ID : SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; ctx->vertex_id = create_input(b, 0); - add_sysval_input(ctx, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, - ctx->vertex_id); + add_sysval_input(ctx, sv, ctx->vertex_id); } dst[0] = ctx->vertex_id; break; @@ -1232,14 +1546,33 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) ctx->frag_face = create_input(b, 0); ctx->frag_face->regs[0]->flags |= IR3_REG_HALF; } - /* for fragface, we always get -1 or 0, but that is inverse - * of what nir expects (where ~0 is true). Unfortunately - * trying to widen from half to full in add.s seems to do a - * non-sign-extending widen (resulting in something that - * gets interpreted as float Inf??) + /* for fragface, we get -1 for back and 0 for front. However this is + * the inverse of what nir expects (where ~0 is true). 
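+			 *
+			 * So the ir3_COV() + ir3_NOT_B() pair below gives (illustrative):
+			 *
+			 *   hw  0 (front) -> sign extend -> 0x00000000 -> not -> ~0 (true)
+			 *   hw -1 (back)  -> sign extend -> 0xffffffff -> not ->  0 (false)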
*/ dst[0] = ir3_COV(b, ctx->frag_face, TYPE_S16, TYPE_S32); - dst[0] = ir3_ADD_S(b, dst[0], 0, create_immed(b, 1), 0); + dst[0] = ir3_NOT_B(b, dst[0], 0); + break; + case nir_intrinsic_load_local_invocation_id: + if (!ctx->local_invocation_id) { + ctx->local_invocation_id = create_input_compmask(b, 0, 0x7); + add_sysval_input_compmask(ctx, SYSTEM_VALUE_LOCAL_INVOCATION_ID, + 0x7, ctx->local_invocation_id); + } + split_dest(b, dst, ctx->local_invocation_id, 0, 3); + break; + case nir_intrinsic_load_work_group_id: + if (!ctx->work_group_id) { + ctx->work_group_id = create_input_compmask(b, 0, 0x7); + add_sysval_input_compmask(ctx, SYSTEM_VALUE_WORK_GROUP_ID, + 0x7, ctx->work_group_id); + ctx->work_group_id->regs[0]->flags |= IR3_REG_HIGH; + } + split_dest(b, dst, ctx->work_group_id, 0, 3); + break; + case nir_intrinsic_load_num_work_groups: + for (int i = 0; i < intr->num_components; i++) { + dst[i] = create_driver_param(ctx, IR3_DP_NUM_WORK_GROUPS_X + i); + } break; case nir_intrinsic_discard_if: case nir_intrinsic_discard: { @@ -1262,9 +1595,9 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) cond->regs[0]->num = regid(REG_P0, 0); kill = ir3_KILL(b, cond, 0); - array_insert(ctx->ir->predicates, kill); + array_insert(ctx->ir, ctx->ir->predicates, kill); - array_insert(ctx->ir->keeps, kill); + array_insert(b, b->keeps, kill); ctx->so->has_kill = true; break; @@ -1274,6 +1607,9 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) nir_intrinsic_infos[intr->intrinsic].name); break; } + + if (info->has_dest) + put_dst(ctx, &intr->dest); } static void @@ -1372,7 +1708,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) lod = get_src(ctx, &tex->src[i].src)[0]; has_lod = true; break; - case nir_tex_src_comparitor: /* shadow comparator */ + case nir_tex_src_comparator: /* shadow comparator */ compare = get_src(ctx, &tex->src[i].src)[0]; break; case nir_tex_src_projector: @@ -1539,7 +1875,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_W, flags, tex_idx, tex_idx, col0, col1); - array_insert(ctx->ir->astc_srgb, sam); + array_insert(ctx->ir, ctx->ir->astc_srgb, sam); /* fixup .w component: */ split_dest(b, &dst[3], sam, 3, 1); @@ -1558,6 +1894,8 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) factor, 0); } } + + put_dst(ctx, &tex->dest); } static void @@ -1581,6 +1919,8 @@ emit_tex_query_levels(struct ir3_compile *ctx, nir_tex_instr *tex) */ if (ctx->levels_add_one) dst[0] = ir3_ADD_U(b, dst[0], 0, create_immed(b, 1), 0); + + put_dst(ctx, &tex->dest); } static void @@ -1623,6 +1963,8 @@ emit_tex_txs(struct ir3_compile *ctx, nir_tex_instr *tex) dst[coords] = ir3_MOV(b, dst[3], TYPE_U32); } } + + put_dst(ctx, &tex->dest); } static void @@ -1641,6 +1983,8 @@ emit_phi(struct ir3_compile *ctx, nir_phi_instr *nphi) phi->phi.nphi = nphi; dst[0] = phi; + + put_dst(ctx, &nphi->dest); } /* phi instructions are left partially constructed. 
We don't resolve @@ -1776,8 +2120,10 @@ emit_block(struct ir3_compile *ctx, nir_block *nblock) list_addtail(&block->node, &ctx->ir->block_list); /* re-emit addr register in each block if needed: */ - _mesa_hash_table_destroy(ctx->addr_ht, NULL); - ctx->addr_ht = NULL; + for (int i = 0; i < ARRAY_SIZE(ctx->addr_ht); i++) { + _mesa_hash_table_destroy(ctx->addr_ht[i], NULL); + ctx->addr_ht[i] = NULL; + } nir_foreach_instr(instr, nblock) { emit_instr(ctx, instr); @@ -1905,7 +2251,7 @@ emit_stream_out(struct ir3_compile *ctx) unsigned stride = strmout->stride[i]; struct ir3_instruction *base, *off; - base = create_uniform(ctx, regid(v->first_driver_param + IR3_TFBOS_OFF, i)); + base = create_uniform(ctx, regid(v->constbase.tfbo, i)); /* 24-bit should be enough: */ off = ir3_MUL_U(ctx->block, vtxcnt, 0, @@ -1928,7 +2274,7 @@ emit_stream_out(struct ir3_compile *ctx) stg->cat6.type = TYPE_U32; stg->cat6.dst_offset = (strmout->output[i].dst_offset + j) * 4; - array_insert(ctx->ir->keeps, stg); + array_insert(ctx->block, ctx->block->keeps, stg); } } @@ -1960,7 +2306,8 @@ emit_function(struct ir3_compile *ctx, nir_function_impl *impl) * out, we guarantee that all exit paths flow into the stream- * out instructions. */ - if ((ctx->so->shader->stream_output.num_outputs > 0) && + if ((ctx->compiler->gpu_id < 500) && + (ctx->so->shader->stream_output.num_outputs > 0) && !ctx->so->key.binning_pass) { debug_assert(ctx->so->type == SHADER_VERTEX); emit_stream_out(ctx); @@ -2108,10 +2455,8 @@ setup_output(struct ir3_compile *ctx, nir_variable *out) case VARYING_SLOT_FOGC: case VARYING_SLOT_CLIP_DIST0: case VARYING_SLOT_CLIP_DIST1: - break; case VARYING_SLOT_CLIP_VERTEX: - /* handled entirely in nir_lower_clip: */ - return; + break; default: if (slot >= VARYING_SLOT_VAR0) break; @@ -2147,6 +2492,11 @@ max_drvloc(struct exec_list *vars) return drvloc; } +static const unsigned max_sysvals[SHADER_MAX] = { + [SHADER_VERTEX] = 16, + [SHADER_COMPUTE] = 16, // TODO how many do we actually need? 
+}; + static void emit_instructions(struct ir3_compile *ctx) { @@ -2156,11 +2506,9 @@ emit_instructions(struct ir3_compile *ctx) ninputs = (max_drvloc(&ctx->s->inputs) + 1) * 4; noutputs = (max_drvloc(&ctx->s->outputs) + 1) * 4; - /* or vtx shaders, we need to leave room for sysvals: + /* we need to leave room for sysvals: */ - if (ctx->so->type == SHADER_VERTEX) { - ninputs += 16; - } + ninputs += max_sysvals[ctx->so->type]; ctx->ir = ir3_create(ctx->compiler, ninputs, noutputs); @@ -2169,9 +2517,7 @@ emit_instructions(struct ir3_compile *ctx) ctx->in_block = ctx->block; list_addtail(&ctx->block->node, &ctx->ir->block_list); - if (ctx->so->type == SHADER_VERTEX) { - ctx->ir->ninputs -= 16; - } + ninputs -= max_sysvals[ctx->so->type]; /* for fragment shader, we have a single input register (usually * r0.xy) which is used as the base for bary.f varying fetch instrs: @@ -2196,17 +2542,15 @@ emit_instructions(struct ir3_compile *ctx) setup_output(ctx, var); } - /* Setup global variables (which should only be arrays): */ - nir_foreach_variable(var, &ctx->s->globals) { - declare_var(ctx, var); + /* Setup registers (which should only be arrays): */ + nir_foreach_register(reg, &ctx->s->registers) { + declare_array(ctx, reg); } - /* Setup local variables (which should only be arrays): */ /* NOTE: need to do something more clever when we support >1 fxn */ - nir_foreach_variable(var, &fxn->locals) { - declare_var(ctx, var); + nir_foreach_register(reg, &fxn->registers) { + declare_array(ctx, reg); } - /* And emit the body: */ ctx->impl = fxn; emit_function(ctx, fxn); @@ -2385,8 +2729,14 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, if (so->key.half_precision) { for (i = 0; i < ir->noutputs; i++) { struct ir3_instruction *out = ir->outputs[i]; + if (!out) continue; + + /* if frag shader writes z, that needs to be full precision: */ + if (so->outputs[i/4].slot == FRAG_RESULT_DEPTH) + continue; + out->regs[0]->flags |= IR3_REG_HALF; /* output could be a fanout (ie. texture fetch output) * in which case we need to propagate the half-reg flag @@ -2458,9 +2808,9 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, actual_in = 0; inloc = 0; for (i = 0; i < so->inputs_count; i++) { - unsigned j, regid = ~0, compmask = 0; + unsigned j, regid = ~0, compmask = 0, maxcomp = 0; so->inputs[i].ncomp = 0; - so->inputs[i].inloc = inloc + 8; + so->inputs[i].inloc = inloc; for (j = 0; j < 4; j++) { struct ir3_instruction *in = inputs[(i*4) + j]; if (in && !(in->flags & IR3_INSTR_UNUSED)) { @@ -2471,14 +2821,19 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, if ((so->type == SHADER_FRAGMENT) && so->inputs[i].bary) { /* assign inloc: */ assert(in->regs[1]->flags & IR3_REG_IMMED); - in->regs[1]->iim_val = inloc++; + in->regs[1]->iim_val = inloc + j; + maxcomp = j + 1; } } } - if ((so->type == SHADER_FRAGMENT) && compmask && so->inputs[i].bary) + if ((so->type == SHADER_FRAGMENT) && compmask && so->inputs[i].bary) { so->varying_in++; + so->inputs[i].compmask = (1 << maxcomp) - 1; + inloc += maxcomp; + } else { + so->inputs[i].compmask = compmask; + } so->inputs[i].regid = regid; - so->inputs[i].compmask = compmask; } if (ctx->astc_srgb) @@ -2487,7 +2842,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, /* We need to do legalize after (for frag shader's) the "bary.f" * offsets (inloc) have been assigned. 
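	 *
	 * (With the packing above, inloc's are assigned per used component:
	 * e.g., hypothetically, a varying that only reads .xy gets
	 * compmask 0x3, its bary.f's fetch inloc+0 and inloc+1, and the
	 * next varying's inloc starts at inloc+2.)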
*/ - ir3_legalize(ir, &so->has_samp, &max_bary); + ir3_legalize(ir, &so->has_samp, &so->has_ssbo, &max_bary); if (fd_mesa_debug & FD_DBG_OPTMSGS) { printf("AFTER LEGALIZE:\n"); diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cp.c index 57c37e263..8c907eb5a 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cp.c +++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cp.c @@ -111,36 +111,12 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n, if (flags & IR3_REG_RELATIV) return false; - /* clear flags that are 'ok' */ switch (opc_cat(instr->opc)) { case 1: valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV; if (flags & ~valid_flags) return false; break; - case 5: - /* no flags allowed */ - if (flags) - return false; - break; - case 6: - valid_flags = IR3_REG_IMMED; - if (flags & ~valid_flags) - return false; - - if (flags & IR3_REG_IMMED) { - /* doesn't seem like we can have immediate src for store - * instructions: - * - * TODO this restriction could also apply to load instructions, - * but for load instructions this arg is the address (and not - * really sure any good way to test a hard-coded immed addr src) - */ - if (is_store(instr) && (n == 1)) - return false; - } - - break; case 2: valid_flags = ir3_cat2_absneg(instr->opc) | IR3_REG_CONST | IR3_REG_RELATIV; @@ -197,6 +173,35 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n, if (flags & (IR3_REG_SABS | IR3_REG_SNEG)) return false; break; + case 5: + /* no flags allowed */ + if (flags) + return false; + break; + case 6: + valid_flags = IR3_REG_IMMED; + if (flags & ~valid_flags) + return false; + + if (flags & IR3_REG_IMMED) { + /* doesn't seem like we can have immediate src for store + * instructions: + * + * TODO this restriction could also apply to load instructions, + * but for load instructions this arg is the address (and not + * really sure any good way to test a hard-coded immed addr src) + */ + if (is_store(instr) && (n == 1)) + return false; + + /* disallow CP into anything but the SSBO slot argument for + * atomics: + */ + if (is_atomic(instr->opc) && (n != 0)) + return false; + } + + break; } return true; @@ -296,7 +301,7 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags new_flags &= ~IR3_REG_IMMED; new_flags |= IR3_REG_CONST; reg->flags = new_flags; - reg->num = i + (4 * ctx->so->first_immediate); + reg->num = i + (4 * ctx->so->constbase.immediate); return reg; } @@ -576,15 +581,15 @@ ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so) } } - for (unsigned i = 0; i < ir->keeps_count; i++) { - instr_cp(&ctx, ir->keeps[i]); - ir->keeps[i] = eliminate_output_mov(ir->keeps[i]); - } - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { if (block->condition) { instr_cp(&ctx, block->condition); block->condition = eliminate_output_mov(block->condition); } + + for (unsigned i = 0; i < block->keeps_count; i++) { + instr_cp(&ctx, block->keeps[i]); + block->keeps[i] = eliminate_output_mov(block->keeps[i]); + } } } diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_depth.c index 1b8a446ca..be39027b6 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_depth.c +++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_depth.c @@ -159,11 +159,11 @@ ir3_depth(struct ir3 *ir) if (ir->outputs[i]) ir3_instr_depth(ir->outputs[i]); - for (i = 0; i < ir->keeps_count; i++) - ir3_instr_depth(ir->keeps[i]); - - 
 	/* We also need to account for if-condition: */
 	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+		for (i = 0; i < block->keeps_count; i++)
+			ir3_instr_depth(block->keeps[i]);
+
+		/* We also need to account for if-condition: */
 		if (block->condition)
 			ir3_instr_depth(block->condition);
 	}
diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_group.c b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_group.c
index 633d66c58..2719b6459 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_group.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_group.c
@@ -254,9 +254,11 @@ find_neighbors(struct ir3 *ir)
 		}
 	}
 
-	for (i = 0; i < ir->keeps_count; i++) {
-		struct ir3_instruction *instr = ir->keeps[i];
-		instr_find_neighbors(instr);
+	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+		for (i = 0; i < block->keeps_count; i++) {
+			struct ir3_instruction *instr = block->keeps[i];
+			instr_find_neighbors(instr);
+		}
 	}
 }
diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_nir.c b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_nir.c
index 2d86a524c..d30543d76 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_nir.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_nir.c
@@ -52,6 +52,23 @@ static const nir_shader_compiler_options options = {
 	.lower_extract_word = true,
 };
 
+static const nir_shader_compiler_options options_5xx = {
+	.lower_fpow = true,
+	.lower_fsat = true,
+	.lower_scmp = true,
+	.lower_flrp32 = true,
+	.lower_flrp64 = true,
+	.lower_ffract = true,
+	.lower_fmod32 = true,
+	.lower_fmod64 = true,
+	.lower_fdiv = true,
+	.fuse_ffma = true,
+	.native_integers = true,
+	.vertex_id_zero_based = false,
+	.lower_extract_byte = true,
+	.lower_extract_word = true,
+};
+
 struct nir_shader *
 ir3_tgsi_to_nir(const struct tgsi_token *tokens)
 {
@@ -59,8 +76,10 @@ ir3_tgsi_to_nir(const struct tgsi_token *tokens)
 }
 
 const nir_shader_compiler_options *
-ir3_get_compiler_options(void)
+ir3_get_compiler_options(struct ir3_compiler *compiler)
 {
+	if (compiler->gpu_id >= 500)
+		return &options_5xx;
 	return &options;
 }
 
@@ -90,6 +109,7 @@ ir3_optimize_loop(nir_shader *s)
 		progress = false;
 
 		OPT_V(s, nir_lower_vars_to_ssa);
+		progress |= OPT(s, nir_opt_copy_prop_vars);
 		progress |= OPT(s, nir_lower_alu_to_scalar);
 		progress |= OPT(s, nir_lower_phis_to_scalar);
 
@@ -114,7 +134,6 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
 	if (key) {
 		switch (shader->type) {
 		case SHADER_FRAGMENT:
-		case SHADER_COMPUTE:
 			tex_options.saturate_s = key->fsaturate_s;
 			tex_options.saturate_t = key->fsaturate_t;
 			tex_options.saturate_r = key->fsaturate_r;
@@ -124,6 +143,9 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
 			tex_options.saturate_t = key->vsaturate_t;
 			tex_options.saturate_r = key->vsaturate_r;
 			break;
+		default:
+			/* TODO */
+			break;
 		}
 	}
 
@@ -142,7 +164,7 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
 	}
 
 	OPT_V(s, nir_opt_global_to_local);
-	OPT_V(s, nir_convert_to_ssa);
+	OPT_V(s, nir_lower_regs_to_ssa);
 
 	if (key) {
 		if (s->stage == MESA_SHADER_VERTEX) {
diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_nir_trig.c b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_nir_trig.c
index 3e9bbcc0a..5ab52e1e1 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_nir_trig.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_nir_trig.c
@@ -2,6 +2,7 @@
 #include "nir.h"
 #include "nir_search.h"
+#include "nir_search_helpers.h"
 
 #ifndef NIR_OPT_ALGEBRAIC_STRUCT_DEFS
 #define NIR_OPT_ALGEBRAIC_STRUCT_DEFS
@@ -15,7 +16,6 @@ struct transform {
 #endif
 
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_variable search1_0 = {
    { nir_search_value_variable, 0 },
    0, /* x */
@@ -23,27 +23,24 @@ static const nir_search_variable search1_0 = {
    nir_type_invalid,
    NULL,
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_expression search1 = {
    { nir_search_value_expression, 0 },
    false,
    nir_op_fcos,
    { &search1_0.value },
+   NULL,
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_constant replace1_0_0_0 = {
    { nir_search_value_constant, 0 },
    nir_type_float, { 0x401921fb3fa6defc /* 6.283185 */ },
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_constant replace1_0_0_1_0_0_0 = {
    { nir_search_value_constant, 0 },
    nir_type_float, { 0x3fc45f30e7ff583a /* 0.159155 */ },
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_variable replace1_0_0_1_0_0_1 = {
    { nir_search_value_variable, 0 },
    0, /* x */
@@ -51,66 +48,63 @@ static const nir_search_variable replace1_0_0_1_0_0_1 = {
    nir_type_invalid,
    NULL,
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_expression replace1_0_0_1_0_0 = {
    { nir_search_value_expression, 0 },
    false,
    nir_op_fmul,
    { &replace1_0_0_1_0_0_0.value, &replace1_0_0_1_0_0_1.value },
+   NULL,
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_constant replace1_0_0_1_0_1 = {
    { nir_search_value_constant, 0 },
    nir_type_float, { 0x3fe0000000000000 /* 0.5 */ },
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_expression replace1_0_0_1_0 = {
    { nir_search_value_expression, 0 },
    false,
    nir_op_fadd,
    { &replace1_0_0_1_0_0.value, &replace1_0_0_1_0_1.value },
+   NULL,
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_expression replace1_0_0_1 = {
    { nir_search_value_expression, 0 },
    false,
    nir_op_ffract,
    { &replace1_0_0_1_0.value },
+   NULL,
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_expression replace1_0_0 = {
    { nir_search_value_expression, 0 },
    false,
    nir_op_fmul,
    { &replace1_0_0_0.value, &replace1_0_0_1.value },
+   NULL,
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_constant replace1_0_1 = {
    { nir_search_value_constant, 0 },
    nir_type_float, { 0x400921fb82c2bd7f /* 3.141593 */ },
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_expression replace1_0 = {
    { nir_search_value_expression, 0 },
    false,
    nir_op_fsub,
    { &replace1_0_0.value, &replace1_0_1.value },
+   NULL,
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_expression replace1 = {
    { nir_search_value_expression, 0 },
    false,
    nir_op_fcos,
    { &replace1_0.value },
+   NULL,
 };
 
 static const struct transform ir3_nir_apply_trig_workarounds_fcos_xforms[] = {
    { &search1, &replace1.value, 0 },
 };
 
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_variable search0_0 = {
    { nir_search_value_variable, 0 },
    0, /* x */
@@ -118,27 +112,24 @@ static const nir_search_variable search0_0 = {
    nir_type_invalid,
    NULL,
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_expression search0 = {
    { nir_search_value_expression, 0 },
    false,
    nir_op_fsin,
    { &search0_0.value },
+   NULL,
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_constant replace0_0_0_0 = {
    { nir_search_value_constant, 0 },
    nir_type_float, { 0x401921fb3fa6defc /* 6.283185 */ },
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_constant replace0_0_0_1_0_0_0 = {
    { nir_search_value_constant, 0 },
    nir_type_float, { 0x3fc45f30e7ff583a /* 0.159155 */ },
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_variable replace0_0_0_1_0_0_1 = {
    { nir_search_value_variable, 0 },
    0, /* x */
@@ -146,59 +137,57 @@ static const nir_search_variable replace0_0_0_1_0_0_1 = {
    nir_type_invalid,
    NULL,
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_expression replace0_0_0_1_0_0 = {
    { nir_search_value_expression, 0 },
    false,
    nir_op_fmul,
    { &replace0_0_0_1_0_0_0.value, &replace0_0_0_1_0_0_1.value },
+   NULL,
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_constant replace0_0_0_1_0_1 = {
    { nir_search_value_constant, 0 },
    nir_type_float, { 0x3fe0000000000000 /* 0.5 */ },
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_expression replace0_0_0_1_0 = {
    { nir_search_value_expression, 0 },
    false,
    nir_op_fadd,
    { &replace0_0_0_1_0_0.value, &replace0_0_0_1_0_1.value },
+   NULL,
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_expression replace0_0_0_1 = {
    { nir_search_value_expression, 0 },
    false,
    nir_op_ffract,
    { &replace0_0_0_1_0.value },
+   NULL,
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_expression replace0_0_0 = {
    { nir_search_value_expression, 0 },
    false,
    nir_op_fmul,
    { &replace0_0_0_0.value, &replace0_0_0_1.value },
+   NULL,
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_constant replace0_0_1 = {
    { nir_search_value_constant, 0 },
    nir_type_float, { 0x400921fb82c2bd7f /* 3.141593 */ },
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_expression replace0_0 = {
    { nir_search_value_expression, 0 },
    false,
    nir_op_fsub,
    { &replace0_0_0.value, &replace0_0_1.value },
+   NULL,
 };
-#include "compiler/nir/nir_search_helpers.h"
 static const nir_search_expression replace0 = {
    { nir_search_value_expression, 0 },
    false,
    nir_op_fsin,
    { &replace0_0.value },
+   NULL,
 };
 
 static const struct transform ir3_nir_apply_trig_workarounds_fsin_xforms[] = {
diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_nir_trig.py b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_nir_trig.py
index f49bccee0..f358f4d6b 100755
--- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_nir_trig.py
+++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_nir_trig.py
@@ -1,4 +1,3 @@
-#! /usr/bin/env python
 #
 # Copyright (C) 2016 Intel Corporation
 #
diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index 76460d923..a176f16e7 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -223,7 +223,6 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key,
 	 */
 	switch (shader->type) {
 	case SHADER_FRAGMENT:
-	case SHADER_COMPUTE:
 		key.binning_pass = false;
 		if (key.has_per_samp) {
 			key.vsaturate_s = 0;
@@ -243,6 +242,9 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key,
 			key.fastc_srgb = 0;
 		}
 		break;
+	default:
+		/* TODO */
+		break;
 	}
 
 	for (v = shader->variants; v; v = v->next)
@@ -289,6 +291,7 @@ ir3_shader_create(struct ir3_compiler *compiler,
 		/* we take ownership of the reference: */
 		nir = cso->ir.nir;
 	} else {
+		debug_assert(cso->type == PIPE_SHADER_IR_TGSI);
 		if (fd_mesa_debug & FD_DBG_DISASM) {
 			DBG("dump tgsi: type=%d", shader->type);
 			tgsi_dump(cso->tokens, 0);
@@ -315,6 +318,43 @@ ir3_shader_create(struct ir3_compiler *compiler,
 	return shader;
 }
 
+/* a bit annoying that compute-shader and normal shader state objects
+ * aren't a bit more aligned.
+ */
+struct ir3_shader *
+ir3_shader_create_compute(struct ir3_compiler *compiler,
+		const struct pipe_compute_state *cso,
+		struct pipe_debug_callback *debug)
+{
+	struct ir3_shader *shader = CALLOC_STRUCT(ir3_shader);
+
+	shader->compiler = compiler;
+	shader->id = ++shader->compiler->shader_count;
+	shader->type = SHADER_COMPUTE;
+
+	nir_shader *nir;
+	if (cso->ir_type == PIPE_SHADER_IR_NIR) {
+		/* we take ownership of the reference: */
+		nir = (nir_shader *)cso->prog;
+	} else {
+		debug_assert(cso->ir_type == PIPE_SHADER_IR_TGSI);
+		if (fd_mesa_debug & FD_DBG_DISASM) {
+			DBG("dump tgsi: type=%d", shader->type);
+			tgsi_dump(cso->prog, 0);
+		}
+		nir = ir3_tgsi_to_nir(cso->prog);
+	}
+
+	/* do first pass optimization, ignoring the key: */
+	shader->nir = ir3_optimize_nir(shader, nir, NULL);
+	if (fd_mesa_debug & FD_DBG_DISASM) {
+		DBG("dump nir%d: type=%d", shader->id, shader->type);
+		nir_print_shader(shader->nir, stdout);
+	}
+
+	return shader;
+}
+
 static void dump_reg(const char *name, uint32_t r)
 {
 	if (r != regid(63,0))
@@ -366,7 +406,7 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin)
 	}
 
 	for (i = 0; i < so->immediates_count; i++) {
-		debug_printf("@const(c%d.x)\t", so->first_immediate + i);
+		debug_printf("@const(c%d.x)\t", so->constbase.immediate + i);
 		debug_printf("0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
 				so->immediates[i].val[0],
 				so->immediates[i].val[1],
@@ -418,7 +458,8 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin)
 		}
 		debug_printf("\n");
 		break;
-	case SHADER_COMPUTE:
+	default:
+		/* TODO */
 		break;
 	}
 
@@ -462,7 +503,8 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin)
 		if (so->frag_face)
 			debug_printf("; fragface: hr0.x\n");
 		break;
-	case SHADER_COMPUTE:
+	default:
+		/* TODO */
 		break;
 	}
 
@@ -503,7 +545,7 @@ emit_user_consts(struct fd_context *ctx, const struct ir3_shader_variant *v,
 	 * the user consts early to avoid HLSQ lockup caused by
 	 * writing too many consts
 	 */
-	uint32_t max_const = MIN2(v->first_driver_param, v->constlen);
+	uint32_t max_const = MIN2(v->num_uniforms, v->constlen);
 
 	// I expect that size should be a multiple of vec4's:
 	assert(size == align(size, 4));
@@ -527,9 +569,9 @@ static void
 emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
 		struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
 {
-	uint32_t offset = v->first_driver_param + IR3_UBOS_OFF;
+	uint32_t offset = v->constbase.ubo;
 	if (v->constlen > offset) {
-		uint32_t params = MIN2(4, v->constlen - offset) * 4;
+		uint32_t params = v->num_ubos;
 		uint32_t offsets[params];
 		struct pipe_resource *prscs[params];
@@ -557,7 +599,7 @@ emit_immediates(struct fd_context *ctx, const struct ir3_shader_variant *v,
 		struct fd_ringbuffer *ring)
 {
 	int size = v->immediates_count;
-	uint32_t base = v->first_immediate;
+	uint32_t base = v->constbase.immediate;
 
 	/* truncate size to avoid writing constants that shader
 	 * does not use:
@@ -581,7 +623,7 @@ emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v,
 		struct fd_ringbuffer *ring)
 {
 	/* streamout addresses after driver-params: */
-	uint32_t offset = v->first_driver_param + IR3_TFBOS_OFF;
+	uint32_t offset = v->constbase.tfbo;
 	if (v->constlen > offset) {
 		struct fd_streamout_stateobj *so = &ctx->streamout;
 		struct pipe_stream_output_info *info = &v->shader->stream_output;
@@ -614,6 +656,8 @@ max_tf_vtx(struct fd_context *ctx, const struct ir3_shader_variant *v)
 	struct pipe_stream_output_info *info = &v->shader->stream_output;
 	uint32_t maxvtxcnt = 0x7fffffff;
 
+	if (ctx->screen->gpu_id >= 500)
+		return 0;
 	if (v->key.binning_pass)
 		return 0;
 	if (v->shader->stream_output.num_outputs == 0)
@@ -653,23 +697,19 @@ max_tf_vtx(struct fd_context *ctx, const struct ir3_shader_variant *v)
 }
 
 void
-ir3_emit_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
-		struct fd_context *ctx, const struct pipe_draw_info *info, uint32_t dirty)
+ir3_emit_vs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+		struct fd_context *ctx, const struct pipe_draw_info *info)
 {
-	if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) {
+	enum fd_dirty_shader_state dirty = ctx->dirty_shader[PIPE_SHADER_VERTEX];
+
+	debug_assert(v->type == SHADER_VERTEX);
+
+	if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST)) {
 		struct fd_constbuf_stateobj *constbuf;
 		bool shader_dirty;
 
-		if (v->type == SHADER_VERTEX) {
-			constbuf = &ctx->constbuf[PIPE_SHADER_VERTEX];
-			shader_dirty = !!(dirty & FD_SHADER_DIRTY_VP);
-		} else if (v->type == SHADER_FRAGMENT) {
-			constbuf = &ctx->constbuf[PIPE_SHADER_FRAGMENT];
-			shader_dirty = !!(dirty & FD_SHADER_DIRTY_FP);
-		} else {
-			unreachable("bad shader type");
-			return;
-		}
+		constbuf = &ctx->constbuf[PIPE_SHADER_VERTEX];
+		shader_dirty = !!(dirty & FD_DIRTY_SHADER_PROG);
 
 		emit_user_consts(ctx, v, ring, constbuf);
 		emit_ubos(ctx, v, ring, constbuf);
@@ -679,11 +719,11 @@ ir3_emit_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
 
 	/* emit driver params every time: */
 	/* TODO skip emit if shader doesn't use driver params to avoid WFI.. */
-	if (info && (v->type == SHADER_VERTEX)) {
-		uint32_t offset = v->first_driver_param + IR3_DRIVER_PARAM_OFF;
-		if (v->constlen >= offset) {
-			uint32_t vertex_params[IR3_DP_COUNT] = {
-				[IR3_DP_VTXID_BASE] = info->indexed ?
+	if (info) {
+		uint32_t offset = v->constbase.driver_param;
+		if (v->constlen > offset) {
+			uint32_t vertex_params[IR3_DP_VS_COUNT] = {
+				[IR3_DP_VTXID_BASE] = info->index_size ?
 					info->index_bias : info->start,
 				[IR3_DP_VTXCNT_MAX] = max_tf_vtx(ctx, v),
 			};
@@ -715,3 +755,61 @@ ir3_emit_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
 		}
 	}
 }
+
+void
+ir3_emit_fs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+		struct fd_context *ctx)
+{
+	enum fd_dirty_shader_state dirty = ctx->dirty_shader[PIPE_SHADER_FRAGMENT];
+
+	debug_assert(v->type == SHADER_FRAGMENT);
+
+	if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST)) {
+		struct fd_constbuf_stateobj *constbuf;
+		bool shader_dirty;
+
+		constbuf = &ctx->constbuf[PIPE_SHADER_FRAGMENT];
+		shader_dirty = !!(dirty & FD_DIRTY_SHADER_PROG);
+
+		emit_user_consts(ctx, v, ring, constbuf);
+		emit_ubos(ctx, v, ring, constbuf);
+		if (shader_dirty)
+			emit_immediates(ctx, v, ring);
+	}
+}
+
+/* emit compute-shader consts: */
+void
+ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+		struct fd_context *ctx, const struct pipe_grid_info *info)
+{
+	enum fd_dirty_shader_state dirty = ctx->dirty_shader[PIPE_SHADER_COMPUTE];
+
+	if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST)) {
+		struct fd_constbuf_stateobj *constbuf;
+		bool shader_dirty;
+
+		constbuf = &ctx->constbuf[PIPE_SHADER_COMPUTE];
+		shader_dirty = !!(dirty & FD_DIRTY_SHADER_PROG);
+
+		emit_user_consts(ctx, v, ring, constbuf);
+		emit_ubos(ctx, v, ring, constbuf);
+		if (shader_dirty)
+			emit_immediates(ctx, v, ring);
+	}
+
+	/* emit compute-shader driver-params: */
+	uint32_t offset = v->constbase.driver_param;
+	if (v->constlen > offset) {
+		uint32_t compute_params[IR3_DP_CS_COUNT] = {
+			[IR3_DP_NUM_WORK_GROUPS_X] = info->grid[0],
+			[IR3_DP_NUM_WORK_GROUPS_Y] = info->grid[1],
+			[IR3_DP_NUM_WORK_GROUPS_Z] = info->grid[2],
+			/* do we need work-group-size? */
+		};
+
+		fd_wfi(ctx->batch, ring);
+		ctx->emit_const(ring, SHADER_COMPUTE, offset * 4, 0,
+				ARRAY_SIZE(compute_params), compute_params, NULL);
+	}
+}
diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 8c9483e1b..6c2af6d36 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -31,37 +31,29 @@
 #include "pipe/p_state.h"
 #include "compiler/shader_enums.h"
+#include "util/bitscan.h"
 
 #include "ir3.h"
 #include "disasm.h"
 
 /* driver param indices: */
 enum ir3_driver_param {
+	/* compute shader driver params: */
+	IR3_DP_NUM_WORK_GROUPS_X = 0,
+	IR3_DP_NUM_WORK_GROUPS_Y = 1,
+	IR3_DP_NUM_WORK_GROUPS_Z = 2,
+	IR3_DP_CS_COUNT = 4, /* must be aligned to vec4 */
+
+	/* vertex shader driver params: */
 	IR3_DP_VTXID_BASE = 0,
 	IR3_DP_VTXCNT_MAX = 1,
 	/* user-clip-plane components, up to 8x vec4's: */
 	IR3_DP_UCP0_X = 4,
 	/* .... */
 	IR3_DP_UCP7_W = 35,
-	IR3_DP_COUNT = 36 /* must be aligned to vec4 */
+	IR3_DP_VS_COUNT = 36 /* must be aligned to vec4 */
 };
 
-/* Layout of constant registers:
- *
- *   num_uniform * vec4 - user consts
- *   4 * vec4           - UBO addresses
- *   if (vertex shader) {
- *       N * vec4       - driver params (IR3_DP_*)
- *       1 * vec4       - stream-out addresses
- *   }
- *
- * TODO this could be made more dynamic, to at least skip sections
- * that we don't need..
- */
-#define IR3_UBOS_OFF 0 /* UBOs after user consts */
-#define IR3_DRIVER_PARAM_OFF 4 /* driver params after UBOs */
-#define IR3_TFBOS_OFF (IR3_DRIVER_PARAM_OFF + IR3_DP_COUNT/4)
-
 /* Configuration key used to identify a shader variant.. different
  * shader variants can be used to implement features not supported
  * in hw (two sided color), binning-pass vertex shader, etc.
@@ -120,6 +112,57 @@ ir3_shader_key_equal(struct ir3_shader_key *a, struct ir3_shader_key *b)
 	return a->global == b->global;
 }
 
+/* will the two keys produce different lowering for a fragment shader? */
+static inline bool
+ir3_shader_key_changes_fs(struct ir3_shader_key *key, struct ir3_shader_key *last_key)
+{
+	if (last_key->has_per_samp || key->has_per_samp) {
+		if ((last_key->fsaturate_s != key->fsaturate_s) ||
+				(last_key->fsaturate_t != key->fsaturate_t) ||
+				(last_key->fsaturate_r != key->fsaturate_r) ||
+				(last_key->fastc_srgb != key->fastc_srgb))
+			return true;
+	}
+
+	if (last_key->fclamp_color != key->fclamp_color)
+		return true;
+
+	if (last_key->color_two_side != key->color_two_side)
+		return true;
+
+	if (last_key->half_precision != key->half_precision)
+		return true;
+
+	if (last_key->rasterflat != key->rasterflat)
+		return true;
+
+	if (last_key->ucp_enables != key->ucp_enables)
+		return true;
+
+	return false;
+}
+
+/* will the two keys produce different lowering for a vertex shader? */
+static inline bool
+ir3_shader_key_changes_vs(struct ir3_shader_key *key, struct ir3_shader_key *last_key)
+{
+	if (last_key->has_per_samp || key->has_per_samp) {
+		if ((last_key->vsaturate_s != key->vsaturate_s) ||
+				(last_key->vsaturate_t != key->vsaturate_t) ||
+				(last_key->vsaturate_r != key->vsaturate_r) ||
+				(last_key->vastc_srgb != key->vastc_srgb))
+			return true;
+	}
+
+	if (last_key->vclamp_color != key->vclamp_color)
+		return true;
+
+	if (last_key->ucp_enables != key->ucp_enables)
+		return true;
+
+	return false;
+}
+
 struct ir3_shader_variant {
 	struct fd_bo *bo;
@@ -142,6 +185,12 @@ struct ir3_shader_variant {
 	 */
 	unsigned constlen;
 
+	/* number of uniforms (in vec4), not including built-in compiler
+	 * constants, etc.
+	 */
+	unsigned num_uniforms;
+	unsigned num_ubos;
+
 	/* About Linkage:
 	 * + Let the frag shader determine the position/compmask for the
 	 *   varyings, since it is the place where we know if the varying
@@ -180,16 +229,10 @@ struct ir3_shader_variant {
 		uint8_t regid;
 		uint8_t compmask;
 		uint8_t ncomp;
-		/* In theory inloc of fs should match outloc of vs. Or
-		 * rather the outloc of the vs is 8 plus the offset passed
-		 * to bary.f. Presumably that +8 is to account for
-		 * gl_Position/gl_PointSize?
-		 *
-		 * NOTE inloc is currently aligned to 4 (we don't try
-		 * to pack varyings). Changing this would likely break
-		 * assumptions in few places (like setting up of flat
-		 * shading in fd3_program) so be sure to check all the
-		 * spots where inloc is used.
+		/* location of input (ie. offset passed to bary.f, etc). This
+		 * matches the SP_VS_VPC_DST_REG.OUTLOCn value (a3xx and a4xx
+		 * have the OUTLOCn value offset by 8, presumably to account
+		 * for gl_Position/gl_PointSize)
 		 */
 		uint8_t inloc;
 		/* vertex shader specific: */
@@ -213,15 +256,24 @@ struct ir3_shader_variant {
 	/* do we have one or more texture sample instructions: */
 	bool has_samp;
 
+	/* do we have one or more SSBO instructions: */
+	bool has_ssbo;
+
 	/* do we have kill instructions: */
 	bool has_kill;
 
-	/* const reg # of first immediate, ie. 1 == c1
-	 * (not regid, because TGSI thinks in terms of vec4 registers,
-	 * not scalar registers)
+	/* Layout of constant registers, each section (in vec4). Pointer size
+	 * is 32b (a3xx, a4xx), or 64b (a5xx+), which effects the size of the
+	 * UBO and stream-out consts.
 	 */
-	unsigned first_driver_param;
-	unsigned first_immediate;
+	struct {
+		/* user const start at zero */
+		unsigned ubo;
+		unsigned driver_param;
+		unsigned tfbo;
+		unsigned immediate;
+	} constbase;
+
 	unsigned immediates_count;
 	struct {
 		uint32_t val[4];
@@ -268,6 +320,10 @@ void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id);
 struct ir3_shader * ir3_shader_create(struct ir3_compiler *compiler,
 		const struct pipe_shader_state *cso, enum shader_t type,
 		struct pipe_debug_callback *debug);
+struct ir3_shader *
+ir3_shader_create_compute(struct ir3_compiler *compiler,
+		const struct pipe_compute_state *cso,
+		struct pipe_debug_callback *debug);
 void ir3_shader_destroy(struct ir3_shader *shader);
 struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader,
 		struct ir3_shader_key key, struct pipe_debug_callback *debug);
@@ -276,8 +332,12 @@ uint64_t ir3_shader_outputs(const struct ir3_shader *so);
 
 struct fd_ringbuffer;
 struct fd_context;
-void ir3_emit_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
-		struct fd_context *ctx, const struct pipe_draw_info *info, uint32_t dirty);
+void ir3_emit_vs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+		struct fd_context *ctx, const struct pipe_draw_info *info);
+void ir3_emit_fs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+		struct fd_context *ctx);
+void ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+		struct fd_context *ctx, const struct pipe_grid_info *info);
 
 static inline const char *
 ir3_shader_stage(struct ir3_shader *shader)
@@ -344,6 +404,52 @@ ir3_next_varying(const struct ir3_shader_variant *so, int i)
 	return i;
 }
 
+struct ir3_shader_linkage {
+	uint8_t max_loc;
+	uint8_t cnt;
+	struct {
+		uint8_t regid;
+		uint8_t compmask;
+		uint8_t loc;
+	} var[32];
+};
+
+static inline void
+ir3_link_add(struct ir3_shader_linkage *l, uint8_t regid, uint8_t compmask, uint8_t loc)
+{
+	int i = l->cnt++;
+
+	debug_assert(i < ARRAY_SIZE(l->var));
+
+	l->var[i].regid = regid;
+	l->var[i].compmask = compmask;
+	l->var[i].loc = loc;
+	l->max_loc = MAX2(l->max_loc, loc + util_last_bit(compmask));
+}
+
+static inline void
+ir3_link_shaders(struct ir3_shader_linkage *l,
+		const struct ir3_shader_variant *vs,
+		const struct ir3_shader_variant *fs)
+{
+	int j = -1, k;
+
+	while (l->cnt < ARRAY_SIZE(l->var)) {
+		j = ir3_next_varying(fs, j);
+
+		if (j >= fs->inputs_count)
+			break;
+
+		if (fs->inputs[j].inloc >= fs->total_in)
+			continue;
+
+		k = ir3_find_output(vs, fs->inputs[j].slot);
+
+		ir3_link_add(l, vs->outputs[k].regid,
+				fs->inputs[j].compmask, fs->inputs[j].inloc);
+	}
+}
+
 static inline uint32_t
 ir3_find_output_regid(const struct ir3_shader_variant *so, unsigned slot)
 {
@@ -354,4 +460,14 @@ ir3_find_output_regid(const struct ir3_shader_variant *so, unsigned slot)
 	return regid(63, 0);
 }
 
+static inline uint32_t
+ir3_find_sysval_regid(const struct ir3_shader_variant *so, unsigned slot)
+{
+	int j;
+	for (j = 0; j < so->inputs_count; j++)
+		if (so->inputs[j].sysval && (so->inputs[j].slot == slot))
+			return so->inputs[j].regid;
+	return regid(63, 0);
+}
+
 #endif /* IR3_SHADER_H_ */
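
The ir3_nir_trig.c tables above encode a range-reduction rewrite for fsin/fcos: the constants are 1/(2*pi) ~= 0.159155, 2*pi ~= 6.283185 and pi ~= 3.141593, so the replacement computes sin(2*pi * fract(x/(2*pi) + 0.5) - pi), which equals sin(x) but with the argument folded into [-pi, pi) (presumably the range the hw sin/cos handles accurately). A minimal standalone C sketch of the same math, not taken from the driver, with a made-up helper name:

#include <math.h>
#include <stdio.h>

/* Illustrative only: the same range reduction that the nir_search tables
 * apply to fsin/fcos, written as plain scalar math.  Constants match the
 * tables: 0.159155 ~= 1/(2*pi), 6.283185 ~= 2*pi, 3.141593 ~= pi.
 */
static float
reduce_trig_arg(float x)
{
	float t = x * 0.159155f + 0.5f;    /* x / (2*pi), biased by 0.5 */
	t = t - floorf(t);                 /* ffract: now in [0, 1) */
	return 6.283185f * t - 3.141593f;  /* scale back, shift to [-pi, pi) */
}

int
main(void)
{
	float x = 100.0f;
	/* the two values agree up to the precision of the rounded constants */
	printf("%f %f\n", sinf(x), sinf(reduce_trig_arg(x)));
	return 0;
}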
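
The ir3_shader.h hunks above drop the fixed IR3_UBOS_OFF / IR3_DRIVER_PARAM_OFF / IR3_TFBOS_OFF layout in favour of per-variant constbase offsets (user consts first, then UBO pointers, driver params, stream-out addresses and immediates). A rough sketch, in vec4 units, of how such offsets could be assigned; the function name, parameters and section sizes below are hypothetical and not taken from the driver:

/* Sketch only: assign per-variant const-register sections back to back,
 * mirroring the constbase fields added above.  All sizes are in vec4 units;
 * the UBO/stream-out sections would be larger on a5xx+ where pointers are
 * 64b instead of 32b.
 */
struct constbase_sketch {
	unsigned ubo;
	unsigned driver_param;
	unsigned tfbo;
	unsigned immediate;
};

unsigned
layout_constbase(struct constbase_sketch *c, unsigned num_uniforms,
		unsigned ubo_ptr_vec4s, unsigned driver_param_vec4s,
		unsigned tfbo_ptr_vec4s)
{
	unsigned off = num_uniforms;   /* user consts start at c0 */

	c->ubo = off;                  /* UBO base pointers */
	off += ubo_ptr_vec4s;
	c->driver_param = off;         /* IR3_DP_* values */
	off += driver_param_vec4s;
	c->tfbo = off;                 /* stream-out buffer addresses */
	off += tfbo_ptr_vec4s;
	c->immediate = off;            /* compiler-collected immediates last */

	return off;                    /* where the immediates begin */
}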
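
The new ir3_shader_linkage / ir3_link_add / ir3_link_shaders helpers build a table pairing vertex-shader output registers with fragment-shader input locations. A hypothetical caller (the function below is not part of the driver) might use them like this:

#include "ir3_shader.h"

/* Illustrative only: link a VS/FS variant pair and report how much varying
 * space the pair consumes.
 */
unsigned
count_linked_varying_components(const struct ir3_shader_variant *vs,
		const struct ir3_shader_variant *fs)
{
	struct ir3_shader_linkage l = {0};

	/* fills l.var[] with (VS regid, compmask, FS inloc) triples */
	ir3_link_shaders(&l, vs, fs);

	/* max_loc ends up as the highest input location plus util_last_bit()
	 * of its component mask, i.e. one past the last varying component
	 * actually consumed by the fragment shader. */
	return l.max_loc;
}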