summaryrefslogtreecommitdiff
path: root/lib/mesa/src/gallium/drivers/radeon
diff options
context:
space:
mode:
authorJonathan Gray <jsg@cvs.openbsd.org>2019-01-29 11:52:33 +0000
committerJonathan Gray <jsg@cvs.openbsd.org>2019-01-29 11:52:33 +0000
commit37bbf6a1792773f11c15a4da1588a7520ee2fb4e (patch)
tree64944d4aa665a1e479cfc004e446593062254550 /lib/mesa/src/gallium/drivers/radeon
parent6b139c2063623e9310025247cd966490b9aa57ea (diff)
Merge Mesa 18.3.2
Diffstat (limited to 'lib/mesa/src/gallium/drivers/radeon')
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/Makefile.am28
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/Makefile.in870
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/Makefile.sources23
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/r600_buffer_common.c681
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/r600_cs.h133
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/r600_gpu_load.c283
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/r600_perfcounter.c292
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.c1498
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.h913
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/r600_query.c2101
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/r600_query.h328
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/r600_test_dma.c398
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/r600_texture.c2933
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c197
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h6
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/radeon_vce.c57
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/radeon_vce_40_2_2.c13
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/radeon_vce_50.c8
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/radeon_vce_52.c17
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.c446
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.h272
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/radeon_video.c182
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/radeon_video.h24
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/radeon_winsys.h244
24 files changed, 1000 insertions, 10947 deletions
diff --git a/lib/mesa/src/gallium/drivers/radeon/Makefile.am b/lib/mesa/src/gallium/drivers/radeon/Makefile.am
deleted file mode 100644
index 7f64b7615..000000000
--- a/lib/mesa/src/gallium/drivers/radeon/Makefile.am
+++ /dev/null
@@ -1,28 +0,0 @@
-include Makefile.sources
-include $(top_srcdir)/src/gallium/Automake.inc
-
-
-AM_CFLAGS = \
- $(GALLIUM_DRIVER_CFLAGS) \
- $(RADEON_CFLAGS) \
- -Wstrict-overflow=0
-# ^^ disable warnings about overflows (os_time_timeout)
-
-noinst_LTLIBRARIES = libradeon.la
-
-libradeon_la_SOURCES = \
- $(C_SOURCES)
-
-if HAVE_GALLIUM_LLVM
-
-AM_CFLAGS += \
- $(LLVM_CFLAGS)
-
-libradeon_la_LIBADD = \
- $(CLOCK_LIB) \
- $(LLVM_LIBS)
-
-libradeon_la_LDFLAGS = \
- $(LLVM_LDFLAGS)
-
-endif
diff --git a/lib/mesa/src/gallium/drivers/radeon/Makefile.in b/lib/mesa/src/gallium/drivers/radeon/Makefile.in
deleted file mode 100644
index fa9d3a7b7..000000000
--- a/lib/mesa/src/gallium/drivers/radeon/Makefile.in
+++ /dev/null
@@ -1,870 +0,0 @@
-# Makefile.in generated by automake 1.12.6 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994-2012 Free Software Foundation, Inc.
-
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
-@SET_MAKE@
-
-VPATH = @srcdir@
-am__make_dryrun = \
- { \
- am__dry=no; \
- case $$MAKEFLAGS in \
- *\\[\ \ ]*) \
- echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
- | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
- *) \
- for am__flg in $$MAKEFLAGS; do \
- case $$am__flg in \
- *=*|--*) ;; \
- *n*) am__dry=yes; break;; \
- esac; \
- done;; \
- esac; \
- test $$am__dry = yes; \
- }
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-target_triplet = @target@
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
- $(srcdir)/Makefile.sources $(top_srcdir)/bin/depcomp \
- $(top_srcdir)/src/gallium/Automake.inc
-@HAVE_LIBDRM_TRUE@am__append_1 = \
-@HAVE_LIBDRM_TRUE@ $(LIBDRM_LIBS)
-
-@HAVE_DRISW_TRUE@am__append_2 = \
-@HAVE_DRISW_TRUE@ $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la
-
-@HAVE_DRISW_KMS_TRUE@am__append_3 = \
-@HAVE_DRISW_KMS_TRUE@ $(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la \
-@HAVE_DRISW_KMS_TRUE@ $(LIBDRM_LIBS)
-
-@HAVE_GALLIUM_LLVM_TRUE@am__append_4 = \
-@HAVE_GALLIUM_LLVM_TRUE@ $(LLVM_CFLAGS)
-
-subdir = src/gallium/drivers/radeon
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
- $(top_srcdir)/m4/ax_check_gnu_make.m4 \
- $(top_srcdir)/m4/ax_check_python_mako_module.m4 \
- $(top_srcdir)/m4/ax_gcc_builtin.m4 \
- $(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
- $(top_srcdir)/m4/ax_prog_bison.m4 \
- $(top_srcdir)/m4/ax_prog_flex.m4 \
- $(top_srcdir)/m4/ax_pthread.m4 $(top_srcdir)/m4/libtool.m4 \
- $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
- $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
- $(top_srcdir)/VERSION $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-LTLIBRARIES = $(noinst_LTLIBRARIES)
-am__DEPENDENCIES_1 =
-@HAVE_GALLIUM_LLVM_TRUE@libradeon_la_DEPENDENCIES = \
-@HAVE_GALLIUM_LLVM_TRUE@ $(am__DEPENDENCIES_1) \
-@HAVE_GALLIUM_LLVM_TRUE@ $(am__DEPENDENCIES_1)
-am__objects_1 = r600_buffer_common.lo r600_gpu_load.lo \
- r600_perfcounter.lo r600_pipe_common.lo r600_query.lo \
- r600_test_dma.lo r600_texture.lo radeon_uvd.lo \
- radeon_vcn_dec.lo radeon_vce_40_2_2.lo radeon_vce_50.lo \
- radeon_vce_52.lo radeon_vce.lo radeon_video.lo
-am_libradeon_la_OBJECTS = $(am__objects_1)
-libradeon_la_OBJECTS = $(am_libradeon_la_OBJECTS)
-AM_V_lt = $(am__v_lt_@AM_V@)
-am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
-am__v_lt_0 = --silent
-am__v_lt_1 =
-libradeon_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
- $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
- $(libradeon_la_LDFLAGS) $(LDFLAGS) -o $@
-AM_V_P = $(am__v_P_@AM_V@)
-am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
-am__v_P_0 = false
-am__v_P_1 = :
-AM_V_GEN = $(am__v_GEN_@AM_V@)
-am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
-am__v_GEN_0 = @echo " GEN " $@;
-am__v_GEN_1 =
-AM_V_at = $(am__v_at_@AM_V@)
-am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
-am__v_at_0 = @
-am__v_at_1 =
-DEFAULT_INCLUDES = -I.@am__isrc@
-depcomp = $(SHELL) $(top_srcdir)/bin/depcomp
-am__depfiles_maybe = depfiles
-am__mv = mv -f
-COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
- $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
- $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
- $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
- $(AM_CFLAGS) $(CFLAGS)
-AM_V_CC = $(am__v_CC_@AM_V@)
-am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
-am__v_CC_0 = @echo " CC " $@;
-am__v_CC_1 =
-CCLD = $(CC)
-LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
- $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
- $(AM_LDFLAGS) $(LDFLAGS) -o $@
-AM_V_CCLD = $(am__v_CCLD_@AM_V@)
-am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
-am__v_CCLD_0 = @echo " CCLD " $@;
-am__v_CCLD_1 =
-SOURCES = $(libradeon_la_SOURCES)
-DIST_SOURCES = $(libradeon_la_SOURCES)
-am__can_run_installinfo = \
- case $$AM_UPDATE_INFO_DIR in \
- n|no|NO) false;; \
- *) (install-info --version) >/dev/null 2>&1;; \
- esac
-ETAGS = etags
-CTAGS = ctags
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ACLOCAL = @ACLOCAL@
-AMDGPU_CFLAGS = @AMDGPU_CFLAGS@
-AMDGPU_LIBS = @AMDGPU_LIBS@
-AMTAR = @AMTAR@
-AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
-ANDROID_CFLAGS = @ANDROID_CFLAGS@
-ANDROID_LIBS = @ANDROID_LIBS@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BSYMBOLIC = @BSYMBOLIC@
-CC = @CC@
-CCAS = @CCAS@
-CCASDEPMODE = @CCASDEPMODE@
-CCASFLAGS = @CCASFLAGS@
-CCDEPMODE = @CCDEPMODE@
-CFLAGS = @CFLAGS@
-CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@
-CLOCK_LIB = @CLOCK_LIB@
-CLOVER_STD_OVERRIDE = @CLOVER_STD_OVERRIDE@
-CPP = @CPP@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXDEPMODE = @CXXDEPMODE@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-D3D_DRIVER_INSTALL_DIR = @D3D_DRIVER_INSTALL_DIR@
-DEFINES = @DEFINES@
-DEFS = @DEFS@
-DEPDIR = @DEPDIR@
-DLLTOOL = @DLLTOOL@
-DLOPEN_LIBS = @DLOPEN_LIBS@
-DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
-DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
-DRIGL_CFLAGS = @DRIGL_CFLAGS@
-DRIGL_LIBS = @DRIGL_LIBS@
-DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
-DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@
-DRI_LIB_DEPS = @DRI_LIB_DEPS@
-DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGL_CFLAGS = @EGL_CFLAGS@
-EGL_LIB_DEPS = @EGL_LIB_DEPS@
-EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
-EGREP = @EGREP@
-ETNAVIV_CFLAGS = @ETNAVIV_CFLAGS@
-ETNAVIV_LIBS = @ETNAVIV_LIBS@
-EXEEXT = @EXEEXT@
-EXPAT_CFLAGS = @EXPAT_CFLAGS@
-EXPAT_LIBS = @EXPAT_LIBS@
-FGREP = @FGREP@
-FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@
-FREEDRENO_LIBS = @FREEDRENO_LIBS@
-GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
-GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@
-GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@
-GC_SECTIONS = @GC_SECTIONS@
-GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@
-GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@
-GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@
-GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@
-GLPROTO_CFLAGS = @GLPROTO_CFLAGS@
-GLPROTO_LIBS = @GLPROTO_LIBS@
-GLVND_CFLAGS = @GLVND_CFLAGS@
-GLVND_LIBS = @GLVND_LIBS@
-GLX_TLS = @GLX_TLS@
-GL_LIB = @GL_LIB@
-GL_LIB_DEPS = @GL_LIB_DEPS@
-GL_PC_CFLAGS = @GL_PC_CFLAGS@
-GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@
-GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@
-GREP = @GREP@
-HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@
-I915_CFLAGS = @I915_CFLAGS@
-I915_LIBS = @I915_LIBS@
-INDENT = @INDENT@
-INDENT_FLAGS = @INDENT_FLAGS@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LD_NO_UNDEFINED = @LD_NO_UNDEFINED@
-LEX = @LEX@
-LEXLIB = @LEXLIB@
-LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
-LIBATOMIC_LIBS = @LIBATOMIC_LIBS@
-LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@
-LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@
-LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
-LIBDRM_LIBS = @LIBDRM_LIBS@
-LIBELF_CFLAGS = @LIBELF_CFLAGS@
-LIBELF_LIBS = @LIBELF_LIBS@
-LIBGLVND_DATADIR = @LIBGLVND_DATADIR@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LIBSENSORS_LIBS = @LIBSENSORS_LIBS@
-LIBTOOL = @LIBTOOL@
-LIBUNWIND_CFLAGS = @LIBUNWIND_CFLAGS@
-LIBUNWIND_LIBS = @LIBUNWIND_LIBS@
-LIB_DIR = @LIB_DIR@
-LIB_EXT = @LIB_EXT@
-LIPO = @LIPO@
-LLVM_CFLAGS = @LLVM_CFLAGS@
-LLVM_CONFIG = @LLVM_CONFIG@
-LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
-LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
-LLVM_LDFLAGS = @LLVM_LDFLAGS@
-LLVM_LIBS = @LLVM_LIBS@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-MSVC2013_COMPAT_CFLAGS = @MSVC2013_COMPAT_CFLAGS@
-MSVC2013_COMPAT_CXXFLAGS = @MSVC2013_COMPAT_CXXFLAGS@
-NINE_MAJOR = @NINE_MAJOR@
-NINE_MINOR = @NINE_MINOR@
-NINE_TINY = @NINE_TINY@
-NINE_VERSION = @NINE_VERSION@
-NM = @NM@
-NMEDIT = @NMEDIT@
-NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@
-NOUVEAU_LIBS = @NOUVEAU_LIBS@
-NVVIEUX_CFLAGS = @NVVIEUX_CFLAGS@
-NVVIEUX_LIBS = @NVVIEUX_LIBS@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
-OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
-OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
-OPENCL_LIBNAME = @OPENCL_LIBNAME@
-OPENCL_VERSION = @OPENCL_VERSION@
-OSMESA_LIB = @OSMESA_LIB@
-OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
-OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
-OSMESA_PC_REQ = @OSMESA_PC_REQ@
-OSMESA_VERSION = @OSMESA_VERSION@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PKG_CONFIG = @PKG_CONFIG@
-PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
-PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
-POSIX_SHELL = @POSIX_SHELL@
-PTHREADSTUBS_CFLAGS = @PTHREADSTUBS_CFLAGS@
-PTHREADSTUBS_LIBS = @PTHREADSTUBS_LIBS@
-PTHREAD_CC = @PTHREAD_CC@
-PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
-PTHREAD_LIBS = @PTHREAD_LIBS@
-PWR8_CFLAGS = @PWR8_CFLAGS@
-PYTHON2 = @PYTHON2@
-RADEON_CFLAGS = @RADEON_CFLAGS@
-RADEON_LIBS = @RADEON_LIBS@
-RANLIB = @RANLIB@
-RM = @RM@
-SED = @SED@
-SELINUX_CFLAGS = @SELINUX_CFLAGS@
-SELINUX_LIBS = @SELINUX_LIBS@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-SIMPENROSE_CFLAGS = @SIMPENROSE_CFLAGS@
-SIMPENROSE_LIBS = @SIMPENROSE_LIBS@
-SSE41_CFLAGS = @SSE41_CFLAGS@
-STRIP = @STRIP@
-SWR_AVX2_CXXFLAGS = @SWR_AVX2_CXXFLAGS@
-SWR_AVX_CXXFLAGS = @SWR_AVX_CXXFLAGS@
-SWR_CXX11_CXXFLAGS = @SWR_CXX11_CXXFLAGS@
-SWR_KNL_CXXFLAGS = @SWR_KNL_CXXFLAGS@
-SWR_SKX_CXXFLAGS = @SWR_SKX_CXXFLAGS@
-VALGRIND_CFLAGS = @VALGRIND_CFLAGS@
-VALGRIND_LIBS = @VALGRIND_LIBS@
-VA_CFLAGS = @VA_CFLAGS@
-VA_LIBS = @VA_LIBS@
-VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
-VA_MAJOR = @VA_MAJOR@
-VA_MINOR = @VA_MINOR@
-VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
-VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
-VDPAU_CFLAGS = @VDPAU_CFLAGS@
-VDPAU_LIBS = @VDPAU_LIBS@
-VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
-VDPAU_MAJOR = @VDPAU_MAJOR@
-VDPAU_MINOR = @VDPAU_MINOR@
-VERSION = @VERSION@
-VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
-VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
-VL_CFLAGS = @VL_CFLAGS@
-VL_LIBS = @VL_LIBS@
-VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
-WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
-WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
-WAYLAND_SCANNER = @WAYLAND_SCANNER@
-WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
-WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
-WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
-WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
-WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
-X11_INCLUDES = @X11_INCLUDES@
-XA_MAJOR = @XA_MAJOR@
-XA_MINOR = @XA_MINOR@
-XA_TINY = @XA_TINY@
-XA_VERSION = @XA_VERSION@
-XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@
-XCB_DRI2_LIBS = @XCB_DRI2_LIBS@
-XCB_DRI3_CFLAGS = @XCB_DRI3_CFLAGS@
-XCB_DRI3_LIBS = @XCB_DRI3_LIBS@
-XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@
-XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@
-XLIBGL_CFLAGS = @XLIBGL_CFLAGS@
-XLIBGL_LIBS = @XLIBGL_LIBS@
-XVMC_CFLAGS = @XVMC_CFLAGS@
-XVMC_LIBS = @XVMC_LIBS@
-XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
-XVMC_MAJOR = @XVMC_MAJOR@
-XVMC_MINOR = @XVMC_MINOR@
-YACC = @YACC@
-YFLAGS = @YFLAGS@
-ZLIB_CFLAGS = @ZLIB_CFLAGS@
-ZLIB_LIBS = @ZLIB_LIBS@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-acv_mako_found = @acv_mako_found@
-am__include = @am__include@
-am__leading_dot = @am__leading_dot@
-am__quote = @am__quote@
-am__tar = @am__tar@
-am__untar = @am__untar@
-ax_pthread_config = @ax_pthread_config@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-ifGNUmake = @ifGNUmake@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target = @target@
-target_alias = @target_alias@
-target_cpu = @target_cpu@
-target_os = @target_os@
-target_vendor = @target_vendor@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-C_SOURCES := \
- r600_buffer_common.c \
- r600_cs.h \
- r600_gpu_load.c \
- r600_perfcounter.c \
- r600_pipe_common.c \
- r600_pipe_common.h \
- r600_query.c \
- r600_query.h \
- r600_test_dma.c \
- r600_texture.c \
- radeon_uvd.c \
- radeon_uvd.h \
- radeon_vcn_dec.c \
- radeon_vcn_dec.h \
- radeon_vce_40_2_2.c \
- radeon_vce_50.c \
- radeon_vce_52.c \
- radeon_vce.c \
- radeon_vce.h \
- radeon_video.c \
- radeon_video.h \
- radeon_winsys.h
-
-GALLIUM_CFLAGS = \
- -I$(top_srcdir)/include \
- -I$(top_srcdir)/src \
- -I$(top_srcdir)/src/gallium/include \
- -I$(top_srcdir)/src/gallium/auxiliary \
- $(DEFINES)
-
-
-# src/gallium/auxiliary must appear before src/gallium/drivers
-# because there are stupidly two rbug_context.h files in
-# different directories, and which one is included by the
-# preprocessor is determined by the ordering of the -I flags.
-GALLIUM_DRIVER_CFLAGS = \
- -I$(srcdir)/include \
- -I$(top_srcdir)/src \
- -I$(top_srcdir)/include \
- -I$(top_srcdir)/src/gallium/include \
- -I$(top_srcdir)/src/gallium/auxiliary \
- -I$(top_srcdir)/src/gallium/drivers \
- -I$(top_srcdir)/src/gallium/winsys \
- $(DEFINES) \
- $(VISIBILITY_CFLAGS)
-
-GALLIUM_DRIVER_CXXFLAGS = \
- -I$(srcdir)/include \
- -I$(top_srcdir)/src \
- -I$(top_srcdir)/include \
- -I$(top_srcdir)/src/gallium/include \
- -I$(top_srcdir)/src/gallium/auxiliary \
- -I$(top_srcdir)/src/gallium/drivers \
- -I$(top_srcdir)/src/gallium/winsys \
- $(DEFINES) \
- $(VISIBILITY_CXXFLAGS)
-
-GALLIUM_TARGET_CFLAGS = \
- -I$(top_srcdir)/src \
- -I$(top_srcdir)/include \
- -I$(top_srcdir)/src/loader \
- -I$(top_srcdir)/src/gallium/include \
- -I$(top_srcdir)/src/gallium/auxiliary \
- -I$(top_srcdir)/src/gallium/drivers \
- -I$(top_srcdir)/src/gallium/winsys \
- -I$(top_builddir)/src/util/ \
- -I$(top_builddir)/src/gallium/drivers/ \
- $(DEFINES) \
- $(PTHREAD_CFLAGS) \
- $(LIBDRM_CFLAGS) \
- $(VISIBILITY_CFLAGS)
-
-GALLIUM_COMMON_LIB_DEPS = -lm $(LIBUNWIND_LIBS) $(LIBSENSORS_LIBS) \
- $(CLOCK_LIB) $(PTHREAD_LIBS) $(DLOPEN_LIBS) $(am__append_1)
-GALLIUM_WINSYS_CFLAGS = \
- -I$(top_srcdir)/src \
- -I$(top_srcdir)/include \
- -I$(top_srcdir)/src/gallium/include \
- -I$(top_srcdir)/src/gallium/auxiliary \
- $(DEFINES) \
- $(VISIBILITY_CFLAGS)
-
-GALLIUM_PIPE_LOADER_WINSYS_LIBS = \
- $(top_builddir)/src/gallium/winsys/sw/null/libws_null.la \
- $(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \
- $(am__append_2) $(am__append_3)
-AM_CFLAGS = $(GALLIUM_DRIVER_CFLAGS) $(RADEON_CFLAGS) \
- -Wstrict-overflow=0 $(am__append_4)
-# ^^ disable warnings about overflows (os_time_timeout)
-noinst_LTLIBRARIES = libradeon.la
-libradeon_la_SOURCES = \
- $(C_SOURCES)
-
-@HAVE_GALLIUM_LLVM_TRUE@libradeon_la_LIBADD = \
-@HAVE_GALLIUM_LLVM_TRUE@ $(CLOCK_LIB) \
-@HAVE_GALLIUM_LLVM_TRUE@ $(LLVM_LIBS)
-
-@HAVE_GALLIUM_LLVM_TRUE@libradeon_la_LDFLAGS = \
-@HAVE_GALLIUM_LLVM_TRUE@ $(LLVM_LDFLAGS)
-
-all: all-am
-
-.SUFFIXES:
-.SUFFIXES: .c .lo .o .obj
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/gallium/drivers/radeon/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --foreign src/gallium/drivers/radeon/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-$(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc:
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-
-clean-noinstLTLIBRARIES:
- -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
- @list='$(noinst_LTLIBRARIES)'; \
- locs=`for p in $$list; do echo $$p; done | \
- sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
- sort -u`; \
- test -z "$$locs" || { \
- echo rm -f $${locs}; \
- rm -f $${locs}; \
- }
-libradeon.la: $(libradeon_la_OBJECTS) $(libradeon_la_DEPENDENCIES) $(EXTRA_libradeon_la_DEPENDENCIES)
- $(AM_V_CCLD)$(libradeon_la_LINK) $(libradeon_la_OBJECTS) $(libradeon_la_LIBADD) $(LIBS)
-
-mostlyclean-compile:
- -rm -f *.$(OBJEXT)
-
-distclean-compile:
- -rm -f *.tab.c
-
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_buffer_common.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_gpu_load.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_perfcounter.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_pipe_common.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_query.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_test_dma.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_texture.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_uvd.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_vce.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_vce_40_2_2.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_vce_50.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_vce_52.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_vcn_dec.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_video.Plo@am__quote@
-
-.c.o:
-@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
-@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
-@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
-
-.c.obj:
-@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
-@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
-@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
-
-.c.lo:
-@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\
-@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
-@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- mkid -fID $$unique
-tags: TAGS
-
-TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- set x; \
- here=`pwd`; \
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- shift; \
- if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
- test -n "$$unique" || unique=$$empty_fix; \
- if test $$# -gt 0; then \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- "$$@" $$unique; \
- else \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- $$unique; \
- fi; \
- fi
-ctags: CTAGS
-CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- test -z "$(CTAGS_ARGS)$$unique" \
- || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
- $$unique
-
-GTAGS:
- here=`$(am__cd) $(top_builddir) && pwd` \
- && $(am__cd) $(top_srcdir) \
- && gtags -i $(GTAGS_ARGS) "$$here"
-
-cscopelist: $(HEADERS) $(SOURCES) $(LISP)
- list='$(SOURCES) $(HEADERS) $(LISP)'; \
- case "$(srcdir)" in \
- [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
- *) sdir=$(subdir)/$(srcdir) ;; \
- esac; \
- for i in $$list; do \
- if test -f "$$i"; then \
- echo "$(subdir)/$$i"; \
- else \
- echo "$$sdir/$$i"; \
- fi; \
- done >> $(top_builddir)/cscope.files
-
-distclean-tags:
- -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-am
-all-am: Makefile $(LTLIBRARIES)
-installdirs:
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-am
-install-strip:
- if test -z '$(STRIP)'; then \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- install; \
- else \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
- fi
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
-
-clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
- mostlyclean-am
-
-distclean: distclean-am
- -rm -rf ./$(DEPDIR)
- -rm -f Makefile
-distclean-am: clean-am distclean-compile distclean-generic \
- distclean-tags
-
-dvi: dvi-am
-
-dvi-am:
-
-html: html-am
-
-html-am:
-
-info: info-am
-
-info-am:
-
-install-data-am:
-
-install-dvi: install-dvi-am
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-am
-
-install-html-am:
-
-install-info: install-info-am
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-am
-
-install-pdf-am:
-
-install-ps: install-ps-am
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-am
- -rm -rf ./$(DEPDIR)
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-am
-
-mostlyclean-am: mostlyclean-compile mostlyclean-generic \
- mostlyclean-libtool
-
-pdf: pdf-am
-
-pdf-am:
-
-ps: ps-am
-
-ps-am:
-
-uninstall-am:
-
-.MAKE: install-am install-strip
-
-.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
- clean-libtool clean-noinstLTLIBRARIES cscopelist ctags \
- distclean distclean-compile distclean-generic \
- distclean-libtool distclean-tags distdir dvi dvi-am html \
- html-am info info-am install install-am install-data \
- install-data-am install-dvi install-dvi-am install-exec \
- install-exec-am install-html install-html-am install-info \
- install-info-am install-man install-pdf install-pdf-am \
- install-ps install-ps-am install-strip installcheck \
- installcheck-am installdirs maintainer-clean \
- maintainer-clean-generic mostlyclean mostlyclean-compile \
- mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
- tags uninstall uninstall-am
-
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/lib/mesa/src/gallium/drivers/radeon/Makefile.sources b/lib/mesa/src/gallium/drivers/radeon/Makefile.sources
deleted file mode 100644
index 22de12973..000000000
--- a/lib/mesa/src/gallium/drivers/radeon/Makefile.sources
+++ /dev/null
@@ -1,23 +0,0 @@
-C_SOURCES := \
- r600_buffer_common.c \
- r600_cs.h \
- r600_gpu_load.c \
- r600_perfcounter.c \
- r600_pipe_common.c \
- r600_pipe_common.h \
- r600_query.c \
- r600_query.h \
- r600_test_dma.c \
- r600_texture.c \
- radeon_uvd.c \
- radeon_uvd.h \
- radeon_vcn_dec.c \
- radeon_vcn_dec.h \
- radeon_vce_40_2_2.c \
- radeon_vce_50.c \
- radeon_vce_52.c \
- radeon_vce.c \
- radeon_vce.h \
- radeon_video.c \
- radeon_video.h \
- radeon_winsys.h
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_buffer_common.c b/lib/mesa/src/gallium/drivers/radeon/r600_buffer_common.c
deleted file mode 100644
index 366581d45..000000000
--- a/lib/mesa/src/gallium/drivers/radeon/r600_buffer_common.c
+++ /dev/null
@@ -1,681 +0,0 @@
-/*
- * Copyright 2013 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Marek Olšák
- */
-
-#include "r600_cs.h"
-#include "util/u_memory.h"
-#include "util/u_upload_mgr.h"
-#include <inttypes.h>
-#include <stdio.h>
-
-/* Tell whether a buffer is referenced by one of the context's command
- * streams.
- *
- * The GFX command stream is always queried. The DMA command stream is only
- * queried when radeon_emitted(ctx->dma.cs, 0) is true. "usage" selects the
- * kind of access to look for.
- */
-bool si_rings_is_buffer_referenced(struct r600_common_context *ctx,
-				   struct pb_buffer *buf,
-				   enum radeon_bo_usage usage)
-{
-	if (ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, buf, usage))
-		return true;
-
-	return radeon_emitted(ctx->dma.cs, 0) &&
-	       ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, buf, usage);
-}
-
-/* Map a buffer for CPU access after synchronizing with the GFX and DMA
- * command streams.
- *
- * - PIPE_TRANSFER_UNSYNCHRONIZED maps right away without any checks.
- * - Maps without PIPE_TRANSFER_WRITE only wait for GPU writes; maps with it
- *   wait for both reads and writes.
- * - A command stream that references the buffer is flushed first. With
- *   PIPE_TRANSFER_DONTBLOCK the flush is asynchronous and NULL is returned
- *   instead of waiting.
- *
- * Must not be called on sparse buffers (asserted).
- * Returns the pointer obtained from the winsys buffer_map(), or NULL.
- */
-void *si_buffer_map_sync_with_rings(struct r600_common_context *ctx,
-				    struct r600_resource *resource,
-				    unsigned usage)
-{
-	const bool dont_block = (usage & PIPE_TRANSFER_DONTBLOCK) != 0;
-	/* Read-only maps only have to wait for the last write. */
-	const enum radeon_bo_usage rusage = (usage & PIPE_TRANSFER_WRITE) ?
-		RADEON_USAGE_READWRITE : RADEON_USAGE_WRITE;
-	bool flushed = false;
-
-	assert(!(resource->flags & RADEON_FLAG_SPARSE));
-
-	if (usage & PIPE_TRANSFER_UNSYNCHRONIZED)
-		return ctx->ws->buffer_map(resource->buf, NULL, usage);
-
-	if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
-	    ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, resource->buf, rusage)) {
-		if (dont_block) {
-			ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
-			return NULL;
-		}
-		ctx->gfx.flush(ctx, 0, NULL);
-		flushed = true;
-	}
-
-	if (radeon_emitted(ctx->dma.cs, 0) &&
-	    ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, resource->buf, rusage)) {
-		if (dont_block) {
-			ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
-			return NULL;
-		}
-		ctx->dma.flush(ctx, 0, NULL);
-		flushed = true;
-	}
-
-	if (flushed || !ctx->ws->buffer_wait(resource->buf, 0, rusage)) {
-		if (dont_block)
-			return NULL;
-
-		/* We are going to wait for the GPU. Wait for any offloaded
-		 * CS flush to complete first, to avoid busy-waiting in the
-		 * winsys. */
-		ctx->ws->cs_sync_flush(ctx->gfx.cs);
-		if (ctx->dma.cs)
-			ctx->ws->cs_sync_flush(ctx->dma.cs);
-	}
-
-	/* Passing a NULL CS prevents repeating the checks done above. */
-	return ctx->ws->buffer_map(resource->buf, NULL, usage);
-}
-
-/* Initialize the allocation parameters of a resource before its buffer is
- * created: size, alignment, memory domains, winsys flags and the expected
- * VRAM/GART usage.
- *
- * rscreen:   screen whose kernel/DRM info and debug flags drive the choices
- * res:       resource to fill in; res->b.b (usage, target, flags, bind) must
- *            already be set because it is read here
- * size:      buffer size, stored in res->bo_size
- * alignment: buffer alignment, stored in res->bo_alignment
- */
-void si_init_resource_fields(struct r600_common_screen *rscreen,
-			     struct r600_resource *res,
-			     uint64_t size, unsigned alignment)
-{
-	/* Only dereferenced below when the resource is not a PIPE_BUFFER. */
-	struct r600_texture *rtex = (struct r600_texture*)res;
-
-	res->bo_size = size;
-	res->bo_alignment = alignment;
-	res->flags = 0;
-	res->texture_handle_allocated = false;
-	res->image_handle_allocated = false;
-
-	switch (res->b.b.usage) {
-	case PIPE_USAGE_STREAM:
-		res->flags = RADEON_FLAG_GTT_WC;
-		/* fall through */
-	case PIPE_USAGE_STAGING:
-		/* Transfers are likely to occur more often with these
-		 * resources. */
-		res->domains = RADEON_DOMAIN_GTT;
-		break;
-	case PIPE_USAGE_DYNAMIC:
-		/* Older kernels didn't always flush the HDP cache before
-		 * CS execution
-		 */
-		if (rscreen->info.drm_major == 2 &&
-		    rscreen->info.drm_minor < 40) {
-			res->domains = RADEON_DOMAIN_GTT;
-			res->flags |= RADEON_FLAG_GTT_WC;
-			break;
-		}
-		/* fall through */
-	case PIPE_USAGE_DEFAULT:
-	case PIPE_USAGE_IMMUTABLE:
-	default:
-		/* Not listing GTT here improves performance in some
-		 * apps. */
-		res->domains = RADEON_DOMAIN_VRAM;
-		res->flags |= RADEON_FLAG_GTT_WC;
-		break;
-	}
-
-	if (res->b.b.target == PIPE_BUFFER &&
-	    res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
-			      PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
-		/* Use GTT for all persistent mappings with older
-		 * kernels, because they didn't always flush the HDP
-		 * cache before CS execution.
-		 *
-		 * Write-combined CPU mappings are fine, the kernel
-		 * ensures all CPU writes finish before the GPU
-		 * executes a command stream.
-		 */
-		if (rscreen->info.drm_major == 2 &&
-		    rscreen->info.drm_minor < 40)
-			res->domains = RADEON_DOMAIN_GTT;
-	}
-
-	/* Tiled textures are unmappable. Always put them in VRAM. */
-	if ((res->b.b.target != PIPE_BUFFER && !rtex->surface.is_linear) ||
-	    res->b.b.flags & R600_RESOURCE_FLAG_UNMAPPABLE) {
-		res->domains = RADEON_DOMAIN_VRAM;
-		res->flags |= RADEON_FLAG_NO_CPU_ACCESS |
-			      RADEON_FLAG_GTT_WC;
-	}
-
-	/* Displayable and shareable surfaces are not suballocated. */
-	if (res->b.b.bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT))
-		res->flags |= RADEON_FLAG_NO_SUBALLOC; /* shareable */
-	else
-		res->flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
-
-	/* If VRAM is just stolen system memory, allow both VRAM and
-	 * GTT, whichever has free space. If a buffer is evicted from
-	 * VRAM to GTT, it will stay there.
-	 *
-	 * DRM 3.6.0 has good BO move throttling, so we can allow VRAM-only
-	 * placements even with a low amount of stolen VRAM.
-	 *
-	 * The version is compared as a (major, minor) pair: testing the
-	 * minor number alone would also match e.g. 4.0.
-	 */
-	if (!rscreen->info.has_dedicated_vram &&
-	    (rscreen->info.drm_major < 3 ||
-	     (rscreen->info.drm_major == 3 && rscreen->info.drm_minor < 6)) &&
-	    res->domains == RADEON_DOMAIN_VRAM) {
-		res->domains = RADEON_DOMAIN_VRAM_GTT;
-		res->flags &= ~RADEON_FLAG_NO_CPU_ACCESS; /* disallowed with VRAM_GTT */
-	}
-
-	if (rscreen->debug_flags & DBG(NO_WC))
-		res->flags &= ~RADEON_FLAG_GTT_WC;
-
-	/* Set expected VRAM and GART usage for the buffer. */
-	res->vram_usage = 0;
-	res->gart_usage = 0;
-
-	if (res->domains & RADEON_DOMAIN_VRAM)
-		res->vram_usage = size;
-	else if (res->domains & RADEON_DOMAIN_GTT)
-		res->gart_usage = size;
-}
-
-/* (Re)allocate the winsys buffer backing a resource, using the size,
- * alignment, domains and flags already stored in "res".
- *
- * On success the previous buffer reference (if any) is dropped, the GPU
- * address is refreshed, the valid range is reset to empty and true is
- * returned. On allocation failure "res" is left untouched and false is
- * returned.
- */
-bool si_alloc_resource(struct r600_common_screen *rscreen,
-		       struct r600_resource *res)
-{
-	struct pb_buffer *stale;
-	struct pb_buffer *fresh =
-		rscreen->ws->buffer_create(rscreen->ws, res->bo_size,
-					   res->bo_alignment,
-					   res->domains, res->flags);
-
-	if (!fresh)
-		return false;
-
-	/* Swap the pointer in one store so that res->buf never becomes NULL
-	 * if it was not NULL before. This should prevent crashes when several
-	 * contexts use the buffer and one of them invalidates it. */
-	stale = res->buf;
-	res->buf = fresh; /* should be atomic */
-
-	res->gpu_address = rscreen->info.has_virtual_memory ?
-		rscreen->ws->buffer_get_virtual_address(res->buf) : 0;
-
-	pb_reference(&stale, NULL);
-
-	util_range_set_empty(&res->valid_buffer_range);
-	res->TC_L2_dirty = false;
-
-	/* Optional VM debug output, for buffers only. */
-	if (rscreen->debug_flags & DBG(VM) && res->b.b.target == PIPE_BUFFER) {
-		fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Buffer %"PRIu64" bytes\n",
-			res->gpu_address, res->gpu_address + res->buf->size,
-			res->buf->size);
-	}
-	return true;
-}
-
-/* resource_destroy callback for buffers: tears down the threaded-context
- * state and the valid range, drops the winsys buffer reference and frees
- * the r600_resource itself. The "screen" argument is not used.
- */
-static void r600_buffer_destroy(struct pipe_screen *screen,
-				struct pipe_resource *buf)
-{
-	struct r600_resource *res = r600_resource(buf);
-
-	threaded_resource_deinit(buf);
-	util_range_destroy(&res->valid_buffer_range);
-	pb_reference(&res->buf, NULL);
-
-	FREE(res);
-}
-
-/* Discard the contents of a buffer.
- *
- * Returns false without doing anything for buffers that can't be
- * reallocated: shared buffers, sparse buffers and user-pointer buffers (in
- * AMD_pinned_memory, the user pointer association only gets broken when the
- * buffer is explicitly re-allocated).
- *
- * Otherwise returns true. If the buffer is referenced by a command stream or
- * still busy, the context's invalidate_buffer hook is called; if it is idle,
- * only its valid range is reset.
- */
-static bool
-r600_invalidate_buffer(struct r600_common_context *rctx,
-		       struct r600_resource *rbuffer)
-{
-	bool would_wait;
-
-	if (rbuffer->b.is_shared ||
-	    (rbuffer->flags & RADEON_FLAG_SPARSE) ||
-	    rbuffer->b.is_user_ptr)
-		return false;
-
-	/* Would mapping this buffer cause waiting for the GPU? */
-	would_wait = si_rings_is_buffer_referenced(rctx, rbuffer->buf,
-						   RADEON_USAGE_READWRITE) ||
-		     !rctx->ws->buffer_wait(rbuffer->buf, 0,
-					    RADEON_USAGE_READWRITE);
-
-	if (would_wait)
-		rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
-	else
-		util_range_set_empty(&rbuffer->valid_buffer_range);
-
-	return true;
-}
-
-/* Make "dst" use the storage of "src".
- *
- * dst takes a reference on src's winsys buffer and copies its GPU address,
- * bind flags and winsys flags. Both resources are expected to agree on
- * usage accounting, size, alignment and domains (asserted). Finally the
- * context's rebind_buffer hook is called with dst's previous GPU address.
- */
-void si_replace_buffer_storage(struct pipe_context *ctx,
-			       struct pipe_resource *dst,
-			       struct pipe_resource *src)
-{
-	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
-	struct r600_resource *to = r600_resource(dst);
-	struct r600_resource *from = r600_resource(src);
-	const uint64_t prev_gpu_address = to->gpu_address;
-
-	/* None of the fields checked here is modified below. */
-	assert(to->vram_usage == from->vram_usage);
-	assert(to->gart_usage == from->gart_usage);
-	assert(to->bo_size == from->bo_size);
-	assert(to->bo_alignment == from->bo_alignment);
-	assert(to->domains == from->domains);
-
-	pb_reference(&to->buf, from->buf);
-	to->gpu_address = from->gpu_address;
-	to->b.b.bind = from->b.b.bind;
-	to->flags = from->flags;
-
-	rctx->rebind_buffer(ctx, dst, prev_gpu_address);
-}
-
-/* Invalidate the contents of a resource. Only buffers are handled; any
- * other target is ignored. The result of r600_invalidate_buffer() is
- * deliberately discarded.
- */
-void si_invalidate_resource(struct pipe_context *ctx,
-			    struct pipe_resource *resource)
-{
-	/* We currently only do anything here for buffers. */
-	if (resource->target != PIPE_BUFFER)
-		return;
-
-	(void)r600_invalidate_buffer((struct r600_common_context*)ctx,
-				     r600_resource(resource));
-}
-
-/* Allocate and fill the transfer object describing a buffer mapping.
- *
- * The object comes from pool_transfers_unsync when usage contains
- * TC_TRANSFER_MAP_THREADED_UNSYNC and from pool_transfers otherwise. It
- * holds a new reference on "resource" and records "staging" and "offset"
- * as given. *ptransfer receives the new transfer and "data" is returned
- * unchanged so callers can return this function's result directly.
- */
-static void *r600_buffer_get_transfer(struct pipe_context *ctx,
-				      struct pipe_resource *resource,
-				      unsigned usage,
-				      const struct pipe_box *box,
-				      struct pipe_transfer **ptransfer,
-				      void *data, struct r600_resource *staging,
-				      unsigned offset)
-{
-	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
-	struct r600_transfer *xfer =
-		slab_alloc((usage & TC_TRANSFER_MAP_THREADED_UNSYNC) ?
-			   &rctx->pool_transfers_unsync :
-			   &rctx->pool_transfers);
-
-	/* Clear the pointer first: pipe_resource_reference() reads it. */
-	xfer->b.b.resource = NULL;
-	pipe_resource_reference(&xfer->b.b.resource, resource);
-
-	xfer->b.b.level = 0;
-	xfer->b.b.usage = usage;
-	xfer->b.b.box = *box;
-	xfer->b.b.stride = 0;
-	xfer->b.b.layer_stride = 0;
-	xfer->b.staging = NULL;
-
-	xfer->staging = staging;
-	xfer->offset = offset;
-
-	*ptransfer = &xfer->b.b;
-	return data;
-}
-
-/* Tell whether a buffer copy with the given destination offset, source
- * offset and size can be done.
- *
- * With CP DMA any copy qualifies. Without it, all three values must be
- * multiples of 4 and either a DMA command stream or streamout support must
- * be present.
- */
-static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx,
-				     unsigned dstx, unsigned srcx, unsigned size)
-{
-	if (rctx->screen->has_cp_dma)
-		return true;
-
-	/* Everything below requires dword alignment. */
-	if (dstx % 4 || srcx % 4 || size % 4)
-		return false;
-
-	return rctx->dma.cs || rctx->screen->has_streamout;
-}
-/* Map a range of a buffer for CPU access (the transfer_map entry of
- * r600_buffer_vtbl).
- *
- * Depending on the usage flags and on whether the buffer is busy, the
- * returned pointer refers either to the buffer itself or to a temporary
- * staging buffer. The "level" argument is not used by this function.
- *
- * On success *ptransfer is set (via r600_buffer_get_transfer) and a pointer
- * to the first mapped byte is returned; on failure NULL is returned.
- */
-static void *r600_buffer_transfer_map(struct pipe_context *ctx,
- struct pipe_resource *resource,
- unsigned level,
- unsigned usage,
- const struct pipe_box *box,
- struct pipe_transfer **ptransfer)
-{
- struct r600_common_context *rctx = (struct r600_common_context*)ctx;
- struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
- struct r600_resource *rbuffer = r600_resource(resource);
- uint8_t *data;
- /* The mapped range must lie inside the buffer. */
- assert(box->x + box->width <= resource->width0);
-
- /* From GL_AMD_pinned_memory issues:
- *
- * 4) Is glMapBuffer on a shared buffer guaranteed to return the
- * same system address which was specified at creation time?
- *
- * RESOLVED: NO. The GL implementation might return a different
- * virtual mapping of that memory, although the same physical
- * page will be used.
- *
- * So don't ever use staging buffers.
- * (Both staging paths below are skipped when PIPE_TRANSFER_PERSISTENT
- * is set, unless the buffer is sparse.)
- */
- if (rbuffer->b.is_user_ptr)
- usage |= PIPE_TRANSFER_PERSISTENT;
-
- /* See if the buffer range being mapped has never been initialized,
- * in which case it can be mapped unsynchronized. */
- if (!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
- TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED)) &&
- usage & PIPE_TRANSFER_WRITE &&
- !rbuffer->b.is_shared &&
- !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
- usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
- }
-
- /* If discarding the entire range, discard the whole resource instead. */
- if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
- box->x == 0 && box->width == resource->width0) {
- usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
- }
- /* Whole-resource discard: try to invalidate the buffer, unless the map is unsynchronized or TC_TRANSFER_MAP_NO_INVALIDATE is set. */
- if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
- !(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
- TC_TRANSFER_MAP_NO_INVALIDATE))) {
- assert(usage & PIPE_TRANSFER_WRITE);
-
- if (r600_invalidate_buffer(rctx, rbuffer)) {
- /* At this point, the buffer is always idle. */
- usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
- } else {
- /* Fall back to a temporary buffer. */
- usage |= PIPE_TRANSFER_DISCARD_RANGE;
- }
- }
- /* Range discard: write into a staging allocation from the stream uploader when mapping the buffer directly would wait for the GPU. */
- if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
- !(rscreen->debug_flags & DBG(NO_DISCARD_RANGE)) &&
- ((!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
- PIPE_TRANSFER_PERSISTENT)) &&
- r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) ||
- (rbuffer->flags & RADEON_FLAG_SPARSE))) {
- assert(usage & PIPE_TRANSFER_WRITE);
-
- /* Check if mapping this buffer would cause waiting for the GPU.
- */
- if (rbuffer->flags & RADEON_FLAG_SPARSE ||
- si_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
- !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
- /* Do a wait-free write-only transfer using a temporary buffer. */
- unsigned offset;
- struct r600_resource *staging = NULL;
-
- u_upload_alloc(ctx->stream_uploader, 0,
- box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
- rctx->screen->info.tcc_cache_line_size,
- &offset, (struct pipe_resource**)&staging,
- (void**)&data);
-
- if (staging) {
- data += box->x % R600_MAP_BUFFER_ALIGNMENT;
- return r600_buffer_get_transfer(ctx, resource, usage, box,
- ptransfer, data, staging, offset);
- } else if (rbuffer->flags & RADEON_FLAG_SPARSE) {
- return NULL;
- }
- } else {
- /* At this point, the buffer is always idle (we checked it above). */
- usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
- }
- }
- /* Use a staging buffer in cached GTT for reads. */
- else if (((usage & PIPE_TRANSFER_READ) &&
- !(usage & PIPE_TRANSFER_PERSISTENT) &&
- (rbuffer->domains & RADEON_DOMAIN_VRAM ||
- rbuffer->flags & RADEON_FLAG_GTT_WC) &&
- r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) ||
- (rbuffer->flags & RADEON_FLAG_SPARSE)) {
- struct r600_resource *staging;
-
- assert(!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC));
- staging = (struct r600_resource*) pipe_buffer_create(
- ctx->screen, 0, PIPE_USAGE_STAGING,
- box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT));
- if (staging) {
- /* Copy the VRAM buffer to the staging buffer. */
- rctx->dma_copy(ctx, &staging->b.b, 0,
- box->x % R600_MAP_BUFFER_ALIGNMENT,
- 0, 0, resource, 0, box);
-
- data = si_buffer_map_sync_with_rings(rctx, staging,
- usage & ~PIPE_TRANSFER_UNSYNCHRONIZED);
- if (!data) {
- r600_resource_reference(&staging, NULL);
- return NULL;
- }
- data += box->x % R600_MAP_BUFFER_ALIGNMENT;
-
- return r600_buffer_get_transfer(ctx, resource, usage, box,
- ptransfer, data, staging, 0);
- } else if (rbuffer->flags & RADEON_FLAG_SPARSE) {
- return NULL;
- }
- }
- /* Direct path: map the buffer itself (no staging buffer was used, or allocating one failed for a non-sparse buffer). */
- data = si_buffer_map_sync_with_rings(rctx, rbuffer, usage);
- if (!data) {
- return NULL;
- }
- data += box->x;
-
- return r600_buffer_get_transfer(ctx, resource, usage, box,
- ptransfer, data, NULL, 0);
-}
-
-/* Make the written range "box" of a mapped buffer take effect.
- *
- * "box" is expressed in buffer coordinates. When the transfer uses a staging
- * buffer, the corresponding staging bytes are copied into the real buffer
- * with resource_copy_region. In every case the range is added to the
- * buffer's valid range.
- */
-static void r600_buffer_do_flush_region(struct pipe_context *ctx,
-					struct pipe_transfer *transfer,
-					const struct pipe_box *box)
-{
-	struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
-	struct r600_resource *rbuffer = r600_resource(transfer->resource);
-
-	if (rtransfer->staging) {
-		struct pipe_box staging_box;
-		unsigned staging_x = rtransfer->offset +
-				     box->x % R600_MAP_BUFFER_ALIGNMENT;
-
-		u_box_1d(staging_x, box->width, &staging_box);
-
-		/* Copy the staging buffer into the original one. */
-		ctx->resource_copy_region(ctx, transfer->resource, 0,
-					  box->x, 0, 0,
-					  &rtransfer->staging->b.b, 0,
-					  &staging_box);
-	}
-
-	util_range_add(&rbuffer->valid_buffer_range,
-		       box->x, box->x + box->width);
-}
-
-/* transfer_flush_region callback for buffers.
- *
- * Does nothing unless the transfer was mapped with both PIPE_TRANSFER_WRITE
- * and PIPE_TRANSFER_FLUSH_EXPLICIT. "rel_box" is relative to the start of
- * the mapped box and is converted to buffer coordinates before flushing.
- */
-static void r600_buffer_flush_region(struct pipe_context *ctx,
-				     struct pipe_transfer *transfer,
-				     const struct pipe_box *rel_box)
-{
-	const unsigned needed = PIPE_TRANSFER_WRITE |
-				PIPE_TRANSFER_FLUSH_EXPLICIT;
-	struct pipe_box abs_box;
-
-	if ((transfer->usage & needed) != needed)
-		return;
-
-	u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &abs_box);
-	r600_buffer_do_flush_region(ctx, transfer, &abs_box);
-}
-
-/* transfer_unmap callback for buffers.
- *
- * Write mappings without PIPE_TRANSFER_FLUSH_EXPLICIT are flushed here for
- * the whole mapped box. Then the staging buffer and resource references are
- * dropped and the transfer object is returned to its slab pool.
- */
-static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
-				       struct pipe_transfer *transfer)
-{
-	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
-	struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
-	const unsigned usage = transfer->usage;
-
-	if ((usage & PIPE_TRANSFER_WRITE) &&
-	    !(usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
-		r600_buffer_do_flush_region(ctx, transfer, &transfer->box);
-	}
-
-	r600_resource_reference(&rtransfer->staging, NULL);
-	assert(rtransfer->b.staging == NULL); /* for threaded context only */
-	pipe_resource_reference(&transfer->resource, NULL);
-
-	/* Always free through pool_transfers, never pool_transfers_unsync:
-	 * we are always in the driver thread here. */
-	slab_free(&rctx->pool_transfers, transfer);
-}
-
-/* Copy "size" bytes from "data" into "buffer" at byte "offset".
- *
- * The range is mapped with PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE
- * in addition to the caller's usage flags, filled with memcpy and unmapped.
- * If the mapping fails, the function returns silently without writing.
- */
-void si_buffer_subdata(struct pipe_context *ctx,
-		       struct pipe_resource *buffer,
-		       unsigned usage, unsigned offset,
-		       unsigned size, const void *data)
-{
-	const unsigned map_usage = PIPE_TRANSFER_WRITE |
-				   PIPE_TRANSFER_DISCARD_RANGE |
-				   usage;
-	struct pipe_transfer *transfer = NULL;
-	struct pipe_box range;
-	uint8_t *dst;
-
-	u_box_1d(offset, size, &range);
-	dst = r600_buffer_transfer_map(ctx, buffer, 0, map_usage,
-				       &range, &transfer);
-	if (dst) {
-		memcpy(dst, data, size);
-		r600_buffer_transfer_unmap(ctx, transfer);
-	}
-}
-/* Buffer implementations of the u_resource_vtbl callbacks; installed into
- * rbuffer->b.vtbl by r600_alloc_buffer_struct(). The initializer is
- * positional, so the entry order must not be changed. */
-static const struct u_resource_vtbl r600_buffer_vtbl =
-{
- NULL, /* get_handle */
- r600_buffer_destroy, /* resource_destroy */
- r600_buffer_transfer_map, /* transfer_map */
- r600_buffer_flush_region, /* transfer_flush_region */
- r600_buffer_transfer_unmap, /* transfer_unmap */
-};
-
-/* Allocate an r600_resource for a buffer and initialize everything except
- * its storage.
- *
- * The pipe_resource part is copied from "templ", gets a reference count of
- * 1 and points at "screen". The buffer vtbl, the threaded-context state and
- * an empty valid range are set up; rbuffer->buf is left NULL.
- *
- * Returns NULL if the allocation fails.
- */
-static struct r600_resource *
-r600_alloc_buffer_struct(struct pipe_screen *screen,
-			 const struct pipe_resource *templ)
-{
-	struct r600_resource *rbuffer;
-
-	rbuffer = MALLOC_STRUCT(r600_resource);
-	/* Don't dereference a failed allocation. */
-	if (!rbuffer)
-		return NULL;
-
-	rbuffer->b.b = *templ;
-	rbuffer->b.b.next = NULL;
-	pipe_reference_init(&rbuffer->b.b.reference, 1);
-	rbuffer->b.b.screen = screen;
-
-	rbuffer->b.vtbl = &r600_buffer_vtbl;
-	threaded_resource_init(&rbuffer->b.b);
-
-	rbuffer->buf = NULL;
-	rbuffer->bind_history = 0;
-	rbuffer->TC_L2_dirty = false;
-	util_range_init(&rbuffer->valid_buffer_range);
-	return rbuffer;
-}
-
-/* Create a buffer resource described by "templ" with the given alignment.
- *
- * The size is templ->width0. PIPE_RESOURCE_FLAG_SPARSE in templ->flags is
- * translated to RADEON_FLAG_SPARSE.
- *
- * Returns the new pipe_resource, or NULL if either the resource structure
- * or its storage could not be allocated.
- */
-struct pipe_resource *si_buffer_create(struct pipe_screen *screen,
-				       const struct pipe_resource *templ,
-				       unsigned alignment)
-{
-	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
-	struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);
-
-	if (!rbuffer)
-		return NULL;
-
-	si_init_resource_fields(rscreen, rbuffer, templ->width0, alignment);
-
-	if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE)
-		rbuffer->flags |= RADEON_FLAG_SPARSE;
-
-	if (!si_alloc_resource(rscreen, rbuffer)) {
-		/* Undo what r600_alloc_buffer_struct() set up, mirroring
-		 * r600_buffer_destroy(); there is no winsys buffer to
-		 * release yet. */
-		threaded_resource_deinit(&rbuffer->b.b);
-		util_range_destroy(&rbuffer->valid_buffer_range);
-		FREE(rbuffer);
-		return NULL;
-	}
-	return &rbuffer->b.b;
-}
-
-/* Convenience wrapper around si_buffer_create(): builds a zeroed
- * PIPE_BUFFER template (format R8_UNORM, no bind flags, 1x1 extent, one
- * array layer) from the given flags, usage and size, then creates the
- * buffer with the requested alignment.
- */
-struct pipe_resource *si_aligned_buffer_create(struct pipe_screen *screen,
-					       unsigned flags,
-					       unsigned usage,
-					       unsigned size,
-					       unsigned alignment)
-{
-	struct pipe_resource templ;
-
-	memset(&templ, 0, sizeof(templ));
-
-	/* Fixed properties of a plain buffer. */
-	templ.target = PIPE_BUFFER;
-	templ.format = PIPE_FORMAT_R8_UNORM;
-	templ.bind = 0;
-	templ.height0 = 1;
-	templ.depth0 = 1;
-	templ.array_size = 1;
-
-	/* Caller-provided properties. */
-	templ.width0 = size;
-	templ.usage = usage;
-	templ.flags = flags;
-
-	return si_buffer_create(screen, &templ, alignment);
-}
-
-/* Create a buffer resource backed by application memory.
- *
- * The resource is placed in the GTT domain, flagged as a user pointer, and
- * its whole range [0, templ->width0) is marked valid. The winsys turns
- * "user_memory" into a buffer of templ->width0 bytes.
- *
- * Returns the new pipe_resource, or NULL if the resource structure cannot
- * be allocated or the winsys cannot wrap the pointer.
- */
-struct pipe_resource *
-si_buffer_from_user_memory(struct pipe_screen *screen,
-			   const struct pipe_resource *templ,
-			   void *user_memory)
-{
-	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
-	struct radeon_winsys *ws = rscreen->ws;
-	struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);
-
-	if (!rbuffer)
-		return NULL;
-
-	rbuffer->domains = RADEON_DOMAIN_GTT;
-	rbuffer->flags = 0;
-	rbuffer->b.is_user_ptr = true;
-	util_range_add(&rbuffer->valid_buffer_range, 0, templ->width0);
-	util_range_add(&rbuffer->b.valid_buffer_range, 0, templ->width0);
-
-	/* Convert a user pointer to a buffer. */
-	rbuffer->buf = ws->buffer_from_ptr(ws, user_memory, templ->width0);
-	if (!rbuffer->buf) {
-		/* Undo what r600_alloc_buffer_struct() set up, mirroring
-		 * r600_buffer_destroy(). */
-		threaded_resource_deinit(&rbuffer->b.b);
-		util_range_destroy(&rbuffer->valid_buffer_range);
-		FREE(rbuffer);
-		return NULL;
-	}
-
-	if (rscreen->info.has_virtual_memory)
-		rbuffer->gpu_address =
-			ws->buffer_get_virtual_address(rbuffer->buf);
-	else
-		rbuffer->gpu_address = 0;
-
-	/* The memory is accounted as GART usage only. */
-	rbuffer->vram_usage = 0;
-	rbuffer->gart_usage = templ->width0;
-
-	return &rbuffer->b.b;
-}
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_cs.h b/lib/mesa/src/gallium/drivers/radeon/r600_cs.h
deleted file mode 100644
index 03a04b754..000000000
--- a/lib/mesa/src/gallium/drivers/radeon/r600_cs.h
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright 2013 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Marek Olšák <maraeo@gmail.com>
- */
-
-/**
- * This file contains helpers for writing commands to command streams.
- */
-
-#ifndef R600_CS_H
-#define R600_CS_H
-
-#include "r600_pipe_common.h"
-#include "r600d_common.h"
-
-/* Add a buffer to the relocation list of a ring's command stream.
- *
- * Unless the ring is currently flushing, the other ring is flushed
- * asynchronously first (the DMA ring only if it has a command stream), so
- * that everything looks serialized from the driver point of view.
- *
- * "usage" must be non-zero (asserted). Returns the value of the winsys
- * cs_add_reloc() call multiplied by 4.
- */
-static inline unsigned r600_context_bo_reloc(struct r600_common_context *rctx,
-					     struct r600_ring *ring,
-					     struct r600_resource *rbo,
-					     enum radeon_bo_usage usage,
-					     enum radeon_bo_priority priority)
-{
-	assert(usage);
-
-	if (!ring->flushing) {
-		if (ring != &rctx->rings.gfx) {
-			/* Submitting to another ring: flush gfx. */
-			rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
-		} else if (rctx->rings.dma.cs) {
-			/* Submitting to gfx: flush dma if it exists. */
-			rctx->rings.dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
-		}
-	}
-
-	return 4 * rctx->ws->cs_add_reloc(ring->cs, rbo->cs_buf, usage,
-					  rbo->domains, priority);
-}
-
-/* Register a buffer relocation for a ring and, when the screen does not
- * report r600_virtual_address, also emit a NOP packet carrying the
- * relocation offset into the ring's command stream.
- */
-static inline void r600_emit_reloc(struct r600_common_context *rctx,
-				   struct r600_ring *ring, struct r600_resource *rbo,
-				   enum radeon_bo_usage usage,
-				   enum radeon_bo_priority priority)
-{
-	struct radeon_winsys_cs *cs = ring->cs;
-	bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.r600_virtual_address;
-	/* The relocation is always added, even with virtual addressing. */
-	unsigned reloc = r600_context_bo_reloc(rctx, ring, rbo, usage, priority);
-
-	if (has_vm)
-		return;
-
-	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-	radeon_emit(cs, reloc);
-}
-
-/* Emit the header of a SET_CONFIG_REG packet that writes "num" consecutive
- * registers starting at "reg". The caller emits the "num" values afterwards.
- * Asserts that reg lies below the context register range and that the
- * command stream has room for the header plus the values.
- */
-static inline void r600_write_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
-{
-	unsigned dw_index;
-
-	assert(reg < R600_CONTEXT_REG_OFFSET);
-	assert(cs->cdw+2+num <= cs->max_dw);
-
-	dw_index = (reg - R600_CONFIG_REG_OFFSET) >> 2;
-	radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
-	radeon_emit(cs, dw_index);
-}
-
-/* Write a single config register: packet header followed by one value. */
-static inline void r600_write_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
-{
-	r600_write_config_reg_seq(cs, reg, 1);
-	radeon_emit(cs, value);
-}
-
-/* Emit the header of a SET_CONTEXT_REG packet that writes "num" consecutive
- * registers starting at "reg". The caller emits the "num" values afterwards.
- * Asserts that reg is at or above R600_CONTEXT_REG_OFFSET and that the
- * command stream has room for the header plus the values.
- */
-static inline void r600_write_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
-{
-	unsigned dw_index;
-
-	assert(reg >= R600_CONTEXT_REG_OFFSET);
-	assert(cs->cdw+2+num <= cs->max_dw);
-
-	dw_index = (reg - R600_CONTEXT_REG_OFFSET) >> 2;
-	radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
-	radeon_emit(cs, dw_index);
-}
-
-/* Write a single context register: packet header followed by one value. */
-static inline void r600_write_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
-{
-	r600_write_context_reg_seq(cs, reg, 1);
-	radeon_emit(cs, value);
-}
-
-/* Emit the header of a SET_SH_REG packet that writes "num" consecutive
- * registers starting at "reg". The caller emits the "num" values afterwards.
- * Asserts that reg lies in [SI_SH_REG_OFFSET, SI_SH_REG_END) and that the
- * command stream has room for the header plus the values.
- */
-static inline void si_write_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
-{
-	unsigned dw_index;
-
-	assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
-	assert(cs->cdw+2+num <= cs->max_dw);
-
-	dw_index = (reg - SI_SH_REG_OFFSET) >> 2;
-	radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
-	radeon_emit(cs, dw_index);
-}
-
-/* Write a single SH register: packet header followed by one value. */
-static inline void si_write_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
-{
-	si_write_sh_reg_seq(cs, reg, 1);
-	radeon_emit(cs, value);
-}
-
-/* Emit the header of a SET_UCONFIG_REG packet that writes "num" consecutive
- * registers starting at "reg". The caller emits the "num" values afterwards.
- * Asserts that reg lies in [CIK_UCONFIG_REG_OFFSET, CIK_UCONFIG_REG_END) and
- * that the command stream has room for the header plus the values.
- */
-static inline void cik_write_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
-{
-	unsigned dw_index;
-
-	assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
-	assert(cs->cdw+2+num <= cs->max_dw);
-
-	dw_index = (reg - CIK_UCONFIG_REG_OFFSET) >> 2;
-	radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
-	radeon_emit(cs, dw_index);
-}
-
-/* Write a single uconfig register: packet header followed by one value. */
-static inline void cik_write_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
-{
-	cik_write_uconfig_reg_seq(cs, reg, 1);
-	radeon_emit(cs, value);
-}
-
-#endif
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_gpu_load.c b/lib/mesa/src/gallium/drivers/radeon/r600_gpu_load.c
deleted file mode 100644
index 625370b8e..000000000
--- a/lib/mesa/src/gallium/drivers/radeon/r600_gpu_load.c
+++ /dev/null
@@ -1,283 +0,0 @@
-/*
- * Copyright 2015 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors: Marek Olšák <maraeo@gmail.com>
- *
- */
-
-/* The GPU load is measured as follows.
- *
- * There is a thread which samples the GRBM_STATUS register at a certain
- * frequency and the "busy" or "idle" counter is incremented based on
- * whether the GUI_ACTIVE bit is set or not.
- *
- * Then, the user can sample the counters twice and calculate the average
- * GPU load between the two samples.
- */
-
-#include "r600_pipe_common.h"
-#include "r600_query.h"
-#include "os/os_time.h"
-
-/* For good accuracy at 1000 fps or lower. This will be inaccurate for higher
- * fps (there are too few samples per frame).
- * The sampling thread derives its period from this: 1000000 / SAMPLES_PER_SEC
- * microseconds. */
-#define SAMPLES_PER_SEC 10000
-/* GRBM_STATUS: value passed to read_registers(), then one single-bit extractor per busy flag in the value read. */
-#define GRBM_STATUS 0x8010
-#define TA_BUSY(x) (((x) >> 14) & 0x1)
-#define GDS_BUSY(x) (((x) >> 15) & 0x1)
-#define VGT_BUSY(x) (((x) >> 17) & 0x1)
-#define IA_BUSY(x) (((x) >> 19) & 0x1)
-#define SX_BUSY(x) (((x) >> 20) & 0x1)
-#define WD_BUSY(x) (((x) >> 21) & 0x1)
-#define SPI_BUSY(x) (((x) >> 22) & 0x1)
-#define BCI_BUSY(x) (((x) >> 23) & 0x1)
-#define SC_BUSY(x) (((x) >> 24) & 0x1)
-#define PA_BUSY(x) (((x) >> 25) & 0x1)
-#define DB_BUSY(x) (((x) >> 26) & 0x1)
-#define CP_BUSY(x) (((x) >> 29) & 0x1)
-#define CB_BUSY(x) (((x) >> 30) & 0x1)
-#define GUI_ACTIVE(x) (((x) >> 31) & 0x1)
-/* SRBM_STATUS2: only read when chip_class is CIK or VI (see r600_update_mmio_counters). */
-#define SRBM_STATUS2 0x0e4c
-#define SDMA_BUSY(x) (((x) >> 5) & 0x1)
-/* CP_STAT: only read when chip_class >= VI (see r600_update_mmio_counters). */
-#define CP_STAT 0x8680
-#define PFP_BUSY(x) (((x) >> 15) & 0x1)
-#define MEQ_BUSY(x) (((x) >> 16) & 0x1)
-#define ME_BUSY(x) (((x) >> 17) & 0x1)
-#define SURFACE_SYNC_BUSY(x) (((x) >> 21) & 0x1)
-#define DMA_BUSY(x) (((x) >> 22) & 0x1)
-#define SCRATCH_RAM_BUSY(x) (((x) >> 24) & 0x1)
-/* Pass-through "mask" for UPDATE_COUNTER, used when "value" already holds a boolean. */
-#define IDENTITY(x) x
-/* Increment the busy counter of "field" if mask(value) is non-zero, else its idle counter. Expects a "value" variable and a "counters" pointer in the calling scope. */
-#define UPDATE_COUNTER(field, mask) \
- do { \
- if (mask(value)) \
- p_atomic_inc(&counters->named.field.busy); \
- else \
- p_atomic_inc(&counters->named.field.idle); \
- } while (0)
-
-/* Take one sample of the GPU status registers and bump the busy or idle
- * counter of every tracked block in "counters".
- *
- * GRBM_STATUS is always read. SRBM_STATUS2 (SDMA) is read on CIK and VI
- * only, CP_STAT on VI and later. The aggregate "gpu" counter is busy when
- * either GUI_ACTIVE or SDMA_BUSY was set in this sample.
- *
- * The UPDATE_COUNTER macro operates on the local "value" and on "counters".
- */
-static void r600_update_mmio_counters(struct r600_common_screen *rscreen,
-				      union r600_mmio_counters *counters)
-{
-	struct radeon_winsys *ws = rscreen->ws;
-	uint32_t value = 0;
-	bool gfx_active;
-	bool sdma_active = false;
-
-	/* GRBM_STATUS */
-	ws->read_registers(ws, GRBM_STATUS, 1, &value);
-	gfx_active = GUI_ACTIVE(value);
-
-	UPDATE_COUNTER(ta, TA_BUSY);
-	UPDATE_COUNTER(gds, GDS_BUSY);
-	UPDATE_COUNTER(vgt, VGT_BUSY);
-	UPDATE_COUNTER(ia, IA_BUSY);
-	UPDATE_COUNTER(sx, SX_BUSY);
-	UPDATE_COUNTER(wd, WD_BUSY);
-	UPDATE_COUNTER(spi, SPI_BUSY);
-	UPDATE_COUNTER(bci, BCI_BUSY);
-	UPDATE_COUNTER(sc, SC_BUSY);
-	UPDATE_COUNTER(pa, PA_BUSY);
-	UPDATE_COUNTER(db, DB_BUSY);
-	UPDATE_COUNTER(cp, CP_BUSY);
-	UPDATE_COUNTER(cb, CB_BUSY);
-	UPDATE_COUNTER(gui, GUI_ACTIVE);
-
-	/* SRBM_STATUS2 */
-	if (rscreen->chip_class == CIK || rscreen->chip_class == VI) {
-		ws->read_registers(ws, SRBM_STATUS2, 1, &value);
-		sdma_active = SDMA_BUSY(value);
-
-		UPDATE_COUNTER(sdma, SDMA_BUSY);
-	}
-
-	/* CP_STAT */
-	if (rscreen->chip_class >= VI) {
-		ws->read_registers(ws, CP_STAT, 1, &value);
-
-		UPDATE_COUNTER(pfp, PFP_BUSY);
-		UPDATE_COUNTER(meq, MEQ_BUSY);
-		UPDATE_COUNTER(me, ME_BUSY);
-		UPDATE_COUNTER(surf_sync, SURFACE_SYNC_BUSY);
-		UPDATE_COUNTER(cp_dma, DMA_BUSY);
-		UPDATE_COUNTER(scratch_ram, SCRATCH_RAM_BUSY);
-	}
-
-	/* Overall load: reuse "value" as a boolean for the macro. */
-	value = gfx_active || sdma_active;
-	UPDATE_COUNTER(gpu, IDENTITY);
-}
-
-#undef UPDATE_COUNTER
-
-/* Body of the sampling thread; "param" is the r600_common_screen.
- *
- * Until gpu_load_stop_thread becomes non-zero, the loop sleeps, nudges the
- * sleep time by one microsecond (never below 1) depending on the result of
- * os_time_timeout() for the elapsed period, and samples the MMIO counters.
- * On exit it decrements gpu_load_stop_thread again and returns 0.
- */
-static int
-r600_gpu_load_thread(void *param)
-{
-	struct r600_common_screen *rscreen = (struct r600_common_screen*)param;
-	const int period_us = 1000000 / SAMPLES_PER_SEC;
-	int sleep_us = period_us;
-	int64_t prev_time = os_time_get();
-
-	while (!p_atomic_read(&rscreen->gpu_load_stop_thread)) {
-		int64_t now;
-
-		if (sleep_us)
-			os_time_sleep(sleep_us);
-
-		/* Adjust the sleep time so that the sampling rate tracks
-		 * the expected frequency. */
-		now = os_time_get();
-		sleep_us = os_time_timeout(prev_time, prev_time + period_us, now) ?
-			   MAX2(sleep_us - 1, 1) : sleep_us + 1;
-		prev_time = now;
-
-		/* Update the counters. */
-		r600_update_mmio_counters(rscreen, &rscreen->mmio_counters);
-	}
-
-	p_atomic_dec(&rscreen->gpu_load_stop_thread);
-	return 0;
-}
-
-/* Stop the sampling thread if it is running: raise gpu_load_stop_thread,
- * join the thread and clear the thread handle. Does nothing when no thread
- * was started.
- */
-void si_gpu_load_kill_thread(struct r600_common_screen *rscreen)
-{
-	if (rscreen->gpu_load_thread) {
-		p_atomic_inc(&rscreen->gpu_load_stop_thread);
-		thrd_join(rscreen->gpu_load_thread, NULL);
-		rscreen->gpu_load_thread = 0;
-	}
-}
-
-/* Read one busy/idle counter pair, starting the sampling thread first if it
- * is not running yet.
- *
- * "busy_index" is the index of the busy counter in mmio_counters.array; the
- * idle counter is read from the next element. The result packs the busy
- * count into the low 32 bits and the idle count into the high 32 bits.
- */
-static uint64_t r600_read_mmio_counter(struct r600_common_screen *rscreen,
-				       unsigned busy_index)
-{
-	unsigned busy_count, idle_count;
-
-	/* Start the thread if needed. */
-	if (!rscreen->gpu_load_thread) {
-		mtx_lock(&rscreen->gpu_load_mutex);
-		/* Somebody else may have started it in the meantime. */
-		if (!rscreen->gpu_load_thread) {
-			rscreen->gpu_load_thread =
-				u_thread_create(r600_gpu_load_thread, rscreen);
-		}
-		mtx_unlock(&rscreen->gpu_load_mutex);
-	}
-
-	busy_count = p_atomic_read(&rscreen->mmio_counters.array[busy_index]);
-	idle_count = p_atomic_read(&rscreen->mmio_counters.array[busy_index + 1]);
-
-	return ((uint64_t)idle_count << 32) | busy_count;
-}
-
-/* Compute the load percentage (0-100) of one counter pair since "begin".
- *
- * "begin" is a value previously returned by r600_read_mmio_counter() for
- * the same "busy_index". The busy and idle deltas are taken modulo 2^32, so
- * a wrap of the underlying 32-bit counters between the two reads is
- * harmless.
- */
-static unsigned r600_end_mmio_counter(struct r600_common_screen *rscreen,
-				      uint64_t begin, unsigned busy_index)
-{
-	uint64_t end = r600_read_mmio_counter(rscreen, busy_index);
-	unsigned busy = (end & 0xffffffff) - (begin & 0xffffffff);
-	unsigned idle = (end >> 32) - (begin >> 32);
-
-	/* Calculate the % of time the busy counter was being incremented.
-	 *
-	 * If no counters were incremented, return the current counter status.
-	 * It's for the case when the load is queried faster than
-	 * the counters are updated.
-	 */
-	if (idle || busy) {
-		/* Use 64-bit arithmetic: busy * 100 and busy + idle can
-		 * exceed 32 bits when the two reads are far apart. */
-		uint64_t total = (uint64_t)busy + idle;
-
-		return (unsigned)(((uint64_t)busy * 100) / total);
-	} else {
-		union r600_mmio_counters counters;
-
-		memset(&counters, 0, sizeof(counters));
-		r600_update_mmio_counters(rscreen, &counters);
-		return counters.array[busy_index] ? 100 : 0;
-	}
-}
-/* Index of a field's "busy" counter inside mmio_counters.array, computed as a
- * pointer difference. r600_read_mmio_counter() reads the matching idle
- * counter from the next array element. */
-#define BUSY_INDEX(rscreen, field) (&rscreen->mmio_counters.named.field.busy - \
- rscreen->mmio_counters.array)
-/* Translate an R600_QUERY_GPU_* query type into the index of the corresponding busy counter; any other type hits unreachable(). */
-static unsigned busy_index_from_type(struct r600_common_screen *rscreen,
- unsigned type)
-{
- switch (type) {
- case R600_QUERY_GPU_LOAD:
- return BUSY_INDEX(rscreen, gpu);
- case R600_QUERY_GPU_SHADERS_BUSY:
- return BUSY_INDEX(rscreen, spi);
- case R600_QUERY_GPU_TA_BUSY:
- return BUSY_INDEX(rscreen, ta);
- case R600_QUERY_GPU_GDS_BUSY:
- return BUSY_INDEX(rscreen, gds);
- case R600_QUERY_GPU_VGT_BUSY:
- return BUSY_INDEX(rscreen, vgt);
- case R600_QUERY_GPU_IA_BUSY:
- return BUSY_INDEX(rscreen, ia);
- case R600_QUERY_GPU_SX_BUSY:
- return BUSY_INDEX(rscreen, sx);
- case R600_QUERY_GPU_WD_BUSY:
- return BUSY_INDEX(rscreen, wd);
- case R600_QUERY_GPU_BCI_BUSY:
- return BUSY_INDEX(rscreen, bci);
- case R600_QUERY_GPU_SC_BUSY:
- return BUSY_INDEX(rscreen, sc);
- case R600_QUERY_GPU_PA_BUSY:
- return BUSY_INDEX(rscreen, pa);
- case R600_QUERY_GPU_DB_BUSY:
- return BUSY_INDEX(rscreen, db);
- case R600_QUERY_GPU_CP_BUSY:
- return BUSY_INDEX(rscreen, cp);
- case R600_QUERY_GPU_CB_BUSY:
- return BUSY_INDEX(rscreen, cb);
- case R600_QUERY_GPU_SDMA_BUSY:
- return BUSY_INDEX(rscreen, sdma);
- case R600_QUERY_GPU_PFP_BUSY:
- return BUSY_INDEX(rscreen, pfp);
- case R600_QUERY_GPU_MEQ_BUSY:
- return BUSY_INDEX(rscreen, meq);
- case R600_QUERY_GPU_ME_BUSY:
- return BUSY_INDEX(rscreen, me);
- case R600_QUERY_GPU_SURF_SYNC_BUSY:
- return BUSY_INDEX(rscreen, surf_sync);
- case R600_QUERY_GPU_CP_DMA_BUSY:
- return BUSY_INDEX(rscreen, cp_dma);
- case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
- return BUSY_INDEX(rscreen, scratch_ram);
- default:
- unreachable("invalid query type");
- }
-}
-
-/* Start measuring the load for a query type: returns the current packed
- * busy/idle counter pair, to be handed to si_end_counter() later.
- */
-uint64_t si_begin_counter(struct r600_common_screen *rscreen, unsigned type)
-{
-	return r600_read_mmio_counter(rscreen,
-				      busy_index_from_type(rscreen, type));
-}
-
-/* Finish measuring the load for a query type: returns the result of
- * r600_end_mmio_counter() for the counters sampled since "begin", which
- * must come from si_begin_counter() with the same type.
- */
-unsigned si_end_counter(struct r600_common_screen *rscreen, unsigned type,
-			uint64_t begin)
-{
-	return r600_end_mmio_counter(rscreen, begin,
-				     busy_index_from_type(rscreen, type));
-}
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_perfcounter.c b/lib/mesa/src/gallium/drivers/radeon/r600_perfcounter.c
index 6c68dc469..57c324689 100644
--- a/lib/mesa/src/gallium/drivers/radeon/r600_perfcounter.c
+++ b/lib/mesa/src/gallium/drivers/radeon/r600_perfcounter.c
@@ -1,5 +1,6 @@
/*
* Copyright 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -19,25 +20,21 @@
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * Authors:
- * Nicolai Hähnle <nicolai.haehnle@amd.com>
- *
*/
#include "util/u_memory.h"
-#include "r600_query.h"
-#include "r600_pipe_common.h"
+#include "radeonsi/si_query.h"
+#include "radeonsi/si_pipe.h"
#include "amd/common/sid.h"
/* Max counters per HW block */
-#define R600_QUERY_MAX_COUNTERS 16
+#define SI_QUERY_MAX_COUNTERS 16
-static struct r600_perfcounter_block *
-lookup_counter(struct r600_perfcounters *pc, unsigned index,
+static struct si_perfcounter_block *
+lookup_counter(struct si_perfcounters *pc, unsigned index,
unsigned *base_gid, unsigned *sub_index)
{
- struct r600_perfcounter_block *block = pc->blocks;
+ struct si_perfcounter_block *block = pc->blocks;
unsigned bid;
*base_gid = 0;
@@ -56,11 +53,11 @@ lookup_counter(struct r600_perfcounters *pc, unsigned index,
return NULL;
}
-static struct r600_perfcounter_block *
-lookup_group(struct r600_perfcounters *pc, unsigned *index)
+static struct si_perfcounter_block *
+lookup_group(struct si_perfcounters *pc, unsigned *index)
{
unsigned bid;
- struct r600_perfcounter_block *block = pc->blocks;
+ struct si_perfcounter_block *block = pc->blocks;
for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
if (*index < block->num_groups)
@@ -71,113 +68,113 @@ lookup_group(struct r600_perfcounters *pc, unsigned *index)
return NULL;
}
-struct r600_pc_group {
- struct r600_pc_group *next;
- struct r600_perfcounter_block *block;
+struct si_pc_group {
+ struct si_pc_group *next;
+ struct si_perfcounter_block *block;
unsigned sub_gid; /* only used during init */
unsigned result_base; /* only used during init */
int se;
int instance;
unsigned num_counters;
- unsigned selectors[R600_QUERY_MAX_COUNTERS];
+ unsigned selectors[SI_QUERY_MAX_COUNTERS];
};
-struct r600_pc_counter {
+struct si_pc_counter {
unsigned base;
unsigned qwords;
unsigned stride; /* in uint64s */
};
-#define R600_PC_SHADERS_WINDOWING (1 << 31)
+#define SI_PC_SHADERS_WINDOWING (1 << 31)
-struct r600_query_pc {
- struct r600_query_hw b;
+struct si_query_pc {
+ struct si_query_hw b;
unsigned shaders;
unsigned num_counters;
- struct r600_pc_counter *counters;
- struct r600_pc_group *groups;
+ struct si_pc_counter *counters;
+ struct si_pc_group *groups;
};
-static void r600_pc_query_destroy(struct r600_common_screen *rscreen,
- struct r600_query *rquery)
+static void si_pc_query_destroy(struct si_screen *sscreen,
+ struct si_query *rquery)
{
- struct r600_query_pc *query = (struct r600_query_pc *)rquery;
+ struct si_query_pc *query = (struct si_query_pc *)rquery;
while (query->groups) {
- struct r600_pc_group *group = query->groups;
+ struct si_pc_group *group = query->groups;
query->groups = group->next;
FREE(group);
}
FREE(query->counters);
- si_query_hw_destroy(rscreen, rquery);
+ si_query_hw_destroy(sscreen, rquery);
}
-static bool r600_pc_query_prepare_buffer(struct r600_common_screen *screen,
- struct r600_query_hw *hwquery,
- struct r600_resource *buffer)
+static bool si_pc_query_prepare_buffer(struct si_screen *screen,
+ struct si_query_hw *hwquery,
+ struct r600_resource *buffer)
{
/* no-op */
return true;
}
-static void r600_pc_query_emit_start(struct r600_common_context *ctx,
- struct r600_query_hw *hwquery,
- struct r600_resource *buffer, uint64_t va)
+static void si_pc_query_emit_start(struct si_context *sctx,
+ struct si_query_hw *hwquery,
+ struct r600_resource *buffer, uint64_t va)
{
- struct r600_perfcounters *pc = ctx->screen->perfcounters;
- struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
- struct r600_pc_group *group;
+ struct si_perfcounters *pc = sctx->screen->perfcounters;
+ struct si_query_pc *query = (struct si_query_pc *)hwquery;
+ struct si_pc_group *group;
int current_se = -1;
int current_instance = -1;
if (query->shaders)
- pc->emit_shaders(ctx, query->shaders);
+ pc->emit_shaders(sctx, query->shaders);
for (group = query->groups; group; group = group->next) {
- struct r600_perfcounter_block *block = group->block;
+ struct si_perfcounter_block *block = group->block;
if (group->se != current_se || group->instance != current_instance) {
current_se = group->se;
current_instance = group->instance;
- pc->emit_instance(ctx, group->se, group->instance);
+ pc->emit_instance(sctx, group->se, group->instance);
}
- pc->emit_select(ctx, block, group->num_counters, group->selectors);
+ pc->emit_select(sctx, block, group->num_counters, group->selectors);
}
if (current_se != -1 || current_instance != -1)
- pc->emit_instance(ctx, -1, -1);
+ pc->emit_instance(sctx, -1, -1);
- pc->emit_start(ctx, buffer, va);
+ pc->emit_start(sctx, buffer, va);
}
-static void r600_pc_query_emit_stop(struct r600_common_context *ctx,
- struct r600_query_hw *hwquery,
- struct r600_resource *buffer, uint64_t va)
+static void si_pc_query_emit_stop(struct si_context *sctx,
+ struct si_query_hw *hwquery,
+ struct r600_resource *buffer, uint64_t va)
{
- struct r600_perfcounters *pc = ctx->screen->perfcounters;
- struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
- struct r600_pc_group *group;
+ struct si_perfcounters *pc = sctx->screen->perfcounters;
+ struct si_query_pc *query = (struct si_query_pc *)hwquery;
+ struct si_pc_group *group;
- pc->emit_stop(ctx, buffer, va);
+ pc->emit_stop(sctx, buffer, va);
for (group = query->groups; group; group = group->next) {
- struct r600_perfcounter_block *block = group->block;
+ struct si_perfcounter_block *block = group->block;
unsigned se = group->se >= 0 ? group->se : 0;
unsigned se_end = se + 1;
- if ((block->flags & R600_PC_BLOCK_SE) && (group->se < 0))
- se_end = ctx->screen->info.max_se;
+ if ((block->flags & SI_PC_BLOCK_SE) && (group->se < 0))
+ se_end = sctx->screen->info.max_se;
do {
unsigned instance = group->instance >= 0 ? group->instance : 0;
do {
- pc->emit_instance(ctx, se, instance);
- pc->emit_read(ctx, block,
+ pc->emit_instance(sctx, se, instance);
+ pc->emit_read(sctx, block,
group->num_counters, group->selectors,
buffer, va);
va += sizeof(uint64_t) * group->num_counters;
@@ -185,28 +182,28 @@ static void r600_pc_query_emit_stop(struct r600_common_context *ctx,
} while (++se < se_end);
}
- pc->emit_instance(ctx, -1, -1);
+ pc->emit_instance(sctx, -1, -1);
}
-static void r600_pc_query_clear_result(struct r600_query_hw *hwquery,
- union pipe_query_result *result)
+static void si_pc_query_clear_result(struct si_query_hw *hwquery,
+ union pipe_query_result *result)
{
- struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
+ struct si_query_pc *query = (struct si_query_pc *)hwquery;
memset(result, 0, sizeof(result->batch[0]) * query->num_counters);
}
-static void r600_pc_query_add_result(struct r600_common_screen *rscreen,
- struct r600_query_hw *hwquery,
- void *buffer,
- union pipe_query_result *result)
+static void si_pc_query_add_result(struct si_screen *sscreen,
+ struct si_query_hw *hwquery,
+ void *buffer,
+ union pipe_query_result *result)
{
- struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
+ struct si_query_pc *query = (struct si_query_pc *)hwquery;
uint64_t *results = buffer;
unsigned i, j;
for (i = 0; i < query->num_counters; ++i) {
- struct r600_pc_counter *counter = &query->counters[i];
+ struct si_pc_counter *counter = &query->counters[i];
for (j = 0; j < counter->qwords; ++j) {
uint32_t value = results[counter->base + j * counter->stride];
@@ -215,27 +212,27 @@ static void r600_pc_query_add_result(struct r600_common_screen *rscreen,
}
}
-static struct r600_query_ops batch_query_ops = {
- .destroy = r600_pc_query_destroy,
+static struct si_query_ops batch_query_ops = {
+ .destroy = si_pc_query_destroy,
.begin = si_query_hw_begin,
.end = si_query_hw_end,
.get_result = si_query_hw_get_result
};
-static struct r600_query_hw_ops batch_query_hw_ops = {
- .prepare_buffer = r600_pc_query_prepare_buffer,
- .emit_start = r600_pc_query_emit_start,
- .emit_stop = r600_pc_query_emit_stop,
- .clear_result = r600_pc_query_clear_result,
- .add_result = r600_pc_query_add_result,
+static struct si_query_hw_ops batch_query_hw_ops = {
+ .prepare_buffer = si_pc_query_prepare_buffer,
+ .emit_start = si_pc_query_emit_start,
+ .emit_stop = si_pc_query_emit_stop,
+ .clear_result = si_pc_query_clear_result,
+ .add_result = si_pc_query_add_result,
};
-static struct r600_pc_group *get_group_state(struct r600_common_screen *screen,
- struct r600_query_pc *query,
- struct r600_perfcounter_block *block,
+static struct si_pc_group *get_group_state(struct si_screen *screen,
+ struct si_query_pc *query,
+ struct si_perfcounter_block *block,
unsigned sub_gid)
{
- struct r600_pc_group *group = query->groups;
+ struct si_pc_group *group = query->groups;
while (group) {
if (group->block == block && group->sub_gid == sub_gid)
@@ -243,49 +240,49 @@ static struct r600_pc_group *get_group_state(struct r600_common_screen *screen,
group = group->next;
}
- group = CALLOC_STRUCT(r600_pc_group);
+ group = CALLOC_STRUCT(si_pc_group);
if (!group)
return NULL;
group->block = block;
group->sub_gid = sub_gid;
- if (block->flags & R600_PC_BLOCK_SHADER) {
+ if (block->flags & SI_PC_BLOCK_SHADER) {
unsigned sub_gids = block->num_instances;
unsigned shader_id;
unsigned shaders;
unsigned query_shaders;
- if (block->flags & R600_PC_BLOCK_SE_GROUPS)
+ if (block->flags & SI_PC_BLOCK_SE_GROUPS)
sub_gids = sub_gids * screen->info.max_se;
shader_id = sub_gid / sub_gids;
sub_gid = sub_gid % sub_gids;
shaders = screen->perfcounters->shader_type_bits[shader_id];
- query_shaders = query->shaders & ~R600_PC_SHADERS_WINDOWING;
+ query_shaders = query->shaders & ~SI_PC_SHADERS_WINDOWING;
if (query_shaders && query_shaders != shaders) {
- fprintf(stderr, "r600_perfcounter: incompatible shader groups\n");
+ fprintf(stderr, "si_perfcounter: incompatible shader groups\n");
FREE(group);
return NULL;
}
query->shaders = shaders;
}
- if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED && !query->shaders) {
+ if (block->flags & SI_PC_BLOCK_SHADER_WINDOWED && !query->shaders) {
// A non-zero value in query->shaders ensures that the shader
// masking is reset unless the user explicitly requests one.
- query->shaders = R600_PC_SHADERS_WINDOWING;
+ query->shaders = SI_PC_SHADERS_WINDOWING;
}
- if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
+ if (block->flags & SI_PC_BLOCK_SE_GROUPS) {
group->se = sub_gid / block->num_instances;
sub_gid = sub_gid % block->num_instances;
} else {
group->se = -1;
}
- if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
+ if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) {
group->instance = sub_gid;
} else {
group->instance = -1;
@@ -301,19 +298,19 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
unsigned num_queries,
unsigned *query_types)
{
- struct r600_common_screen *screen =
- (struct r600_common_screen *)ctx->screen;
- struct r600_perfcounters *pc = screen->perfcounters;
- struct r600_perfcounter_block *block;
- struct r600_pc_group *group;
- struct r600_query_pc *query;
+ struct si_screen *screen =
+ (struct si_screen *)ctx->screen;
+ struct si_perfcounters *pc = screen->perfcounters;
+ struct si_perfcounter_block *block;
+ struct si_pc_group *group;
+ struct si_query_pc *query;
unsigned base_gid, sub_gid, sub_index;
unsigned i, j;
if (!pc)
return NULL;
- query = CALLOC_STRUCT(r600_query_pc);
+ query = CALLOC_STRUCT(si_query_pc);
if (!query)
return NULL;
@@ -326,10 +323,10 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
for (i = 0; i < num_queries; ++i) {
unsigned sub_gid;
- if (query_types[i] < R600_QUERY_FIRST_PERFCOUNTER)
+ if (query_types[i] < SI_QUERY_FIRST_PERFCOUNTER)
goto error;
- block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
+ block = lookup_counter(pc, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER,
&base_gid, &sub_index);
if (!block)
goto error;
@@ -352,19 +349,16 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
}
/* Compute result bases and CS size per group */
- query->b.num_cs_dw_begin = pc->num_start_cs_dwords;
query->b.num_cs_dw_end = pc->num_stop_cs_dwords;
-
- query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
query->b.num_cs_dw_end += pc->num_instance_cs_dwords;
i = 0;
for (group = query->groups; group; group = group->next) {
- struct r600_perfcounter_block *block = group->block;
- unsigned select_dw, read_dw;
+ struct si_perfcounter_block *block = group->block;
+ unsigned read_dw;
unsigned instances = 1;
- if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
+ if ((block->flags & SI_PC_BLOCK_SE) && group->se < 0)
instances = screen->info.max_se;
if (group->instance < 0)
instances *= block->num_instances;
@@ -373,27 +367,23 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
query->b.result_size += sizeof(uint64_t) * instances * group->num_counters;
i += instances * group->num_counters;
- pc->get_size(block, group->num_counters, group->selectors,
- &select_dw, &read_dw);
- query->b.num_cs_dw_begin += select_dw;
+ read_dw = 6 * group->num_counters;
query->b.num_cs_dw_end += instances * read_dw;
- query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
query->b.num_cs_dw_end += instances * pc->num_instance_cs_dwords;
}
if (query->shaders) {
- if (query->shaders == R600_PC_SHADERS_WINDOWING)
+ if (query->shaders == SI_PC_SHADERS_WINDOWING)
query->shaders = 0xffffffff;
- query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords;
}
/* Map user-supplied query array to result indices */
query->counters = CALLOC(num_queries, sizeof(*query->counters));
for (i = 0; i < num_queries; ++i) {
- struct r600_pc_counter *counter = &query->counters[i];
- struct r600_perfcounter_block *block;
+ struct si_pc_counter *counter = &query->counters[i];
+ struct si_perfcounter_block *block;
- block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
+ block = lookup_counter(pc, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER,
&base_gid, &sub_index);
sub_gid = sub_index / block->num_selectors;
@@ -411,7 +401,7 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
counter->stride = group->num_counters;
counter->qwords = 1;
- if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
+ if ((block->flags & SI_PC_BLOCK_SE) && group->se < 0)
counter->qwords = screen->info.max_se;
if (group->instance < 0)
counter->qwords *= block->num_instances;
@@ -423,12 +413,12 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
return (struct pipe_query *)query;
error:
- r600_pc_query_destroy(screen, &query->b.b);
+ si_pc_query_destroy(screen, &query->b.b);
return NULL;
}
-static bool r600_init_block_names(struct r600_common_screen *screen,
- struct r600_perfcounter_block *block)
+static bool si_init_block_names(struct si_screen *screen,
+ struct si_perfcounter_block *block)
{
unsigned i, j, k;
unsigned groups_shader = 1, groups_se = 1, groups_instance = 1;
@@ -436,25 +426,25 @@ static bool r600_init_block_names(struct r600_common_screen *screen,
char *groupname;
char *p;
- if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
+ if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS)
groups_instance = block->num_instances;
- if (block->flags & R600_PC_BLOCK_SE_GROUPS)
+ if (block->flags & SI_PC_BLOCK_SE_GROUPS)
groups_se = screen->info.max_se;
- if (block->flags & R600_PC_BLOCK_SHADER)
+ if (block->flags & SI_PC_BLOCK_SHADER)
groups_shader = screen->perfcounters->num_shader_types;
namelen = strlen(block->basename);
block->group_name_stride = namelen + 1;
- if (block->flags & R600_PC_BLOCK_SHADER)
+ if (block->flags & SI_PC_BLOCK_SHADER)
block->group_name_stride += 3;
- if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
+ if (block->flags & SI_PC_BLOCK_SE_GROUPS) {
assert(groups_se <= 10);
block->group_name_stride += 1;
- if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
+ if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS)
block->group_name_stride += 1;
}
- if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
+ if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) {
assert(groups_instance <= 100);
block->group_name_stride += 2;
}
@@ -472,18 +462,18 @@ static bool r600_init_block_names(struct r600_common_screen *screen,
strcpy(groupname, block->basename);
p = groupname + namelen;
- if (block->flags & R600_PC_BLOCK_SHADER) {
+ if (block->flags & SI_PC_BLOCK_SHADER) {
strcpy(p, shader_suffix);
p += shaderlen;
}
- if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
+ if (block->flags & SI_PC_BLOCK_SE_GROUPS) {
p += sprintf(p, "%d", j);
- if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
+ if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS)
*p++ = '_';
}
- if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
+ if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS)
p += sprintf(p, "%d", k);
groupname += block->group_name_stride;
@@ -511,12 +501,12 @@ static bool r600_init_block_names(struct r600_common_screen *screen,
return true;
}
-int si_get_perfcounter_info(struct r600_common_screen *screen,
+int si_get_perfcounter_info(struct si_screen *screen,
unsigned index,
struct pipe_driver_query_info *info)
{
- struct r600_perfcounters *pc = screen->perfcounters;
- struct r600_perfcounter_block *block;
+ struct si_perfcounters *pc = screen->perfcounters;
+ struct si_perfcounter_block *block;
unsigned base_gid, sub;
if (!pc)
@@ -538,11 +528,11 @@ int si_get_perfcounter_info(struct r600_common_screen *screen,
return 0;
if (!block->selector_names) {
- if (!r600_init_block_names(screen, block))
+ if (!si_init_block_names(screen, block))
return 0;
}
info->name = block->selector_names + sub * block->selector_name_stride;
- info->query_type = R600_QUERY_FIRST_PERFCOUNTER + index;
+ info->query_type = SI_QUERY_FIRST_PERFCOUNTER + index;
info->max_value.u64 = 0;
info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
@@ -553,12 +543,12 @@ int si_get_perfcounter_info(struct r600_common_screen *screen,
return 1;
}
-int si_get_perfcounter_group_info(struct r600_common_screen *screen,
+int si_get_perfcounter_group_info(struct si_screen *screen,
unsigned index,
struct pipe_driver_query_group_info *info)
{
- struct r600_perfcounters *pc = screen->perfcounters;
- struct r600_perfcounter_block *block;
+ struct si_perfcounters *pc = screen->perfcounters;
+ struct si_perfcounter_block *block;
if (!pc)
return 0;
@@ -571,7 +561,7 @@ int si_get_perfcounter_group_info(struct r600_common_screen *screen,
return 0;
if (!block->group_names) {
- if (!r600_init_block_names(screen, block))
+ if (!si_init_block_names(screen, block))
return 0;
}
info->name = block->group_names + index * block->group_name_stride;
@@ -580,16 +570,16 @@ int si_get_perfcounter_group_info(struct r600_common_screen *screen,
return 1;
}
-void si_perfcounters_destroy(struct r600_common_screen *rscreen)
+void si_perfcounters_destroy(struct si_screen *sscreen)
{
- if (rscreen->perfcounters)
- rscreen->perfcounters->cleanup(rscreen);
+ if (sscreen->perfcounters)
+ sscreen->perfcounters->cleanup(sscreen);
}
-bool si_perfcounters_init(struct r600_perfcounters *pc,
+bool si_perfcounters_init(struct si_perfcounters *pc,
unsigned num_blocks)
{
- pc->blocks = CALLOC(num_blocks, sizeof(struct r600_perfcounter_block));
+ pc->blocks = CALLOC(num_blocks, sizeof(struct si_perfcounter_block));
if (!pc->blocks)
return false;
@@ -599,15 +589,15 @@ bool si_perfcounters_init(struct r600_perfcounters *pc,
return true;
}
-void si_perfcounters_add_block(struct r600_common_screen *rscreen,
- struct r600_perfcounters *pc,
+void si_perfcounters_add_block(struct si_screen *sscreen,
+ struct si_perfcounters *pc,
const char *name, unsigned flags,
unsigned counters, unsigned selectors,
unsigned instances, void *data)
{
- struct r600_perfcounter_block *block = &pc->blocks[pc->num_blocks];
+ struct si_perfcounter_block *block = &pc->blocks[pc->num_blocks];
- assert(counters <= R600_QUERY_MAX_COUNTERS);
+ assert(counters <= SI_QUERY_MAX_COUNTERS);
block->basename = name;
block->flags = flags;
@@ -616,27 +606,27 @@ void si_perfcounters_add_block(struct r600_common_screen *rscreen,
block->num_instances = MAX2(instances, 1);
block->data = data;
- if (pc->separate_se && (block->flags & R600_PC_BLOCK_SE))
- block->flags |= R600_PC_BLOCK_SE_GROUPS;
+ if (pc->separate_se && (block->flags & SI_PC_BLOCK_SE))
+ block->flags |= SI_PC_BLOCK_SE_GROUPS;
if (pc->separate_instance && block->num_instances > 1)
- block->flags |= R600_PC_BLOCK_INSTANCE_GROUPS;
+ block->flags |= SI_PC_BLOCK_INSTANCE_GROUPS;
- if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
+ if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) {
block->num_groups = block->num_instances;
} else {
block->num_groups = 1;
}
- if (block->flags & R600_PC_BLOCK_SE_GROUPS)
- block->num_groups *= rscreen->info.max_se;
- if (block->flags & R600_PC_BLOCK_SHADER)
+ if (block->flags & SI_PC_BLOCK_SE_GROUPS)
+ block->num_groups *= sscreen->info.max_se;
+ if (block->flags & SI_PC_BLOCK_SHADER)
block->num_groups *= pc->num_shader_types;
++pc->num_blocks;
pc->num_groups += block->num_groups;
}
-void si_perfcounters_do_destroy(struct r600_perfcounters *pc)
+void si_perfcounters_do_destroy(struct si_perfcounters *pc)
{
unsigned i;
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.c b/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.c
deleted file mode 100644
index e5a31bbba..000000000
--- a/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.c
+++ /dev/null
@@ -1,1498 +0,0 @@
-/*
- * Copyright 2013 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors: Marek Olšák <maraeo@gmail.com>
- *
- */
-
-#include "r600_pipe_common.h"
-#include "r600_cs.h"
-#include "tgsi/tgsi_parse.h"
-#include "util/list.h"
-#include "util/u_draw_quad.h"
-#include "util/u_memory.h"
-#include "util/u_format_s3tc.h"
-#include "util/u_upload_mgr.h"
-#include "os/os_time.h"
-#include "vl/vl_decoder.h"
-#include "vl/vl_video_buffer.h"
-#include "radeon/radeon_video.h"
-#include "amd/common/sid.h"
-#include <inttypes.h>
-#include <sys/utsname.h>
-#include <libsync.h>
-
-#include <llvm-c/TargetMachine.h>
-
-
-struct r600_multi_fence {
- struct pipe_reference reference;
- struct pipe_fence_handle *gfx;
- struct pipe_fence_handle *sdma;
-
- /* If the context wasn't flushed at fence creation, this is non-NULL. */
- struct {
- struct r600_common_context *ctx;
- unsigned ib_index;
- } gfx_unflushed;
-};
-
-/*
- * shader binary helpers.
- */
-void si_radeon_shader_binary_init(struct ac_shader_binary *b)
-{
- memset(b, 0, sizeof(*b));
-}
-
-void si_radeon_shader_binary_clean(struct ac_shader_binary *b)
-{
- if (!b)
- return;
- FREE(b->code);
- FREE(b->config);
- FREE(b->rodata);
- FREE(b->global_symbol_offsets);
- FREE(b->relocs);
- FREE(b->disasm_string);
- FREE(b->llvm_ir_string);
-}
-
-/*
- * pipe_context
- */
-
-/**
- * Write an EOP event.
- *
- * \param event EVENT_TYPE_*
- * \param event_flags Optional cache flush flags (TC)
- * \param data_sel 1 = fence, 3 = timestamp
- * \param buf Buffer
- * \param va GPU address
- * \param old_value Previous fence value (for a bug workaround)
- * \param new_value Fence value to write for this event.
- */
-void si_gfx_write_event_eop(struct r600_common_context *ctx,
- unsigned event, unsigned event_flags,
- unsigned data_sel,
- struct r600_resource *buf, uint64_t va,
- uint32_t new_fence, unsigned query_type)
-{
- struct radeon_winsys_cs *cs = ctx->gfx.cs;
- unsigned op = EVENT_TYPE(event) |
- EVENT_INDEX(5) |
- event_flags;
- unsigned sel = EOP_DATA_SEL(data_sel);
-
- /* Wait for write confirmation before writing data, but don't send
- * an interrupt. */
- if (data_sel != EOP_DATA_SEL_DISCARD)
- sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM);
-
- if (ctx->chip_class >= GFX9) {
- /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
- * counters) must immediately precede every timestamp event to
- * prevent a GPU hang on GFX9.
- *
- * Occlusion queries don't need to do it here, because they
- * always do ZPASS_DONE before the timestamp.
- */
- if (ctx->chip_class == GFX9 &&
- query_type != PIPE_QUERY_OCCLUSION_COUNTER &&
- query_type != PIPE_QUERY_OCCLUSION_PREDICATE &&
- query_type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
- struct r600_resource *scratch = ctx->eop_bug_scratch;
-
- assert(16 * ctx->screen->info.num_render_backends <=
- scratch->b.b.width0);
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
- radeon_emit(cs, scratch->gpu_address);
- radeon_emit(cs, scratch->gpu_address >> 32);
-
- radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch,
- RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
- }
-
- radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 6, 0));
- radeon_emit(cs, op);
- radeon_emit(cs, sel);
- radeon_emit(cs, va); /* address lo */
- radeon_emit(cs, va >> 32); /* address hi */
- radeon_emit(cs, new_fence); /* immediate data lo */
- radeon_emit(cs, 0); /* immediate data hi */
- radeon_emit(cs, 0); /* unused */
- } else {
- if (ctx->chip_class == CIK ||
- ctx->chip_class == VI) {
- struct r600_resource *scratch = ctx->eop_bug_scratch;
- uint64_t va = scratch->gpu_address;
-
- /* Two EOP events are required to make all engines go idle
- * (and optional cache flushes executed) before the timestamp
- * is written.
- */
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
- radeon_emit(cs, op);
- radeon_emit(cs, va);
- radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
- radeon_emit(cs, 0); /* immediate data */
- radeon_emit(cs, 0); /* unused */
-
- radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch,
- RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
- }
-
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
- radeon_emit(cs, op);
- radeon_emit(cs, va);
- radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
- radeon_emit(cs, new_fence); /* immediate data */
- radeon_emit(cs, 0); /* unused */
- }
-
- if (buf) {
- radeon_add_to_buffer_list(ctx, &ctx->gfx, buf, RADEON_USAGE_WRITE,
- RADEON_PRIO_QUERY);
- }
-}
-
-unsigned si_gfx_write_fence_dwords(struct r600_common_screen *screen)
-{
- unsigned dwords = 6;
-
- if (screen->chip_class == CIK ||
- screen->chip_class == VI)
- dwords *= 2;
-
- if (!screen->info.has_virtual_memory)
- dwords += 2;
-
- return dwords;
-}
-
-void si_gfx_wait_fence(struct r600_common_context *ctx,
- uint64_t va, uint32_t ref, uint32_t mask)
-{
- struct radeon_winsys_cs *cs = ctx->gfx.cs;
-
- radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, ref); /* reference value */
- radeon_emit(cs, mask); /* mask */
- radeon_emit(cs, 4); /* poll interval */
-}
-
-static void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
-{
- struct radeon_winsys_cs *cs = rctx->dma.cs;
-
- /* NOP waits for idle on Evergreen and later. */
- if (rctx->chip_class >= CIK)
- radeon_emit(cs, 0x00000000); /* NOP */
- else
- radeon_emit(cs, 0xf0000000); /* NOP */
-}
-
-void si_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
- struct r600_resource *dst, struct r600_resource *src)
-{
- uint64_t vram = ctx->dma.cs->used_vram;
- uint64_t gtt = ctx->dma.cs->used_gart;
-
- if (dst) {
- vram += dst->vram_usage;
- gtt += dst->gart_usage;
- }
- if (src) {
- vram += src->vram_usage;
- gtt += src->gart_usage;
- }
-
- /* Flush the GFX IB if DMA depends on it. */
- if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
- ((dst &&
- ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, dst->buf,
- RADEON_USAGE_READWRITE)) ||
- (src &&
- ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, src->buf,
- RADEON_USAGE_WRITE))))
- ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
-
- /* Flush if there's not enough space, or if the memory usage per IB
- * is too large.
- *
- * IBs using too little memory are limited by the IB submission overhead.
- * IBs using too much memory are limited by the kernel/TTM overhead.
- * Too long IBs create CPU-GPU pipeline bubbles and add latency.
- *
- * This heuristic makes sure that DMA requests are executed
- * very soon after the call is made and lowers memory usage.
- * It improves texture upload performance by keeping the DMA
- * engine busy while uploads are being submitted.
- */
- num_dw++; /* for emit_wait_idle below */
- if (!ctx->ws->cs_check_space(ctx->dma.cs, num_dw) ||
- ctx->dma.cs->used_vram + ctx->dma.cs->used_gart > 64 * 1024 * 1024 ||
- !radeon_cs_memory_below_limit(ctx->screen, ctx->dma.cs, vram, gtt)) {
- ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
- assert((num_dw + ctx->dma.cs->current.cdw) <= ctx->dma.cs->current.max_dw);
- }
-
- /* Wait for idle if either buffer has been used in the IB before to
- * prevent read-after-write hazards.
- */
- if ((dst &&
- ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, dst->buf,
- RADEON_USAGE_READWRITE)) ||
- (src &&
- ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, src->buf,
- RADEON_USAGE_WRITE)))
- r600_dma_emit_wait_idle(ctx);
-
- /* If GPUVM is not supported, the CS checker needs 2 entries
- * in the buffer list per packet, which has to be done manually.
- */
- if (ctx->screen->info.has_virtual_memory) {
- if (dst)
- radeon_add_to_buffer_list(ctx, &ctx->dma, dst,
- RADEON_USAGE_WRITE,
- RADEON_PRIO_SDMA_BUFFER);
- if (src)
- radeon_add_to_buffer_list(ctx, &ctx->dma, src,
- RADEON_USAGE_READ,
- RADEON_PRIO_SDMA_BUFFER);
- }
-
- /* this function is called before all DMA calls, so increment this. */
- ctx->num_dma_calls++;
-}
-
-static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags)
-{
-}
-
-void si_preflush_suspend_features(struct r600_common_context *ctx)
-{
- /* suspend queries */
- if (!LIST_IS_EMPTY(&ctx->active_queries))
- si_suspend_queries(ctx);
-}
-
-void si_postflush_resume_features(struct r600_common_context *ctx)
-{
- /* resume queries */
- if (!LIST_IS_EMPTY(&ctx->active_queries))
- si_resume_queries(ctx);
-}
-
-static void r600_add_fence_dependency(struct r600_common_context *rctx,
- struct pipe_fence_handle *fence)
-{
- struct radeon_winsys *ws = rctx->ws;
-
- if (rctx->dma.cs)
- ws->cs_add_fence_dependency(rctx->dma.cs, fence);
- ws->cs_add_fence_dependency(rctx->gfx.cs, fence);
-}
-
-/**
- * pipe_context::fence_server_sync hook: make unflushed commands of \p ctx
- * wait for the sub-fences of \p fence.
- */
-static void r600_fence_server_sync(struct pipe_context *ctx,
-				   struct pipe_fence_handle *fence)
-{
-	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
-	struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence;
-	struct pipe_fence_handle *deps[2];
-	unsigned i;
-
-	/* Only amdgpu needs to handle fence dependencies (for fence imports).
-	 * radeon (DRM major 2) synchronizes all rings by default and will not
-	 * implement fence imports.
-	 *
-	 * Only imported fences need handling here, because the winsys
-	 * synchronizes BOs within the process automatically. Unflushed
-	 * (deferred) fences are simply skipped; the winsys drops no-op
-	 * dependencies (i.e. dependencies within the same ring).
-	 */
-	if (rctx->screen->info.drm_major == 2 || rfence->gfx_unflushed.ctx)
-		return;
-
-	/* All unflushed commands will not start execution before these
-	 * dependencies are signalled.
-	 *
-	 * Should we flush the context to allow more GPU parallelism?
-	 */
-	deps[0] = rfence->sdma;
-	deps[1] = rfence->gfx;
-	for (i = 0; i < 2; i++) {
-		if (deps[i])
-			r600_add_fence_dependency(rctx, deps[i]);
-	}
-}
-
-/**
- * pipe_context::create_fence_fd hook: wrap the sync file \p fd in a new
- * multi-fence. *pfence is set to NULL when sync files are unsupported or
- * when allocation or the import fails.
- */
-static void r600_create_fence_fd(struct pipe_context *ctx,
-				 struct pipe_fence_handle **pfence, int fd)
-{
-	struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
-	struct r600_multi_fence *imported = NULL;
-
-	*pfence = NULL;
-
-	if (rscreen->info.has_sync_file)
-		imported = CALLOC_STRUCT(r600_multi_fence);
-	if (!imported)
-		return;
-
-	pipe_reference_init(&imported->reference, 1);
-	imported->gfx = rscreen->ws->fence_import_sync_file(rscreen->ws, fd);
-
-	if (imported->gfx)
-		*pfence = (struct pipe_fence_handle*)imported;
-	else
-		FREE(imported);
-}
-
-/**
- * pipe_screen::fence_get_fd hook: export \p fence as a sync-file descriptor.
- *
- * Returns a file descriptor, or -1 when sync files are unsupported, the
- * fence is still deferred (unflushed), or exporting/merging the per-ring
- * fences fails.
- */
-static int r600_fence_get_fd(struct pipe_screen *screen,
-			     struct pipe_fence_handle *fence)
-{
-	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
-	struct radeon_winsys *ws = rscreen->ws;
-	struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence;
-	int gfx_fd = -1, sdma_fd = -1;
-
-	if (!rscreen->info.has_sync_file)
-		return -1;
-
-	/* Deferred fences aren't supported. */
-	assert(!rfence->gfx_unflushed.ctx);
-	if (rfence->gfx_unflushed.ctx)
-		return -1;
-
-	if (rfence->sdma) {
-		sdma_fd = ws->fence_export_sync_file(ws, rfence->sdma);
-		if (sdma_fd == -1)
-			return -1;
-	}
-	if (rfence->gfx) {
-		gfx_fd = ws->fence_export_sync_file(ws, rfence->gfx);
-		if (gfx_fd == -1) {
-			/* Don't leak the SDMA fd exported above. */
-			if (sdma_fd != -1)
-				close(sdma_fd);
-			return -1;
-		}
-	}
-
-	/* If we don't have FDs at this point, it means we don't have fences
-	 * either. */
-	if (sdma_fd == -1 && gfx_fd == -1)
-		return ws->export_signalled_sync_file(ws);
-	if (sdma_fd == -1)
-		return gfx_fd;
-	if (gfx_fd == -1)
-		return sdma_fd;
-
-	/* Get a fence that will be a combination of both fences.
-	 *
-	 * If the merge fails, gfx_fd would still describe only the gfx fence,
-	 * so report an error instead of returning an fd that silently omits
-	 * the SDMA fence.
-	 */
-	if (sync_accumulate("radeonsi", &gfx_fd, sdma_fd) != 0) {
-		close(sdma_fd);
-		close(gfx_fd);
-		return -1;
-	}
-	close(sdma_fd);
-	return gfx_fd;
-}
-
-/**
- * pipe_context::flush hook.
- *
- * Flushes the DMA CS (if one exists) and then either flushes the gfx CS or,
- * when the caller allows it, creates a deferred gfx fence instead. If
- * \p fence is non-NULL, a new r600_multi_fence holding the gfx and SDMA
- * fences replaces *fence. Unless PIPE_FLUSH_DEFERRED is set, the function
- * finishes by calling cs_sync_flush on both command streams.
- */
-static void r600_flush_from_st(struct pipe_context *ctx,
- struct pipe_fence_handle **fence,
- unsigned flags)
-{
- struct pipe_screen *screen = ctx->screen;
- struct r600_common_context *rctx = (struct r600_common_context *)ctx;
- struct radeon_winsys *ws = rctx->ws;
- struct pipe_fence_handle *gfx_fence = NULL;
- struct pipe_fence_handle *sdma_fence = NULL;
- bool deferred_fence = false;
- unsigned rflags = RADEON_FLUSH_ASYNC;
-
- if (flags & PIPE_FLUSH_END_OF_FRAME)
- rflags |= RADEON_FLUSH_END_OF_FRAME;
-
- /* DMA IBs are preambles to gfx IBs, therefore must be flushed first. */
- if (rctx->dma.cs)
- rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL);
-
- /* Nothing new in the gfx CS: reuse the fence of the last gfx flush. */
- if (!radeon_emitted(rctx->gfx.cs, rctx->initial_gfx_cs_size)) {
- if (fence)
- ws->fence_reference(&gfx_fence, rctx->last_gfx_fence);
- if (!(flags & PIPE_FLUSH_DEFERRED))
- ws->cs_sync_flush(rctx->gfx.cs);
- } else {
- /* Instead of flushing, create a deferred fence. Constraints:
- * - The state tracker must allow a deferred flush.
- * - The state tracker must request a fence.
- * - fence_get_fd is not allowed.
- * Thread safety in fence_finish must be ensured by the state tracker.
- */
- if (flags & PIPE_FLUSH_DEFERRED &&
- !(flags & PIPE_FLUSH_FENCE_FD) &&
- fence) {
- gfx_fence = rctx->ws->cs_get_next_fence(rctx->gfx.cs);
- deferred_fence = true;
- } else {
- rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
- }
- }
-
- /* Both engines can signal out of order, so we need to keep both fences. */
- if (fence) {
- struct r600_multi_fence *multi_fence =
- CALLOC_STRUCT(r600_multi_fence);
- if (!multi_fence) {
- /* Allocation failed: drop the per-ring references; *fence is
- * left untouched. */
- ws->fence_reference(&sdma_fence, NULL);
- ws->fence_reference(&gfx_fence, NULL);
- goto finish;
- }
-
- multi_fence->reference.count = 1;
- /* If both fences are NULL, fence_finish will always return true. */
- multi_fence->gfx = gfx_fence;
- multi_fence->sdma = sdma_fence;
-
- /* Record which context and IB the deferred fence belongs to, so
- * that fence_finish can flush it later. */
- if (deferred_fence) {
- multi_fence->gfx_unflushed.ctx = rctx;
- multi_fence->gfx_unflushed.ib_index = rctx->num_gfx_cs_flushes;
- }
-
- /* Release whatever *fence referenced before overwriting it. */
- screen->fence_reference(screen, fence, NULL);
- *fence = (struct pipe_fence_handle*)multi_fence;
- }
-finish:
- if (!(flags & PIPE_FLUSH_DEFERRED)) {
- if (rctx->dma.cs)
- ws->cs_sync_flush(rctx->dma.cs);
- ws->cs_sync_flush(rctx->gfx.cs);
- }
-}
-
-/**
- * Flush callback of the DMA command stream.
- *
- * Submits the DMA CS if it contains anything and, when \p fence is non-NULL,
- * stores a reference to the last SDMA fence in it. With the CHECK_VM debug
- * flag and a check_vm_faults hook, the CS is saved before submission and
- * checked for VM faults after waiting for the fence.
- */
-static void r600_flush_dma_ring(void *ctx, unsigned flags,
-				struct pipe_fence_handle **fence)
-{
-	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
-	struct radeon_winsys *ws = rctx->ws;
-	struct radeon_winsys_cs *cs = rctx->dma.cs;
-	struct radeon_saved_cs saved;
-	bool check_vm =
-		(rctx->screen->debug_flags & DBG(CHECK_VM)) &&
-		rctx->check_vm_faults;
-	bool has_commands = radeon_emitted(cs, 0);
-
-	if (has_commands) {
-		if (check_vm)
-			si_save_cs(ws, cs, &saved, true);
-
-		ws->cs_flush(cs, flags, &rctx->last_sdma_fence);
-	}
-
-	/* Whether or not anything was submitted, hand out the latest fence. */
-	if (fence)
-		ws->fence_reference(fence, rctx->last_sdma_fence);
-
-	if (!has_commands || !check_vm)
-		return;
-
-	/* Use conservative timeout 800ms, after which we won't wait any
-	 * longer and assume the GPU is hung.
-	 */
-	ws->fence_wait(ws, rctx->last_sdma_fence, 800*1000*1000);
-
-	rctx->check_vm_faults(rctx, &saved, RING_DMA);
-	si_clear_saved_cs(&saved);
-}
-
-/**
- * Store a linearized copy of all chunks of \p cs together with the buffer
- * list in \p saved.
- *
- * \param ws               winsys used to query the buffer list
- * \param cs               command stream to copy
- * \param saved            destination; release it with si_clear_saved_cs()
- * \param get_buffer_list  also save the buffer list of \p cs
- *
- * On allocation failure a message is printed to stderr and \p saved is
- * zeroed.
- */
-void si_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
-		struct radeon_saved_cs *saved, bool get_buffer_list)
-{
-	uint32_t *buf;
-	unsigned i;
-
-	/* si_clear_saved_cs() frees bo_list unconditionally, so never leave
-	 * these fields uninitialized, even if no buffer list is requested.
-	 */
-	saved->bo_list = NULL;
-	saved->bo_count = 0;
-
-	/* Save the IB chunks. */
-	saved->num_dw = cs->prev_dw + cs->current.cdw;
-	saved->ib = MALLOC(4 * saved->num_dw);
-	if (!saved->ib)
-		goto oom;
-
-	/* Concatenate the previous chunks and the current one (sizes in dwords). */
-	buf = saved->ib;
-	for (i = 0; i < cs->num_prev; ++i) {
-		memcpy(buf, cs->prev[i].buf, cs->prev[i].cdw * 4);
-		buf += cs->prev[i].cdw;
-	}
-	memcpy(buf, cs->current.buf, cs->current.cdw * 4);
-
-	if (!get_buffer_list)
-		return;
-
-	/* Save the buffer list. An empty list needs no storage, and
-	 * a zero-sized allocation may legitimately return NULL, which must
-	 * not be mistaken for an out-of-memory condition.
-	 */
-	saved->bo_count = ws->cs_get_buffer_list(cs, NULL);
-	if (!saved->bo_count)
-		return;
-
-	saved->bo_list = CALLOC(saved->bo_count,
-				sizeof(saved->bo_list[0]));
-	if (!saved->bo_list) {
-		FREE(saved->ib);
-		goto oom;
-	}
-	ws->cs_get_buffer_list(cs, saved->bo_list);
-
-	return;
-
-oom:
-	fprintf(stderr, "%s: out of memory\n", __func__);
-	memset(saved, 0, sizeof(*saved));
-}
-
-/* Free the storage owned by \p saved and reset the whole structure to zero. */
-void si_clear_saved_cs(struct radeon_saved_cs *saved)
-{
-	/* The two allocations are independent of each other. */
-	FREE(saved->bo_list);
-	FREE(saved->ib);
-
-	memset(saved, 0, sizeof(*saved));
-}
-
-/**
- * Compare the winsys GPU reset counter with the value cached in the context.
- * Returns PIPE_NO_RESET when it is unchanged; otherwise updates the cached
- * value and returns PIPE_UNKNOWN_CONTEXT_RESET.
- */
-static enum pipe_reset_status r600_get_reset_status(struct pipe_context *ctx)
-{
-	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
-	struct radeon_winsys *ws = rctx->ws;
-	unsigned current = ws->query_value(ws, RADEON_GPU_RESET_COUNTER);
-	enum pipe_reset_status status = PIPE_NO_RESET;
-
-	if (current != rctx->gpu_reset_counter) {
-		rctx->gpu_reset_counter = current;
-		status = PIPE_UNKNOWN_CONTEXT_RESET;
-	}
-
-	return status;
-}
-
-/* Copy the debug callback \p cb into the context, or clear it when \p cb is NULL. */
-static void r600_set_debug_callback(struct pipe_context *ctx,
-				    const struct pipe_debug_callback *cb)
-{
-	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
-
-	if (!cb) {
-		memset(&rctx->debug, 0, sizeof(rctx->debug));
-		return;
-	}
-
-	rctx->debug = *cb;
-}
-
-/* Copy the device-reset callback \p cb into the context, or clear it when \p cb is NULL. */
-static void r600_set_device_reset_callback(struct pipe_context *ctx,
-					   const struct pipe_device_reset_callback *cb)
-{
-	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
-
-	if (!cb) {
-		memset(&rctx->device_reset_callback, 0,
-		       sizeof(rctx->device_reset_callback));
-		return;
-	}
-
-	rctx->device_reset_callback = *cb;
-}
-
-/**
- * Query the device reset status and, if a reset happened, invoke the
- * registered device-reset callback.
- *
- * Returns true when the callback was invoked; false when no callback or no
- * status hook is installed, or when the status is PIPE_NO_RESET.
- */
-bool si_check_device_reset(struct r600_common_context *rctx)
-{
-	enum pipe_reset_status status;
-
-	/* Both the user callback and the status hook are required. */
-	if (!rctx->device_reset_callback.reset ||
-	    !rctx->b.get_device_reset_status)
-		return false;
-
-	status = rctx->b.get_device_reset_status(&rctx->b);
-	if (status != PIPE_NO_RESET) {
-		rctx->device_reset_callback.reset(rctx->device_reset_callback.data,
-						  status);
-		return true;
-	}
-
-	return false;
-}
-
-/* Default dma_clear_buffer: forward to the context's clear_buffer hook with
- * R600_COHERENCY_NONE. */
-static void r600_dma_clear_buffer_fallback(struct pipe_context *ctx,
-					   struct pipe_resource *dst,
-					   uint64_t offset, uint64_t size,
-					   unsigned value)
-{
-	struct r600_common_context *common = (struct r600_common_context *)ctx;
-
-	common->clear_buffer(ctx, dst, offset, size, value,
-			     R600_COHERENCY_NONE);
-}
-
-/**
- * pipe_context::resource_commit hook: change the commitment of the range
- * [box->x, box->x + box->width) of a buffer resource.
- *
- * Returns the result of the winsys buffer_commit call.
- */
-static bool r600_resource_commit(struct pipe_context *pctx,
-				 struct pipe_resource *resource,
-				 unsigned level, struct pipe_box *box,
-				 bool commit)
-{
-	struct r600_common_context *ctx = (struct r600_common_context *)pctx;
-	struct r600_resource *res = r600_resource(resource);
-
-	/*
-	 * Since buffer commitment changes cannot be pipelined, we need to
-	 * (a) flush any pending commands that refer to the buffer we're about
-	 *     to change, and
-	 * (b) wait for threaded submit to finish, including those that were
-	 *     triggered by some other, earlier operation.
-	 */
-	if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
-	    ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs,
-					     res->buf, RADEON_USAGE_READWRITE)) {
-		ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
-	}
-
-	/* The DMA CS is optional (it is not created when async DMA is disabled
-	 * or the chip has no SDMA rings), so never pass a NULL CS to the winsys.
-	 */
-	if (ctx->dma.cs) {
-		if (radeon_emitted(ctx->dma.cs, 0) &&
-		    ctx->ws->cs_is_buffer_referenced(ctx->dma.cs,
-						     res->buf, RADEON_USAGE_READWRITE)) {
-			ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
-		}
-
-		ctx->ws->cs_sync_flush(ctx->dma.cs);
-	}
-	ctx->ws->cs_sync_flush(ctx->gfx.cs);
-
-	/* Only buffers are handled here; \p level is unused. */
-	assert(resource->target == PIPE_BUFFER);
-
-	return ctx->ws->buffer_commit(res->buf, box->x, box->width, commit);
-}
-
-/**
- * Initialize the common part of a context: install the pipe_context hooks,
- * create the uploaders, the zeroed-memory suballocator, the winsys context
- * and, when SDMA is available and not disabled, the DMA command stream.
- *
- * Returns false as soon as one of the allocations fails. Nothing created so
- * far is released here.
- * NOTE(review): presumably the caller runs si_common_context_cleanup() on
- * failure - confirm.
- */
-bool si_common_context_init(struct r600_common_context *rctx,
- struct r600_common_screen *rscreen,
- unsigned context_flags)
-{
- slab_create_child(&rctx->pool_transfers, &rscreen->pool_transfers);
- slab_create_child(&rctx->pool_transfers_unsync, &rscreen->pool_transfers);
-
- rctx->screen = rscreen;
- rctx->ws = rscreen->ws;
- rctx->family = rscreen->family;
- rctx->chip_class = rscreen->chip_class;
-
- /* Install the pipe_context entry points implemented by the common code. */
- rctx->b.invalidate_resource = si_invalidate_resource;
- rctx->b.resource_commit = r600_resource_commit;
- rctx->b.transfer_map = u_transfer_map_vtbl;
- rctx->b.transfer_flush_region = u_transfer_flush_region_vtbl;
- rctx->b.transfer_unmap = u_transfer_unmap_vtbl;
- rctx->b.texture_subdata = u_default_texture_subdata;
- rctx->b.memory_barrier = r600_memory_barrier;
- rctx->b.flush = r600_flush_from_st;
- rctx->b.set_debug_callback = r600_set_debug_callback;
- rctx->b.create_fence_fd = r600_create_fence_fd;
- rctx->b.fence_server_sync = r600_fence_server_sync;
- rctx->dma_clear_buffer = r600_dma_clear_buffer_fallback;
- rctx->b.buffer_subdata = si_buffer_subdata;
-
- /* The reset status hook is only installed for DRM 2.x with minor >= 43;
- * the current reset counter is cached as the baseline. */
- if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 43) {
- rctx->b.get_device_reset_status = r600_get_reset_status;
- rctx->gpu_reset_counter =
- rctx->ws->query_value(rctx->ws,
- RADEON_GPU_RESET_COUNTER);
- }
-
- rctx->b.set_device_reset_callback = r600_set_device_reset_callback;
-
- si_init_context_texture_functions(rctx);
- si_init_query_functions(rctx);
-
- /* CIK, VI and GFX9 get a scratch buffer of 16 bytes per render backend. */
- if (rctx->chip_class == CIK ||
- rctx->chip_class == VI ||
- rctx->chip_class == GFX9) {
- rctx->eop_bug_scratch = (struct r600_resource*)
- pipe_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT,
- 16 * rscreen->info.num_render_backends);
- if (!rctx->eop_bug_scratch)
- return false;
- }
-
- rctx->allocator_zeroed_memory =
- u_suballocator_create(&rctx->b, rscreen->info.gart_page_size,
- 0, PIPE_USAGE_DEFAULT, 0, true);
- if (!rctx->allocator_zeroed_memory)
- return false;
-
- rctx->b.stream_uploader = u_upload_create(&rctx->b, 1024 * 1024,
- 0, PIPE_USAGE_STREAM);
- if (!rctx->b.stream_uploader)
- return false;
-
- rctx->b.const_uploader = u_upload_create(&rctx->b, 128 * 1024,
- 0, PIPE_USAGE_DEFAULT);
- if (!rctx->b.const_uploader)
- return false;
-
- rctx->ctx = rctx->ws->ctx_create(rctx->ws);
- if (!rctx->ctx)
- return false;
-
- /* The DMA CS is optional: it requires SDMA rings and is skipped when the
- * NO_ASYNC_DMA debug flag is set. Code using rctx->dma.cs must therefore
- * cope with it being NULL. */
- if (rscreen->info.num_sdma_rings && !(rscreen->debug_flags & DBG(NO_ASYNC_DMA))) {
- rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
- r600_flush_dma_ring,
- rctx);
- rctx->dma.flush = r600_flush_dma_ring;
- }
-
- return true;
-}
-
-/**
- * Release everything owned by the common part of a context: DCC statistics
- * queries and textures, the query result shader, the command streams and the
- * winsys context, the uploaders, the transfer pools, the zeroed-memory
- * suballocator, the last fences and the EOP-bug scratch buffer.
- *
- * Most members are checked for NULL before being destroyed.
- */
-void si_common_context_cleanup(struct r600_common_context *rctx)
-{
- unsigned i,j;
-
- /* Release DCC stats. */
- for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++) {
- assert(!rctx->dcc_stats[i].query_active);
-
- for (j = 0; j < ARRAY_SIZE(rctx->dcc_stats[i].ps_stats); j++)
- if (rctx->dcc_stats[i].ps_stats[j])
- rctx->b.destroy_query(&rctx->b,
- rctx->dcc_stats[i].ps_stats[j]);
-
- r600_texture_reference(&rctx->dcc_stats[i].tex, NULL);
- }
-
- if (rctx->query_result_shader)
- rctx->b.delete_compute_state(&rctx->b, rctx->query_result_shader);
-
- /* Destroy the command streams before the winsys context. */
- if (rctx->gfx.cs)
- rctx->ws->cs_destroy(rctx->gfx.cs);
- if (rctx->dma.cs)
- rctx->ws->cs_destroy(rctx->dma.cs);
- if (rctx->ctx)
- rctx->ws->ctx_destroy(rctx->ctx);
-
- if (rctx->b.stream_uploader)
- u_upload_destroy(rctx->b.stream_uploader);
- if (rctx->b.const_uploader)
- u_upload_destroy(rctx->b.const_uploader);
-
- slab_destroy_child(&rctx->pool_transfers);
- slab_destroy_child(&rctx->pool_transfers_unsync);
-
- if (rctx->allocator_zeroed_memory) {
- u_suballocator_destroy(rctx->allocator_zeroed_memory);
- }
- /* Drop the references held on the last submitted fences. */
- rctx->ws->fence_reference(&rctx->last_gfx_fence, NULL);
- rctx->ws->fence_reference(&rctx->last_sdma_fence, NULL);
- r600_resource_reference(&rctx->eop_bug_scratch, NULL);
-}
-
-/*
- * pipe_screen
- */
-
-/* Table of debug flags: option name, flag bit and help text for each entry.
- * si_common_screen_init() passes it to debug_get_flags_option() together
- * with the "R600_DEBUG" variable name.
- */
-static const struct debug_named_value common_debug_options[] = {
- /* logging */
- { "tex", DBG(TEX), "Print texture info" },
- { "nir", DBG(NIR), "Enable experimental NIR shaders" },
- { "compute", DBG(COMPUTE), "Print compute info" },
- { "vm", DBG(VM), "Print virtual addresses when creating resources" },
- { "info", DBG(INFO), "Print driver information" },
-
- /* shaders */
- { "vs", DBG(VS), "Print vertex shaders" },
- { "gs", DBG(GS), "Print geometry shaders" },
- { "ps", DBG(PS), "Print pixel shaders" },
- { "cs", DBG(CS), "Print compute shaders" },
- { "tcs", DBG(TCS), "Print tessellation control shaders" },
- { "tes", DBG(TES), "Print tessellation evaluation shaders" },
- { "noir", DBG(NO_IR), "Don't print the LLVM IR"},
- { "notgsi", DBG(NO_TGSI), "Don't print the TGSI"},
- { "noasm", DBG(NO_ASM), "Don't print disassembled shaders"},
- { "preoptir", DBG(PREOPT_IR), "Print the LLVM IR before initial optimizations" },
- { "checkir", DBG(CHECK_IR), "Enable additional sanity checks on shader IR" },
- { "nooptvariant", DBG(NO_OPT_VARIANT), "Disable compiling optimized shader variants." },
-
- /* self-tests that run and then exit */
- { "testdma", DBG(TEST_DMA), "Invoke SDMA tests and exit." },
- { "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and exit." },
- { "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault test and exit." },
- { "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit." },
-
- /* features */
- { "nodma", DBG(NO_ASYNC_DMA), "Disable asynchronous DMA" },
- { "nohyperz", DBG(NO_HYPERZ), "Disable Hyper-Z" },
- /* GL uses the word INVALIDATE, gallium uses the word DISCARD */
- { "noinvalrange", DBG(NO_DISCARD_RANGE), "Disable handling of INVALIDATE_RANGE map flags" },
- { "no2d", DBG(NO_2D_TILING), "Disable 2D tiling" },
- { "notiling", DBG(NO_TILING), "Disable tiling" },
- { "switch_on_eop", DBG(SWITCH_ON_EOP), "Program WD/IA to switch on end-of-packet." },
- { "forcedma", DBG(FORCE_DMA), "Use asynchronous DMA for all operations when possible." },
- { "precompile", DBG(PRECOMPILE), "Compile one shader variant at shader creation." },
- { "nowc", DBG(NO_WC), "Disable GTT write combining" },
- { "check_vm", DBG(CHECK_VM), "Check VM faults and dump debug info." },
- { "nodcc", DBG(NO_DCC), "Disable DCC." },
- { "nodccclear", DBG(NO_DCC_CLEAR), "Disable DCC fast clear." },
- { "norbplus", DBG(NO_RB_PLUS), "Disable RB+." },
- { "sisched", DBG(SI_SCHED), "Enable LLVM SI Machine Instruction Scheduler." },
- { "mono", DBG(MONOLITHIC_SHADERS), "Use old-style monolithic shaders compiled on demand" },
- { "unsafemath", DBG(UNSAFE_MATH), "Enable unsafe math shader optimizations" },
- { "nodccfb", DBG(NO_DCC_FB), "Disable separate DCC on the main framebuffer" },
- { "nodpbb", DBG(NO_DPBB), "Disable DPBB." },
- { "nodfsm", DBG(NO_DFSM), "Disable DFSM." },
- { "dpbb", DBG(DPBB), "Enable DPBB." },
- { "dfsm", DBG(DFSM), "Enable DFSM." },
- { "nooutoforder", DBG(NO_OUT_OF_ORDER), "Disable out-of-order rasterization" },
-
- DEBUG_NAMED_VALUE_END /* must be last */
-};
-
-/* pipe_screen::get_vendor hook: constant driver vendor string. */
-static const char* r600_get_vendor(struct pipe_screen* pscreen)
-{
-	(void)pscreen; /* the answer does not depend on the screen */
-
-	return "X.Org";
-}
-
-/* pipe_screen::get_device_vendor hook: constant hardware vendor string. */
-static const char* r600_get_device_vendor(struct pipe_screen* pscreen)
-{
-	(void)pscreen; /* the answer does not depend on the screen */
-
-	return "AMD";
-}
-
-/* Return the chip name reported by the winsys, or NULL when the winsys
- * provides no get_chip_name hook. */
-static const char *r600_get_marketing_name(struct radeon_winsys *ws)
-{
-	return ws->get_chip_name ? ws->get_chip_name(ws) : NULL;
-}
-
-/**
- * Map the chip family of \p rscreen to a display name.
- *
- * Every name starts with "AMD "; si_common_screen_init() skips the first four
- * characters of the result, so keep that prefix when adding entries.
- */
-static const char *r600_get_family_name(const struct r600_common_screen *rscreen)
-{
-	static const struct {
-		int family;
-		const char *name;
-	} family_names[] = {
-		{ CHIP_TAHITI,    "AMD TAHITI" },
-		{ CHIP_PITCAIRN,  "AMD PITCAIRN" },
-		{ CHIP_VERDE,     "AMD CAPE VERDE" },
-		{ CHIP_OLAND,     "AMD OLAND" },
-		{ CHIP_HAINAN,    "AMD HAINAN" },
-		{ CHIP_BONAIRE,   "AMD BONAIRE" },
-		{ CHIP_KAVERI,    "AMD KAVERI" },
-		{ CHIP_KABINI,    "AMD KABINI" },
-		{ CHIP_HAWAII,    "AMD HAWAII" },
-		{ CHIP_MULLINS,   "AMD MULLINS" },
-		{ CHIP_TONGA,     "AMD TONGA" },
-		{ CHIP_ICELAND,   "AMD ICELAND" },
-		{ CHIP_CARRIZO,   "AMD CARRIZO" },
-		{ CHIP_FIJI,      "AMD FIJI" },
-		{ CHIP_POLARIS10, "AMD POLARIS10" },
-		{ CHIP_POLARIS11, "AMD POLARIS11" },
-		{ CHIP_POLARIS12, "AMD POLARIS12" },
-		{ CHIP_STONEY,    "AMD STONEY" },
-		{ CHIP_VEGA10,    "AMD VEGA10" },
-		{ CHIP_RAVEN,     "AMD RAVEN" },
-	};
-	unsigned i;
-
-	for (i = 0; i < ARRAY_SIZE(family_names); i++) {
-		if (family_names[i].family == (int)rscreen->info.family)
-			return family_names[i].name;
-	}
-
-	/* Families missing from the table. */
-	return "AMD unknown";
-}
-
-/**
- * Create the on-disk shader cache of \p rscreen.
- *
- * The cache is keyed by the family name, a "<mesa>_<llvm>" timestamp string
- * and the debug flags that affect shader compilation. No cache is created
- * when shader dumping is enabled, when either timestamp is unavailable, or
- * when building the timestamp string fails.
- */
-static void r600_disk_cache_create(struct r600_common_screen *rscreen)
-{
-	uint32_t mesa_timestamp, llvm_timestamp;
-	uint64_t shader_debug_flags;
-	char *timestamp_str;
-
-	/* Don't use the cache if shader dumping is enabled. */
-	if (rscreen->debug_flags & DBG_ALL_SHADERS)
-		return;
-
-	/* Both the Mesa and the LLVM build timestamps are required. */
-	if (!disk_cache_get_function_timestamp(r600_disk_cache_create,
-					       &mesa_timestamp) ||
-	    !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo,
-					       &llvm_timestamp))
-		return;
-
-	if (asprintf(&timestamp_str, "%u_%u",
-		     mesa_timestamp, llvm_timestamp) == -1)
-		return;
-
-	/* These flags affect shader compilation. */
-	shader_debug_flags = rscreen->debug_flags &
-			     (DBG(FS_CORRECT_DERIVS_AFTER_KILL) |
-			      DBG(SI_SCHED) |
-			      DBG(UNSAFE_MATH));
-
-	rscreen->disk_shader_cache =
-		disk_cache_create(r600_get_family_name(rscreen),
-				  timestamp_str,
-				  shader_debug_flags);
-	free(timestamp_str);
-}
-
-/* pipe_screen::get_disk_shader_cache hook: return the screen's shader cache. */
-static struct disk_cache *r600_get_disk_shader_cache(struct pipe_screen *pscreen)
-{
-	return ((struct r600_common_screen*)pscreen)->disk_shader_cache;
-}
-
-/* pipe_screen::get_name hook: return the renderer string built at screen init. */
-static const char* r600_get_name(struct pipe_screen* pscreen)
-{
-	return ((struct r600_common_screen*)pscreen)->renderer_string;
-}
-
-/* pipe_screen::get_paramf hook: floating-point capability limits. Guard band
- * caps and any cap not listed report 0. */
-static float r600_get_paramf(struct pipe_screen* pscreen,
-			     enum pipe_capf param)
-{
-	float value = 0.0f;
-
-	switch (param) {
-	case PIPE_CAPF_MAX_LINE_WIDTH:
-	case PIPE_CAPF_MAX_LINE_WIDTH_AA:
-	case PIPE_CAPF_MAX_POINT_WIDTH:
-	case PIPE_CAPF_MAX_POINT_WIDTH_AA:
-		value = 8192.0f;
-		break;
-	case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
-	case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
-		value = 16.0f;
-		break;
-	case PIPE_CAPF_GUARD_BAND_LEFT:
-	case PIPE_CAPF_GUARD_BAND_TOP:
-	case PIPE_CAPF_GUARD_BAND_RIGHT:
-	case PIPE_CAPF_GUARD_BAND_BOTTOM:
-		break;
-	}
-
-	return value;
-}
-
-/* Video capability query installed by si_common_screen_init() when the chip
- * has no hardware decode support. Unknown caps report 0. */
-static int r600_get_video_param(struct pipe_screen *screen,
-				enum pipe_video_profile profile,
-				enum pipe_video_entrypoint entrypoint,
-				enum pipe_video_cap param)
-{
-	int value = 0;
-
-	switch (param) {
-	case PIPE_VIDEO_CAP_SUPPORTED:
-		value = vl_profile_supported(screen, profile, entrypoint);
-		break;
-	case PIPE_VIDEO_CAP_NPOT_TEXTURES:
-		value = 1;
-		break;
-	case PIPE_VIDEO_CAP_MAX_WIDTH:
-	case PIPE_VIDEO_CAP_MAX_HEIGHT:
-		value = vl_video_buffer_max_size(screen);
-		break;
-	case PIPE_VIDEO_CAP_PREFERED_FORMAT:
-		value = PIPE_FORMAT_NV12;
-		break;
-	case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
-	case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
-		value = false;
-		break;
-	case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
-		value = true;
-		break;
-	case PIPE_VIDEO_CAP_MAX_LEVEL:
-		value = vl_level_supported(screen, profile);
-		break;
-	default:
-		break;
-	}
-
-	return value;
-}
-
-/* Return the LLVM processor name for \p family, or "" for a family that is
- * not listed. */
-const char *si_get_llvm_processor_name(enum radeon_family family)
-{
-	const char *name = "";
-
-	switch (family) {
-	case CHIP_TAHITI:	name = "tahiti"; break;
-	case CHIP_PITCAIRN:	name = "pitcairn"; break;
-	case CHIP_VERDE:	name = "verde"; break;
-	case CHIP_OLAND:	name = "oland"; break;
-	case CHIP_HAINAN:	name = "hainan"; break;
-	case CHIP_BONAIRE:	name = "bonaire"; break;
-	case CHIP_KABINI:	name = "kabini"; break;
-	case CHIP_KAVERI:	name = "kaveri"; break;
-	case CHIP_HAWAII:	name = "hawaii"; break;
-	case CHIP_MULLINS:	name = "mullins"; break;
-	case CHIP_TONGA:	name = "tonga"; break;
-	case CHIP_ICELAND:	name = "iceland"; break;
-	case CHIP_CARRIZO:	name = "carrizo"; break;
-	case CHIP_FIJI:		name = "fiji"; break;
-	case CHIP_STONEY:	name = "stoney"; break;
-	case CHIP_POLARIS10:	name = "polaris10"; break;
-	case CHIP_POLARIS11:
-	case CHIP_POLARIS12:	/* same as polaris11 */
-		name = "polaris11";
-		break;
-	case CHIP_VEGA10:
-	case CHIP_RAVEN:
-		name = "gfx900";
-		break;
-	default:
-		break;
-	}
-
-	return name;
-}
-
-/* Maximum number of threads per compute block for the given shader IR. */
-static unsigned get_max_threads_per_block(struct r600_common_screen *screen,
-					  enum pipe_shader_ir ir_type)
-{
-	unsigned limit;
-
-	if (ir_type != PIPE_SHADER_IR_TGSI) {
-		limit = 256;
-	} else if (screen->chip_class >= GFX9) {
-		/* Only 16 waves per thread-group on gfx9. */
-		limit = 1024;
-	} else {
-		/* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice
-		 * round number.
-		 */
-		limit = 2048;
-	}
-
-	return limit;
-}
-
-/**
- * pipe_screen::get_compute_param hook.
- *
- * For each handled cap, the value is written to \p ret when \p ret is
- * non-NULL, and the size of that value in bytes is returned, so a caller can
- * pass NULL first to learn the required size. Caps that are not handled
- * (including PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE, which only breaks out of the
- * switch) print a message to stderr and return 0.
- */
-static int r600_get_compute_param(struct pipe_screen *screen,
- enum pipe_shader_ir ir_type,
- enum pipe_compute_cap param,
- void *ret)
-{
- struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
-
- //TODO: select these params by asic
- switch (param) {
- case PIPE_COMPUTE_CAP_IR_TARGET: {
- const char *gpu;
- const char *triple;
-
- if (HAVE_LLVM < 0x0400)
- triple = "amdgcn--";
- else
- triple = "amdgcn-mesa-mesa3d";
-
- gpu = si_get_llvm_processor_name(rscreen->family);
- /* The result has the form "<gpu>-<triple>". */
- if (ret) {
- sprintf(ret, "%s-%s", gpu, triple);
- }
- /* +2 for dash and terminating NUL byte */
- return (strlen(triple) + strlen(gpu) + 2) * sizeof(char);
- }
- case PIPE_COMPUTE_CAP_GRID_DIMENSION:
- if (ret) {
- uint64_t *grid_dimension = ret;
- grid_dimension[0] = 3;
- }
- return 1 * sizeof(uint64_t);
-
- case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
- if (ret) {
- uint64_t *grid_size = ret;
- grid_size[0] = 65535;
- grid_size[1] = 65535;
- grid_size[2] = 65535;
- }
- return 3 * sizeof(uint64_t) ;
-
- case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
- if (ret) {
- uint64_t *block_size = ret;
- unsigned threads_per_block = get_max_threads_per_block(rscreen, ir_type);
- block_size[0] = threads_per_block;
- block_size[1] = threads_per_block;
- block_size[2] = threads_per_block;
- }
- return 3 * sizeof(uint64_t);
-
- case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
- if (ret) {
- uint64_t *max_threads_per_block = ret;
- *max_threads_per_block = get_max_threads_per_block(rscreen, ir_type);
- }
- return sizeof(uint64_t);
- case PIPE_COMPUTE_CAP_ADDRESS_BITS:
- if (ret) {
- uint32_t *address_bits = ret;
- address_bits[0] = 64;
- }
- return 1 * sizeof(uint32_t);
-
- case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
- if (ret) {
- uint64_t *max_global_size = ret;
- uint64_t max_mem_alloc_size;
-
- /* Recursive call to fetch the allocation limit. */
- r600_get_compute_param(screen, ir_type,
- PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
- &max_mem_alloc_size);
-
- /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least
- * 1/4 of the MAX_GLOBAL_SIZE. Since the
- * MAX_MEM_ALLOC_SIZE is fixed for older kernels,
- * make sure we never report more than
- * 4 * MAX_MEM_ALLOC_SIZE.
- */
- *max_global_size = MIN2(4 * max_mem_alloc_size,
- MAX2(rscreen->info.gart_size,
- rscreen->info.vram_size));
- }
- return sizeof(uint64_t);
-
- case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
- if (ret) {
- uint64_t *max_local_size = ret;
- /* Value reported by the closed source driver. */
- *max_local_size = 32768;
- }
- return sizeof(uint64_t);
-
- case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
- if (ret) {
- uint64_t *max_input_size = ret;
- /* Value reported by the closed source driver. */
- *max_input_size = 1024;
- }
- return sizeof(uint64_t);
-
- case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
- if (ret) {
- uint64_t *max_mem_alloc_size = ret;
-
- *max_mem_alloc_size = rscreen->info.max_alloc_size;
- }
- return sizeof(uint64_t);
-
- case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
- if (ret) {
- uint32_t *max_clock_frequency = ret;
- *max_clock_frequency = rscreen->info.max_shader_clock;
- }
- return sizeof(uint32_t);
-
- case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
- if (ret) {
- uint32_t *max_compute_units = ret;
- *max_compute_units = rscreen->info.num_good_compute_units;
- }
- return sizeof(uint32_t);
-
- case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
- if (ret) {
- uint32_t *images_supported = ret;
- *images_supported = 0;
- }
- return sizeof(uint32_t);
- case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
- break; /* unused */
- case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
- if (ret) {
- uint32_t *subgroup_size = ret;
- *subgroup_size = 64;
- }
- return sizeof(uint32_t);
- case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
- if (ret) {
- uint64_t *max_variable_threads_per_block = ret;
- if (ir_type == PIPE_SHADER_IR_TGSI)
- *max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
- else
- *max_variable_threads_per_block = 0;
- }
- return sizeof(uint64_t);
- }
-
- /* Reached for caps without a case above and for caps that break. */
- fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
- return 0;
-}
-
-/* pipe_screen::get_timestamp hook: scale the winsys RADEON_TIMESTAMP value
- * by 1000000 and divide it by the crystal clock frequency. */
-static uint64_t r600_get_timestamp(struct pipe_screen *screen)
-{
-	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
-	struct radeon_winsys *ws = rscreen->ws;
-
-	return 1000000 * ws->query_value(ws, RADEON_TIMESTAMP) /
-	       rscreen->info.clock_crystal_freq;
-}
-
-/**
- * pipe_screen::fence_reference hook: make *dst refer to \p src.
- *
- * When pipe_reference() reports that the fence previously held in *dst must
- * be destroyed, its gfx and SDMA winsys fences are released and the
- * multi-fence itself is freed.
- */
-static void r600_fence_reference(struct pipe_screen *screen,
-				 struct pipe_fence_handle **dst,
-				 struct pipe_fence_handle *src)
-{
-	struct radeon_winsys *ws = ((struct r600_common_screen*)screen)->ws;
-	struct r600_multi_fence **rdst = (struct r600_multi_fence **)dst;
-	struct r600_multi_fence *incoming = (struct r600_multi_fence *)src;
-	struct r600_multi_fence *previous = *rdst;
-
-	if (pipe_reference(&previous->reference, &incoming->reference)) {
-		ws->fence_reference(&previous->gfx, NULL);
-		ws->fence_reference(&previous->sdma, NULL);
-		FREE(previous);
-	}
-
-	*rdst = incoming;
-}
-
-/**
- * pipe_screen::fence_finish hook: wait for \p fence up to \p timeout.
- *
- * The SDMA fence is waited for first, then the gfx fence, with the remaining
- * timeout recomputed against an absolute deadline in between. A deferred
- * (unflushed) gfx fence that still belongs to the current IB of \p ctx is
- * flushed first.
- *
- * Returns true when the fence has signalled; false when a wait timed out, or
- * when \p timeout is 0 and the gfx IB had to be flushed here.
- */
-static boolean r600_fence_finish(struct pipe_screen *screen,
- struct pipe_context *ctx,
- struct pipe_fence_handle *fence,
- uint64_t timeout)
-{
- struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
- struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence;
- struct r600_common_context *rctx;
- int64_t abs_timeout = os_time_get_absolute_timeout(timeout);
-
- /* ctx may be NULL afterwards; rctx is then NULL too and no flush of a
- * deferred fence is attempted below. */
- ctx = threaded_context_unwrap_sync(ctx);
- rctx = ctx ? (struct r600_common_context*)ctx : NULL;
-
- if (rfence->sdma) {
- if (!rws->fence_wait(rws, rfence->sdma, timeout))
- return false;
-
- /* Recompute the timeout after waiting. */
- if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
- int64_t time = os_time_get_nano();
- timeout = abs_timeout > time ? abs_timeout - time : 0;
- }
- }
-
- /* No gfx fence: the SDMA wait (if any) was all there is. */
- if (!rfence->gfx)
- return true;
-
- /* Flush the gfx IB if it hasn't been flushed yet. */
- if (rctx &&
- rfence->gfx_unflushed.ctx == rctx &&
- rfence->gfx_unflushed.ib_index == rctx->num_gfx_cs_flushes) {
- /* With a zero timeout the flush is asynchronous. */
- rctx->gfx.flush(rctx, timeout ? 0 : RADEON_FLUSH_ASYNC, NULL);
- rfence->gfx_unflushed.ctx = NULL;
-
- /* The IB was only just submitted; with a zero timeout, report
- * "not finished" without waiting. */
- if (!timeout)
- return false;
-
- /* Recompute the timeout after all that. */
- if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
- int64_t time = os_time_get_nano();
- timeout = abs_timeout > time ? abs_timeout - time : 0;
- }
- }
-
- return rws->fence_wait(rws, rfence->gfx, timeout);
-}
-
-/**
- * pipe_screen::query_memory_info hook: fill \p info with the VRAM/GTT totals,
- * the memory still available to this process and eviction statistics. Every
- * byte count is divided by 1024 before being stored.
- */
-static void r600_query_memory_info(struct pipe_screen *screen,
-				   struct pipe_memory_info *info)
-{
-	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
-	struct radeon_winsys *ws = rscreen->ws;
-	unsigned vram_usage, gtt_usage;
-
-	info->total_device_memory = rscreen->info.vram_size / 1024;
-	info->total_staging_memory = rscreen->info.gart_size / 1024;
-
-	/* The real TTM memory usage is somewhat random, because:
-	 *
-	 * 1) TTM delays freeing memory, because it can only free it after
-	 *    fences expire.
-	 *
-	 * 2) The memory usage can be really low if big VRAM evictions are
-	 *    taking place, but the real usage is well above the size of VRAM.
-	 *
-	 * Instead, return statistics of this process.
-	 */
-	vram_usage = ws->query_value(ws, RADEON_REQUESTED_VRAM_MEMORY) / 1024;
-	gtt_usage = ws->query_value(ws, RADEON_REQUESTED_GTT_MEMORY) / 1024;
-
-	/* Clamp at zero when this process requested more than the total. */
-	if (vram_usage <= info->total_device_memory)
-		info->avail_device_memory = info->total_device_memory - vram_usage;
-	else
-		info->avail_device_memory = 0;
-
-	if (gtt_usage <= info->total_staging_memory)
-		info->avail_staging_memory = info->total_staging_memory - gtt_usage;
-	else
-		info->avail_staging_memory = 0;
-
-	info->device_memory_evicted =
-		ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024;
-
-	if (rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 4) {
-		info->nr_device_memory_evictions =
-			ws->query_value(ws, RADEON_NUM_EVICTIONS);
-	} else {
-		/* Just return the number of evicted 64KB pages. */
-		info->nr_device_memory_evictions = info->device_memory_evicted / 64;
-	}
-}
-
-/* Create a resource from \p templ: buffers go to si_buffer_create() with an
- * alignment argument of 256, everything else to si_texture_create(). */
-struct pipe_resource *si_resource_create_common(struct pipe_screen *screen,
-						const struct pipe_resource *templ)
-{
-	if (templ->target != PIPE_BUFFER)
-		return si_texture_create(screen, templ);
-
-	return si_buffer_create(screen, templ, 256);
-}
-
-bool si_common_screen_init(struct r600_common_screen *rscreen,
- struct radeon_winsys *ws)
-{
- char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = {};
- struct utsname uname_data;
- const char *chip_name;
-
- ws->query_info(ws, &rscreen->info);
- rscreen->ws = ws;
-
- if ((chip_name = r600_get_marketing_name(ws)))
- snprintf(family_name, sizeof(family_name), "%s / ",
- r600_get_family_name(rscreen) + 4);
- else
- chip_name = r600_get_family_name(rscreen);
-
- if (uname(&uname_data) == 0)
- snprintf(kernel_version, sizeof(kernel_version),
- " / %s", uname_data.release);
-
- if (HAVE_LLVM > 0) {
- snprintf(llvm_string, sizeof(llvm_string),
- ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
- HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
- }
-
- snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string),
- "%s (%sDRM %i.%i.%i%s%s)",
- chip_name, family_name, rscreen->info.drm_major,
- rscreen->info.drm_minor, rscreen->info.drm_patchlevel,
- kernel_version, llvm_string);
-
- rscreen->b.get_name = r600_get_name;
- rscreen->b.get_vendor = r600_get_vendor;
- rscreen->b.get_device_vendor = r600_get_device_vendor;
- rscreen->b.get_disk_shader_cache = r600_get_disk_shader_cache;
- rscreen->b.get_compute_param = r600_get_compute_param;
- rscreen->b.get_paramf = r600_get_paramf;
- rscreen->b.get_timestamp = r600_get_timestamp;
- rscreen->b.fence_finish = r600_fence_finish;
- rscreen->b.fence_reference = r600_fence_reference;
- rscreen->b.resource_destroy = u_resource_destroy_vtbl;
- rscreen->b.resource_from_user_memory = si_buffer_from_user_memory;
- rscreen->b.query_memory_info = r600_query_memory_info;
- rscreen->b.fence_get_fd = r600_fence_get_fd;
-
- if (rscreen->info.has_hw_decode) {
- rscreen->b.get_video_param = si_vid_get_video_param;
- rscreen->b.is_video_format_supported = si_vid_is_format_supported;
- } else {
- rscreen->b.get_video_param = r600_get_video_param;
- rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported;
- }
-
- si_init_screen_texture_functions(rscreen);
- si_init_screen_query_functions(rscreen);
-
- rscreen->family = rscreen->info.family;
- rscreen->chip_class = rscreen->info.chip_class;
- rscreen->debug_flags |= debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
- rscreen->has_rbplus = false;
- rscreen->rbplus_allowed = false;
-
- r600_disk_cache_create(rscreen);
-
- slab_create_parent(&rscreen->pool_transfers, sizeof(struct r600_transfer), 64);
-
- rscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1));
- if (rscreen->force_aniso >= 0) {
- printf("radeon: Forcing anisotropy filter to %ix\n",
- /* round down to a power of two */
- 1 << util_logbase2(rscreen->force_aniso));
- }
-
- (void) mtx_init(&rscreen->aux_context_lock, mtx_plain);
- (void) mtx_init(&rscreen->gpu_load_mutex, mtx_plain);
-
- if (rscreen->debug_flags & DBG(INFO)) {
- printf("pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n",
- rscreen->info.pci_domain, rscreen->info.pci_bus,
- rscreen->info.pci_dev, rscreen->info.pci_func);
- printf("pci_id = 0x%x\n", rscreen->info.pci_id);
- printf("family = %i (%s)\n", rscreen->info.family,
- r600_get_family_name(rscreen));
- printf("chip_class = %i\n", rscreen->info.chip_class);
- printf("pte_fragment_size = %u\n", rscreen->info.pte_fragment_size);
- printf("gart_page_size = %u\n", rscreen->info.gart_page_size);
- printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024));
- printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024));
- printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_vis_size, 1024*1024));
- printf("max_alloc_size = %i MB\n",
- (int)DIV_ROUND_UP(rscreen->info.max_alloc_size, 1024*1024));
- printf("min_alloc_size = %u\n", rscreen->info.min_alloc_size);
- printf("has_dedicated_vram = %u\n", rscreen->info.has_dedicated_vram);
- printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory);
- printf("gfx_ib_pad_with_type2 = %i\n", rscreen->info.gfx_ib_pad_with_type2);
- printf("has_hw_decode = %u\n", rscreen->info.has_hw_decode);
- printf("num_sdma_rings = %i\n", rscreen->info.num_sdma_rings);
- printf("num_compute_rings = %u\n", rscreen->info.num_compute_rings);
- printf("uvd_fw_version = %u\n", rscreen->info.uvd_fw_version);
- printf("vce_fw_version = %u\n", rscreen->info.vce_fw_version);
- printf("me_fw_version = %i\n", rscreen->info.me_fw_version);
- printf("me_fw_feature = %i\n", rscreen->info.me_fw_feature);
- printf("pfp_fw_version = %i\n", rscreen->info.pfp_fw_version);
- printf("pfp_fw_feature = %i\n", rscreen->info.pfp_fw_feature);
- printf("ce_fw_version = %i\n", rscreen->info.ce_fw_version);
- printf("ce_fw_feature = %i\n", rscreen->info.ce_fw_feature);
- printf("vce_harvest_config = %i\n", rscreen->info.vce_harvest_config);
- printf("clock_crystal_freq = %i\n", rscreen->info.clock_crystal_freq);
- printf("tcc_cache_line_size = %u\n", rscreen->info.tcc_cache_line_size);
- printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
- rscreen->info.drm_minor, rscreen->info.drm_patchlevel);
- printf("has_userptr = %i\n", rscreen->info.has_userptr);
- printf("has_syncobj = %u\n", rscreen->info.has_syncobj);
- printf("has_sync_file = %u\n", rscreen->info.has_sync_file);
-
- printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes);
- printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock);
- printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units);
- printf("max_se = %i\n", rscreen->info.max_se);
- printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se);
-
- printf("r600_gb_backend_map = %i\n", rscreen->info.r600_gb_backend_map);
- printf("r600_gb_backend_map_valid = %i\n", rscreen->info.r600_gb_backend_map_valid);
- printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks);
- printf("num_render_backends = %i\n", rscreen->info.num_render_backends);
- printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes);
- printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes);
- printf("enabled_rb_mask = 0x%x\n", rscreen->info.enabled_rb_mask);
- printf("max_alignment = %u\n", (unsigned)rscreen->info.max_alignment);
- }
- return true;
-}
-
-void si_destroy_common_screen(struct r600_common_screen *rscreen)
-{
- si_perfcounters_destroy(rscreen);
- si_gpu_load_kill_thread(rscreen);
-
- mtx_destroy(&rscreen->gpu_load_mutex);
- mtx_destroy(&rscreen->aux_context_lock);
- rscreen->aux_context->destroy(rscreen->aux_context);
-
- slab_destroy_parent(&rscreen->pool_transfers);
-
- disk_cache_destroy(rscreen->disk_shader_cache);
- rscreen->ws->destroy(rscreen->ws);
- FREE(rscreen);
-}
-
-bool si_can_dump_shader(struct r600_common_screen *rscreen,
- unsigned processor)
-{
- return rscreen->debug_flags & (1 << processor);
-}
-
-bool si_extra_shader_checks(struct r600_common_screen *rscreen, unsigned processor)
-{
- return (rscreen->debug_flags & DBG(CHECK_IR)) ||
- si_can_dump_shader(rscreen, processor);
-}
-
-void si_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
- uint64_t offset, uint64_t size, unsigned value)
-{
- struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;
-
- mtx_lock(&rscreen->aux_context_lock);
- rctx->dma_clear_buffer(&rctx->b, dst, offset, size, value);
- rscreen->aux_context->flush(rscreen->aux_context, NULL, 0);
- mtx_unlock(&rscreen->aux_context_lock);
-}
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.h b/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.h
deleted file mode 100644
index a7c91cb8a..000000000
--- a/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.h
+++ /dev/null
@@ -1,913 +0,0 @@
-/*
- * Copyright 2013 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors: Marek Olšák <maraeo@gmail.com>
- *
- */
-
-/**
- * This file contains common screen and context structures and functions
- * for r600g and radeonsi.
- */
-
-#ifndef R600_PIPE_COMMON_H
-#define R600_PIPE_COMMON_H
-
-#include <stdio.h>
-
-#include "amd/common/ac_binary.h"
-
-#include "radeon/radeon_winsys.h"
-
-#include "util/disk_cache.h"
-#include "util/u_blitter.h"
-#include "util/list.h"
-#include "util/u_range.h"
-#include "util/slab.h"
-#include "util/u_suballoc.h"
-#include "util/u_transfer.h"
-#include "util/u_threaded_context.h"
-
-struct u_log_context;
-
-#define ATI_VENDOR_ID 0x1002
-
-#define R600_RESOURCE_FLAG_TRANSFER (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
-#define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
-#define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
-#define R600_RESOURCE_FLAG_DISABLE_DCC (PIPE_RESOURCE_FLAG_DRV_PRIV << 3)
-#define R600_RESOURCE_FLAG_UNMAPPABLE (PIPE_RESOURCE_FLAG_DRV_PRIV << 4)
-
-#define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0)
-/* Pipeline & streamout query controls. */
-#define R600_CONTEXT_START_PIPELINE_STATS (1u << 1)
-#define R600_CONTEXT_STOP_PIPELINE_STATS (1u << 2)
-#define R600_CONTEXT_FLUSH_FOR_RENDER_COND (1u << 3)
-#define R600_CONTEXT_PRIVATE_FLAG (1u << 4)
-
-/* special primitive types */
-#define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX
-
-#define R600_NOT_QUERY 0xffffffff
-
-/* Debug flags. */
-enum {
- /* Shader logging options: */
- DBG_VS = PIPE_SHADER_VERTEX,
- DBG_PS = PIPE_SHADER_FRAGMENT,
- DBG_GS = PIPE_SHADER_GEOMETRY,
- DBG_TCS = PIPE_SHADER_TESS_CTRL,
- DBG_TES = PIPE_SHADER_TESS_EVAL,
- DBG_CS = PIPE_SHADER_COMPUTE,
- DBG_NO_IR,
- DBG_NO_TGSI,
- DBG_NO_ASM,
- DBG_PREOPT_IR,
-
- /* Shader compiler options the shader cache should be aware of: */
- DBG_FS_CORRECT_DERIVS_AFTER_KILL,
- DBG_UNSAFE_MATH,
- DBG_SI_SCHED,
-
- /* Shader compiler options (with no effect on the shader cache): */
- DBG_CHECK_IR,
- DBG_PRECOMPILE,
- DBG_NIR,
- DBG_MONOLITHIC_SHADERS,
- DBG_NO_OPT_VARIANT,
-
- /* Information logging options: */
- DBG_INFO,
- DBG_TEX,
- DBG_COMPUTE,
- DBG_VM,
-
- /* Driver options: */
- DBG_FORCE_DMA,
- DBG_NO_ASYNC_DMA,
- DBG_NO_DISCARD_RANGE,
- DBG_NO_WC,
- DBG_CHECK_VM,
-
- /* 3D engine options: */
- DBG_SWITCH_ON_EOP,
- DBG_NO_OUT_OF_ORDER,
- DBG_NO_DPBB,
- DBG_NO_DFSM,
- DBG_DPBB,
- DBG_DFSM,
- DBG_NO_HYPERZ,
- DBG_NO_RB_PLUS,
- DBG_NO_2D_TILING,
- DBG_NO_TILING,
- DBG_NO_DCC,
- DBG_NO_DCC_CLEAR,
- DBG_NO_DCC_FB,
-
- /* Tests: */
- DBG_TEST_DMA,
- DBG_TEST_VMFAULT_CP,
- DBG_TEST_VMFAULT_SDMA,
- DBG_TEST_VMFAULT_SHADER,
-};
-
-#define DBG_ALL_SHADERS (((1 << (DBG_CS + 1)) - 1))
-#define DBG(name) (1ull << DBG_##name)
-
-#define R600_MAP_BUFFER_ALIGNMENT 64
-
-#define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024
-
-enum r600_coherency {
- R600_COHERENCY_NONE, /* no cache flushes needed */
- R600_COHERENCY_SHADER,
- R600_COHERENCY_CB_META,
-};
-
-#ifdef PIPE_ARCH_BIG_ENDIAN
-#define R600_BIG_ENDIAN 1
-#else
-#define R600_BIG_ENDIAN 0
-#endif
-
-struct r600_common_context;
-struct r600_perfcounters;
-struct tgsi_shader_info;
-struct r600_qbo_state;
-
-void si_radeon_shader_binary_init(struct ac_shader_binary *b);
-void si_radeon_shader_binary_clean(struct ac_shader_binary *b);
-
-/* Only 32-bit buffer allocations are supported, gallium doesn't support more
- * at the moment.
- */
-struct r600_resource {
- struct threaded_resource b;
-
- /* Winsys objects. */
- struct pb_buffer *buf;
- uint64_t gpu_address;
- /* Memory usage if the buffer placement is optimal. */
- uint64_t vram_usage;
- uint64_t gart_usage;
-
- /* Resource properties. */
- uint64_t bo_size;
- unsigned bo_alignment;
- enum radeon_bo_domain domains;
- enum radeon_bo_flag flags;
- unsigned bind_history;
-
- /* The buffer range which is initialized (with a write transfer,
- * streamout, DMA, or as a random access target). The rest of
- * the buffer is considered invalid and can be mapped unsynchronized.
- *
- * This allows unsychronized mapping of a buffer range which hasn't
- * been used yet. It's for applications which forget to use
- * the unsynchronized map flag and expect the driver to figure it out.
- */
- struct util_range valid_buffer_range;
-
- /* For buffers only. This indicates that a write operation has been
- * performed by TC L2, but the cache hasn't been flushed.
- * Any hw block which doesn't use or bypasses TC L2 should check this
- * flag and flush the cache before using the buffer.
- *
- * For example, TC L2 must be flushed if a buffer which has been
- * modified by a shader store instruction is about to be used as
- * an index buffer. The reason is that VGT DMA index fetching doesn't
- * use TC L2.
- */
- bool TC_L2_dirty;
-
- /* Whether the resource has been exported via resource_get_handle. */
- unsigned external_usage; /* PIPE_HANDLE_USAGE_* */
-
- /* Whether this resource is referenced by bindless handles. */
- bool texture_handle_allocated;
- bool image_handle_allocated;
-};
-
-struct r600_transfer {
- struct threaded_transfer b;
- struct r600_resource *staging;
- unsigned offset;
-};
-
-struct r600_fmask_info {
- uint64_t offset;
- uint64_t size;
- unsigned alignment;
- unsigned pitch_in_pixels;
- unsigned bank_height;
- unsigned slice_tile_max;
- unsigned tile_mode_index;
- unsigned tile_swizzle;
-};
-
-struct r600_cmask_info {
- uint64_t offset;
- uint64_t size;
- unsigned alignment;
- unsigned slice_tile_max;
- uint64_t base_address_reg;
-};
-
-struct r600_texture {
- struct r600_resource resource;
-
- uint64_t size;
- unsigned num_level0_transfers;
- enum pipe_format db_render_format;
- bool is_depth;
- bool db_compatible;
- bool can_sample_z;
- bool can_sample_s;
- unsigned dirty_level_mask; /* each bit says if that mipmap is compressed */
- unsigned stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
- struct r600_texture *flushed_depth_texture;
- struct radeon_surf surface;
-
- /* Colorbuffer compression and fast clear. */
- struct r600_fmask_info fmask;
- struct r600_cmask_info cmask;
- struct r600_resource *cmask_buffer;
- uint64_t dcc_offset; /* 0 = disabled */
- unsigned cb_color_info; /* fast clear enable bit */
- unsigned color_clear_value[2];
- unsigned last_msaa_resolve_target_micro_mode;
-
- /* Depth buffer compression and fast clear. */
- uint64_t htile_offset;
- bool tc_compatible_htile;
- bool depth_cleared; /* if it was cleared at least once */
- float depth_clear_value;
- bool stencil_cleared; /* if it was cleared at least once */
- uint8_t stencil_clear_value;
- bool upgraded_depth; /* upgraded from unorm to Z32_FLOAT */
-
- bool non_disp_tiling; /* R600-Cayman only */
-
- /* Whether the texture is a displayable back buffer and needs DCC
- * decompression, which is expensive. Therefore, it's enabled only
- * if statistics suggest that it will pay off and it's allocated
- * separately. It can't be bound as a sampler by apps. Limited to
- * target == 2D and last_level == 0. If enabled, dcc_offset contains
- * the absolute GPUVM address, not the relative one.
- */
- struct r600_resource *dcc_separate_buffer;
- /* When DCC is temporarily disabled, the separate buffer is here. */
- struct r600_resource *last_dcc_separate_buffer;
- /* We need to track DCC dirtiness, because st/dri usually calls
- * flush_resource twice per frame (not a bug) and we don't wanna
- * decompress DCC twice. Also, the dirty tracking must be done even
- * if DCC isn't used, because it's required by the DCC usage analysis
- * for a possible future enablement.
- */
- bool separate_dcc_dirty;
- /* Statistics gathering for the DCC enablement heuristic. */
- bool dcc_gather_statistics;
- /* Estimate of how much this color buffer is written to in units of
- * full-screen draws: ps_invocations / (width * height)
- * Shader kills, late Z, and blending with trivial discards make it
- * inaccurate (we need to count CB updates, not PS invocations).
- */
- unsigned ps_draw_ratio;
- /* The number of clears since the last DCC usage analysis. */
- unsigned num_slow_clears;
-
- /* Counter that should be non-zero if the texture is bound to a
- * framebuffer. Implemented in radeonsi only.
- */
- uint32_t framebuffers_bound;
-};
-
-struct r600_surface {
- struct pipe_surface base;
-
- /* These can vary with block-compressed textures. */
- unsigned width0;
- unsigned height0;
-
- bool color_initialized;
- bool depth_initialized;
-
- /* Misc. color flags. */
- bool alphatest_bypass;
- bool export_16bpc;
- bool color_is_int8;
- bool color_is_int10;
- bool dcc_incompatible;
-
- /* Color registers. */
- unsigned cb_color_info;
- unsigned cb_color_base;
- unsigned cb_color_view;
- unsigned cb_color_size; /* R600 only */
- unsigned cb_color_dim; /* EG only */
- unsigned cb_color_pitch; /* EG and later */
- unsigned cb_color_slice; /* EG and later */
- unsigned cb_color_attrib; /* EG and later */
- unsigned cb_color_attrib2; /* GFX9 and later */
- unsigned cb_dcc_control; /* VI and later */
- unsigned cb_color_fmask; /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
- unsigned cb_color_fmask_slice; /* EG and later */
- unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */
- unsigned cb_color_mask; /* R600 only */
- unsigned spi_shader_col_format; /* SI+, no blending, no alpha-to-coverage. */
- unsigned spi_shader_col_format_alpha; /* SI+, alpha-to-coverage */
- unsigned spi_shader_col_format_blend; /* SI+, blending without alpha. */
- unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with alpha. */
- struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
- struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */
-
- /* DB registers. */
- uint64_t db_depth_base; /* DB_Z_READ/WRITE_BASE (EG and later) or DB_DEPTH_BASE (r600) */
- uint64_t db_stencil_base; /* EG and later */
- uint64_t db_htile_data_base;
- unsigned db_depth_info; /* R600 only, then SI and later */
- unsigned db_z_info; /* EG and later */
- unsigned db_z_info2; /* GFX9+ */
- unsigned db_depth_view;
- unsigned db_depth_size;
- unsigned db_depth_slice; /* EG and later */
- unsigned db_stencil_info; /* EG and later */
- unsigned db_stencil_info2; /* GFX9+ */
- unsigned db_prefetch_limit; /* R600 only */
- unsigned db_htile_surface;
- unsigned db_preload_control; /* EG and later */
-};
-
-struct r600_mmio_counter {
- unsigned busy;
- unsigned idle;
-};
-
-union r600_mmio_counters {
- struct {
- /* For global GPU load including SDMA. */
- struct r600_mmio_counter gpu;
-
- /* GRBM_STATUS */
- struct r600_mmio_counter spi;
- struct r600_mmio_counter gui;
- struct r600_mmio_counter ta;
- struct r600_mmio_counter gds;
- struct r600_mmio_counter vgt;
- struct r600_mmio_counter ia;
- struct r600_mmio_counter sx;
- struct r600_mmio_counter wd;
- struct r600_mmio_counter bci;
- struct r600_mmio_counter sc;
- struct r600_mmio_counter pa;
- struct r600_mmio_counter db;
- struct r600_mmio_counter cp;
- struct r600_mmio_counter cb;
-
- /* SRBM_STATUS2 */
- struct r600_mmio_counter sdma;
-
- /* CP_STAT */
- struct r600_mmio_counter pfp;
- struct r600_mmio_counter meq;
- struct r600_mmio_counter me;
- struct r600_mmio_counter surf_sync;
- struct r600_mmio_counter cp_dma;
- struct r600_mmio_counter scratch_ram;
- } named;
- unsigned array[0];
-};
-
-struct r600_memory_object {
- struct pipe_memory_object b;
- struct pb_buffer *buf;
- uint32_t stride;
- uint32_t offset;
-};
-
-struct r600_common_screen {
- struct pipe_screen b;
- struct radeon_winsys *ws;
- enum radeon_family family;
- enum chip_class chip_class;
- struct radeon_info info;
- uint64_t debug_flags;
- bool has_cp_dma;
- bool has_streamout;
- bool has_rbplus; /* if RB+ registers exist */
- bool rbplus_allowed; /* if RB+ is allowed */
-
- struct disk_cache *disk_shader_cache;
-
- struct slab_parent_pool pool_transfers;
-
- /* Texture filter settings. */
- int force_aniso; /* -1 = disabled */
-
- /* Auxiliary context. Mainly used to initialize resources.
- * It must be locked prior to using and flushed before unlocking. */
- struct pipe_context *aux_context;
- mtx_t aux_context_lock;
-
- /* This must be in the screen, because UE4 uses one context for
- * compilation and another one for rendering.
- */
- unsigned num_compilations;
- /* Along with ST_DEBUG=precompile, this should show if applications
- * are loading shaders on demand. This is a monotonic counter.
- */
- unsigned num_shaders_created;
- unsigned num_shader_cache_hits;
-
- /* GPU load thread. */
- mtx_t gpu_load_mutex;
- thrd_t gpu_load_thread;
- union r600_mmio_counters mmio_counters;
- volatile unsigned gpu_load_stop_thread; /* bool */
-
- char renderer_string[100];
-
- /* Performance counters. */
- struct r600_perfcounters *perfcounters;
-
- /* If pipe_screen wants to recompute and re-emit the framebuffer,
- * sampler, and image states of all contexts, it should atomically
- * increment this.
- *
- * Each context will compare this with its own last known value of
- * the counter before drawing and re-emit the states accordingly.
- */
- unsigned dirty_tex_counter;
-
- /* Atomically increment this counter when an existing texture's
- * metadata is enabled or disabled in a way that requires changing
- * contexts' compressed texture binding masks.
- */
- unsigned compressed_colortex_counter;
-
- struct {
- /* Context flags to set so that all writes from earlier jobs
- * in the CP are seen by L2 clients.
- */
- unsigned cp_to_L2;
-
- /* Context flags to set so that all writes from earlier jobs
- * that end in L2 are seen by CP.
- */
- unsigned L2_to_cp;
-
- /* Context flags to set so that all writes from earlier
- * compute jobs are seen by L2 clients.
- */
- unsigned compute_to_L2;
- } barrier_flags;
-
- void (*query_opaque_metadata)(struct r600_common_screen *rscreen,
- struct r600_texture *rtex,
- struct radeon_bo_metadata *md);
-
- void (*apply_opaque_metadata)(struct r600_common_screen *rscreen,
- struct r600_texture *rtex,
- struct radeon_bo_metadata *md);
-};
-
-/* This encapsulates a state or an operation which can emitted into the GPU
- * command stream. */
-struct r600_atom {
- void (*emit)(struct r600_common_context *ctx, struct r600_atom *state);
- unsigned short id;
-};
-
-struct r600_ring {
- struct radeon_winsys_cs *cs;
- void (*flush)(void *ctx, unsigned flags,
- struct pipe_fence_handle **fence);
-};
-
-/* Saved CS data for debugging features. */
-struct radeon_saved_cs {
- uint32_t *ib;
- unsigned num_dw;
-
- struct radeon_bo_list_item *bo_list;
- unsigned bo_count;
-};
-
-struct r600_common_context {
- struct pipe_context b; /* base class */
-
- struct r600_common_screen *screen;
- struct radeon_winsys *ws;
- struct radeon_winsys_ctx *ctx;
- enum radeon_family family;
- enum chip_class chip_class;
- struct r600_ring gfx;
- struct r600_ring dma;
- struct pipe_fence_handle *last_gfx_fence;
- struct pipe_fence_handle *last_sdma_fence;
- struct r600_resource *eop_bug_scratch;
- unsigned num_gfx_cs_flushes;
- unsigned initial_gfx_cs_size;
- unsigned gpu_reset_counter;
- unsigned last_dirty_tex_counter;
- unsigned last_compressed_colortex_counter;
- unsigned last_num_draw_calls;
-
- struct threaded_context *tc;
- struct u_suballocator *allocator_zeroed_memory;
- struct slab_child_pool pool_transfers;
- struct slab_child_pool pool_transfers_unsync; /* for threaded_context */
-
- /* Current unaccounted memory usage. */
- uint64_t vram;
- uint64_t gtt;
-
- /* Additional context states. */
- unsigned flags; /* flush flags */
-
- /* Queries. */
- /* Maintain the list of active queries for pausing between IBs. */
- int num_occlusion_queries;
- int num_perfect_occlusion_queries;
- struct list_head active_queries;
- unsigned num_cs_dw_queries_suspend;
- /* Misc stats. */
- unsigned num_draw_calls;
- unsigned num_decompress_calls;
- unsigned num_mrt_draw_calls;
- unsigned num_prim_restart_calls;
- unsigned num_spill_draw_calls;
- unsigned num_compute_calls;
- unsigned num_spill_compute_calls;
- unsigned num_dma_calls;
- unsigned num_cp_dma_calls;
- unsigned num_vs_flushes;
- unsigned num_ps_flushes;
- unsigned num_cs_flushes;
- unsigned num_cb_cache_flushes;
- unsigned num_db_cache_flushes;
- unsigned num_L2_invalidates;
- unsigned num_L2_writebacks;
- unsigned num_resident_handles;
- uint64_t num_alloc_tex_transfer_bytes;
- unsigned last_tex_ps_draw_ratio; /* for query */
-
- /* Render condition. */
- struct r600_atom render_cond_atom;
- struct pipe_query *render_cond;
- unsigned render_cond_mode;
- bool render_cond_invert;
- bool render_cond_force_off; /* for u_blitter */
-
- /* Statistics gathering for the DCC enablement heuristic. It can't be
- * in r600_texture because r600_texture can be shared by multiple
- * contexts. This is for back buffers only. We shouldn't get too many
- * of those.
- *
- * X11 DRI3 rotates among a finite set of back buffers. They should
- * all fit in this array. If they don't, separate DCC might never be
- * enabled by DCC stat gathering.
- */
- struct {
- struct r600_texture *tex;
- /* Query queue: 0 = usually active, 1 = waiting, 2 = readback. */
- struct pipe_query *ps_stats[3];
- /* If all slots are used and another slot is needed,
- * the least recently used slot is evicted based on this. */
- int64_t last_use_timestamp;
- bool query_active;
- } dcc_stats[5];
-
- struct pipe_debug_callback debug;
- struct pipe_device_reset_callback device_reset_callback;
- struct u_log_context *log;
-
- void *query_result_shader;
-
- /* Copy one resource to another using async DMA. */
- void (*dma_copy)(struct pipe_context *ctx,
- struct pipe_resource *dst,
- unsigned dst_level,
- unsigned dst_x, unsigned dst_y, unsigned dst_z,
- struct pipe_resource *src,
- unsigned src_level,
- const struct pipe_box *src_box);
-
- void (*dma_clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
- uint64_t offset, uint64_t size, unsigned value);
-
- void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
- uint64_t offset, uint64_t size, unsigned value,
- enum r600_coherency coher);
-
- void (*blit_decompress_depth)(struct pipe_context *ctx,
- struct r600_texture *texture,
- struct r600_texture *staging,
- unsigned first_level, unsigned last_level,
- unsigned first_layer, unsigned last_layer,
- unsigned first_sample, unsigned last_sample);
-
- void (*decompress_dcc)(struct pipe_context *ctx,
- struct r600_texture *rtex);
-
- /* Reallocate the buffer and update all resource bindings where
- * the buffer is bound, including all resource descriptors. */
- void (*invalidate_buffer)(struct pipe_context *ctx, struct pipe_resource *buf);
-
- /* Update all resource bindings where the buffer is bound, including
- * all resource descriptors. This is invalidate_buffer without
- * the invalidation. */
- void (*rebind_buffer)(struct pipe_context *ctx, struct pipe_resource *buf,
- uint64_t old_gpu_address);
-
- /* Enable or disable occlusion queries. */
- void (*set_occlusion_query_state)(struct pipe_context *ctx,
- bool old_enable,
- bool old_perfect_enable);
-
- void (*save_qbo_state)(struct pipe_context *ctx, struct r600_qbo_state *st);
-
- /* This ensures there is enough space in the command stream. */
- void (*need_gfx_cs_space)(struct pipe_context *ctx, unsigned num_dw,
- bool include_draw_vbo);
-
- void (*set_atom_dirty)(struct r600_common_context *ctx,
- struct r600_atom *atom, bool dirty);
-
- void (*check_vm_faults)(struct r600_common_context *ctx,
- struct radeon_saved_cs *saved,
- enum ring_type ring);
-};
-
-/* r600_buffer_common.c */
-bool si_rings_is_buffer_referenced(struct r600_common_context *ctx,
- struct pb_buffer *buf,
- enum radeon_bo_usage usage);
-void *si_buffer_map_sync_with_rings(struct r600_common_context *ctx,
- struct r600_resource *resource,
- unsigned usage);
-void si_buffer_subdata(struct pipe_context *ctx,
- struct pipe_resource *buffer,
- unsigned usage, unsigned offset,
- unsigned size, const void *data);
-void si_init_resource_fields(struct r600_common_screen *rscreen,
- struct r600_resource *res,
- uint64_t size, unsigned alignment);
-bool si_alloc_resource(struct r600_common_screen *rscreen,
- struct r600_resource *res);
-struct pipe_resource *si_buffer_create(struct pipe_screen *screen,
- const struct pipe_resource *templ,
- unsigned alignment);
-struct pipe_resource *si_aligned_buffer_create(struct pipe_screen *screen,
- unsigned flags,
- unsigned usage,
- unsigned size,
- unsigned alignment);
-struct pipe_resource *
-si_buffer_from_user_memory(struct pipe_screen *screen,
- const struct pipe_resource *templ,
- void *user_memory);
-void si_invalidate_resource(struct pipe_context *ctx,
- struct pipe_resource *resource);
-void si_replace_buffer_storage(struct pipe_context *ctx,
- struct pipe_resource *dst,
- struct pipe_resource *src);
-
-/* r600_common_pipe.c */
-void si_gfx_write_event_eop(struct r600_common_context *ctx,
- unsigned event, unsigned event_flags,
- unsigned data_sel,
- struct r600_resource *buf, uint64_t va,
- uint32_t new_fence, unsigned query_type);
-unsigned si_gfx_write_fence_dwords(struct r600_common_screen *screen);
-void si_gfx_wait_fence(struct r600_common_context *ctx,
- uint64_t va, uint32_t ref, uint32_t mask);
-bool si_common_screen_init(struct r600_common_screen *rscreen,
- struct radeon_winsys *ws);
-void si_destroy_common_screen(struct r600_common_screen *rscreen);
-void si_preflush_suspend_features(struct r600_common_context *ctx);
-void si_postflush_resume_features(struct r600_common_context *ctx);
-bool si_common_context_init(struct r600_common_context *rctx,
- struct r600_common_screen *rscreen,
- unsigned context_flags);
-void si_common_context_cleanup(struct r600_common_context *rctx);
-bool si_can_dump_shader(struct r600_common_screen *rscreen,
- unsigned processor);
-bool si_extra_shader_checks(struct r600_common_screen *rscreen,
- unsigned processor);
-void si_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
- uint64_t offset, uint64_t size, unsigned value);
-struct pipe_resource *si_resource_create_common(struct pipe_screen *screen,
- const struct pipe_resource *templ);
-const char *si_get_llvm_processor_name(enum radeon_family family);
-void si_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
- struct r600_resource *dst, struct r600_resource *src);
-void si_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
- struct radeon_saved_cs *saved, bool get_buffer_list);
-void si_clear_saved_cs(struct radeon_saved_cs *saved);
-bool si_check_device_reset(struct r600_common_context *rctx);
-
-/* r600_gpu_load.c */
-void si_gpu_load_kill_thread(struct r600_common_screen *rscreen);
-uint64_t si_begin_counter(struct r600_common_screen *rscreen, unsigned type);
-unsigned si_end_counter(struct r600_common_screen *rscreen, unsigned type,
- uint64_t begin);
-
-/* r600_perfcounters.c */
-void si_perfcounters_destroy(struct r600_common_screen *rscreen);
-
-/* r600_query.c */
-void si_init_screen_query_functions(struct r600_common_screen *rscreen);
-void si_init_query_functions(struct r600_common_context *rctx);
-void si_suspend_queries(struct r600_common_context *ctx);
-void si_resume_queries(struct r600_common_context *ctx);
-
-/* r600_test_dma.c */
-void si_test_dma(struct r600_common_screen *rscreen);
-
-/* r600_texture.c */
-bool si_prepare_for_dma_blit(struct r600_common_context *rctx,
- struct r600_texture *rdst,
- unsigned dst_level, unsigned dstx,
- unsigned dsty, unsigned dstz,
- struct r600_texture *rsrc,
- unsigned src_level,
- const struct pipe_box *src_box);
-void si_texture_get_fmask_info(struct r600_common_screen *rscreen,
- struct r600_texture *rtex,
- unsigned nr_samples,
- struct r600_fmask_info *out);
-bool si_init_flushed_depth_texture(struct pipe_context *ctx,
- struct pipe_resource *texture,
- struct r600_texture **staging);
-void si_print_texture_info(struct r600_common_screen *rscreen,
- struct r600_texture *rtex, struct u_log_context *log);
-struct pipe_resource *si_texture_create(struct pipe_screen *screen,
- const struct pipe_resource *templ);
-bool vi_dcc_formats_compatible(enum pipe_format format1,
- enum pipe_format format2);
-bool vi_dcc_formats_are_incompatible(struct pipe_resource *tex,
- unsigned level,
- enum pipe_format view_format);
-void vi_disable_dcc_if_incompatible_format(struct r600_common_context *rctx,
- struct pipe_resource *tex,
- unsigned level,
- enum pipe_format view_format);
-struct pipe_surface *si_create_surface_custom(struct pipe_context *pipe,
- struct pipe_resource *texture,
- const struct pipe_surface *templ,
- unsigned width0, unsigned height0,
- unsigned width, unsigned height);
-unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap);
-void vi_separate_dcc_start_query(struct pipe_context *ctx,
- struct r600_texture *tex);
-void vi_separate_dcc_stop_query(struct pipe_context *ctx,
- struct r600_texture *tex);
-void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx,
- struct r600_texture *tex);
-void vi_dcc_clear_level(struct r600_common_context *rctx,
- struct r600_texture *rtex,
- unsigned level, unsigned clear_value);
-void si_do_fast_color_clear(struct r600_common_context *rctx,
- struct pipe_framebuffer_state *fb,
- struct r600_atom *fb_state,
- unsigned *buffers, ubyte *dirty_cbufs,
- const union pipe_color_union *color);
-bool si_texture_disable_dcc(struct r600_common_context *rctx,
- struct r600_texture *rtex);
-void si_init_screen_texture_functions(struct r600_common_screen *rscreen);
-void si_init_context_texture_functions(struct r600_common_context *rctx);
-
-
-/* Inline helpers. */
-
-static inline struct r600_resource *r600_resource(struct pipe_resource *r)
-{
- return (struct r600_resource*)r;
-}
-
-static inline void
-r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res)
-{
- pipe_resource_reference((struct pipe_resource **)ptr,
- (struct pipe_resource *)res);
-}
-
-static inline void
-r600_texture_reference(struct r600_texture **ptr, struct r600_texture *res)
-{
- pipe_resource_reference((struct pipe_resource **)ptr, &res->resource.b.b);
-}
-
-static inline void
-r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
-{
- struct r600_common_context *rctx = (struct r600_common_context *)ctx;
- struct r600_resource *res = (struct r600_resource *)r;
-
- if (res) {
- /* Add memory usage for need_gfx_cs_space */
- rctx->vram += res->vram_usage;
- rctx->gtt += res->gart_usage;
- }
-}
-
-#define SQ_TEX_XY_FILTER_POINT 0x00
-#define SQ_TEX_XY_FILTER_BILINEAR 0x01
-#define SQ_TEX_XY_FILTER_ANISO_POINT 0x02
-#define SQ_TEX_XY_FILTER_ANISO_BILINEAR 0x03
-
-static inline unsigned eg_tex_filter(unsigned filter, unsigned max_aniso)
-{
- if (filter == PIPE_TEX_FILTER_LINEAR)
- return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_BILINEAR
- : SQ_TEX_XY_FILTER_BILINEAR;
- else
- return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_POINT
- : SQ_TEX_XY_FILTER_POINT;
-}
-
-static inline unsigned r600_tex_aniso_filter(unsigned filter)
-{
- if (filter < 2)
- return 0;
- if (filter < 4)
- return 1;
- if (filter < 8)
- return 2;
- if (filter < 16)
- return 3;
- return 4;
-}
-
-static inline enum radeon_bo_priority
-r600_get_sampler_view_priority(struct r600_resource *res)
-{
- if (res->b.b.target == PIPE_BUFFER)
- return RADEON_PRIO_SAMPLER_BUFFER;
-
- if (res->b.b.nr_samples > 1)
- return RADEON_PRIO_SAMPLER_TEXTURE_MSAA;
-
- return RADEON_PRIO_SAMPLER_TEXTURE;
-}
-
-static inline bool
-r600_can_sample_zs(struct r600_texture *tex, bool stencil_sampler)
-{
- return (stencil_sampler && tex->can_sample_s) ||
- (!stencil_sampler && tex->can_sample_z);
-}
-
-static inline bool
-vi_dcc_enabled(struct r600_texture *tex, unsigned level)
-{
- return tex->dcc_offset && level < tex->surface.num_dcc_levels;
-}
-
-static inline bool
-r600_htile_enabled(struct r600_texture *tex, unsigned level)
-{
- return tex->htile_offset && level == 0;
-}
-
-static inline bool
-vi_tc_compat_htile_enabled(struct r600_texture *tex, unsigned level)
-{
- assert(!tex->tc_compatible_htile || tex->htile_offset);
- return tex->tc_compatible_htile && level == 0;
-}
-
-#define COMPUTE_DBG(rscreen, fmt, args...) \
- do { \
- if ((rscreen->b.debug_flags & DBG(COMPUTE))) fprintf(stderr, fmt, ##args); \
- } while (0);
-
-#define R600_ERR(fmt, args...) \
- fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##args)
-
-static inline int S_FIXED(float value, unsigned frac_bits)
-{
- return value * (1 << frac_bits);
-}
-
-#endif
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_query.c b/lib/mesa/src/gallium/drivers/radeon/r600_query.c
deleted file mode 100644
index aedf950ff..000000000
--- a/lib/mesa/src/gallium/drivers/radeon/r600_query.c
+++ /dev/null
@@ -1,2101 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- * Copyright 2014 Marek Olšák <marek.olsak@amd.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "r600_query.h"
-#include "r600_cs.h"
-#include "util/u_memory.h"
-#include "util/u_upload_mgr.h"
-#include "os/os_time.h"
-#include "tgsi/tgsi_text.h"
-#include "amd/common/sid.h"
-
-/* TODO: remove this local prototype: */
-void si_update_prims_generated_query_state(struct r600_common_context *rctx,
-					   unsigned type, int diff);
-
-/* Number of streamout streams sampled by the "any stream" overflow query. */
-#define R600_MAX_STREAMS 4
-
-/* Layout description of one hardware query result.
- * NOTE(review): the users of this struct are outside this part of the file;
- * the field names suggest offsets and a stride within a result slot -
- * confirm against the code that fills it in.
- */
-struct r600_hw_query_params {
-	unsigned start_offset, end_offset, fence_offset;
-	unsigned pair_stride, pair_count;
-};
-
-/* Software query: no GPU result buffer and no suspend/resume handling. */
-struct r600_query_sw {
-	struct r600_query b;
-
-	/* Counter samples taken when the query begins and ends. */
-	uint64_t begin_result, end_result;
-
-	/* Second pair of samples used as the divisor by the ratio queries:
-	 * a CPU timestamp in nanoseconds for the thread-busy queries, the
-	 * number of GFX IBs for the BO-list-size query.
-	 */
-	uint64_t begin_time, end_time;
-
-	/* Fence obtained when a GPU_FINISHED query ends. */
-	struct pipe_fence_handle *fence;
-};
-
-/* Destroy a software query: release its fence reference and free it. */
-static void r600_query_sw_destroy(struct r600_common_screen *rscreen,
-				  struct r600_query *rquery)
-{
-	struct r600_query_sw *sw = (struct r600_query_sw *)rquery;
-
-	/* Unreference the GPU_FINISHED fence held by the query, if any. */
-	rscreen->b.fence_reference(&rscreen->b, &sw->fence, NULL);
-
-	FREE(sw);
-}
-
-/* Map a driver-specific query type onto the winsys value it is read from.
- * Must only be called for the query types listed in the table.
- */
-static enum radeon_value_id winsys_id_from_type(unsigned type)
-{
-	const struct {
-		unsigned query_type;
-		enum radeon_value_id value_id;
-	} map[] = {
-		{ R600_QUERY_REQUESTED_VRAM,		RADEON_REQUESTED_VRAM_MEMORY },
-		{ R600_QUERY_REQUESTED_GTT,		RADEON_REQUESTED_GTT_MEMORY },
-		{ R600_QUERY_MAPPED_VRAM,		RADEON_MAPPED_VRAM },
-		{ R600_QUERY_MAPPED_GTT,		RADEON_MAPPED_GTT },
-		{ R600_QUERY_BUFFER_WAIT_TIME,		RADEON_BUFFER_WAIT_TIME_NS },
-		{ R600_QUERY_NUM_MAPPED_BUFFERS,	RADEON_NUM_MAPPED_BUFFERS },
-		{ R600_QUERY_NUM_GFX_IBS,		RADEON_NUM_GFX_IBS },
-		{ R600_QUERY_NUM_SDMA_IBS,		RADEON_NUM_SDMA_IBS },
-		{ R600_QUERY_GFX_BO_LIST_SIZE,		RADEON_GFX_BO_LIST_COUNTER },
-		{ R600_QUERY_GFX_IB_SIZE,		RADEON_GFX_IB_SIZE_COUNTER },
-		{ R600_QUERY_NUM_BYTES_MOVED,		RADEON_NUM_BYTES_MOVED },
-		{ R600_QUERY_NUM_EVICTIONS,		RADEON_NUM_EVICTIONS },
-		{ R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS,	RADEON_NUM_VRAM_CPU_PAGE_FAULTS },
-		{ R600_QUERY_VRAM_USAGE,		RADEON_VRAM_USAGE },
-		{ R600_QUERY_VRAM_VIS_USAGE,		RADEON_VRAM_VIS_USAGE },
-		{ R600_QUERY_GTT_USAGE,			RADEON_GTT_USAGE },
-		{ R600_QUERY_GPU_TEMPERATURE,		RADEON_GPU_TEMPERATURE },
-		{ R600_QUERY_CURRENT_GPU_SCLK,		RADEON_CURRENT_SCLK },
-		{ R600_QUERY_CURRENT_GPU_MCLK,		RADEON_CURRENT_MCLK },
-		{ R600_QUERY_CS_THREAD_BUSY,		RADEON_CS_THREAD_TIME },
-	};
-	unsigned i;
-
-	for (i = 0; i < sizeof(map) / sizeof(map[0]); i++) {
-		if (map[i].query_type == type)
-			return map[i].value_id;
-	}
-
-	unreachable("query type does not correspond to winsys id");
-}
-
-/* Begin a software query: take the "begin" sample of the counter that the
- * query type tracks.  Query types that have nothing to sample at begin time
- * leave the query untouched.  Always returns true.
- */
-static bool r600_query_sw_begin(struct r600_common_context *rctx,
-				struct r600_query *rquery)
-{
-	struct r600_query_sw *sw = (struct r600_query_sw *)rquery;
-	uint64_t sample;
-
-	switch (sw->b.type) {
-	/* Nothing is recorded when these queries begin. */
-	case PIPE_QUERY_TIMESTAMP_DISJOINT:
-	case PIPE_QUERY_GPU_FINISHED:
-	case R600_QUERY_GPIN_ASIC_ID:
-	case R600_QUERY_GPIN_NUM_SIMD:
-	case R600_QUERY_GPIN_NUM_RB:
-	case R600_QUERY_GPIN_NUM_SPI:
-	case R600_QUERY_GPIN_NUM_SE:
-		return true;
-
-	/* Counters kept in the context. */
-	case R600_QUERY_DRAW_CALLS:
-		sample = rctx->num_draw_calls;
-		break;
-	case R600_QUERY_DECOMPRESS_CALLS:
-		sample = rctx->num_decompress_calls;
-		break;
-	case R600_QUERY_MRT_DRAW_CALLS:
-		sample = rctx->num_mrt_draw_calls;
-		break;
-	case R600_QUERY_PRIM_RESTART_CALLS:
-		sample = rctx->num_prim_restart_calls;
-		break;
-	case R600_QUERY_SPILL_DRAW_CALLS:
-		sample = rctx->num_spill_draw_calls;
-		break;
-	case R600_QUERY_COMPUTE_CALLS:
-		sample = rctx->num_compute_calls;
-		break;
-	case R600_QUERY_SPILL_COMPUTE_CALLS:
-		sample = rctx->num_spill_compute_calls;
-		break;
-	case R600_QUERY_DMA_CALLS:
-		sample = rctx->num_dma_calls;
-		break;
-	case R600_QUERY_CP_DMA_CALLS:
-		sample = rctx->num_cp_dma_calls;
-		break;
-	case R600_QUERY_NUM_VS_FLUSHES:
-		sample = rctx->num_vs_flushes;
-		break;
-	case R600_QUERY_NUM_PS_FLUSHES:
-		sample = rctx->num_ps_flushes;
-		break;
-	case R600_QUERY_NUM_CS_FLUSHES:
-		sample = rctx->num_cs_flushes;
-		break;
-	case R600_QUERY_NUM_CB_CACHE_FLUSHES:
-		sample = rctx->num_cb_cache_flushes;
-		break;
-	case R600_QUERY_NUM_DB_CACHE_FLUSHES:
-		sample = rctx->num_db_cache_flushes;
-		break;
-	case R600_QUERY_NUM_L2_INVALIDATES:
-		sample = rctx->num_L2_invalidates;
-		break;
-	case R600_QUERY_NUM_L2_WRITEBACKS:
-		sample = rctx->num_L2_writebacks;
-		break;
-	case R600_QUERY_NUM_RESIDENT_HANDLES:
-		sample = rctx->num_resident_handles;
-		break;
-
-	/* Threaded-context counters; zero when there is no threaded context. */
-	case R600_QUERY_TC_OFFLOADED_SLOTS:
-		sample = rctx->tc ? rctx->tc->num_offloaded_slots : 0;
-		break;
-	case R600_QUERY_TC_DIRECT_SLOTS:
-		sample = rctx->tc ? rctx->tc->num_direct_slots : 0;
-		break;
-	case R600_QUERY_TC_NUM_SYNCS:
-		sample = rctx->tc ? rctx->tc->num_syncs : 0;
-		break;
-
-	/* The begin sample of these queries is defined to be zero. */
-	case R600_QUERY_REQUESTED_VRAM:
-	case R600_QUERY_REQUESTED_GTT:
-	case R600_QUERY_MAPPED_VRAM:
-	case R600_QUERY_MAPPED_GTT:
-	case R600_QUERY_VRAM_USAGE:
-	case R600_QUERY_VRAM_VIS_USAGE:
-	case R600_QUERY_GTT_USAGE:
-	case R600_QUERY_GPU_TEMPERATURE:
-	case R600_QUERY_CURRENT_GPU_SCLK:
-	case R600_QUERY_CURRENT_GPU_MCLK:
-	case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
-	case R600_QUERY_NUM_MAPPED_BUFFERS:
-		sample = 0;
-		break;
-
-	/* Values read from the winsys. */
-	case R600_QUERY_BUFFER_WAIT_TIME:
-	case R600_QUERY_GFX_IB_SIZE:
-	case R600_QUERY_NUM_GFX_IBS:
-	case R600_QUERY_NUM_SDMA_IBS:
-	case R600_QUERY_NUM_BYTES_MOVED:
-	case R600_QUERY_NUM_EVICTIONS:
-	case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS:
-		sample = rctx->ws->query_value(rctx->ws,
-					       winsys_id_from_type(sw->b.type));
-		break;
-
-	/* Ratio queries: a second sample is stored in begin_time. */
-	case R600_QUERY_GFX_BO_LIST_SIZE:
-		sample = rctx->ws->query_value(rctx->ws,
-					       winsys_id_from_type(sw->b.type));
-		sw->begin_time = rctx->ws->query_value(rctx->ws,
-						       RADEON_NUM_GFX_IBS);
-		break;
-	case R600_QUERY_CS_THREAD_BUSY:
-		sample = rctx->ws->query_value(rctx->ws,
-					       winsys_id_from_type(sw->b.type));
-		sw->begin_time = os_time_get_nano();
-		break;
-	case R600_QUERY_GALLIUM_THREAD_BUSY:
-		sample = rctx->tc ?
-			 util_queue_get_thread_time_nano(&rctx->tc->queue, 0) : 0;
-		sw->begin_time = os_time_get_nano();
-		break;
-
-	/* GPU busy counters. */
-	case R600_QUERY_GPU_LOAD:
-	case R600_QUERY_GPU_SHADERS_BUSY:
-	case R600_QUERY_GPU_TA_BUSY:
-	case R600_QUERY_GPU_GDS_BUSY:
-	case R600_QUERY_GPU_VGT_BUSY:
-	case R600_QUERY_GPU_IA_BUSY:
-	case R600_QUERY_GPU_SX_BUSY:
-	case R600_QUERY_GPU_WD_BUSY:
-	case R600_QUERY_GPU_BCI_BUSY:
-	case R600_QUERY_GPU_SC_BUSY:
-	case R600_QUERY_GPU_PA_BUSY:
-	case R600_QUERY_GPU_DB_BUSY:
-	case R600_QUERY_GPU_CP_BUSY:
-	case R600_QUERY_GPU_CB_BUSY:
-	case R600_QUERY_GPU_SDMA_BUSY:
-	case R600_QUERY_GPU_PFP_BUSY:
-	case R600_QUERY_GPU_MEQ_BUSY:
-	case R600_QUERY_GPU_ME_BUSY:
-	case R600_QUERY_GPU_SURF_SYNC_BUSY:
-	case R600_QUERY_GPU_CP_DMA_BUSY:
-	case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
-		sample = si_begin_counter(rctx->screen, sw->b.type);
-		break;
-
-	/* Shader statistics kept in the screen. */
-	case R600_QUERY_NUM_COMPILATIONS:
-		sample = p_atomic_read(&rctx->screen->num_compilations);
-		break;
-	case R600_QUERY_NUM_SHADERS_CREATED:
-		sample = p_atomic_read(&rctx->screen->num_shaders_created);
-		break;
-	case R600_QUERY_NUM_SHADER_CACHE_HITS:
-		sample = p_atomic_read(&rctx->screen->num_shader_cache_hits);
-		break;
-
-	default:
-		unreachable("r600_query_sw_begin: bad query type");
-	}
-
-	sw->begin_result = sample;
-	return true;
-}
-
-/* End a software query: take the "end" sample of the counter that the query
- * type tracks.  GPU_FINISHED performs a deferred flush to obtain its fence
- * instead; the remaining query types without a counter leave the query
- * untouched.  Always returns true.
- */
-static bool r600_query_sw_end(struct r600_common_context *rctx,
-			      struct r600_query *rquery)
-{
-	struct r600_query_sw *sw = (struct r600_query_sw *)rquery;
-	uint64_t sample;
-
-	switch (sw->b.type) {
-	/* Nothing is recorded when these queries end. */
-	case PIPE_QUERY_TIMESTAMP_DISJOINT:
-	case R600_QUERY_GPIN_ASIC_ID:
-	case R600_QUERY_GPIN_NUM_SIMD:
-	case R600_QUERY_GPIN_NUM_RB:
-	case R600_QUERY_GPIN_NUM_SPI:
-	case R600_QUERY_GPIN_NUM_SE:
-		return true;
-
-	/* Only a fence is needed, no counter. */
-	case PIPE_QUERY_GPU_FINISHED:
-		rctx->b.flush(&rctx->b, &sw->fence, PIPE_FLUSH_DEFERRED);
-		return true;
-
-	/* Counters kept in the context. */
-	case R600_QUERY_DRAW_CALLS:
-		sample = rctx->num_draw_calls;
-		break;
-	case R600_QUERY_DECOMPRESS_CALLS:
-		sample = rctx->num_decompress_calls;
-		break;
-	case R600_QUERY_MRT_DRAW_CALLS:
-		sample = rctx->num_mrt_draw_calls;
-		break;
-	case R600_QUERY_PRIM_RESTART_CALLS:
-		sample = rctx->num_prim_restart_calls;
-		break;
-	case R600_QUERY_SPILL_DRAW_CALLS:
-		sample = rctx->num_spill_draw_calls;
-		break;
-	case R600_QUERY_COMPUTE_CALLS:
-		sample = rctx->num_compute_calls;
-		break;
-	case R600_QUERY_SPILL_COMPUTE_CALLS:
-		sample = rctx->num_spill_compute_calls;
-		break;
-	case R600_QUERY_DMA_CALLS:
-		sample = rctx->num_dma_calls;
-		break;
-	case R600_QUERY_CP_DMA_CALLS:
-		sample = rctx->num_cp_dma_calls;
-		break;
-	case R600_QUERY_NUM_VS_FLUSHES:
-		sample = rctx->num_vs_flushes;
-		break;
-	case R600_QUERY_NUM_PS_FLUSHES:
-		sample = rctx->num_ps_flushes;
-		break;
-	case R600_QUERY_NUM_CS_FLUSHES:
-		sample = rctx->num_cs_flushes;
-		break;
-	case R600_QUERY_NUM_CB_CACHE_FLUSHES:
-		sample = rctx->num_cb_cache_flushes;
-		break;
-	case R600_QUERY_NUM_DB_CACHE_FLUSHES:
-		sample = rctx->num_db_cache_flushes;
-		break;
-	case R600_QUERY_NUM_L2_INVALIDATES:
-		sample = rctx->num_L2_invalidates;
-		break;
-	case R600_QUERY_NUM_L2_WRITEBACKS:
-		sample = rctx->num_L2_writebacks;
-		break;
-	case R600_QUERY_NUM_RESIDENT_HANDLES:
-		sample = rctx->num_resident_handles;
-		break;
-
-	/* Threaded-context counters; zero when there is no threaded context. */
-	case R600_QUERY_TC_OFFLOADED_SLOTS:
-		sample = rctx->tc ? rctx->tc->num_offloaded_slots : 0;
-		break;
-	case R600_QUERY_TC_DIRECT_SLOTS:
-		sample = rctx->tc ? rctx->tc->num_direct_slots : 0;
-		break;
-	case R600_QUERY_TC_NUM_SYNCS:
-		sample = rctx->tc ? rctx->tc->num_syncs : 0;
-		break;
-
-	/* Values read from the winsys. */
-	case R600_QUERY_REQUESTED_VRAM:
-	case R600_QUERY_REQUESTED_GTT:
-	case R600_QUERY_MAPPED_VRAM:
-	case R600_QUERY_MAPPED_GTT:
-	case R600_QUERY_VRAM_USAGE:
-	case R600_QUERY_VRAM_VIS_USAGE:
-	case R600_QUERY_GTT_USAGE:
-	case R600_QUERY_GPU_TEMPERATURE:
-	case R600_QUERY_CURRENT_GPU_SCLK:
-	case R600_QUERY_CURRENT_GPU_MCLK:
-	case R600_QUERY_BUFFER_WAIT_TIME:
-	case R600_QUERY_GFX_IB_SIZE:
-	case R600_QUERY_NUM_MAPPED_BUFFERS:
-	case R600_QUERY_NUM_GFX_IBS:
-	case R600_QUERY_NUM_SDMA_IBS:
-	case R600_QUERY_NUM_BYTES_MOVED:
-	case R600_QUERY_NUM_EVICTIONS:
-	case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS:
-		sample = rctx->ws->query_value(rctx->ws,
-					       winsys_id_from_type(sw->b.type));
-		break;
-
-	/* Ratio queries: a second sample is stored in end_time. */
-	case R600_QUERY_GFX_BO_LIST_SIZE:
-		sample = rctx->ws->query_value(rctx->ws,
-					       winsys_id_from_type(sw->b.type));
-		sw->end_time = rctx->ws->query_value(rctx->ws,
-						     RADEON_NUM_GFX_IBS);
-		break;
-	case R600_QUERY_CS_THREAD_BUSY:
-		sample = rctx->ws->query_value(rctx->ws,
-					       winsys_id_from_type(sw->b.type));
-		sw->end_time = os_time_get_nano();
-		break;
-	case R600_QUERY_GALLIUM_THREAD_BUSY:
-		sample = rctx->tc ?
-			 util_queue_get_thread_time_nano(&rctx->tc->queue, 0) : 0;
-		sw->end_time = os_time_get_nano();
-		break;
-
-	/* GPU busy counters: si_end_counter() consumes the begin sample,
-	 * which is then cleared.
-	 */
-	case R600_QUERY_GPU_LOAD:
-	case R600_QUERY_GPU_SHADERS_BUSY:
-	case R600_QUERY_GPU_TA_BUSY:
-	case R600_QUERY_GPU_GDS_BUSY:
-	case R600_QUERY_GPU_VGT_BUSY:
-	case R600_QUERY_GPU_IA_BUSY:
-	case R600_QUERY_GPU_SX_BUSY:
-	case R600_QUERY_GPU_WD_BUSY:
-	case R600_QUERY_GPU_BCI_BUSY:
-	case R600_QUERY_GPU_SC_BUSY:
-	case R600_QUERY_GPU_PA_BUSY:
-	case R600_QUERY_GPU_DB_BUSY:
-	case R600_QUERY_GPU_CP_BUSY:
-	case R600_QUERY_GPU_CB_BUSY:
-	case R600_QUERY_GPU_SDMA_BUSY:
-	case R600_QUERY_GPU_PFP_BUSY:
-	case R600_QUERY_GPU_MEQ_BUSY:
-	case R600_QUERY_GPU_ME_BUSY:
-	case R600_QUERY_GPU_SURF_SYNC_BUSY:
-	case R600_QUERY_GPU_CP_DMA_BUSY:
-	case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
-		sample = si_end_counter(rctx->screen, sw->b.type,
-					sw->begin_result);
-		sw->begin_result = 0;
-		break;
-
-	/* Shader statistics kept in the screen. */
-	case R600_QUERY_NUM_COMPILATIONS:
-		sample = p_atomic_read(&rctx->screen->num_compilations);
-		break;
-	case R600_QUERY_NUM_SHADERS_CREATED:
-		sample = p_atomic_read(&rctx->screen->num_shaders_created);
-		break;
-	case R600_QUERY_NUM_SHADER_CACHE_HITS:
-		sample = p_atomic_read(&rctx->screen->num_shader_cache_hits);
-		break;
-
-	case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
-		sample = rctx->last_tex_ps_draw_ratio;
-		break;
-
-	default:
-		unreachable("r600_query_sw_end: bad query type");
-	}
-
-	sw->end_result = sample;
-	return true;
-}
-
-/**
- * Compute the final value of a software query.
- *
- * \param rctx    context the query was issued on
- * \param rquery  the query; must have been created as a r600_query_sw
- * \param wait    only used by PIPE_QUERY_GPU_FINISHED: wait for the fence
- *                without a timeout instead of merely polling it
- * \param result  receives the value
- * \return true when the result has been written; for GPU_FINISHED the
- *         return value is the fence status
- *
- * The ratio queries divide by the difference of two samples (the number of
- * GFX IBs or elapsed nanoseconds).  That difference is zero when the query
- * is ended before anything was submitted or before the clock advanced; 0 is
- * reported in that case instead of dividing by zero.
- */
-static bool r600_query_sw_get_result(struct r600_common_context *rctx,
-				     struct r600_query *rquery,
-				     bool wait,
-				     union pipe_query_result *result)
-{
-	struct r600_query_sw *query = (struct r600_query_sw *)rquery;
-
-	switch (query->b.type) {
-	case PIPE_QUERY_TIMESTAMP_DISJOINT:
-		/* Convert from cycles per millisecond to cycles per second (Hz). */
-		result->timestamp_disjoint.frequency =
-			(uint64_t)rctx->screen->info.clock_crystal_freq * 1000;
-		result->timestamp_disjoint.disjoint = false;
-		return true;
-	case PIPE_QUERY_GPU_FINISHED: {
-		struct pipe_screen *screen = rctx->b.screen;
-		/* No context is passed once the query has been flushed. */
-		struct pipe_context *ctx = rquery->b.flushed ? NULL : &rctx->b;
-
-		result->b = screen->fence_finish(screen, ctx, query->fence,
-						 wait ? PIPE_TIMEOUT_INFINITE : 0);
-		return result->b;
-	}
-
-	case R600_QUERY_GFX_BO_LIST_SIZE: {
-		/* begin_time/end_time hold RADEON_NUM_GFX_IBS samples here,
-		 * so this is the average per submitted IB.
-		 */
-		uint64_t num_ibs = query->end_time - query->begin_time;
-
-		result->u64 = num_ibs ?
-			(query->end_result - query->begin_result) / num_ibs : 0;
-		return true;
-	}
-	case R600_QUERY_CS_THREAD_BUSY:
-	case R600_QUERY_GALLIUM_THREAD_BUSY: {
-		/* Busy time as a percentage of the elapsed wall-clock time. */
-		uint64_t elapsed = query->end_time - query->begin_time;
-
-		result->u64 = elapsed ?
-			(query->end_result - query->begin_result) * 100 / elapsed : 0;
-		return true;
-	}
-	case R600_QUERY_GPIN_ASIC_ID:
-		result->u32 = 0;
-		return true;
-	case R600_QUERY_GPIN_NUM_SIMD:
-		result->u32 = rctx->screen->info.num_good_compute_units;
-		return true;
-	case R600_QUERY_GPIN_NUM_RB:
-		result->u32 = rctx->screen->info.num_render_backends;
-		return true;
-	case R600_QUERY_GPIN_NUM_SPI:
-		result->u32 = 1; /* all supported chips have one SPI per SE */
-		return true;
-	case R600_QUERY_GPIN_NUM_SE:
-		result->u32 = rctx->screen->info.max_se;
-		return true;
-	}
-
-	/* Every other query reports the difference of its two samples ... */
-	result->u64 = query->end_result - query->begin_result;
-
-	/* ... rescaled for a few query types. */
-	switch (query->b.type) {
-	case R600_QUERY_BUFFER_WAIT_TIME:
-	case R600_QUERY_GPU_TEMPERATURE:
-		result->u64 /= 1000;
-		break;
-	case R600_QUERY_CURRENT_GPU_SCLK:
-	case R600_QUERY_CURRENT_GPU_MCLK:
-		result->u64 *= 1000000;
-		break;
-	}
-
-	return true;
-}
-
-
-/* Entry points of software queries.  They have no GPU result buffer, so
- * there is no get_result_resource implementation.
- */
-static struct r600_query_ops sw_query_ops = {
-	.begin			= r600_query_sw_begin,
-	.end			= r600_query_sw_end,
-	.get_result		= r600_query_sw_get_result,
-	.get_result_resource	= NULL,
-	.destroy		= r600_query_sw_destroy,
-};
-
-/* Allocate a zero-initialized software query of the given type.
- * Returns NULL when the allocation fails.
- */
-static struct pipe_query *r600_query_sw_create(unsigned query_type)
-{
-	struct r600_query_sw *sw = CALLOC_STRUCT(r600_query_sw);
-
-	if (sw) {
-		sw->b.type = query_type;
-		sw->b.ops = &sw_query_ops;
-	}
-
-	return (struct pipe_query *)sw;
-}
-
-/* Destroy a hardware query: free the chain of older result buffers, drop
- * the references to the current and the workaround buffer, then free the
- * query itself.
- */
-void si_query_hw_destroy(struct r600_common_screen *rscreen,
-			 struct r600_query *rquery)
-{
-	struct r600_query_hw *hwq = (struct r600_query_hw *)rquery;
-	struct r600_query_buffer *node, *older;
-
-	/* Walk the list of previous buffers, releasing each node. */
-	for (node = hwq->buffer.previous; node; node = older) {
-		older = node->previous;
-		r600_resource_reference(&node->buf, NULL);
-		FREE(node);
-	}
-
-	r600_resource_reference(&hwq->buffer.buf, NULL);
-	r600_resource_reference(&hwq->workaround_buf, NULL);
-	FREE(rquery);
-}
-
-/* Allocate and initialize a result buffer for a hardware query.
- * The buffer holds at least one result and is never smaller than the
- * minimum allocation size of the screen.  Returns NULL on failure.
- */
-static struct r600_resource *r600_new_query_buffer(struct r600_common_screen *rscreen,
-						   struct r600_query_hw *query)
-{
-	struct r600_resource *buf;
-	unsigned size = MAX2(query->result_size,
-			     rscreen->info.min_alloc_size);
-
-	/* Results are written by the GPU and normally read back by the CPU,
-	 * so a staging buffer is probably the best fit.
-	 */
-	buf = (struct r600_resource *)
-		pipe_buffer_create(&rscreen->b, 0, PIPE_USAGE_STAGING, size);
-	if (!buf)
-		return NULL;
-
-	if (query->ops->prepare_buffer(rscreen, query, buf))
-		return buf;
-
-	/* The buffer could not be initialized: give it back. */
-	r600_resource_reference(&buf, NULL);
-	return NULL;
-}
-
-/* Default prepare_buffer hook: clear a freshly allocated result buffer and,
- * for occlusion queries, mark the entries of disabled render backends.
- * Returns false when the buffer cannot be mapped.
- */
-static bool r600_query_hw_prepare_buffer(struct r600_common_screen *rscreen,
-					 struct r600_query_hw *query,
-					 struct r600_resource *buffer)
-{
-	unsigned type = query->b.type;
-	unsigned num_rbs, rb_mask, num_slots;
-	unsigned slot, rb;
-	uint32_t *map;
-
-	/* Callers ensure that the buffer is currently unused by the GPU. */
-	map = rscreen->ws->buffer_map(buffer->buf, NULL,
-				      PIPE_TRANSFER_WRITE |
-				      PIPE_TRANSFER_UNSYNCHRONIZED);
-	if (!map)
-		return false;
-
-	memset(map, 0, buffer->b.b.width0);
-
-	/* Only occlusion queries need any further setup. */
-	if (type != PIPE_QUERY_OCCLUSION_COUNTER &&
-	    type != PIPE_QUERY_OCCLUSION_PREDICATE &&
-	    type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE)
-		return true;
-
-	num_rbs = rscreen->info.num_render_backends;
-	rb_mask = rscreen->info.enabled_rb_mask;
-	num_slots = buffer->b.b.width0 / query->result_size;
-
-	/* Set the top bit of both dword pairs of every disabled backend.
-	 *
-	 * NOTE(review): the cursor advances by 4 * num_rbs dwords per slot,
-	 * whereas r600_query_hw_emit_stop places consecutive results
-	 * result_size bytes apart - confirm that the two strides are meant
-	 * to differ.
-	 */
-	for (slot = 0; slot < num_slots; slot++, map += 4 * num_rbs) {
-		for (rb = 0; rb < num_rbs; rb++) {
-			if (rb_mask & (1<<rb))
-				continue;
-
-			map[(rb * 4)+1] = 0x80000000;
-			map[(rb * 4)+3] = 0x80000000;
-		}
-	}
-
-	return true;
-}
-
-/* Declared ahead of its definition because the ops table refers to it. */
-static void
-r600_query_hw_get_result_resource(struct r600_common_context *rctx,
-				  struct r600_query *rquery,
-				  bool wait,
-				  enum pipe_query_value_type result_type,
-				  int index,
-				  struct pipe_resource *resource,
-				  unsigned offset);
-
-/* Generic query entry points used by hardware queries. */
-static struct r600_query_ops query_hw_ops = {
-	.begin			= si_query_hw_begin,
-	.end			= si_query_hw_end,
-	.get_result		= si_query_hw_get_result,
-	.get_result_resource	= r600_query_hw_get_result_resource,
-	.destroy		= si_query_hw_destroy,
-};
-
-/* Default hardware hooks, declared ahead of their definitions because the
- * table below refers to them.
- */
-static void
-r600_query_hw_do_emit_start(struct r600_common_context *ctx,
-			    struct r600_query_hw *query,
-			    struct r600_resource *buffer, uint64_t va);
-static void
-r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
-			   struct r600_query_hw *query,
-			   struct r600_resource *buffer, uint64_t va);
-static void
-r600_query_hw_add_result(struct r600_common_screen *rscreen,
-			 struct r600_query_hw *query, void *buffer,
-			 union pipe_query_result *result);
-static void
-r600_query_hw_clear_result(struct r600_query_hw *query,
-			   union pipe_query_result *result);
-
-/* Hooks installed on hardware queries created by r600_query_hw_create(). */
-static struct r600_query_hw_ops query_hw_default_hw_ops = {
-	.prepare_buffer	= r600_query_hw_prepare_buffer,
-	.emit_start	= r600_query_hw_do_emit_start,
-	.emit_stop	= r600_query_hw_do_emit_stop,
-	.add_result	= r600_query_hw_add_result,
-	.clear_result	= r600_query_hw_clear_result,
-};
-
-/* Give a hardware query its first result buffer.
- * Returns false when the buffer cannot be allocated.
- */
-bool si_query_hw_init(struct r600_common_screen *rscreen,
-		      struct r600_query_hw *query)
-{
-	query->buffer.buf = r600_new_query_buffer(rscreen, query);
-
-	return query->buffer.buf != NULL;
-}
-
-/* Create a hardware query of the given type.
- *
- * Sets the size of one result and the number of command-stream dwords that
- * beginning and ending the query take, then allocates the first result
- * buffer.  "index" is stored as the stream of the single-stream streamout
- * queries.  Returns NULL on allocation failure or for an unsupported type.
- */
-static struct pipe_query *r600_query_hw_create(struct r600_common_screen *rscreen,
-					       unsigned query_type,
-					       unsigned index)
-{
-	struct r600_query_hw *hwq = CALLOC_STRUCT(r600_query_hw);
-
-	if (!hwq)
-		return NULL;
-
-	hwq->b.type = query_type;
-	hwq->b.ops = &query_hw_ops;
-	hwq->ops = &query_hw_default_hw_ops;
-
-	switch (query_type) {
-	case PIPE_QUERY_OCCLUSION_COUNTER:
-	case PIPE_QUERY_OCCLUSION_PREDICATE:
-	case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
-		/* 16 bytes per render backend + 16 for the fence + alignment */
-		hwq->result_size = 16 * rscreen->info.num_render_backends + 16;
-		hwq->num_cs_dw_begin = 6;
-		hwq->num_cs_dw_end = 6 + si_gfx_write_fence_dwords(rscreen);
-		break;
-	case PIPE_QUERY_TIME_ELAPSED:
-		hwq->result_size = 24;
-		hwq->num_cs_dw_begin = 8;
-		hwq->num_cs_dw_end = 8 + si_gfx_write_fence_dwords(rscreen);
-		break;
-	case PIPE_QUERY_TIMESTAMP:
-		/* Only an end sample is taken. */
-		hwq->flags = R600_QUERY_HW_FLAG_NO_START;
-		hwq->result_size = 16;
-		hwq->num_cs_dw_end = 8 + si_gfx_write_fence_dwords(rscreen);
-		break;
-	case PIPE_QUERY_PRIMITIVES_EMITTED:
-	case PIPE_QUERY_PRIMITIVES_GENERATED:
-	case PIPE_QUERY_SO_STATISTICS:
-	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-		/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
-		hwq->stream = index;
-		hwq->result_size = 32;
-		hwq->num_cs_dw_begin = 6;
-		hwq->num_cs_dw_end = 6;
-		break;
-	case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
-		/* NumPrimitivesWritten, PrimitiveStorageNeeded, per stream. */
-		hwq->result_size = 32 * R600_MAX_STREAMS;
-		hwq->num_cs_dw_begin = 6 * R600_MAX_STREAMS;
-		hwq->num_cs_dw_end = 6 * R600_MAX_STREAMS;
-		break;
-	case PIPE_QUERY_PIPELINE_STATISTICS:
-		/* 11 values on GCN, + 8 for the fence + alignment */
-		hwq->result_size = 11 * 16 + 8;
-		hwq->num_cs_dw_begin = 6;
-		hwq->num_cs_dw_end = 6 + si_gfx_write_fence_dwords(rscreen);
-		break;
-	default:
-		assert(0);
-		goto fail;
-	}
-
-	if (si_query_hw_init(rscreen, hwq))
-		return (struct pipe_query *)hwq;
-
-fail:
-	FREE(hwq);
-	return NULL;
-}
-
-/* Keep the counts of active occlusion queries up to date.
- *
- * "diff" is added to the number of active occlusion queries and, for all
- * but the conservative predicate, to the number of "perfect" ones.  When
- * either count changes between zero and non-zero, the context's
- * set_occlusion_query_state hook is called.  Other query types are ignored.
- */
-static void r600_update_occlusion_query_state(struct r600_common_context *rctx,
-					      unsigned type, int diff)
-{
-	bool was_enabled, was_perfect;
-	bool now_enabled, now_perfect;
-
-	if (type != PIPE_QUERY_OCCLUSION_COUNTER &&
-	    type != PIPE_QUERY_OCCLUSION_PREDICATE &&
-	    type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE)
-		return;
-
-	was_enabled = rctx->num_occlusion_queries != 0;
-	was_perfect = rctx->num_perfect_occlusion_queries != 0;
-
-	rctx->num_occlusion_queries += diff;
-	assert(rctx->num_occlusion_queries >= 0);
-
-	if (type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
-		rctx->num_perfect_occlusion_queries += diff;
-		assert(rctx->num_perfect_occlusion_queries >= 0);
-	}
-
-	now_enabled = rctx->num_occlusion_queries != 0;
-	now_perfect = rctx->num_perfect_occlusion_queries != 0;
-
-	if (now_enabled == was_enabled && now_perfect == was_perfect)
-		return;
-
-	/* Note that the hook receives the PREVIOUS enable states. */
-	rctx->set_occlusion_query_state(&rctx->b, was_enabled, was_perfect);
-}
-
-/* Return the SAMPLE_STREAMOUTSTATS event of a streamout stream.
- * Stream numbers above 3 select the event of stream 0.
- */
-static unsigned event_type_for_stream(unsigned stream)
-{
-	const unsigned events[] = {
-		V_028A90_SAMPLE_STREAMOUTSTATS,
-		V_028A90_SAMPLE_STREAMOUTSTATS1,
-		V_028A90_SAMPLE_STREAMOUTSTATS2,
-		V_028A90_SAMPLE_STREAMOUTSTATS3,
-	};
-
-	if (stream >= sizeof(events) / sizeof(events[0]))
-		stream = 0;
-
-	return events[stream];
-}
-
-/* Emit an EVENT_WRITE packet that samples the streamout statistics of
- * "stream" to the GPU address "va".
- */
-static void emit_sample_streamout(struct radeon_winsys_cs *cs, uint64_t va,
-				  unsigned stream)
-{
-	unsigned event = EVENT_TYPE(event_type_for_stream(stream)) |
-			 EVENT_INDEX(3);
-
-	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
-	radeon_emit(cs, event);
-	radeon_emit(cs, va);		/* low 32 bits */
-	radeon_emit(cs, va >> 32);	/* high bits */
-}
-
-/* Default emit_start hook: emit the packets that write the "begin" sample
- * of a hardware query to the GPU address "va", then add the current result
- * buffer of the query to the buffer list of the GFX command stream.
- */
-static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
-					struct r600_query_hw *query,
-					struct r600_resource *buffer,
-					uint64_t va)
-{
-	struct radeon_winsys_cs *cs = ctx->gfx.cs;
-	bool sample_event = false;
-	unsigned event = 0;
-
-	switch (query->b.type) {
-	case PIPE_QUERY_OCCLUSION_COUNTER:
-	case PIPE_QUERY_OCCLUSION_PREDICATE:
-	case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
-		event = EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1);
-		sample_event = true;
-		break;
-	case PIPE_QUERY_PIPELINE_STATISTICS:
-		event = EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2);
-		sample_event = true;
-		break;
-	case PIPE_QUERY_PRIMITIVES_EMITTED:
-	case PIPE_QUERY_PRIMITIVES_GENERATED:
-	case PIPE_QUERY_SO_STATISTICS:
-	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-		emit_sample_streamout(cs, va, query->stream);
-		break;
-	case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: {
-		unsigned stream;
-
-		/* One 32-byte record per stream. */
-		for (stream = 0; stream < R600_MAX_STREAMS; ++stream)
-			emit_sample_streamout(cs, va + 32 * stream, stream);
-		break;
-	}
-	case PIPE_QUERY_TIME_ELAPSED:
-		/* Write the timestamp from the CP not waiting for
-		 * outstanding draws (top-of-pipe).
-		 */
-		radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-		radeon_emit(cs, COPY_DATA_COUNT_SEL |
-				COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) |
-				COPY_DATA_DST_SEL(COPY_DATA_MEM_ASYNC));
-		radeon_emit(cs, 0);
-		radeon_emit(cs, 0);
-		radeon_emit(cs, va);
-		radeon_emit(cs, va >> 32);
-		break;
-	default:
-		assert(0);
-	}
-
-	/* Occlusion and pipeline statistics share the same packet layout. */
-	if (sample_event) {
-		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
-		radeon_emit(cs, event);
-		radeon_emit(cs, va);
-		radeon_emit(cs, va >> 32);
-	}
-
-	radeon_add_to_buffer_list(ctx, &ctx->gfx, query->buffer.buf,
-				  RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
-}
-
-/**
- * Start (or resume) a hardware query on the GFX command stream.
- *
- * Updates the occlusion and primitives-generated query state, reserves
- * command-stream space for both the begin and the end packets, switches to
- * a new result buffer when the current one cannot hold another result, and
- * finally calls the emit_start hook of the query.
- *
- * Nothing is emitted when the query has no result buffer because an
- * earlier allocation failed, or when a buffer or list node cannot be
- * allocated here.
- *
- * \param ctx    context to emit on
- * \param query  hardware query to start
- */
-static void r600_query_hw_emit_start(struct r600_common_context *ctx,
-				     struct r600_query_hw *query)
-{
-	uint64_t va;
-
-	if (!query->buffer.buf)
-		return; // previous buffer allocation failure
-
-	r600_update_occlusion_query_state(ctx, query->b.type, 1);
-	si_update_prims_generated_query_state(ctx, query->b.type, 1);
-
-	ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_begin + query->num_cs_dw_end,
-			       true);
-
-	/* Get a new query buffer if needed. */
-	if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
-		struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
-
-		if (!qbuf) {
-			/* Out of memory: the full buffer cannot be chained.
-			 * Drop it and leave the query without a buffer, the
-			 * same state that a failed buffer allocation below
-			 * produces, rather than dereferencing NULL or
-			 * writing past the end of the full buffer.
-			 */
-			r600_resource_reference(&query->buffer.buf, NULL);
-			query->buffer.buf = NULL;
-			query->buffer.results_end = 0;
-			return;
-		}
-
-		/* Move the full buffer to the head of the "previous" list. */
-		*qbuf = query->buffer;
-		query->buffer.results_end = 0;
-		query->buffer.previous = qbuf;
-		query->buffer.buf = r600_new_query_buffer(ctx->screen, query);
-		if (!query->buffer.buf)
-			return;
-	}
-
-	/* emit begin query */
-	va = query->buffer.buf->gpu_address + query->buffer.results_end;
-
-	query->ops->emit_start(ctx, query, query->buffer.buf, va);
-
-	/* Suspending the query later will need room for its end packets. */
-	ctx->num_cs_dw_queries_suspend += query->num_cs_dw_end;
-}
-
-/* Default emit_stop hook: emit the packets that write the "end" sample of
- * a hardware query.  "va" is the GPU address of the result slot; the end
- * sample is written at a type-specific offset inside it.  For the query
- * types that have a fence, an end-of-pipe event then writes 0x80000000
- * behind the samples.
- */
-static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
-				       struct r600_query_hw *query,
-				       struct r600_resource *buffer,
-				       uint64_t va)
-{
-	struct radeon_winsys_cs *cs = ctx->gfx.cs;
-	uint64_t fence_va = 0;
-	bool sample_event = false;
-	unsigned event = 0;
-
-	switch (query->b.type) {
-	case PIPE_QUERY_OCCLUSION_COUNTER:
-	case PIPE_QUERY_OCCLUSION_PREDICATE:
-	case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
-		va += 8;
-		event = EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1);
-		sample_event = true;
-		fence_va = va + ctx->screen->info.num_render_backends * 16 - 8;
-		break;
-	case PIPE_QUERY_PIPELINE_STATISTICS: {
-		/* The slot holds two samples followed by 8 bytes. */
-		unsigned sample_size = (query->result_size - 8) / 2;
-
-		va += sample_size;
-		event = EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2);
-		sample_event = true;
-		fence_va = va + sample_size;
-		break;
-	}
-	case PIPE_QUERY_PRIMITIVES_EMITTED:
-	case PIPE_QUERY_PRIMITIVES_GENERATED:
-	case PIPE_QUERY_SO_STATISTICS:
-	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-		va += 16;
-		emit_sample_streamout(cs, va, query->stream);
-		break;
-	case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: {
-		unsigned stream;
-
-		va += 16;
-		for (stream = 0; stream < R600_MAX_STREAMS; ++stream)
-			emit_sample_streamout(cs, va + 32 * stream, stream);
-		break;
-	}
-	case PIPE_QUERY_TIME_ELAPSED:
-	case PIPE_QUERY_TIMESTAMP:
-		/* TIME_ELAPSED keeps its begin timestamp in the first 8 bytes. */
-		if (query->b.type == PIPE_QUERY_TIME_ELAPSED)
-			va += 8;
-
-		si_gfx_write_event_eop(ctx, V_028A90_BOTTOM_OF_PIPE_TS,
-				       0, EOP_DATA_SEL_TIMESTAMP, NULL, va,
-				       0, query->b.type);
-		fence_va = va + 8;
-		break;
-	default:
-		assert(0);
-	}
-
-	/* Occlusion and pipeline statistics share the same packet layout. */
-	if (sample_event) {
-		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
-		radeon_emit(cs, event);
-		radeon_emit(cs, va);
-		radeon_emit(cs, va >> 32);
-	}
-
-	radeon_add_to_buffer_list(ctx, &ctx->gfx, query->buffer.buf,
-				  RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
-
-	if (!fence_va)
-		return;
-
-	si_gfx_write_event_eop(ctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
-			       EOP_DATA_SEL_VALUE_32BIT,
-			       query->buffer.buf, fence_va, 0x80000000,
-			       query->b.type);
-}
-
-/* Stop (or suspend) a hardware query on the GFX command stream: call the
- * emit_stop hook for the current result slot, advance to the next slot and
- * undo the bookkeeping done when the query was started.  Nothing happens
- * when the query has no result buffer.
- */
-static void r600_query_hw_emit_stop(struct r600_common_context *ctx,
-				    struct r600_query_hw *query)
-{
-	uint64_t slot_va;
-
-	if (!query->buffer.buf)
-		return; // previous buffer allocation failure
-
-	/* Queries with a begin part reserved the space for their end packets
-	 * when they were started; the others have to do it now.
-	 */
-	if (query->flags & R600_QUERY_HW_FLAG_NO_START)
-		ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_end, false);
-
-	/* emit end query */
-	slot_va = query->buffer.buf->gpu_address + query->buffer.results_end;
-	query->ops->emit_stop(ctx, query, query->buffer.buf, slot_va);
-
-	/* The slot is complete; the next result goes behind it. */
-	query->buffer.results_end += query->result_size;
-
-	if (!(query->flags & R600_QUERY_HW_FLAG_NO_START))
-		ctx->num_cs_dw_queries_suspend -= query->num_cs_dw_end;
-
-	r600_update_occlusion_query_state(ctx, query->b.type, -1);
-	si_update_prims_generated_query_state(ctx, query->b.type, -1);
-}
-
-/* Emit one SET_PREDICATION packet that reads the predicate at GPU address
- * "va" inside "buf", and add "buf" to the buffer list for reading.
- */
-static void emit_set_predicate(struct r600_common_context *ctx,
-			       struct r600_resource *buf, uint64_t va,
-			       uint32_t op)
-{
-	struct radeon_winsys_cs *cs = ctx->gfx.cs;
-
-	if (ctx->chip_class < GFX9) {
-		/* Before GFX9 the operation shares a dword with bits 32..39
-		 * of the address.
-		 */
-		radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
-		radeon_emit(cs, va);
-		radeon_emit(cs, op | ((va >> 32) & 0xFF));
-	} else {
-		/* GFX9+: the operation comes first, then the full address. */
-		radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 2, 0));
-		radeon_emit(cs, op);
-		radeon_emit(cs, va);
-		radeon_emit(cs, va >> 32);
-	}
-
-	radeon_add_to_buffer_list(ctx, &ctx->gfx, buf, RADEON_USAGE_READ,
-				  RADEON_PRIO_QUERY);
-}
-
-/* Emit the predication packets for the current render condition.
- *
- * Does nothing when no render condition query is set.  When the query has
- * a workaround buffer, a single BOOL64 predicate is read from it.
- * Otherwise one packet is emitted per result slot of every result buffer
- * of the query, with the CONTINUE bit set on all packets but the first.
- */
-static void r600_emit_query_predication(struct r600_common_context *ctx,
-					struct r600_atom *atom)
-{
-	struct r600_query_hw *query = (struct r600_query_hw *)ctx->render_cond;
-	struct r600_query_buffer *qbuf;
-	bool wait_hint, invert;
-	uint32_t op;
-
-	if (!query)
-		return;
-
-	invert = ctx->render_cond_invert;
-
-	/* Use the value written by compute shader as a workaround. Note that
-	 * the wait flag does not apply in this predication mode.
-	 *
-	 * The shader outputs the result value to L2. Workarounds only affect VI
-	 * and later, where the CP reads data from L2, so we don't need an
-	 * additional flush.
-	 */
-	if (query->workaround_buf) {
-		uint64_t va = query->workaround_buf->gpu_address +
-			      query->workaround_offset;
-
-		op = PRED_OP(PREDICATION_OP_BOOL64);
-		op |= invert ? PREDICATION_DRAW_NOT_VISIBLE
-			     : PREDICATION_DRAW_VISIBLE;
-		emit_set_predicate(ctx, query->workaround_buf, va, op);
-		return;
-	}
-
-	switch (query->b.type) {
-	case PIPE_QUERY_OCCLUSION_COUNTER:
-	case PIPE_QUERY_OCCLUSION_PREDICATE:
-	case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
-		op = PRED_OP(PREDICATION_OP_ZPASS);
-		break;
-	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-	case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
-		/* The sense of the overflow predicates is reversed. */
-		op = PRED_OP(PREDICATION_OP_PRIMCOUNT);
-		invert = !invert;
-		break;
-	default:
-		assert(0);
-		return;
-	}
-
-	/* if true then invert, see GL_ARB_conditional_render_inverted */
-	if (invert)
-		op |= PREDICATION_DRAW_NOT_VISIBLE; /* Draw if not visible or overflow */
-	else
-		op |= PREDICATION_DRAW_VISIBLE; /* Draw if visible or no overflow */
-
-	wait_hint = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT ||
-		    ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;
-	op |= wait_hint ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW;
-
-	/* emit predicate packets for all data blocks */
-	for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
-		uint64_t buf_va = qbuf->buf->gpu_address;
-		unsigned slot;
-
-		for (slot = 0; slot < qbuf->results_end; slot += query->result_size) {
-			uint64_t va = buf_va + slot;
-
-			if (query->b.type != PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
-				emit_set_predicate(ctx, qbuf->buf, va, op);
-				op |= PREDICATION_CONTINUE;
-				continue;
-			}
-
-			/* One packet per stream record of the slot. */
-			for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) {
-				emit_set_predicate(ctx, qbuf->buf, va + 32 * stream, op);
-
-				/* set CONTINUE bit for all packets except the first */
-				op |= PREDICATION_CONTINUE;
-			}
-		}
-	}
-}
-
-/* pipe_context::create_query hook.
- *
- * TIMESTAMP_DISJOINT, GPU_FINISHED and every driver-specific query type are
- * created as software queries; all remaining types become hardware queries.
- */
-static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
-{
-	const bool use_sw_query =
-		query_type >= PIPE_QUERY_DRIVER_SPECIFIC ||
-		query_type == PIPE_QUERY_TIMESTAMP_DISJOINT ||
-		query_type == PIPE_QUERY_GPU_FINISHED;
-
-	if (!use_sw_query)
-		return r600_query_hw_create((struct r600_common_screen *)ctx->screen,
-					    query_type, index);
-
-	return r600_query_sw_create(query_type);
-}
-
-/* pipe_context::destroy_query hook: forwards to the query's destroy op. */
-static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
-{
-	struct r600_query *impl = (struct r600_query *)query;
-
-	impl->ops->destroy(((struct r600_common_context *)ctx)->screen, impl);
-}
-
-/* pipe_context::begin_query hook: forwards to the query's begin op and
- * returns whatever that op reports. */
-static boolean r600_begin_query(struct pipe_context *ctx,
-				struct pipe_query *query)
-{
-	struct r600_query *impl = (struct r600_query *)query;
-
-	return impl->ops->begin((struct r600_common_context *)ctx, impl);
-}
-
-/* Reset a hardware query to the empty state.
- *
- * Frees every chained (previous) result buffer, rewinds the head buffer and
- * makes sure the head buffer can be written without stalling: it is either
- * re-prepared in place or replaced by a freshly allocated one.
- *
- * On return query->buffer.buf may be NULL if allocation or preparation
- * failed; callers check for that.
- */
-void si_query_hw_reset_buffers(struct r600_common_context *rctx,
-			       struct r600_query_hw *query)
-{
-	struct r600_query_buffer *prev = query->buffer.previous;
-
-	/* Discard the old query buffers. */
-	while (prev) {
-		struct r600_query_buffer *qbuf = prev;
-		prev = prev->previous;
-		r600_resource_reference(&qbuf->buf, NULL);
-		FREE(qbuf);
-	}
-
-	query->buffer.results_end = 0;
-	query->buffer.previous = NULL;
-
-	/* Obtain a new buffer if there is none or if the current one can't be
-	 * mapped without a stall.
-	 *
-	 * The head buffer is NULL after a failed prepare_buffer below (or a
-	 * failed allocation), so it must be tested before it is dereferenced;
-	 * otherwise the next reset of the same query would crash.
-	 */
-	if (!query->buffer.buf ||
-	    si_rings_is_buffer_referenced(rctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) ||
-	    !rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
-		r600_resource_reference(&query->buffer.buf, NULL);
-		query->buffer.buf = r600_new_query_buffer(rctx->screen, query);
-	} else {
-		/* The buffer is idle: reuse it, dropping it if it cannot be
-		 * re-initialized. */
-		if (!query->ops->prepare_buffer(rctx->screen, query, query->buffer.buf))
-			r600_resource_reference(&query->buffer.buf, NULL);
-	}
-}
-
-/* Begin op for hardware queries.
- *
- * Queries flagged NO_START cannot be begun. Unless the query is flagged
- * BEGIN_RESUMES its buffers are reset first. Any workaround buffer from an
- * earlier use is released, the start packets are emitted, and the query is
- * appended to the context's active list.
- *
- * Returns false if the query cannot be started or has no result buffer.
- */
-bool si_query_hw_begin(struct r600_common_context *rctx,
-		       struct r600_query *rquery)
-{
-	struct r600_query_hw *hwq = (struct r600_query_hw *)rquery;
-
-	if ((hwq->flags & R600_QUERY_HW_FLAG_NO_START) != 0) {
-		assert(0);
-		return false;
-	}
-
-	if ((hwq->flags & R600_QUERY_HW_FLAG_BEGIN_RESUMES) == 0)
-		si_query_hw_reset_buffers(rctx, hwq);
-
-	r600_resource_reference(&hwq->workaround_buf, NULL);
-
-	r600_query_hw_emit_start(rctx, hwq);
-
-	if (hwq->buffer.buf) {
-		LIST_ADDTAIL(&hwq->list, &rctx->active_queries);
-		return true;
-	}
-
-	return false;
-}
-
-/* pipe_context::end_query hook: forwards to the query's end op and returns
- * whatever that op reports. */
-static bool r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
-{
-	struct r600_query *impl = (struct r600_query *)query;
-
-	return impl->ops->end((struct r600_common_context *)ctx, impl);
-}
-
-/* End op for hardware queries.
- *
- * NO_START queries have no begin, so their buffers are reset here instead.
- * After the stop packets are emitted, queries that were started are removed
- * from the active list.
- *
- * Returns false if the query has no result buffer.
- */
-bool si_query_hw_end(struct r600_common_context *rctx,
-		     struct r600_query *rquery)
-{
-	struct r600_query_hw *hwq = (struct r600_query_hw *)rquery;
-
-	if ((hwq->flags & R600_QUERY_HW_FLAG_NO_START) != 0)
-		si_query_hw_reset_buffers(rctx, hwq);
-
-	r600_query_hw_emit_stop(rctx, hwq);
-
-	if ((hwq->flags & R600_QUERY_HW_FLAG_NO_START) == 0)
-		LIST_DELINIT(&hwq->list);
-
-	return hwq->buffer.buf != NULL;
-}
-
-/* Describe the layout of one result slot for the given hardware query type.
- *
- * Fills *params with the byte offsets of the start value, the end value and
- * the fence dword inside a slot, plus pair_stride/pair_count for types that
- * keep several start/end pairs per slot. `index` selects the sub-counter for
- * SO_STATISTICS and PIPELINE_STATISTICS; the other cases do not read it.
- */
-static void r600_get_hw_query_params(struct r600_common_context *rctx,
- struct r600_query_hw *rquery, int index,
- struct r600_hw_query_params *params)
-{
- unsigned max_rbs = rctx->screen->info.num_render_backends;
-
- /* Defaults: a single start/end pair per slot. */
- params->pair_stride = 0;
- params->pair_count = 1;
-
- switch (rquery->b.type) {
- case PIPE_QUERY_OCCLUSION_COUNTER:
- case PIPE_QUERY_OCCLUSION_PREDICATE:
- case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
- /* One 16-byte pair per render backend, fence after the last pair. */
- params->start_offset = 0;
- params->end_offset = 8;
- params->fence_offset = max_rbs * 16;
- params->pair_stride = 16;
- params->pair_count = max_rbs;
- break;
- case PIPE_QUERY_TIME_ELAPSED:
- params->start_offset = 0;
- params->end_offset = 8;
- params->fence_offset = 16;
- break;
- case PIPE_QUERY_TIMESTAMP:
- /* Start and end coincide: there is a single value. */
- params->start_offset = 0;
- params->end_offset = 0;
- params->fence_offset = 8;
- break;
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- params->start_offset = 8;
- params->end_offset = 24;
- params->fence_offset = params->end_offset + 4;
- break;
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- params->start_offset = 0;
- params->end_offset = 16;
- params->fence_offset = params->end_offset + 4;
- break;
- case PIPE_QUERY_SO_STATISTICS:
- params->start_offset = 8 - index * 8;
- params->end_offset = 24 - index * 8;
- params->fence_offset = params->end_offset + 4;
- break;
- case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
- params->pair_count = R600_MAX_STREAMS;
- params->pair_stride = 32;
- /* fall through: the offsets below are shared with the single-stream case */
- case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- params->start_offset = 0;
- params->end_offset = 16;
-
- /* We can re-use the high dword of the last 64-bit value as a
- * fence: it is initialized as 0, and the high bit is set by
- * the write of the streamout stats event.
- */
- params->fence_offset = rquery->result_size - 4;
- break;
- case PIPE_QUERY_PIPELINE_STATISTICS:
- {
- /* Offsets apply to EG+ */
- static const unsigned offsets[] = {56, 48, 24, 32, 40, 16, 8, 0, 64, 72, 80};
- /* NOTE(review): index is not range-checked against offsets[] here. */
- params->start_offset = offsets[index];
- params->end_offset = 88 + offsets[index];
- params->fence_offset = 2 * 88;
- break;
- }
- default:
- unreachable("r600_get_hw_query_params unsupported");
- }
-}
-
-/* Read one start/end counter pair from a mapped query buffer and return
- * end - start.
- *
- * map             - base of the result slot, read as an array of dwords
- * start_index     - dword index of the low half of the 64-bit start value
- * end_index       - dword index of the low half of the 64-bit end value
- * test_status_bit - if true, the difference is only returned when bit 63 is
- *                   set in both values; otherwise 0 is returned
- *
- * The difference is returned as a full 64-bit value. Returning it through a
- * 32-bit type would silently drop the upper half for large counters even
- * though every caller accumulates into 64-bit fields.
- */
-static uint64_t r600_query_read_result(void *map, unsigned start_index, unsigned end_index,
-				       bool test_status_bit)
-{
-	uint32_t *current_result = (uint32_t*)map;
-	uint64_t start, end;
-
-	start = (uint64_t)current_result[start_index] |
-		(uint64_t)current_result[start_index+1] << 32;
-	end = (uint64_t)current_result[end_index] |
-	      (uint64_t)current_result[end_index+1] << 32;
-
-	if (!test_status_bit ||
-	    ((start & 0x8000000000000000ULL) && (end & 0x8000000000000000ULL))) {
-		return end - start;
-	}
-	return 0;
-}
-
-/* Accumulate one result slot at `buffer` into *result according to the
- * query type.
- *
- * Counter-style queries add to the running value, predicate queries OR into
- * result->b, and TIMESTAMP overwrites result->u64 with the slot's value.
- * The index arguments passed to r600_query_read_result are dword indices
- * into the slot.
- */
-static void r600_query_hw_add_result(struct r600_common_screen *rscreen,
- struct r600_query_hw *query,
- void *buffer,
- union pipe_query_result *result)
-{
- unsigned max_rbs = rscreen->info.num_render_backends;
-
- switch (query->b.type) {
- case PIPE_QUERY_OCCLUSION_COUNTER: {
- /* One 16-byte start/end pair per render backend; sum them all. */
- for (unsigned i = 0; i < max_rbs; ++i) {
- unsigned results_base = i * 16;
- result->u64 +=
- r600_query_read_result(buffer + results_base, 0, 2, true);
- }
- break;
- }
- case PIPE_QUERY_OCCLUSION_PREDICATE:
- case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: {
- /* True as soon as any render backend reports a non-zero difference. */
- for (unsigned i = 0; i < max_rbs; ++i) {
- unsigned results_base = i * 16;
- result->b = result->b ||
- r600_query_read_result(buffer + results_base, 0, 2, true) != 0;
- }
- break;
- }
- case PIPE_QUERY_TIME_ELAPSED:
- result->u64 += r600_query_read_result(buffer, 0, 2, false);
- break;
- case PIPE_QUERY_TIMESTAMP:
- /* Not accumulated: the value read from this slot replaces the previous one. */
- result->u64 = *(uint64_t*)buffer;
- break;
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- /* SAMPLE_STREAMOUTSTATS stores this structure:
- * {
- * u64 NumPrimitivesWritten;
- * u64 PrimitiveStorageNeeded;
- * }
- * We only need NumPrimitivesWritten here. */
- result->u64 += r600_query_read_result(buffer, 2, 6, true);
- break;
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- /* Here we read PrimitiveStorageNeeded. */
- result->u64 += r600_query_read_result(buffer, 0, 4, true);
- break;
- case PIPE_QUERY_SO_STATISTICS:
- result->so_statistics.num_primitives_written +=
- r600_query_read_result(buffer, 2, 6, true);
- result->so_statistics.primitives_storage_needed +=
- r600_query_read_result(buffer, 0, 4, true);
- break;
- case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- /* Overflow is reported when the two counters' differences disagree. */
- result->b = result->b ||
- r600_query_read_result(buffer, 2, 6, true) !=
- r600_query_read_result(buffer, 0, 4, true);
- break;
- case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
- /* Same test as above, repeated for each stream's 32-byte block. */
- for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) {
- result->b = result->b ||
- r600_query_read_result(buffer, 2, 6, true) !=
- r600_query_read_result(buffer, 0, 4, true);
- buffer = (char *)buffer + 32;
- }
- break;
- case PIPE_QUERY_PIPELINE_STATISTICS:
- /* Each counter's end value sits 22 dwords after its start value. */
- result->pipeline_statistics.ps_invocations +=
- r600_query_read_result(buffer, 0, 22, false);
- result->pipeline_statistics.c_primitives +=
- r600_query_read_result(buffer, 2, 24, false);
- result->pipeline_statistics.c_invocations +=
- r600_query_read_result(buffer, 4, 26, false);
- result->pipeline_statistics.vs_invocations +=
- r600_query_read_result(buffer, 6, 28, false);
- result->pipeline_statistics.gs_invocations +=
- r600_query_read_result(buffer, 8, 30, false);
- result->pipeline_statistics.gs_primitives +=
- r600_query_read_result(buffer, 10, 32, false);
- result->pipeline_statistics.ia_primitives +=
- r600_query_read_result(buffer, 12, 34, false);
- result->pipeline_statistics.ia_vertices +=
- r600_query_read_result(buffer, 14, 36, false);
- result->pipeline_statistics.hs_invocations +=
- r600_query_read_result(buffer, 16, 38, false);
- result->pipeline_statistics.ds_invocations +=
- r600_query_read_result(buffer, 18, 40, false);
- result->pipeline_statistics.cs_invocations +=
- r600_query_read_result(buffer, 20, 42, false);
-#if 0 /* for testing */
- printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
- "DS=%llu, GS=%llu, GS prims=%llu, Clipper=%llu, "
- "Clipper prims=%llu, PS=%llu, CS=%llu\n",
- result->pipeline_statistics.ia_vertices,
- result->pipeline_statistics.ia_primitives,
- result->pipeline_statistics.vs_invocations,
- result->pipeline_statistics.hs_invocations,
- result->pipeline_statistics.ds_invocations,
- result->pipeline_statistics.gs_invocations,
- result->pipeline_statistics.gs_primitives,
- result->pipeline_statistics.c_invocations,
- result->pipeline_statistics.c_primitives,
- result->pipeline_statistics.ps_invocations,
- result->pipeline_statistics.cs_invocations);
-#endif
- break;
- default:
- assert(0);
- }
-}
-
-/* pipe_context::get_query_result hook: forwards to the query's get_result
- * op and returns whatever that op reports. */
-static boolean r600_get_query_result(struct pipe_context *ctx,
-				     struct pipe_query *query, boolean wait,
-				     union pipe_query_result *result)
-{
-	struct r600_query *impl = (struct r600_query *)query;
-
-	return impl->ops->get_result((struct r600_common_context *)ctx, impl,
-				     wait, result);
-}
-
-/* pipe_context::get_query_result_resource hook: forwards every argument to
- * the query's get_result_resource op. */
-static void r600_get_query_result_resource(struct pipe_context *ctx,
-					   struct pipe_query *query,
-					   boolean wait,
-					   enum pipe_query_value_type result_type,
-					   int index,
-					   struct pipe_resource *resource,
-					   unsigned offset)
-{
-	struct r600_query *impl = (struct r600_query *)query;
-
-	impl->ops->get_result_resource((struct r600_common_context *)ctx, impl,
-				       wait, result_type, index,
-				       resource, offset);
-}
-
-/* clear_result op for hardware queries: clears *result through
- * util_query_clear_result using the query's type. */
-static void r600_query_hw_clear_result(struct r600_query_hw *query,
-				       union pipe_query_result *result)
-{
-	const unsigned query_type = query->b.type;
-
-	util_query_clear_result(result, query_type);
-}
-
-/* Read back the result of a hardware query on the CPU.
- *
- * Clears *result, then walks the whole chain of result buffers, maps each
- * one and feeds every result slot to the query's add_result op. For
- * TIME_ELAPSED and TIMESTAMP the accumulated tick count is finally scaled
- * by 1000000 / clock_crystal_freq.
- *
- * wait - if false, buffers are mapped with PIPE_TRANSFER_DONTBLOCK
- *
- * Returns false if any buffer could not be mapped; *result then holds a
- * partial value and must not be used.
- */
-bool si_query_hw_get_result(struct r600_common_context *rctx,
-			    struct r600_query *rquery,
-			    bool wait, union pipe_query_result *result)
-{
-	struct r600_common_screen *rscreen = rctx->screen;
-	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
-	struct r600_query_buffer *qbuf;
-
-	query->ops->clear_result(query, result);
-
-	for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
-		unsigned usage = PIPE_TRANSFER_READ |
-				 (wait ? 0 : PIPE_TRANSFER_DONTBLOCK);
-		unsigned results_base = 0;
-		void *map;
-
-		if (rquery->b.flushed)
-			map = rctx->ws->buffer_map(qbuf->buf->buf, NULL, usage);
-		else
-			map = si_buffer_map_sync_with_rings(rctx, qbuf->buf, usage);
-
-		if (!map)
-			return false;
-
-		while (results_base != qbuf->results_end) {
-			/* Byte offset: step through a char pointer instead of
-			 * relying on arithmetic on void pointers. */
-			query->ops->add_result(rscreen, query,
-					       (char *)map + results_base,
-					       result);
-			results_base += query->result_size;
-		}
-	}
-
-	/* Convert the time to expected units. */
-	if (rquery->type == PIPE_QUERY_TIME_ELAPSED ||
-	    rquery->type == PIPE_QUERY_TIMESTAMP) {
-		const uint64_t freq = rscreen->info.clock_crystal_freq;
-		const uint64_t ticks = result->u64;
-
-		/* Same value as (1000000 * ticks) / freq, but split into
-		 * quotient and remainder so the intermediate product cannot
-		 * wrap around 64 bits for large tick counts. */
-		result->u64 = (ticks / freq) * 1000000 +
-			      ((ticks % freq) * 1000000) / freq;
-	}
-	return true;
-}
-
-/* Create the compute shader that is used to collect the results.
- *
- * One compute grid with a single thread is launched for every query result
- * buffer. The thread (optionally) reads a previous summary buffer, then
- * accumulates data from the query result buffer, and writes the result either
- * to a summary buffer to be consumed by the next grid invocation or to the
- * user-supplied buffer.
- *
- * Data layout:
- *
- * CONST
- * 0.x = end_offset
- * 0.y = result_stride
- * 0.z = result_count
- * 0.w = bit field:
- * 1: read previously accumulated values
- * 2: write accumulated values for chaining
- * 4: write result available
- * 8: convert result to boolean (0/1)
- * 16: only read one dword and use that as result
- * 32: apply timestamp conversion
- * 64: store full 64 bits result
- * 128: store signed 32 bits result
- * 256: SO_OVERFLOW mode: take the difference of two successive half-pairs
- * 1.x = fence_offset
- * 1.y = pair_stride
- * 1.z = pair_count
- *
- * BUFFER[0] = query result buffer
- * BUFFER[1] = previous summary buffer
- * BUFFER[2] = next summary buffer or user-supplied buffer
- */
-static void r600_create_query_result_shader(struct r600_common_context *rctx)
-{
- /* Builds the TGSI text of the result-collection compute shader,
- * translates it to tokens and stores the created compute state in
- * rctx->query_result_shader. On translation failure that pointer is
- * left untouched.
- */
- /* TEMP[0].xy = accumulated result so far
- * TEMP[0].z = result not available
- *
- * TEMP[1].x = current result index
- * TEMP[1].y = current pair index
- */
- static const char text_tmpl[] =
- "COMP\n"
- "PROPERTY CS_FIXED_BLOCK_WIDTH 1\n"
- "PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n"
- "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
- "DCL BUFFER[0]\n"
- "DCL BUFFER[1]\n"
- "DCL BUFFER[2]\n"
- "DCL CONST[0][0..1]\n"
- "DCL TEMP[0..5]\n"
- "IMM[0] UINT32 {0, 31, 2147483647, 4294967295}\n"
- "IMM[1] UINT32 {1, 2, 4, 8}\n"
- "IMM[2] UINT32 {16, 32, 64, 128}\n"
- "IMM[3] UINT32 {1000000, 0, %u, 0}\n" /* for timestamp conversion */
- "IMM[4] UINT32 {256, 0, 0, 0}\n"
-
- "AND TEMP[5], CONST[0][0].wwww, IMM[2].xxxx\n"
- "UIF TEMP[5]\n"
- /* Check result availability. */
- "LOAD TEMP[1].x, BUFFER[0], CONST[0][1].xxxx\n"
- "ISHR TEMP[0].z, TEMP[1].xxxx, IMM[0].yyyy\n"
- "MOV TEMP[1], TEMP[0].zzzz\n"
- "NOT TEMP[0].z, TEMP[0].zzzz\n"
-
- /* Load result if available. */
- "UIF TEMP[1]\n"
- "LOAD TEMP[0].xy, BUFFER[0], IMM[0].xxxx\n"
- "ENDIF\n"
- "ELSE\n"
- /* Load previously accumulated result if requested. */
- "MOV TEMP[0], IMM[0].xxxx\n"
- "AND TEMP[4], CONST[0][0].wwww, IMM[1].xxxx\n"
- "UIF TEMP[4]\n"
- "LOAD TEMP[0].xyz, BUFFER[1], IMM[0].xxxx\n"
- "ENDIF\n"
-
- "MOV TEMP[1].x, IMM[0].xxxx\n"
- "BGNLOOP\n"
- /* Break if accumulated result so far is not available. */
- "UIF TEMP[0].zzzz\n"
- "BRK\n"
- "ENDIF\n"
-
- /* Break if result_index >= result_count. */
- "USGE TEMP[5], TEMP[1].xxxx, CONST[0][0].zzzz\n"
- "UIF TEMP[5]\n"
- "BRK\n"
- "ENDIF\n"
-
- /* Load fence and check result availability */
- "UMAD TEMP[5].x, TEMP[1].xxxx, CONST[0][0].yyyy, CONST[0][1].xxxx\n"
- "LOAD TEMP[5].x, BUFFER[0], TEMP[5].xxxx\n"
- "ISHR TEMP[0].z, TEMP[5].xxxx, IMM[0].yyyy\n"
- "NOT TEMP[0].z, TEMP[0].zzzz\n"
- "UIF TEMP[0].zzzz\n"
- "BRK\n"
- "ENDIF\n"
-
- "MOV TEMP[1].y, IMM[0].xxxx\n"
- "BGNLOOP\n"
- /* Load start and end. */
- "UMUL TEMP[5].x, TEMP[1].xxxx, CONST[0][0].yyyy\n"
- "UMAD TEMP[5].x, TEMP[1].yyyy, CONST[0][1].yyyy, TEMP[5].xxxx\n"
- "LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n"
-
- "UADD TEMP[5].y, TEMP[5].xxxx, CONST[0][0].xxxx\n"
- "LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n"
-
- "U64ADD TEMP[4].xy, TEMP[3], -TEMP[2]\n"
-
- "AND TEMP[5].z, CONST[0][0].wwww, IMM[4].xxxx\n"
- "UIF TEMP[5].zzzz\n"
- /* Load second start/end half-pair and
- * take the difference
- */
- "UADD TEMP[5].xy, TEMP[5], IMM[1].wwww\n"
- "LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n"
- "LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n"
-
- "U64ADD TEMP[3].xy, TEMP[3], -TEMP[2]\n"
- "U64ADD TEMP[4].xy, TEMP[4], -TEMP[3]\n"
- "ENDIF\n"
-
- "U64ADD TEMP[0].xy, TEMP[0], TEMP[4]\n"
-
- /* Increment pair index */
- "UADD TEMP[1].y, TEMP[1].yyyy, IMM[1].xxxx\n"
- "USGE TEMP[5], TEMP[1].yyyy, CONST[0][1].zzzz\n"
- "UIF TEMP[5]\n"
- "BRK\n"
- "ENDIF\n"
- "ENDLOOP\n"
-
- /* Increment result index */
- "UADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx\n"
- "ENDLOOP\n"
- "ENDIF\n"
-
- "AND TEMP[4], CONST[0][0].wwww, IMM[1].yyyy\n"
- "UIF TEMP[4]\n"
- /* Store accumulated data for chaining. */
- "STORE BUFFER[2].xyz, IMM[0].xxxx, TEMP[0]\n"
- "ELSE\n"
- "AND TEMP[4], CONST[0][0].wwww, IMM[1].zzzz\n"
- "UIF TEMP[4]\n"
- /* Store result availability. */
- "NOT TEMP[0].z, TEMP[0]\n"
- "AND TEMP[0].z, TEMP[0].zzzz, IMM[1].xxxx\n"
- "STORE BUFFER[2].x, IMM[0].xxxx, TEMP[0].zzzz\n"
-
- "AND TEMP[4], CONST[0][0].wwww, IMM[2].zzzz\n"
- "UIF TEMP[4]\n"
- "STORE BUFFER[2].y, IMM[0].xxxx, IMM[0].xxxx\n"
- "ENDIF\n"
- "ELSE\n"
- /* Store result if it is available. */
- "NOT TEMP[4], TEMP[0].zzzz\n"
- "UIF TEMP[4]\n"
- /* Apply timestamp conversion */
- "AND TEMP[4], CONST[0][0].wwww, IMM[2].yyyy\n"
- "UIF TEMP[4]\n"
- "U64MUL TEMP[0].xy, TEMP[0], IMM[3].xyxy\n"
- "U64DIV TEMP[0].xy, TEMP[0], IMM[3].zwzw\n"
- "ENDIF\n"
-
- /* Convert to boolean */
- "AND TEMP[4], CONST[0][0].wwww, IMM[1].wwww\n"
- "UIF TEMP[4]\n"
- "U64SNE TEMP[0].x, TEMP[0].xyxy, IMM[4].zwzw\n"
- "AND TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx\n"
- "MOV TEMP[0].y, IMM[0].xxxx\n"
- "ENDIF\n"
-
- "AND TEMP[4], CONST[0][0].wwww, IMM[2].zzzz\n"
- "UIF TEMP[4]\n"
- "STORE BUFFER[2].xy, IMM[0].xxxx, TEMP[0].xyxy\n"
- "ELSE\n"
- /* Clamping */
- "UIF TEMP[0].yyyy\n"
- "MOV TEMP[0].x, IMM[0].wwww\n"
- "ENDIF\n"
-
- "AND TEMP[4], CONST[0][0].wwww, IMM[2].wwww\n"
- "UIF TEMP[4]\n"
- "UMIN TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz\n"
- "ENDIF\n"
-
- "STORE BUFFER[2].x, IMM[0].xxxx, TEMP[0].xxxx\n"
- "ENDIF\n"
- "ENDIF\n"
- "ENDIF\n"
- "ENDIF\n"
-
- "END\n";
-
- /* Template size plus 32 bytes of slack for the expanded %u in IMM[3]. */
- char text[sizeof(text_tmpl) + 32];
- struct tgsi_token tokens[1024];
- struct pipe_compute_state state = {};
-
- /* Hard code the frequency into the shader so that the backend can
- * use the full range of optimizations for divide-by-constant.
- */
- snprintf(text, sizeof(text), text_tmpl,
- rctx->screen->info.clock_crystal_freq);
-
- /* A failed translation asserts in debug builds and otherwise returns
- * without creating the shader. */
- if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) {
- assert(false);
- return;
- }
-
- state.ir_type = PIPE_SHADER_IR_TGSI;
- /* NOTE(review): tokens is a stack array; presumably create_compute_state
- * copies what it needs before this function returns - confirm. */
- state.prog = tokens;
-
- rctx->query_result_shader = rctx->b.create_compute_state(&rctx->b, &state);
-}
-
-/* Re-bind the compute state, constant buffer 0 and shader buffers 0..2
- * recorded in *st, then drop the buffer references held by *st. */
-static void r600_restore_qbo_state(struct r600_common_context *rctx,
-				   struct r600_qbo_state *st)
-{
-	unsigned slot;
-
-	rctx->b.bind_compute_state(&rctx->b, st->saved_compute);
-
-	/* Constant buffer 0. */
-	rctx->b.set_constant_buffer(&rctx->b, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
-	pipe_resource_reference(&st->saved_const0.buffer, NULL);
-
-	/* Shader buffers 0..2. */
-	rctx->b.set_shader_buffers(&rctx->b, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo);
-	for (slot = 0; slot != 3; slot++)
-		pipe_resource_reference(&st->saved_ssbo[slot].buffer, NULL);
-}
-
-/* Write the result of a hardware query (or, when index < 0, its
- * availability) into `resource` at `offset` using the query result compute
- * shader.
- *
- * One single-thread grid is launched per buffer in the query's buffer chain;
- * partial sums are carried from one launch to the next through a temporary
- * 16-byte buffer. The compute state, constant buffer 0 and shader buffers
- * 0..2 are saved before and restored after the launches.
- *
- * Returns without writing anything if the shader or the temporary buffer
- * cannot be created.
- */
-static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
- struct r600_query *rquery,
- bool wait,
- enum pipe_query_value_type result_type,
- int index,
- struct pipe_resource *resource,
- unsigned offset)
-{
- struct r600_query_hw *query = (struct r600_query_hw *)rquery;
- struct r600_query_buffer *qbuf;
- struct r600_query_buffer *qbuf_prev;
- struct pipe_resource *tmp_buffer = NULL;
- unsigned tmp_buffer_offset = 0;
- struct r600_qbo_state saved_state = {};
- struct pipe_grid_info grid = {};
- struct pipe_constant_buffer constant_buffer = {};
- struct pipe_shader_buffer ssbo[3];
- struct r600_hw_query_params params;
- /* Mirrors the shader's CONST[0][0..1] layout. */
- struct {
- uint32_t end_offset;
- uint32_t result_stride;
- uint32_t result_count;
- uint32_t config;
- uint32_t fence_offset;
- uint32_t pair_stride;
- uint32_t pair_count;
- } consts;
-
- /* The shader is created lazily on first use. */
- if (!rctx->query_result_shader) {
- r600_create_query_result_shader(rctx);
- if (!rctx->query_result_shader)
- return;
- }
-
- /* A chained query needs scratch space to carry the running sum
- * between grid launches. */
- if (query->buffer.previous) {
- u_suballocator_alloc(rctx->allocator_zeroed_memory, 16, 16,
- &tmp_buffer_offset, &tmp_buffer);
- if (!tmp_buffer)
- return;
- }
-
- rctx->save_qbo_state(&rctx->b, &saved_state);
-
- /* Offsets are made relative to start_offset because shader buffer 0
- * is bound starting at start_offset below. */
- r600_get_hw_query_params(rctx, query, index >= 0 ? index : 0, &params);
- consts.end_offset = params.end_offset - params.start_offset;
- consts.fence_offset = params.fence_offset - params.start_offset;
- consts.result_stride = query->result_size;
- consts.pair_stride = params.pair_stride;
- consts.pair_count = params.pair_count;
-
- constant_buffer.buffer_size = sizeof(consts);
- constant_buffer.user_buffer = &consts;
-
- /* Slots 1 (previous summary) and 2 (next summary) both start out
- * pointing at the scratch buffer. */
- ssbo[1].buffer = tmp_buffer;
- ssbo[1].buffer_offset = tmp_buffer_offset;
- ssbo[1].buffer_size = 16;
-
- ssbo[2] = ssbo[1];
-
- rctx->b.bind_compute_state(&rctx->b, rctx->query_result_shader);
-
- grid.block[0] = 1;
- grid.block[1] = 1;
- grid.block[2] = 1;
- grid.grid[0] = 1;
- grid.grid[1] = 1;
- grid.grid[2] = 1;
-
- /* Shader config bits: 4 = write availability, 8 = convert to boolean,
- * 256 = SO_OVERFLOW mode, 32 = timestamp conversion. */
- consts.config = 0;
- if (index < 0)
- consts.config |= 4;
- if (query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE ||
- query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE)
- consts.config |= 8;
- else if (query->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
- query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
- consts.config |= 8 | 256;
- else if (query->b.type == PIPE_QUERY_TIMESTAMP ||
- query->b.type == PIPE_QUERY_TIME_ELAPSED)
- consts.config |= 32;
-
- /* 64 = store the full 64-bit result, 128 = store a signed 32-bit
- * result; U32 needs neither bit. */
- switch (result_type) {
- case PIPE_QUERY_TYPE_U64:
- case PIPE_QUERY_TYPE_I64:
- consts.config |= 64;
- break;
- case PIPE_QUERY_TYPE_I32:
- consts.config |= 128;
- break;
- case PIPE_QUERY_TYPE_U32:
- break;
- }
-
- rctx->flags |= rctx->screen->barrier_flags.cp_to_L2;
-
- for (qbuf = &query->buffer; qbuf; qbuf = qbuf_prev) {
- if (query->b.type != PIPE_QUERY_TIMESTAMP) {
- qbuf_prev = qbuf->previous;
- consts.result_count = qbuf->results_end / query->result_size;
- /* Bits 1 (read previous sum) and 2 (write sum for chaining)
- * are recomputed for every buffer. */
- consts.config &= ~3;
- if (qbuf != &query->buffer)
- consts.config |= 1;
- if (qbuf->previous)
- consts.config |= 2;
- } else {
- /* Only read the last timestamp. */
- qbuf_prev = NULL;
- consts.result_count = 0;
- consts.config |= 16;
- params.start_offset += qbuf->results_end - query->result_size;
- }
-
- rctx->b.set_constant_buffer(&rctx->b, PIPE_SHADER_COMPUTE, 0, &constant_buffer);
-
- ssbo[0].buffer = &qbuf->buf->b.b;
- ssbo[0].buffer_offset = params.start_offset;
- ssbo[0].buffer_size = qbuf->results_end - params.start_offset;
-
- /* Last buffer of the chain: the shader writes to the caller's
- * resource instead of the scratch buffer. */
- if (!qbuf->previous) {
- ssbo[2].buffer = resource;
- ssbo[2].buffer_offset = offset;
- ssbo[2].buffer_size = 8;
-
- ((struct r600_resource *)resource)->TC_L2_dirty = true;
- }
-
- rctx->b.set_shader_buffers(&rctx->b, PIPE_SHADER_COMPUTE, 0, 3, ssbo);
-
- if (wait && qbuf == &query->buffer) {
- uint64_t va;
-
- /* Wait for result availability. Wait only for readiness
- * of the last entry, since the fence writes should be
- * serialized in the CP.
- */
- va = qbuf->buf->gpu_address + qbuf->results_end - query->result_size;
- va += params.fence_offset;
-
- si_gfx_wait_fence(rctx, va, 0x80000000, 0x80000000);
- }
-
- rctx->b.launch_grid(&rctx->b, &grid);
- rctx->flags |= rctx->screen->barrier_flags.compute_to_L2;
- }
-
- r600_restore_qbo_state(rctx, &saved_state);
- pipe_resource_reference(&tmp_buffer, NULL);
-}
-
-/* pipe_context::render_condition hook.
- *
- * Records the predicate query, its inversion flag and the wait mode in the
- * context and marks the render-condition atom dirty (clean when query is
- * NULL, i.e. predication is being turned off).
- *
- * On VI/GFX9 parts with old PFP firmware, non-inverted stream-overflow
- * predication over more than one result is evaluated by a compute shader
- * into a small workaround buffer that is then used as the predicate instead.
- */
-static void r600_render_condition(struct pipe_context *ctx,
-				  struct pipe_query *query,
-				  boolean condition,
-				  enum pipe_render_cond_flag mode)
-{
-	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
-	struct r600_query_hw *rquery = (struct r600_query_hw *)query;
-	struct r600_atom *atom = &rctx->render_cond_atom;
-
-	if (query) {
-		bool needs_workaround = false;
-
-		/* There was a firmware regression in VI which causes successive
-		 * SET_PREDICATION packets to give the wrong answer for
-		 * non-inverted stream overflow predication.
-		 */
-		if (((rctx->chip_class == VI && rctx->screen->info.pfp_fw_feature < 49) ||
-		     (rctx->chip_class == GFX9 && rctx->screen->info.pfp_fw_feature < 38)) &&
-		    !condition &&
-		    (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
-		     (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE &&
-		      (rquery->buffer.previous ||
-		       rquery->buffer.results_end > rquery->result_size)))) {
-			needs_workaround = true;
-		}
-
-		if (needs_workaround && !rquery->workaround_buf) {
-			bool old_force_off = rctx->render_cond_force_off;
-			rctx->render_cond_force_off = true;
-
-			u_suballocator_alloc(
-				rctx->allocator_zeroed_memory, 8, 8,
-				&rquery->workaround_offset,
-				(struct pipe_resource **)&rquery->workaround_buf);
-
-			/* Reset to NULL to avoid a redundant SET_PREDICATION
-			 * from launching the compute grid.
-			 */
-			rctx->render_cond = NULL;
-
-			/* The suballocation can fail and leave the buffer
-			 * NULL. In that case skip the compute pass rather than
-			 * dereference it; predication then takes the regular
-			 * path for this query.
-			 */
-			if (rquery->workaround_buf) {
-				ctx->get_query_result_resource(
-					ctx, query, true, PIPE_QUERY_TYPE_U64, 0,
-					&rquery->workaround_buf->b.b, rquery->workaround_offset);
-
-				/* Setting this in the render cond atom is too
-				 * late, so set it here. */
-				rctx->flags |= rctx->screen->barrier_flags.L2_to_cp |
-					       R600_CONTEXT_FLUSH_FOR_RENDER_COND;
-			}
-
-			rctx->render_cond_force_off = old_force_off;
-		}
-	}
-
-	rctx->render_cond = query;
-	rctx->render_cond_invert = condition;
-	rctx->render_cond_mode = mode;
-
-	rctx->set_atom_dirty(rctx, atom, query != NULL);
-}
-
-/* Emit the stop packets for every query on the context's active list. The
- * queries stay on the list. */
-void si_suspend_queries(struct r600_common_context *ctx)
-{
-	struct r600_query_hw *active;
-
-	LIST_FOR_EACH_ENTRY(active, &ctx->active_queries, list)
-		r600_query_hw_emit_stop(ctx, active);
-
-	assert(ctx->num_cs_dw_queries_suspend == 0);
-}
-
-/* Return the number of command-stream dwords to reserve before resuming all
- * queries on query_list. */
-static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx,
-						    struct list_head *query_list)
-{
-	struct r600_query_hw *q;
-	/* guess for ZPASS enable or PERFECT_ZPASS_COUNT enable updates */
-	unsigned total = 13;
-
-	LIST_FOR_EACH_ENTRY(q, query_list, list) {
-		/* begin + end, plus a second end as a workaround for the fact
-		 * that num_cs_dw_nontimer_queries_suspend is incremented for
-		 * every resumed query, which raises the bar in need_cs_space
-		 * for queries about to be resumed.
-		 */
-		total += q->num_cs_dw_begin + 2 * q->num_cs_dw_end;
-	}
-
-	return total;
-}
-
-/* Emit the start packets for every query on the context's active list,
- * after reserving enough command-stream space for all of them. */
-void si_resume_queries(struct r600_common_context *ctx)
-{
-	struct r600_query_hw *active;
-	unsigned needed_dw;
-
-	needed_dw = r600_queries_num_cs_dw_for_resuming(ctx, &ctx->active_queries);
-
-	assert(ctx->num_cs_dw_queries_suspend == 0);
-
-	/* Check CS space here. Resuming must not be interrupted by flushes. */
-	ctx->need_gfx_cs_space(&ctx->b, needed_dw, true);
-
-	LIST_FOR_EACH_ENTRY(active, &ctx->active_queries, list)
-		r600_query_hw_emit_start(ctx, active);
-}
-
-/* Initializer helpers for the entries of r600_driver_query_list.
- *
- * XFULL expands to a designated initializer, pasting its arguments onto the
- * R600_QUERY_, PIPE_DRIVER_QUERY_TYPE_ and PIPE_DRIVER_QUERY_RESULT_TYPE_
- * prefixes. X is XFULL with group id ~(unsigned)0, the value
- * r600_get_driver_query_info treats as "no group". XG is XFULL with the
- * group id R600_QUERY_GROUP_<group_>.
- */
-#define XFULL(name_, query_type_, type_, result_type_, group_id_) \
- { \
- .name = name_, \
- .query_type = R600_QUERY_##query_type_, \
- .type = PIPE_DRIVER_QUERY_TYPE_##type_, \
- .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \
- .group_id = group_id_ \
- }
-
-#define X(name_, query_type_, type_, result_type_) \
- XFULL(name_, query_type_, type_, result_type_, ~(unsigned)0)
-
-#define XG(group_, name_, query_type_, type_, result_type_) \
- XFULL(name_, query_type_, type_, result_type_, R600_QUERY_GROUP_##group_)
-
-/* Table of driver-specific queries reported by r600_get_driver_query_info.
- * r600_get_num_queries exposes only a leading part of this table on some
- * kernel/chip combinations, so the order of the trailing entries matters.
- */
-static struct pipe_driver_query_info r600_driver_query_list[] = {
- X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE),
- X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE),
- X("num-shader-cache-hits", NUM_SHADER_CACHE_HITS, UINT64, CUMULATIVE),
- X("draw-calls", DRAW_CALLS, UINT64, AVERAGE),
- X("decompress-calls", DECOMPRESS_CALLS, UINT64, AVERAGE),
- X("MRT-draw-calls", MRT_DRAW_CALLS, UINT64, AVERAGE),
- X("prim-restart-calls", PRIM_RESTART_CALLS, UINT64, AVERAGE),
- X("spill-draw-calls", SPILL_DRAW_CALLS, UINT64, AVERAGE),
- X("compute-calls", COMPUTE_CALLS, UINT64, AVERAGE),
- X("spill-compute-calls", SPILL_COMPUTE_CALLS, UINT64, AVERAGE),
- X("dma-calls", DMA_CALLS, UINT64, AVERAGE),
- X("cp-dma-calls", CP_DMA_CALLS, UINT64, AVERAGE),
- X("num-vs-flushes", NUM_VS_FLUSHES, UINT64, AVERAGE),
- X("num-ps-flushes", NUM_PS_FLUSHES, UINT64, AVERAGE),
- X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, AVERAGE),
- X("num-CB-cache-flushes", NUM_CB_CACHE_FLUSHES, UINT64, AVERAGE),
- X("num-DB-cache-flushes", NUM_DB_CACHE_FLUSHES, UINT64, AVERAGE),
- X("num-L2-invalidates", NUM_L2_INVALIDATES, UINT64, AVERAGE),
- X("num-L2-writebacks", NUM_L2_WRITEBACKS, UINT64, AVERAGE),
- X("num-resident-handles", NUM_RESIDENT_HANDLES, UINT64, AVERAGE),
- X("tc-offloaded-slots", TC_OFFLOADED_SLOTS, UINT64, AVERAGE),
- X("tc-direct-slots", TC_DIRECT_SLOTS, UINT64, AVERAGE),
- X("tc-num-syncs", TC_NUM_SYNCS, UINT64, AVERAGE),
- X("CS-thread-busy", CS_THREAD_BUSY, UINT64, AVERAGE),
- X("gallium-thread-busy", GALLIUM_THREAD_BUSY, UINT64, AVERAGE),
- X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE),
- X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE),
- X("mapped-VRAM", MAPPED_VRAM, BYTES, AVERAGE),
- X("mapped-GTT", MAPPED_GTT, BYTES, AVERAGE),
- X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE),
- X("num-mapped-buffers", NUM_MAPPED_BUFFERS, UINT64, AVERAGE),
- X("num-GFX-IBs", NUM_GFX_IBS, UINT64, AVERAGE),
- X("num-SDMA-IBs", NUM_SDMA_IBS, UINT64, AVERAGE),
- X("GFX-BO-list-size", GFX_BO_LIST_SIZE, UINT64, AVERAGE),
- X("GFX-IB-size", GFX_IB_SIZE, UINT64, AVERAGE),
- X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE),
- X("num-evictions", NUM_EVICTIONS, UINT64, CUMULATIVE),
- X("VRAM-CPU-page-faults", NUM_VRAM_CPU_PAGE_FAULTS, UINT64, CUMULATIVE),
- X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE),
- X("VRAM-vis-usage", VRAM_VIS_USAGE, BYTES, AVERAGE),
- X("GTT-usage", GTT_USAGE, BYTES, AVERAGE),
- X("back-buffer-ps-draw-ratio", BACK_BUFFER_PS_DRAW_RATIO, UINT64, AVERAGE),
-
- /* GPIN queries are for the benefit of old versions of GPUPerfStudio,
- * which use it as a fallback path to detect the GPU type.
- *
- * Note: The names of these queries are significant for GPUPerfStudio
- * (and possibly their order as well). */
- XG(GPIN, "GPIN_000", GPIN_ASIC_ID, UINT, AVERAGE),
- XG(GPIN, "GPIN_001", GPIN_NUM_SIMD, UINT, AVERAGE),
- XG(GPIN, "GPIN_002", GPIN_NUM_RB, UINT, AVERAGE),
- XG(GPIN, "GPIN_003", GPIN_NUM_SPI, UINT, AVERAGE),
- XG(GPIN, "GPIN_004", GPIN_NUM_SE, UINT, AVERAGE),
-
- X("temperature", GPU_TEMPERATURE, UINT64, AVERAGE),
- X("shader-clock", CURRENT_GPU_SCLK, HZ, AVERAGE),
- X("memory-clock", CURRENT_GPU_MCLK, HZ, AVERAGE),
-
- /* The following queries must be at the end of the list because their
- * availability is adjusted dynamically based on the DRM version. */
- X("GPU-load", GPU_LOAD, UINT64, AVERAGE),
- X("GPU-shaders-busy", GPU_SHADERS_BUSY, UINT64, AVERAGE),
- X("GPU-ta-busy", GPU_TA_BUSY, UINT64, AVERAGE),
- X("GPU-gds-busy", GPU_GDS_BUSY, UINT64, AVERAGE),
- X("GPU-vgt-busy", GPU_VGT_BUSY, UINT64, AVERAGE),
- X("GPU-ia-busy", GPU_IA_BUSY, UINT64, AVERAGE),
- X("GPU-sx-busy", GPU_SX_BUSY, UINT64, AVERAGE),
- X("GPU-wd-busy", GPU_WD_BUSY, UINT64, AVERAGE),
- X("GPU-bci-busy", GPU_BCI_BUSY, UINT64, AVERAGE),
- X("GPU-sc-busy", GPU_SC_BUSY, UINT64, AVERAGE),
- X("GPU-pa-busy", GPU_PA_BUSY, UINT64, AVERAGE),
- X("GPU-db-busy", GPU_DB_BUSY, UINT64, AVERAGE),
- X("GPU-cp-busy", GPU_CP_BUSY, UINT64, AVERAGE),
- X("GPU-cb-busy", GPU_CB_BUSY, UINT64, AVERAGE),
- X("GPU-sdma-busy", GPU_SDMA_BUSY, UINT64, AVERAGE),
- X("GPU-pfp-busy", GPU_PFP_BUSY, UINT64, AVERAGE),
- X("GPU-meq-busy", GPU_MEQ_BUSY, UINT64, AVERAGE),
- X("GPU-me-busy", GPU_ME_BUSY, UINT64, AVERAGE),
- X("GPU-surf-sync-busy", GPU_SURF_SYNC_BUSY, UINT64, AVERAGE),
- X("GPU-cp-dma-busy", GPU_CP_DMA_BUSY, UINT64, AVERAGE),
- X("GPU-scratch-ram-busy", GPU_SCRATCH_RAM_BUSY, UINT64, AVERAGE),
-};
-
-/* The helpers are only meant for the table above. */
-#undef X
-#undef XG
-#undef XFULL
-
-/* Return how many leading entries of r600_driver_query_list are exposed on
- * this screen. Entries are hidden from the end of the table depending on the
- * DRM version and chip class.
- *
- * NOTE(review): the table ends with 21 "GPU-*" entries preceded by three
- * sensor entries (24 in total), so hiding 25 also drops GPIN_004 - confirm
- * that this is intended.
- */
-static unsigned r600_get_num_queries(struct r600_common_screen *rscreen)
-{
-	unsigned hidden;
-
-	if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
-		hidden = 0;
-	else if (rscreen->info.drm_major == 3)
-		hidden = rscreen->chip_class >= VI ? 0 : 7;
-	else
-		hidden = 25;
-
-	return ARRAY_SIZE(r600_driver_query_list) - hidden;
-}
-
-/* pipe_screen::get_driver_query_info hook.
- *
- * With info == NULL, returns the total number of driver queries (table
- * entries plus performance counters). Otherwise fills *info for `index`:
- * indices past the table are forwarded to si_get_perfcounter_info, table
- * entries are copied and get their max_value filled in where one is known.
- * Returns 1 for a table entry.
- */
-static int r600_get_driver_query_info(struct pipe_screen *screen,
-				      unsigned index,
-				      struct pipe_driver_query_info *info)
-{
-	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
-	const unsigned table_count = r600_get_num_queries(rscreen);
-
-	/* Count-only request. */
-	if (info == NULL) {
-		unsigned perfcounter_count =
-			si_get_perfcounter_info(rscreen, 0, NULL);
-
-		return table_count + perfcounter_count;
-	}
-
-	/* Everything past the table is a performance counter. */
-	if (index >= table_count)
-		return si_get_perfcounter_info(rscreen, index - table_count, info);
-
-	*info = r600_driver_query_list[index];
-
-	switch (info->query_type) {
-	case R600_QUERY_GPU_TEMPERATURE:
-		info->max_value.u64 = 125;
-		break;
-	case R600_QUERY_VRAM_VIS_USAGE:
-		info->max_value.u64 = rscreen->info.vram_vis_size;
-		break;
-	case R600_QUERY_REQUESTED_GTT:
-	case R600_QUERY_GTT_USAGE:
-	case R600_QUERY_MAPPED_GTT:
-		info->max_value.u64 = rscreen->info.gart_size;
-		break;
-	case R600_QUERY_REQUESTED_VRAM:
-	case R600_QUERY_VRAM_USAGE:
-	case R600_QUERY_MAPPED_VRAM:
-		info->max_value.u64 = rscreen->info.vram_size;
-		break;
-	}
-
-	/* Grouped table entries are numbered after the perfcounter groups. */
-	if (rscreen->perfcounters && info->group_id != ~(unsigned)0)
-		info->group_id += rscreen->perfcounters->num_groups;
-
-	return 1;
-}
-
-/* pipe_screen::get_driver_query_group_info hook.
- *
- * Performance counter groups come first, followed by the software query
- * groups. With info == NULL the total number of groups is returned;
- * otherwise *info is filled for `index` and 1 is returned, or 0 if the index
- * is past the last group.
- *
- * Note: Unfortunately, GPUPerfStudio hardcodes the order of hardware
- * performance counter groups, so be careful when changing this and related
- * functions.
- */
-static int r600_get_driver_query_group_info(struct pipe_screen *screen,
-					    unsigned index,
-					    struct pipe_driver_query_group_info *info)
-{
-	struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
-	const unsigned pc_groups =
-		rscreen->perfcounters ? rscreen->perfcounters->num_groups : 0;
-
-	if (info == NULL)
-		return pc_groups + R600_NUM_SW_QUERY_GROUPS;
-
-	if (index < pc_groups)
-		return si_get_perfcounter_group_info(rscreen, index, info);
-
-	if (index - pc_groups >= R600_NUM_SW_QUERY_GROUPS)
-		return 0;
-
-	info->name = "GPIN";
-	info->max_active_queries = 5;
-	info->num_queries = 5;
-	return 1;
-}
-
-/* Install the query-related pipe_context hooks and the render-condition
- * atom emitter, and initialize the context's list of active queries.
- * The render_condition hook is only installed when the screen reports at
- * least one render backend.
- */
-void si_init_query_functions(struct r600_common_context *rctx)
-{
-	struct r600_common_screen *rscreen =
-		(struct r600_common_screen*)rctx->b.screen;
-
-	LIST_INITHEAD(&rctx->active_queries);
-
-	rctx->render_cond_atom.emit = r600_emit_query_predication;
-
-	rctx->b.create_query = r600_create_query;
-	rctx->b.create_batch_query = si_create_batch_query;
-	rctx->b.destroy_query = r600_destroy_query;
-	rctx->b.begin_query = r600_begin_query;
-	rctx->b.end_query = r600_end_query;
-	rctx->b.get_query_result = r600_get_query_result;
-	rctx->b.get_query_result_resource = r600_get_query_result_resource;
-
-	if (rscreen->info.num_render_backends > 0)
-		rctx->b.render_condition = r600_render_condition;
-}
-
-/* Install the driver-query enumeration hooks on the screen. */
-void si_init_screen_query_functions(struct r600_common_screen *rscreen)
-{
-	rscreen->b.get_driver_query_group_info = r600_get_driver_query_group_info;
-	rscreen->b.get_driver_query_info = r600_get_driver_query_info;
-}
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_query.h b/lib/mesa/src/gallium/drivers/radeon/r600_query.h
deleted file mode 100644
index 04943da36..000000000
--- a/lib/mesa/src/gallium/drivers/radeon/r600_query.h
+++ /dev/null
@@ -1,328 +0,0 @@
-/*
- * Copyright 2015 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors:
- * Nicolai Hähnle <nicolai.haehnle@amd.com>
- *
- */
-
-#ifndef R600_QUERY_H
-#define R600_QUERY_H
-
-#include "util/u_threaded_context.h"
-
-struct pipe_context;
-struct pipe_query;
-struct pipe_resource;
-
-struct r600_common_context;
-struct r600_common_screen;
-struct r600_query;
-struct r600_query_hw;
-struct r600_resource;
-
-enum {
- R600_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC,
- R600_QUERY_DECOMPRESS_CALLS,
- R600_QUERY_MRT_DRAW_CALLS,
- R600_QUERY_PRIM_RESTART_CALLS,
- R600_QUERY_SPILL_DRAW_CALLS,
- R600_QUERY_COMPUTE_CALLS,
- R600_QUERY_SPILL_COMPUTE_CALLS,
- R600_QUERY_DMA_CALLS,
- R600_QUERY_CP_DMA_CALLS,
- R600_QUERY_NUM_VS_FLUSHES,
- R600_QUERY_NUM_PS_FLUSHES,
- R600_QUERY_NUM_CS_FLUSHES,
- R600_QUERY_NUM_CB_CACHE_FLUSHES,
- R600_QUERY_NUM_DB_CACHE_FLUSHES,
- R600_QUERY_NUM_L2_INVALIDATES,
- R600_QUERY_NUM_L2_WRITEBACKS,
- R600_QUERY_NUM_RESIDENT_HANDLES,
- R600_QUERY_TC_OFFLOADED_SLOTS,
- R600_QUERY_TC_DIRECT_SLOTS,
- R600_QUERY_TC_NUM_SYNCS,
- R600_QUERY_CS_THREAD_BUSY,
- R600_QUERY_GALLIUM_THREAD_BUSY,
- R600_QUERY_REQUESTED_VRAM,
- R600_QUERY_REQUESTED_GTT,
- R600_QUERY_MAPPED_VRAM,
- R600_QUERY_MAPPED_GTT,
- R600_QUERY_BUFFER_WAIT_TIME,
- R600_QUERY_NUM_MAPPED_BUFFERS,
- R600_QUERY_NUM_GFX_IBS,
- R600_QUERY_NUM_SDMA_IBS,
- R600_QUERY_GFX_BO_LIST_SIZE,
- R600_QUERY_GFX_IB_SIZE,
- R600_QUERY_NUM_BYTES_MOVED,
- R600_QUERY_NUM_EVICTIONS,
- R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS,
- R600_QUERY_VRAM_USAGE,
- R600_QUERY_VRAM_VIS_USAGE,
- R600_QUERY_GTT_USAGE,
- R600_QUERY_GPU_TEMPERATURE,
- R600_QUERY_CURRENT_GPU_SCLK,
- R600_QUERY_CURRENT_GPU_MCLK,
- R600_QUERY_GPU_LOAD,
- R600_QUERY_GPU_SHADERS_BUSY,
- R600_QUERY_GPU_TA_BUSY,
- R600_QUERY_GPU_GDS_BUSY,
- R600_QUERY_GPU_VGT_BUSY,
- R600_QUERY_GPU_IA_BUSY,
- R600_QUERY_GPU_SX_BUSY,
- R600_QUERY_GPU_WD_BUSY,
- R600_QUERY_GPU_BCI_BUSY,
- R600_QUERY_GPU_SC_BUSY,
- R600_QUERY_GPU_PA_BUSY,
- R600_QUERY_GPU_DB_BUSY,
- R600_QUERY_GPU_CP_BUSY,
- R600_QUERY_GPU_CB_BUSY,
- R600_QUERY_GPU_SDMA_BUSY,
- R600_QUERY_GPU_PFP_BUSY,
- R600_QUERY_GPU_MEQ_BUSY,
- R600_QUERY_GPU_ME_BUSY,
- R600_QUERY_GPU_SURF_SYNC_BUSY,
- R600_QUERY_GPU_CP_DMA_BUSY,
- R600_QUERY_GPU_SCRATCH_RAM_BUSY,
- R600_QUERY_NUM_COMPILATIONS,
- R600_QUERY_NUM_SHADERS_CREATED,
- R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO,
- R600_QUERY_NUM_SHADER_CACHE_HITS,
- R600_QUERY_GPIN_ASIC_ID,
- R600_QUERY_GPIN_NUM_SIMD,
- R600_QUERY_GPIN_NUM_RB,
- R600_QUERY_GPIN_NUM_SPI,
- R600_QUERY_GPIN_NUM_SE,
-
- R600_QUERY_FIRST_PERFCOUNTER = PIPE_QUERY_DRIVER_SPECIFIC + 100,
-};
-
-enum {
- R600_QUERY_GROUP_GPIN = 0,
- R600_NUM_SW_QUERY_GROUPS
-};
-
-struct r600_query_ops {
- void (*destroy)(struct r600_common_screen *, struct r600_query *);
- bool (*begin)(struct r600_common_context *, struct r600_query *);
- bool (*end)(struct r600_common_context *, struct r600_query *);
- bool (*get_result)(struct r600_common_context *,
- struct r600_query *, bool wait,
- union pipe_query_result *result);
- void (*get_result_resource)(struct r600_common_context *,
- struct r600_query *, bool wait,
- enum pipe_query_value_type result_type,
- int index,
- struct pipe_resource *resource,
- unsigned offset);
-};
-
-struct r600_query {
- struct threaded_query b;
- struct r600_query_ops *ops;
-
- /* The type of query */
- unsigned type;
-};
-
-enum {
- R600_QUERY_HW_FLAG_NO_START = (1 << 0),
- /* gap */
- /* whether begin_query doesn't clear the result */
- R600_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2),
-};
-
-struct r600_query_hw_ops {
- bool (*prepare_buffer)(struct r600_common_screen *,
- struct r600_query_hw *,
- struct r600_resource *);
- void (*emit_start)(struct r600_common_context *,
- struct r600_query_hw *,
- struct r600_resource *buffer, uint64_t va);
- void (*emit_stop)(struct r600_common_context *,
- struct r600_query_hw *,
- struct r600_resource *buffer, uint64_t va);
- void (*clear_result)(struct r600_query_hw *, union pipe_query_result *);
- void (*add_result)(struct r600_common_screen *screen,
- struct r600_query_hw *, void *buffer,
- union pipe_query_result *result);
-};
-
-struct r600_query_buffer {
- /* The buffer where query results are stored. */
- struct r600_resource *buf;
- /* Offset of the next free result after current query data */
- unsigned results_end;
- /* If a query buffer is full, a new buffer is created and the old one
- * is put in here. When we calculate the result, we sum up the samples
- * from all buffers. */
- struct r600_query_buffer *previous;
-};
-
-struct r600_query_hw {
- struct r600_query b;
- struct r600_query_hw_ops *ops;
- unsigned flags;
-
- /* The query buffer and how many results are in it. */
- struct r600_query_buffer buffer;
- /* Size of the result in memory for both begin_query and end_query,
- * this can be one or two numbers, or it could even be a size of a structure. */
- unsigned result_size;
- /* The number of dwords for begin_query or end_query. */
- unsigned num_cs_dw_begin;
- unsigned num_cs_dw_end;
- /* Linked list of queries */
- struct list_head list;
- /* For transform feedback: which stream the query is for */
- unsigned stream;
-
- /* Workaround via compute shader */
- struct r600_resource *workaround_buf;
- unsigned workaround_offset;
-};
-
-bool si_query_hw_init(struct r600_common_screen *rscreen,
- struct r600_query_hw *query);
-void si_query_hw_destroy(struct r600_common_screen *rscreen,
- struct r600_query *rquery);
-bool si_query_hw_begin(struct r600_common_context *rctx,
- struct r600_query *rquery);
-bool si_query_hw_end(struct r600_common_context *rctx,
- struct r600_query *rquery);
-bool si_query_hw_get_result(struct r600_common_context *rctx,
- struct r600_query *rquery,
- bool wait,
- union pipe_query_result *result);
-
-/* Performance counters */
-enum {
- /* This block is part of the shader engine */
- R600_PC_BLOCK_SE = (1 << 0),
-
- /* Expose per-instance groups instead of summing all instances (within
- * an SE). */
- R600_PC_BLOCK_INSTANCE_GROUPS = (1 << 1),
-
- /* Expose per-SE groups instead of summing instances across SEs. */
- R600_PC_BLOCK_SE_GROUPS = (1 << 2),
-
- /* Shader block */
- R600_PC_BLOCK_SHADER = (1 << 3),
-
- /* Non-shader block with perfcounters windowed by shaders. */
- R600_PC_BLOCK_SHADER_WINDOWED = (1 << 4),
-};
-
-/* Describes a hardware block with performance counters. Multiple instances of
- * each block, possibly per-SE, may exist on the chip. Depending on the block
- * and on the user's configuration, we either
- * (a) expose every instance as a performance counter group,
- * (b) expose a single performance counter group that reports the sum over all
- * instances, or
- * (c) expose one performance counter group per instance, but summed over all
- * shader engines.
- */
-struct r600_perfcounter_block {
- const char *basename;
- unsigned flags;
- unsigned num_counters;
- unsigned num_selectors;
- unsigned num_instances;
-
- unsigned num_groups;
- char *group_names;
- unsigned group_name_stride;
-
- char *selector_names;
- unsigned selector_name_stride;
-
- void *data;
-};
-
-struct r600_perfcounters {
- unsigned num_groups;
- unsigned num_blocks;
- struct r600_perfcounter_block *blocks;
-
- unsigned num_start_cs_dwords;
- unsigned num_stop_cs_dwords;
- unsigned num_instance_cs_dwords;
- unsigned num_shaders_cs_dwords;
-
- unsigned num_shader_types;
- const char * const *shader_type_suffixes;
- const unsigned *shader_type_bits;
-
- void (*get_size)(struct r600_perfcounter_block *,
- unsigned count, unsigned *selectors,
- unsigned *num_select_dw, unsigned *num_read_dw);
-
- void (*emit_instance)(struct r600_common_context *,
- int se, int instance);
- void (*emit_shaders)(struct r600_common_context *, unsigned shaders);
- void (*emit_select)(struct r600_common_context *,
- struct r600_perfcounter_block *,
- unsigned count, unsigned *selectors);
- void (*emit_start)(struct r600_common_context *,
- struct r600_resource *buffer, uint64_t va);
- void (*emit_stop)(struct r600_common_context *,
- struct r600_resource *buffer, uint64_t va);
- void (*emit_read)(struct r600_common_context *,
- struct r600_perfcounter_block *,
- unsigned count, unsigned *selectors,
- struct r600_resource *buffer, uint64_t va);
-
- void (*cleanup)(struct r600_common_screen *);
-
- bool separate_se;
- bool separate_instance;
-};
-
-struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
- unsigned num_queries,
- unsigned *query_types);
-
-int si_get_perfcounter_info(struct r600_common_screen *,
- unsigned index,
- struct pipe_driver_query_info *info);
-int si_get_perfcounter_group_info(struct r600_common_screen *,
- unsigned index,
- struct pipe_driver_query_group_info *info);
-
-bool si_perfcounters_init(struct r600_perfcounters *, unsigned num_blocks);
-void si_perfcounters_add_block(struct r600_common_screen *,
- struct r600_perfcounters *,
- const char *name, unsigned flags,
- unsigned counters, unsigned selectors,
- unsigned instances, void *data);
-void si_perfcounters_do_destroy(struct r600_perfcounters *);
-void si_query_hw_reset_buffers(struct r600_common_context *rctx,
- struct r600_query_hw *query);
-
-struct r600_qbo_state {
- void *saved_compute;
- struct pipe_constant_buffer saved_const0;
- struct pipe_shader_buffer saved_ssbo[3];
-};
-
-#endif /* R600_QUERY_H */
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_test_dma.c b/lib/mesa/src/gallium/drivers/radeon/r600_test_dma.c
deleted file mode 100644
index f7002bc39..000000000
--- a/lib/mesa/src/gallium/drivers/radeon/r600_test_dma.c
+++ /dev/null
@@ -1,398 +0,0 @@
-/*
- * Copyright 2016 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-/* This file implements randomized SDMA texture blit tests. */
-
-#include "r600_pipe_common.h"
-#include "util/u_surface.h"
-#include "util/rand_xor.h"
-
-static uint64_t seed_xorshift128plus[2];
-
-#define RAND_NUM_SIZE 8
-
-/* The GPU blits are emulated on the CPU using these CPU textures. */
-
-struct cpu_texture {
- uint8_t *ptr;
- uint64_t size;
- uint64_t layer_stride;
- unsigned stride;
-};
-
-static void alloc_cpu_texture(struct cpu_texture *tex,
- struct pipe_resource *templ, int bpp)
-{
- tex->stride = align(templ->width0 * bpp, RAND_NUM_SIZE);
- tex->layer_stride = (uint64_t)tex->stride * templ->height0;
- tex->size = tex->layer_stride * templ->array_size;
- tex->ptr = malloc(tex->size);
- assert(tex->ptr);
-}
-
-static void set_random_pixels(struct pipe_context *ctx,
- struct pipe_resource *tex,
- struct cpu_texture *cpu)
-{
- struct pipe_transfer *t;
- uint8_t *map;
- int x,y,z;
-
- map = pipe_transfer_map_3d(ctx, tex, 0, PIPE_TRANSFER_WRITE,
- 0, 0, 0, tex->width0, tex->height0,
- tex->array_size, &t);
- assert(map);
-
- for (z = 0; z < tex->array_size; z++) {
- for (y = 0; y < tex->height0; y++) {
- uint64_t *ptr = (uint64_t*)
- (map + t->layer_stride*z + t->stride*y);
- uint64_t *ptr_cpu = (uint64_t*)
- (cpu->ptr + cpu->layer_stride*z + cpu->stride*y);
- unsigned size = cpu->stride / RAND_NUM_SIZE;
-
- assert(t->stride % RAND_NUM_SIZE == 0);
- assert(cpu->stride % RAND_NUM_SIZE == 0);
-
- for (x = 0; x < size; x++) {
- *ptr++ = *ptr_cpu++ =
- rand_xorshift128plus(seed_xorshift128plus);
- }
- }
- }
-
- pipe_transfer_unmap(ctx, t);
-}
-
-static bool compare_textures(struct pipe_context *ctx,
- struct pipe_resource *tex,
- struct cpu_texture *cpu, int bpp)
-{
- struct pipe_transfer *t;
- uint8_t *map;
- int y,z;
- bool pass = true;
-
- map = pipe_transfer_map_3d(ctx, tex, 0, PIPE_TRANSFER_READ,
- 0, 0, 0, tex->width0, tex->height0,
- tex->array_size, &t);
- assert(map);
-
- for (z = 0; z < tex->array_size; z++) {
- for (y = 0; y < tex->height0; y++) {
- uint8_t *ptr = map + t->layer_stride*z + t->stride*y;
- uint8_t *cpu_ptr = cpu->ptr +
- cpu->layer_stride*z + cpu->stride*y;
-
- if (memcmp(ptr, cpu_ptr, tex->width0 * bpp)) {
- pass = false;
- goto done;
- }
- }
- }
-done:
- pipe_transfer_unmap(ctx, t);
- return pass;
-}
-
-static enum pipe_format get_format_from_bpp(int bpp)
-{
- switch (bpp) {
- case 1:
- return PIPE_FORMAT_R8_UINT;
- case 2:
- return PIPE_FORMAT_R16_UINT;
- case 4:
- return PIPE_FORMAT_R32_UINT;
- case 8:
- return PIPE_FORMAT_R32G32_UINT;
- case 16:
- return PIPE_FORMAT_R32G32B32A32_UINT;
- default:
- assert(0);
- return PIPE_FORMAT_NONE;
- }
-}
-
-static const char *array_mode_to_string(struct r600_common_screen *rscreen,
- struct radeon_surf *surf)
-{
- if (rscreen->chip_class >= GFX9) {
- /* TODO */
- return " UNKNOWN";
- } else {
- switch (surf->u.legacy.level[0].mode) {
- case RADEON_SURF_MODE_LINEAR_ALIGNED:
- return "LINEAR_ALIGNED";
- case RADEON_SURF_MODE_1D:
- return "1D_TILED_THIN1";
- case RADEON_SURF_MODE_2D:
- return "2D_TILED_THIN1";
- default:
- assert(0);
- return " UNKNOWN";
- }
- }
-}
-
-static unsigned generate_max_tex_side(unsigned max_tex_side)
-{
- switch (rand() % 4) {
- case 0:
- /* Try to hit large sizes in 1/4 of the cases. */
- return max_tex_side;
- case 1:
- /* Try to hit 1D tiling in 1/4 of the cases. */
- return 128;
- default:
- /* Try to hit common sizes in 2/4 of the cases. */
- return 2048;
- }
-}
-
-void si_test_dma(struct r600_common_screen *rscreen)
-{
- struct pipe_screen *screen = &rscreen->b;
- struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
- struct r600_common_context *rctx = (struct r600_common_context*)ctx;
- uint64_t max_alloc_size;
- unsigned i, iterations, num_partial_copies, max_levels, max_tex_side;
- unsigned num_pass = 0, num_fail = 0;
-
- max_levels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS);
- max_tex_side = 1 << (max_levels - 1);
-
- /* Max 128 MB allowed for both textures. */
- max_alloc_size = 128 * 1024 * 1024;
-
- /* the seed for random test parameters */
- srand(0x9b47d95b);
- /* the seed for random pixel data */
- s_rand_xorshift128plus(seed_xorshift128plus, false);
-
- iterations = 1000000000; /* just kill it when you are bored */
- num_partial_copies = 30;
-
- /* These parameters are randomly generated per test:
- * - whether to do one whole-surface copy or N partial copies per test
- * - which tiling modes to use (LINEAR_ALIGNED, 1D, 2D)
- * - which texture dimensions to use
- * - whether to use VRAM (all tiling modes) and GTT (staging, linear
- * only) allocations
- * - random initial pixels in src
- * - generate random subrectangle copies for partial blits
- */
- for (i = 0; i < iterations; i++) {
- struct pipe_resource tsrc = {}, tdst = {}, *src, *dst;
- struct r600_texture *rdst;
- struct r600_texture *rsrc;
- struct cpu_texture src_cpu, dst_cpu;
- unsigned bpp, max_width, max_height, max_depth, j, num;
- unsigned gfx_blits = 0, dma_blits = 0, max_tex_side_gen;
- unsigned max_tex_layers;
- bool pass;
- bool do_partial_copies = rand() & 1;
-
- /* generate a random test case */
- tsrc.target = tdst.target = PIPE_TEXTURE_2D_ARRAY;
- tsrc.depth0 = tdst.depth0 = 1;
-
- bpp = 1 << (rand() % 5);
- tsrc.format = tdst.format = get_format_from_bpp(bpp);
-
- max_tex_side_gen = generate_max_tex_side(max_tex_side);
- max_tex_layers = rand() % 4 ? 1 : 5;
-
- tsrc.width0 = (rand() % max_tex_side_gen) + 1;
- tsrc.height0 = (rand() % max_tex_side_gen) + 1;
- tsrc.array_size = (rand() % max_tex_layers) + 1;
-
- /* Have a 1/4 chance of getting power-of-two dimensions. */
- if (rand() % 4 == 0) {
- tsrc.width0 = util_next_power_of_two(tsrc.width0);
- tsrc.height0 = util_next_power_of_two(tsrc.height0);
- }
-
- if (!do_partial_copies) {
- /* whole-surface copies only, same dimensions */
- tdst = tsrc;
- } else {
- max_tex_side_gen = generate_max_tex_side(max_tex_side);
- max_tex_layers = rand() % 4 ? 1 : 5;
-
- /* many partial copies, dimensions can be different */
- tdst.width0 = (rand() % max_tex_side_gen) + 1;
- tdst.height0 = (rand() % max_tex_side_gen) + 1;
- tdst.array_size = (rand() % max_tex_layers) + 1;
-
- /* Have a 1/4 chance of getting power-of-two dimensions. */
- if (rand() % 4 == 0) {
- tdst.width0 = util_next_power_of_two(tdst.width0);
- tdst.height0 = util_next_power_of_two(tdst.height0);
- }
- }
-
- /* check texture sizes */
- if ((uint64_t)tsrc.width0 * tsrc.height0 * tsrc.array_size * bpp +
- (uint64_t)tdst.width0 * tdst.height0 * tdst.array_size * bpp >
- max_alloc_size) {
- /* too large, try again */
- i--;
- continue;
- }
-
- /* VRAM + the tiling mode depends on dimensions (3/4 of cases),
- * or GTT + linear only (1/4 of cases)
- */
- tsrc.usage = rand() % 4 ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STAGING;
- tdst.usage = rand() % 4 ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STAGING;
-
- /* Allocate textures (both the GPU and CPU copies).
- * The CPU will emulate what the GPU should be doing.
- */
- src = screen->resource_create(screen, &tsrc);
- dst = screen->resource_create(screen, &tdst);
- assert(src);
- assert(dst);
- rdst = (struct r600_texture*)dst;
- rsrc = (struct r600_texture*)src;
- alloc_cpu_texture(&src_cpu, &tsrc, bpp);
- alloc_cpu_texture(&dst_cpu, &tdst, bpp);
-
- printf("%4u: dst = (%5u x %5u x %u, %s), "
- " src = (%5u x %5u x %u, %s), bpp = %2u, ",
- i, tdst.width0, tdst.height0, tdst.array_size,
- array_mode_to_string(rscreen, &rdst->surface),
- tsrc.width0, tsrc.height0, tsrc.array_size,
- array_mode_to_string(rscreen, &rsrc->surface), bpp);
- fflush(stdout);
-
- /* set src pixels */
- set_random_pixels(ctx, src, &src_cpu);
-
- /* clear dst pixels */
- rctx->clear_buffer(ctx, dst, 0, rdst->surface.surf_size, 0, true);
- memset(dst_cpu.ptr, 0, dst_cpu.layer_stride * tdst.array_size);
-
- /* preparation */
- max_width = MIN2(tsrc.width0, tdst.width0);
- max_height = MIN2(tsrc.height0, tdst.height0);
- max_depth = MIN2(tsrc.array_size, tdst.array_size);
-
- num = do_partial_copies ? num_partial_copies : 1;
- for (j = 0; j < num; j++) {
- int width, height, depth;
- int srcx, srcy, srcz, dstx, dsty, dstz;
- struct pipe_box box;
- unsigned old_num_draw_calls = rctx->num_draw_calls;
- unsigned old_num_dma_calls = rctx->num_dma_calls;
-
- if (!do_partial_copies) {
- /* copy whole src to dst */
- width = max_width;
- height = max_height;
- depth = max_depth;
-
- srcx = srcy = srcz = dstx = dsty = dstz = 0;
- } else {
- /* random sub-rectangle copies from src to dst */
- depth = (rand() % max_depth) + 1;
- srcz = rand() % (tsrc.array_size - depth + 1);
- dstz = rand() % (tdst.array_size - depth + 1);
-
- /* special code path to hit the tiled partial copies */
- if (!rsrc->surface.is_linear &&
- !rdst->surface.is_linear &&
- rand() & 1) {
- if (max_width < 8 || max_height < 8)
- continue;
- width = ((rand() % (max_width / 8)) + 1) * 8;
- height = ((rand() % (max_height / 8)) + 1) * 8;
-
- srcx = rand() % (tsrc.width0 - width + 1) & ~0x7;
- srcy = rand() % (tsrc.height0 - height + 1) & ~0x7;
-
- dstx = rand() % (tdst.width0 - width + 1) & ~0x7;
- dsty = rand() % (tdst.height0 - height + 1) & ~0x7;
- } else {
- /* just make sure that it doesn't divide by zero */
- assert(max_width > 0 && max_height > 0);
-
- width = (rand() % max_width) + 1;
- height = (rand() % max_height) + 1;
-
- srcx = rand() % (tsrc.width0 - width + 1);
- srcy = rand() % (tsrc.height0 - height + 1);
-
- dstx = rand() % (tdst.width0 - width + 1);
- dsty = rand() % (tdst.height0 - height + 1);
- }
-
- /* special code path to hit out-of-bounds reads in L2T */
- if (rsrc->surface.is_linear &&
- !rdst->surface.is_linear &&
- rand() % 4 == 0) {
- srcx = 0;
- srcy = 0;
- srcz = 0;
- }
- }
-
- /* GPU copy */
- u_box_3d(srcx, srcy, srcz, width, height, depth, &box);
- rctx->dma_copy(ctx, dst, 0, dstx, dsty, dstz, src, 0, &box);
-
- /* See which engine was used. */
- gfx_blits += rctx->num_draw_calls > old_num_draw_calls;
- dma_blits += rctx->num_dma_calls > old_num_dma_calls;
-
- /* CPU copy */
- util_copy_box(dst_cpu.ptr, tdst.format, dst_cpu.stride,
- dst_cpu.layer_stride,
- dstx, dsty, dstz, width, height, depth,
- src_cpu.ptr, src_cpu.stride,
- src_cpu.layer_stride,
- srcx, srcy, srcz);
- }
-
- pass = compare_textures(ctx, dst, &dst_cpu, bpp);
- if (pass)
- num_pass++;
- else
- num_fail++;
-
- printf("BLITs: GFX = %2u, DMA = %2u, %s [%u/%u]\n",
- gfx_blits, dma_blits, pass ? "pass" : "fail",
- num_pass, num_pass+num_fail);
-
- /* cleanup */
- pipe_resource_reference(&src, NULL);
- pipe_resource_reference(&dst, NULL);
- free(src_cpu.ptr);
- free(dst_cpu.ptr);
- }
-
- ctx->destroy(ctx);
- exit(0);
-}
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_texture.c b/lib/mesa/src/gallium/drivers/radeon/r600_texture.c
deleted file mode 100644
index 3d623c251..000000000
--- a/lib/mesa/src/gallium/drivers/radeon/r600_texture.c
+++ /dev/null
@@ -1,2933 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jerome Glisse
- * Corbin Simpson
- */
-#include "r600_pipe_common.h"
-#include "r600_cs.h"
-#include "r600_query.h"
-#include "util/u_format.h"
-#include "util/u_log.h"
-#include "util/u_memory.h"
-#include "util/u_pack_color.h"
-#include "util/u_surface.h"
-#include "os/os_time.h"
-#include <errno.h>
-#include <inttypes.h>
-#include "state_tracker/drm_driver.h"
-#include "amd/common/sid.h"
-
-static void r600_texture_discard_cmask(struct r600_common_screen *rscreen,
- struct r600_texture *rtex);
-static enum radeon_surf_mode
-r600_choose_tiling(struct r600_common_screen *rscreen,
- const struct pipe_resource *templ);
-
-
-bool si_prepare_for_dma_blit(struct r600_common_context *rctx,
- struct r600_texture *rdst,
- unsigned dst_level, unsigned dstx,
- unsigned dsty, unsigned dstz,
- struct r600_texture *rsrc,
- unsigned src_level,
- const struct pipe_box *src_box)
-{
- if (!rctx->dma.cs)
- return false;
-
- if (rdst->surface.bpe != rsrc->surface.bpe)
- return false;
-
- /* MSAA: Blits don't exist in the real world. */
- if (rsrc->resource.b.b.nr_samples > 1 ||
- rdst->resource.b.b.nr_samples > 1)
- return false;
-
- /* Depth-stencil surfaces:
- * When dst is linear, the DB->CB copy preserves HTILE.
- * When dst is tiled, the 3D path must be used to update HTILE.
- */
- if (rsrc->is_depth || rdst->is_depth)
- return false;
-
- /* DCC as:
- * src: Use the 3D path. DCC decompression is expensive.
- * dst: Use the 3D path to compress the pixels with DCC.
- */
- if (vi_dcc_enabled(rsrc, src_level) ||
- vi_dcc_enabled(rdst, dst_level))
- return false;
-
- /* CMASK as:
- * src: Both texture and SDMA paths need decompression. Use SDMA.
- * dst: If overwriting the whole texture, discard CMASK and use
- * SDMA. Otherwise, use the 3D path.
- */
- if (rdst->cmask.size && rdst->dirty_level_mask & (1 << dst_level)) {
- /* The CMASK clear is only enabled for the first level. */
- assert(dst_level == 0);
- if (!util_texrange_covers_whole_level(&rdst->resource.b.b, dst_level,
- dstx, dsty, dstz, src_box->width,
- src_box->height, src_box->depth))
- return false;
-
- r600_texture_discard_cmask(rctx->screen, rdst);
- }
-
- /* All requirements are met. Prepare textures for SDMA. */
- if (rsrc->cmask.size && rsrc->dirty_level_mask & (1 << src_level))
- rctx->b.flush_resource(&rctx->b, &rsrc->resource.b.b);
-
- assert(!(rsrc->dirty_level_mask & (1 << src_level)));
- assert(!(rdst->dirty_level_mask & (1 << dst_level)));
-
- return true;
-}
-
-/* Same as resource_copy_region, except that both upsampling and downsampling are allowed. */
-static void r600_copy_region_with_blit(struct pipe_context *pipe,
- struct pipe_resource *dst,
- unsigned dst_level,
- unsigned dstx, unsigned dsty, unsigned dstz,
- struct pipe_resource *src,
- unsigned src_level,
- const struct pipe_box *src_box)
-{
- struct pipe_blit_info blit;
-
- memset(&blit, 0, sizeof(blit));
- blit.src.resource = src;
- blit.src.format = src->format;
- blit.src.level = src_level;
- blit.src.box = *src_box;
- blit.dst.resource = dst;
- blit.dst.format = dst->format;
- blit.dst.level = dst_level;
- blit.dst.box.x = dstx;
- blit.dst.box.y = dsty;
- blit.dst.box.z = dstz;
- blit.dst.box.width = src_box->width;
- blit.dst.box.height = src_box->height;
- blit.dst.box.depth = src_box->depth;
- blit.mask = util_format_get_mask(src->format) &
- util_format_get_mask(dst->format);
- blit.filter = PIPE_TEX_FILTER_NEAREST;
-
- if (blit.mask) {
- pipe->blit(pipe, &blit);
- }
-}
-
-/* Copy from a full GPU texture to a transfer's staging one. */
-static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
-{
- struct r600_common_context *rctx = (struct r600_common_context*)ctx;
- struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
- struct pipe_resource *dst = &rtransfer->staging->b.b;
- struct pipe_resource *src = transfer->resource;
-
- if (src->nr_samples > 1) {
- r600_copy_region_with_blit(ctx, dst, 0, 0, 0, 0,
- src, transfer->level, &transfer->box);
- return;
- }
-
- rctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level,
- &transfer->box);
-}
-
-/* Copy from a transfer's staging texture to a full GPU one. */
-static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
-{
- struct r600_common_context *rctx = (struct r600_common_context*)ctx;
- struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
- struct pipe_resource *dst = transfer->resource;
- struct pipe_resource *src = &rtransfer->staging->b.b;
- struct pipe_box sbox;
-
- u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height, transfer->box.depth, &sbox);
-
- if (dst->nr_samples > 1) {
- r600_copy_region_with_blit(ctx, dst, transfer->level,
- transfer->box.x, transfer->box.y, transfer->box.z,
- src, 0, &sbox);
- return;
- }
-
- rctx->dma_copy(ctx, dst, transfer->level,
- transfer->box.x, transfer->box.y, transfer->box.z,
- src, 0, &sbox);
-}
-
-static unsigned r600_texture_get_offset(struct r600_common_screen *rscreen,
- struct r600_texture *rtex, unsigned level,
- const struct pipe_box *box,
- unsigned *stride,
- unsigned *layer_stride)
-{
- if (rscreen->chip_class >= GFX9) {
- *stride = rtex->surface.u.gfx9.surf_pitch * rtex->surface.bpe;
- *layer_stride = rtex->surface.u.gfx9.surf_slice_size;
-
- if (!box)
- return 0;
-
- /* Each texture is an array of slices. Each slice is an array
- * of mipmap levels. */
- return box->z * rtex->surface.u.gfx9.surf_slice_size +
- rtex->surface.u.gfx9.offset[level] +
- (box->y / rtex->surface.blk_h *
- rtex->surface.u.gfx9.surf_pitch +
- box->x / rtex->surface.blk_w) * rtex->surface.bpe;
- } else {
- *stride = rtex->surface.u.legacy.level[level].nblk_x *
- rtex->surface.bpe;
- *layer_stride = rtex->surface.u.legacy.level[level].slice_size;
-
- if (!box)
- return rtex->surface.u.legacy.level[level].offset;
-
- /* Each texture is an array of mipmap levels. Each level is
- * an array of slices. */
- return rtex->surface.u.legacy.level[level].offset +
- box->z * rtex->surface.u.legacy.level[level].slice_size +
- (box->y / rtex->surface.blk_h *
- rtex->surface.u.legacy.level[level].nblk_x +
- box->x / rtex->surface.blk_w) * rtex->surface.bpe;
- }
-}
-
-/**
- * Compute the surface layout for a texture template.
- *
- * Derives the bytes-per-element and the RADEON_SURF_* flags from the
- * template and the boolean arguments, asks the winsys to initialize
- * \p surface, and then applies the optional pitch override and base offset.
- *
- * \return 0 on success, otherwise the error code from ws->surface_init.
- */
-static int r600_init_surface(struct r600_common_screen *rscreen,
-			     struct radeon_surf *surface,
-			     const struct pipe_resource *ptex,
-			     enum radeon_surf_mode array_mode,
-			     unsigned pitch_in_bytes_override,
-			     unsigned offset,
-			     bool is_imported,
-			     bool is_scanout,
-			     bool is_flushed_depth,
-			     bool tc_compatible_htile)
-{
-	const struct util_format_description *fmt =
-		util_format_description(ptex->format);
-	const bool has_depth = util_format_has_depth(fmt);
-	const bool has_stencil = util_format_has_stencil(fmt);
-	unsigned surf_flags = 0;
-	unsigned elem_bytes, pitch_elems, lvl;
-	int err;
-
-	if (ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
-	    !is_flushed_depth) {
-		/* stencil is allocated separately on evergreen */
-		elem_bytes = 4;
-	} else {
-		elem_bytes = util_format_get_blocksize(ptex->format);
-		assert(util_is_power_of_two(elem_bytes));
-	}
-
-	if (has_depth && !is_flushed_depth) {
-		surf_flags |= RADEON_SURF_ZBUFFER;
-
-		if (tc_compatible_htile &&
-		    (array_mode == RADEON_SURF_MODE_2D ||
-		     rscreen->chip_class >= GFX9)) {
-			/* TC-compatible HTILE only supports Z32_FLOAT.
-			 * GFX9 also supports Z16_UNORM.
-			 * On VI, promote Z16 to Z32. DB->CB copies will convert
-			 * the format for transfers.
-			 */
-			if (rscreen->chip_class == VI)
-				elem_bytes = 4;
-
-			surf_flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
-		}
-
-		if (has_stencil)
-			surf_flags |= RADEON_SURF_SBUFFER;
-	}
-
-	if (rscreen->chip_class >= VI) {
-		if ((ptex->flags & R600_RESOURCE_FLAG_DISABLE_DCC) ||
-		    ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT)
-			surf_flags |= RADEON_SURF_DISABLE_DCC;
-	}
-
-	if ((ptex->bind & PIPE_BIND_SCANOUT) || is_scanout) {
-		/* This should catch bugs in gallium users setting incorrect flags. */
-		assert(ptex->nr_samples <= 1 &&
-		       ptex->array_size == 1 &&
-		       ptex->depth0 == 1 &&
-		       ptex->last_level == 0 &&
-		       !(surf_flags & RADEON_SURF_Z_OR_SBUFFER));
-
-		surf_flags |= RADEON_SURF_SCANOUT;
-	}
-
-	if (ptex->bind & PIPE_BIND_SHARED)
-		surf_flags |= RADEON_SURF_SHAREABLE;
-	if (is_imported)
-		surf_flags |= RADEON_SURF_IMPORTED | RADEON_SURF_SHAREABLE;
-	if (!(ptex->flags & R600_RESOURCE_FLAG_FORCE_TILING))
-		surf_flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;
-
-	err = rscreen->ws->surface_init(rscreen->ws, ptex, surf_flags,
-					elem_bytes, array_mode, surface);
-	if (err)
-		return err;
-
-	/* A zero override leaves the winsys-computed pitch untouched. */
-	pitch_elems = pitch_in_bytes_override / elem_bytes;
-
-	if (rscreen->chip_class >= GFX9) {
-		if (pitch_elems) {
-			surface->u.gfx9.surf_pitch = pitch_elems;
-			surface->u.gfx9.surf_slice_size =
-				(uint64_t)pitch_elems *
-				surface->u.gfx9.surf_height * elem_bytes;
-		}
-		surface->u.gfx9.surf_offset = offset;
-		return 0;
-	}
-
-	if (pitch_elems) {
-		surface->u.legacy.level[0].nblk_x = pitch_elems;
-		surface->u.legacy.level[0].slice_size =
-			((uint64_t)pitch_elems *
-			 surface->u.legacy.level[0].nblk_y * elem_bytes);
-	}
-
-	if (offset) {
-		/* Shift every mip level by the base offset. */
-		for (lvl = 0; lvl < ARRAY_SIZE(surface->u.legacy.level); ++lvl)
-			surface->u.legacy.level[lvl].offset += offset;
-	}
-
-	return 0;
-}
-
-/**
- * Fill \p metadata with the tiling description of \p rtex.
- *
- * The structure is zeroed first; then either the GFX9 swizzle mode or the
- * legacy tiling parameters are copied out of the texture's surface.
- */
-static void r600_texture_init_metadata(struct r600_common_screen *rscreen,
-				       struct r600_texture *rtex,
-				       struct radeon_bo_metadata *metadata)
-{
-	const struct radeon_surf *surf = &rtex->surface;
-
-	memset(metadata, 0, sizeof(*metadata));
-
-	if (rscreen->chip_class >= GFX9) {
-		metadata->u.gfx9.swizzle_mode = surf->u.gfx9.surf.swizzle_mode;
-		return;
-	}
-
-	/* Level 0 decides whether the layout is reported as micro/macro tiled. */
-	if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D)
-		metadata->u.legacy.microtile = RADEON_LAYOUT_TILED;
-	else
-		metadata->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
-
-	if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D)
-		metadata->u.legacy.macrotile = RADEON_LAYOUT_TILED;
-	else
-		metadata->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
-
-	metadata->u.legacy.pipe_config = surf->u.legacy.pipe_config;
-	metadata->u.legacy.bankw = surf->u.legacy.bankw;
-	metadata->u.legacy.bankh = surf->u.legacy.bankh;
-	metadata->u.legacy.tile_split = surf->u.legacy.tile_split;
-	metadata->u.legacy.mtilea = surf->u.legacy.mtilea;
-	metadata->u.legacy.num_banks = surf->u.legacy.num_banks;
-	metadata->u.legacy.stride = surf->u.legacy.level[0].nblk_x * surf->bpe;
-	metadata->u.legacy.scanout = (surf->flags & RADEON_SURF_SCANOUT) != 0;
-}
-
-/**
- * Translate buffer metadata into surface parameters.
- *
- * Copies the tiling parameters from \p metadata into \p surf and reports
- * the resulting array mode and scanout state through \p array_mode and
- * \p is_scanout.
- */
-static void r600_surface_import_metadata(struct r600_common_screen *rscreen,
-					 struct radeon_surf *surf,
-					 struct radeon_bo_metadata *metadata,
-					 enum radeon_surf_mode *array_mode,
-					 bool *is_scanout)
-{
-	if (rscreen->chip_class >= GFX9) {
-		surf->u.gfx9.surf.swizzle_mode = metadata->u.gfx9.swizzle_mode;
-
-		/* Swizzle mode 0 selects linear; anything else is imported as 2D. */
-		*array_mode = metadata->u.gfx9.swizzle_mode > 0 ?
-			      RADEON_SURF_MODE_2D :
-			      RADEON_SURF_MODE_LINEAR_ALIGNED;
-
-		*is_scanout = metadata->u.gfx9.swizzle_mode == 0 ||
-			      metadata->u.gfx9.swizzle_mode % 4 == 2;
-		return;
-	}
-
-	surf->u.legacy.pipe_config = metadata->u.legacy.pipe_config;
-	surf->u.legacy.bankw = metadata->u.legacy.bankw;
-	surf->u.legacy.bankh = metadata->u.legacy.bankh;
-	surf->u.legacy.tile_split = metadata->u.legacy.tile_split;
-	surf->u.legacy.mtilea = metadata->u.legacy.mtilea;
-	surf->u.legacy.num_banks = metadata->u.legacy.num_banks;
-
-	/* Macrotiling takes precedence over microtiling. */
-	if (metadata->u.legacy.macrotile == RADEON_LAYOUT_TILED) {
-		*array_mode = RADEON_SURF_MODE_2D;
-	} else if (metadata->u.legacy.microtile == RADEON_LAYOUT_TILED) {
-		*array_mode = RADEON_SURF_MODE_1D;
-	} else {
-		*array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
-	}
-
-	*is_scanout = metadata->u.legacy.scanout;
-}
-
-/**
- * Flush the resource and then the context so pending fast clears are
- * eliminated. When operating on the screen's aux context, aux_context_lock
- * is held around both calls.
- */
-static void r600_eliminate_fast_color_clear(struct r600_common_context *rctx,
-					    struct r600_texture *rtex)
-{
-	struct r600_common_screen *rscreen = rctx->screen;
-	struct pipe_context *pipe = &rctx->b;
-	const bool uses_aux_ctx = (pipe == rscreen->aux_context);
-
-	if (uses_aux_ctx)
-		mtx_lock(&rscreen->aux_context_lock);
-
-	pipe->flush_resource(pipe, &rtex->resource.b.b);
-	pipe->flush(pipe, NULL, 0);
-
-	if (uses_aux_ctx)
-		mtx_unlock(&rscreen->aux_context_lock);
-}
-
-/**
- * Drop the CMASK of a texture, if it has one.
- *
- * Resets the CMASK info, clears the FAST_CLEAR bit in cb_color_info,
- * releases a separately allocated CMASK buffer and bumps the screen
- * counters that other contexts watch.
- */
-static void r600_texture_discard_cmask(struct r600_common_screen *rscreen,
-				       struct r600_texture *rtex)
-{
-	/* No CMASK allocated: nothing to do. */
-	if (rtex->cmask.size == 0)
-		return;
-
-	assert(rtex->resource.b.b.nr_samples <= 1);
-
-	/* Disable CMASK. */
-	memset(&rtex->cmask, 0, sizeof(rtex->cmask));
-	rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8;
-	rtex->dirty_level_mask = 0;
-	rtex->cb_color_info &= ~S_028C70_FAST_CLEAR(1);
-
-	/* Only drop the reference when CMASK lives in its own buffer. */
-	if (rtex->cmask_buffer != &rtex->resource)
-		r600_resource_reference(&rtex->cmask_buffer, NULL);
-
-	/* Notify all contexts about the change. */
-	p_atomic_inc(&rscreen->dirty_tex_counter);
-	p_atomic_inc(&rscreen->compressed_colortex_counter);
-}
-
-/**
- * Tell whether DCC may be turned off for this texture.
- *
- * \return true if the texture has DCC and is either not shared or not
- *         exported with write usage.
- */
-static bool r600_can_disable_dcc(struct r600_texture *rtex)
-{
-	if (!rtex->dcc_offset)
-		return false;
-
-	if (!rtex->resource.b.is_shared)
-		return true;
-
-	/* We can't disable DCC if it can be written by another process. */
-	return !(rtex->resource.external_usage & PIPE_HANDLE_USAGE_WRITE);
-}
-
-/**
- * Forget the DCC metadata of a texture without decompressing it.
- *
- * \return true if DCC was disabled, false if it cannot be disabled.
- */
-static bool r600_texture_discard_dcc(struct r600_common_screen *rscreen,
-				     struct r600_texture *rtex)
-{
-	if (r600_can_disable_dcc(rtex)) {
-		assert(rtex->dcc_separate_buffer == NULL);
-
-		/* Disable DCC. */
-		rtex->dcc_offset = 0;
-
-		/* Notify all contexts about the change. */
-		p_atomic_inc(&rscreen->dirty_tex_counter);
-		return true;
-	}
-
-	return false;
-}
-
-/**
- * Disable DCC for the texture: decompress first, then discard the metadata.
- *
- * There is an unresolved multi-context synchronization issue between
- * screen::aux_context and the current context. If applications do this with
- * multiple contexts, it's already undefined behavior for them and we don't
- * have to worry about that. The scenario is:
- *
- * If context 1 disables DCC and context 2 has queued commands that write
- * to the texture via CB with DCC enabled, and the order of operations is
- * as follows:
- *   context 2 queues draw calls rendering to the texture, but doesn't flush
- *   context 1 disables DCC and flushes
- *   context 1 & 2 reset descriptors and FB state
- *   context 2 flushes (new compressed tiles written by the draw calls)
- *   context 1 & 2 read garbage, because DCC is disabled, yet there are
- *   compressed tiles
- *
- * \param rctx  the current context if you have one, or rscreen->aux_context
- *              if you don't.
- * \return true if DCC was disabled, false if it cannot be disabled.
- */
-bool si_texture_disable_dcc(struct r600_common_context *rctx,
-			    struct r600_texture *rtex)
-{
-	struct r600_common_screen *rscreen = rctx->screen;
-	struct pipe_context *pipe = &rctx->b;
-	bool uses_aux_ctx;
-
-	if (!r600_can_disable_dcc(rtex))
-		return false;
-
-	/* The aux context is used under aux_context_lock. */
-	uses_aux_ctx = (pipe == rscreen->aux_context);
-	if (uses_aux_ctx)
-		mtx_lock(&rscreen->aux_context_lock);
-
-	/* Decompress DCC. */
-	rctx->decompress_dcc(pipe, rtex);
-	pipe->flush(pipe, NULL, 0);
-
-	if (uses_aux_ctx)
-		mtx_unlock(&rscreen->aux_context_lock);
-
-	return r600_texture_discard_dcc(rscreen, rtex);
-}
-
-/**
- * Replace the storage of a texture with a freshly created one, keeping the
- * pipe_resource pointer that callers hold.
- *
- * A new texture is created from the old template with \p new_bind_flag
- * added, the pixels are optionally copied over, and then the buffer and all
- * layout-describing fields of the new texture are moved into \p rtex.
- *
- * The function silently does nothing if the texture is already shared, if a
- * linear layout was requested but is not possible (or already in place), or
- * if the new texture cannot be created.
- *
- * \param rctx                context used for the DMA copy
- * \param rtex                texture whose storage is replaced
- * \param new_bind_flag       PIPE_BIND_* flag to add to the template
- * \param invalidate_storage  if true, the old contents are not copied
- */
-static void r600_reallocate_texture_inplace(struct r600_common_context *rctx,
-					    struct r600_texture *rtex,
-					    unsigned new_bind_flag,
-					    bool invalidate_storage)
-{
-	struct pipe_screen *screen = rctx->b.screen;
-	struct r600_texture *new_tex;
-	struct pipe_resource templ = rtex->resource.b.b;
-	unsigned i;
-
-	templ.bind |= new_bind_flag;
-
-	if (rtex->resource.b.is_shared)
-		return;
-
-	if (new_bind_flag == PIPE_BIND_LINEAR) {
-		if (rtex->surface.is_linear)
-			return;
-
-		/* This fails with MSAA, depth, and compressed textures. */
-		if (r600_choose_tiling(rctx->screen, &templ) !=
-		    RADEON_SURF_MODE_LINEAR_ALIGNED)
-			return;
-	}
-
-	new_tex = (struct r600_texture*)screen->resource_create(screen, &templ);
-	if (!new_tex)
-		return;
-
-	/* Copy the pixels to the new texture. */
-	if (!invalidate_storage) {
-		for (i = 0; i <= templ.last_level; i++) {
-			struct pipe_box box;
-
-			u_box_3d(0, 0, 0,
-				 u_minify(templ.width0, i), u_minify(templ.height0, i),
-				 util_max_layer(&templ, i) + 1, &box);
-
-			rctx->dma_copy(&rctx->b, &new_tex->resource.b.b, i, 0, 0, 0,
-				       &rtex->resource.b.b, i, &box);
-		}
-	}
-
-	if (new_bind_flag == PIPE_BIND_LINEAR) {
-		r600_texture_discard_cmask(rctx->screen, rtex);
-		r600_texture_discard_dcc(rctx->screen, rtex);
-	}
-
-	/* Replace the structure fields of rtex. */
-	rtex->resource.b.b.bind = templ.bind;
-	pb_reference(&rtex->resource.buf, new_tex->resource.buf);
-	rtex->resource.gpu_address = new_tex->resource.gpu_address;
-	rtex->resource.vram_usage = new_tex->resource.vram_usage;
-	rtex->resource.gart_usage = new_tex->resource.gart_usage;
-	rtex->resource.bo_size = new_tex->resource.bo_size;
-	rtex->resource.bo_alignment = new_tex->resource.bo_alignment;
-	rtex->resource.domains = new_tex->resource.domains;
-	rtex->resource.flags = new_tex->resource.flags;
-	rtex->size = new_tex->size;
-	rtex->db_render_format = new_tex->db_render_format;
-	rtex->db_compatible = new_tex->db_compatible;
-	rtex->can_sample_z = new_tex->can_sample_z;
-	rtex->can_sample_s = new_tex->can_sample_s;
-	rtex->surface = new_tex->surface;
-	rtex->fmask = new_tex->fmask;
-	rtex->cmask = new_tex->cmask;
-	/* The DCC offset is derived from the layout of the new allocation, so
-	 * it has to follow the buffer like the other metadata offsets do;
-	 * keeping the old value would pair a stale offset with the new buffer.
-	 */
-	rtex->dcc_offset = new_tex->dcc_offset;
-	rtex->cb_color_info = new_tex->cb_color_info;
-	rtex->last_msaa_resolve_target_micro_mode = new_tex->last_msaa_resolve_target_micro_mode;
-	rtex->htile_offset = new_tex->htile_offset;
-	rtex->tc_compatible_htile = new_tex->tc_compatible_htile;
-	rtex->depth_cleared = new_tex->depth_cleared;
-	rtex->stencil_cleared = new_tex->stencil_cleared;
-	rtex->non_disp_tiling = new_tex->non_disp_tiling;
-	rtex->dcc_gather_statistics = new_tex->dcc_gather_statistics;
-	rtex->framebuffers_bound = new_tex->framebuffers_bound;
-
-	if (new_bind_flag == PIPE_BIND_LINEAR) {
-		assert(!rtex->htile_offset);
-		assert(!rtex->cmask.size);
-		assert(!rtex->fmask.size);
-		assert(!rtex->dcc_offset);
-		assert(!rtex->is_depth);
-	}
-
-	/* Drop our reference to the temporary texture object. */
-	r600_texture_reference(&new_tex, NULL);
-
-	p_atomic_inc(&rctx->screen->dirty_tex_counter);
-}
-/**
- * Export a texture or buffer as a winsys handle.
- *
- * For textures: MSAA and depth textures are rejected. Storage that cannot
- * be exported as-is is first reallocated in place, DCC is disabled when the
- * caller asks for write usage, fast clears are eliminated unless the caller
- * promised explicit flushes, and the buffer metadata is (re)written.
- * For buffers: unexportable storage is replaced by a new PIPE_BIND_SHARED
- * buffer with the old contents copied over.
- *
- * In both cases the resource is marked shared and its external usage is
- * recorded before the handle is requested from the winsys.
- *
- * \param screen    the screen owning the resource
- * \param ctx       context to use, or NULL to use the screen's aux context
- * \param resource  resource to export
- * \param whandle   handle description filled in by the winsys
- * \param usage     PIPE_HANDLE_USAGE_* flags requested by the caller
- * \return false if the resource cannot be exported, otherwise the result
- *         of ws->buffer_get_handle.
- */
-static boolean r600_texture_get_handle(struct pipe_screen* screen,
- struct pipe_context *ctx,
- struct pipe_resource *resource,
- struct winsys_handle *whandle,
- unsigned usage)
-{
- struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
- struct r600_common_context *rctx;
- struct r600_resource *res = (struct r600_resource*)resource;
- struct r600_texture *rtex = (struct r600_texture*)resource;
- struct radeon_bo_metadata metadata;
- bool update_metadata = false;
- unsigned stride, offset, slice_size;
- bool flush = false;
-
- ctx = threaded_context_unwrap_sync(ctx);
- rctx = (struct r600_common_context*)(ctx ? ctx : rscreen->aux_context); /* fall back to the aux context when none was given */
-
- if (resource->target != PIPE_BUFFER) {
- /* This is not supported now, but it might be required for OpenCL
- * interop in the future.
- */
- if (resource->nr_samples > 1 || rtex->is_depth)
- return false;
-
- /* Move a suballocated texture into a non-suballocated allocation.
- * The same path is taken for a non-zero tile swizzle and for
- * storage flagged NO_INTERPROCESS_SHARING when the handle type is
- * not KMS.
- */
- if (rscreen->ws->buffer_is_suballocated(res->buf) ||
- rtex->surface.tile_swizzle ||
- (rtex->resource.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
- whandle->type != DRM_API_HANDLE_TYPE_KMS)) {
- assert(!res->b.is_shared);
- r600_reallocate_texture_inplace(rctx, rtex,
- PIPE_BIND_SHARED, false);
- flush = true;
- assert(res->b.b.bind & PIPE_BIND_SHARED);
- assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
- assert(!(res->flags & RADEON_FLAG_NO_INTERPROCESS_SHARING));
- assert(rtex->surface.tile_swizzle == 0);
- }
-
- /* Since shader image stores don't support DCC on VI,
- * disable it for external clients that want write
- * access.
- */
- if (usage & PIPE_HANDLE_USAGE_WRITE && rtex->dcc_offset) {
- if (si_texture_disable_dcc(rctx, rtex)) {
- update_metadata = true;
- /* si_texture_disable_dcc flushes the context */
- flush = false;
- }
- }
-
- if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
- (rtex->cmask.size || rtex->dcc_offset)) {
- /* Eliminate fast clear (both CMASK and DCC) */
- r600_eliminate_fast_color_clear(rctx, rtex);
- /* eliminate_fast_color_clear flushes the context */
- flush = false;
-
- /* Disable CMASK if flush_resource isn't going
- * to be called.
- */
- if (rtex->cmask.size)
- r600_texture_discard_cmask(rscreen, rtex);
- }
-
- /* Set metadata on the first export, or again if DCC was just
- * disabled above.
- */
- if (!res->b.is_shared || update_metadata) {
- r600_texture_init_metadata(rscreen, rtex, &metadata);
- if (rscreen->query_opaque_metadata)
- rscreen->query_opaque_metadata(rscreen, rtex,
- &metadata);
-
- rscreen->ws->buffer_set_metadata(res->buf, &metadata);
- }
-
- if (rscreen->chip_class >= GFX9) {
- offset = rtex->surface.u.gfx9.surf_offset;
- stride = rtex->surface.u.gfx9.surf_pitch *
- rtex->surface.bpe;
- slice_size = rtex->surface.u.gfx9.surf_slice_size;
- } else {
- offset = rtex->surface.u.legacy.level[0].offset;
- stride = rtex->surface.u.legacy.level[0].nblk_x *
- rtex->surface.bpe;
- slice_size = rtex->surface.u.legacy.level[0].slice_size;
- }
- } else {
- /* Buffer exports are for the OpenCL interop. */
- /* Move a suballocated buffer into a non-suballocated allocation. */
- if (rscreen->ws->buffer_is_suballocated(res->buf) ||
- /* A DMABUF export always fails if the BO is local. */
- rtex->resource.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING) {
- assert(!res->b.is_shared);
-
- /* Allocate a new buffer with PIPE_BIND_SHARED. */
- struct pipe_resource templ = res->b.b;
- templ.bind |= PIPE_BIND_SHARED;
-
- struct pipe_resource *newb =
- screen->resource_create(screen, &templ);
- if (!newb)
- return false;
-
- /* Copy the old buffer contents to the new one. */
- struct pipe_box box;
- u_box_1d(0, newb->width0, &box);
- rctx->b.resource_copy_region(&rctx->b, newb, 0, 0, 0, 0,
- &res->b.b, 0, &box);
- flush = true;
- /* Move the new buffer storage to the old pipe_resource. */
- si_replace_buffer_storage(&rctx->b, &res->b.b, newb);
- pipe_resource_reference(&newb, NULL);
-
- assert(res->b.b.bind & PIPE_BIND_SHARED);
- assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
- }
-
- /* Buffers have no layout, so all three values are zero. */
- offset = 0;
- stride = 0;
- slice_size = 0;
- }
-
- if (flush)
- rctx->b.flush(&rctx->b, NULL, 0);
-
- if (res->b.is_shared) {
- /* USAGE_EXPLICIT_FLUSH must be cleared if at least one user
- * doesn't set it.
- */
- res->external_usage |= usage & ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
- if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
- res->external_usage &= ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
- } else {
- res->b.is_shared = true;
- res->external_usage = usage;
- }
-
- return rscreen->ws->buffer_get_handle(res->buf, stride, offset,
- slice_size, whandle);
-}
-
-/**
- * Release everything a texture references and free the texture object.
- */
-static void r600_texture_destroy(struct pipe_screen *screen,
-				 struct pipe_resource *ptex)
-{
-	struct r600_texture *tex = (struct r600_texture*)ptex;
-
-	r600_texture_reference(&tex->flushed_depth_texture, NULL);
-
-	/* cmask_buffer may point at the texture itself; in that case no
-	 * separate reference is dropped.
-	 */
-	if (tex->cmask_buffer != &tex->resource)
-		r600_resource_reference(&tex->cmask_buffer, NULL);
-
-	pb_reference(&tex->resource.buf, NULL);
-	r600_resource_reference(&tex->dcc_separate_buffer, NULL);
-	r600_resource_reference(&tex->last_dcc_separate_buffer, NULL);
-	FREE(tex);
-}
-
-static const struct u_resource_vtbl r600_texture_vtbl;
-
-/**
- * Compute the FMASK layout of a texture for a given sample count.
- *
- * The number of samples can be specified independently of the texture.
- * \p out is zeroed first; on an invalid sample count or a surface_init
- * failure an error is printed and \p out stays zeroed.
- */
-void si_texture_get_fmask_info(struct r600_common_screen *rscreen,
-			       struct r600_texture *rtex,
-			       unsigned nr_samples,
-			       struct r600_fmask_info *out)
-{
-	/* FMASK is allocated like an ordinary texture. */
-	struct pipe_resource fmask_templ = rtex->resource.b.b;
-	struct radeon_surf fmask = {};
-	unsigned surf_flags, bytes_per_elem;
-
-	memset(out, 0, sizeof(*out));
-
-	if (rscreen->chip_class >= GFX9) {
-		/* GFX9 keeps the FMASK size and alignment in the surface. */
-		out->alignment = rtex->surface.u.gfx9.fmask_alignment;
-		out->size = rtex->surface.u.gfx9.fmask_size;
-		return;
-	}
-
-	fmask_templ.nr_samples = 1;
-	surf_flags = rtex->surface.flags | RADEON_SURF_FMASK;
-
-	if (nr_samples == 2 || nr_samples == 4) {
-		bytes_per_elem = 1;
-	} else if (nr_samples == 8) {
-		bytes_per_elem = 4;
-	} else {
-		R600_ERR("Invalid sample count for FMASK allocation.\n");
-		return;
-	}
-
-	if (rscreen->ws->surface_init(rscreen->ws, &fmask_templ, surf_flags,
-				      bytes_per_elem, RADEON_SURF_MODE_2D,
-				      &fmask)) {
-		R600_ERR("Got error in surface_init while allocating FMASK.\n");
-		return;
-	}
-
-	assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
-
-	out->slice_tile_max = (fmask.u.legacy.level[0].nblk_x *
-			       fmask.u.legacy.level[0].nblk_y) / 64;
-	if (out->slice_tile_max)
-		out->slice_tile_max -= 1;
-
-	out->tile_mode_index = fmask.u.legacy.tiling_index[0];
-	out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x;
-	out->bank_height = fmask.u.legacy.bankh;
-	out->tile_swizzle = fmask.tile_swizzle;
-	out->alignment = MAX2(256, fmask.surf_alignment);
-	out->size = fmask.surf_size;
-}
-
-/**
- * Compute the FMASK layout and reserve space for it at the (aligned) end
- * of the texture.
- */
-static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen,
-					struct r600_texture *rtex)
-{
-	struct r600_fmask_info *fmask = &rtex->fmask;
-
-	si_texture_get_fmask_info(rscreen, rtex,
-				  rtex->resource.b.b.nr_samples, fmask);
-
-	/* Append FMASK after the data laid out so far. */
-	fmask->offset = align64(rtex->size, fmask->alignment);
-	rtex->size = fmask->offset + fmask->size;
-}
-
-/**
- * Compute the CMASK size, alignment and slice_tile_max of a texture.
- *
- * On GFX9 the values are taken from the surface. On older chips they are
- * derived from the number of tile pipes; an unexpected pipe count asserts
- * and leaves \p out untouched.
- */
-static void si_texture_get_cmask_info(struct r600_common_screen *rscreen,
-				      struct r600_texture *rtex,
-				      struct r600_cmask_info *out)
-{
-	const unsigned interleave_bytes = rscreen->info.pipe_interleave_bytes;
-	const unsigned num_pipes = rscreen->info.num_tile_pipes;
-	unsigned cl_width, cl_height;
-	unsigned base_align, width, height, slice_elements, slice_bytes;
-
-	if (rscreen->chip_class >= GFX9) {
-		out->alignment = rtex->surface.u.gfx9.cmask_alignment;
-		out->size = rtex->surface.u.gfx9.cmask_size;
-		return;
-	}
-
-	if (num_pipes == 2) {
-		cl_width = 32;
-		cl_height = 16;
-	} else if (num_pipes == 4) {
-		cl_width = 32;
-		cl_height = 32;
-	} else if (num_pipes == 8) {
-		cl_width = 64;
-		cl_height = 32;
-	} else if (num_pipes == 16) {
-		/* Hawaii */
-		cl_width = 64;
-		cl_height = 64;
-	} else {
-		assert(0);
-		return;
-	}
-
-	base_align = num_pipes * interleave_bytes;
-
-	width = align(rtex->resource.b.b.width0, cl_width*8);
-	height = align(rtex->resource.b.b.height0, cl_height*8);
-	slice_elements = (width * height) / (8*8);
-
-	/* Each element of CMASK is a nibble. */
-	slice_bytes = slice_elements / 2;
-
-	out->slice_tile_max = (width * height) / (128*128);
-	if (out->slice_tile_max)
-		out->slice_tile_max -= 1;
-
-	out->alignment = MAX2(256, base_align);
-	out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
-		    align(slice_bytes, base_align);
-}
-
-/**
- * Compute the CMASK layout, reserve space for it at the (aligned) end of
- * the texture and set the FAST_CLEAR bit in cb_color_info.
- */
-static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen,
-					struct r600_texture *rtex)
-{
-	struct r600_cmask_info *cmask = &rtex->cmask;
-
-	si_texture_get_cmask_info(rscreen, rtex, cmask);
-
-	/* Append CMASK after the data laid out so far. */
-	cmask->offset = align64(rtex->size, cmask->alignment);
-	rtex->size = cmask->offset + cmask->size;
-
-	rtex->cb_color_info |= S_028C70_FAST_CLEAR(1);
-}
-
-/**
- * Allocate CMASK in its own buffer, unless the texture already has a
- * CMASK buffer.
- *
- * On allocation failure cmask.size is reset to 0 and nothing else changes.
- */
-static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen,
-					      struct r600_texture *rtex)
-{
-	/* A CMASK buffer is already attached. */
-	if (rtex->cmask_buffer != NULL)
-		return;
-
-	assert(rtex->cmask.size == 0);
-
-	si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
-
-	rtex->cmask_buffer = (struct r600_resource *)
-		si_aligned_buffer_create(&rscreen->b,
-					 R600_RESOURCE_FLAG_UNMAPPABLE,
-					 PIPE_USAGE_DEFAULT,
-					 rtex->cmask.size,
-					 rtex->cmask.alignment);
-	if (!rtex->cmask_buffer) {
-		rtex->cmask.size = 0;
-		return;
-	}
-
-	/* update colorbuffer state bits */
-	rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
-	rtex->cb_color_info |= S_028C70_FAST_CLEAR(1);
-
-	p_atomic_inc(&rscreen->compressed_colortex_counter);
-}
-
-/**
- * Compute surface.htile_size and surface.htile_alignment for chips up to VI.
- *
- * htile_size is left at 0 when HTILE must not be used (1D tiling on CIK+
- * with DRM 2.x older than 2.38) or when the pipe count is unexpected.
- */
-static void r600_texture_get_htile_size(struct r600_common_screen *rscreen,
-					struct r600_texture *rtex)
-{
-	unsigned num_pipes = rscreen->info.num_tile_pipes;
-	unsigned cl_width, cl_height;
-	unsigned width, height, slice_elements, slice_bytes, base_align;
-
-	assert(rscreen->chip_class <= VI);
-
-	rtex->surface.htile_size = 0;
-
-	/* HTILE is broken with 1D tiling on old kernels and CIK. */
-	if (rscreen->chip_class >= CIK &&
-	    rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
-	    rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38)
-		return;
-
-	/* Overalign HTILE on P2 configs to work around GPU hangs in
-	 * piglit/depthstencil-render-miplevels 585.
-	 *
-	 * This has been confirmed to help Kabini & Stoney, where the hangs
-	 * are always reproducible. I think I have seen the test hang
-	 * on Carrizo too, though it was very rare there.
-	 */
-	if (rscreen->chip_class >= CIK && num_pipes < 4)
-		num_pipes = 4;
-
-	if (num_pipes == 1) {
-		cl_width = 32;
-		cl_height = 16;
-	} else if (num_pipes == 2) {
-		cl_width = 32;
-		cl_height = 32;
-	} else if (num_pipes == 4) {
-		cl_width = 64;
-		cl_height = 32;
-	} else if (num_pipes == 8) {
-		cl_width = 64;
-		cl_height = 64;
-	} else if (num_pipes == 16) {
-		cl_width = 128;
-		cl_height = 64;
-	} else {
-		assert(0);
-		return;
-	}
-
-	width = align(rtex->resource.b.b.width0, cl_width * 8);
-	height = align(rtex->resource.b.b.height0, cl_height * 8);
-
-	/* One 4-byte HTILE element per 8x8 block. */
-	slice_elements = (width * height) / (8 * 8);
-	slice_bytes = slice_elements * 4;
-
-	base_align = num_pipes * rscreen->info.pipe_interleave_bytes;
-
-	rtex->surface.htile_alignment = base_align;
-	rtex->surface.htile_size =
-		(util_max_layer(&rtex->resource.b.b, 0) + 1) *
-		align(slice_bytes, base_align);
-}
-
-/**
- * Reserve space for HTILE at the (aligned) end of the texture.
- *
- * On chips up to VI without TC-compatible HTILE the size is computed here;
- * otherwise the size already stored in the surface is used. Nothing is
- * reserved when the HTILE size is 0.
- */
-static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
-					struct r600_texture *rtex)
-{
-	if (rscreen->chip_class <= VI && !rtex->tc_compatible_htile)
-		r600_texture_get_htile_size(rscreen, rtex);
-
-	if (!rtex->surface.htile_size)
-		return;
-
-	/* rtex->size is a 64-bit byte count; use align64 like the FMASK, CMASK
-	 * and DCC allocators do, so the offset is not truncated for very
-	 * large textures.
-	 */
-	rtex->htile_offset = align64(rtex->size, rtex->surface.htile_alignment);
-	rtex->size = rtex->htile_offset + rtex->surface.htile_size;
-}
-/**
- * Write a human-readable description of a texture's layout to \p log.
- *
- * The common template parameters are printed first. On GFX9 the surface,
- * FMASK, CMASK, HTILE, DCC and stencil info is printed from the gfx9
- * surface fields and the function returns. On older chips the legacy
- * layout, the metadata blocks that are present, every mip level and, if
- * the surface has stencil, every stencil level are printed.
- */
-void si_print_texture_info(struct r600_common_screen *rscreen,
- struct r600_texture *rtex, struct u_log_context *log)
-{
- int i;
-
- /* Common parameters. */
- u_log_printf(log, " Info: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
- "blk_h=%u, array_size=%u, last_level=%u, "
- "bpe=%u, nsamples=%u, flags=0x%x, %s\n",
- rtex->resource.b.b.width0, rtex->resource.b.b.height0,
- rtex->resource.b.b.depth0, rtex->surface.blk_w,
- rtex->surface.blk_h,
- rtex->resource.b.b.array_size, rtex->resource.b.b.last_level,
- rtex->surface.bpe, rtex->resource.b.b.nr_samples,
- rtex->surface.flags, util_format_short_name(rtex->resource.b.b.format));
-
- if (rscreen->chip_class >= GFX9) {
- u_log_printf(log, " Surf: size=%"PRIu64", slice_size=%"PRIu64", "
- "alignment=%u, swmode=%u, epitch=%u, pitch=%u\n",
- rtex->surface.surf_size,
- rtex->surface.u.gfx9.surf_slice_size,
- rtex->surface.surf_alignment,
- rtex->surface.u.gfx9.surf.swizzle_mode,
- rtex->surface.u.gfx9.surf.epitch,
- rtex->surface.u.gfx9.surf_pitch);
-
- if (rtex->fmask.size) {
- u_log_printf(log, " FMASK: offset=%"PRIu64", size=%"PRIu64", "
- "alignment=%u, swmode=%u, epitch=%u\n",
- rtex->fmask.offset,
- rtex->surface.u.gfx9.fmask_size,
- rtex->surface.u.gfx9.fmask_alignment,
- rtex->surface.u.gfx9.fmask.swizzle_mode,
- rtex->surface.u.gfx9.fmask.epitch);
- }
-
- if (rtex->cmask.size) {
- u_log_printf(log, " CMask: offset=%"PRIu64", size=%"PRIu64", "
- "alignment=%u, rb_aligned=%u, pipe_aligned=%u\n",
- rtex->cmask.offset,
- rtex->surface.u.gfx9.cmask_size,
- rtex->surface.u.gfx9.cmask_alignment,
- rtex->surface.u.gfx9.cmask.rb_aligned,
- rtex->surface.u.gfx9.cmask.pipe_aligned);
- }
-
- if (rtex->htile_offset) {
- u_log_printf(log, " HTile: offset=%"PRIu64", size=%"PRIu64", alignment=%u, "
- "rb_aligned=%u, pipe_aligned=%u\n",
- rtex->htile_offset,
- rtex->surface.htile_size,
- rtex->surface.htile_alignment,
- rtex->surface.u.gfx9.htile.rb_aligned,
- rtex->surface.u.gfx9.htile.pipe_aligned);
- }
-
- if (rtex->dcc_offset) {
- u_log_printf(log, " DCC: offset=%"PRIu64", size=%"PRIu64", "
- "alignment=%u, pitch_max=%u, num_dcc_levels=%u\n",
- rtex->dcc_offset, rtex->surface.dcc_size,
- rtex->surface.dcc_alignment,
- rtex->surface.u.gfx9.dcc_pitch_max,
- rtex->surface.num_dcc_levels);
- }
-
- if (rtex->surface.u.gfx9.stencil_offset) {
- u_log_printf(log, " Stencil: offset=%"PRIu64", swmode=%u, epitch=%u\n",
- rtex->surface.u.gfx9.stencil_offset,
- rtex->surface.u.gfx9.stencil.swizzle_mode,
- rtex->surface.u.gfx9.stencil.epitch);
- }
- return; /* everything below reads the legacy surface fields */
- }
-
- u_log_printf(log, " Layout: size=%"PRIu64", alignment=%u, bankw=%u, "
- "bankh=%u, nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n",
- rtex->surface.surf_size, rtex->surface.surf_alignment, rtex->surface.u.legacy.bankw,
- rtex->surface.u.legacy.bankh, rtex->surface.u.legacy.num_banks, rtex->surface.u.legacy.mtilea,
- rtex->surface.u.legacy.tile_split, rtex->surface.u.legacy.pipe_config,
- (rtex->surface.flags & RADEON_SURF_SCANOUT) != 0);
-
- if (rtex->fmask.size)
- u_log_printf(log, " FMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, pitch_in_pixels=%u, "
- "bankh=%u, slice_tile_max=%u, tile_mode_index=%u\n",
- rtex->fmask.offset, rtex->fmask.size, rtex->fmask.alignment,
- rtex->fmask.pitch_in_pixels, rtex->fmask.bank_height,
- rtex->fmask.slice_tile_max, rtex->fmask.tile_mode_index);
-
- if (rtex->cmask.size)
- u_log_printf(log, " CMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, "
- "slice_tile_max=%u\n",
- rtex->cmask.offset, rtex->cmask.size, rtex->cmask.alignment,
- rtex->cmask.slice_tile_max);
-
- if (rtex->htile_offset)
- u_log_printf(log, " HTile: offset=%"PRIu64", size=%"PRIu64", "
- "alignment=%u, TC_compatible = %u\n",
- rtex->htile_offset, rtex->surface.htile_size,
- rtex->surface.htile_alignment,
- rtex->tc_compatible_htile);
-
- if (rtex->dcc_offset) {
- u_log_printf(log, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%u\n",
- rtex->dcc_offset, rtex->surface.dcc_size,
- rtex->surface.dcc_alignment);
- for (i = 0; i <= rtex->resource.b.b.last_level; i++)
- u_log_printf(log, " DCCLevel[%i]: enabled=%u, offset=%"PRIu64", "
- "fast_clear_size=%"PRIu64"\n",
- i, i < rtex->surface.num_dcc_levels,
- rtex->surface.u.legacy.level[i].dcc_offset,
- rtex->surface.u.legacy.level[i].dcc_fast_clear_size);
- }
-
- for (i = 0; i <= rtex->resource.b.b.last_level; i++)
- u_log_printf(log, " Level[%i]: offset=%"PRIu64", slice_size=%"PRIu64", "
- "npix_x=%u, npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
- "mode=%u, tiling_index = %u\n",
- i, rtex->surface.u.legacy.level[i].offset,
- rtex->surface.u.legacy.level[i].slice_size,
- u_minify(rtex->resource.b.b.width0, i),
- u_minify(rtex->resource.b.b.height0, i),
- u_minify(rtex->resource.b.b.depth0, i),
- rtex->surface.u.legacy.level[i].nblk_x,
- rtex->surface.u.legacy.level[i].nblk_y,
- rtex->surface.u.legacy.level[i].mode,
- rtex->surface.u.legacy.tiling_index[i]);
-
- if (rtex->surface.has_stencil) {
- u_log_printf(log, " StencilLayout: tilesplit=%u\n",
- rtex->surface.u.legacy.stencil_tile_split);
- for (i = 0; i <= rtex->resource.b.b.last_level; i++) {
- u_log_printf(log, " StencilLevel[%i]: offset=%"PRIu64", "
- "slice_size=%"PRIu64", npix_x=%u, "
- "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
- "mode=%u, tiling_index = %u\n",
- i, rtex->surface.u.legacy.stencil_level[i].offset,
- rtex->surface.u.legacy.stencil_level[i].slice_size,
- u_minify(rtex->resource.b.b.width0, i),
- u_minify(rtex->resource.b.b.height0, i),
- u_minify(rtex->resource.b.b.depth0, i),
- rtex->surface.u.legacy.stencil_level[i].nblk_x,
- rtex->surface.u.legacy.stencil_level[i].nblk_y,
- rtex->surface.u.legacy.stencil_level[i].mode,
- rtex->surface.u.legacy.stencil_tiling_index[i]);
- }
- }
-}
-
-/* Common processing for r600_texture_create and r600_texture_from_handle.
- *
- * Allocates the r600_texture, copies the template and the precomputed
- * surface into it, reserves space for HTILE (depth) or FMASK/CMASK/DCC
- * (color), and then either allocates a new backing buffer (buf == NULL) or
- * adopts the given one. CMASK, HTILE and (for non-imported textures) DCC
- * are cleared to their initial values afterwards.
- *
- * Returns NULL if the object or its buffer cannot be allocated, or if an
- * MSAA color texture ends up without FMASK or CMASK.
- */
-static struct r600_texture *
-r600_texture_create_object(struct pipe_screen *screen,
- const struct pipe_resource *base,
- struct pb_buffer *buf,
- struct radeon_surf *surface)
-{
- struct r600_texture *rtex;
- struct r600_resource *resource;
- struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
-
- rtex = CALLOC_STRUCT(r600_texture);
- if (!rtex)
- return NULL;
-
- resource = &rtex->resource;
- resource->b.b = *base;
- resource->b.b.next = NULL;
- resource->b.vtbl = &r600_texture_vtbl;
- pipe_reference_init(&resource->b.b.reference, 1);
- resource->b.b.screen = screen;
-
- /* don't include stencil-only formats which we don't support for rendering */
- rtex->is_depth = util_format_has_depth(util_format_description(rtex->resource.b.b.format));
-
- rtex->surface = *surface;
- rtex->size = rtex->surface.surf_size;
-
- rtex->tc_compatible_htile = rtex->surface.htile_size != 0 &&
- (rtex->surface.flags &
- RADEON_SURF_TC_COMPATIBLE_HTILE);
-
- /* TC-compatible HTILE:
- * - VI only supports Z32_FLOAT.
- * - GFX9 only supports Z32_FLOAT and Z16_UNORM. */
- if (rtex->tc_compatible_htile) {
- if (rscreen->chip_class >= GFX9 &&
- base->format == PIPE_FORMAT_Z16_UNORM)
- rtex->db_render_format = base->format;
- else {
- rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
- rtex->upgraded_depth = base->format != PIPE_FORMAT_Z32_FLOAT &&
- base->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT;
- }
- } else {
- rtex->db_render_format = base->format;
- }
-
- /* Tiled depth textures utilize the non-displayable tile order.
- * This must be done after r600_setup_surface.
- * Applies to R600-Cayman. */
- rtex->non_disp_tiling = rtex->is_depth && rtex->surface.u.legacy.level[0].mode >= RADEON_SURF_MODE_1D;
- /* Applies to GCN. */
- rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode;
-
- /* Disable separate DCC at the beginning. DRI2 doesn't reuse buffers
- * between frames, so the only thing that can enable separate DCC
- * with DRI2 is multiple slow clears within a frame.
- */
- rtex->ps_draw_ratio = 0;
-
- if (rtex->is_depth) {
- if (rscreen->chip_class >= GFX9) {
- rtex->can_sample_z = true;
- rtex->can_sample_s = true;
- } else {
- rtex->can_sample_z = !rtex->surface.u.legacy.depth_adjusted;
- rtex->can_sample_s = !rtex->surface.u.legacy.stencil_adjusted;
- }
-
- if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
- R600_RESOURCE_FLAG_FLUSHED_DEPTH))) {
- rtex->db_compatible = true;
-
- if (!(rscreen->debug_flags & DBG(NO_HYPERZ)))
- r600_texture_allocate_htile(rscreen, rtex);
- }
- } else {
- if (base->nr_samples > 1) {
- if (!buf) {
- r600_texture_allocate_fmask(rscreen, rtex);
- r600_texture_allocate_cmask(rscreen, rtex);
- rtex->cmask_buffer = &rtex->resource; /* CMASK lives inside the texture buffer itself */
- }
- if (!rtex->fmask.size || !rtex->cmask.size) {
- FREE(rtex);
- return NULL;
- }
- }
-
- /* Shared textures must always set up DCC here.
- * If it's not present, it will be disabled by
- * apply_opaque_metadata later.
- */
- if (rtex->surface.dcc_size &&
- (buf || !(rscreen->debug_flags & DBG(NO_DCC))) &&
- !(rtex->surface.flags & RADEON_SURF_SCANOUT)) {
- /* Reserve space for the DCC buffer. */
- rtex->dcc_offset = align64(rtex->size, rtex->surface.dcc_alignment);
- rtex->size = rtex->dcc_offset + rtex->surface.dcc_size;
- }
- }
-
- /* Now create the backing buffer, or adopt the one passed in by the
- * caller and fill in the resource fields from it.
- */
- if (!buf) {
- si_init_resource_fields(rscreen, resource, rtex->size,
- rtex->surface.surf_alignment);
-
- if (!si_alloc_resource(rscreen, resource)) {
- FREE(rtex);
- return NULL;
- }
- } else {
- resource->buf = buf;
- resource->gpu_address = rscreen->ws->buffer_get_virtual_address(resource->buf);
- resource->bo_size = buf->size;
- resource->bo_alignment = buf->alignment;
- resource->domains = rscreen->ws->buffer_get_initial_domain(resource->buf);
- if (resource->domains & RADEON_DOMAIN_VRAM)
- resource->vram_usage = buf->size;
- else if (resource->domains & RADEON_DOMAIN_GTT)
- resource->gart_usage = buf->size;
- }
-
- if (rtex->cmask.size) {
- /* Initialize the cmask to 0xCC (= compressed state). */
- si_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
- rtex->cmask.offset, rtex->cmask.size,
- 0xCCCCCCCC);
- }
- if (rtex->htile_offset) {
- uint32_t clear_value = 0;
-
- if (rscreen->chip_class >= GFX9 || rtex->tc_compatible_htile)
- clear_value = 0x0000030F;
-
- si_screen_clear_buffer(rscreen, &rtex->resource.b.b,
- rtex->htile_offset,
- rtex->surface.htile_size,
- clear_value);
- }
-
- /* Initialize DCC only if the texture is not being imported. */
- if (!buf && rtex->dcc_offset) {
- si_screen_clear_buffer(rscreen, &rtex->resource.b.b,
- rtex->dcc_offset,
- rtex->surface.dcc_size,
- 0xFFFFFFFF);
- }
-
- /* Initialize the CMASK base register value. */
- rtex->cmask.base_address_reg =
- (rtex->resource.gpu_address + rtex->cmask.offset) >> 8;
-
- if (rscreen->debug_flags & DBG(VM)) {
- fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Texture %ix%ix%i, %i levels, %i samples, %s\n",
- rtex->resource.gpu_address,
- rtex->resource.gpu_address + rtex->resource.buf->size,
- base->width0, base->height0, util_max_layer(base, 0)+1, base->last_level+1,
- base->nr_samples ? base->nr_samples : 1, util_format_short_name(base->format));
- }
-
- if (rscreen->debug_flags & DBG(TEX)) {
- puts("Texture:");
- struct u_log_context log;
- u_log_context_init(&log);
- si_print_texture_info(rscreen, rtex, &log);
- u_log_new_page_print(&log, stdout);
- fflush(stdout);
- u_log_context_destroy(&log);
- }
-
- return rtex;
-}
-
-/**
- * Pick the surface tiling mode for a texture described by "templ".
- *
- * The checks are ordered by priority: the first matching rule decides.
- */
-static enum radeon_surf_mode
-r600_choose_tiling(struct r600_common_screen *rscreen,
-		   const struct pipe_resource *templ)
-{
-	const struct util_format_description *fmt = util_format_description(templ->format);
-	bool tiling_forced = templ->flags & R600_RESOURCE_FLAG_FORCE_TILING;
-	bool db_surface = util_format_is_depth_or_stencil(templ->format) &&
-			  !(templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH);
-
-	/* MSAA resources must be 2D tiled. */
-	if (templ->nr_samples > 1)
-		return RADEON_SURF_MODE_2D;
-
-	/* Transfer resources should be linear. */
-	if (templ->flags & R600_RESOURCE_FLAG_TRANSFER)
-		return RADEON_SURF_MODE_LINEAR_ALIGNED;
-
-	/* Forcing TC-compatible HTILE on VI avoids Z/S decompress blits,
-	 * but it requires 2D tiling.
-	 */
-	if (rscreen->chip_class == VI && db_surface &&
-	    (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY))
-		return RADEON_SURF_MODE_2D;
-
-	/* Common candidates for the linear mode.
-	 * Compressed textures and DB surfaces must always be tiled.
-	 */
-	if (!(tiling_forced || db_surface ||
-	      util_format_is_compressed(templ->format))) {
-		/* Textures with a very small height are recommended to be
-		 * linear; among 2D textures only very thin and long ones
-		 * should benefit from linear_aligned. */
-		bool one_dimensional = templ->target == PIPE_TEXTURE_1D ||
-				       templ->target == PIPE_TEXTURE_1D_ARRAY;
-		bool thin_and_long = templ->width0 > 8 && templ->height0 <= 2;
-		/* Textures likely to be mapped often. */
-		bool mapped_often = templ->usage == PIPE_USAGE_STAGING ||
-				    templ->usage == PIPE_USAGE_STREAM;
-
-		/* Besides the cases above: the NO_TILING debug flag,
-		 * the 422 (SUBSAMPLED) formats (tiling doesn't work with
-		 * them on R600+), cursors (linear on SI; XXX double-check,
-		 * maybe also use RADEON_SURF_SCANOUT) and explicitly
-		 * linear resources.
-		 */
-		if ((rscreen->debug_flags & DBG(NO_TILING)) ||
-		    fmt->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED ||
-		    (templ->bind & (PIPE_BIND_CURSOR | PIPE_BIND_LINEAR)) ||
-		    one_dimensional || thin_and_long || mapped_often)
-			return RADEON_SURF_MODE_LINEAR_ALIGNED;
-	}
-
-	/* Make small textures 1D tiled. */
-	if (templ->width0 <= 16 || templ->height0 <= 16 ||
-	    (rscreen->debug_flags & DBG(NO_2D_TILING)))
-		return RADEON_SURF_MODE_1D;
-
-	/* The allocator will switch to 1D if needed. */
-	return RADEON_SURF_MODE_2D;
-}
-
-/**
- * Create a new texture from "templ", choosing the tiling mode and whether
- * TC-compatible HTILE is used. Returns NULL on failure.
- */
-struct pipe_resource *si_texture_create(struct pipe_screen *screen,
-					const struct pipe_resource *templ)
-{
-	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
-	struct radeon_surf surface = {0};
-	bool flushed_depth = templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH;
-	bool tc_compat_htile = false;
-
-	/* TC-compat HTILE is less efficient with MSAA, hence the sample
-	 * count limit. */
-	if (rscreen->chip_class >= VI &&
-	    (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) &&
-	    !(rscreen->debug_flags & DBG(NO_HYPERZ)) &&
-	    !flushed_depth &&
-	    templ->nr_samples <= 1)
-		tc_compat_htile = util_format_is_depth_or_stencil(templ->format);
-
-	if (r600_init_surface(rscreen, &surface, templ,
-			      r600_choose_tiling(rscreen, templ), 0, 0,
-			      false, false, flushed_depth,
-			      tc_compat_htile))
-		return NULL;
-
-	return (struct pipe_resource *)
-		r600_texture_create_object(screen, templ, NULL, &surface);
-}
-
-/**
- * Import a texture from a winsys handle.
- *
- * Only non-mipmapped 2D/RECT textures with depth 1 are supported. The tiling
- * layout is taken from the metadata attached to the imported buffer.
- *
- * Returns the new resource, or NULL on failure. On failure the reference to
- * the imported buffer is released.
- */
-static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
-						      const struct pipe_resource *templ,
-						      struct winsys_handle *whandle,
-						      unsigned usage)
-{
-	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
-	struct pb_buffer *buf = NULL;
-	unsigned stride = 0, offset = 0;
-	enum radeon_surf_mode array_mode;
-	struct radeon_surf surface = {};
-	int r;
-	struct radeon_bo_metadata metadata = {};
-	struct r600_texture *rtex;
-	bool is_scanout;
-
-	/* Support only 2D textures without mipmaps */
-	if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) ||
-	    templ->depth0 != 1 || templ->last_level != 0)
-		return NULL;
-
-	buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, &stride, &offset);
-	if (!buf)
-		return NULL;
-
-	rscreen->ws->buffer_get_metadata(buf, &metadata);
-	r600_surface_import_metadata(rscreen, &surface, &metadata,
-				     &array_mode, &is_scanout);
-
-	r = r600_init_surface(rscreen, &surface, templ, array_mode, stride,
-			      offset, true, is_scanout, false, false);
-	if (r)
-		goto fail;
-
-	rtex = r600_texture_create_object(screen, templ, buf, &surface);
-	if (!rtex)
-		goto fail;
-
-	rtex->resource.b.is_shared = true;
-	rtex->resource.external_usage = usage;
-
-	if (rscreen->apply_opaque_metadata)
-		rscreen->apply_opaque_metadata(rscreen, rtex, &metadata);
-
-	assert(rtex->surface.tile_swizzle == 0);
-	return &rtex->resource.b.b;
-
-fail:
-	/* Drop the reference obtained from buffer_from_handle(); without
-	 * this the imported buffer would be leaked.
-	 * NOTE(review): assumes r600_texture_create_object() does not release
-	 * "buf" itself when it fails - confirm against its full body.
-	 */
-	pb_reference(&buf, NULL);
-	return NULL;
-}
-
-/**
- * Create the texture that receives a decompressed ("flushed") copy of a
- * depth/stencil texture.
- *
- * If "staging" is NULL, the result is cached in rtex->flushed_depth_texture
- * (an existing one is reused) and the format may be reduced depending on
- * rtex->can_sample_z / rtex->can_sample_s. Otherwise the new texture is
- * returned through *staging, keeps the original format and is created as a
- * staging (transfer) resource.
- *
- * Returns true on success, false if the texture could not be created.
- */
-bool si_init_flushed_depth_texture(struct pipe_context *ctx,
- struct pipe_resource *texture,
- struct r600_texture **staging)
-{
- struct r600_texture *rtex = (struct r600_texture*)texture;
- struct pipe_resource resource;
- struct r600_texture **flushed_depth_texture = staging ?
- staging : &rtex->flushed_depth_texture;
- enum pipe_format pipe_format = texture->format;
-
- if (!staging) {
- if (rtex->flushed_depth_texture)
- return true; /* it's ready */
-
- if (!rtex->can_sample_z && rtex->can_sample_s) {
- switch (pipe_format) {
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- /* Save memory by not allocating the S plane. */
- pipe_format = PIPE_FORMAT_Z32_FLOAT;
- break;
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- case PIPE_FORMAT_S8_UINT_Z24_UNORM:
- /* Save memory bandwidth by not copying the
- * stencil part during flush.
- *
- * This potentially increases memory bandwidth
- * if an application uses both Z and S texturing
- * simultaneously (a flushed Z24S8 texture
- * would be stored compactly), but how often
- * does that really happen?
- */
- pipe_format = PIPE_FORMAT_Z24X8_UNORM;
- break;
- default:;
- }
- } else if (!rtex->can_sample_s && rtex->can_sample_z) {
- assert(util_format_has_stencil(util_format_description(pipe_format)));
-
- /* DB->CB copies to an 8bpp surface don't work. */
- pipe_format = PIPE_FORMAT_X24S8_UINT;
- }
- }
-
- /* Describe a texture with the same dimensions as the original, but
- * without the depth-stencil binding and flagged as a flushed-depth copy.
- */
- memset(&resource, 0, sizeof(resource));
- resource.target = texture->target;
- resource.format = pipe_format;
- resource.width0 = texture->width0;
- resource.height0 = texture->height0;
- resource.depth0 = texture->depth0;
- resource.array_size = texture->array_size;
- resource.last_level = texture->last_level;
- resource.nr_samples = texture->nr_samples;
- resource.usage = staging ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT;
- resource.bind = texture->bind & ~PIPE_BIND_DEPTH_STENCIL;
- resource.flags = texture->flags | R600_RESOURCE_FLAG_FLUSHED_DEPTH;
-
- if (staging)
- resource.flags |= R600_RESOURCE_FLAG_TRANSFER;
-
- *flushed_depth_texture = (struct r600_texture *)ctx->screen->resource_create(ctx->screen, &resource);
- if (*flushed_depth_texture == NULL) {
- R600_ERR("failed to create temporary texture to hold flushed depth\n");
- return false;
- }
-
- (*flushed_depth_texture)->non_disp_tiling = false;
- return true;
-}
-
-/**
- * Fill in "res" so that it describes a temporary texture exactly as large as
- * "box", i.e. big enough to hold that subregion of mipmap level "level" of
- * the texture "orig".
- */
-static void r600_init_temp_resource_from_box(struct pipe_resource *res,
-					     struct pipe_resource *orig,
-					     const struct pipe_box *box,
-					     unsigned level, unsigned flags)
-{
-	bool layered;
-
-	memset(res, 0, sizeof(*res));
-
-	/* A 3D box needs the matching texture target and layer count. */
-	layered = box->depth > 1 && util_max_layer(orig, level) > 0;
-
-	res->target = layered ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
-	res->format = orig->format;
-	res->width0 = box->width;
-	res->height0 = box->height;
-	res->depth0 = 1;
-	res->array_size = layered ? box->depth : 1;
-	res->usage = (flags & R600_RESOURCE_FLAG_TRANSFER) ?
-		     PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT;
-	res->flags = flags;
-}
-
-/**
- * Return whether the storage of "rtex" may be thrown away for a transfer:
- * the texture must not be shared, the transfer must not read, the texture
- * must have a single mip level and the box must cover that whole level.
- */
-static bool r600_can_invalidate_texture(struct r600_common_screen *rscreen,
-					struct r600_texture *rtex,
-					unsigned transfer_usage,
-					const struct pipe_box *box)
-{
-	struct pipe_resource *tex = &rtex->resource.b.b;
-
-	if (rtex->resource.b.is_shared)
-		return false;
-	if (transfer_usage & PIPE_TRANSFER_READ)
-		return false;
-	if (tex->last_level != 0)
-		return false;
-
-	return util_texrange_covers_whole_level(tex, 0,
-						box->x, box->y, box->z,
-						box->width, box->height,
-						box->depth);
-}
-
-/**
- * Replace the backing storage of a linear, non-depth texture with a freshly
- * allocated buffer, keeping the same pipe_resource.
- */
-static void r600_texture_invalidate_storage(struct r600_common_context *rctx,
-					    struct r600_texture *rtex)
-{
-	struct r600_common_screen *rscreen = rctx->screen;
-	struct r600_resource *res = &rtex->resource;
-
-	/* Discarding depth or tiled buffers would be pointless. */
-	assert(!rtex->is_depth);
-	assert(rtex->surface.is_linear);
-
-	/* New buffer, same pipe_resource. */
-	si_alloc_resource(rscreen, res);
-
-	/* The CMASK base address must be set up even without CMASK. */
-	rtex->cmask.base_address_reg =
-		(res->gpu_address + rtex->cmask.offset) >> 8;
-
-	p_atomic_inc(&rscreen->dirty_tex_counter);
-
-	rctx->num_alloc_tex_transfer_bytes += rtex->size;
-}
-
-/**
- * Map a box of a texture level for CPU access.
- *
- * Depth textures always go through a decompressed staging copy. Other
- * textures are mapped directly when they are linear and idle (or can be
- * invalidated); otherwise a linear staging texture is used.
- *
- * On success, *ptransfer receives the transfer object and a pointer to the
- * mapped data is returned. On failure NULL is returned, *ptransfer is left
- * untouched and all references taken by this function are released.
- */
-static void *r600_texture_transfer_map(struct pipe_context *ctx,
-				       struct pipe_resource *texture,
-				       unsigned level,
-				       unsigned usage,
-				       const struct pipe_box *box,
-				       struct pipe_transfer **ptransfer)
-{
-	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
-	struct r600_texture *rtex = (struct r600_texture*)texture;
-	struct r600_transfer *trans;
-	struct r600_resource *buf;
-	unsigned offset = 0;
-	char *map;
-	bool use_staging_texture = false;
-
-	assert(!(texture->flags & R600_RESOURCE_FLAG_TRANSFER));
-	assert(box->width && box->height && box->depth);
-
-	/* Depth textures use staging unconditionally. */
-	if (!rtex->is_depth) {
-		/* Degrade the tile mode if we get too many transfers on APUs.
-		 * On dGPUs, the staging texture is always faster.
-		 * Only count uploads that are at least 4x4 pixels large.
-		 */
-		if (!rctx->screen->info.has_dedicated_vram &&
-		    level == 0 &&
-		    box->width >= 4 && box->height >= 4 &&
-		    p_atomic_inc_return(&rtex->num_level0_transfers) == 10) {
-			bool can_invalidate =
-				r600_can_invalidate_texture(rctx->screen, rtex,
-							    usage, box);
-
-			r600_reallocate_texture_inplace(rctx, rtex,
-							PIPE_BIND_LINEAR,
-							can_invalidate);
-		}
-
-		/* Tiled textures need to be converted into a linear texture for CPU
-		 * access. The staging texture is always linear and is placed in GART.
-		 *
-		 * Reading from VRAM or GTT WC is slow, always use the staging
-		 * texture in this case.
-		 *
-		 * Use the staging texture for uploads if the underlying BO
-		 * is busy.
-		 */
-		if (!rtex->surface.is_linear)
-			use_staging_texture = true;
-		else if (usage & PIPE_TRANSFER_READ)
-			use_staging_texture =
-				rtex->resource.domains & RADEON_DOMAIN_VRAM ||
-				rtex->resource.flags & RADEON_FLAG_GTT_WC;
-		/* Write & linear only: */
-		else if (si_rings_is_buffer_referenced(rctx, rtex->resource.buf,
-						       RADEON_USAGE_READWRITE) ||
-			 !rctx->ws->buffer_wait(rtex->resource.buf, 0,
-						RADEON_USAGE_READWRITE)) {
-			/* It's busy. */
-			if (r600_can_invalidate_texture(rctx->screen, rtex,
-							usage, box))
-				r600_texture_invalidate_storage(rctx, rtex);
-			else
-				use_staging_texture = true;
-		}
-	}
-
-	trans = CALLOC_STRUCT(r600_transfer);
-	if (!trans)
-		return NULL;
-	/* From here on, every failure must leave through fail_trans so that
-	 * this texture reference (and any staging resource) is released. */
-	pipe_resource_reference(&trans->b.b.resource, texture);
-	trans->b.b.level = level;
-	trans->b.b.usage = usage;
-	trans->b.b.box = *box;
-
-	if (rtex->is_depth) {
-		struct r600_texture *staging_depth = NULL;
-
-		if (rtex->resource.b.b.nr_samples > 1) {
-			/* MSAA depth buffers need to be converted to single sample buffers.
-			 *
-			 * Mapping MSAA depth buffers can occur if ReadPixels is called
-			 * with a multisample GLX visual.
-			 *
-			 * First downsample the depth buffer to a temporary texture,
-			 * then decompress the temporary one to staging.
-			 *
-			 * Only the region being mapped is transferred.
-			 */
-			struct pipe_resource resource;
-
-			r600_init_temp_resource_from_box(&resource, texture, box, level, 0);
-
-			if (!si_init_flushed_depth_texture(ctx, &resource, &staging_depth)) {
-				R600_ERR("failed to create temporary texture to hold untiled copy\n");
-				goto fail_trans;
-			}
-
-			/* Hand the staging texture to the transfer right away,
-			 * so that the failure path below releases it too. */
-			trans->staging = (struct r600_resource*)staging_depth;
-
-			if (usage & PIPE_TRANSFER_READ) {
-				struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource);
-				if (!temp) {
-					R600_ERR("failed to create a temporary depth texture\n");
-					goto fail_trans;
-				}
-
-				r600_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box);
-				rctx->blit_decompress_depth(ctx, (struct r600_texture*)temp, staging_depth,
-							    0, 0, 0, box->depth, 0, 0);
-				pipe_resource_reference(&temp, NULL);
-			}
-
-			/* Just get the strides. */
-			r600_texture_get_offset(rctx->screen, staging_depth, level, NULL,
-						&trans->b.b.stride,
-						&trans->b.b.layer_stride);
-		} else {
-			/* XXX: only readback the rectangle which is being mapped? */
-			/* XXX: when discard is true, no need to read back from depth texture */
-			if (!si_init_flushed_depth_texture(ctx, texture, &staging_depth)) {
-				R600_ERR("failed to create temporary texture to hold untiled copy\n");
-				goto fail_trans;
-			}
-
-			trans->staging = (struct r600_resource*)staging_depth;
-
-			rctx->blit_decompress_depth(ctx, rtex, staging_depth,
-						    level, level,
-						    box->z, box->z + box->depth - 1,
-						    0, 0);
-
-			offset = r600_texture_get_offset(rctx->screen, staging_depth,
-							 level, box,
-							 &trans->b.b.stride,
-							 &trans->b.b.layer_stride);
-		}
-
-		buf = trans->staging;
-	} else if (use_staging_texture) {
-		struct pipe_resource resource;
-		struct r600_texture *staging;
-
-		r600_init_temp_resource_from_box(&resource, texture, box, level,
-						 R600_RESOURCE_FLAG_TRANSFER);
-		resource.usage = (usage & PIPE_TRANSFER_READ) ?
-			PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;
-
-		/* Create the temporary texture. */
-		staging = (struct r600_texture*)ctx->screen->resource_create(ctx->screen, &resource);
-		if (!staging) {
-			R600_ERR("failed to create temporary texture to hold untiled copy\n");
-			goto fail_trans;
-		}
-		trans->staging = &staging->resource;
-
-		/* Just get the strides. */
-		r600_texture_get_offset(rctx->screen, staging, 0, NULL,
-					&trans->b.b.stride,
-					&trans->b.b.layer_stride);
-
-		if (usage & PIPE_TRANSFER_READ)
-			r600_copy_to_staging_texture(ctx, trans);
-		else
-			usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
-
-		buf = trans->staging;
-	} else {
-		/* the resource is mapped directly */
-		offset = r600_texture_get_offset(rctx->screen, rtex, level, box,
-						 &trans->b.b.stride,
-						 &trans->b.b.layer_stride);
-		buf = &rtex->resource;
-	}
-
-	if (!(map = si_buffer_map_sync_with_rings(rctx, buf, usage)))
-		goto fail_trans;
-
-	*ptransfer = &trans->b.b;
-	return map + offset;
-
-fail_trans:
-	/* trans was zero-initialized, so staging is NULL unless it was set
-	 * above; both unreference calls are safe in either case. */
-	r600_resource_reference(&trans->staging, NULL);
-	pipe_resource_reference(&trans->b.b.resource, NULL);
-	FREE(trans);
-	return NULL;
-}
-
-/**
- * Finish a texture transfer: write staged data back to the texture if the
- * transfer was a write, release the staging resource and the transfer, and
- * flush the gfx IB if too much transfer storage has been allocated.
- */
-static void r600_texture_transfer_unmap(struct pipe_context *ctx,
-					struct pipe_transfer* transfer)
-{
-	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
-	struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
-	struct pipe_resource *texture = transfer->resource;
-	struct r600_texture *rtex = (struct r600_texture*)texture;
-
-	if (rtransfer->staging) {
-		if (transfer->usage & PIPE_TRANSFER_WRITE) {
-			bool single_sample_depth =
-				rtex->is_depth &&
-				rtex->resource.b.b.nr_samples <= 1;
-
-			if (single_sample_depth) {
-				ctx->resource_copy_region(ctx, texture, transfer->level,
-							  transfer->box.x, transfer->box.y, transfer->box.z,
-							  &rtransfer->staging->b.b, transfer->level,
-							  &transfer->box);
-			} else {
-				r600_copy_from_staging_texture(ctx, rtransfer);
-			}
-		}
-
-		rctx->num_alloc_tex_transfer_bytes += rtransfer->staging->buf->size;
-		r600_resource_reference(&rtransfer->staging, NULL);
-	}
-
-	/* Heuristic for {upload, draw, upload, draw, ..}:
-	 *
-	 * Flush the gfx IB once too much texture storage has been allocated.
-	 *
-	 * We don't want to build IBs that use too much memory and put
-	 * pressure on the kernel memory manager, and we want temporary and
-	 * invalidated buffers to go idle ASAP, to decrease the total memory
-	 * usage or make them reusable. The memory usage will be slightly
-	 * higher than given here because of the buffer cache in the winsys.
-	 *
-	 * The result is that the kernel memory manager is never a bottleneck.
-	 */
-	if (rctx->num_alloc_tex_transfer_bytes > rctx->screen->info.gart_size / 4) {
-		rctx->gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
-		rctx->num_alloc_tex_transfer_bytes = 0;
-	}
-
-	pipe_resource_reference(&transfer->resource, NULL);
-	FREE(transfer);
-}
-
-/* Resource vtable for textures. Entries are positional; the handle
- * export slot is left NULL here.
- */
-static const struct u_resource_vtbl r600_texture_vtbl =
-{
- NULL, /* get_handle */
- r600_texture_destroy, /* resource_destroy */
- r600_texture_transfer_map, /* transfer_map */
- u_default_transfer_flush_region, /* transfer_flush_region */
- r600_texture_transfer_unmap, /* transfer_unmap */
-};
-
-/* DCC channel type categories within which formats can be reinterpreted
- * while keeping the same DCC encoding. The swizzle must also match.
- *
- * dcc_channel_incompatible marks formats that fit no category; two formats
- * are never treated as compatible through it.
- */
-enum dcc_channel_type {
- dcc_channel_float32,
- dcc_channel_uint32,
- dcc_channel_sint32,
- dcc_channel_float16,
- dcc_channel_uint16,
- dcc_channel_sint16,
- dcc_channel_uint_10_10_10_2,
- dcc_channel_uint8,
- dcc_channel_sint8,
- dcc_channel_incompatible,
-};
-
-/* Classify a format by the size and type of its first non-void channel.
- * Returns dcc_channel_incompatible if all channels are void or the channel
- * size is not one of 32, 16, 10 or 8 bits. */
-static enum dcc_channel_type
-vi_get_dcc_channel_type(const struct util_format_description *desc)
-{
-	int first = 0;
-	bool is_float, is_unsigned;
-
-	/* Skip leading void channels. */
-	while (first < desc->nr_channels &&
-	       desc->channel[first].type == UTIL_FORMAT_TYPE_VOID)
-		first++;
-
-	if (first == desc->nr_channels)
-		return dcc_channel_incompatible;
-
-	is_float = desc->channel[first].type == UTIL_FORMAT_TYPE_FLOAT;
-	is_unsigned = desc->channel[first].type == UTIL_FORMAT_TYPE_UNSIGNED;
-
-	switch (desc->channel[first].size) {
-	case 32:
-		return is_float ? dcc_channel_float32 :
-		       is_unsigned ? dcc_channel_uint32 : dcc_channel_sint32;
-	case 16:
-		return is_float ? dcc_channel_float16 :
-		       is_unsigned ? dcc_channel_uint16 : dcc_channel_sint16;
-	case 10:
-		return dcc_channel_uint_10_10_10_2;
-	case 8:
-		return is_unsigned ? dcc_channel_uint8 : dcc_channel_sint8;
-	default:
-		return dcc_channel_incompatible;
-	}
-}
-
-/* Tell whether a surface of one format may be reinterpreted as the other
- * format while DCC is enabled: same channel count, matching swizzles and
- * the same DCC channel type. */
-bool vi_dcc_formats_compatible(enum pipe_format format1,
-			       enum pipe_format format2)
-{
-	const struct util_format_description *a, *b;
-	enum dcc_channel_type type_a, type_b;
-	int chan;
-
-	if (format1 == format2)
-		return true;
-
-	a = util_format_description(format1);
-	b = util_format_description(format2);
-
-	if (a->nr_channels != b->nr_channels)
-		return false;
-
-	/* Swizzles are compared only where both select a real component
-	 * (X..W); those must be the same. */
-	for (chan = 0; chan < a->nr_channels; chan++) {
-		bool both_real = a->swizzle[chan] <= PIPE_SWIZZLE_W &&
-				 b->swizzle[chan] <= PIPE_SWIZZLE_W;
-
-		if (both_real && a->swizzle[chan] != b->swizzle[chan])
-			return false;
-	}
-
-	type_a = vi_get_dcc_channel_type(a);
-	type_b = vi_get_dcc_channel_type(b);
-
-	if (type_a == dcc_channel_incompatible ||
-	    type_b == dcc_channel_incompatible)
-		return false;
-
-	return type_a == type_b;
-}
-
-/* Return true if DCC is enabled for the given level of "tex" and
- * "view_format" cannot be used with the texture's own format under DCC. */
-bool vi_dcc_formats_are_incompatible(struct pipe_resource *tex,
-				     unsigned level,
-				     enum pipe_format view_format)
-{
-	struct r600_texture *rtex = (struct r600_texture *)tex;
-
-	/* Without DCC there is nothing to be incompatible with. */
-	if (!vi_dcc_enabled(rtex, level))
-		return false;
-
-	return !vi_dcc_formats_compatible(tex->format, view_format);
-}
-
-/* Get rid of DCC if "view_format" is incompatible with it: first try to
- * disable DCC, and decompress it if that doesn't succeed.
- *
- * This can't be merged with vi_dcc_formats_are_incompatible, because
- * vi_dcc_formats_compatible should be called only when DCC is enabled. */
-void vi_disable_dcc_if_incompatible_format(struct r600_common_context *rctx,
-					   struct pipe_resource *tex,
-					   unsigned level,
-					   enum pipe_format view_format)
-{
-	struct r600_texture *rtex = (struct r600_texture *)tex;
-
-	if (!vi_dcc_formats_are_incompatible(tex, level, view_format))
-		return;
-
-	if (si_texture_disable_dcc(rctx, rtex))
-		return;
-
-	rctx->decompress_dcc(&rctx->b, rtex);
-}
-
-/**
- * Create a surface for "texture" with explicitly given dimensions.
- *
- * width/height become the surface dimensions, width0/height0 are stored in
- * the driver surface. Returns NULL if the allocation fails.
- */
-struct pipe_surface *si_create_surface_custom(struct pipe_context *pipe,
-					      struct pipe_resource *texture,
-					      const struct pipe_surface *templ,
-					      unsigned width0, unsigned height0,
-					      unsigned width, unsigned height)
-{
-	struct r600_surface *surf = CALLOC_STRUCT(r600_surface);
-	struct pipe_surface *base;
-	bool dcc_incompatible;
-
-	if (!surf)
-		return NULL;
-
-	assert(templ->u.tex.first_layer <= util_max_layer(texture, templ->u.tex.level));
-	assert(templ->u.tex.last_layer <= util_max_layer(texture, templ->u.tex.level));
-
-	base = &surf->base;
-	pipe_reference_init(&base->reference, 1);
-	pipe_resource_reference(&base->texture, texture);
-	base->context = pipe;
-	base->format = templ->format;
-	base->width = width;
-	base->height = height;
-	base->u = templ->u;
-
-	surf->width0 = width0;
-	surf->height0 = height0;
-
-	/* Buffers are never checked for DCC format compatibility. */
-	dcc_incompatible =
-		texture->target != PIPE_BUFFER &&
-		vi_dcc_formats_are_incompatible(texture, templ->u.tex.level,
-						templ->format);
-	surf->dcc_incompatible = dcc_incompatible;
-
-	return base;
-}
-
-/**
- * Create a surface for the mip level selected by "templ". When the view
- * format has a different block size than the texture format, the surface
- * dimensions are converted to the view format's blocks.
- */
-static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
-						struct pipe_resource *tex,
-						const struct pipe_surface *templ)
-{
-	unsigned level = templ->u.tex.level;
-	unsigned width0 = tex->width0;
-	unsigned height0 = tex->height0;
-	unsigned width = u_minify(tex->width0, level);
-	unsigned height = u_minify(tex->height0, level);
-	bool reinterpreted = tex->target != PIPE_BUFFER &&
-			     templ->format != tex->format;
-
-	if (reinterpreted) {
-		const struct util_format_description *tex_desc =
-			util_format_description(tex->format);
-		const struct util_format_description *view_desc =
-			util_format_description(templ->format);
-
-		assert(tex_desc->block.bits == view_desc->block.bits);
-
-		/* The size is adjusted if and only if the block width or
-		 * height changes. */
-		if (tex_desc->block.width != view_desc->block.width ||
-		    tex_desc->block.height != view_desc->block.height) {
-			unsigned blocks_x = util_format_get_nblocksx(tex->format, width);
-			unsigned blocks_y = util_format_get_nblocksy(tex->format, height);
-
-			width = blocks_x * view_desc->block.width;
-			height = blocks_y * view_desc->block.height;
-
-			width0 = util_format_get_nblocksx(tex->format, width0);
-			height0 = util_format_get_nblocksy(tex->format, height0);
-		}
-	}
-
-	return si_create_surface_custom(pipe, tex, templ,
-					width0, height0,
-					width, height);
-}
-
-/* Release the FMASK/CMASK buffers and the texture referenced by a surface,
- * then free the surface itself. */
-static void r600_surface_destroy(struct pipe_context *pipe,
-				 struct pipe_surface *surface)
-{
-	struct r600_surface *rsurf = (struct r600_surface*)surface;
-
-	r600_resource_reference(&rsurf->cb_buffer_fmask, NULL);
-	r600_resource_reference(&rsurf->cb_buffer_cmask, NULL);
-
-	pipe_resource_reference(&surface->texture, NULL);
-
-	FREE(surface);
-}
-
-/**
- * Clear a box of one texture level to the value stored at "data", which is
- * a single pixel in the texture's own format.
- *
- * Depth/stencil textures go through clear_depth_stencil; color textures go
- * through clear_render_target, or a software fallback when the format is
- * not supported as a render target.
- */
-static void r600_clear_texture(struct pipe_context *pipe,
-			       struct pipe_resource *tex,
-			       unsigned level,
-			       const struct pipe_box *box,
-			       const void *data)
-{
-	struct pipe_screen *screen = pipe->screen;
-	struct r600_texture *rtex = (struct r600_texture*)tex;
-	const struct util_format_description *desc =
-		util_format_description(tex->format);
-	struct pipe_surface tmpl = {{0}};
-	struct pipe_surface *sf;
-
-	/* Surface covering the layers spanned by the box. */
-	tmpl.format = tex->format;
-	tmpl.u.tex.level = level;
-	tmpl.u.tex.first_layer = box->z;
-	tmpl.u.tex.last_layer = box->z + box->depth - 1;
-
-	sf = pipe->create_surface(pipe, tex, &tmpl);
-	if (!sf)
-		return;
-
-	if (rtex->is_depth) {
-		/* Depth is always present. */
-		unsigned buffers = PIPE_CLEAR_DEPTH;
-		float depth;
-		uint8_t stencil = 0;
-
-		desc->unpack_z_float(&depth, 0, data, 0, 1, 1);
-
-		if (rtex->surface.has_stencil) {
-			buffers |= PIPE_CLEAR_STENCIL;
-			desc->unpack_s_8uint(&stencil, 0, data, 0, 1, 1);
-		}
-
-		pipe->clear_depth_stencil(pipe, sf, buffers, depth, stencil,
-					  box->x, box->y,
-					  box->width, box->height, false);
-	} else {
-		union pipe_color_union color;
-
-		/* pipe_color_union requires the full vec4 representation. */
-		if (util_format_is_pure_uint(tex->format))
-			desc->unpack_rgba_uint(color.ui, 0, data, 0, 1, 1);
-		else if (util_format_is_pure_sint(tex->format))
-			desc->unpack_rgba_sint(color.i, 0, data, 0, 1, 1);
-		else
-			desc->unpack_rgba_float(color.f, 0, data, 0, 1, 1);
-
-		if (!screen->is_format_supported(screen, tex->format,
-						 tex->target, 0,
-						 PIPE_BIND_RENDER_TARGET)) {
-			/* Software fallback - just for R9G9B9E5_FLOAT */
-			util_clear_render_target(pipe, sf, &color,
-						 box->x, box->y,
-						 box->width, box->height);
-		} else {
-			pipe->clear_render_target(pipe, sf, &color,
-						  box->x, box->y,
-						  box->width, box->height, false);
-		}
-	}
-
-	pipe_surface_reference(&sf, NULL);
-}
-
-/**
- * Translate the channel swizzle of "format" into a V_028C70_SWAP_* value.
- *
- * "do_endian_swap" selects the alternative swap for the layouts below that
- * test it. Returns ~0U if the format is not plain (R11G11B10_FLOAT being
- * the one exception) or if its swizzle matches none of the handled
- * layouts.
- */
-unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap)
-{
- const struct util_format_description *desc = util_format_description(format);
-
-/* True if channel "chan" of the format selects component "swz". */
-#define HAS_SWIZZLE(chan,swz) (desc->swizzle[chan] == PIPE_SWIZZLE_##swz)
-
- if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
- return V_028C70_SWAP_STD;
-
- if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
- return ~0U;
-
- switch (desc->nr_channels) {
- case 1:
- if (HAS_SWIZZLE(0,X))
- return V_028C70_SWAP_STD; /* X___ */
- else if (HAS_SWIZZLE(3,X))
- return V_028C70_SWAP_ALT_REV; /* ___X */
- break;
- case 2:
- if ((HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,Y)) ||
- (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,NONE)) ||
- (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,Y)))
- return V_028C70_SWAP_STD; /* XY__ */
- else if ((HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,X)) ||
- (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,NONE)) ||
- (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,X)))
- /* YX__ */
- return (do_endian_swap ? V_028C70_SWAP_STD : V_028C70_SWAP_STD_REV);
- else if (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(3,Y))
- return V_028C70_SWAP_ALT; /* X__Y */
- else if (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(3,X))
- return V_028C70_SWAP_ALT_REV; /* Y__X */
- break;
- case 3:
- if (HAS_SWIZZLE(0,X))
- return (do_endian_swap ? V_028C70_SWAP_STD_REV : V_028C70_SWAP_STD);
- else if (HAS_SWIZZLE(0,Z))
- return V_028C70_SWAP_STD_REV; /* ZYX */
- break;
- case 4:
- /* check the middle channels, the 1st and 4th channel can be NONE */
- if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z)) {
- return V_028C70_SWAP_STD; /* XYZW */
- } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y)) {
- return V_028C70_SWAP_STD_REV; /* WZYX */
- } else if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,X)) {
- return V_028C70_SWAP_ALT; /* ZYXW */
- } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,W)) {
- /* YZWX */
- if (desc->is_array)
- return V_028C70_SWAP_ALT_REV;
- else
- return (do_endian_swap ? V_028C70_SWAP_ALT : V_028C70_SWAP_ALT_REV);
- }
- break;
- }
- /* No handled layout matched. */
- return ~0U;
-}
-
-/* PIPELINE_STAT-BASED DCC ENABLEMENT FOR DISPLAYABLE SURFACES */
-
-/* Empty one DCC statistics slot of the context: stop its query if it is
- * active, destroy all of its queries and drop the texture reference. */
-static void vi_dcc_clean_up_context_slot(struct r600_common_context *rctx,
-					 int slot)
-{
-	unsigned q;
-
-	if (rctx->dcc_stats[slot].query_active)
-		vi_separate_dcc_stop_query(&rctx->b,
-					   rctx->dcc_stats[slot].tex);
-
-	for (q = 0; q < ARRAY_SIZE(rctx->dcc_stats[slot].ps_stats); q++) {
-		if (!rctx->dcc_stats[slot].ps_stats[q])
-			continue;
-
-		rctx->b.destroy_query(&rctx->b,
-				      rctx->dcc_stats[slot].ps_stats[q]);
-		rctx->dcc_stats[slot].ps_stats[q] = NULL;
-	}
-
-	r600_texture_reference(&rctx->dcc_stats[slot].tex, NULL);
-}
-
-/**
- * Return the per-context slot where DCC statistics queries for the texture live.
- *
- * Slots whose texture is referenced only by this array are cleaned up
- * first. If the texture has no slot yet, it is put into the first empty
- * slot, or into the slot with the oldest last_use_timestamp after that slot
- * has been cleaned up. The slot's last_use_timestamp is refreshed in every
- * case.
- */
-static unsigned vi_get_context_dcc_stats_index(struct r600_common_context *rctx,
- struct r600_texture *tex)
-{
- int i, empty_slot = -1;
-
- /* Remove zombie textures (textures kept alive by this array only). */
- for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++)
- if (rctx->dcc_stats[i].tex &&
- rctx->dcc_stats[i].tex->resource.b.b.reference.count == 1)
- vi_dcc_clean_up_context_slot(rctx, i);
-
- /* Find the texture. */
- for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++) {
- /* Return if found. */
- if (rctx->dcc_stats[i].tex == tex) {
- rctx->dcc_stats[i].last_use_timestamp = os_time_get();
- return i;
- }
-
- /* Record the first seen empty slot. */
- if (empty_slot == -1 && !rctx->dcc_stats[i].tex)
- empty_slot = i;
- }
-
- /* Not found. Remove the oldest member to make space in the array. */
- if (empty_slot == -1) {
- int oldest_slot = 0;
-
- /* Find the oldest slot. */
- for (i = 1; i < ARRAY_SIZE(rctx->dcc_stats); i++)
- if (rctx->dcc_stats[oldest_slot].last_use_timestamp >
- rctx->dcc_stats[i].last_use_timestamp)
- oldest_slot = i;
-
- /* Clean up the oldest slot. */
- vi_dcc_clean_up_context_slot(rctx, oldest_slot);
- empty_slot = oldest_slot;
- }
-
- /* Add the texture to the new slot. */
- r600_texture_reference(&rctx->dcc_stats[empty_slot].tex, tex);
- rctx->dcc_stats[empty_slot].last_use_timestamp = os_time_get();
- return empty_slot;
-}
-
-/* Create a pipeline statistics query flagged with
- * R600_QUERY_HW_FLAG_BEGIN_RESUMES. */
-static struct pipe_query *
-vi_create_resuming_pipestats_query(struct pipe_context *ctx)
-{
-	struct pipe_query *pq =
-		ctx->create_query(ctx, PIPE_QUERY_PIPELINE_STATISTICS, 0);
-
-	((struct r600_query_hw*)pq)->flags |= R600_QUERY_HW_FLAG_BEGIN_RESUMES;
-	return pq;
-}
-
-/**
- * Called when binding a color buffer.
- *
- * Begins (or resumes) the statistics query in ps_stats[0] of the texture's
- * slot, creating the query first if the slot doesn't have one.
- */
-void vi_separate_dcc_start_query(struct pipe_context *ctx,
-				 struct r600_texture *tex)
-{
-	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
-	unsigned slot = vi_get_context_dcc_stats_index(rctx, tex);
-	struct pipe_query **current = &rctx->dcc_stats[slot].ps_stats[0];
-
-	assert(!rctx->dcc_stats[slot].query_active);
-
-	if (!*current)
-		*current = vi_create_resuming_pipestats_query(ctx);
-
-	/* begin or resume the query */
-	ctx->begin_query(ctx, *current);
-	rctx->dcc_stats[slot].query_active = true;
-}
-
-/**
- * Called when unbinding a color buffer.
- *
- * Pauses (or ends) the statistics query in ps_stats[0] of the texture's
- * slot and marks the slot's query as inactive.
- */
-void vi_separate_dcc_stop_query(struct pipe_context *ctx,
-				struct r600_texture *tex)
-{
-	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
-	unsigned slot = vi_get_context_dcc_stats_index(rctx, tex);
-	struct pipe_query *current = rctx->dcc_stats[slot].ps_stats[0];
-
-	assert(rctx->dcc_stats[slot].query_active);
-	assert(current);
-
-	/* pause or end the query */
-	ctx->end_query(ctx, current);
-	rctx->dcc_stats[slot].query_active = false;
-}
-
-/* Decide from the gathered statistics whether separate DCC is worth
- * enabling for the texture. */
-static bool vi_should_enable_separate_dcc(struct r600_texture *tex)
-{
-	/* The minimum number of fullscreen draws per frame that is required
-	 * to enable DCC. */
-	enum { MIN_FULLSCREEN_DRAWS = 5 };
-
-	return tex->ps_draw_ratio + tex->num_slow_clears >= MIN_FULLSCREEN_DRAWS;
-}
-
-/* Called by fast clear.
- *
- * Starts DCC statistics gathering for an eligible texture and, once the
- * statistics say DCC is worthwhile, discards CMASK and attaches a separate
- * DCC buffer (a previously used one if available, a new one otherwise).
- * Does nothing if the texture is not eligible or already has DCC.
- */
-static void vi_separate_dcc_try_enable(struct r600_common_context *rctx,
- struct r600_texture *tex)
-{
- /* The intent is to use this with shared displayable back buffers,
- * but it's not strictly limited only to them.
- */
- if (!tex->resource.b.is_shared ||
- !(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) ||
- tex->resource.b.b.target != PIPE_TEXTURE_2D ||
- tex->resource.b.b.last_level > 0 ||
- !tex->surface.dcc_size)
- return;
-
- if (tex->dcc_offset)
- return; /* already enabled */
-
- /* Enable the DCC stat gathering. */
- if (!tex->dcc_gather_statistics) {
- tex->dcc_gather_statistics = true;
- vi_separate_dcc_start_query(&rctx->b, tex);
- }
-
- if (!vi_should_enable_separate_dcc(tex))
- return; /* stats show that DCC decompression is too expensive */
-
- assert(tex->surface.num_dcc_levels);
- assert(!tex->dcc_separate_buffer);
-
- r600_texture_discard_cmask(rctx->screen, tex);
-
- /* Get a DCC buffer. */
- if (tex->last_dcc_separate_buffer) {
- /* Reuse the buffer that was set aside when DCC was last disabled. */
- assert(tex->dcc_gather_statistics);
- assert(!tex->dcc_separate_buffer);
- tex->dcc_separate_buffer = tex->last_dcc_separate_buffer;
- tex->last_dcc_separate_buffer = NULL;
- } else {
- tex->dcc_separate_buffer = (struct r600_resource*)
- si_aligned_buffer_create(rctx->b.screen,
- R600_RESOURCE_FLAG_UNMAPPABLE,
- PIPE_USAGE_DEFAULT,
- tex->surface.dcc_size,
- tex->surface.dcc_alignment);
- /* Allocation failed: DCC stays disabled (dcc_offset remains 0). */
- if (!tex->dcc_separate_buffer)
- return;
- }
-
- /* dcc_offset is the absolute GPUVM address. */
- tex->dcc_offset = tex->dcc_separate_buffer->gpu_address;
-
- /* no need to flag anything since this is called by fast clear that
- * flags framebuffer state
- */
-}
-
-/**
- * Called by pipe_context::flush_resource, the place where DCC decompression
- * takes place.
- *
- * ps_stats[] of the texture's slot is used as a 3-entry queue: [0] is the
- * query currently being recorded and [2] is the oldest one, whose result is
- * read here. The result updates tex->ps_draw_ratio, the queue is rotated by
- * one, and separate DCC is turned off if the statistics no longer justify it.
- */
-void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx,
- struct r600_texture *tex)
-{
- struct r600_common_context *rctx = (struct r600_common_context*)ctx;
- struct pipe_query *tmp;
- unsigned i = vi_get_context_dcc_stats_index(rctx, tex);
- bool query_active = rctx->dcc_stats[i].query_active;
- bool disable = false;
-
- if (rctx->dcc_stats[i].ps_stats[2]) {
- union pipe_query_result result;
-
- /* Read the results. */
- ctx->get_query_result(ctx, rctx->dcc_stats[i].ps_stats[2],
- true, &result);
- si_query_hw_reset_buffers(rctx,
- (struct r600_query_hw*)
- rctx->dcc_stats[i].ps_stats[2]);
-
- /* Compute the approximate number of fullscreen draws. */
- tex->ps_draw_ratio =
- result.pipeline_statistics.ps_invocations /
- (tex->resource.b.b.width0 * tex->resource.b.b.height0);
- rctx->last_tex_ps_draw_ratio = tex->ps_draw_ratio;
-
- /* Only a texture that currently has a separate DCC buffer can be disabled. */
- disable = tex->dcc_separate_buffer &&
- !vi_should_enable_separate_dcc(tex);
- }
-
- tex->num_slow_clears = 0;
-
- /* stop the statistics query for ps_stats[0] */
- if (query_active)
- vi_separate_dcc_stop_query(ctx, tex);
-
- /* Move the queries in the queue by one. */
- tmp = rctx->dcc_stats[i].ps_stats[2];
- rctx->dcc_stats[i].ps_stats[2] = rctx->dcc_stats[i].ps_stats[1];
- rctx->dcc_stats[i].ps_stats[1] = rctx->dcc_stats[i].ps_stats[0];
- rctx->dcc_stats[i].ps_stats[0] = tmp;
-
- /* create and start a new query as ps_stats[0] */
- if (query_active)
- vi_separate_dcc_start_query(ctx, tex);
-
- if (disable) {
- /* Set the DCC buffer aside so it can be reused if DCC is enabled again. */
- assert(!tex->last_dcc_separate_buffer);
- tex->last_dcc_separate_buffer = tex->dcc_separate_buffer;
- tex->dcc_separate_buffer = NULL;
- tex->dcc_offset = 0;
- /* no need to flag anything since this is called after
- * decompression that re-sets framebuffer state
- */
- }
-}
-
-/* FAST COLOR CLEAR */
-
-static void evergreen_set_clear_color(struct r600_texture *rtex,
- enum pipe_format surface_format,
- const union pipe_color_union *color)
-{
- union util_color uc;
-
- memset(&uc, 0, sizeof(uc));
-
- if (rtex->surface.bpe == 16) {
- /* DCC fast clear only:
- * CLEAR_WORD0 = R = G = B
- * CLEAR_WORD1 = A
- */
- assert(color->ui[0] == color->ui[1] &&
- color->ui[0] == color->ui[2]);
- uc.ui[0] = color->ui[0];
- uc.ui[1] = color->ui[3];
- } else if (util_format_is_pure_uint(surface_format)) {
- util_format_write_4ui(surface_format, color->ui, 0, &uc, 0, 0, 0, 1, 1);
- } else if (util_format_is_pure_sint(surface_format)) {
- util_format_write_4i(surface_format, color->i, 0, &uc, 0, 0, 0, 1, 1);
- } else {
- util_pack_color(color->f, surface_format, &uc);
- }
-
- memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t));
-}
-
-static bool vi_get_fast_clear_parameters(enum pipe_format surface_format,
- const union pipe_color_union *color,
- uint32_t* reset_value,
- bool* clear_words_needed)
-{
- bool values[4] = {};
- int i;
- bool main_value = false;
- bool extra_value = false;
- int extra_channel;
-
- /* This is needed to get the correct DCC clear value for luminance formats.
- * 1) Get the linear format (because the next step can't handle L8_SRGB).
- * 2) Convert luminance to red. (the real hw format for luminance)
- */
- surface_format = util_format_linear(surface_format);
- surface_format = util_format_luminance_to_red(surface_format);
-
- const struct util_format_description *desc = util_format_description(surface_format);
-
- if (desc->block.bits == 128 &&
- (color->ui[0] != color->ui[1] ||
- color->ui[0] != color->ui[2]))
- return false;
-
- *clear_words_needed = true;
- *reset_value = 0x20202020U;
-
- /* If we want to clear without needing a fast clear eliminate step, we
- * can set each channel to 0 or 1 (or 0/max for integer formats). We
- * have two sets of flags, one for the last or first channel(extra) and
- * one for the other channels(main).
- */
-
- if (surface_format == PIPE_FORMAT_R11G11B10_FLOAT ||
- surface_format == PIPE_FORMAT_B5G6R5_UNORM ||
- surface_format == PIPE_FORMAT_B5G6R5_SRGB ||
- util_format_is_alpha(surface_format)) {
- extra_channel = -1;
- } else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
- if(si_translate_colorswap(surface_format, false) <= 1)
- extra_channel = desc->nr_channels - 1;
- else
- extra_channel = 0;
- } else
- return true;
-
- for (i = 0; i < 4; ++i) {
- int index = desc->swizzle[i] - PIPE_SWIZZLE_X;
-
- if (desc->swizzle[i] < PIPE_SWIZZLE_X ||
- desc->swizzle[i] > PIPE_SWIZZLE_W)
- continue;
-
- if (desc->channel[i].pure_integer &&
- desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
- /* Use the maximum value for clamping the clear color. */
- int max = u_bit_consecutive(0, desc->channel[i].size - 1);
-
- values[i] = color->i[i] != 0;
- if (color->i[i] != 0 && MIN2(color->i[i], max) != max)
- return true;
- } else if (desc->channel[i].pure_integer &&
- desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
- /* Use the maximum value for clamping the clear color. */
- unsigned max = u_bit_consecutive(0, desc->channel[i].size);
-
- values[i] = color->ui[i] != 0U;
- if (color->ui[i] != 0U && MIN2(color->ui[i], max) != max)
- return true;
- } else {
- values[i] = color->f[i] != 0.0F;
- if (color->f[i] != 0.0F && color->f[i] != 1.0F)
- return true;
- }
-
- if (index == extra_channel)
- extra_value = values[i];
- else
- main_value = values[i];
- }
-
- for (int i = 0; i < 4; ++i)
- if (values[i] != main_value &&
- desc->swizzle[i] - PIPE_SWIZZLE_X != extra_channel &&
- desc->swizzle[i] >= PIPE_SWIZZLE_X &&
- desc->swizzle[i] <= PIPE_SWIZZLE_W)
- return true;
-
- *clear_words_needed = false;
- if (main_value)
- *reset_value |= 0x80808080U;
-
- if (extra_value)
- *reset_value |= 0x40404040U;
- return true;
-}
-
-void vi_dcc_clear_level(struct r600_common_context *rctx,
- struct r600_texture *rtex,
- unsigned level, unsigned clear_value)
-{
- struct pipe_resource *dcc_buffer;
- uint64_t dcc_offset, clear_size;
-
- assert(vi_dcc_enabled(rtex, level));
-
- if (rtex->dcc_separate_buffer) {
- dcc_buffer = &rtex->dcc_separate_buffer->b.b;
- dcc_offset = 0;
- } else {
- dcc_buffer = &rtex->resource.b.b;
- dcc_offset = rtex->dcc_offset;
- }
-
- if (rctx->chip_class >= GFX9) {
- /* Mipmap level clears aren't implemented. */
- assert(rtex->resource.b.b.last_level == 0);
- /* MSAA needs a different clear size. */
- assert(rtex->resource.b.b.nr_samples <= 1);
- clear_size = rtex->surface.dcc_size;
- } else {
- unsigned num_layers = util_max_layer(&rtex->resource.b.b, level) + 1;
-
- dcc_offset += rtex->surface.u.legacy.level[level].dcc_offset;
- clear_size = rtex->surface.u.legacy.level[level].dcc_fast_clear_size *
- num_layers;
- }
-
- rctx->clear_buffer(&rctx->b, dcc_buffer, dcc_offset, clear_size,
- clear_value, R600_COHERENCY_CB_META);
-}
-
-/* Set the same micro tile mode as the destination of the last MSAA resolve.
- * This allows hitting the MSAA resolve fast path, which requires that both
- * src and dst micro tile modes match.
- */
-static void si_set_optimal_micro_tile_mode(struct r600_common_screen *rscreen,
- struct r600_texture *rtex)
-{
- if (rtex->resource.b.is_shared ||
- rtex->resource.b.b.nr_samples <= 1 ||
- rtex->surface.micro_tile_mode == rtex->last_msaa_resolve_target_micro_mode)
- return;
-
- assert(rscreen->chip_class >= GFX9 ||
- rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
- assert(rtex->resource.b.b.last_level == 0);
-
- if (rscreen->chip_class >= GFX9) {
- /* 4K or larger tiles only. 0 is linear. 1-3 are 256B tiles. */
- assert(rtex->surface.u.gfx9.surf.swizzle_mode >= 4);
-
- /* If you do swizzle_mode % 4, you'll get:
- * 0 = Depth
- * 1 = Standard,
- * 2 = Displayable
- * 3 = Rotated
- *
- * Depth-sample order isn't allowed:
- */
- assert(rtex->surface.u.gfx9.surf.swizzle_mode % 4 != 0);
-
- switch (rtex->last_msaa_resolve_target_micro_mode) {
- case RADEON_MICRO_MODE_DISPLAY:
- rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
- rtex->surface.u.gfx9.surf.swizzle_mode += 2; /* D */
- break;
- case RADEON_MICRO_MODE_THIN:
- rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
- rtex->surface.u.gfx9.surf.swizzle_mode += 1; /* S */
- break;
- case RADEON_MICRO_MODE_ROTATED:
- rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
- rtex->surface.u.gfx9.surf.swizzle_mode += 3; /* R */
- break;
- default: /* depth */
- assert(!"unexpected micro mode");
- return;
- }
- } else if (rscreen->chip_class >= CIK) {
- /* These magic numbers were copied from addrlib. It doesn't use
- * any definitions for them either. They are all 2D_TILED_THIN1
- * modes with different bpp and micro tile mode.
- */
- switch (rtex->last_msaa_resolve_target_micro_mode) {
- case RADEON_MICRO_MODE_DISPLAY:
- rtex->surface.u.legacy.tiling_index[0] = 10;
- break;
- case RADEON_MICRO_MODE_THIN:
- rtex->surface.u.legacy.tiling_index[0] = 14;
- break;
- case RADEON_MICRO_MODE_ROTATED:
- rtex->surface.u.legacy.tiling_index[0] = 28;
- break;
- default: /* depth, thick */
- assert(!"unexpected micro mode");
- return;
- }
- } else { /* SI */
- switch (rtex->last_msaa_resolve_target_micro_mode) {
- case RADEON_MICRO_MODE_DISPLAY:
- switch (rtex->surface.bpe) {
- case 1:
- rtex->surface.u.legacy.tiling_index[0] = 10;
- break;
- case 2:
- rtex->surface.u.legacy.tiling_index[0] = 11;
- break;
- default: /* 4, 8 */
- rtex->surface.u.legacy.tiling_index[0] = 12;
- break;
- }
- break;
- case RADEON_MICRO_MODE_THIN:
- switch (rtex->surface.bpe) {
- case 1:
- rtex->surface.u.legacy.tiling_index[0] = 14;
- break;
- case 2:
- rtex->surface.u.legacy.tiling_index[0] = 15;
- break;
- case 4:
- rtex->surface.u.legacy.tiling_index[0] = 16;
- break;
- default: /* 8, 16 */
- rtex->surface.u.legacy.tiling_index[0] = 17;
- break;
- }
- break;
- default: /* depth, thick */
- assert(!"unexpected micro mode");
- return;
- }
- }
-
- rtex->surface.micro_tile_mode = rtex->last_msaa_resolve_target_micro_mode;
-
- p_atomic_inc(&rscreen->dirty_tex_counter);
-}
-
-void si_do_fast_color_clear(struct r600_common_context *rctx,
- struct pipe_framebuffer_state *fb,
- struct r600_atom *fb_state,
- unsigned *buffers, ubyte *dirty_cbufs,
- const union pipe_color_union *color)
-{
- int i;
-
- /* This function is broken in BE, so just disable this path for now */
-#ifdef PIPE_ARCH_BIG_ENDIAN
- return;
-#endif
-
- if (rctx->render_cond)
- return;
-
- for (i = 0; i < fb->nr_cbufs; i++) {
- struct r600_texture *tex;
- unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;
-
- if (!fb->cbufs[i])
- continue;
-
- /* if this colorbuffer is not being cleared */
- if (!(*buffers & clear_bit))
- continue;
-
- tex = (struct r600_texture *)fb->cbufs[i]->texture;
-
- /* the clear is allowed if all layers are bound */
- if (fb->cbufs[i]->u.tex.first_layer != 0 ||
- fb->cbufs[i]->u.tex.last_layer != util_max_layer(&tex->resource.b.b, 0)) {
- continue;
- }
-
- /* cannot clear mipmapped textures */
- if (fb->cbufs[i]->texture->last_level != 0) {
- continue;
- }
-
- /* only supported on tiled surfaces */
- if (tex->surface.is_linear) {
- continue;
- }
-
- /* shared textures can't use fast clear without an explicit flush,
- * because there is no way to communicate the clear color among
- * all clients
- */
- if (tex->resource.b.is_shared &&
- !(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
- continue;
-
- /* fast color clear with 1D tiling doesn't work on old kernels and CIK */
- if (rctx->chip_class == CIK &&
- tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
- rctx->screen->info.drm_major == 2 &&
- rctx->screen->info.drm_minor < 38) {
- continue;
- }
-
- /* Fast clear is the most appropriate place to enable DCC for
- * displayable surfaces.
- */
- if (rctx->chip_class >= VI &&
- !(rctx->screen->debug_flags & DBG(NO_DCC_FB))) {
- vi_separate_dcc_try_enable(rctx, tex);
-
- /* RB+ isn't supported with a CMASK clear only on Stoney,
- * so all clears are considered to be hypothetically slow
- * clears, which is weighed when determining whether to
- * enable separate DCC.
- */
- if (tex->dcc_gather_statistics &&
- rctx->family == CHIP_STONEY)
- tex->num_slow_clears++;
- }
-
- /* Try to clear DCC first, otherwise try CMASK. */
- if (vi_dcc_enabled(tex, 0)) {
- uint32_t reset_value;
- bool clear_words_needed;
-
- if (rctx->screen->debug_flags & DBG(NO_DCC_CLEAR))
- continue;
-
- if (!vi_get_fast_clear_parameters(fb->cbufs[i]->format,
- color, &reset_value,
- &clear_words_needed))
- continue;
-
- vi_dcc_clear_level(rctx, tex, 0, reset_value);
-
- unsigned level_bit = 1 << fb->cbufs[i]->u.tex.level;
- if (clear_words_needed) {
- bool need_compressed_update = !tex->dirty_level_mask;
-
- tex->dirty_level_mask |= level_bit;
-
- if (need_compressed_update)
- p_atomic_inc(&rctx->screen->compressed_colortex_counter);
- }
- tex->separate_dcc_dirty = true;
- } else {
- /* 128-bit formats are unusupported */
- if (tex->surface.bpe > 8) {
- continue;
- }
-
- /* RB+ doesn't work with CMASK fast clear on Stoney. */
- if (rctx->family == CHIP_STONEY)
- continue;
-
- /* ensure CMASK is enabled */
- r600_texture_alloc_cmask_separate(rctx->screen, tex);
- if (tex->cmask.size == 0) {
- continue;
- }
-
- /* Do the fast clear. */
- rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
- tex->cmask.offset, tex->cmask.size, 0,
- R600_COHERENCY_CB_META);
-
- bool need_compressed_update = !tex->dirty_level_mask;
-
- tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
-
- if (need_compressed_update)
- p_atomic_inc(&rctx->screen->compressed_colortex_counter);
- }
-
- /* We can change the micro tile mode before a full clear. */
- si_set_optimal_micro_tile_mode(rctx->screen, tex);
-
- evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);
-
- if (dirty_cbufs)
- *dirty_cbufs |= 1 << i;
- rctx->set_atom_dirty(rctx, fb_state, true);
- *buffers &= ~clear_bit;
- }
-}
-
-static struct pipe_memory_object *
-r600_memobj_from_handle(struct pipe_screen *screen,
- struct winsys_handle *whandle,
- bool dedicated)
-{
- struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
- struct r600_memory_object *memobj = CALLOC_STRUCT(r600_memory_object);
- struct pb_buffer *buf = NULL;
- uint32_t stride, offset;
-
- if (!memobj)
- return NULL;
-
- buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle,
- &stride, &offset);
- if (!buf) {
- free(memobj);
- return NULL;
- }
-
- memobj->b.dedicated = dedicated;
- memobj->buf = buf;
- memobj->stride = stride;
- memobj->offset = offset;
-
- return (struct pipe_memory_object *)memobj;
-
-}
-
-static void
-r600_memobj_destroy(struct pipe_screen *screen,
- struct pipe_memory_object *_memobj)
-{
- struct r600_memory_object *memobj = (struct r600_memory_object *)_memobj;
-
- pb_reference(&memobj->buf, NULL);
- free(memobj);
-}
-
-static struct pipe_resource *
-r600_texture_from_memobj(struct pipe_screen *screen,
- const struct pipe_resource *templ,
- struct pipe_memory_object *_memobj,
- uint64_t offset)
-{
- int r;
- struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
- struct r600_memory_object *memobj = (struct r600_memory_object *)_memobj;
- struct r600_texture *rtex;
- struct radeon_surf surface = {};
- struct radeon_bo_metadata metadata = {};
- enum radeon_surf_mode array_mode;
- bool is_scanout;
- struct pb_buffer *buf = NULL;
-
- if (memobj->b.dedicated) {
- rscreen->ws->buffer_get_metadata(memobj->buf, &metadata);
- r600_surface_import_metadata(rscreen, &surface, &metadata,
- &array_mode, &is_scanout);
- } else {
- /**
- * The bo metadata is unset for un-dedicated images. So we fall
- * back to linear. See answer to question 5 of the
- * VK_KHX_external_memory spec for some details.
- *
- * It is possible that this case isn't going to work if the
- * surface pitch isn't correctly aligned by default.
- *
- * In order to support it correctly we require multi-image
- * metadata to be syncrhonized between radv and radeonsi. The
- * semantics of associating multiple image metadata to a memory
- * object on the vulkan export side are not concretely defined
- * either.
- *
- * All the use cases we are aware of at the moment for memory
- * objects use dedicated allocations. So lets keep the initial
- * implementation simple.
- *
- * A possible alternative is to attempt to reconstruct the
- * tiling information when the TexParameter TEXTURE_TILING_EXT
- * is set.
- */
- array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
- is_scanout = false;
-
- }
-
- r = r600_init_surface(rscreen, &surface, templ,
- array_mode, memobj->stride,
- offset, true, is_scanout,
- false, false);
- if (r)
- return NULL;
-
- rtex = r600_texture_create_object(screen, templ, memobj->buf, &surface);
- if (!rtex)
- return NULL;
-
- /* r600_texture_create_object doesn't increment refcount of
- * memobj->buf, so increment it here.
- */
- pb_reference(&buf, memobj->buf);
-
- rtex->resource.b.is_shared = true;
- rtex->resource.external_usage = PIPE_HANDLE_USAGE_READ_WRITE;
-
- if (rscreen->apply_opaque_metadata)
- rscreen->apply_opaque_metadata(rscreen, rtex, &metadata);
-
- return &rtex->resource.b.b;
-}
-
-static bool si_check_resource_capability(struct pipe_screen *screen,
- struct pipe_resource *resource,
- unsigned bind)
-{
- struct r600_texture *tex = (struct r600_texture*)resource;
-
- /* Buffers only support the linear flag. */
- if (resource->target == PIPE_BUFFER)
- return (bind & ~PIPE_BIND_LINEAR) == 0;
-
- if (bind & PIPE_BIND_LINEAR && !tex->surface.is_linear)
- return false;
-
- if (bind & PIPE_BIND_SCANOUT && !tex->surface.is_displayable)
- return false;
-
- /* TODO: PIPE_BIND_CURSOR - do we care? */
- return true;
-}
-
-void si_init_screen_texture_functions(struct r600_common_screen *rscreen)
-{
- rscreen->b.resource_from_handle = r600_texture_from_handle;
- rscreen->b.resource_get_handle = r600_texture_get_handle;
- rscreen->b.resource_from_memobj = r600_texture_from_memobj;
- rscreen->b.memobj_create_from_handle = r600_memobj_from_handle;
- rscreen->b.memobj_destroy = r600_memobj_destroy;
- rscreen->b.check_resource_capability = si_check_resource_capability;
-}
-
-void si_init_context_texture_functions(struct r600_common_context *rctx)
-{
- rctx->b.create_surface = r600_create_surface;
- rctx->b.surface_destroy = r600_surface_destroy;
- rctx->b.clear_texture = r600_clear_texture;
-}
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c b/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c
index e91cb2155..0f3b43de8 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c
@@ -25,12 +25,6 @@
*
**************************************************************************/
-/*
- * Authors:
- * Christian König <christian.koenig@amd.com>
- *
- */
-
#include <sys/types.h>
#include <assert.h>
#include <errno.h>
@@ -45,7 +39,7 @@
#include "vl/vl_defines.h"
#include "vl/vl_mpeg12_decoder.h"
-#include "r600_pipe_common.h"
+#include "radeonsi/si_pipe.h"
#include "radeon_video.h"
#include "radeon_uvd.h"
@@ -73,7 +67,7 @@ struct ruvd_decoder {
struct pipe_screen *screen;
struct radeon_winsys* ws;
- struct radeon_winsys_cs* cs;
+ struct radeon_cmdbuf* cs;
unsigned cur_buffer;
@@ -122,8 +116,7 @@ static void send_cmd(struct ruvd_decoder *dec, unsigned cmd,
int reloc_idx;
reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
- domain,
- RADEON_PRIO_UVD);
+ domain, 0);
if (!dec->use_legacy) {
uint64_t addr;
addr = dec->ws->buffer_get_virtual_address(buf);
@@ -337,7 +330,7 @@ static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec, struct pipe_
static unsigned get_db_pitch_alignment(struct ruvd_decoder *dec)
{
- if (((struct r600_common_screen*)dec->screen)->family < CHIP_VEGA10)
+ if (((struct si_screen*)dec->screen)->info.family < CHIP_VEGA10)
return 16;
else
return 32;
@@ -402,7 +395,7 @@ static unsigned calc_dpb_size(struct ruvd_decoder *dec)
max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references);
dpb_size = image_size * max_references;
if ((dec->stream_type != RUVD_CODEC_H264_PERF) ||
- (((struct r600_common_screen*)dec->screen)->family < CHIP_POLARIS10)) {
+ (((struct si_screen*)dec->screen)->info.family < CHIP_POLARIS10)) {
dpb_size += max_references * align(width_in_mb * height_in_mb * 192, alignment);
dpb_size += align(width_in_mb * height_in_mb * 32, alignment);
}
@@ -412,7 +405,7 @@ static unsigned calc_dpb_size(struct ruvd_decoder *dec)
// reference picture buffer
dpb_size = image_size * max_references;
if ((dec->stream_type != RUVD_CODEC_H264_PERF) ||
- (((struct r600_common_screen*)dec->screen)->family < CHIP_POLARIS10)) {
+ (((struct si_screen*)dec->screen)->info.family < CHIP_POLARIS10)) {
// macroblock context buffer
dpb_size += width_in_mb * height_in_mb * max_references * 192;
// IT surface buffer
@@ -612,7 +605,7 @@ static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video
result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6;
result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7;
result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8;
- if (((struct r600_common_screen*)dec->screen)->family == CHIP_CARRIZO)
+ if (((struct si_screen*)dec->screen)->info.family == CHIP_CARRIZO)
result.sps_info_flags |= 1 << 9;
if (pic->UseRefPicList == true)
result.sps_info_flags |= 1 << 10;
@@ -971,139 +964,6 @@ static struct ruvd_mpeg4 get_mpeg4_msg(struct ruvd_decoder *dec,
return result;
}
-static void get_mjpeg_slice_header(struct ruvd_decoder *dec, struct pipe_mjpeg_picture_desc *pic)
-{
- int size = 0, saved_size, len_pos, i;
- uint16_t *bs;
- uint8_t *buf = dec->bs_ptr;
-
- /* SOI */
- buf[size++] = 0xff;
- buf[size++] = 0xd8;
-
- /* DQT */
- buf[size++] = 0xff;
- buf[size++] = 0xdb;
-
- len_pos = size++;
- size++;
-
- for (i = 0; i < 4; ++i) {
- if (pic->quantization_table.load_quantiser_table[i] == 0)
- continue;
-
- buf[size++] = i;
- memcpy((buf + size), &pic->quantization_table.quantiser_table[i], 64);
- size += 64;
- }
-
- bs = (uint16_t*)&buf[len_pos];
- *bs = util_bswap16(size - 4);
-
- saved_size = size;
-
- /* DHT */
- buf[size++] = 0xff;
- buf[size++] = 0xc4;
-
- len_pos = size++;
- size++;
-
- for (i = 0; i < 2; ++i) {
- if (pic->huffman_table.load_huffman_table[i] == 0)
- continue;
-
- buf[size++] = 0x00 | i;
- memcpy((buf + size), &pic->huffman_table.table[i].num_dc_codes, 16);
- size += 16;
- memcpy((buf + size), &pic->huffman_table.table[i].dc_values, 12);
- size += 12;
- }
-
- for (i = 0; i < 2; ++i) {
- if (pic->huffman_table.load_huffman_table[i] == 0)
- continue;
-
- buf[size++] = 0x10 | i;
- memcpy((buf + size), &pic->huffman_table.table[i].num_ac_codes, 16);
- size += 16;
- memcpy((buf + size), &pic->huffman_table.table[i].ac_values, 162);
- size += 162;
- }
-
- bs = (uint16_t*)&buf[len_pos];
- *bs = util_bswap16(size - saved_size - 2);
-
- saved_size = size;
-
- /* DRI */
- if (pic->slice_parameter.restart_interval) {
- buf[size++] = 0xff;
- buf[size++] = 0xdd;
- buf[size++] = 0x00;
- buf[size++] = 0x04;
- bs = (uint16_t*)&buf[size++];
- *bs = util_bswap16(pic->slice_parameter.restart_interval);
- saved_size = ++size;
- }
-
- /* SOF */
- buf[size++] = 0xff;
- buf[size++] = 0xc0;
-
- len_pos = size++;
- size++;
-
- buf[size++] = 0x08;
-
- bs = (uint16_t*)&buf[size++];
- *bs = util_bswap16(pic->picture_parameter.picture_height);
- size++;
-
- bs = (uint16_t*)&buf[size++];
- *bs = util_bswap16(pic->picture_parameter.picture_width);
- size++;
-
- buf[size++] = pic->picture_parameter.num_components;
-
- for (i = 0; i < pic->picture_parameter.num_components; ++i) {
- buf[size++] = pic->picture_parameter.components[i].component_id;
- buf[size++] = pic->picture_parameter.components[i].h_sampling_factor << 4 |
- pic->picture_parameter.components[i].v_sampling_factor;
- buf[size++] = pic->picture_parameter.components[i].quantiser_table_selector;
- }
-
- bs = (uint16_t*)&buf[len_pos];
- *bs = util_bswap16(size - saved_size - 2);
-
- saved_size = size;
-
- /* SOS */
- buf[size++] = 0xff;
- buf[size++] = 0xda;
-
- len_pos = size++;
- size++;
-
- buf[size++] = pic->slice_parameter.num_components;
-
- for (i = 0; i < pic->slice_parameter.num_components; ++i) {
- buf[size++] = pic->slice_parameter.components[i].component_selector;
- buf[size++] = pic->slice_parameter.components[i].dc_table_selector << 4 |
- pic->slice_parameter.components[i].ac_table_selector;
- }
-
- buf[size++] = 0x00;
- buf[size++] = 0x3f;
- buf[size++] = 0x00;
-
- bs = (uint16_t*)&buf[len_pos];
- *bs = util_bswap16(size - saved_size - 2);
-
- dec->bs_ptr += size;
- dec->bs_size += size;
-}
-
/**
* destroy this video decoder
*/
@@ -1182,7 +1042,6 @@ static void ruvd_decode_bitstream(struct pipe_video_codec *decoder,
const unsigned *sizes)
{
struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
- enum pipe_video_format format = u_reduce_video_profile(picture->profile);
unsigned i;
assert(decoder);
@@ -1190,16 +1049,10 @@ static void ruvd_decode_bitstream(struct pipe_video_codec *decoder,
if (!dec->bs_ptr)
return;
- if (format == PIPE_VIDEO_FORMAT_JPEG)
- get_mjpeg_slice_header(dec, (struct pipe_mjpeg_picture_desc*)picture);
-
for (i = 0; i < num_buffers; ++i) {
struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer];
unsigned new_size = dec->bs_size + sizes[i];
- if (format == PIPE_VIDEO_FORMAT_JPEG)
- new_size += 2; /* save for EOI */
-
if (new_size > buf->res->buf->size) {
dec->ws->buffer_unmap(buf->res->buf);
if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
@@ -1219,13 +1072,6 @@ static void ruvd_decode_bitstream(struct pipe_video_codec *decoder,
dec->bs_size += sizes[i];
dec->bs_ptr += sizes[i];
}
-
- if (format == PIPE_VIDEO_FORMAT_JPEG) {
- ((uint8_t *)dec->bs_ptr)[0] = 0xff; /* EOI */
- ((uint8_t *)dec->bs_ptr)[1] = 0xd9;
- dec->bs_size += 2;
- dec->bs_ptr += 2;
- }
}
/**
@@ -1275,11 +1121,11 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
dec->msg->body.decode.db_pitch = align(dec->base.width, get_db_pitch_alignment(dec));
if (dec->stream_type == RUVD_CODEC_H264_PERF &&
- ((struct r600_common_screen*)dec->screen)->family >= CHIP_POLARIS10)
+ ((struct si_screen*)dec->screen)->info.family >= CHIP_POLARIS10)
dec->msg->body.decode.dpb_reserved = dec->ctx.res->buf->size;
dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target);
- if (((struct r600_common_screen*)dec->screen)->family >= CHIP_STONEY)
+ if (((struct si_screen*)dec->screen)->info.family >= CHIP_STONEY)
dec->msg->body.decode.dt_wa_chroma_top_offset = dec->msg->body.decode.dt_pitch / 2;
switch (u_reduce_video_profile(picture->profile)) {
@@ -1351,7 +1197,7 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
FB_BUFFER_OFFSET + dec->fb_size, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
set_reg(dec, dec->reg.cntl, 1);
- flush(dec, RADEON_FLUSH_ASYNC);
+ flush(dec, PIPE_FLUSH_ASYNC);
next_buffer(dec);
}
@@ -1369,17 +1215,14 @@ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *conte
const struct pipe_video_codec *templ,
ruvd_set_dtb set_dtb)
{
- struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws;
- struct r600_common_context *rctx = (struct r600_common_context*)context;
+ struct si_context *sctx = (struct si_context*)context;
+ struct radeon_winsys *ws = sctx->ws;
unsigned dpb_size;
unsigned width = templ->width, height = templ->height;
unsigned bs_buf_size;
- struct radeon_info info;
struct ruvd_decoder *dec;
int r, i;
- ws->query_info(ws, &info);
-
switch(u_reduce_video_profile(templ->profile)) {
case PIPE_VIDEO_FORMAT_MPEG12:
if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM)
@@ -1405,7 +1248,7 @@ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *conte
if (!dec)
return NULL;
- if (info.drm_major < 3)
+ if (sctx->screen->info.drm_major < 3)
dec->use_legacy = true;
dec->base = *templ;
@@ -1420,12 +1263,12 @@ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *conte
dec->base.end_frame = ruvd_end_frame;
dec->base.flush = ruvd_flush;
- dec->stream_type = profile2stream_type(dec, info.family);
+ dec->stream_type = profile2stream_type(dec, sctx->family);
dec->set_dtb = set_dtb;
dec->stream_handle = si_vid_alloc_stream_handle();
dec->screen = context->screen;
dec->ws = ws;
- dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL);
+ dec->cs = ws->cs_create(sctx->ctx, RING_UVD, NULL, NULL);
if (!dec->cs) {
RVID_ERR("Can't get command submission context.\n");
goto error;
@@ -1433,7 +1276,7 @@ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *conte
for (i = 0; i < 16; i++)
dec->render_pic_list[i] = NULL;
- dec->fb_size = (info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA :
+ dec->fb_size = (sctx->family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA :
FB_BUFFER_SIZE;
bs_buf_size = width * height * (512 / (16 * 16));
for (i = 0; i < NUM_BUFFERS; ++i) {
@@ -1466,7 +1309,7 @@ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *conte
si_vid_clear_buffer(context, &dec->dpb);
}
- if (dec->stream_type == RUVD_CODEC_H264_PERF && info.family >= CHIP_POLARIS10) {
+ if (dec->stream_type == RUVD_CODEC_H264_PERF && sctx->family >= CHIP_POLARIS10) {
unsigned ctx_size = calc_ctx_size_h264_perf(dec);
if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't allocated context buffer.\n");
@@ -1475,7 +1318,7 @@ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *conte
si_vid_clear_buffer(context, &dec->ctx);
}
- if (info.family >= CHIP_POLARIS10 && info.drm_minor >= 3) {
+ if (sctx->family >= CHIP_POLARIS10 && sctx->screen->info.drm_minor >= 3) {
if (!si_vid_create_buffer(dec->screen, &dec->sessionctx,
UVD_SESSION_CONTEXT_SIZE,
PIPE_USAGE_DEFAULT)) {
@@ -1485,7 +1328,7 @@ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *conte
si_vid_clear_buffer(context, &dec->sessionctx);
}
- if (info.family >= CHIP_VEGA10) {
+ if (sctx->family >= CHIP_VEGA10) {
dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0_SOC15;
dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1_SOC15;
dec->reg.cmd = RUVD_GPCOM_VCPU_CMD_SOC15;
@@ -1539,7 +1382,7 @@ static unsigned texture_offset(struct radeon_surf *surface, unsigned layer,
default:
case RUVD_SURFACE_TYPE_LEGACY:
return surface->u.legacy.level[0].offset +
- layer * surface->u.legacy.level[0].slice_size;
+ layer * (uint64_t)surface->u.legacy.level[0].slice_size_dw * 4;
break;
case RUVD_SURFACE_TYPE_GFX9:
return surface->u.gfx9.surf_offset +
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h b/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h
index 2bb2ce21d..583b4d5e4 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h
@@ -25,12 +25,6 @@
*
**************************************************************************/
-/*
- * Authors:
- * Christian König <christian.koenig@amd.com>
- *
- */
-
#ifndef RADEON_UVD_H
#define RADEON_UVD_H
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vce.c b/lib/mesa/src/gallium/drivers/radeon/radeon_vce.c
index 9e98741fb..8972253c7 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_vce.c
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vce.c
@@ -25,12 +25,6 @@
*
**************************************************************************/
-/*
- * Authors:
- * Christian König <christian.koenig@amd.com>
- *
- */
-
#include <stdio.h>
#include "pipe/p_video_codec.h"
@@ -40,7 +34,7 @@
#include "vl/vl_video_buffer.h"
-#include "r600_pipe_common.h"
+#include "radeonsi/si_pipe.h"
#include "radeon_video.h"
#include "radeon_vce.h"
@@ -59,7 +53,7 @@
*/
static void flush(struct rvce_encoder *enc)
{
- enc->ws->cs_flush(enc->cs, RADEON_FLUSH_ASYNC, NULL);
+ enc->ws->cs_flush(enc->cs, PIPE_FLUSH_ASYNC, NULL);
enc->task_info_idx = 0;
enc->bs_idx = 0;
}
@@ -225,10 +219,10 @@ struct rvce_cpb_slot *si_l1_slot(struct rvce_encoder *enc)
void si_vce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
signed *luma_offset, signed *chroma_offset)
{
- struct r600_common_screen *rscreen = (struct r600_common_screen *)enc->screen;
+ struct si_screen *sscreen = (struct si_screen *)enc->screen;
unsigned pitch, vpitch, fsize;
- if (rscreen->chip_class < GFX9) {
+ if (sscreen->info.chip_class < GFX9) {
pitch = align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128);
vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16);
} else {
@@ -394,18 +388,18 @@ struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context,
struct radeon_winsys* ws,
rvce_get_buffer get_buffer)
{
- struct r600_common_screen *rscreen = (struct r600_common_screen *)context->screen;
- struct r600_common_context *rctx = (struct r600_common_context*)context;
+ struct si_screen *sscreen = (struct si_screen *)context->screen;
+ struct si_context *sctx = (struct si_context*)context;
struct rvce_encoder *enc;
struct pipe_video_buffer *tmp_buf, templat = {};
struct radeon_surf *tmp_surf;
unsigned cpb_size;
- if (!rscreen->info.vce_fw_version) {
+ if (!sscreen->info.vce_fw_version) {
RVID_ERR("Kernel doesn't supports VCE!\n");
return NULL;
- } else if (!si_vce_is_fw_version_supported(rscreen)) {
+ } else if (!si_vce_is_fw_version_supported(sscreen)) {
RVID_ERR("Unsupported VCE fw version loaded!\n");
return NULL;
}
@@ -414,20 +408,21 @@ struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context,
if (!enc)
return NULL;
- if (rscreen->info.drm_major == 3)
+ if (sscreen->info.drm_major == 3)
enc->use_vm = true;
- if ((rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42) ||
- rscreen->info.drm_major == 3)
+ if ((sscreen->info.drm_major == 2 && sscreen->info.drm_minor >= 42) ||
+ sscreen->info.drm_major == 3)
enc->use_vui = true;
- if (rscreen->info.family >= CHIP_TONGA &&
- rscreen->info.family != CHIP_STONEY &&
- rscreen->info.family != CHIP_POLARIS11 &&
- rscreen->info.family != CHIP_POLARIS12)
+ if (sscreen->info.family >= CHIP_TONGA &&
+ sscreen->info.family != CHIP_STONEY &&
+ sscreen->info.family != CHIP_POLARIS11 &&
+ sscreen->info.family != CHIP_POLARIS12 &&
+ sscreen->info.family != CHIP_VEGAM)
enc->dual_pipe = true;
/* TODO enable B frame with dual instance */
- if ((rscreen->info.family >= CHIP_TONGA) &&
+ if ((sscreen->info.family >= CHIP_TONGA) &&
(templ->max_references == 1) &&
- (rscreen->info.vce_harvest_config == 0))
+ (sscreen->info.vce_harvest_config == 0))
enc->dual_inst = true;
enc->base = *templ;
@@ -443,7 +438,7 @@ struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context,
enc->screen = context->screen;
enc->ws = ws;
- enc->cs = ws->cs_create(rctx->ctx, RING_VCE, rvce_cs_flush, enc);
+ enc->cs = ws->cs_create(sctx->ctx, RING_VCE, rvce_cs_flush, enc);
if (!enc->cs) {
RVID_ERR("Can't get command submission context.\n");
goto error;
@@ -465,7 +460,7 @@ struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context,
get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf);
- cpb_size = (rscreen->chip_class < GFX9) ?
+ cpb_size = (sscreen->info.chip_class < GFX9) ?
align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) *
align(tmp_surf->u.legacy.level[0].nblk_y, 32) :
@@ -489,7 +484,7 @@ struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context,
reset_cpb(enc);
- switch (rscreen->info.vce_fw_version) {
+ switch (sscreen->info.vce_fw_version) {
case FW_40_2_2:
si_vce_40_2_2_init(enc);
si_get_pic_param = si_vce_40_2_2_get_param;
@@ -511,7 +506,7 @@ struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context,
break;
default:
- if ((rscreen->info.vce_fw_version & (0xff << 24)) == FW_53) {
+ if ((sscreen->info.vce_fw_version & (0xff << 24)) >= FW_53) {
si_vce_52_init(enc);
si_get_pic_param = si_vce_52_get_param;
} else
@@ -534,9 +529,9 @@ error:
/**
* check if kernel has the right fw version loaded
*/
-bool si_vce_is_fw_version_supported(struct r600_common_screen *rscreen)
+bool si_vce_is_fw_version_supported(struct si_screen *sscreen)
{
- switch (rscreen->info.vce_fw_version) {
+ switch (sscreen->info.vce_fw_version) {
case FW_40_2_2:
case FW_50_0_1:
case FW_50_1_2:
@@ -547,7 +542,7 @@ bool si_vce_is_fw_version_supported(struct r600_common_screen *rscreen)
case FW_52_8_3:
return true;
default:
- if ((rscreen->info.vce_fw_version & (0xff << 24)) == FW_53)
+ if ((sscreen->info.vce_fw_version & (0xff << 24)) >= FW_53)
return true;
else
return false;
@@ -564,7 +559,7 @@ void si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
int reloc_idx;
reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
- domain, RADEON_PRIO_VCE);
+ domain, 0);
if (enc->use_vm) {
uint64_t addr;
addr = enc->ws->buffer_get_virtual_address(buf);
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_40_2_2.c b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
index 3be38bca1..e17468c90 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
@@ -25,12 +25,6 @@
*
**************************************************************************/
-/*
- * Authors:
- * Christian König <christian.koenig@amd.com>
- *
- */
-
#include <stdio.h>
#include "pipe/p_video_codec.h"
@@ -40,12 +34,10 @@
#include "vl/vl_video_buffer.h"
-#include "r600_pipe_common.h"
+#include "si_pipe.h"
#include "radeon_video.h"
#include "radeon_vce.h"
-static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 };
-
static void session(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x00000001); // session cmd
@@ -88,8 +80,7 @@ static void create(struct rvce_encoder *enc)
RVCE_BEGIN(0x01000001); // create cmd
RVCE_CS(0x00000000); // encUseCircularBuffer
- RVCE_CS(profiles[enc->base.profile -
- PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE]); // encProfile
+ RVCE_CS(u_get_h264_profile_idc(enc->base.profile)); // encProfile
RVCE_CS(enc->base.level); // encLevel
RVCE_CS(0x00000000); // encPicStructRestriction
RVCE_CS(enc->base.width); // encImageWidth
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_50.c b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_50.c
index 96bb557eb..f4cbc9bb8 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_50.c
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_50.c
@@ -25,12 +25,6 @@
*
**************************************************************************/
-/*
- * Authors:
- * Christian König <christian.koenig@amd.com>
- *
- */
-
#include <stdio.h>
#include "pipe/p_video_codec.h"
@@ -40,7 +34,7 @@
#include "vl/vl_video_buffer.h"
-#include "r600_pipe_common.h"
+#include "si_pipe.h"
#include "radeon_video.h"
#include "radeon_vce.h"
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_52.c b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_52.c
index 09fe424fd..fc7ddc62a 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_52.c
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_52.c
@@ -34,12 +34,10 @@
#include "vl/vl_video_buffer.h"
-#include "r600_pipe_common.h"
+#include "radeonsi/si_pipe.h"
#include "radeon_video.h"
#include "radeon_vce.h"
-static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 };
-
static void get_rate_control_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic)
{
enc->enc_pic.rc.rc_method = pic->rate_ctrl.rate_ctrl_method;
@@ -162,24 +160,23 @@ void si_vce_52_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_
enc->enc_pic.addrmode_arraymode_disrdo_distwoinstants = 0x00000201;
else
enc->enc_pic.addrmode_arraymode_disrdo_distwoinstants = 0x01000201;
- enc->enc_pic.is_idr = pic->is_idr;
+ enc->enc_pic.is_idr = (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR);
}
static void create(struct rvce_encoder *enc)
{
- struct r600_common_screen *rscreen = (struct r600_common_screen *)enc->screen;
+ struct si_screen *sscreen = (struct si_screen *)enc->screen;
enc->task_info(enc, 0x00000000, 0, 0, 0);
RVCE_BEGIN(0x01000001); // create cmd
RVCE_CS(enc->enc_pic.ec.enc_use_circular_buffer);
- RVCE_CS(profiles[enc->base.profile -
- PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE]); // encProfile
+ RVCE_CS(u_get_h264_profile_idc(enc->base.profile)); // encProfile
RVCE_CS(enc->base.level); // encLevel
RVCE_CS(enc->enc_pic.ec.enc_pic_struct_restriction);
RVCE_CS(enc->base.width); // encImageWidth
RVCE_CS(enc->base.height); // encImageHeight
- if (rscreen->chip_class < GFX9) {
+ if (sscreen->info.chip_class < GFX9) {
RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encRefPicLumaPitch
RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encRefPicChromaPitch
RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16) / 8); // encRefYHeightInQw
@@ -200,7 +197,7 @@ static void create(struct rvce_encoder *enc)
static void encode(struct rvce_encoder *enc)
{
- struct r600_common_screen *rscreen = (struct r600_common_screen *)enc->screen;
+ struct si_screen *sscreen = (struct si_screen *)enc->screen;
signed luma_offset, chroma_offset, bs_offset;
unsigned dep, bs_idx = enc->bs_idx++;
int i;
@@ -250,7 +247,7 @@ static void encode(struct rvce_encoder *enc)
RVCE_CS(enc->enc_pic.eo.end_of_sequence);
RVCE_CS(enc->enc_pic.eo.end_of_stream);
- if (rscreen->chip_class < GFX9) {
+ if (sscreen->info.chip_class < GFX9) {
RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
enc->luma->u.legacy.level[0].offset); // inputPictureLumaAddressHi/Lo
RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.c b/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.c
index 59724869b..75ef4a5d4 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.c
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.c
@@ -35,13 +35,15 @@
#include "vl/vl_mpeg12_decoder.h"
-#include "r600_pipe_common.h"
+#include "radeonsi/si_pipe.h"
#include "radeon_video.h"
#include "radeon_vcn_dec.h"
+#include "vl/vl_probs_table.h"
#define FB_BUFFER_OFFSET 0x1000
#define FB_BUFFER_SIZE 2048
#define IT_SCALING_TABLE_SIZE 992
+#define VP9_PROBS_TABLE_SIZE (RDECODE_VP9_PROBS_DATA_SIZE + 256)
#define RDECODE_SESSION_CONTEXT_SIZE (128 * 1024)
#define RDECODE_GPCOM_VCPU_CMD 0x2070c
@@ -49,37 +51,10 @@
#define RDECODE_GPCOM_VCPU_DATA1 0x20714
#define RDECODE_ENGINE_CNTL 0x20718
-#define NUM_BUFFERS 4
#define NUM_MPEG2_REFS 6
#define NUM_H264_REFS 17
#define NUM_VC1_REFS 5
-
-struct radeon_decoder {
- struct pipe_video_codec base;
-
- unsigned stream_handle;
- unsigned stream_type;
- unsigned frame_number;
-
- struct pipe_screen *screen;
- struct radeon_winsys *ws;
- struct radeon_winsys_cs *cs;
-
- void *msg;
- uint32_t *fb;
- uint8_t *it;
- void *bs_ptr;
-
- struct rvid_buffer msg_fb_it_buffers[NUM_BUFFERS];
- struct rvid_buffer bs_buffers[NUM_BUFFERS];
- struct rvid_buffer dpb;
- struct rvid_buffer ctx;
- struct rvid_buffer sessionctx;
-
- unsigned bs_size;
- unsigned cur_buffer;
- void *render_pic_list[16];
-};
+#define NUM_VP9_REFS 8
static rvcn_dec_message_avc_t get_h264_msg(struct radeon_decoder *dec,
struct pipe_h264_picture_desc *pic)
@@ -200,7 +175,7 @@ static rvcn_dec_message_hevc_t get_h265_msg(struct radeon_decoder *dec,
result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6;
result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7;
result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8;
- if (((struct r600_common_screen*)dec->screen)->family == CHIP_CARRIZO)
+ if (((struct si_screen*)dec->screen)->info.family == CHIP_CARRIZO)
result.sps_info_flags |= 1 << 9;
if (pic->UseRefPicList == true)
result.sps_info_flags |= 1 << 10;
@@ -358,6 +333,205 @@ static rvcn_dec_message_hevc_t get_h265_msg(struct radeon_decoder *dec,
return result;
}
+/**
+ * Fill a VP9 probability table with the default probabilities.
+ *
+ * ptr is treated as a rvcn_dec_vp9_probs_t and is written without any
+ * NULL or size check, so the caller must pass a writable mapping that is
+ * large enough for that structure.
+ *
+ * The default_* tables are not defined in this file; presumably they come
+ * from vl/vl_probs_table.h - TODO confirm.
+ */
+static void fill_probs_table(void *ptr)
+{
+ rvcn_dec_vp9_probs_t *probs = (rvcn_dec_vp9_probs_t *)ptr;
+
+ /* coefficient probabilities, one slot per transform size;
+  * every copy length below is the size of the source table */
+ memcpy(&probs->coef_probs[0], default_coef_probs_4x4, sizeof(default_coef_probs_4x4));
+ memcpy(&probs->coef_probs[1], default_coef_probs_8x8, sizeof(default_coef_probs_8x8));
+ memcpy(&probs->coef_probs[2], default_coef_probs_16x16, sizeof(default_coef_probs_16x16));
+ memcpy(&probs->coef_probs[3], default_coef_probs_32x32, sizeof(default_coef_probs_32x32));
+ memcpy(probs->y_mode_prob, default_if_y_probs, sizeof(default_if_y_probs));
+ memcpy(probs->uv_mode_prob, default_if_uv_probs, sizeof(default_if_uv_probs));
+ memcpy(probs->single_ref_prob, default_single_ref_p, sizeof(default_single_ref_p));
+ memcpy(probs->switchable_interp_prob, default_switchable_interp_prob, sizeof(default_switchable_interp_prob));
+ memcpy(probs->partition_prob, default_partition_probs, sizeof(default_partition_probs));
+ memcpy(probs->inter_mode_probs, default_inter_mode_probs, sizeof(default_inter_mode_probs));
+ memcpy(probs->mbskip_probs, default_skip_probs, sizeof(default_skip_probs));
+ memcpy(probs->intra_inter_prob, default_intra_inter_p, sizeof(default_intra_inter_p));
+ memcpy(probs->comp_inter_prob, default_comp_inter_p, sizeof(default_comp_inter_p));
+ memcpy(probs->comp_ref_prob, default_comp_ref_p, sizeof(default_comp_ref_p));
+ memcpy(probs->tx_probs_32x32, default_tx_probs_32x32, sizeof(default_tx_probs_32x32));
+ memcpy(probs->tx_probs_16x16, default_tx_probs_16x16, sizeof(default_tx_probs_16x16));
+ memcpy(probs->tx_probs_8x8, default_tx_probs_8x8, sizeof(default_tx_probs_8x8));
+ memcpy(probs->mv_joints, default_nmv_joints, sizeof(default_nmv_joints));
+ memcpy(&probs->mv_comps[0], default_nmv_components, sizeof(default_nmv_components));
+ /* the motion vector context mask starts out all zero */
+ memset(&probs->nmvc_mask, 0, sizeof(rvcn_dec_vp9_nmv_ctx_mask_t));
+}
+
+/**
+ * Build the VP9 decode message for one picture.
+ *
+ * dec:    decoder state; read and modified (see side effects)
+ * target: video buffer the picture is decoded into
+ * pic:    VP9 picture description supplying the header fields
+ *
+ * Returns the filled rvcn_dec_message_vp9_t by value.
+ *
+ * Side effects:
+ *  - writes the segmentation data into the buffer dec->probs points to;
+ *    dec->probs is dereferenced unconditionally, so it must be mapped
+ *  - overwrites dec->show_frame with this picture's show_frame
+ *  - may claim a free dec->render_pic_list slot for target, advance
+ *    dec->ref_idx and attach the chosen index to target
+ */
+static rvcn_dec_message_vp9_t get_vp9_msg(struct radeon_decoder *dec,
+ struct pipe_video_buffer *target,
+ struct pipe_vp9_picture_desc *pic)
+{
+ rvcn_dec_message_vp9_t result;
+ unsigned i;
+
+ memset(&result, 0, sizeof(result));
+
+ /* segment table */
+ rvcn_dec_vp9_probs_segment_t *prbs = (rvcn_dec_vp9_probs_segment_t *)(dec->probs);
+
+ if (pic->picture_parameter.pic_fields.segmentation_enabled) {
+ /* per segment: pack alt_quant into bits 0-15, alt_lf into bits 16-23
+  * and segment_reference into bits 24-27 of feature_data; feature_mask
+  * carries the matching enable/skip flags in bits 0-3 */
+ for (i = 0; i < 8; ++i) {
+ prbs->seg.feature_data[i] =
+ (pic->slice_parameter.seg_param[i].alt_quant & 0xffff) |
+ ((pic->slice_parameter.seg_param[i].alt_lf & 0xff) << 16) |
+ ((pic->slice_parameter.seg_param[i].segment_flags.segment_reference & 0xf) << 24);
+ prbs->seg.feature_mask[i] =
+ (pic->slice_parameter.seg_param[i].alt_quant_enabled << 0) |
+ (pic->slice_parameter.seg_param[i].alt_lf_enabled << 1) |
+ (pic->slice_parameter.seg_param[i].segment_flags.segment_reference_enabled << 2) |
+ (pic->slice_parameter.seg_param[i].segment_flags.segment_reference_skipped << 3);
+ }
+
+ for (i = 0; i < 7; ++i)
+ prbs->seg.tree_probs[i] = pic->picture_parameter.mb_segment_tree_probs[i];
+
+ for (i = 0; i < 3; ++i)
+ prbs->seg.pred_probs[i] = pic->picture_parameter.segment_pred_probs[i];
+
+ prbs->seg.abs_delta = 0;
+ } else
+ /* NOTE(review): clears a hard-coded 256 bytes; presumably the size of
+  * prbs->seg - confirm against the struct definition */
+ memset(&prbs->seg, 0, 256);
+
+ /* frame_header_flags: each source field is shifted to its position and
+  * masked with the matching RDECODE_FRAME_HDR_INFO_VP9_* constant */
+ result.frame_header_flags =
+ (pic->picture_parameter.pic_fields.frame_type <<
+ RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_SHIFT) &
+ RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_MASK;
+
+ result.frame_header_flags |=
+ (pic->picture_parameter.pic_fields.error_resilient_mode <<
+ RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_SHIFT) &
+ RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_MASK;
+
+ result.frame_header_flags |=
+ (pic->picture_parameter.pic_fields.intra_only <<
+ RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_SHIFT) &
+ RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_MASK;
+
+ result.frame_header_flags |=
+ (pic->picture_parameter.pic_fields.allow_high_precision_mv <<
+ RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_SHIFT) &
+ RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_MASK;
+
+ result.frame_header_flags |=
+ (pic->picture_parameter.pic_fields.frame_parallel_decoding_mode <<
+ RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_SHIFT) &
+ RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_MASK;
+
+ result.frame_header_flags |=
+ (pic->picture_parameter.pic_fields.refresh_frame_context <<
+ RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_SHIFT) &
+ RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_MASK;
+
+ result.frame_header_flags |=
+ (pic->picture_parameter.pic_fields.segmentation_enabled <<
+ RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_SHIFT) &
+ RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_MASK;
+
+ result.frame_header_flags |=
+ (pic->picture_parameter.pic_fields.segmentation_update_map <<
+ RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_SHIFT) &
+ RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_MASK;
+
+ result.frame_header_flags |=
+ (pic->picture_parameter.pic_fields.segmentation_temporal_update <<
+ RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_SHIFT) &
+ RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_MASK;
+
+ result.frame_header_flags |=
+ (pic->picture_parameter.mode_ref_delta_enabled <<
+ RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_SHIFT) &
+ RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_MASK;
+
+ result.frame_header_flags |=
+ (pic->picture_parameter.mode_ref_delta_update <<
+ RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_SHIFT) &
+ RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_MASK;
+
+ /* uses dec->show_frame as left by the previous call; it is only
+  * replaced with this picture's value after the flag is computed */
+ result.frame_header_flags |= ((dec->show_frame &&
+ !pic->picture_parameter.pic_fields.error_resilient_mode)
+ << RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_SHIFT) &
+ RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_MASK;
+ dec->show_frame = pic->picture_parameter.pic_fields.show_frame;
+
+ result.interp_filter = pic->picture_parameter.pic_fields.mcomp_filter_type;
+
+ result.frame_context_idx = pic->picture_parameter.pic_fields.frame_context_idx;
+ result.reset_frame_context = pic->picture_parameter.pic_fields.reset_frame_context;
+
+ result.filter_level = pic->picture_parameter.filter_level;
+ result.sharpness_level = pic->picture_parameter.sharpness_level;
+
+ /* copies 8 bytes of loop filter levels per segment; the element layout
+  * behind "4 * 2" is not visible here - TODO confirm */
+ for (i = 0; i < 8; ++i)
+ memcpy(result.lf_adj_level[i], pic->slice_parameter.seg_param[i].filter_level, 4 * 2);
+
+ /* lossless pictures send zero for all quantizer fields */
+ if (pic->picture_parameter.pic_fields.lossless_flag) {
+ result.base_qindex = 0;
+ result.y_dc_delta_q = 0;
+ result.uv_ac_delta_q = 0;
+ result.uv_dc_delta_q = 0;
+ } else {
+ result.base_qindex = pic->picture_parameter.base_qindex;
+ result.y_dc_delta_q = pic->picture_parameter.y_dc_delta_q;
+ result.uv_ac_delta_q = pic->picture_parameter.uv_ac_delta_q;
+ result.uv_dc_delta_q = pic->picture_parameter.uv_dc_delta_q;
+ }
+
+ result.log2_tile_cols = pic->picture_parameter.log2_tile_columns;
+ result.log2_tile_rows = pic->picture_parameter.log2_tile_rows;
+ /* hard-coded chroma format; presumably 1 means 4:2:0 - TODO confirm */
+ result.chroma_format = 1;
+ result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8
+ = (pic->picture_parameter.bit_depth - 8);
+
+ result.vp9_frame_size = align(dec->bs_size, 128);
+ result.uncompressed_header_size = pic->picture_parameter.frame_header_length_in_bytes;
+ result.compressed_header_size = pic->picture_parameter.first_partition_size;
+
+ assert(dec->base.max_references + 1 <= 16);
+
+ /* find target in render_pic_list and reuse the index attached to it, or
+  * claim the first empty slot and hand out the next dec->ref_idx.
+  * NOTE(review): if neither is found within max_references + 1 entries,
+  * curr_pic_idx silently keeps the 0 from the memset above */
+ for (i = 0 ; i < dec->base.max_references + 1 ; ++i) {
+ if (dec->render_pic_list[i] && dec->render_pic_list[i] == target) {
+ result.curr_pic_idx =
+ (uintptr_t)vl_video_buffer_get_associated_data(target, &dec->base);
+ break;
+ } else if (!dec->render_pic_list[i]) {
+ dec->render_pic_list[i] = target;
+ result.curr_pic_idx = dec->ref_idx;
+ vl_video_buffer_set_associated_data(target, &dec->base,
+ (void *)(uintptr_t)dec->ref_idx++,
+ &radeon_dec_destroy_associated_data);
+ break;
+ }
+ }
+
+ /* a missing reference is reported as 0x7f */
+ for (i = 0 ; i < 8; i++) {
+ result.ref_frame_map[i] = (pic->ref[i]) ?
+ (uintptr_t)vl_video_buffer_get_associated_data(pic->ref[i], &dec->base) :
+ 0x7f;
+ }
+
+ /* frame_refs / ref_frame_sign_bias slots: 0 = last, 1 = golden, 2 = alt */
+ result.frame_refs[0] = result.ref_frame_map[pic->picture_parameter.pic_fields.last_ref_frame];
+ result.ref_frame_sign_bias[0] = pic->picture_parameter.pic_fields.last_ref_frame_sign_bias;
+ result.frame_refs[1] = result.ref_frame_map[pic->picture_parameter.pic_fields.golden_ref_frame];
+ result.ref_frame_sign_bias[1] = pic->picture_parameter.pic_fields.golden_ref_frame_sign_bias;
+ result.frame_refs[2] = result.ref_frame_map[pic->picture_parameter.pic_fields.alt_ref_frame];
+ result.ref_frame_sign_bias[2] = pic->picture_parameter.pic_fields.alt_ref_frame_sign_bias;
+
+ /* profile 2: a P016 target gets p010_mode and msb_mode, any other
+  * target format gets the luma/chroma 10to8 flags instead */
+ if (pic->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) {
+ if (target->buffer_format == PIPE_FORMAT_P016) {
+ result.p010_mode = 1;
+ result.msb_mode = 1;
+ } else {
+ result.p010_mode = 0;
+ result.luma_10to8 = 1;
+ result.chroma_10to8 = 1;
+ }
+ }
+
+ return result;
+}
+
static unsigned calc_ctx_size_h265_main(struct radeon_decoder *dec)
{
unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
@@ -554,15 +728,15 @@ static rvcn_dec_message_mpeg4_asp_vld_t get_mpeg4_msg(struct radeon_decoder *dec
result.vop_time_increment_resolution = pic->vop_time_increment_resolution;
- result.short_video_header |= pic->short_video_header << 0;
- result.interlaced |= pic->interlaced << 2;
- result.load_intra_quant_mat |= 1 << 3;
- result.load_nonintra_quant_mat |= 1 << 4;
- result.quarter_sample |= pic->quarter_sample << 5;
- result.complexity_estimation_disable |= 1 << 6;
- result.resync_marker_disable |= pic->resync_marker_disable << 7;
- result.newpred_enable |= 0 << 10; //
- result.reduced_resolution_vop_enable |= 0 << 11;
+ result.short_video_header = pic->short_video_header;
+ result.interlaced = pic->interlaced;
+ result.load_intra_quant_mat = 1;
+ result.load_nonintra_quant_mat = 1;
+ result.quarter_sample = pic->quarter_sample;
+ result.complexity_estimation_disable = 1;
+ result.resync_marker_disable = pic->resync_marker_disable;
+ result.newpred_enable = 0;
+ result.reduced_resolution_vop_enable = 0;
result.quant_type = pic->quant_type;
@@ -603,10 +777,10 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec,
struct pipe_video_buffer *target,
struct pipe_picture_desc *picture)
{
- struct r600_texture *luma = (struct r600_texture *)
- ((struct vl_video_buffer *)target)->resources[0];
- struct r600_texture *chroma = (struct r600_texture *)
- ((struct vl_video_buffer *)target)->resources[1];
+ struct si_texture *luma = (struct si_texture *)
+ ((struct vl_video_buffer *)target)->resources[0];
+ struct si_texture *chroma = (struct si_texture *)
+ ((struct vl_video_buffer *)target)->resources[1];
rvcn_dec_message_header_t *header;
rvcn_dec_message_index_t *index;
rvcn_dec_message_decode_t *decode;
@@ -640,16 +814,16 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec,
index->size = sizeof(rvcn_dec_message_avc_t);
index->filled = 0;
- decode->stream_type = dec->stream_type;;
+ decode->stream_type = dec->stream_type;
decode->decode_flags = 0x1;
- decode->width_in_samples = dec->base.width;;
- decode->height_in_samples = dec->base.height;;
+ decode->width_in_samples = dec->base.width;
+ decode->height_in_samples = dec->base.height;
decode->bsd_size = align(dec->bs_size, 128);
decode->dpb_size = dec->dpb.res->buf->size;
decode->dt_size =
- ((struct r600_resource *)((struct vl_video_buffer *)target)->resources[0])->buf->size +
- ((struct r600_resource *)((struct vl_video_buffer *)target)->resources[1])->buf->size;
+ r600_resource(((struct vl_video_buffer *)target)->resources[0])->buf->size +
+ r600_resource(((struct vl_video_buffer *)target)->resources[1])->buf->size;
decode->sct_size = 0;
decode->sc_coeff_size = 0;
@@ -736,6 +910,43 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec,
index->message_id = RDECODE_MESSAGE_MPEG4_ASP_VLD;
break;
}
+ case PIPE_VIDEO_FORMAT_VP9: {
+ rvcn_dec_message_vp9_t vp9 =
+ get_vp9_msg(dec, target, (struct pipe_vp9_picture_desc*)picture);
+
+ memcpy(codec, (void*)&vp9, sizeof(rvcn_dec_message_vp9_t));
+ index->message_id = RDECODE_MESSAGE_VP9;
+
+ if (dec->ctx.res == NULL) {
+ unsigned ctx_size;
+ uint8_t *ptr;
+
+ /* default probability + probability data */
+ ctx_size = 2304 * 5;
+
+ /* SRE collocated context data */
+ ctx_size += 32 * 2 * 64 * 64;
+
+ /* SMP collocated context data */
+ ctx_size += 9 * 64 * 2 * 64 * 64;
+
+ /* SDB left tile pixel */
+ ctx_size += 8 * 2 * 4096;
+
+ if (dec->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2)
+ ctx_size += 8 * 2 * 4096;
+
+ if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT))
+ RVID_ERR("Can't allocated context buffer.\n");
+ si_vid_clear_buffer(dec->base.context, &dec->ctx);
+
+ /* ctx needs probs table */
+ ptr = dec->ws->buffer_map(dec->ctx.res->buf, dec->cs, PIPE_TRANSFER_WRITE);
+ fill_probs_table(ptr);
+ dec->ws->buffer_unmap(dec->ctx.res->buf);
+ }
+ break;
+ }
default:
assert(0);
return NULL;
@@ -744,7 +955,7 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec,
if (dec->ctx.res)
decode->hw_ctxt_size = dec->ctx.res->buf->size;
- return luma->resource.buf;
+ return luma->buffer.buf;
}
static void rvcn_dec_message_destroy(struct radeon_decoder *dec)
@@ -791,7 +1002,7 @@ static void send_cmd(struct radeon_decoder *dec, unsigned cmd,
uint64_t addr;
dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
- domain, RADEON_PRIO_UVD);
+ domain, 0);
addr = dec->ws->buffer_get_virtual_address(buf);
addr = addr + off;
@@ -807,14 +1018,20 @@ static bool have_it(struct radeon_decoder *dec)
dec->stream_type == RDECODE_CODEC_H265;
}
+/* does the codec need a probs buffer? true only for the VP9 stream type */
+static bool have_probs(struct radeon_decoder *dec)
+{
+ return dec->stream_type == RDECODE_CODEC_VP9;
+}
+
/* map the next available message/feedback/itscaling buffer */
-static void map_msg_fb_it_buf(struct radeon_decoder *dec)
+static void map_msg_fb_it_probs_buf(struct radeon_decoder *dec)
{
struct rvid_buffer* buf;
uint8_t *ptr;
/* grab the current message/feedback buffer */
- buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
+ buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer];
/* and map it for CPU access */
ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE);
@@ -825,6 +1042,8 @@ static void map_msg_fb_it_buf(struct radeon_decoder *dec)
dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);
if (have_it(dec))
dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE);
+ else if (have_probs(dec))
+ dec->probs = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE);
}
/* unmap and send a message command to the VCPU */
@@ -837,13 +1056,14 @@ static void send_msg_buf(struct radeon_decoder *dec)
return;
/* grab the current message buffer */
- buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
+ buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer];
/* unmap the buffer */
dec->ws->buffer_unmap(buf->res->buf);
dec->msg = NULL;
dec->fb = NULL;
dec->it = NULL;
+ dec->probs = NULL;
if (dec->sessionctx.res)
send_cmd(dec, RDECODE_CMD_SESSION_CONTEXT_BUFFER,
@@ -1019,6 +1239,18 @@ static unsigned calc_dpb_size(struct radeon_decoder *dec)
dpb_size = MAX2(dpb_size, 30 * 1024 * 1024);
break;
+ case PIPE_VIDEO_FORMAT_VP9:
+ max_references = MAX2(max_references, 9);
+
+ dpb_size = (4096 * 3000 * 3 / 2) * max_references;
+ if (dec->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2)
+ dpb_size *= (3 / 2);
+ break;
+
+ case PIPE_VIDEO_FORMAT_JPEG:
+ dpb_size = 0;
+ break;
+
default:
// something is missing here
assert(0);
@@ -1040,7 +1272,7 @@ static void radeon_dec_destroy(struct pipe_video_codec *decoder)
assert(decoder);
- map_msg_fb_it_buf(dec);
+ map_msg_fb_it_probs_buf(dec);
rvcn_dec_message_destroy(dec);
send_msg_buf(dec);
@@ -1049,7 +1281,7 @@ static void radeon_dec_destroy(struct pipe_video_codec *decoder)
dec->ws->cs_destroy(dec->cs);
for (i = 0; i < NUM_BUFFERS; ++i) {
- si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
+ si_vid_destroy_buffer(&dec->msg_fb_it_probs_buffers[i]);
si_vid_destroy_buffer(&dec->bs_buffers[i]);
}
@@ -1073,7 +1305,8 @@ static void radeon_dec_begin_frame(struct pipe_video_codec *decoder,
assert(decoder);
frame = ++dec->frame_number;
- vl_video_buffer_set_associated_data(target, decoder, (void *)frame,
+ if (dec->stream_type != RDECODE_CODEC_VP9)
+ vl_video_buffer_set_associated_data(target, decoder, (void *)frame,
&radeon_dec_destroy_associated_data);
dec->bs_size = 0;
@@ -1139,28 +1372,22 @@ static void radeon_dec_decode_bitstream(struct pipe_video_codec *decoder,
}
/**
- * end decoding of the current frame
+ * send cmd for vcn dec
*/
-static void radeon_dec_end_frame(struct pipe_video_codec *decoder,
+void send_cmd_dec(struct radeon_decoder *dec,
struct pipe_video_buffer *target,
struct pipe_picture_desc *picture)
{
- struct radeon_decoder *dec = (struct radeon_decoder*)decoder;
struct pb_buffer *dt;
- struct rvid_buffer *msg_fb_it_buf, *bs_buf;
-
- assert(decoder);
+ struct rvid_buffer *msg_fb_it_probs_buf, *bs_buf;
- if (!dec->bs_ptr)
- return;
-
- msg_fb_it_buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
+ msg_fb_it_probs_buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer];
bs_buf = &dec->bs_buffers[dec->cur_buffer];
memset(dec->bs_ptr, 0, align(dec->bs_size, 128) - dec->bs_size);
dec->ws->buffer_unmap(bs_buf->res->buf);
- map_msg_fb_it_buf(dec);
+ map_msg_fb_it_probs_buf(dec);
dt = rvcn_dec_message_decode(dec, target, picture);
rvcn_dec_message_feedback(dec);
send_msg_buf(dec);
@@ -1174,14 +1401,34 @@ static void radeon_dec_end_frame(struct pipe_video_codec *decoder,
0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
send_cmd(dec, RDECODE_CMD_DECODING_TARGET_BUFFER, dt, 0,
RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
- send_cmd(dec, RDECODE_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->buf,
+ send_cmd(dec, RDECODE_CMD_FEEDBACK_BUFFER, msg_fb_it_probs_buf->res->buf,
FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
if (have_it(dec))
- send_cmd(dec, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, msg_fb_it_buf->res->buf,
+ send_cmd(dec, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, msg_fb_it_probs_buf->res->buf,
+ FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
+ else if (have_probs(dec))
+ send_cmd(dec, RDECODE_CMD_PROB_TBL_BUFFER, msg_fb_it_probs_buf->res->buf,
FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
set_reg(dec, RDECODE_ENGINE_CNTL, 1);
+}
+
+/**
+ * end decoding of the current frame
+ */
+static void radeon_dec_end_frame(struct pipe_video_codec *decoder,
+ struct pipe_video_buffer *target,
+ struct pipe_picture_desc *picture)
+{
+ struct radeon_decoder *dec = (struct radeon_decoder*)decoder;
+
+ assert(decoder);
+
+ if (!dec->bs_ptr)
+ return;
+
+ dec->send_cmd(dec, target, picture);
- flush(dec, RADEON_FLUSH_ASYNC);
+ flush(dec, PIPE_FLUSH_ASYNC);
next_buffer(dec);
}
@@ -1198,10 +1445,10 @@ static void radeon_dec_flush(struct pipe_video_codec *decoder)
struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
const struct pipe_video_codec *templ)
{
- struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws;
- struct r600_common_context *rctx = (struct r600_common_context*)context;
+ struct si_context *sctx = (struct si_context*)context;
+ struct radeon_winsys *ws = sctx->ws;
unsigned width = templ->width, height = templ->height;
- unsigned dpb_size, bs_buf_size, stream_type = 0;
+ unsigned dpb_size, bs_buf_size, stream_type = 0, ring = RING_VCN_DEC;
struct radeon_decoder *dec;
int r, i;
@@ -1227,6 +1474,13 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
case PIPE_VIDEO_FORMAT_HEVC:
stream_type = RDECODE_CODEC_H265;
break;
+ case PIPE_VIDEO_FORMAT_VP9:
+ stream_type = RDECODE_CODEC_VP9;
+ break;
+ case PIPE_VIDEO_FORMAT_JPEG:
+ stream_type = RDECODE_CODEC_JPEG;
+ ring = RING_VCN_JPEG;
+ break;
default:
assert(0);
break;
@@ -1253,7 +1507,7 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
dec->stream_handle = si_vid_alloc_stream_handle();
dec->screen = context->screen;
dec->ws = ws;
- dec->cs = ws->cs_create(rctx->ctx, RING_VCN_DEC, NULL, NULL);
+ dec->cs = ws->cs_create(sctx->ctx, ring, NULL, NULL);
if (!dec->cs) {
RVID_ERR("Can't get command submission context.\n");
goto error;
@@ -1263,12 +1517,14 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
dec->render_pic_list[i] = NULL;
bs_buf_size = width * height * (512 / (16 * 16));
for (i = 0; i < NUM_BUFFERS; ++i) {
- unsigned msg_fb_it_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
+ unsigned msg_fb_it_probs_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
if (have_it(dec))
- msg_fb_it_size += IT_SCALING_TABLE_SIZE;
+ msg_fb_it_probs_size += IT_SCALING_TABLE_SIZE;
+ else if (have_probs(dec))
+ msg_fb_it_probs_size += VP9_PROBS_TABLE_SIZE;
/* use vram to improve performance, workaround an unknown bug */
- if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i],
- msg_fb_it_size, PIPE_USAGE_DEFAULT)) {
+ if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_probs_buffers[i],
+ msg_fb_it_probs_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't allocated message buffers.\n");
goto error;
}
@@ -1279,18 +1535,29 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
goto error;
}
- si_vid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);
+ si_vid_clear_buffer(context, &dec->msg_fb_it_probs_buffers[i]);
si_vid_clear_buffer(context, &dec->bs_buffers[i]);
- }
- dpb_size = calc_dpb_size(dec);
+ if (have_probs(dec)) {
+ struct rvid_buffer* buf;
+ void *ptr;
- if (!si_vid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
- RVID_ERR("Can't allocated dpb.\n");
- goto error;
+ buf = &dec->msg_fb_it_probs_buffers[i];
+ ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE);
+ ptr += FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
+ fill_probs_table(ptr);
+ dec->ws->buffer_unmap(buf->res->buf);
+ }
}
- si_vid_clear_buffer(context, &dec->dpb);
+ dpb_size = calc_dpb_size(dec);
+ if (dpb_size) {
+ if (!si_vid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
+ RVID_ERR("Can't allocated dpb.\n");
+ goto error;
+ }
+ si_vid_clear_buffer(context, &dec->dpb);
+ }
if (dec->stream_type == RDECODE_CODEC_H264_PERF) {
unsigned ctx_size = calc_ctx_size_h264_perf(dec);
@@ -1309,7 +1576,7 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
}
si_vid_clear_buffer(context, &dec->sessionctx);
- map_msg_fb_it_buf(dec);
+ map_msg_fb_it_probs_buf(dec);
rvcn_dec_message_create(dec);
send_msg_buf(dec);
r = flush(dec, 0);
@@ -1318,13 +1585,18 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
next_buffer(dec);
+ if (stream_type == RDECODE_CODEC_JPEG)
+ dec->send_cmd = send_cmd_jpeg;
+ else
+ dec->send_cmd = send_cmd_dec;
+
return &dec->base;
error:
if (dec->cs) dec->ws->cs_destroy(dec->cs);
for (i = 0; i < NUM_BUFFERS; ++i) {
- si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
+ si_vid_destroy_buffer(&dec->msg_fb_it_probs_buffers[i]);
si_vid_destroy_buffer(&dec->bs_buffers[i]);
}
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.h b/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.h
index accffef6d..a6a726f46 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.h
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.h
@@ -43,10 +43,20 @@
#define RDECODE_PKT2() (RDECODE_PKT_TYPE_S(2))
+#define RDECODE_PKT_REG_J(x) ((unsigned)(x) & 0x3FFFF) /* register field: bits 0-17 */
+#define RDECODE_PKT_RES_J(x) (((unsigned)(x) & 0x3F) << 18) /* RES field: bits 18-23 */
+#define RDECODE_PKT_COND_J(x) (((unsigned)(x) & 0xF) << 24) /* condition field: bits 24-27 */
+#define RDECODE_PKT_TYPE_J(x) (((unsigned)(x) & 0xF) << 28) /* type field: bits 28-31 */
+#define RDECODE_PKTJ(reg, cond, type) (RDECODE_PKT_REG_J(reg) | \
+ RDECODE_PKT_RES_J(0) | \
+ RDECODE_PKT_COND_J(cond) | \
+ RDECODE_PKT_TYPE_J(type)) /* ORs the four fields into one unsigned value; the RES field is always written as 0 */
+
#define RDECODE_CMD_MSG_BUFFER 0x00000000
#define RDECODE_CMD_DPB_BUFFER 0x00000001
#define RDECODE_CMD_DECODING_TARGET_BUFFER 0x00000002
#define RDECODE_CMD_FEEDBACK_BUFFER 0x00000003
+#define RDECODE_CMD_PROB_TBL_BUFFER 0x00000004
#define RDECODE_CMD_SESSION_CONTEXT_BUFFER 0x00000005
#define RDECODE_CMD_BITSTREAM_BUFFER 0x00000100
#define RDECODE_CMD_IT_SCALING_TABLE_BUFFER 0x00000204
@@ -61,7 +71,9 @@
#define RDECODE_CODEC_MPEG2_VLD 0x00000003
#define RDECODE_CODEC_MPEG4 0x00000004
#define RDECODE_CODEC_H264_PERF 0x00000007
+#define RDECODE_CODEC_JPEG 0x00000008
#define RDECODE_CODEC_H265 0x00000010
+#define RDECODE_CODEC_VP9 0x00000011
#define RDECODE_ARRAY_MODE_LINEAR 0x00000000
#define RDECODE_ARRAY_MODE_MACRO_LINEAR_MICRO_TILED 0x00000001
@@ -100,11 +112,118 @@
#define RDECODE_MESSAGE_MPEG2_VLD 0x0000000A
#define RDECODE_MESSAGE_MPEG4_ASP_VLD 0x0000000B
#define RDECODE_MESSAGE_HEVC 0x0000000D
+#define RDECODE_MESSAGE_VP9 0x0000000E
#define RDECODE_FEEDBACK_PROFILING 0x00000001
#define RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT 7
+#define NUM_BUFFERS 4
+
+#define RDECODE_VP9_PROBS_DATA_SIZE 2304
+
+#define mmUVD_JPEG_CNTL 0x0200
+#define mmUVD_JPEG_CNTL_BASE_IDX 1
+#define mmUVD_JPEG_RB_BASE 0x0201
+#define mmUVD_JPEG_RB_BASE_BASE_IDX 1
+#define mmUVD_JPEG_RB_WPTR 0x0202
+#define mmUVD_JPEG_RB_WPTR_BASE_IDX 1
+#define mmUVD_JPEG_RB_RPTR 0x0203
+#define mmUVD_JPEG_RB_RPTR_BASE_IDX 1
+#define mmUVD_JPEG_RB_SIZE 0x0204
+#define mmUVD_JPEG_RB_SIZE_BASE_IDX 1
+#define mmUVD_JPEG_TIER_CNTL2 0x021a
+#define mmUVD_JPEG_TIER_CNTL2_BASE_IDX 1
+#define mmUVD_JPEG_UV_TILING_CTRL 0x021c
+#define mmUVD_JPEG_UV_TILING_CTRL_BASE_IDX 1
+#define mmUVD_JPEG_TILING_CTRL 0x021e
+#define mmUVD_JPEG_TILING_CTRL_BASE_IDX 1
+#define mmUVD_JPEG_OUTBUF_RPTR 0x0220
+#define mmUVD_JPEG_OUTBUF_RPTR_BASE_IDX 1
+#define mmUVD_JPEG_OUTBUF_WPTR 0x0221
+#define mmUVD_JPEG_OUTBUF_WPTR_BASE_IDX 1
+#define mmUVD_JPEG_PITCH 0x0222
+#define mmUVD_JPEG_PITCH_BASE_IDX 1
+#define mmUVD_JPEG_INT_EN 0x0229
+#define mmUVD_JPEG_INT_EN_BASE_IDX 1
+#define mmUVD_JPEG_UV_PITCH 0x022b
+#define mmUVD_JPEG_UV_PITCH_BASE_IDX 1
+#define mmUVD_JPEG_INDEX 0x023e
+#define mmUVD_JPEG_INDEX_BASE_IDX 1
+#define mmUVD_JPEG_DATA 0x023f
+#define mmUVD_JPEG_DATA_BASE_IDX 1
+#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH 0x0438
+#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH_BASE_IDX 1
+#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW 0x0439
+#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW_BASE_IDX 1
+#define mmUVD_LMI_JPEG_READ_64BIT_BAR_HIGH 0x045a
+#define mmUVD_LMI_JPEG_READ_64BIT_BAR_HIGH_BASE_IDX 1
+#define mmUVD_LMI_JPEG_READ_64BIT_BAR_LOW 0x045b
+#define mmUVD_LMI_JPEG_READ_64BIT_BAR_LOW_BASE_IDX 1
+#define mmUVD_CTX_INDEX 0x0528
+#define mmUVD_CTX_INDEX_BASE_IDX 1
+#define mmUVD_CTX_DATA 0x0529
+#define mmUVD_CTX_DATA_BASE_IDX 1
+#define mmUVD_SOFT_RESET 0x05a0
+#define mmUVD_SOFT_RESET_BASE_IDX 1
+
+#define UVD_BASE_INST0_SEG0 0x00007800
+#define UVD_BASE_INST0_SEG1 0x00007E00
+#define UVD_BASE_INST0_SEG2 0
+#define UVD_BASE_INST0_SEG3 0
+#define UVD_BASE_INST0_SEG4 0
+
+#define SOC15_REG_ADDR(reg) (UVD_BASE_INST0_SEG1 + (reg)) /* register offset relative to the SEG1 base; (reg) is parenthesized so arguments with lower-precedence operators (e.g. a << b, a | b, c ? a : b) are evaluated before the addition */
+
+#define COND0 0
+#define COND1 1
+#define COND2 2
+#define COND3 3
+#define COND4 4
+#define COND5 5
+#define COND6 6
+#define COND7 7
+
+#define TYPE0 0
+#define TYPE1 1
+#define TYPE2 2
+#define TYPE3 3
+#define TYPE4 4
+#define TYPE5 5
+#define TYPE6 6
+#define TYPE7 7
+
+/* VP9 frame header flags: one bit position (_SHIFT) per flag, bits 0-13; the matching _MASK values follow below */
+#define RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_SHIFT (13)
+#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_SHIFT (12)
+#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_SHIFT (11)
+#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_DATA_SHIFT (10)
+#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_SHIFT (9)
+#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_SHIFT (8)
+#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_SHIFT (7)
+#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_SHIFT (6)
+#define RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_SHIFT (5)
+#define RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_SHIFT (4)
+#define RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_SHIFT (3)
+#define RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_SHIFT (2)
+#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_SHIFT (1)
+#define RDECODE_FRAME_HDR_INFO_VP9_SHOW_EXISTING_FRAME_SHIFT (0)
+ /* single-bit masks: each _MASK below equals 1 << the corresponding _SHIFT above */
+#define RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_MASK (0x00002000)
+#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_MASK (0x00001000)
+#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_MASK (0x00000800)
+#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_DATA_MASK (0x00000400)
+#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_MASK (0x00000200)
+#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_MASK (0x00000100)
+#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_MASK (0x00000080)
+#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_MASK (0x00000040)
+#define RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_MASK (0x00000020)
+#define RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_MASK (0x00000010)
+#define RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_MASK (0x00000008)
+#define RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_MASK (0x00000004)
+#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_MASK (0x00000002)
+#define RDECODE_FRAME_HDR_INFO_VP9_SHOW_EXISTING_FRAME_MASK (0x00000001)
+
typedef struct rvcn_dec_message_index_s {
unsigned int message_id;
unsigned int offset;
@@ -443,6 +562,47 @@ typedef struct rvcn_dec_message_hevc_s {
unsigned char direct_reflist[2][15];
} rvcn_dec_message_hevc_t;
+typedef struct rvcn_dec_message_vp9_s { /* VP9 decode message payload (per its name); NOTE(review): field order is presumably dictated by the firmware interface -- confirm before reordering */
+ unsigned int frame_header_flags; /* presumably assembled with the RDECODE_FRAME_HDR_INFO_VP9_* shift/mask macros -- confirm against the message builder */
+
+ unsigned char frame_context_idx;
+ unsigned char reset_frame_context;
+
+ unsigned char curr_pic_idx;
+ unsigned char interp_filter;
+
+ unsigned char filter_level;
+ unsigned char sharpness_level;
+ unsigned char lf_adj_level[8][4][2]; /* 64 bytes */
+ unsigned char base_qindex;
+ signed char y_dc_delta_q; /* the three *_delta_q fields are the only signed members of this struct */
+ signed char uv_ac_delta_q;
+ signed char uv_dc_delta_q;
+
+ unsigned char log2_tile_cols;
+ unsigned char log2_tile_rows;
+ unsigned char tx_mode;
+ unsigned char reference_mode;
+ unsigned char chroma_format;
+
+ unsigned char ref_frame_map[8];
+
+ unsigned char frame_refs[3];
+ unsigned char ref_frame_sign_bias[3]; /* same element count as frame_refs[] */
+ unsigned char frame_to_show;
+ unsigned char bit_depth_luma_minus8;
+ unsigned char bit_depth_chroma_minus8;
+
+ unsigned char p010_mode;
+ unsigned char msb_mode;
+ unsigned char luma_10to8;
+ unsigned char chroma_10to8;
+
+ unsigned int vp9_frame_size; /* NOTE(review): the three size fields are presumably byte counts -- confirm */
+ unsigned int compressed_header_size;
+ unsigned int uncompressed_header_size;
+} rvcn_dec_message_vp9_t;
+
typedef struct rvcn_dec_feature_index_s {
unsigned int feature_id;
unsigned int offset;
@@ -500,6 +660,118 @@ typedef struct rvcn_dec_feedback_profiling_s {
unsigned int dmaHwCrc32Value2;
} rvcn_dec_feedback_profiling_t;
+typedef struct rvcn_dec_vp9_nmv_ctx_mask_s { /* first member (nmvc_mask) of rvcn_dec_vp9_probs_t */
+ unsigned short classes_mask[2]; /* the [2] arrays here parallel mv_comps[2] in rvcn_dec_vp9_probs_t -- presumably one entry per MV component, confirm */
+ unsigned short bits_mask[2];
+ unsigned char joints_mask;
+ unsigned char sign_mask[2];
+ unsigned char class0_mask[2];
+ unsigned char class0_fp_mask[2];
+ unsigned char fp_mask[2];
+ unsigned char class0_hp_mask[2];
+ unsigned char hp_mask[2];
+ unsigned char reserve[11]; /* the fields above occupy 21 bytes; with this the struct totals 32 bytes, assuming no compiler-inserted padding */
+} rvcn_dec_vp9_nmv_ctx_mask_t;
+
+typedef struct rvcn_dec_vp9_nmv_component_s{ /* element type of mv_comps[2] in rvcn_dec_vp9_probs_t */
+ unsigned char sign;
+ unsigned char classes[10];
+ unsigned char class0[1];
+ unsigned char bits[10];
+ unsigned char class0_fp[2][3];
+ unsigned char fp[3];
+ unsigned char class0_hp;
+ unsigned char hp;
+} rvcn_dec_vp9_nmv_component_t; /* 33 bytes, all unsigned char members */
+
+typedef struct rvcn_dec_vp9_probs_s { /* VP9 probability tables; overlaid with a RDECODE_VP9_PROBS_DATA_SIZE-byte array in rvcn_dec_vp9_probs_segment_t */
+ rvcn_dec_vp9_nmv_ctx_mask_t nmvc_mask;
+ unsigned char coef_probs[4][2][2][6][6][3]; /* 1728 bytes, by far the largest table in this struct */
+ unsigned char y_mode_prob[4][9];
+ unsigned char uv_mode_prob[10][9];
+ unsigned char single_ref_prob[5][2];
+ unsigned char switchable_interp_prob[4][2];
+ unsigned char partition_prob[16][3];
+ unsigned char inter_mode_probs[7][3];
+ unsigned char mbskip_probs[3];
+ unsigned char intra_inter_prob[4];
+ unsigned char comp_inter_prob[5];
+ unsigned char comp_ref_prob[5];
+ unsigned char tx_probs_32x32[2][3];
+ unsigned char tx_probs_16x16[2][2];
+ unsigned char tx_probs_8x8[2][1];
+ unsigned char mv_joints[3];
+ rvcn_dec_vp9_nmv_component_t mv_comps[2];
+} rvcn_dec_vp9_probs_t;
+
+typedef struct rvcn_dec_vp9_probs_segment_s { /* two consecutive anonymous unions: probability tables, then segmentation data */
+ union { /* structured view and raw byte view of the same storage */
+ rvcn_dec_vp9_probs_t probs;
+ unsigned char probs_data[RDECODE_VP9_PROBS_DATA_SIZE]; /* makes this union at least RDECODE_VP9_PROBS_DATA_SIZE (2304) bytes regardless of sizeof(probs) */
+ };
+
+ union { /* structured view and raw byte view of the segmentation data */
+ struct {
+ unsigned int feature_data[8];
+ unsigned char tree_probs[7];
+ unsigned char pred_probs[3];
+ unsigned char abs_delta;
+ unsigned char feature_mask[8]; /* same element count as feature_data[] */
+ } seg;
+ unsigned char segment_data[256]; /* makes this union at least 256 bytes */
+ };
+} rvcn_dec_vp9_probs_segment_t;
+
+struct jpeg_params { /* embedded in struct radeon_decoder as the member 'jpg' */
+ unsigned bsd_size;
+ unsigned dt_pitch; /* NOTE(review): 'dt_' presumably abbreviates the decode target surface -- confirm where these fields are filled in */
+ unsigned dt_uv_pitch;
+ unsigned dt_luma_top_offset;
+ unsigned dt_chroma_top_offset;
+};
+
+struct radeon_decoder { /* per-decoder state set up by radeon_create_decoder() */
+ struct pipe_video_codec base; /* first member: radeon_create_decoder() returns &dec->base and radeon_dec_end_frame() casts the pipe_video_codec pointer back to radeon_decoder */
+
+ unsigned stream_handle; /* obtained from si_vid_alloc_stream_handle() in radeon_create_decoder() */
+ unsigned stream_type; /* an RDECODE_CODEC_* value; radeon_create_decoder() compares it against RDECODE_CODEC_H264_PERF */
+ unsigned frame_number;
+
+ struct pipe_screen *screen; /* context->screen of the creating context */
+ struct radeon_winsys *ws;
+ struct radeon_cmdbuf *cs; /* from ws->cs_create() on RING_VCN_DEC, or RING_VCN_JPEG for PIPE_VIDEO_FORMAT_JPEG */
+
+ void *msg;
+ uint32_t *fb;
+ uint8_t *it;
+ uint8_t *probs;
+ void *bs_ptr; /* radeon_dec_end_frame() returns without submitting anything while this is NULL */
+
+ struct rvid_buffer msg_fb_it_probs_buffers[NUM_BUFFERS]; /* each FB_BUFFER_OFFSET + FB_BUFFER_SIZE bytes, plus IT_SCALING_TABLE_SIZE if have_it() or else VP9_PROBS_TABLE_SIZE if have_probs() */
+ struct rvid_buffer bs_buffers[NUM_BUFFERS];
+ struct rvid_buffer dpb; /* created only when calc_dpb_size() returns non-zero */
+ struct rvid_buffer ctx;
+ struct rvid_buffer sessionctx;
+
+ unsigned bs_size;
+ unsigned cur_buffer;
+ void *render_pic_list[16]; /* entries are reset to NULL in radeon_create_decoder() */
+ bool show_frame;
+ unsigned ref_idx;
+ struct jpeg_params jpg;
+ void (*send_cmd)(struct radeon_decoder *dec,
+ struct pipe_video_buffer *target,
+ struct pipe_picture_desc *picture); /* set to send_cmd_jpeg for RDECODE_CODEC_JPEG and send_cmd_dec otherwise; called from radeon_dec_end_frame() before the flush */
+};
+
+void send_cmd_dec(struct radeon_decoder *dec,
+ struct pipe_video_buffer *target,
+ struct pipe_picture_desc *picture); /* installed as dec->send_cmd by radeon_create_decoder() for every stream type except RDECODE_CODEC_JPEG */
+
+void send_cmd_jpeg(struct radeon_decoder *dec,
+ struct pipe_video_buffer *target,
+ struct pipe_picture_desc *picture); /* installed as dec->send_cmd by radeon_create_decoder() when the stream type is RDECODE_CODEC_JPEG */
+
struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
const struct pipe_video_codec *templat);
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_video.c b/lib/mesa/src/gallium/drivers/radeon/radeon_video.c
index 2a7ad187b..a39ce4cc7 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_video.c
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_video.c
@@ -25,12 +25,6 @@
*
**************************************************************************/
-/*
- * Authors:
- * Christian König <christian.koenig@amd.com>
- *
- */
-
#include <unistd.h>
#include "util/u_memory.h"
@@ -39,12 +33,10 @@
#include "vl/vl_defines.h"
#include "vl/vl_video_buffer.h"
-#include "r600_pipe_common.h"
+#include "radeonsi/si_pipe.h"
#include "radeon_video.h"
#include "radeon_vce.h"
-#define UVD_FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8))
-
/* generate an stream handle */
unsigned si_vid_alloc_stream_handle()
{
@@ -71,9 +63,8 @@ bool si_vid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer
* able to move buffers around individually, so request a
* non-sub-allocated buffer.
*/
- buffer->res = (struct r600_resource *)
- pipe_buffer_create(screen, PIPE_BIND_SHARED,
- usage, size);
+ buffer->res = r600_resource(pipe_buffer_create(screen, PIPE_BIND_SHARED,
+ usage, size));
return buffer->res != NULL;
}
@@ -85,11 +76,11 @@ void si_vid_destroy_buffer(struct rvid_buffer *buffer)
}
/* reallocate a buffer, preserving its content */
-bool si_vid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
+bool si_vid_resize_buffer(struct pipe_screen *screen, struct radeon_cmdbuf *cs,
struct rvid_buffer *new_buf, unsigned new_size)
{
- struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
- struct radeon_winsys* ws = rscreen->ws;
+ struct si_screen *sscreen = (struct si_screen *)screen;
+ struct radeon_winsys* ws = sscreen->ws;
unsigned bytes = MIN2(new_buf->res->buf->size, new_size);
struct rvid_buffer old_buf = *new_buf;
void *src = NULL, *dst = NULL;
@@ -127,10 +118,9 @@ error:
/* clear the buffer with zeros */
void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
{
- struct r600_common_context *rctx = (struct r600_common_context*)context;
+ struct si_context *sctx = (struct si_context*)context;
- rctx->dma_clear_buffer(context, &buffer->res->b.b, 0,
- buffer->res->buf->size, 0);
+ si_sdma_clear_buffer(sctx, &buffer->res->b.b, 0, buffer->res->buf->size, 0);
context->flush(context, NULL, 0);
}
@@ -138,25 +128,23 @@ void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffe
* join surfaces into the same buffer with identical tiling params
* sumup their sizes and replace the backend buffers with a single bo
*/
-void si_vid_join_surfaces(struct r600_common_context *rctx,
+void si_vid_join_surfaces(struct si_context *sctx,
struct pb_buffer** buffers[VL_NUM_COMPONENTS],
struct radeon_surf *surfaces[VL_NUM_COMPONENTS])
{
- struct radeon_winsys* ws;
+ struct radeon_winsys *ws = sctx->ws;;
unsigned best_tiling, best_wh, off;
unsigned size, alignment;
struct pb_buffer *pb;
unsigned i, j;
- ws = rctx->ws;
-
for (i = 0, best_tiling = 0, best_wh = ~0; i < VL_NUM_COMPONENTS; ++i) {
unsigned wh;
if (!surfaces[i])
continue;
- if (rctx->chip_class < GFX9) {
+ if (sctx->chip_class < GFX9) {
/* choose the smallest bank w/h for now */
wh = surfaces[i]->u.legacy.bankw * surfaces[i]->u.legacy.bankh;
if (wh < best_wh) {
@@ -173,7 +161,7 @@ void si_vid_join_surfaces(struct r600_common_context *rctx,
/* adjust the texture layer offsets */
off = align(off, surfaces[i]->surf_alignment);
- if (rctx->chip_class < GFX9) {
+ if (sctx->chip_class < GFX9) {
/* copy the tiling parameters */
surfaces[i]->u.legacy.bankw = surfaces[best_tiling]->u.legacy.bankw;
surfaces[i]->u.legacy.bankh = surfaces[best_tiling]->u.legacy.bankh;
@@ -220,149 +208,3 @@ void si_vid_join_surfaces(struct r600_common_context *rctx,
pb_reference(&pb, NULL);
}
-
-int si_vid_get_video_param(struct pipe_screen *screen,
- enum pipe_video_profile profile,
- enum pipe_video_entrypoint entrypoint,
- enum pipe_video_cap param)
-{
- struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
- enum pipe_video_format codec = u_reduce_video_profile(profile);
- struct radeon_info info;
-
- rscreen->ws->query_info(rscreen->ws, &info);
-
- if (entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
- switch (param) {
- case PIPE_VIDEO_CAP_SUPPORTED:
- return codec == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
- si_vce_is_fw_version_supported(rscreen);
- case PIPE_VIDEO_CAP_NPOT_TEXTURES:
- return 1;
- case PIPE_VIDEO_CAP_MAX_WIDTH:
- return (rscreen->family < CHIP_TONGA) ? 2048 : 4096;
- case PIPE_VIDEO_CAP_MAX_HEIGHT:
- return (rscreen->family < CHIP_TONGA) ? 1152 : 2304;
- case PIPE_VIDEO_CAP_PREFERED_FORMAT:
- return PIPE_FORMAT_NV12;
- case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
- return false;
- case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
- return false;
- case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
- return true;
- case PIPE_VIDEO_CAP_STACKED_FRAMES:
- return (rscreen->family < CHIP_TONGA) ? 1 : 2;
- default:
- return 0;
- }
- }
-
- switch (param) {
- case PIPE_VIDEO_CAP_SUPPORTED:
- switch (codec) {
- case PIPE_VIDEO_FORMAT_MPEG12:
- return profile != PIPE_VIDEO_PROFILE_MPEG1;
- case PIPE_VIDEO_FORMAT_MPEG4:
- return 1;
- case PIPE_VIDEO_FORMAT_MPEG4_AVC:
- if ((rscreen->family == CHIP_POLARIS10 ||
- rscreen->family == CHIP_POLARIS11) &&
- info.uvd_fw_version < UVD_FW_1_66_16 ) {
- RVID_ERR("POLARIS10/11 firmware version need to be updated.\n");
- return false;
- }
- return true;
- case PIPE_VIDEO_FORMAT_VC1:
- return true;
- case PIPE_VIDEO_FORMAT_HEVC:
- /* Carrizo only supports HEVC Main */
- if (rscreen->family >= CHIP_STONEY)
- return (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN ||
- profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10);
- else if (rscreen->family >= CHIP_CARRIZO)
- return profile == PIPE_VIDEO_PROFILE_HEVC_MAIN;
- return false;
- case PIPE_VIDEO_FORMAT_JPEG:
- if (rscreen->family < CHIP_CARRIZO || rscreen->family >= CHIP_VEGA10)
- return false;
- if (!(rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 19)) {
- RVID_ERR("No MJPEG support for the kernel version\n");
- return false;
- }
- return true;
- default:
- return false;
- }
- case PIPE_VIDEO_CAP_NPOT_TEXTURES:
- return 1;
- case PIPE_VIDEO_CAP_MAX_WIDTH:
- return (rscreen->family < CHIP_TONGA) ? 2048 : 4096;
- case PIPE_VIDEO_CAP_MAX_HEIGHT:
- return (rscreen->family < CHIP_TONGA) ? 1152 : 4096;
- case PIPE_VIDEO_CAP_PREFERED_FORMAT:
- if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
- return PIPE_FORMAT_P016;
- else
- return PIPE_FORMAT_NV12;
-
- case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
- case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: {
- enum pipe_video_format format = u_reduce_video_profile(profile);
-
- if (format == PIPE_VIDEO_FORMAT_HEVC)
- return false; //The firmware doesn't support interlaced HEVC.
- else if (format == PIPE_VIDEO_FORMAT_JPEG)
- return false;
- return true;
- }
- case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
- return true;
- case PIPE_VIDEO_CAP_MAX_LEVEL:
- switch (profile) {
- case PIPE_VIDEO_PROFILE_MPEG1:
- return 0;
- case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
- case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
- return 3;
- case PIPE_VIDEO_PROFILE_MPEG4_SIMPLE:
- return 3;
- case PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE:
- return 5;
- case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
- return 1;
- case PIPE_VIDEO_PROFILE_VC1_MAIN:
- return 2;
- case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
- return 4;
- case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
- case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
- case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
- return (rscreen->family < CHIP_TONGA) ? 41 : 52;
- case PIPE_VIDEO_PROFILE_HEVC_MAIN:
- case PIPE_VIDEO_PROFILE_HEVC_MAIN_10:
- return 186;
- default:
- return 0;
- }
- default:
- return 0;
- }
-}
-
-boolean si_vid_is_format_supported(struct pipe_screen *screen,
- enum pipe_format format,
- enum pipe_video_profile profile,
- enum pipe_video_entrypoint entrypoint)
-{
- /* HEVC 10 bit decoding should use P016 instead of NV12 if possible */
- if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
- return (format == PIPE_FORMAT_NV12) ||
- (format == PIPE_FORMAT_P016);
-
- /* we can only handle this one with UVD */
- if (profile != PIPE_VIDEO_PROFILE_UNKNOWN)
- return format == PIPE_FORMAT_NV12;
-
- return vl_video_buffer_is_format_supported(screen, format, profile, entrypoint);
-}
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_video.h b/lib/mesa/src/gallium/drivers/radeon/radeon_video.h
index 7e70be98b..71904b313 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_video.h
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_video.h
@@ -25,12 +25,6 @@
*
**************************************************************************/
-/*
- * Authors:
- * Christian König <christian.koenig@amd.com>
- *
- */
-
#ifndef RADEON_VIDEO_H
#define RADEON_VIDEO_H
@@ -40,6 +34,8 @@
#define RVID_ERR(fmt, args...) \
fprintf(stderr, "EE %s:%d %s UVD - "fmt, __FILE__, __LINE__, __func__, ##args)
+#define UVD_FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8))
+
/* video buffer representation */
struct rvid_buffer
{
@@ -58,7 +54,7 @@ bool si_vid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer
void si_vid_destroy_buffer(struct rvid_buffer *buffer);
/* reallocate a buffer, preserving its content */
-bool si_vid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
+bool si_vid_resize_buffer(struct pipe_screen *screen, struct radeon_cmdbuf *cs,
struct rvid_buffer *new_buf, unsigned new_size);
/* clear the buffer with zeros */
@@ -66,20 +62,8 @@ void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffe
/* join surfaces into the same buffer with identical tiling params
sumup their sizes and replace the backend buffers with a single bo */
-void si_vid_join_surfaces(struct r600_common_context *rctx,
+void si_vid_join_surfaces(struct si_context *sctx,
struct pb_buffer** buffers[VL_NUM_COMPONENTS],
struct radeon_surf *surfaces[VL_NUM_COMPONENTS]);
-/* returns supported codecs and other parameters */
-int si_vid_get_video_param(struct pipe_screen *screen,
- enum pipe_video_profile profile,
- enum pipe_video_entrypoint entrypoint,
- enum pipe_video_cap param);
-
-/* the hardware only supports NV12 */
-boolean si_vid_is_format_supported(struct pipe_screen *screen,
- enum pipe_format format,
- enum pipe_video_profile profile,
- enum pipe_video_entrypoint entrypoint);
-
#endif // RADEON_VIDEO_H
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_winsys.h b/lib/mesa/src/gallium/drivers/radeon/radeon_winsys.h
index 206c299ac..c6800808c 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_winsys.h
@@ -1,6 +1,8 @@
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -26,14 +28,18 @@
/* The public winsys interface header for the radeon driver. */
+/* Whether the next IB can start immediately and not wait for draws and
+ * dispatches from the current IB to finish. */
+#define RADEON_FLUSH_START_NEXT_GFX_IB_NOW (1u << 31)
+
+#define RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW \
+ (PIPE_FLUSH_ASYNC | RADEON_FLUSH_START_NEXT_GFX_IB_NOW)
+
#include "pipebuffer/pb_buffer.h"
#include "amd/common/ac_gpu_info.h"
#include "amd/common/ac_surface.h"
-#define RADEON_FLUSH_ASYNC (1 << 0)
-#define RADEON_FLUSH_END_OF_FRAME (1 << 1)
-
/* Tiling flags. */
enum radeon_bo_layout {
RADEON_LAYOUT_LINEAR = 0,
@@ -55,6 +61,8 @@ enum radeon_bo_flag { /* bitfield */
RADEON_FLAG_NO_SUBALLOC = (1 << 2),
RADEON_FLAG_SPARSE = (1 << 3),
RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4),
+ RADEON_FLAG_READ_ONLY = (1 << 5),
+ RADEON_FLAG_32BIT = (1 << 6),
};
enum radeon_bo_usage { /* bitfield */
@@ -78,6 +86,8 @@ enum ring_type {
RING_VCE,
RING_UVD_ENC,
RING_VCN_DEC,
+ RING_VCN_ENC,
+ RING_VCN_JPEG,
RING_LAST,
};
@@ -106,71 +116,65 @@ enum radeon_value_id {
RADEON_CS_THREAD_TIME,
};
-/* Each group of four has the same priority. */
enum radeon_bo_priority {
+ /* Each group of two has the same priority. */
RADEON_PRIO_FENCE = 0,
RADEON_PRIO_TRACE,
- RADEON_PRIO_SO_FILLED_SIZE,
+
+ RADEON_PRIO_SO_FILLED_SIZE = 2,
RADEON_PRIO_QUERY,
RADEON_PRIO_IB1 = 4, /* main IB submitted to the kernel */
RADEON_PRIO_IB2, /* IB executed with INDIRECT_BUFFER */
- RADEON_PRIO_DRAW_INDIRECT,
- RADEON_PRIO_INDEX_BUFFER,
- RADEON_PRIO_VCE = 8,
- RADEON_PRIO_UVD,
- RADEON_PRIO_SDMA_BUFFER,
- RADEON_PRIO_SDMA_TEXTURE,
+ RADEON_PRIO_DRAW_INDIRECT = 6,
+ RADEON_PRIO_INDEX_BUFFER,
- RADEON_PRIO_CP_DMA = 12,
+ RADEON_PRIO_CP_DMA = 8,
+ RADEON_PRIO_BORDER_COLORS,
- RADEON_PRIO_CONST_BUFFER = 16,
+ RADEON_PRIO_CONST_BUFFER = 10,
RADEON_PRIO_DESCRIPTORS,
- RADEON_PRIO_BORDER_COLORS,
- RADEON_PRIO_SAMPLER_BUFFER = 20,
+ RADEON_PRIO_SAMPLER_BUFFER = 12,
RADEON_PRIO_VERTEX_BUFFER,
- RADEON_PRIO_SHADER_RW_BUFFER = 24,
+ RADEON_PRIO_SHADER_RW_BUFFER = 14,
RADEON_PRIO_COMPUTE_GLOBAL,
- RADEON_PRIO_SAMPLER_TEXTURE = 28,
+ RADEON_PRIO_SAMPLER_TEXTURE = 16,
RADEON_PRIO_SHADER_RW_IMAGE,
- RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 32,
+ RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 18,
+ RADEON_PRIO_COLOR_BUFFER,
- RADEON_PRIO_COLOR_BUFFER = 36,
+ RADEON_PRIO_DEPTH_BUFFER = 20,
- RADEON_PRIO_DEPTH_BUFFER = 40,
+ RADEON_PRIO_COLOR_BUFFER_MSAA = 22,
- RADEON_PRIO_COLOR_BUFFER_MSAA = 44,
+ RADEON_PRIO_DEPTH_BUFFER_MSAA = 24,
- RADEON_PRIO_DEPTH_BUFFER_MSAA = 48,
-
- RADEON_PRIO_CMASK = 52,
- RADEON_PRIO_DCC,
- RADEON_PRIO_HTILE,
+ RADEON_PRIO_SEPARATE_META = 26,
RADEON_PRIO_SHADER_BINARY, /* the hw can't hide instruction cache misses */
- RADEON_PRIO_SHADER_RINGS = 56,
+ RADEON_PRIO_SHADER_RINGS = 28,
- RADEON_PRIO_SCRATCH_BUFFER = 60,
- /* 63 is the maximum value */
+ RADEON_PRIO_SCRATCH_BUFFER = 30,
+ /* 31 is the maximum value */
};
struct winsys_handle;
struct radeon_winsys_ctx;
-struct radeon_winsys_cs_chunk {
+struct radeon_cmdbuf_chunk {
unsigned cdw; /* Number of used dwords. */
unsigned max_dw; /* Maximum number of dwords. */
uint32_t *buf; /* The base pointer of the chunk. */
};
-struct radeon_winsys_cs {
- struct radeon_winsys_cs_chunk current;
- struct radeon_winsys_cs_chunk *prev;
+struct radeon_cmdbuf {
+ struct radeon_cmdbuf_chunk current;
+ struct radeon_cmdbuf_chunk *prev;
unsigned num_prev; /* Number of previous chunks. */
unsigned max_prev; /* Space in array pointed to by prev. */
unsigned prev_dw; /* Total number of dwords in previous chunks. */
@@ -221,7 +225,7 @@ enum radeon_feature_id {
struct radeon_bo_list_item {
uint64_t bo_size;
uint64_t vm_address;
- uint64_t priority_usage; /* mask of (1 << RADEON_PRIO_*) */
+ uint32_t priority_usage; /* mask of (1 << RADEON_PRIO_*) */
};
struct radeon_winsys {
@@ -254,6 +258,14 @@ struct radeon_winsys {
void (*query_info)(struct radeon_winsys *ws,
struct radeon_info *info);
+ /**
+ * A hint for the winsys that it should pin its execution threads to
+ * a group of cores sharing a specific L3 cache if the CPU has multiple
+ * L3 caches. This is needed for good multithreading performance on
+ * AMD Zen CPUs.
+ */
+ void (*pin_threads_to_L3_cache)(struct radeon_winsys *ws, unsigned cache);
+
/**************************************************************************
* Buffer management. Buffer attributes are mostly fixed over its lifetime.
*
@@ -288,7 +300,7 @@ struct radeon_winsys {
* \return The pointer at the beginning of the buffer.
*/
void *(*buffer_map)(struct pb_buffer *buf,
- struct radeon_winsys_cs *cs,
+ struct radeon_cmdbuf *cs,
enum pipe_transfer_usage usage);
/**
@@ -451,7 +463,7 @@ struct radeon_winsys {
* \param flush Flush callback function associated with the command stream.
* \param user User pointer that will be passed to the flush callback.
*/
- struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys_ctx *ctx,
+ struct radeon_cmdbuf *(*cs_create)(struct radeon_winsys_ctx *ctx,
enum ring_type ring_type,
void (*flush)(void *ctx, unsigned flags,
struct pipe_fence_handle **fence),
@@ -462,7 +474,7 @@ struct radeon_winsys {
*
* \param cs A command stream to destroy.
*/
- void (*cs_destroy)(struct radeon_winsys_cs *cs);
+ void (*cs_destroy)(struct radeon_cmdbuf *cs);
/**
* Add a buffer. Each buffer used by a CS must be added using this function.
@@ -475,7 +487,7 @@ struct radeon_winsys {
* placed in the requested domain. 15 is the maximum.
* \return Buffer index.
*/
- unsigned (*cs_add_buffer)(struct radeon_winsys_cs *cs,
+ unsigned (*cs_add_buffer)(struct radeon_cmdbuf *cs,
struct pb_buffer *buf,
enum radeon_bo_usage usage,
enum radeon_bo_domain domain,
@@ -491,7 +503,7 @@ struct radeon_winsys {
* \param buf Buffer
* \return The buffer index, or -1 if the buffer has not been added.
*/
- int (*cs_lookup_buffer)(struct radeon_winsys_cs *cs,
+ int (*cs_lookup_buffer)(struct radeon_cmdbuf *cs,
struct pb_buffer *buf);
/**
@@ -502,7 +514,7 @@ struct radeon_winsys {
*
* \param cs A command stream to validate.
*/
- bool (*cs_validate)(struct radeon_winsys_cs *cs);
+ bool (*cs_validate)(struct radeon_cmdbuf *cs);
/**
* Check whether the given number of dwords is available in the IB.
@@ -511,7 +523,7 @@ struct radeon_winsys {
* \param cs A command stream.
* \param dw Number of CS dwords requested by the caller.
*/
- bool (*cs_check_space)(struct radeon_winsys_cs *cs, unsigned dw);
+ bool (*cs_check_space)(struct radeon_cmdbuf *cs, unsigned dw);
/**
* Return the buffer list.
@@ -523,29 +535,31 @@ struct radeon_winsys {
* \param list Returned buffer list. Set to NULL to query the count only.
* \return The buffer count.
*/
- unsigned (*cs_get_buffer_list)(struct radeon_winsys_cs *cs,
+ unsigned (*cs_get_buffer_list)(struct radeon_cmdbuf *cs,
struct radeon_bo_list_item *list);
/**
* Flush a command stream.
*
* \param cs A command stream to flush.
- * \param flags, RADEON_FLUSH_ASYNC or 0.
+ * \param flags, PIPE_FLUSH_* flags.
* \param fence Pointer to a fence. If non-NULL, a fence is inserted
* after the CS and is returned through this parameter.
* \return Negative POSIX error code or 0 for success.
* Asynchronous submissions never return an error.
*/
- int (*cs_flush)(struct radeon_winsys_cs *cs,
+ int (*cs_flush)(struct radeon_cmdbuf *cs,
unsigned flags,
struct pipe_fence_handle **fence);
/**
* Create a fence before the CS is flushed.
* The user must flush manually to complete the initialization of the fence.
- * The fence must not be used before the flush.
+ *
+ * The fence must not be used for anything except \ref cs_add_fence_dependency
+ * before the flush.
*/
- struct pipe_fence_handle *(*cs_get_next_fence)(struct radeon_winsys_cs *cs);
+ struct pipe_fence_handle *(*cs_get_next_fence)(struct radeon_cmdbuf *cs);
/**
* Return true if a buffer is referenced by a command stream.
@@ -553,7 +567,7 @@ struct radeon_winsys {
* \param cs A command stream.
* \param buf A winsys buffer.
*/
- bool (*cs_is_buffer_referenced)(struct radeon_winsys_cs *cs,
+ bool (*cs_is_buffer_referenced)(struct radeon_cmdbuf *cs,
struct pb_buffer *buf,
enum radeon_bo_usage usage);
@@ -564,7 +578,7 @@ struct radeon_winsys {
* \param fid Feature ID, one of RADEON_FID_*
* \param enable Whether to enable or disable the feature.
*/
- bool (*cs_request_feature)(struct radeon_winsys_cs *cs,
+ bool (*cs_request_feature)(struct radeon_cmdbuf *cs,
enum radeon_feature_id fid,
bool enable);
/**
@@ -572,16 +586,22 @@ struct radeon_winsys {
*
* \param cs A command stream.
*/
- void (*cs_sync_flush)(struct radeon_winsys_cs *cs);
+ void (*cs_sync_flush)(struct radeon_cmdbuf *cs);
/**
* Add a fence dependency to the CS, so that the CS will wait for
* the fence before execution.
*/
- void (*cs_add_fence_dependency)(struct radeon_winsys_cs *cs,
+ void (*cs_add_fence_dependency)(struct radeon_cmdbuf *cs,
struct pipe_fence_handle *fence);
/**
+ * Signal a syncobj when the CS finishes execution.
+ */
+ void (*cs_add_syncobj_signal)(struct radeon_cmdbuf *cs,
+ struct pipe_fence_handle *fence);
+
+ /**
* Wait for the fence and return true if the fence has been signalled.
* The timeout of 0 will only return the status.
* The timeout of PIPE_TIMEOUT_INFINITE will always wait until the fence
@@ -598,6 +618,12 @@ struct radeon_winsys {
struct pipe_fence_handle *src);
/**
+ * Create a new fence object corresponding to the given syncobj fd.
+ */
+ struct pipe_fence_handle *(*fence_import_syncobj)(struct radeon_winsys *ws,
+ int fd);
+
+ /**
* Create a new fence object corresponding to the given sync_file.
*/
struct pipe_fence_handle *(*fence_import_sync_file)(struct radeon_winsys *ws,
@@ -639,17 +665,17 @@ struct radeon_winsys {
const char* (*get_chip_name)(struct radeon_winsys *ws);
};
-static inline bool radeon_emitted(struct radeon_winsys_cs *cs, unsigned num_dw)
+static inline bool radeon_emitted(struct radeon_cmdbuf *cs, unsigned num_dw)
{
return cs && (cs->prev_dw + cs->current.cdw > num_dw);
}
-static inline void radeon_emit(struct radeon_winsys_cs *cs, uint32_t value)
+static inline void radeon_emit(struct radeon_cmdbuf *cs, uint32_t value)
{
cs->current.buf[cs->current.cdw++] = value;
}
-static inline void radeon_emit_array(struct radeon_winsys_cs *cs,
+static inline void radeon_emit_array(struct radeon_cmdbuf *cs,
const uint32_t *values, unsigned count)
{
memcpy(cs->current.buf + cs->current.cdw, values, count * 4);
@@ -658,9 +684,14 @@ static inline void radeon_emit_array(struct radeon_winsys_cs *cs,
enum radeon_heap {
RADEON_HEAP_VRAM_NO_CPU_ACCESS,
+ RADEON_HEAP_VRAM_READ_ONLY,
+ RADEON_HEAP_VRAM_READ_ONLY_32BIT,
+ RADEON_HEAP_VRAM_32BIT,
RADEON_HEAP_VRAM,
- RADEON_HEAP_VRAM_GTT, /* combined heaps */
RADEON_HEAP_GTT_WC,
+ RADEON_HEAP_GTT_WC_READ_ONLY,
+ RADEON_HEAP_GTT_WC_READ_ONLY_32BIT,
+ RADEON_HEAP_GTT_WC_32BIT,
RADEON_HEAP_GTT,
RADEON_MAX_SLAB_HEAPS,
RADEON_MAX_CACHED_HEAPS = RADEON_MAX_SLAB_HEAPS,
@@ -670,11 +701,15 @@ static inline enum radeon_bo_domain radeon_domain_from_heap(enum radeon_heap hea
{
switch (heap) {
case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
+ case RADEON_HEAP_VRAM_READ_ONLY:
+ case RADEON_HEAP_VRAM_READ_ONLY_32BIT:
+ case RADEON_HEAP_VRAM_32BIT:
case RADEON_HEAP_VRAM:
return RADEON_DOMAIN_VRAM;
- case RADEON_HEAP_VRAM_GTT:
- return RADEON_DOMAIN_VRAM_GTT;
case RADEON_HEAP_GTT_WC:
+ case RADEON_HEAP_GTT_WC_READ_ONLY:
+ case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT:
+ case RADEON_HEAP_GTT_WC_32BIT:
case RADEON_HEAP_GTT:
return RADEON_DOMAIN_GTT;
default:
@@ -685,40 +720,35 @@ static inline enum radeon_bo_domain radeon_domain_from_heap(enum radeon_heap hea
static inline unsigned radeon_flags_from_heap(enum radeon_heap heap)
{
+ unsigned flags = RADEON_FLAG_NO_INTERPROCESS_SHARING |
+ (heap != RADEON_HEAP_GTT ? RADEON_FLAG_GTT_WC : 0);
+
switch (heap) {
case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
- return RADEON_FLAG_GTT_WC |
- RADEON_FLAG_NO_CPU_ACCESS |
- RADEON_FLAG_NO_INTERPROCESS_SHARING;
+ return flags |
+ RADEON_FLAG_NO_CPU_ACCESS;
- case RADEON_HEAP_VRAM:
- case RADEON_HEAP_VRAM_GTT:
- case RADEON_HEAP_GTT_WC:
- return RADEON_FLAG_GTT_WC |
- RADEON_FLAG_NO_INTERPROCESS_SHARING;
+ case RADEON_HEAP_VRAM_READ_ONLY:
+ case RADEON_HEAP_GTT_WC_READ_ONLY:
+ return flags |
+ RADEON_FLAG_READ_ONLY;
- case RADEON_HEAP_GTT:
- default:
- return RADEON_FLAG_NO_INTERPROCESS_SHARING;
- }
-}
+ case RADEON_HEAP_VRAM_READ_ONLY_32BIT:
+ case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT:
+ return flags |
+ RADEON_FLAG_READ_ONLY |
+ RADEON_FLAG_32BIT;
+
+ case RADEON_HEAP_VRAM_32BIT:
+ case RADEON_HEAP_GTT_WC_32BIT:
+ return flags |
+ RADEON_FLAG_32BIT;
-/* The pb cache bucket is chosen to minimize pb_cache misses.
- * It must be between 0 and 3 inclusive.
- */
-static inline unsigned radeon_get_pb_cache_bucket_index(enum radeon_heap heap)
-{
- switch (heap) {
- case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
- return 0;
case RADEON_HEAP_VRAM:
- case RADEON_HEAP_VRAM_GTT:
- return 1;
case RADEON_HEAP_GTT_WC:
- return 2;
case RADEON_HEAP_GTT:
default:
- return 3;
+ return flags;
}
}
@@ -738,22 +768,60 @@ static inline int radeon_get_heap_index(enum radeon_bo_domain domain,
/* Unsupported flags: NO_SUBALLOC, SPARSE. */
if (flags & ~(RADEON_FLAG_GTT_WC |
RADEON_FLAG_NO_CPU_ACCESS |
- RADEON_FLAG_NO_INTERPROCESS_SHARING))
+ RADEON_FLAG_NO_INTERPROCESS_SHARING |
+ RADEON_FLAG_READ_ONLY |
+ RADEON_FLAG_32BIT))
return -1;
switch (domain) {
case RADEON_DOMAIN_VRAM:
- if (flags & RADEON_FLAG_NO_CPU_ACCESS)
+ switch (flags & (RADEON_FLAG_NO_CPU_ACCESS |
+ RADEON_FLAG_READ_ONLY |
+ RADEON_FLAG_32BIT)) {
+ case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT:
+ case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY:
+ assert(!"NO_CPU_ACCESS | READ_ONLY doesn't make sense");
+ return -1;
+ case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_32BIT:
+ assert(!"NO_CPU_ACCESS with 32BIT is disallowed");
+ return -1;
+ case RADEON_FLAG_NO_CPU_ACCESS:
return RADEON_HEAP_VRAM_NO_CPU_ACCESS;
- else
+ case RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT:
+ return RADEON_HEAP_VRAM_READ_ONLY_32BIT;
+ case RADEON_FLAG_READ_ONLY:
+ return RADEON_HEAP_VRAM_READ_ONLY;
+ case RADEON_FLAG_32BIT:
+ return RADEON_HEAP_VRAM_32BIT;
+ case 0:
return RADEON_HEAP_VRAM;
- case RADEON_DOMAIN_VRAM_GTT:
- return RADEON_HEAP_VRAM_GTT;
+ }
+ break;
case RADEON_DOMAIN_GTT:
- if (flags & RADEON_FLAG_GTT_WC)
+ switch (flags & (RADEON_FLAG_GTT_WC |
+ RADEON_FLAG_READ_ONLY |
+ RADEON_FLAG_32BIT)) {
+ case RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT:
+ return RADEON_HEAP_GTT_WC_READ_ONLY_32BIT;
+ case RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY:
+ return RADEON_HEAP_GTT_WC_READ_ONLY;
+ case RADEON_FLAG_GTT_WC | RADEON_FLAG_32BIT:
+ return RADEON_HEAP_GTT_WC_32BIT;
+ case RADEON_FLAG_GTT_WC:
return RADEON_HEAP_GTT_WC;
- else
+ case RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT:
+ case RADEON_FLAG_READ_ONLY:
+ assert(!"READ_ONLY without WC is disallowed");
+ return -1;
+ case RADEON_FLAG_32BIT:
+ assert(!"32BIT without WC is disallowed");
+ return -1;
+ case 0:
return RADEON_HEAP_GTT;
+ }
+ break;
+ default:
+ break;
}
return -1;
}